├── README.md ├── assets └── ov_parts.jpg ├── baselines ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── cat_seg.cpython-38.pyc │ ├── clipseg.cpython-38.pyc │ ├── config.cpython-38.pyc │ ├── mask_former_model.cpython-38.pyc │ ├── test_time_augmentation.cpython-38.pyc │ └── zero_shot_obj_part_mask_former_model.cpython-38.pyc ├── cat_seg.py ├── clipseg.py ├── config.py ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── build.cpython-38.pyc │ ├── augmentations.py │ ├── build.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── object_part_mapper.cpython-38.pyc │ │ │ └── oracle_dataset_mapper.cpython-38.pyc │ │ ├── object_part_mapper.py │ │ └── oracle_dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── coco.cpython-38.pyc │ │ │ ├── register_ade_part_234.cpython-38.pyc │ │ │ ├── register_pascal_part_116.cpython-38.pyc │ │ │ └── utils.cpython-38.pyc │ │ ├── ade20kpart234_mapping.json │ │ ├── coco.py │ │ ├── mask_cls_collect.py │ │ ├── register_ade_part_234.py │ │ ├── register_pascal_part_116.py │ │ └── utils.py │ └── transforms │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── augmentation.cpython-38.pyc │ │ ├── augmentation_impl.cpython-38.pyc │ │ └── transform.cpython-38.pyc │ │ ├── augmentation.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── evaluation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── classification_evaluation.cpython-38.pyc │ │ ├── generalized_sem_seg_evaluation.cpython-38.pyc │ │ └── pseudo_sem_seg_evaluation.cpython-38.pyc │ └── generalized_sem_seg_evaluation.py ├── mask_former_model.py ├── modeling │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── criterion.cpython-38.pyc │ │ └── matcher.cpython-38.pyc │ ├── backbone │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── clip_resnet.cpython-38.pyc │ │ │ └── swin.cpython-38.pyc │ │ ├── clip_resnet.py │ │ └── swin.py │ ├── clip_adapter │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── adapter.cpython-38.pyc │ │ │ ├── text_prompt.cpython-38.pyc │ │ │ └── utils.cpython-38.pyc │ │ ├── adapter.py │ │ ├── text_prompt.py │ │ └── utils.py │ ├── criterion.py │ ├── heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── cat_seg_head.cpython-38.pyc │ │ │ ├── mask_former_head.cpython-38.pyc │ │ │ ├── pixel_decoder.cpython-38.pyc │ │ │ ├── zero_shot_mask_former_head.cpython-38.pyc │ │ │ └── zero_shot_obj_part_mask_former_head.cpython-38.pyc │ │ ├── cat_seg_head.py │ │ ├── mask_former_head.py │ │ ├── pixel_decoder.py │ │ ├── zero_shot_mask_former_head.py │ │ └── zero_shot_obj_part_mask_former_head.py │ ├── matcher.py │ └── transformer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── cat_seg_predictor.cpython-38.pyc │ │ ├── model.cpython-38.pyc │ │ ├── position_encoding.cpython-38.pyc │ │ ├── transformer.cpython-38.pyc │ │ ├── transformer_predictor.cpython-38.pyc │ │ ├── zero_shot_obj_part_transformer_predictor.cpython-38.pyc │ │ └── zero_shot_transformer_predictor.cpython-38.pyc │ │ ├── cat_seg_predictor.py │ │ ├── model.py │ │ ├── position_encoding.py │ │ ├── transformer.py │ │ ├── transformer_predictor.py │ │ ├── zero_shot_obj_part_transformer_predictor.py │ │ └── zero_shot_transformer_predictor.py ├── test_time_augmentation.py ├── third_party │ ├── 
__init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── clip.cpython-37.pyc │ │ ├── clip.cpython-38.pyc │ │ ├── imagenet_templates.cpython-38.pyc │ │ ├── model_vpt.cpython-37.pyc │ │ ├── model_vpt.cpython-38.pyc │ │ ├── simple_tokenizer.cpython-37.pyc │ │ └── simple_tokenizer.cpython-38.pyc │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── clip.py │ ├── imagenet_templates.py │ ├── model.py │ ├── model_vpt.py │ └── simple_tokenizer.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── events.cpython-38.pyc │ │ ├── misc.cpython-38.pyc │ │ └── post_process_utils.cpython-38.pyc │ ├── events.py │ ├── misc.py │ ├── post_process_utils.py │ └── selective_search.py └── zero_shot_obj_part_mask_former_model.py ├── configs ├── Base-VOC11K-20.yaml ├── base_catseg_config.yaml ├── cross_dataset │ └── clipseg_ade.yaml ├── few_shot │ ├── catseg_ade.yaml │ ├── catseg_voc.yaml │ ├── clipseg_ade.yaml │ └── clipseg_voc.yaml ├── maskformer_R50_bs16_20k.yaml └── zero_shot │ ├── catseg_ade.yaml │ ├── catseg_voc.yaml │ ├── clipseg_ade.yaml │ ├── clipseg_voc.yaml │ ├── zsseg+_R50_coop_ade.yaml │ └── zsseg+_R50_coop_voc.yaml ├── open_clip ├── CITATION.cff ├── HISTORY.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── pytest.ini ├── requirements-test.txt ├── requirements-training.txt ├── requirements.txt ├── setup.py ├── src │ ├── open_clip │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── constants.cpython-38.pyc │ │ │ ├── factory.cpython-38.pyc │ │ │ ├── hf_configs.cpython-38.pyc │ │ │ ├── hf_model.cpython-38.pyc │ │ │ ├── loss.cpython-38.pyc │ │ │ ├── model.cpython-38.pyc │ │ │ ├── modified_resnet.cpython-38.pyc │ │ │ ├── openai.cpython-38.pyc │ │ │ ├── pretrained.cpython-38.pyc │ │ │ ├── timm_model.cpython-38.pyc │ │ │ ├── tokenizer.cpython-38.pyc │ │ │ ├── transform.cpython-38.pyc │ │ │ ├── transformer.cpython-38.pyc │ │ │ ├── utils.cpython-38.pyc │ │ │ └── version.cpython-38.pyc │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── constants.py │ │ ├── factory.py │ │ ├── hf_configs.py │ │ ├── hf_model.py │ │ ├── loss.py │ │ ├── model.py │ │ ├── model_configs │ │ │ ├── RN101-quickgelu.json │ │ │ ├── RN101.json │ │ │ ├── RN50-quickgelu.json │ │ │ ├── RN50.json │ │ │ ├── RN50x16.json │ │ │ ├── RN50x4.json │ │ │ ├── RN50x64.json │ │ │ ├── ViT-B-16-plus-240.json │ │ │ ├── ViT-B-16-plus.json │ │ │ ├── ViT-B-16.json │ │ │ ├── ViT-B-32-plus-256.json │ │ │ ├── ViT-B-32-quickgelu.json │ │ │ ├── ViT-B-32.json │ │ │ ├── ViT-H-14.json │ │ │ ├── ViT-H-16.json │ │ │ ├── ViT-L-14-280.json │ │ │ ├── ViT-L-14-336.json │ │ │ ├── ViT-L-14.json │ │ │ ├── ViT-L-16-320.json │ │ │ ├── ViT-L-16.json │ │ │ ├── ViT-M-16-alt.json │ │ │ ├── ViT-M-16.json │ │ │ ├── ViT-M-32-alt.json │ │ │ ├── ViT-M-32.json │ │ │ ├── ViT-S-16-alt.json │ │ │ ├── ViT-S-16.json │ │ │ ├── ViT-S-32-alt.json │ │ │ ├── ViT-S-32.json │ │ │ ├── ViT-bigG-14.json │ │ │ ├── ViT-e-14.json │ │ │ ├── ViT-g-14.json │ │ │ ├── convnext_base.json │ │ │ ├── convnext_base_w.json │ │ │ ├── convnext_base_w_320.json │ │ │ ├── convnext_large.json │ │ │ ├── convnext_large_d.json │ │ │ ├── convnext_small.json │ │ │ ├── convnext_tiny.json │ │ │ ├── convnext_xlarge.json │ │ │ ├── convnext_xxlarge.json │ │ │ ├── convnext_xxlarge_320.json │ │ │ ├── mt5-base-ViT-B-32.json │ │ │ ├── mt5-xl-ViT-H-14.json │ │ │ ├── roberta-ViT-B-32.json │ │ │ ├── swin_base_patch4_window7_224.json │ │ │ ├── vit_medium_patch16_gap_256.json │ │ │ ├── vit_relpos_medium_patch16_cls_224.json │ │ │ ├── xlm-roberta-base-ViT-B-32.json │ │ │ 
└── xlm-roberta-large-ViT-H-14.json │ │ ├── modified_resnet.py │ │ ├── openai.py │ │ ├── pretrained.py │ │ ├── timm_model.py │ │ ├── tokenizer.py │ │ ├── transform.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── version.py │ ├── open_clip_torch.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── requires.txt │ │ └── top_level.txt │ └── training │ │ ├── __init__.py │ │ ├── data.py │ │ ├── distributed.py │ │ ├── file_utils.py │ │ ├── imagenet_zeroshot_data.py │ │ ├── logger.py │ │ ├── main.py │ │ ├── params.py │ │ ├── precision.py │ │ ├── profile.py │ │ ├── scheduler.py │ │ ├── train.py │ │ └── zero_shot.py └── tests │ ├── test_download_pretrained.py │ ├── test_hf_model.py │ ├── test_inference.py │ ├── test_inference_simple.py │ ├── test_num_shards.py │ ├── test_training_simple.py │ └── util_test.py ├── requirements.txt ├── train_net.py └── transformers ├── __init__.py ├── __pycache__ ├── __init__.cpython-38.pyc ├── activations.cpython-38.pyc ├── configuration_utils.cpython-38.pyc ├── convert_slow_tokenizer.cpython-38.pyc ├── deepspeed.cpython-38.pyc ├── dependency_versions_check.cpython-38.pyc ├── dependency_versions_table.cpython-38.pyc ├── dynamic_module_utils.cpython-38.pyc ├── feature_extraction_utils.cpython-38.pyc ├── file_utils.cpython-38.pyc ├── image_processing_utils.cpython-38.pyc ├── image_transforms.cpython-38.pyc ├── image_utils.cpython-38.pyc ├── modeling_outputs.cpython-38.pyc ├── modeling_utils.cpython-38.pyc ├── processing_utils.cpython-38.pyc ├── pytorch_utils.cpython-38.pyc ├── tokenization_utils.cpython-38.pyc ├── tokenization_utils_base.cpython-38.pyc └── tokenization_utils_fast.cpython-38.pyc ├── activations.py ├── activations_tf.py ├── audio_utils.py ├── benchmark ├── __init__.py ├── benchmark.py ├── benchmark_args.py ├── benchmark_args_tf.py ├── benchmark_args_utils.py ├── benchmark_tf.py └── benchmark_utils.py ├── commands ├── __init__.py ├── add_new_model.py ├── add_new_model_like.py ├── convert.py ├── download.py ├── env.py ├── lfs.py ├── pt_to_tf.py ├── run.py ├── serving.py ├── train.py ├── transformers_cli.py └── user.py ├── configuration_utils.py ├── convert_graph_to_onnx.py ├── convert_pytorch_checkpoint_to_tf2.py ├── convert_slow_tokenizer.py ├── convert_slow_tokenizers_checkpoints_to_fast.py ├── convert_tf_hub_seq_to_seq_bert_to_pytorch.py ├── data ├── __init__.py ├── data_collator.py ├── datasets │ ├── __init__.py │ ├── glue.py │ ├── language_modeling.py │ └── squad.py ├── metrics │ ├── __init__.py │ └── squad_metrics.py ├── processors │ ├── __init__.py │ ├── glue.py │ ├── squad.py │ ├── utils.py │ └── xnli.py └── test_generation_utils.py ├── debug_utils.py ├── deepspeed.py ├── dependency_versions_check.py ├── dependency_versions_table.py ├── dynamic_module_utils.py ├── feature_extraction_sequence_utils.py ├── feature_extraction_utils.py ├── file_utils.py ├── generation ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── beam_constraints.cpython-38.pyc │ ├── beam_search.cpython-38.pyc │ ├── configuration_utils.cpython-38.pyc │ ├── logits_process.cpython-38.pyc │ ├── stopping_criteria.cpython-38.pyc │ └── utils.cpython-38.pyc ├── beam_constraints.py ├── beam_search.py ├── configuration_utils.py ├── flax_logits_process.py ├── flax_utils.py ├── logits_process.py ├── stopping_criteria.py ├── streamers.py ├── tf_logits_process.py ├── tf_utils.py └── utils.py ├── generation_flax_utils.py ├── generation_tf_utils.py ├── generation_utils.py ├── hf_argparser.py ├── image_processing_utils.py ├── image_transforms.py 
├── image_utils.py ├── integrations.py ├── keras_callbacks.py ├── modelcard.py ├── modeling_flax_outputs.py ├── modeling_flax_pytorch_utils.py ├── modeling_flax_utils.py ├── modeling_outputs.py ├── modeling_tf_outputs.py ├── modeling_tf_pytorch_utils.py ├── modeling_tf_utils.py ├── modeling_utils.py ├── models ├── auto │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── auto_factory.cpython-38.pyc │ │ ├── configuration_auto.cpython-38.pyc │ │ └── modeling_auto.cpython-38.pyc │ ├── auto_factory.py │ ├── configuration_auto.py │ ├── feature_extraction_auto.py │ ├── image_processing_auto.py │ ├── modeling_auto.py │ ├── modeling_flax_auto.py │ ├── modeling_tf_auto.py │ ├── processing_auto.py │ └── tokenization_auto.py ├── clip │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── tokenization_clip.cpython-38.pyc │ │ └── tokenization_clip_fast.cpython-38.pyc │ ├── configuration_clip.py │ ├── convert_clip_original_pytorch_to_hf.py │ ├── feature_extraction_clip.py │ ├── image_processing_clip.py │ ├── modeling_clip.py │ ├── modeling_flax_clip.py │ ├── modeling_tf_clip.py │ ├── processing_clip.py │ ├── tokenization_clip.py │ └── tokenization_clip_fast.py ├── clipseg │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── configuration_clipseg.cpython-38.pyc │ │ ├── modeling_clipseg.cpython-38.pyc │ │ └── processing_clipseg.cpython-38.pyc │ ├── configuration_clipseg.py │ ├── convert_clipseg_original_pytorch_to_hf.py │ ├── modeling_clipseg.py │ └── processing_clipseg.py └── vit │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── image_processing_vit.cpython-38.pyc │ ├── configuration_vit.py │ ├── convert_dino_to_pytorch.py │ ├── convert_vit_timm_to_pytorch.py │ ├── feature_extraction_vit.py │ ├── image_processing_vit.py │ ├── modeling_flax_vit.py │ ├── modeling_tf_vit.py │ └── modeling_vit.py ├── onnx ├── __init__.py ├── __main__.py ├── config.py ├── convert.py ├── features.py └── utils.py ├── optimization.py ├── optimization_tf.py ├── pipelines ├── __init__.py ├── audio_classification.py ├── audio_utils.py ├── automatic_speech_recognition.py ├── base.py ├── conversational.py ├── depth_estimation.py ├── document_question_answering.py ├── feature_extraction.py ├── fill_mask.py ├── image_classification.py ├── image_segmentation.py ├── image_to_text.py ├── mask_generation.py ├── object_detection.py ├── pt_utils.py ├── question_answering.py ├── table_question_answering.py ├── text2text_generation.py ├── text_classification.py ├── text_generation.py ├── token_classification.py ├── video_classification.py ├── visual_question_answering.py ├── zero_shot_audio_classification.py ├── zero_shot_classification.py ├── zero_shot_image_classification.py └── zero_shot_object_detection.py ├── processing_utils.py ├── pytorch_utils.py ├── sagemaker ├── __init__.py ├── trainer_sm.py └── training_args_sm.py ├── testing_utils.py ├── tf_utils.py ├── time_series_utils.py ├── tokenization_utils.py ├── tokenization_utils_base.py ├── tokenization_utils_fast.py ├── trainer.py ├── trainer_callback.py ├── trainer_pt_utils.py ├── trainer_seq2seq.py ├── trainer_tf.py ├── trainer_utils.py ├── training_args.py ├── training_args_seq2seq.py ├── training_args_tf.py └── utils ├── __init__.py ├── __pycache__ ├── __init__.cpython-38.pyc ├── constants.cpython-38.pyc ├── doc.cpython-38.pyc ├── dummy_flax_objects.cpython-38.pyc ├── dummy_keras_nlp_objects.cpython-38.pyc ├── dummy_sentencepiece_and_tokenizers_objects.cpython-38.pyc ├── 
dummy_speech_objects.cpython-38.pyc ├── dummy_tensorflow_text_objects.cpython-38.pyc ├── dummy_tf_objects.cpython-38.pyc ├── dummy_tokenizers_objects.cpython-38.pyc ├── generic.cpython-38.pyc ├── hub.cpython-38.pyc ├── import_utils.cpython-38.pyc ├── logging.cpython-38.pyc ├── quantization_config.cpython-38.pyc └── versions.cpython-38.pyc ├── backbone_utils.py ├── bitsandbytes.py ├── constants.py ├── doc.py ├── dummy_detectron2_objects.py ├── dummy_flax_objects.py ├── dummy_keras_nlp_objects.py ├── dummy_pt_objects.py ├── dummy_sentencepiece_and_tokenizers_objects.py ├── dummy_sentencepiece_objects.py ├── dummy_speech_objects.py ├── dummy_tensorflow_text_objects.py ├── dummy_tf_objects.py ├── dummy_tokenizers_objects.py ├── dummy_vision_objects.py ├── fx.py ├── generic.py ├── hp_naming.py ├── hub.py ├── import_utils.py ├── logging.py ├── model_parallel_utils.py ├── notebook.py ├── quantization_config.py ├── sentencepiece_model_pb2.py └── versions.py /assets/ov_parts.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/assets/ov_parts.jpg -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data 2 | from . import modeling 3 | from .config import add_mask_former_config 4 | 5 | from .test_time_augmentation import SemanticSegmentorWithTTA 6 | from .mask_former_model import MaskFormer 7 | from .zero_shot_obj_part_mask_former_model import ZeroShotObjPartMaskFormer 8 | from .clipseg import CLIPSeg 9 | from .cat_seg import CATSeg -------------------------------------------------------------------------------- /baselines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/__pycache__/cat_seg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/cat_seg.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/__pycache__/clipseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/clipseg.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/__pycache__/mask_former_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/mask_former_model.cpython-38.pyc 
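A minimal usage sketch for the package laid out in `baselines/__init__.py` above (illustrative, not taken from the repository sources; it assumes `add_mask_former_config` mutates a detectron2 `CfgNode` in place and that `train_net.py` remains the real entry point):

    from detectron2.config import get_cfg
    from detectron2.modeling import build_model

    from baselines import add_mask_former_config

    cfg = get_cfg()
    add_mask_former_config(cfg)  # assumed to add the MaskFormer / zero-shot config keys in place
    cfg.merge_from_file("configs/zero_shot/zsseg+_R50_coop_voc.yaml")
    model = build_model(cfg)     # cfg.MODEL.META_ARCHITECTURE selects one of the classes imported above

The imports of MaskFormer, ZeroShotObjPartMaskFormer, CLIPSeg and CATSeg in `baselines/__init__.py` are presumably there for this registration side effect, so that detectron2's registries can resolve the meta-architecture named in the yaml configs.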
-------------------------------------------------------------------------------- /baselines/__pycache__/test_time_augmentation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/test_time_augmentation.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/__pycache__/zero_shot_obj_part_mask_former_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/__pycache__/zero_shot_obj_part_mask_former_model.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_mappers import * 2 | from . import datasets 3 | from .build import ( 4 | build_detection_train_loader, 5 | build_detection_test_loader, 6 | ) 7 | -------------------------------------------------------------------------------- /baselines/data/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/__pycache__/build.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/__pycache__/build.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
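The `baselines.data` package above re-exports custom `build_detection_train_loader` / `build_detection_test_loader` builders from `build.py`. Assuming they keep detectron2's calling convention, and with an illustrative dataset name and mapper signature (both assumptions, not taken from the repository), a test loader would be wired roughly like this, using the `SemanticObjPartDatasetMapper` imported just below:

    from detectron2.config import get_cfg

    from baselines import add_mask_former_config
    from baselines.data import build_detection_test_loader
    from baselines.data.dataset_mappers import SemanticObjPartDatasetMapper

    cfg = get_cfg()
    add_mask_former_config(cfg)
    cfg.merge_from_file("configs/zero_shot/zsseg+_R50_coop_voc.yaml")

    # "voc_obj_part_sem_seg_val" and the (cfg, is_train) mapper signature are illustrative guesses.
    loader = build_detection_test_loader(
        cfg,
        "voc_obj_part_sem_seg_val",
        mapper=SemanticObjPartDatasetMapper(cfg, is_train=False),
    )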
2 | from .object_part_mapper import SemanticObjPartDatasetMapper 3 | from .oracle_dataset_mapper import OracleDatasetMapper -------------------------------------------------------------------------------- /baselines/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/dataset_mappers/__pycache__/object_part_mapper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/dataset_mappers/__pycache__/object_part_mapper.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/dataset_mappers/__pycache__/oracle_dataset_mapper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/dataset_mappers/__pycache__/oracle_dataset_mapper.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/dataset_mappers/oracle_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | 5 | import torch 6 | 7 | 8 | from detectron2.data import detection_utils as utils 9 | from detectron2.data import transforms as T 10 | from detectron2.data import DatasetMapper 11 | 12 | 13 | class OracleDatasetMapper(DatasetMapper): 14 | def __call__(self, dataset_dict): 15 | """ 16 | Args: 17 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 18 | 19 | Returns: 20 | dict: a format that builtin models in detectron2 accept 21 | """ 22 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 23 | # USER: Write your own image loading if it's not from a file 24 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) 25 | utils.check_image_size(dataset_dict, image) 26 | 27 | # USER: Remove if you don't do semantic/panoptic segmentation. 28 | if "sem_seg_file_name" in dataset_dict: 29 | sem_seg_gt = utils.read_image( 30 | dataset_dict.pop("sem_seg_file_name"), "L" 31 | ).squeeze(2) 32 | else: 33 | sem_seg_gt = None 34 | 35 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 36 | transforms = self.augmentations(aug_input) 37 | image, sem_seg_gt = aug_input.image, aug_input.sem_seg 38 | 39 | image_shape = image.shape[:2] # h, w 40 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 41 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 42 | # Therefore it's important to use torch.Tensor. 43 | dataset_dict["image"] = torch.as_tensor( 44 | np.ascontiguousarray(image.transpose(2, 0, 1)) 45 | ) 46 | if sem_seg_gt is not None: 47 | dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) 48 | 49 | # USER: Remove if you don't use pre-computed proposals. 50 | # Most users would not need this feature. 
51 | if self.proposal_topk is not None: 52 | utils.transform_proposals( 53 | dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk 54 | ) 55 | 56 | if "annotations" in dataset_dict: 57 | self._transform_annotations(dataset_dict, transforms, image_shape) 58 | 59 | return dataset_dict 60 | -------------------------------------------------------------------------------- /baselines/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .register_pascal_part_116 import register_pascal_part_116 3 | from .register_ade_part_234 import register_ade20k_part_234 -------------------------------------------------------------------------------- /baselines/data/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/datasets/__pycache__/coco.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/datasets/__pycache__/coco.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/datasets/__pycache__/register_ade_part_234.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/datasets/__pycache__/register_ade_part_234.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/datasets/__pycache__/register_pascal_part_116.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/datasets/__pycache__/register_pascal_part_116.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/datasets/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/datasets/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/datasets/mask_cls_collect.py: -------------------------------------------------------------------------------- 1 | # From Repository: https://github.com/MendelXu/zsseg.baseline/blob/master/tools/mask_cls_collect.py 2 | 3 | import os 4 | import glob 5 | import functools 6 | from mmcv.utils import track_parallel_progress 7 | import numpy as np 8 | from PIL import Image 9 | import json 10 | import warnings 11 | import fire 12 | from itertools import chain 13 | 14 | 15 | def count_cls(file_path, ignore_index=[255], depth=1): 16 | cls_label = np.unique(np.asarray(Image.open(file_path))).tolist() 17 | cls_label = [l for l in cls_label if l not in ignore_index] 18 | return [os.path.join(*file_path.split(os.sep)[-depth:]), cls_label] 19 | 20 | 21 | def main(gt_dir, map_file_save_path, ignore_index=[255], 
ext=".png", recursive=False): 22 | if not os.path.isdir(gt_dir): 23 | warnings.warn(f"{gt_dir} is not a valid directory") 24 | return 25 | gt_file_list = glob.glob(os.path.join(gt_dir, "*" + ext), recursive=recursive) 26 | print(f"Find {len(gt_file_list)}") 27 | _func = functools.partial(count_cls, ignore_index=ignore_index) 28 | results = track_parallel_progress(_func, gt_file_list, nproc=16) 29 | results = {r[0]: r[1] for r in results} 30 | with open(map_file_save_path, "w") as f: 31 | json.dump(results, f) 32 | 33 | 34 | def main_ctyscapes( 35 | gt_dir, map_file_save_path, ignore_index=[255], ext=".png", recursive=False 36 | ): 37 | if not os.path.isdir(gt_dir): 38 | warnings.warn(f"{gt_dir} is not a valid directory") 39 | return 40 | cities = os.listdir(gt_dir) 41 | gt_file_list = list( 42 | chain.from_iterable( 43 | [ 44 | glob.glob( 45 | os.path.join(gt_dir, city, "*" + ext), 46 | ) 47 | for city in cities 48 | ] 49 | ) 50 | ) 51 | print(gt_file_list[0]) 52 | print(f"Find {len(gt_file_list)}") 53 | _func = functools.partial(count_cls, ignore_index=ignore_index, depth=2) 54 | results = track_parallel_progress(_func, gt_file_list, nproc=16) 55 | results = {r[0]: r[1] for r in results} 56 | with open(map_file_save_path, "w") as f: 57 | json.dump(results, f) 58 | 59 | 60 | if __name__ == "__main__": 61 | fire.Fire(main) 62 | -------------------------------------------------------------------------------- /baselines/data/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | 5 | from detectron2.data.datasets.coco import load_sem_seg 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | def load_obj_part_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg", data_list=None): 10 | data_dicts = load_sem_seg(gt_root, image_root, gt_ext, image_ext) 11 | if data_list is not None: 12 | img_list = json.load(open(data_list,'r')) 13 | img_list = [item["file_name"] for item in img_list] 14 | new_data_dicts = [] 15 | for i,data in enumerate(data_dicts): 16 | if data_list is not None: 17 | if data["file_name"] not in img_list: 18 | continue 19 | data_dicts[i]["obj_sem_seg_file_name"] = data["sem_seg_file_name"].replace('part','obj') 20 | new_data_dicts.append(data_dicts[i]) 21 | return new_data_dicts 22 | 23 | 24 | def load_binary_mask(gt_root, image_root, gt_ext="png", image_ext="jpg", label_count="_part_label_count.json", base_classes=None): 25 | """ 26 | Flatten the results of `load_sem_seg` to annotations for binary mask. 
27 | 28 | `label_count_file` contains a dictionary like: 29 | ``` 30 | { 31 | "xxx.png":[0,3,5], 32 | "xxxx.png":[3,4,7], 33 | } 34 | ``` 35 | """ 36 | label_count_file = gt_root + label_count 37 | with open(label_count_file) as f: 38 | label_count_dict = json.load(f) 39 | 40 | data_dicts = load_sem_seg(gt_root, image_root, gt_ext, image_ext) 41 | flattened_data_dicts = [] 42 | for data in data_dicts: 43 | data['obj_sem_seg_file_name'] = data["sem_seg_file_name"].replace('_part','_obj') 44 | category_per_image = label_count_dict[ 45 | os.path.basename(data["sem_seg_file_name"]) 46 | ] 47 | if base_classes is not None: 48 | category_per_image = [i for i in category_per_image if i in base_classes] 49 | flattened_data = [ 50 | dict(**{"category_id": cat}, **data) for cat in category_per_image 51 | ] 52 | flattened_data_dicts.extend(flattened_data) 53 | logger.info( 54 | "Loaded {} images with flattened semantic segmentation from {}".format( 55 | len(flattened_data_dicts), image_root 56 | ) 57 | ) 58 | return flattened_data_dicts 59 | -------------------------------------------------------------------------------- /baselines/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from fvcore.transforms.transform import Transform, TransformList # order them first 3 | from fvcore.transforms.transform import * 4 | from .transform import * 5 | from .augmentation import * 6 | from .augmentation_impl import * 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | 10 | 11 | from detectron2.utils.env import fixup_module_metadata 12 | 13 | fixup_module_metadata(__name__, globals(), __all__) 14 | del fixup_module_metadata 15 | -------------------------------------------------------------------------------- /baselines/data/transforms/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/transforms/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/transforms/__pycache__/augmentation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/transforms/__pycache__/augmentation.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/transforms/__pycache__/augmentation_impl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/transforms/__pycache__/augmentation_impl.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/data/transforms/__pycache__/transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/data/transforms/__pycache__/transform.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.generalized_sem_seg_evaluation import GeneralizedSemSegEvaluator 2 | -------------------------------------------------------------------------------- /baselines/evaluation/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/evaluation/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/evaluation/__pycache__/classification_evaluation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/evaluation/__pycache__/classification_evaluation.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/evaluation/__pycache__/generalized_sem_seg_evaluation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/evaluation/__pycache__/generalized_sem_seg_evaluation.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/evaluation/__pycache__/pseudo_sem_seg_evaluation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/evaluation/__pycache__/pseudo_sem_seg_evaluation.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .heads.mask_former_head import MaskFormerHead 3 | from .heads.zero_shot_obj_part_mask_former_head import ZeroShotObjPartMaskFormerHead 4 | from .heads.cat_seg_head import CATSegHead -------------------------------------------------------------------------------- /baselines/modeling/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/__pycache__/criterion.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/__pycache__/criterion.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/__pycache__/matcher.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/__pycache__/matcher.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /baselines/modeling/backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/backbone/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/backbone/__pycache__/clip_resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/backbone/__pycache__/clip_resnet.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/backbone/__pycache__/swin.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/backbone/__pycache__/swin.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/clip_adapter/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/clip_adapter/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/clip_adapter/__pycache__/adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/clip_adapter/__pycache__/adapter.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/clip_adapter/__pycache__/text_prompt.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/clip_adapter/__pycache__/text_prompt.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/clip_adapter/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/clip_adapter/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/cat_seg_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/cat_seg_head.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/mask_former_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/mask_former_head.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/pixel_decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/pixel_decoder.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/zero_shot_mask_former_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/zero_shot_mask_former_head.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/__pycache__/zero_shot_obj_part_mask_former_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/heads/__pycache__/zero_shot_obj_part_mask_former_head.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/heads/cat_seg_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import logging 3 | from copy import deepcopy 4 | from typing import Callable, Dict, List, Optional, Tuple, Union 5 | from einops import rearrange 6 | 7 | import fvcore.nn.weight_init as weight_init 8 | from torch import nn 9 | from torch.nn import functional as F 10 | 11 | from detectron2.config import configurable 12 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 13 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY 14 | 15 | from ..transformer.cat_seg_predictor import CATSegPredictor 16 | 17 | 18 | @SEM_SEG_HEADS_REGISTRY.register() 19 | class CATSegHead(nn.Module): 20 | 21 | @configurable 22 | def __init__( 23 | self, 24 | input_shape: Dict[str, ShapeSpec], 25 | *, 26 | num_classes: int, 27 | ignore_value: int = -1, 28 | # extra parameters 29 | feature_resolution: list, 30 | transformer_predictor: nn.Module, 31 | ): 32 | """ 33 | NOTE: this interface is experimental. 34 | Args: 35 | input_shape: shapes (channels and stride) of the input features 36 | num_classes: number of classes to predict 37 | pixel_decoder: the pixel decoder module 38 | loss_weight: loss weight 39 | ignore_value: category id to be ignored during training. 40 | transformer_predictor: the transformer decoder that makes prediction 41 | transformer_in_feature: input feature name to the transformer_predictor 42 | """ 43 | super().__init__() 44 | input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) 45 | self.in_features = [k for k, v in input_shape] 46 | self.ignore_value = ignore_value 47 | self.predictor = transformer_predictor 48 | self.num_classes = num_classes 49 | self.feature_resolution = feature_resolution 50 | 51 | @classmethod 52 | def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): 53 | return { 54 | "input_shape": { 55 | k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 56 | }, 57 | "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 58 | "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 59 | "feature_resolution": cfg.MODEL.SEM_SEG_HEAD.FEATURE_RESOLUTION, 60 | "transformer_predictor": CATSegPredictor( 61 | cfg, 62 | ), 63 | } 64 | 65 | def forward(self, features, guidance_features, test_text=None): 66 | """ 67 | Arguments: 68 | img_feats: (B, C, HW) 69 | affinity_features: (B, C, ) 70 | """ 71 | img_feat = rearrange(features[:, 1:, :], "b (h w) c->b c h w", h=self.feature_resolution[0], w=self.feature_resolution[1]) 72 | return self.predictor(img_feat, guidance_features, test_text) -------------------------------------------------------------------------------- /baselines/modeling/heads/zero_shot_obj_part_mask_former_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
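To make the tensor bookkeeping in `CATSegHead.forward` above concrete: `features` is a sequence of CLIP tokens of shape (B, 1 + H*W, C), the leading CLS token is dropped, and the remainder is unflattened into an image-like map for the predictor. A standalone check (the 24x24 resolution and C=768 are illustrative values, not taken from the configs):

    import torch
    from einops import rearrange

    feats = torch.randn(2, 1 + 24 * 24, 768)  # (B, 1 + H*W, C), CLS token first
    img_feat = rearrange(feats[:, 1:, :], "b (h w) c -> b c h w", h=24, w=24)  # drop CLS, unflatten
    assert img_feat.shape == (2, 768, 24, 24)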
2 | import logging 3 | from copy import deepcopy 4 | from typing import Callable, Dict, List, Optional, Tuple, Union 5 | 6 | import fvcore.nn.weight_init as weight_init 7 | from torch import nn 8 | from torch.nn import functional as F 9 | 10 | from detectron2.config import configurable 11 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 12 | from detectron2.modeling import SEM_SEG_HEADS_REGISTRY 13 | 14 | from ..transformer.zero_shot_obj_part_transformer_predictor import ZeroShotTransformerObjPartPredictor 15 | from .pixel_decoder import build_pixel_decoder 16 | from .zero_shot_mask_former_head import ZeroShotMaskFormerHead 17 | 18 | @SEM_SEG_HEADS_REGISTRY.register() 19 | class ZeroShotObjPartMaskFormerHead(ZeroShotMaskFormerHead): 20 | @configurable 21 | def __init__( 22 | self, 23 | input_shape: Dict[str, ShapeSpec], 24 | *, 25 | num_classes: int, 26 | pixel_decoder: nn.Module, 27 | loss_weight: float = 1.0, 28 | ignore_value: int = -1, 29 | # extra parameters 30 | transformer_predictor: nn.Module, 31 | transformer_in_feature: str, 32 | ): 33 | super().__init__( 34 | input_shape=input_shape, 35 | num_classes=num_classes, 36 | pixel_decoder=pixel_decoder, 37 | loss_weight=loss_weight, 38 | ignore_value=ignore_value, 39 | transformer_predictor=transformer_predictor, 40 | transformer_in_feature=transformer_in_feature 41 | ) 42 | @classmethod 43 | def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): 44 | return { 45 | "input_shape": { 46 | k: v 47 | for k, v in input_shape.items() 48 | if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 49 | }, 50 | "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 51 | "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 52 | "pixel_decoder": build_pixel_decoder(cfg, input_shape), 53 | "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, 54 | "transformer_in_feature": cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE, 55 | "transformer_predictor": ZeroShotTransformerObjPartPredictor( 56 | cfg, 57 | cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM 58 | if cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE == "transformer_encoder" 59 | else input_shape[cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE].channels, 60 | mask_classification=True, 61 | ), 62 | } 63 | 64 | def forward(self, features, obj_masks): 65 | return self.layers(features, obj_masks) 66 | 67 | def layers(self, features, obj_masks): 68 | ( 69 | mask_features, 70 | transformer_encoder_features, 71 | ) = self.pixel_decoder.forward_features(features) 72 | if self.transformer_in_feature == "transformer_encoder": 73 | assert ( 74 | transformer_encoder_features is not None 75 | ), "Please use the TransformerEncoderPixelDecoder." 76 | predictions = self.predictor(transformer_encoder_features, mask_features, obj_masks) 77 | else: 78 | predictions = self.predictor( 79 | features[self.transformer_in_feature], mask_features, obj_masks 80 | ) 81 | return predictions -------------------------------------------------------------------------------- /baselines/modeling/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/cat_seg_predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/cat_seg_predictor.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/position_encoding.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/position_encoding.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/transformer.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/transformer_predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/transformer_predictor.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/zero_shot_obj_part_transformer_predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/zero_shot_obj_part_transformer_predictor.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/__pycache__/zero_shot_transformer_predictor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/modeling/transformer/__pycache__/zero_shot_transformer_predictor.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/modeling/transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 
| # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 16 | """ 17 | 18 | def __init__( 19 | self, num_pos_feats=64, temperature=10000, normalize=False, scale=None 20 | ): 21 | super().__init__() 22 | self.num_pos_feats = num_pos_feats 23 | self.temperature = temperature 24 | self.normalize = normalize 25 | if scale is not None and normalize is False: 26 | raise ValueError("normalize should be True if scale is passed") 27 | if scale is None: 28 | scale = 2 * math.pi 29 | self.scale = scale 30 | 31 | def forward(self, x, mask=None): 32 | if mask is None: 33 | mask = torch.zeros( 34 | (x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool 35 | ) 36 | not_mask = ~mask 37 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 38 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 39 | if self.normalize: 40 | eps = 1e-6 41 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 42 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 43 | 44 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 45 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 46 | 47 | pos_x = x_embed[:, :, :, None] / dim_t 48 | pos_y = y_embed[:, :, :, None] / dim_t 49 | pos_x = torch.stack( 50 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 51 | ).flatten(3) 52 | pos_y = torch.stack( 53 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 54 | ).flatten(3) 55 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 56 | return pos 57 | -------------------------------------------------------------------------------- /baselines/modeling/transformer/zero_shot_transformer_predictor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
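A quick shape check for the `PositionEmbeddingSine` module above (illustrative; only the batch size, spatial size and device of `x` are used, since the encoding is built from cumulative-sum coordinate grids rather than from the feature values):

    import torch
    from baselines.modeling.transformer.position_encoding import PositionEmbeddingSine

    pos_enc = PositionEmbeddingSine(num_pos_feats=128, normalize=True)
    x = torch.randn(2, 256, 32, 32)
    pos = pos_enc(x)
    assert pos.shape == (2, 256, 32, 32)  # 2 * num_pos_feats channels: y encodings, then x encodings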
2 | # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/detr.py 3 | from torch import nn 4 | from detectron2.config import configurable 5 | from .transformer_predictor import TransformerPredictor, MLP 6 | 7 | 8 | class ZeroShotTransformerPredictor(TransformerPredictor): 9 | @configurable 10 | def __init__( 11 | self, 12 | in_channels, 13 | mask_classification=True, 14 | *, 15 | embedding_dim: int, 16 | embed_hidden_dim: int, 17 | embed_layers: int, 18 | hidden_dim: int, 19 | num_queries: int, 20 | nheads: int, 21 | dropout: float, 22 | dim_feedforward: int, 23 | enc_layers: int, 24 | dec_layers: int, 25 | pre_norm: bool, 26 | deep_supervision: bool, 27 | mask_dim: int, 28 | enforce_input_project: bool, 29 | ): 30 | super().__init__( 31 | in_channels, 32 | False, 33 | num_classes=embedding_dim, 34 | hidden_dim=hidden_dim, 35 | num_queries=num_queries, 36 | nheads=nheads, 37 | dropout=dropout, 38 | dim_feedforward=dim_feedforward, 39 | enc_layers=enc_layers, 40 | dec_layers=dec_layers, 41 | pre_norm=pre_norm, 42 | deep_supervision=deep_supervision, 43 | mask_dim=mask_dim, 44 | enforce_input_project=enforce_input_project, 45 | ) 46 | self.mask_classification = mask_classification 47 | # output FFNs 48 | if self.mask_classification: 49 | self.class_embed = MLP( 50 | hidden_dim, embed_hidden_dim, embedding_dim, embed_layers 51 | ) 52 | 53 | def freeze_pretrained(self): 54 | for name, module in self.named_children(): 55 | if name not in ["class_embed"]: 56 | for param in module.parameters(): 57 | param.requires_grad = False 58 | 59 | @classmethod 60 | def from_config(cls, cfg, in_channels, mask_classification): 61 | ret = {} 62 | ret["in_channels"] = in_channels 63 | ret["mask_classification"] = mask_classification 64 | 65 | ret["embedding_dim"] = cfg.MODEL.SEM_SEG_HEAD.EMBEDDING_DIM 66 | ret["embed_hidden_dim"] = cfg.MODEL.SEM_SEG_HEAD.EMBED_HIDDEN_DIM 67 | ret["embed_layers"] = cfg.MODEL.SEM_SEG_HEAD.EMBED_LAYERS 68 | ret["hidden_dim"] = cfg.MODEL.MASK_FORMER.HIDDEN_DIM 69 | ret["num_queries"] = cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES 70 | # Transformer parameters: 71 | ret["nheads"] = cfg.MODEL.MASK_FORMER.NHEADS 72 | ret["dropout"] = cfg.MODEL.MASK_FORMER.DROPOUT 73 | ret["dim_feedforward"] = cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD 74 | ret["enc_layers"] = cfg.MODEL.MASK_FORMER.ENC_LAYERS 75 | ret["dec_layers"] = cfg.MODEL.MASK_FORMER.DEC_LAYERS 76 | ret["pre_norm"] = cfg.MODEL.MASK_FORMER.PRE_NORM 77 | ret["deep_supervision"] = cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION 78 | ret["enforce_input_project"] = cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ 79 | 80 | ret["mask_dim"] = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM 81 | 82 | return ret 83 | -------------------------------------------------------------------------------- /baselines/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__init__.py -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/clip.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/clip.cpython-37.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/clip.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/clip.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/imagenet_templates.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/imagenet_templates.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/model_vpt.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/model_vpt.cpython-37.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/model_vpt.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/model_vpt.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/simple_tokenizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/simple_tokenizer.cpython-37.pyc -------------------------------------------------------------------------------- /baselines/third_party/__pycache__/simple_tokenizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/__pycache__/simple_tokenizer.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/third_party/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/third_party/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /baselines/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # from .events import setup_wandb, WandbWriter 3 | -------------------------------------------------------------------------------- /baselines/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/utils/__pycache__/events.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/utils/__pycache__/events.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/utils/__pycache__/post_process_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/baselines/utils/__pycache__/post_process_utils.cpython-38.pyc -------------------------------------------------------------------------------- /baselines/utils/post_process_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | import numpy as np 4 | 5 | try: 6 | import pydensecrf.densecrf as dcrf 7 | from pydensecrf.utils import ( 8 | unary_from_softmax, 9 | unary_from_labels, 10 | create_pairwise_bilateral, 11 | create_pairwise_gaussian, 12 | ) 13 | except ImportError: 14 | dcrf = None 15 | 16 | 17 | def dense_crf_post_process( 18 | logits, 19 | image, 20 | n_labels=None, 21 | max_iters=5, 22 | pos_xy_std=(3, 3), 23 | pos_w=3, 24 | bi_xy_std=(80, 80), 25 | bi_rgb_std=(13, 13, 13), 26 | bi_w=10, 27 | ): 28 | """ 29 | logits : [C,H,W] logits or probabilities, or an [H,W] integer label map (requires n_labels) 30 | image : [H,W,3] uint8 RGB image 31 | """ 32 | if dcrf is None: 33 | raise ImportError( 34 | "pydensecrf is required to perform dense crf inference." 35 | ) 36 | if isinstance(logits, torch.Tensor): 37 | logits = F.softmax(logits, dim=0).detach().cpu().numpy() 38 | U = unary_from_softmax(logits) 39 | n_labels = logits.shape[0] 40 | elif logits.ndim == 3: 41 | U = unary_from_softmax(logits) 42 | n_labels = logits.shape[0] 43 | else: 44 | assert n_labels is not None 45 | U = unary_from_labels(logits, n_labels, zero_unsure=False) 46 | 47 | d = dcrf.DenseCRF2D(image.shape[1], image.shape[0], n_labels) 48 | 49 | d.setUnaryEnergy(U) 50 | 51 | # This adds the color-independent term, features are the locations only. 52 | d.addPairwiseGaussian( 53 | sxy=pos_xy_std, 54 | compat=pos_w, 55 | kernel=dcrf.DIAG_KERNEL, 56 | normalization=dcrf.NORMALIZE_SYMMETRIC, 57 | ) 58 | 59 | # This adds the color-dependent term, i.e. features are (x,y,r,g,b). 60 | d.addPairwiseBilateral( 61 | sxy=bi_xy_std, 62 | srgb=bi_rgb_std, 63 | rgbim=image, 64 | compat=bi_w, 65 | kernel=dcrf.DIAG_KERNEL, 66 | normalization=dcrf.NORMALIZE_SYMMETRIC, 67 | ) 68 | # Run CRF inference for max_iters steps.
69 | logits = d.inference(max_iters) 70 | logits = np.asarray(logits).reshape((n_labels, image.shape[0], image.shape[1])) 71 | return torch.from_numpy(logits) 72 | -------------------------------------------------------------------------------- /configs/Base-VOC11K-20.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("voc_sem_seg_train",) 18 | TEST: ("voc_sem_seg_test",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | MAX_ITER: 20000 23 | WARMUP_FACTOR: 1.0 24 | WARMUP_ITERS: 0 25 | WEIGHT_DECAY: 0.0001 26 | OPTIMIZER: "ADAMW" 27 | LR_SCHEDULER_NAME: "WarmupPolyLR" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | INPUT: 35 | MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 16)]"] 36 | MIN_SIZE_TRAIN_SAMPLING: "choice" 37 | MIN_SIZE_TEST: 512 38 | MAX_SIZE_TRAIN: 2048 39 | MAX_SIZE_TEST: 2048 40 | CROP: 41 | ENABLED: True 42 | TYPE: "absolute" 43 | SIZE: (512, 512) 44 | SINGLE_CATEGORY_MAX_AREA: 1.0 45 | COLOR_AUG_SSD: True 46 | SIZE_DIVISIBILITY: 512 # used in dataset mapper 47 | FORMAT: "RGB" 48 | DATASET_MAPPER_NAME: "mask_former_semantic" 49 | TEST: 50 | EVAL_PERIOD: 5000 51 | AUG: 52 | ENABLED: False 53 | MIN_SIZES: [256, 384, 512, 640, 768, 896] 54 | MAX_SIZE: 3584 55 | FLIP: True 56 | DATALOADER: 57 | FILTER_EMPTY_ANNOTATIONS: True 58 | NUM_WORKERS: 4 59 | VERSION: 2 60 | -------------------------------------------------------------------------------- /configs/base_catseg_config.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CATSeg" 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "D2SwinTransformer" 6 | SWIN: 7 | EMBED_DIM: 192 8 | DEPTHS: [2, 2, 18, 2] 9 | NUM_HEADS: [4, 8, 16, 32] 10 | WINDOW_SIZE: 12 11 | APE: False 12 | DROP_PATH_RATE: 0.3 13 | PATCH_NORM: True 14 | PRETRAIN_IMG_SIZE: 384 15 | OUT_FEATURES: ["res2", "res3", "res4"] 16 | WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" 17 | PIXEL_MEAN: [123.675, 116.280, 103.530] 18 | PIXEL_STD: [58.395, 57.120, 57.375] 19 | SEM_SEG_HEAD: 20 | NAME: "OpenVocabHead" 21 | IN_FEATURES: ["res2", "res3", "res4"] 22 | IGNORE_VALUE: 255 23 | NUM_CLASSES: 171 24 | CLIP_PRETRAINED: "ViT-L/14@336px" 25 | PROMPT_DEPTH: 0 26 | PROMPT_LENGTH: 0 27 | TEXT_GUIDANCE_DIM: 768 28 | TEXT_GUIDANCE_PROJ_DIM: 128 29 | APPEARANCE_GUIDANCE_DIM: 768 30 | APPEARANCE_GUIDANCE_PROJ_DIM: 128 31 | DECODER_DIMS: [64, 32] 32 | DECODER_GUIDANCE_DIMS: [256, 128] 33 | DECODER_GUIDANCE_PROJ_DIMS: [32, 16] 34 | NUM_LAYERS: 4 35 | NUM_HEADS: 4 36 | HIDDEN_DIMS: 128 37 | POOLING_SIZES: [6, 6] 38 | FEATURE_RESOLUTION: [24, 24] 39 | WINDOW_SIZES: 12 40 | ATTENTION_TYPE: "linear" 41 | CLIP_FINETUNE: "attention" 42 | PROMPT_ENSEMBLE_TYPE: "imagenet" 43 | DATASETS: 44 | TRAIN: ("coco_2017_train_stuff_all_sem_seg",) 45 | TEST: ("coco_2017_test_stuff_all_sem_seg",) 46 | SOLVER: 47 | IMS_PER_BATCH: 4 48 | BASE_LR: 0.0002 49 | MAX_ITER: 80000 50 | 
WARMUP_FACTOR: 1.0 51 | WARMUP_ITERS: 0 52 | WEIGHT_DECAY: 0.0001 53 | OPTIMIZER: "ADAMW" 54 | LR_SCHEDULER_NAME: "WarmupCosineLR" 55 | BACKBONE_MULTIPLIER: 0.01 56 | CLIP_MULTIPLIER: 0.01 57 | CLIP_GRADIENTS: 58 | ENABLED: True 59 | CLIP_TYPE: "full_model" 60 | CLIP_VALUE: 0.01 61 | NORM_TYPE: 2.0 62 | INPUT: 63 | MIN_SIZE_TRAIN: (384, ) 64 | MIN_SIZE_TRAIN_SAMPLING: "choice" 65 | MIN_SIZE_TEST: 640 66 | MAX_SIZE_TEST: 2560 67 | CROP: 68 | ENABLED: True 69 | TYPE: "absolute" 70 | SIZE: (384, 384) 71 | SINGLE_CATEGORY_MAX_AREA: 1.0 72 | COLOR_AUG_SSD: True 73 | SIZE_DIVISIBILITY: 384 74 | FORMAT: "RGB" 75 | DATASET_MAPPER_NAME: "mask_former_semantic" 76 | TEST: 77 | EVAL_PERIOD: 5000 78 | SLIDING_WINDOW: False 79 | DATALOADER: 80 | FILTER_EMPTY_ANNOTATIONS: True 81 | NUM_WORKERS: 8 82 | VERSION: 2 83 | CUDNN_BENCHMARK: True 84 | -------------------------------------------------------------------------------- /configs/cross_dataset/clipseg_ade.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | MODEL: 3 | META_ARCHITECTURE: "CLIPSeg" 4 | INPUT: 5 | DATASET_MAPPER_NAME: "obj_part_semantic" 6 | DATASETS: 7 | TRAIN: ("ade_obj_part_sem_seg_train",) 8 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 9 | DATALOADER: 10 | FILTER_EMPTY_ANNOTATIONS: True 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 8 14 | BASE_LR: 0.0001 15 | MAX_ITER: 20000 16 | WARMUP_FACTOR: 1.0 17 | WARMUP_ITERS: 0 18 | WEIGHT_DECAY: 0.0001 19 | OPTIMIZER: "ADAMW" 20 | LR_SCHEDULER_NAME: "WarmupPolyLR" 21 | CLIP_GRADIENTS: 22 | ENABLED: True 23 | CLIP_TYPE: "full_model" 24 | CLIP_VALUE: 0.01 25 | NORM_TYPE: 2.0 26 | TEST: 27 | EVAL_PERIOD: 5000 28 | -------------------------------------------------------------------------------- /configs/few_shot/catseg_ade.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | _BASE_: base_catseg_config.yaml 3 | MODEL: 4 | META_ARCHITECTURE: "CATSeg" 5 | BACKBONE: 6 | FREEZE_AT: 0 7 | NAME: "build_resnet_backbone" 8 | WEIGHTS: "pretrain_weights/model_final_base.pth" 9 | RESNETS: 10 | DEPTH: 101 11 | STEM_TYPE: "basic" 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4"] 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | NAME: "CATSegHead" 19 | IN_FEATURES: ["res2", "res3", "res4"] 20 | IGNORE_VALUE: 255 21 | NUM_CLASSES: 74 22 | BG_ON: True 23 | CLIP_PRETRAINED: "ViT-B/16" 24 | PROMPT_DEPTH: 0 25 | PROMPT_LENGTH: 0 26 | TEXT_GUIDANCE_DIM: 512 27 | TEXT_GUIDANCE_PROJ_DIM: 128 28 | APPEARANCE_GUIDANCE_DIM: 1024 29 | APPEARANCE_GUIDANCE_PROJ_DIM: 128 30 | DECODER_DIMS: [64, 32] 31 | DECODER_GUIDANCE_DIMS: [512, 256] 32 | DECODER_GUIDANCE_PROJ_DIMS: [32, 16] 33 | NUM_LAYERS: 2 34 | NUM_HEADS: 4 35 | HIDDEN_DIMS: 128 36 | POOLING_SIZES: [2, 2] 37 | FEATURE_RESOLUTION: [24, 24] 38 | WINDOW_SIZES: 12 39 | ATTENTION_TYPE: "linear" 40 | CLIP_FINETUNE: "" 41 | PROMPT_ENSEMBLE_TYPE: "imagenet" 42 | INPUT: 43 | DATASET_MAPPER_NAME: "obj_part_semantic" 44 | MAX_SIZE_TRAIN: 768 45 | MAX_SIZE_TEST: 768 46 | DATASETS: 47 | TRAIN: ("ade_obj_part_sem_seg_train_few_shot",) 48 | TEST: ("ade_obj_part_sem_seg_val_obj_few_shot",) 49 | DATALOADER: 50 | FILTER_EMPTY_ANNOTATIONS: True 51 | NUM_WORKERS: 4 52 | SOLVER: 53 | BACKBONE_MULTIPLIER: 0.01 54 | BASE_LR: 0.0002 55 | IMS_PER_BATCH: 8 56 | MAX_ITER: 80000 57 | TEST: 58 | EVAL_PERIOD: 5000 59 | 
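A minimal usage sketch for the `dense_crf_post_process` helper in `baselines/utils/post_process_utils.py` above, assuming `pydensecrf` is installed and the snippet is run from the repository root; the shapes and inputs are illustrative only (note the helper indexes the image as H×W×3 uint8):

```python
import numpy as np
import torch

from baselines.utils.post_process_utils import dense_crf_post_process

# Illustrative inputs: 16 classes on a 512x512 image.
logits = torch.randn(16, 512, 512)   # [C, H, W] raw scores; softmaxed inside the helper
image = np.ascontiguousarray(        # [H, W, 3] uint8 RGB, the layout the helper expects
    np.random.randint(0, 256, size=(512, 512, 3), dtype=np.uint8)
)

refined = dense_crf_post_process(logits, image, max_iters=5)  # -> [C, H, W] torch.Tensor
pred = refined.argmax(dim=0)         # [H, W] per-pixel class indices after CRF refinement
```

The pairwise std/weight defaults are the ones in the function signature; pass `n_labels` explicitly only when feeding an [H, W] label map instead of logits.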
-------------------------------------------------------------------------------- /configs/few_shot/catseg_voc.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | _BASE_: base_catseg_config.yaml 3 | MODEL: 4 | META_ARCHITECTURE: "CATSeg" 5 | BACKBONE: 6 | FREEZE_AT: 0 7 | NAME: "build_resnet_backbone" 8 | WEIGHTS: "pretrain_weights/model_final_base.pth" 9 | RESNETS: 10 | DEPTH: 101 11 | STEM_TYPE: "basic" 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4"] 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | NAME: "CATSegHead" 19 | IN_FEATURES: ["res2", "res3", "res4"] 20 | IGNORE_VALUE: 255 21 | NUM_CLASSES: 74 22 | BG_ON: True 23 | CLIP_PRETRAINED: "ViT-B/16" 24 | PROMPT_DEPTH: 0 25 | PROMPT_LENGTH: 0 26 | TEXT_GUIDANCE_DIM: 512 27 | TEXT_GUIDANCE_PROJ_DIM: 128 28 | APPEARANCE_GUIDANCE_DIM: 1024 29 | APPEARANCE_GUIDANCE_PROJ_DIM: 128 30 | DECODER_DIMS: [64, 32] 31 | DECODER_GUIDANCE_DIMS: [512, 256] 32 | DECODER_GUIDANCE_PROJ_DIMS: [32, 16] 33 | NUM_LAYERS: 2 34 | NUM_HEADS: 4 35 | HIDDEN_DIMS: 128 36 | POOLING_SIZES: [2, 2] 37 | FEATURE_RESOLUTION: [24, 24] 38 | WINDOW_SIZES: 12 39 | ATTENTION_TYPE: "linear" 40 | CLIP_FINETUNE: "" 41 | PROMPT_ENSEMBLE_TYPE: "imagenet" 42 | INPUT: 43 | DATASET_MAPPER_NAME: "obj_part_semantic" 44 | MAX_SIZE_TRAIN: 768 45 | MAX_SIZE_TEST: 768 46 | DATASETS: 47 | TRAIN: ("voc_obj_part_sem_seg_train_few_shot",) 48 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 49 | DATALOADER: 50 | FILTER_EMPTY_ANNOTATIONS: True 51 | NUM_WORKERS: 4 52 | SOLVER: 53 | BACKBONE_MULTIPLIER: 0.01 54 | BASE_LR: 0.0002 55 | IMS_PER_BATCH: 8 56 | MAX_ITER: 80000 57 | TEST: 58 | EVAL_PERIOD: 5000 59 | -------------------------------------------------------------------------------- /configs/few_shot/clipseg_ade.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | MODEL: 3 | META_ARCHITECTURE: "CLIPSeg" 4 | INPUT: 5 | DATASET_MAPPER_NAME: "obj_part_semantic" 6 | DATASETS: 7 | TRAIN: ("ade_obj_part_sem_seg_train_few_shot",) 8 | TEST: ("ade_obj_part_sem_seg_val_few_shot",) 9 | DATALOADER: 10 | FILTER_EMPTY_ANNOTATIONS: True 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 8 14 | BASE_LR: 0.0001 15 | MAX_ITER: 20000 16 | WARMUP_FACTOR: 1.0 17 | WARMUP_ITERS: 0 18 | WEIGHT_DECAY: 0.0001 19 | OPTIMIZER: "ADAMW" 20 | LR_SCHEDULER_NAME: "WarmupPolyLR" 21 | CLIP_GRADIENTS: 22 | ENABLED: True 23 | CLIP_TYPE: "full_model" 24 | CLIP_VALUE: 0.01 25 | NORM_TYPE: 2.0 26 | TEST: 27 | EVAL_PERIOD: 5000 28 | -------------------------------------------------------------------------------- /configs/few_shot/clipseg_voc.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | MODEL: 3 | META_ARCHITECTURE: "CLIPSeg" 4 | INPUT: 5 | DATASET_MAPPER_NAME: "obj_part_semantic" 6 | DATASETS: 7 | TRAIN: ("voc_obj_part_sem_seg_train_few_shot",) 8 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 9 | DATALOADER: 10 | FILTER_EMPTY_ANNOTATIONS: True 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 8 14 | BASE_LR: 0.0001 15 | MAX_ITER: 20000 16 | WARMUP_FACTOR: 1.0 17 | WARMUP_ITERS: 0 18 | WEIGHT_DECAY: 0.0001 19 | OPTIMIZER: "ADAMW" 20 | LR_SCHEDULER_NAME: "WarmupPolyLR" 21 | CLIP_GRADIENTS: 22 | ENABLED: True 23 | CLIP_TYPE: "full_model" 24 | CLIP_VALUE: 0.01 25 | NORM_TYPE: 2.0 26 | TEST: 27 | EVAL_PERIOD: 5000 28 | 
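The YAML files under `configs/` follow detectron2's config conventions (`_BASE_` inheritance, dotted keys merged into a `CfgNode`). A rough sketch of loading one for inspection, assuming detectron2 is installed; in the training scripts the project-specific keys (`ORACLE`, `MODEL.SEM_SEG_HEAD.BG_ON`, `MODEL.CLIP_ADAPTER.*`, ...) are presumably registered by the repo's own `baselines/config.py`, so allowing new keys here is only a shortcut for quick inspection:

```python
from detectron2.config import get_cfg

cfg = get_cfg()
# Shortcut: accept keys the stock detectron2 config does not define.
# (set_new_allowed applies recursively in recent yacs; otherwise register the
# extra keys properly via the repo's config helpers before merging.)
cfg.set_new_allowed(True)
# merge_from_file resolves the relative _BASE_ entry (../base_catseg_config.yaml)
# before merging the leaf config on top of it.
cfg.merge_from_file("configs/zero_shot/catseg_voc.yaml")

print(cfg.MODEL.META_ARCHITECTURE)                   # CATSeg
print(cfg.DATASETS.TRAIN, cfg.DATASETS.TEST)
print(cfg.SOLVER.BASE_LR, cfg.SOLVER.MAX_ITER, cfg.SOLVER.IMS_PER_BATCH)
```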
-------------------------------------------------------------------------------- /configs/maskformer_R50_bs16_20k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-VOC11K-20.yaml 2 | MODEL: 3 | META_ARCHITECTURE: "MaskFormer" 4 | SEM_SEG_HEAD: 5 | NAME: "MaskFormerHead" 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 20 9 | COMMON_STRIDE: 4 # not used, hard-coded 10 | LOSS_WEIGHT: 1.0 11 | CONVS_DIM: 256 12 | MASK_DIM: 256 13 | NORM: "GN" 14 | MASK_FORMER: 15 | TRANSFORMER_IN_FEATURE: "res5" 16 | DEEP_SUPERVISION: True 17 | NO_OBJECT_WEIGHT: 0.1 18 | DICE_WEIGHT: 1.0 19 | MASK_WEIGHT: 20.0 20 | HIDDEN_DIM: 256 21 | NUM_OBJECT_QUERIES: 100 22 | NHEADS: 8 23 | DROPOUT: 0.1 24 | DIM_FEEDFORWARD: 2048 25 | ENC_LAYERS: 0 26 | DEC_LAYERS: 6 27 | PRE_NORM: False -------------------------------------------------------------------------------- /configs/zero_shot/catseg_ade.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | _BASE_: ../base_catseg_config.yaml 3 | MODEL: 4 | META_ARCHITECTURE: "CATSeg" 5 | BACKBONE: 6 | FREEZE_AT: 0 7 | NAME: "build_resnet_backbone" 8 | WEIGHTS: "pretrain_weights/model_final_base.pth" 9 | RESNETS: 10 | DEPTH: 101 11 | STEM_TYPE: "basic" 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4"] 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | NAME: "CATSegHead" 19 | IN_FEATURES: ["res2", "res3", "res4"] 20 | IGNORE_VALUE: 255 21 | NUM_CLASSES: 74 22 | BG_ON: True 23 | CLIP_PRETRAINED: "ViT-B/16" 24 | PROMPT_DEPTH: 0 25 | PROMPT_LENGTH: 0 26 | TEXT_GUIDANCE_DIM: 512 27 | TEXT_GUIDANCE_PROJ_DIM: 128 28 | APPEARANCE_GUIDANCE_DIM: 1024 29 | APPEARANCE_GUIDANCE_PROJ_DIM: 128 30 | DECODER_DIMS: [64, 32] 31 | DECODER_GUIDANCE_DIMS: [512, 256] 32 | DECODER_GUIDANCE_PROJ_DIMS: [32, 16] 33 | NUM_LAYERS: 2 34 | NUM_HEADS: 4 35 | HIDDEN_DIMS: 128 36 | POOLING_SIZES: [2, 2] 37 | FEATURE_RESOLUTION: [24, 24] 38 | WINDOW_SIZES: 12 39 | ATTENTION_TYPE: "linear" 40 | CLIP_FINETUNE: "" 41 | PROMPT_ENSEMBLE_TYPE: "imagenet" 42 | INPUT: 43 | DATASET_MAPPER_NAME: "obj_part_semantic" 44 | MAX_SIZE_TRAIN: 768 45 | MAX_SIZE_TEST: 768 46 | DATASETS: 47 | TRAIN: ("ade_obj_part_sem_seg_train",) 48 | TEST: ("ade_obj_part_sem_seg_val_obj_condition",) 49 | DATALOADER: 50 | FILTER_EMPTY_ANNOTATIONS: True 51 | NUM_WORKERS: 4 52 | SOLVER: 53 | BACKBONE_MULTIPLIER: 0.01 54 | BASE_LR: 0.0002 55 | IMS_PER_BATCH: 8 56 | MAX_ITER: 80000 57 | TEST: 58 | EVAL_PERIOD: 5000 59 | SLIDING_WINDOW: False 60 | -------------------------------------------------------------------------------- /configs/zero_shot/catseg_voc.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | _BASE_: ../base_catseg_config.yaml 3 | MODEL: 4 | META_ARCHITECTURE: "CATSeg" 5 | BACKBONE: 6 | FREEZE_AT: 0 7 | NAME: "build_resnet_backbone" 8 | WEIGHTS: "pretrain_weights/model_final_base.pth" 9 | RESNETS: 10 | DEPTH: 101 11 | STEM_TYPE: "basic" 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4"] 15 | PIXEL_MEAN: [123.675, 116.280, 103.530] 16 | PIXEL_STD: [58.395, 57.120, 57.375] 17 | SEM_SEG_HEAD: 18 | NAME: "CATSegHead" 19 | IN_FEATURES: ["res2", "res3", "res4"] 20 | IGNORE_VALUE: 255 21 | NUM_CLASSES: 74 22 | BG_ON: True 23 | CLIP_PRETRAINED: "ViT-B/16" 24 | PROMPT_DEPTH: 0 25 | PROMPT_LENGTH: 0 
26 | TEXT_GUIDANCE_DIM: 512 27 | TEXT_GUIDANCE_PROJ_DIM: 128 28 | APPEARANCE_GUIDANCE_DIM: 1024 29 | APPEARANCE_GUIDANCE_PROJ_DIM: 128 30 | DECODER_DIMS: [64, 32] 31 | DECODER_GUIDANCE_DIMS: [512, 256] 32 | DECODER_GUIDANCE_PROJ_DIMS: [32, 16] 33 | NUM_LAYERS: 2 34 | NUM_HEADS: 4 35 | HIDDEN_DIMS: 128 36 | POOLING_SIZES: [2, 2] 37 | FEATURE_RESOLUTION: [24, 24] 38 | WINDOW_SIZES: 12 39 | ATTENTION_TYPE: "linear" 40 | CLIP_FINETUNE: "" 41 | PROMPT_ENSEMBLE_TYPE: "imagenet" 42 | INPUT: 43 | DATASET_MAPPER_NAME: "obj_part_semantic" 44 | MAX_SIZE_TRAIN: 768 45 | MAX_SIZE_TEST: 768 46 | DATASETS: 47 | TRAIN: ("voc_obj_part_sem_seg_train",) 48 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 49 | DATALOADER: 50 | FILTER_EMPTY_ANNOTATIONS: True 51 | NUM_WORKERS: 4 52 | SOLVER: 53 | BACKBONE_MULTIPLIER: 0.01 54 | BASE_LR: 0.0002 55 | IMS_PER_BATCH: 8 56 | MAX_ITER: 80000 57 | TEST: 58 | EVAL_PERIOD: 5000 59 | SLIDING_WINDOW: False 60 | -------------------------------------------------------------------------------- /configs/zero_shot/clipseg_ade.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | MODEL: 3 | META_ARCHITECTURE: "CLIPSeg" 4 | INPUT: 5 | DATASET_MAPPER_NAME: "obj_part_semantic" 6 | DATASETS: 7 | TRAIN: ("ade_obj_part_sem_seg_train",) 8 | TEST: ("ade_obj_part_sem_seg_val_obj_condition",) 9 | DATALOADER: 10 | FILTER_EMPTY_ANNOTATIONS: True 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 8 14 | BASE_LR: 0.0001 15 | MAX_ITER: 20000 16 | WARMUP_FACTOR: 1.0 17 | WARMUP_ITERS: 0 18 | WEIGHT_DECAY: 0.0001 19 | OPTIMIZER: "ADAMW" 20 | LR_SCHEDULER_NAME: "WarmupPolyLR" 21 | CLIP_GRADIENTS: 22 | ENABLED: True 23 | CLIP_TYPE: "full_model" 24 | CLIP_VALUE: 0.01 25 | NORM_TYPE: 2.0 26 | TEST: 27 | EVAL_PERIOD: 5000 28 | -------------------------------------------------------------------------------- /configs/zero_shot/clipseg_voc.yaml: -------------------------------------------------------------------------------- 1 | ORACLE: True 2 | MODEL: 3 | META_ARCHITECTURE: "CLIPSeg" 4 | INPUT: 5 | DATASET_MAPPER_NAME: "obj_part_semantic" 6 | DATASETS: 7 | TRAIN: ("voc_obj_part_sem_seg_train",) 8 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 9 | DATALOADER: 10 | FILTER_EMPTY_ANNOTATIONS: True 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 8 14 | BASE_LR: 0.0001 15 | MAX_ITER: 20000 16 | WARMUP_FACTOR: 1.0 17 | WARMUP_ITERS: 0 18 | WEIGHT_DECAY: 0.0001 19 | OPTIMIZER: "ADAMW" 20 | LR_SCHEDULER_NAME: "WarmupPolyLR" 21 | CLIP_GRADIENTS: 22 | ENABLED: True 23 | CLIP_TYPE: "full_model" 24 | CLIP_VALUE: 0.01 25 | NORM_TYPE: 2.0 26 | TEST: 27 | EVAL_PERIOD: 5000 28 | -------------------------------------------------------------------------------- /configs/zero_shot/zsseg+_R50_coop_ade.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_20k.yaml 2 | ORACLE: True 3 | MODEL: 4 | META_ARCHITECTURE: "ZeroShotObjPartMaskFormer" 5 | SEM_SEG_HEAD: 6 | NAME: "ZeroShotObjPartMaskFormerHead" 7 | NUM_CLASSES: 74 #only used in set criterion 8 | EMBEDDING_DIM: 512 9 | EMBED_LAYERS: 2 10 | CLIP_ADAPTER: 11 | PROMPT_LEARNER: "learnable_obj_part" 12 | # # for learnable prompt 13 | PROMPT_DIM: 512 14 | PROMPT_SHAPE: (4, 4 ,0) 15 | CLIP_MODEL_NAME: "ViT-B/16" 16 | MASK_FILL: "mean" 17 | MASK_EXPAND_RATIO: 1.2 18 | MASK_THR: 0.5 19 | MASK_MATTING: False 20 | REGION_RESIZED: True 21 | CLIP_ENSEMBLE: True 22 | CLIP_ENSEMBLE_WEIGHT: 0.5 23 | PROMPT_CHECKPOINT: 
'clip_weights/ade_cpt_coop_model.pth' 24 | MASK_FORMER: 25 | NUM_OBJECT_QUERIES: 50 26 | CLASS_WEIGHT: 1.0 27 | INPUT: 28 | DATASET_MAPPER_NAME: "obj_part_semantic" 29 | MAX_SIZE_TRAIN: 768 30 | MAX_SIZE_TEST: 768 31 | SOLVER: 32 | IMS_PER_BATCH: 8 33 | TEST: 34 | EVAL_PERIOD: 5000 35 | DATASETS: 36 | TRAIN: ("ade_obj_part_sem_seg_train_obj_condition",) 37 | TEST: ("ade_obj_part_sem_seg_val_obj_condition",) 38 | DATALOADER: 39 | FILTER_EMPTY_ANNOTATIONS: True 40 | NUM_WORKERS: 4 41 | -------------------------------------------------------------------------------- /configs/zero_shot/zsseg+_R50_coop_voc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: ../maskformer_R50_bs16_20k.yaml 2 | ORACLE: True 3 | MODEL: 4 | META_ARCHITECTURE: "ZeroShotObjPartMaskFormer" 5 | SEM_SEG_HEAD: 6 | NAME: "ZeroShotObjPartMaskFormerHead" 7 | NUM_CLASSES: 74 #only used in set criterion 8 | EMBEDDING_DIM: 512 9 | EMBED_LAYERS: 2 10 | CLIP_ADAPTER: 11 | PROMPT_LEARNER: "learnable_obj_part" 12 | # # for learnable prompt 13 | PROMPT_DIM: 512 14 | PROMPT_SHAPE: (4, 4 ,0) 15 | CLIP_MODEL_NAME: "ViT-B/16" 16 | MASK_FILL: "mean" 17 | MASK_EXPAND_RATIO: 1.2 18 | MASK_THR: 0.5 19 | MASK_MATTING: False 20 | REGION_RESIZED: True 21 | CLIP_ENSEMBLE: True 22 | CLIP_ENSEMBLE_WEIGHT: 0.5 23 | PROMPT_CHECKPOINT: 'clip_weights/voc_cpt_coop_model.pth' 24 | MASK_FORMER: 25 | NUM_OBJECT_QUERIES: 50 26 | CLASS_WEIGHT: 1.0 27 | INPUT: 28 | DATASET_MAPPER_NAME: "obj_part_semantic" 29 | MAX_SIZE_TRAIN: 768 30 | MAX_SIZE_TEST: 768 31 | SOLVER: 32 | IMS_PER_BATCH: 8 33 | TEST: 34 | EVAL_PERIOD: 5000 35 | DATASETS: 36 | TRAIN: ("voc_obj_part_sem_seg_train_obj_condition",) 37 | TEST: ("voc_obj_part_sem_seg_val_obj_condition",) 38 | DATALOADER: 39 | FILTER_EMPTY_ANNOTATIONS: True 40 | NUM_WORKERS: 4 41 | -------------------------------------------------------------------------------- /open_clip/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.1.0 2 | message: If you use this software, please cite it as below. 3 | authors: 4 | - family-names: Ilharco 5 | given-names: Gabriel 6 | - family-names: Wortsman 7 | given-names: Mitchell 8 | - family-names: Wightman 9 | given-names: Ross 10 | - family-names: Gordon 11 | given-names: Cade 12 | - family-names: Carlini 13 | given-names: Nicholas 14 | - family-names: Taori 15 | given-names: Rohan 16 | - family-names: Dave 17 | given-names: Achal 18 | - family-names: Shankar 19 | given-names: Vaishaal 20 | - family-names: Namkoong 21 | given-names: Hongseok 22 | - family-names: Miller 23 | given-names: John 24 | - family-names: Hajishirzi 25 | given-names: Hannaneh 26 | - family-names: Farhadi 27 | given-names: Ali 28 | - family-names: Schmidt 29 | given-names: Ludwig 30 | title: OpenCLIP 31 | version: v0.1 32 | doi: 10.5281/zenodo.5143773 33 | date-released: 2021-07-28 34 | -------------------------------------------------------------------------------- /open_clip/HISTORY.md: -------------------------------------------------------------------------------- 1 | ## 2.10.1 2 | 3 | * `hf-hub:org/model_id` support for loading models w/ config and weights in Hugging Face Hub 4 | 5 | ## 2.10.0 6 | 7 | * Added a ViT-bigG-14 model. 8 | * Added an up-to-date example slurm script for large training jobs. 9 | * Added a option to sync logs and checkpoints to S3 during training. 
10 | * New options for LR schedulers, constant and constant with cooldown 11 | * Fix wandb autoresuming when resume is not set 12 | * ConvNeXt `base` & `base_w` pretrained models added 13 | * `timm-` model prefix removed from configs 14 | * `timm` augmentation + regularization (dropout / drop-path) supported 15 | 16 | ## 2.9.3 17 | 18 | * Fix wandb collapsing multiple parallel runs into a single one 19 | 20 | ## 2.9.2 21 | 22 | * Fix braceexpand memory explosion for complex webdataset urls 23 | 24 | ## 2.9.1 25 | 26 | * Fix release 27 | 28 | ## 2.9.0 29 | 30 | * Add training feature to auto-resume from the latest checkpoint on restart via `--resume latest` 31 | * Allow webp in webdataset 32 | * Fix logging for number of samples when using gradient accumulation 33 | * Add model configs for convnext xxlarge 34 | 35 | ## 2.8.2 36 | 37 | * wrapped patchdropout in a torch.nn.Module 38 | 39 | ## 2.8.1 40 | 41 | * relax protobuf dependency 42 | * override the default patch dropout value in 'vision_cfg' 43 | 44 | ## 2.8.0 45 | 46 | * better support for HF models 47 | * add support for gradient accumulation 48 | * CI fixes 49 | * add support for patch dropout 50 | * add convnext configs 51 | 52 | 53 | ## 2.7.0 54 | 55 | * add multilingual H/14 xlm roberta large 56 | 57 | ## 2.6.1 58 | 59 | * fix setup.py _read_reqs 60 | 61 | ## 2.6.0 62 | 63 | * Make openclip training usable from pypi. 64 | * Add xlm roberta large vit h 14 config. 65 | 66 | ## 2.5.0 67 | 68 | * pretrained B/32 xlm roberta base: first multilingual clip trained on laion5B 69 | * pretrained B/32 roberta base: first clip trained using an HF text encoder 70 | 71 | ## 2.4.1 72 | 73 | * Add missing hf_tokenizer_name in CLIPTextCfg. 74 | 75 | ## 2.4.0 76 | 77 | * Fix #211, missing RN50x64 config. Fix type of dropout param for ResNet models 78 | * Bring back LayerNorm impl that casts to input for non bf16/fp16 79 | * zero_shot.py: set correct tokenizer based on args 80 | * training/params.py: remove hf params and get them from model config 81 | 82 | ## 2.3.1 83 | 84 | * Implement grad checkpointing for hf model. 
85 | * custom_text: True if hf_model_name is set 86 | * Disable hf tokenizer parallelism 87 | 88 | ## 2.3.0 89 | 90 | * Generalizable Text Transformer with HuggingFace Models (@iejMac) 91 | 92 | ## 2.2.0 93 | 94 | * Support for custom text tower 95 | * Add checksum verification for pretrained model weights 96 | 97 | ## 2.1.0 98 | 99 | * lot including sota models, bfloat16 option, better loading, better metrics 100 | 101 | ## 1.2.0 102 | 103 | * ViT-B/32 trained on Laion2B-en 104 | * add missing openai RN50x64 model 105 | 106 | ## 1.1.1 107 | 108 | * ViT-B/16+ 109 | * Add grad checkpointing support 110 | * more robust data loader 111 | -------------------------------------------------------------------------------- /open_clip/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2021 Gabriel Ilharco, Mitchell Wortsman, 2 | Nicholas Carlini, Rohan Taori, Achal Dave, Vaishaal Shankar, 3 | John Miller, Hongseok Namkoong, Hannaneh Hajishirzi, Ali Farhadi, 4 | Ludwig Schmidt 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 21 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /open_clip/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/open_clip/bpe_simple_vocab_16e6.txt.gz 2 | include src/open_clip/model_configs/*.json 3 | 4 | -------------------------------------------------------------------------------- /open_clip/Makefile: -------------------------------------------------------------------------------- 1 | install: ## [Local development] Upgrade pip, install requirements, install package. 2 | python -m pip install -U pip 3 | python -m pip install -e . 
4 | 5 | install-training: 6 | python -m pip install -r requirements-training.txt 7 | 8 | install-test: ## [Local development] Install test requirements 9 | python -m pip install -r requirements-test.txt 10 | 11 | test: ## [Local development] Run unit tests 12 | python -m pytest -x -s -v tests 13 | -------------------------------------------------------------------------------- /open_clip/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | regression_test 4 | -------------------------------------------------------------------------------- /open_clip/requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest-split==0.8.0 2 | pytest==7.2.0 3 | transformers 4 | timm==0.6.11 5 | -------------------------------------------------------------------------------- /open_clip/requirements-training.txt: -------------------------------------------------------------------------------- 1 | torch>=1.9.0 2 | torchvision 3 | webdataset>=0.2.5 4 | regex 5 | ftfy 6 | tqdm 7 | pandas 8 | braceexpand 9 | huggingface_hub 10 | transformers 11 | timm 12 | fsspec 13 | -------------------------------------------------------------------------------- /open_clip/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.9.0 2 | torchvision 3 | regex 4 | ftfy 5 | tqdm 6 | huggingface_hub 7 | sentencepiece 8 | protobuf==3.20.* 9 | timm 10 | -------------------------------------------------------------------------------- /open_clip/setup.py: -------------------------------------------------------------------------------- 1 | """ Setup 2 | """ 3 | from setuptools import setup, find_packages 4 | from codecs import open 5 | from os import path 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 11 | long_description = f.read() 12 | 13 | def _read_reqs(relpath): 14 | fullpath = path.join(path.dirname(__file__), relpath) 15 | with open(fullpath) as f: 16 | return [s.strip() for s in f.readlines() if (s.strip() and not s.startswith("#"))] 17 | 18 | REQUIREMENTS = _read_reqs("requirements.txt") 19 | TRAINING_REQUIREMENTS = _read_reqs("requirements-training.txt") 20 | 21 | exec(open('src/open_clip/version.py').read()) 22 | setup( 23 | name='open_clip_torch', 24 | version=__version__, 25 | description='OpenCLIP', 26 | long_description=long_description, 27 | long_description_content_type='text/markdown', 28 | url='https://github.com/mlfoundations/open_clip', 29 | author='', 30 | author_email='', 31 | classifiers=[ 32 | # How mature is this project? 
Common values are 33 | # 3 - Alpha 34 | # 4 - Beta 35 | # 5 - Production/Stable 36 | 'Development Status :: 3 - Alpha', 37 | 'Intended Audience :: Education', 38 | 'Intended Audience :: Science/Research', 39 | 'License :: OSI Approved :: Apache Software License', 40 | 'Programming Language :: Python :: 3.7', 41 | 'Programming Language :: Python :: 3.8', 42 | 'Programming Language :: Python :: 3.9', 43 | 'Programming Language :: Python :: 3.10', 44 | 'Topic :: Scientific/Engineering', 45 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 46 | 'Topic :: Software Development', 47 | 'Topic :: Software Development :: Libraries', 48 | 'Topic :: Software Development :: Libraries :: Python Modules', 49 | ], 50 | 51 | # Note that this is a string of words separated by whitespace, not a list. 52 | keywords='CLIP pretrained', 53 | package_dir={'': 'src'}, 54 | packages=find_packages(where='src'), 55 | include_package_data=True, 56 | install_requires=REQUIREMENTS, 57 | extras_require={ 58 | "training": TRAINING_REQUIREMENTS, 59 | }, 60 | python_requires='>=3.7', 61 | ) 62 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD 2 | from .factory import create_model, create_model_and_transforms, create_model_from_pretrained, get_tokenizer 3 | from .factory import list_models, add_model_config, get_model_config, load_checkpoint 4 | from .loss import ClipLoss 5 | from .model import CLIP, CustomTextCLIP, CLIPTextCfg, CLIPVisionCfg,\ 6 | convert_weights_to_lp, convert_weights_to_fp16, trace_model, get_cast_dtype 7 | from .openai import load_openai_model, list_openai_models 8 | from .pretrained import list_pretrained, list_pretrained_models_by_tag, list_pretrained_tags_by_model,\ 9 | get_pretrained_url, download_pretrained_from_url, is_pretrained_cfg, get_pretrained_cfg, download_pretrained 10 | from .tokenizer import SimpleTokenizer, tokenize 11 | from .transform import image_transform, AugmentationCfg 12 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/constants.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/constants.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/factory.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/factory.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/hf_configs.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/hf_configs.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/hf_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/hf_model.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/modified_resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/modified_resnet.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/openai.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/openai.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/pretrained.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/pretrained.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/timm_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/timm_model.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/tokenizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/tokenizer.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/transform.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/transform.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/transformer.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/__pycache__/version.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/__pycache__/version.cpython-38.pyc -------------------------------------------------------------------------------- /open_clip/src/open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /open_clip/src/open_clip/constants.py: -------------------------------------------------------------------------------- 1 | OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) 2 | OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) 3 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/hf_configs.py: -------------------------------------------------------------------------------- 1 | # HF architecture dict: 2 | arch_dict = { 3 | # https://huggingface.co/docs/transformers/model_doc/roberta#roberta 4 | "roberta": { 5 | "config_names": { 6 | "context_length": "max_position_embeddings", 7 | "vocab_size": "vocab_size", 8 | "width": "hidden_size", 9 | "heads": "num_attention_heads", 10 | "layers": "num_hidden_layers", 11 | "layer_attr": "layer", 12 | "token_embeddings_attr": "embeddings" 13 | }, 14 | "pooler": "mean_pooler", 15 | }, 16 | # https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.XLMRobertaConfig 17 | "xlm-roberta": { 18 | "config_names": { 19 | "context_length": "max_position_embeddings", 20 | "vocab_size": "vocab_size", 21 | "width": "hidden_size", 22 | "heads": "num_attention_heads", 23 | "layers": "num_hidden_layers", 24 | "layer_attr": "layer", 25 | "token_embeddings_attr": "embeddings" 26 | }, 27 | "pooler": "mean_pooler", 28 | }, 29 | # https://huggingface.co/docs/transformers/model_doc/mt5#mt5 30 | "mt5": { 31 | "config_names": { 32 | # unlimited seqlen 33 | # https://github.com/google-research/text-to-text-transfer-transformer/issues/273 34 | # https://github.com/huggingface/transformers/blob/v4.24.0/src/transformers/models/t5/modeling_t5.py#L374 35 | "context_length": "", 36 | "vocab_size": "vocab_size", 37 | "width": "d_model", 38 | "heads": "num_heads", 39 | "layers": 
"num_layers", 40 | "layer_attr": "block", 41 | "token_embeddings_attr": "embed_tokens" 42 | }, 43 | "pooler": "mean_pooler", 44 | }, 45 | } 46 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "quick_gelu": true, 4 | "vision_cfg": { 5 | "image_size": 224, 6 | "layers": [ 7 | 3, 8 | 4, 9 | 23, 10 | 3 11 | ], 12 | "width": 64, 13 | "patch_size": null 14 | }, 15 | "text_cfg": { 16 | "context_length": 77, 17 | "vocab_size": 49408, 18 | "width": 512, 19 | "heads": 8, 20 | "layers": 12 21 | } 22 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": [ 6 | 3, 7 | 4, 8 | 23, 9 | 3 10 | ], 11 | "width": 64, 12 | "patch_size": null 13 | }, 14 | "text_cfg": { 15 | "context_length": 77, 16 | "vocab_size": 49408, 17 | "width": 512, 18 | "heads": 8, 19 | "layers": 12 20 | } 21 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "quick_gelu": true, 4 | "vision_cfg": { 5 | "image_size": 224, 6 | "layers": [ 7 | 3, 8 | 4, 9 | 6, 10 | 3 11 | ], 12 | "width": 64, 13 | "patch_size": null 14 | }, 15 | "text_cfg": { 16 | "context_length": 77, 17 | "vocab_size": 49408, 18 | "width": 512, 19 | "heads": 8, 20 | "layers": 12 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": [ 6 | 3, 7 | 4, 8 | 6, 9 | 3 10 | ], 11 | "width": 64, 12 | "patch_size": null 13 | }, 14 | "text_cfg": { 15 | "context_length": 77, 16 | "vocab_size": 49408, 17 | "width": 512, 18 | "heads": 8, 19 | "layers": 12 20 | } 21 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 384, 5 | "layers": [ 6 | 6, 7 | 8, 8 | 18, 9 | 8 10 | ], 11 | "width": 96, 12 | "patch_size": null 13 | }, 14 | "text_cfg": { 15 | "context_length": 77, 16 | "vocab_size": 49408, 17 | "width": 768, 18 | "heads": 12, 19 | "layers": 12 20 | } 21 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "image_size": 288, 5 | "layers": [ 6 | 4, 7 | 6, 8 | 10, 9 | 6 10 | ], 11 | "width": 80, 12 | "patch_size": null 13 | }, 14 | "text_cfg": { 15 | "context_length": 77, 16 | "vocab_size": 49408, 17 | "width": 640, 18 | "heads": 10, 19 | "layers": 12 20 | } 21 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/RN50x64.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 448, 5 | "layers": [ 6 | 3, 7 | 15, 8 | 36, 9 | 10 10 | ], 11 | "width": 128, 12 | "patch_size": null 13 | }, 14 | "text_cfg": { 15 | "context_length": 77, 16 | "vocab_size": 49408, 17 | "width": 1024, 18 | "heads": 16, 19 | "layers": 12 20 | } 21 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-16-plus-240.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "image_size": 240, 5 | "layers": 12, 6 | "width": 896, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 640, 13 | "heads": 10, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-16-plus.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 896, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 640, 13 | "heads": 10, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 512, 13 | "heads": 8, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-32-plus-256.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "image_size": 256, 5 | "layers": 12, 6 | "width": 896, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 640, 13 | "heads": 10, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "quick_gelu": true, 4 | "vision_cfg": { 5 | "image_size": 224, 6 | "layers": 12, 7 | "width": 768, 8 | "patch_size": 32 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 512, 13 | "heads": 8, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-H-14.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 32, 6 | "width": 1280, 7 | "head_width": 80, 8 | "patch_size": 14 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 1024, 14 | "heads": 16, 15 | "layers": 24 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-H-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 32, 6 | "width": 1280, 7 | "head_width": 80, 8 | "patch_size": 16 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 1024, 14 | "heads": 16, 15 | "layers": 24 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-L-14-280.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 280, 5 | "layers": 24, 6 | "width": 1024, 7 | "patch_size": 14 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 768, 13 | "heads": 12, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-L-14-336.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 336, 5 | "layers": 24, 6 | "width": 1024, 7 | "patch_size": 14 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 768, 13 | "heads": 12, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 24, 6 | "width": 1024, 7 | "patch_size": 14 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 768, 13 | "heads": 12, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-L-16-320.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 320, 5 | "layers": 24, 6 | "width": 1024, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 768, 13 | "heads": 12, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-L-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 24, 6 | "width": 1024, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 768, 13 | "heads": 12, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-M-16-alt.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 384, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 512, 7 | "patch_size": 16, 8 | "ls_init_value": 1e-4 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 384, 14 | "heads": 6, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-M-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 512, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 512, 13 | "heads": 8, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-M-32-alt.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 384, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 512, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 384, 13 | "heads": 6, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-M-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 512, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 512, 13 | "heads": 8, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-S-16-alt.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 256, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 384, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 256, 13 | "heads": 4, 14 | "layers": 10 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-S-16.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 384, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 384, 7 | "patch_size": 16 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 384, 13 | "heads": 6, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-S-32-alt.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 256, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 384, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 256, 13 | "heads": 4, 14 | "layers": 10 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-S-32.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "embed_dim": 384, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 384, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "context_length": 77, 11 | "vocab_size": 49408, 12 | "width": 384, 13 | "heads": 6, 14 | "layers": 12 15 | } 16 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-bigG-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1280, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 48, 6 | "width": 1664, 7 | "head_width": 104, 8 | "mlp_ratio": 4.9231, 9 | "patch_size": 14 10 | }, 11 | "text_cfg": { 12 | "context_length": 77, 13 | "vocab_size": 49408, 14 | "width": 1280, 15 | "heads": 20, 16 | "layers": 32 17 | } 18 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-e-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1280, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 56, 6 | "width": 1792, 7 | "head_width": 112, 8 | "mlp_ratio": 8.5715, 9 | "patch_size": 14 10 | }, 11 | "text_cfg": { 12 | "context_length": 77, 13 | "vocab_size": 49408, 14 | "width": 1280, 15 | "heads": 20, 16 | "layers": 36 17 | } 18 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/ViT-g-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 40, 6 | "width": 1408, 7 | "head_width": 88, 8 | "mlp_ratio": 4.3637, 9 | "patch_size": 14 10 | }, 11 | "text_cfg": { 12 | "context_length": 77, 13 | "vocab_size": 49408, 14 | "width": 1024, 15 | "heads": 16, 16 | "layers": 24 17 | } 18 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_base", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_base_w.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_base", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 256 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 640, 14 | "heads": 10, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_base_w_320.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_base", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 320 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 640, 14 | 
"heads": 10, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_large.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_large", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 768, 14 | "heads": 12, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_large_d.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 768, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_large", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "mlp", 8 | "timm_drop": 0.1, 9 | "timm_drop_path": 0.1, 10 | "image_size": 256 11 | }, 12 | "text_cfg": { 13 | "context_length": 77, 14 | "vocab_size": 49408, 15 | "width": 768, 16 | "heads": 12, 17 | "layers": 16 18 | } 19 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_small", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_tiny", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_xlarge.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_xlarge", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 1024, 14 | "heads": 16, 15 | "layers": 16 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_xxlarge.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_xxlarge", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 256 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 1024, 14 | "heads": 16, 15 | "layers": 24 16 | } 17 | } 
-------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/convnext_xxlarge_320.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "timm_model_name": "convnext_xxlarge", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 320 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 1024, 14 | "heads": 16, 15 | "layers": 24 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/mt5-base-ViT-B-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "hf_model_name": "google/mt5-base", 11 | "hf_tokenizer_name": "google/mt5-base", 12 | "proj": "mlp", 13 | "pooler_type": "mean_pooler" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/mt5-xl-ViT-H-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 32, 6 | "width": 1280, 7 | "head_width": 80, 8 | "patch_size": 14 9 | }, 10 | "text_cfg": { 11 | "hf_model_name": "google/mt5-xl", 12 | "hf_tokenizer_name": "google/mt5-xl", 13 | "proj": "mlp", 14 | "pooler_type": "mean_pooler" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/roberta-ViT-B-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "quick_gelu": true, 4 | "vision_cfg": { 5 | "image_size": 224, 6 | "layers": 12, 7 | "width": 768, 8 | "patch_size": 32 9 | }, 10 | "text_cfg": { 11 | "hf_model_name": "roberta-base", 12 | "hf_tokenizer_name": "roberta-base", 13 | "proj": "mlp", 14 | "pooler_type": "mean_pooler" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/swin_base_patch4_window7_224.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 640, 3 | "vision_cfg": { 4 | "timm_model_name": "swin_base_patch4_window7_224", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 640, 14 | "heads": 10, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/vit_medium_patch16_gap_256.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "timm_model_name": "vit_medium_patch16_gap_256", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 256 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- 
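The mt5-* and roberta-* configs above (and the xlm-roberta-* configs that follow) replace the built-in text transformer with a Hugging Face encoder: "hf_model_name"/"hf_tokenizer_name" select the backbone, "pooler_type" chooses how token states are pooled, and "proj" maps the pooled output into the shared embed_dim. A minimal, illustrative sketch (assuming open_clip and transformers are installed and the Hugging Face Hub is reachable for the roberta-base configuration):

import torch
import open_clip

# pretrained_hf=False skips downloading pretrained roberta-base weights; the hub is
# still consulted for the model configuration.
model, _, preprocess = open_clip.create_model_and_transforms(
    "roberta-ViT-B-32", pretrained_hf=False
)
tokenizer = open_clip.get_tokenizer("roberta-ViT-B-32")  # wraps the roberta-base tokenizer

text = tokenizer(["a diagram", "a dog", "a cat"])
with torch.no_grad():
    text_features = model.encode_text(text)  # shape (3, 512) -- embed_dim from the config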
/open_clip/src/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "timm_model_name": "vit_relpos_medium_patch16_cls_224", 5 | "timm_model_pretrained": false, 6 | "timm_pool": "", 7 | "timm_proj": "linear", 8 | "image_size": 224 9 | }, 10 | "text_cfg": { 11 | "context_length": 77, 12 | "vocab_size": 49408, 13 | "width": 512, 14 | "heads": 8, 15 | "layers": 12 16 | } 17 | } -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 512, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 12, 6 | "width": 768, 7 | "patch_size": 32 8 | }, 9 | "text_cfg": { 10 | "hf_model_name": "xlm-roberta-base", 11 | "hf_tokenizer_name": "xlm-roberta-base", 12 | "proj": "mlp", 13 | "pooler_type": "mean_pooler" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "embed_dim": 1024, 3 | "vision_cfg": { 4 | "image_size": 224, 5 | "layers": 32, 6 | "width": 1280, 7 | "head_width": 80, 8 | "patch_size": 14 9 | }, 10 | "text_cfg": { 11 | "hf_model_name": "xlm-roberta-large", 12 | "hf_tokenizer_name": "xlm-roberta-large", 13 | "proj": "mlp", 14 | "pooler_type": "mean_pooler" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/utils.py: -------------------------------------------------------------------------------- 1 | from itertools import repeat 2 | import collections.abc 3 | 4 | from torch import nn as nn 5 | from torchvision.ops.misc import FrozenBatchNorm2d 6 | 7 | 8 | def freeze_batch_norm_2d(module, module_match={}, name=''): 9 | """ 10 | Converts all `BatchNorm2d` and `SyncBatchNorm` layers of provided module into `FrozenBatchNorm2d`. If `module` is 11 | itself an instance of either `BatchNorm2d` or `SyncBatchNorm`, it is converted into `FrozenBatchNorm2d` and 12 | returned. Otherwise, the module is walked recursively and submodules are converted in place. 13 | 14 | Args: 15 | module (torch.nn.Module): Any PyTorch module. 
16 | module_match (dict): Dictionary of full module names to freeze (all if empty) 17 | name (str): Full module name (prefix) 18 | 19 | Returns: 20 | torch.nn.Module: Resulting module 21 | 22 | Inspired by https://github.com/pytorch/pytorch/blob/a5895f85be0f10212791145bfedc0261d364f103/torch/nn/modules/batchnorm.py#L762 23 | """ 24 | res = module 25 | is_match = True 26 | if module_match: 27 | is_match = name in module_match 28 | if is_match and isinstance(module, (nn.modules.batchnorm.BatchNorm2d, nn.modules.batchnorm.SyncBatchNorm)): 29 | res = FrozenBatchNorm2d(module.num_features) 30 | res.num_features = module.num_features 31 | res.affine = module.affine 32 | if module.affine: 33 | res.weight.data = module.weight.data.clone().detach() 34 | res.bias.data = module.bias.data.clone().detach() 35 | res.running_mean.data = module.running_mean.data 36 | res.running_var.data = module.running_var.data 37 | res.eps = module.eps 38 | else: 39 | for child_name, child in module.named_children(): 40 | full_child_name = '.'.join([name, child_name]) if name else child_name 41 | new_child = freeze_batch_norm_2d(child, module_match, full_child_name) 42 | if new_child is not child: 43 | res.add_module(child_name, new_child) 44 | return res 45 | 46 | 47 | # From PyTorch internals 48 | def _ntuple(n): 49 | def parse(x): 50 | if isinstance(x, collections.abc.Iterable): 51 | return x 52 | return tuple(repeat(x, n)) 53 | return parse 54 | 55 | 56 | to_1tuple = _ntuple(1) 57 | to_2tuple = _ntuple(2) 58 | to_3tuple = _ntuple(3) 59 | to_4tuple = _ntuple(4) 60 | to_ntuple = lambda n, x: _ntuple(n)(x) 61 | -------------------------------------------------------------------------------- /open_clip/src/open_clip/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.10.1' 2 | -------------------------------------------------------------------------------- /open_clip/src/open_clip_torch.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | MANIFEST.in 2 | README.md 3 | setup.py 4 | src/open_clip/__init__.py 5 | src/open_clip/bpe_simple_vocab_16e6.txt.gz 6 | src/open_clip/constants.py 7 | src/open_clip/factory.py 8 | src/open_clip/hf_configs.py 9 | src/open_clip/hf_model.py 10 | src/open_clip/loss.py 11 | src/open_clip/model.py 12 | src/open_clip/modified_resnet.py 13 | src/open_clip/openai.py 14 | src/open_clip/pretrained.py 15 | src/open_clip/timm_model.py 16 | src/open_clip/tokenizer.py 17 | src/open_clip/transform.py 18 | src/open_clip/transformer.py 19 | src/open_clip/utils.py 20 | src/open_clip/version.py 21 | src/open_clip/model_configs/RN101-quickgelu.json 22 | src/open_clip/model_configs/RN101.json 23 | src/open_clip/model_configs/RN50-quickgelu.json 24 | src/open_clip/model_configs/RN50.json 25 | src/open_clip/model_configs/RN50x16.json 26 | src/open_clip/model_configs/RN50x4.json 27 | src/open_clip/model_configs/RN50x64.json 28 | src/open_clip/model_configs/ViT-B-16-plus-240.json 29 | src/open_clip/model_configs/ViT-B-16-plus.json 30 | src/open_clip/model_configs/ViT-B-16.json 31 | src/open_clip/model_configs/ViT-B-32-plus-256.json 32 | src/open_clip/model_configs/ViT-B-32-quickgelu.json 33 | src/open_clip/model_configs/ViT-B-32.json 34 | src/open_clip/model_configs/ViT-H-14.json 35 | src/open_clip/model_configs/ViT-H-16.json 36 | src/open_clip/model_configs/ViT-L-14-280.json 37 | src/open_clip/model_configs/ViT-L-14-336.json 38 | src/open_clip/model_configs/ViT-L-14.json 39 | 
src/open_clip/model_configs/ViT-L-16-320.json 40 | src/open_clip/model_configs/ViT-L-16.json 41 | src/open_clip/model_configs/ViT-M-16-alt.json 42 | src/open_clip/model_configs/ViT-M-16.json 43 | src/open_clip/model_configs/ViT-M-32-alt.json 44 | src/open_clip/model_configs/ViT-M-32.json 45 | src/open_clip/model_configs/ViT-S-16-alt.json 46 | src/open_clip/model_configs/ViT-S-16.json 47 | src/open_clip/model_configs/ViT-S-32-alt.json 48 | src/open_clip/model_configs/ViT-S-32.json 49 | src/open_clip/model_configs/ViT-bigG-14.json 50 | src/open_clip/model_configs/ViT-e-14.json 51 | src/open_clip/model_configs/ViT-g-14.json 52 | src/open_clip/model_configs/convnext_base.json 53 | src/open_clip/model_configs/convnext_base_w.json 54 | src/open_clip/model_configs/convnext_base_w_320.json 55 | src/open_clip/model_configs/convnext_large.json 56 | src/open_clip/model_configs/convnext_large_d.json 57 | src/open_clip/model_configs/convnext_small.json 58 | src/open_clip/model_configs/convnext_tiny.json 59 | src/open_clip/model_configs/convnext_xlarge.json 60 | src/open_clip/model_configs/convnext_xxlarge.json 61 | src/open_clip/model_configs/convnext_xxlarge_320.json 62 | src/open_clip/model_configs/mt5-base-ViT-B-32.json 63 | src/open_clip/model_configs/mt5-xl-ViT-H-14.json 64 | src/open_clip/model_configs/roberta-ViT-B-32.json 65 | src/open_clip/model_configs/swin_base_patch4_window7_224.json 66 | src/open_clip/model_configs/vit_medium_patch16_gap_256.json 67 | src/open_clip/model_configs/vit_relpos_medium_patch16_cls_224.json 68 | src/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json 69 | src/open_clip/model_configs/xlm-roberta-large-ViT-H-14.json 70 | src/open_clip_torch.egg-info/PKG-INFO 71 | src/open_clip_torch.egg-info/SOURCES.txt 72 | src/open_clip_torch.egg-info/dependency_links.txt 73 | src/open_clip_torch.egg-info/requires.txt 74 | src/open_clip_torch.egg-info/top_level.txt 75 | src/training/__init__.py 76 | src/training/data.py 77 | src/training/distributed.py 78 | src/training/file_utils.py 79 | src/training/imagenet_zeroshot_data.py 80 | src/training/logger.py 81 | src/training/main.py 82 | src/training/params.py 83 | src/training/precision.py 84 | src/training/profile.py 85 | src/training/scheduler.py 86 | src/training/train.py 87 | src/training/zero_shot.py -------------------------------------------------------------------------------- /open_clip/src/open_clip_torch.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /open_clip/src/open_clip_torch.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | torch>=1.9.0 2 | torchvision 3 | regex 4 | ftfy 5 | tqdm 6 | huggingface_hub 7 | sentencepiece 8 | protobuf==3.20.* 9 | timm 10 | 11 | [training] 12 | torch>=1.9.0 13 | torchvision 14 | webdataset>=0.2.5 15 | regex 16 | ftfy 17 | tqdm 18 | pandas 19 | braceexpand 20 | huggingface_hub 21 | transformers 22 | timm 23 | fsspec 24 | -------------------------------------------------------------------------------- /open_clip/src/open_clip_torch.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | open_clip 2 | training 3 | -------------------------------------------------------------------------------- /open_clip/src/training/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/open_clip/src/training/__init__.py -------------------------------------------------------------------------------- /open_clip/src/training/file_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import multiprocessing 4 | import subprocess 5 | import time 6 | import fsspec 7 | import torch 8 | from tqdm import tqdm 9 | 10 | def remote_sync_s3(local_dir, remote_dir): 11 | # skip epoch_latest which can change during sync. 12 | result = subprocess.run(["aws", "s3", "sync", local_dir, remote_dir, '--exclude', '*epoch_latest.pt'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 13 | if result.returncode != 0: 14 | logging.error(f"Error: Failed to sync with S3 bucket {result.stderr.decode('utf-8')}") 15 | return False 16 | 17 | logging.info(f"Successfully synced with S3 bucket") 18 | return True 19 | 20 | def remote_sync_fsspec(local_dir, remote_dir): 21 | # FIXME currently this is slow and not recommended. Look into speeding up. 22 | a = fsspec.get_mapper(local_dir) 23 | b = fsspec.get_mapper(remote_dir) 24 | 25 | for k in a: 26 | # skip epoch_latest which can change during sync. 27 | if 'epoch_latest.pt' in k: 28 | continue 29 | 30 | logging.info(f'Attempting to sync {k}') 31 | if k in b and len(a[k]) == len(b[k]): 32 | logging.debug(f'Skipping remote sync for {k}.') 33 | continue 34 | 35 | try: 36 | logging.info(f'Successful sync for {k}.') 37 | b[k] = a[k] 38 | except Exception as e: 39 | logging.info(f'Error during remote sync for {k}: {e}') 40 | return False 41 | 42 | return True 43 | 44 | def remote_sync(local_dir, remote_dir, protocol): 45 | logging.info('Starting remote sync.') 46 | if protocol == 's3': 47 | return remote_sync_s3(local_dir, remote_dir) 48 | elif protocol == 'fsspec': 49 | return remote_sync_fsspec(local_dir, remote_dir) 50 | else: 51 | logging.error('Remote protocol not known') 52 | return False 53 | 54 | def keep_running_remote_sync(sync_every, local_dir, remote_dir, protocol): 55 | while True: 56 | time.sleep(sync_every) 57 | remote_sync(local_dir, remote_dir, protocol) 58 | 59 | def start_sync_process(sync_every, local_dir, remote_dir, protocol): 60 | p = multiprocessing.Process(target=keep_running_remote_sync, args=(sync_every, local_dir, remote_dir, protocol)) 61 | return p 62 | 63 | # Note: we are not currently using this save function. 
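# Note on usage: start_sync_process above only constructs the multiprocessing.Process and
# returns it without starting it, so the caller is expected to call .start() on the result, e.g.
#     p = start_sync_process(sync_every, local_dir, remote_dir, protocol)
#     p.start()
# Note on pt_save below: it opens file_path through fsspec but then passes `file_path`
# (not the handle `f`) to torch.save, so the fsspec handle goes unused; the function is
# currently unused (see the comment above), and the intended call is presumably
# torch.save(pt_obj, f).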
64 | def pt_save(pt_obj, file_path): 65 | of = fsspec.open(file_path, "wb") 66 | with of as f: 67 | torch.save(pt_obj, file_path) 68 | 69 | def pt_load(file_path, map_location=None): 70 | if not file_path.startswith('/'): 71 | logging.info('Loading remote checkpoint, which may take a bit.') 72 | of = fsspec.open(file_path, "rb") 73 | with of as f: 74 | out = torch.load(f, map_location=map_location) 75 | return out 76 | 77 | def check_exists(file_path): 78 | try: 79 | with fsspec.open(file_path): 80 | pass 81 | except FileNotFoundError: 82 | return False 83 | return True 84 | -------------------------------------------------------------------------------- /open_clip/src/training/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logging(log_file, level, include_host=False): 5 | if include_host: 6 | import socket 7 | hostname = socket.gethostname() 8 | formatter = logging.Formatter( 9 | f'%(asctime)s | {hostname} | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S') 10 | else: 11 | formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S') 12 | 13 | logging.root.setLevel(level) 14 | loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] 15 | for logger in loggers: 16 | logger.setLevel(level) 17 | 18 | stream_handler = logging.StreamHandler() 19 | stream_handler.setFormatter(formatter) 20 | logging.root.addHandler(stream_handler) 21 | 22 | if log_file: 23 | file_handler = logging.FileHandler(filename=log_file) 24 | file_handler.setFormatter(formatter) 25 | logging.root.addHandler(file_handler) 26 | 27 | -------------------------------------------------------------------------------- /open_clip/src/training/precision.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from contextlib import suppress 3 | 4 | 5 | def get_autocast(precision): 6 | if precision == 'amp': 7 | return torch.cuda.amp.autocast 8 | elif precision == 'amp_bfloat16' or precision == 'amp_bf16': 9 | # amp_bfloat16 is more stable than amp float16 for clip training 10 | return lambda: torch.cuda.amp.autocast(dtype=torch.bfloat16) 11 | else: 12 | return suppress 13 | -------------------------------------------------------------------------------- /open_clip/src/training/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def assign_learning_rate(optimizer, new_lr): 5 | for param_group in optimizer.param_groups: 6 | param_group["lr"] = new_lr 7 | 8 | 9 | def _warmup_lr(base_lr, warmup_length, step): 10 | return base_lr * (step + 1) / warmup_length 11 | 12 | 13 | def const_lr(optimizer, base_lr, warmup_length, steps): 14 | def _lr_adjuster(step): 15 | if step < warmup_length: 16 | lr = _warmup_lr(base_lr, warmup_length, step) 17 | else: 18 | lr = base_lr 19 | assign_learning_rate(optimizer, lr) 20 | return lr 21 | return _lr_adjuster 22 | 23 | 24 | def const_lr_cooldown(optimizer, base_lr, warmup_length, steps, cooldown_steps, cooldown_power=1.0, cooldown_end_lr=0.): 25 | def _lr_adjuster(step): 26 | start_cooldown_step = steps - cooldown_steps 27 | if step < warmup_length: 28 | lr = _warmup_lr(base_lr, warmup_length, step) 29 | else: 30 | if step < start_cooldown_step: 31 | lr = base_lr 32 | else: 33 | e = step - start_cooldown_step 34 | es = steps - start_cooldown_step 35 | # linear decay if power == 1; polynomial decay otherwise; 36 | decay 
= (1 - (e/es)) ** cooldown_power 37 | lr = decay * (base_lr - cooldown_end_lr) + cooldown_end_lr 38 | assign_learning_rate(optimizer, lr) 39 | return lr 40 | return _lr_adjuster 41 | 42 | 43 | def cosine_lr(optimizer, base_lr, warmup_length, steps): 44 | def _lr_adjuster(step): 45 | if step < warmup_length: 46 | lr = _warmup_lr(base_lr, warmup_length, step) 47 | else: 48 | e = step - warmup_length 49 | es = steps - warmup_length 50 | lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr 51 | assign_learning_rate(optimizer, lr) 52 | return lr 53 | return _lr_adjuster 54 | -------------------------------------------------------------------------------- /open_clip/src/training/zero_shot.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from tqdm import tqdm 6 | 7 | from open_clip import get_cast_dtype, get_tokenizer 8 | from .precision import get_autocast 9 | from .imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template 10 | 11 | 12 | def zero_shot_classifier(model, classnames, templates, args): 13 | tokenizer = get_tokenizer(args.model) 14 | with torch.no_grad(): 15 | zeroshot_weights = [] 16 | for classname in tqdm(classnames): 17 | texts = [template(classname) for template in templates] # format with class 18 | texts = tokenizer(texts).to(args.device) # tokenize 19 | if args.distributed and not args.horovod: 20 | class_embeddings = model.module.encode_text(texts) 21 | else: 22 | class_embeddings = model.encode_text(texts) 23 | class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0) 24 | class_embedding /= class_embedding.norm() 25 | zeroshot_weights.append(class_embedding) 26 | zeroshot_weights = torch.stack(zeroshot_weights, dim=1).to(args.device) 27 | return zeroshot_weights 28 | 29 | 30 | def accuracy(output, target, topk=(1,)): 31 | pred = output.topk(max(topk), 1, True, True)[1].t() 32 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 33 | return [float(correct[:k].reshape(-1).float().sum(0, keepdim=True).cpu().numpy()) for k in topk] 34 | 35 | 36 | def run(model, classifier, dataloader, args): 37 | autocast = get_autocast(args.precision) 38 | cast_dtype = get_cast_dtype(args.precision) 39 | with torch.no_grad(): 40 | top1, top5, n = 0., 0., 0. 41 | for images, target in tqdm(dataloader, unit_scale=args.batch_size): 42 | images = images.to(args.device) 43 | if cast_dtype is not None: 44 | images = images.to(dtype=cast_dtype) 45 | target = target.to(args.device) 46 | 47 | with autocast(): 48 | # predict 49 | if args.distributed and not args.horovod: 50 | image_features = model.module.encode_image(images) 51 | else: 52 | image_features = model.encode_image(images) 53 | image_features = F.normalize(image_features, dim=-1) 54 | logits = 100. 
* image_features @ classifier 55 | 56 | # measure accuracy 57 | acc1, acc5 = accuracy(logits, target, topk=(1, 5)) 58 | top1 += acc1 59 | top5 += acc5 60 | n += images.size(0) 61 | 62 | top1 = (top1 / n) 63 | top5 = (top5 / n) 64 | return top1, top5 65 | 66 | 67 | def zero_shot_eval(model, data, epoch, args): 68 | if 'imagenet-val' not in data and 'imagenet-v2' not in data: 69 | return {} 70 | if args.zeroshot_frequency == 0: 71 | return {} 72 | if (epoch % args.zeroshot_frequency) != 0 and epoch != args.epochs: 73 | return {} 74 | 75 | logging.info('Starting zero-shot imagenet.') 76 | 77 | logging.info('Building zero-shot classifier') 78 | classifier = zero_shot_classifier(model, imagenet_classnames, openai_imagenet_template, args) 79 | 80 | logging.info('Using classifier') 81 | results = {} 82 | if 'imagenet-val' in data: 83 | top1, top5 = run(model, classifier, data['imagenet-val'].dataloader, args) 84 | results['imagenet-zeroshot-val-top1'] = top1 85 | results['imagenet-zeroshot-val-top5'] = top5 86 | if 'imagenet-v2' in data: 87 | top1, top5 = run(model, classifier, data['imagenet-v2'].dataloader, args) 88 | results['imagenetv2-zeroshot-val-top1'] = top1 89 | results['imagenetv2-zeroshot-val-top5'] = top5 90 | 91 | logging.info('Finished zero-shot imagenet.') 92 | 93 | return results 94 | -------------------------------------------------------------------------------- /open_clip/tests/test_hf_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torch 4 | from open_clip.hf_model import _POOLERS, HFTextEncoder 5 | from transformers import AutoConfig 6 | from transformers.modeling_outputs import BaseModelOutput 7 | # test poolers 8 | def test_poolers(): 9 | bs, sl, d = 2, 10, 5 10 | h = torch.arange(sl).repeat(bs).reshape(bs, sl)[..., None] * torch.linspace(0.2, 1., d) 11 | mask = torch.ones(bs, sl, dtype=torch.long) 12 | mask[:2, 6:] = 0 13 | x = BaseModelOutput(h) 14 | for name, cls in _POOLERS.items(): 15 | pooler = cls() 16 | res = pooler(x, mask) 17 | assert res.shape == (bs, d), f"{name} returned wrong shape" 18 | 19 | # test HFTextEncoder 20 | @pytest.mark.parametrize("model_id", ["arampacha/roberta-tiny", "roberta-base", "xlm-roberta-base", "google/mt5-base"]) 21 | def test_pretrained_text_encoder(model_id): 22 | bs, sl, d = 2, 10, 64 23 | cfg = AutoConfig.from_pretrained(model_id) 24 | model = HFTextEncoder(model_id, d, proj='linear') 25 | x = torch.randint(0, cfg.vocab_size, (bs, sl)) 26 | with torch.no_grad(): 27 | emb = model(x) 28 | 29 | assert emb.shape == (bs, d) 30 | -------------------------------------------------------------------------------- /open_clip/tests/test_inference.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pytest 4 | import torch 5 | import open_clip 6 | import util_test 7 | 8 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 9 | 10 | models_to_test = set(open_clip.list_models()) 11 | 12 | # testing excemptions 13 | models_to_test = models_to_test.difference({ 14 | # not available with timm yet 15 | # see https://github.com/mlfoundations/open_clip/issues/219 16 | 'convnext_xlarge', 17 | 'convnext_xxlarge', 18 | 'convnext_xxlarge_320', 19 | 'vit_medium_patch16_gap_256', 20 | # exceeds GH runner memory limit 21 | 'ViT-bigG-14', 22 | 'ViT-e-14', 23 | 'mt5-xl-ViT-H-14', 24 | }) 25 | 26 | if 'OPEN_CLIP_TEST_REG_MODELS' in os.environ: 27 | external_model_list = os.environ['OPEN_CLIP_TEST_REG_MODELS'] 28 | with 
open(external_model_list, 'r') as f: 29 | models_to_test = set(f.read().splitlines()).intersection(models_to_test) 30 | print(f"Selected models from {external_model_list}: {models_to_test}") 31 | 32 | models_to_test = list(models_to_test) 33 | models_to_test.sort() 34 | 35 | @pytest.mark.regression_test 36 | @pytest.mark.parametrize('model_name', models_to_test) 37 | def test_inference_with_data( 38 | model_name, 39 | pretrained = None, 40 | pretrained_hf = False, 41 | precision = 'fp32', 42 | jit = False, 43 | force_quick_gelu = False, 44 | ): 45 | util_test.seed_all() 46 | model, _, preprocess_val = open_clip.create_model_and_transforms( 47 | model_name, 48 | pretrained = pretrained, 49 | precision = precision, 50 | jit = jit, 51 | force_quick_gelu = force_quick_gelu, 52 | pretrained_hf = pretrained_hf 53 | ) 54 | model_id = f'{model_name}_{pretrained or pretrained_hf}_{precision}' 55 | input_dir, output_dir = util_test.get_data_dirs() 56 | # text 57 | input_text_path = os.path.join(input_dir, 'random_text.pt') 58 | gt_text_path = os.path.join(output_dir, f'{model_id}_random_text.pt') 59 | if not os.path.isfile(input_text_path): 60 | pytest.skip(reason = f"missing test data, expected at {input_text_path}") 61 | if not os.path.isfile(gt_text_path): 62 | pytest.skip(reason = f"missing test data, expected at {gt_text_path}") 63 | input_text = torch.load(input_text_path) 64 | gt_text = torch.load(gt_text_path) 65 | y_text = util_test.inference_text(model, model_name, input_text) 66 | assert (y_text == gt_text).all(), f"text output differs @ {input_text_path}" 67 | # image 68 | image_size = model.visual.image_size 69 | if not isinstance(image_size, tuple): 70 | image_size = (image_size, image_size) 71 | input_image_path = os.path.join(input_dir, f'random_image_{image_size[0]}_{image_size[1]}.pt') 72 | gt_image_path = os.path.join(output_dir, f'{model_id}_random_image.pt') 73 | if not os.path.isfile(input_image_path): 74 | pytest.skip(reason = f"missing test data, expected at {input_image_path}") 75 | if not os.path.isfile(gt_image_path): 76 | pytest.skip(reason = f"missing test data, expected at {gt_image_path}") 77 | input_image = torch.load(input_image_path) 78 | gt_image = torch.load(gt_image_path) 79 | y_image = util_test.inference_image(model, preprocess_val, input_image) 80 | assert (y_image == gt_image).all(), f"image output differs @ {input_image_path}" 81 | 82 | 83 | -------------------------------------------------------------------------------- /open_clip/tests/test_inference_simple.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from PIL import Image 4 | from open_clip.factory import get_tokenizer 5 | import pytest 6 | import open_clip 7 | import os 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 9 | 10 | @pytest.mark.parametrize("model_type,pretrained", [("ViT-B-32-quickgelu", "laion400m_e32"), ("roberta-ViT-B-32", "laion2b_s12b_b32k")]) 11 | def test_inference_simple(model_type, pretrained): 12 | model, _, preprocess = open_clip.create_model_and_transforms(model_type, pretrained=pretrained, jit=False) 13 | tokenizer = get_tokenizer(model_type) 14 | 15 | current_dir = os.path.dirname(os.path.realpath(__file__)) 16 | 17 | image = preprocess(Image.open(current_dir + "/../docs/CLIP.png")).unsqueeze(0) 18 | text = tokenizer(["a diagram", "a dog", "a cat"]) 19 | 20 | with torch.no_grad(): 21 | image_features = model.encode_image(image) 22 | text_features = model.encode_text(text) 23 | 24 | text_probs = (100.0 * image_features @ 
text_features.T).softmax(dim=-1) 25 | 26 | assert text_probs.cpu().numpy()[0].tolist() == [1.0, 0.0, 0.0] 27 | -------------------------------------------------------------------------------- /open_clip/tests/test_num_shards.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from training.data import get_dataset_size 4 | 5 | @pytest.mark.parametrize( 6 | "shards,expected_size", 7 | [ 8 | ('/path/to/shard.tar', 1), 9 | ('/path/to/shard_{000..000}.tar', 1), 10 | ('/path/to/shard_{000..009}.tar', 10), 11 | ('/path/to/shard_{000..009}_{000..009}.tar', 100), 12 | ('/path/to/shard.tar::/path/to/other_shard_{000..009}.tar', 11), 13 | ('/path/to/shard_{000..009}.tar::/path/to/other_shard_{000..009}.tar', 20), 14 | (['/path/to/shard.tar'], 1), 15 | (['/path/to/shard.tar', '/path/to/other_shard.tar'], 2), 16 | ] 17 | ) 18 | def test_num_shards(shards, expected_size): 19 | _, size = get_dataset_size(shards) 20 | assert size == expected_size, f'Expected {expected_size} for {shards} but found {size} instead.' 21 | -------------------------------------------------------------------------------- /open_clip/tests/test_training_simple.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import pytest 5 | from PIL import Image 6 | import torch 7 | from training.main import main 8 | 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 10 | 11 | @pytest.mark.skipif(sys.platform.startswith('darwin'), reason="macos pickle bug with locals") 12 | def test_training(): 13 | main([ 14 | '--save-frequency', '1', 15 | '--zeroshot-frequency', '1', 16 | '--dataset-type', "synthetic", 17 | '--train-num-samples', '16', 18 | '--warmup', '1', 19 | '--batch-size', '4', 20 | '--lr', '1e-3', 21 | '--wd', '0.1', 22 | '--epochs', '1', 23 | '--workers', '2', 24 | '--model', 'RN50' 25 | ]) 26 | 27 | @pytest.mark.skipif(sys.platform.startswith('darwin'), reason="macos pickle bug with locals") 28 | def test_training_mt5(): 29 | main([ 30 | '--save-frequency', '1', 31 | '--zeroshot-frequency', '1', 32 | '--dataset-type', "synthetic", 33 | '--train-num-samples', '16', 34 | '--warmup', '1', 35 | '--batch-size', '4', 36 | '--lr', '1e-3', 37 | '--wd', '0.1', 38 | '--epochs', '1', 39 | '--workers', '2', 40 | '--model', 'mt5-base-ViT-B-32', 41 | '--lock-text', 42 | '--lock-text-unlocked-layers', '2' 43 | ]) 44 | 45 | 46 | 47 | @pytest.mark.skipif(sys.platform.startswith('darwin'), reason="macos pickle bug with locals") 48 | def test_training_unfreezing_vit(): 49 | main([ 50 | '--save-frequency', '1', 51 | '--zeroshot-frequency', '1', 52 | '--dataset-type', "synthetic", 53 | '--train-num-samples', '16', 54 | '--warmup', '1', 55 | '--batch-size', '4', 56 | '--lr', '1e-3', 57 | '--wd', '0.1', 58 | '--epochs', '1', 59 | '--workers', '2', 60 | '--model', 'ViT-B-32', 61 | '--lock-image', 62 | '--lock-image-unlocked-groups', '5' 63 | ]) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.9.1 2 | scikit-image==0.15.0 3 | scikit-learn==0.24.2 4 | opencv-python==4.5.5.64 5 | hydra-core==1.3.2 6 | openmim==0.3.6 7 | mmcv-full==1.7.1 8 | mmsegmentation==0.29.1 9 | torch==1.12.1+cu113 10 | torchvision==0.13.1 11 | tokenizers==0.11.1 12 | Pillow~=9.5 13 | detectron2 #Following https://detectron2.readthedocs.io/en/latest/tutorials/install.html 14 | 
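The requirements above pin a CUDA 11.3 build of PyTorch 1.12.1 together with timm, mmcv-full/mmsegmentation and detectron2 (the latter installed separately, following the linked detectron2 instructions). As a purely illustrative sanity check (not part of the repository), the pinned stack can be verified with a short import script once the environment is built:

import torch
import torchvision
import timm
import mmcv
import mmseg          # import name of the mmsegmentation package
import detectron2     # installed separately per the detectron2 docs

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("torchvision:", torchvision.__version__)
print("timm:", timm.__version__)
print("mmcv:", mmcv.__version__)
print("mmsegmentation:", mmseg.__version__)
print("detectron2:", detectron2.__version__)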
-------------------------------------------------------------------------------- /transformers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/activations.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/activations.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/configuration_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/configuration_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/convert_slow_tokenizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/convert_slow_tokenizer.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/deepspeed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/deepspeed.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/dependency_versions_check.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/dependency_versions_check.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/dependency_versions_table.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/dependency_versions_table.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/dynamic_module_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/dynamic_module_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/feature_extraction_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/feature_extraction_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/file_utils.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/file_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/image_processing_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/image_processing_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/image_transforms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/image_transforms.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/image_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/image_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/modeling_outputs.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/modeling_outputs.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/modeling_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/modeling_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/processing_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/processing_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/pytorch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/pytorch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/tokenization_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/tokenization_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/tokenization_utils_base.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/tokenization_utils_base.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/__pycache__/tokenization_utils_fast.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/__pycache__/tokenization_utils_fast.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/benchmark/__init__.py -------------------------------------------------------------------------------- /transformers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseTransformersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /transformers/commands/download.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from argparse import ArgumentParser 16 | 17 | from . 
import BaseTransformersCLICommand 18 | 19 | 20 | def download_command_factory(args): 21 | return DownloadCommand(args.model, args.cache_dir, args.force) 22 | 23 | 24 | class DownloadCommand(BaseTransformersCLICommand): 25 | @staticmethod 26 | def register_subcommand(parser: ArgumentParser): 27 | download_parser = parser.add_parser("download") 28 | download_parser.add_argument( 29 | "--cache-dir", type=str, default=None, help="Path to location to store the models" 30 | ) 31 | download_parser.add_argument( 32 | "--force", action="store_true", help="Force the model to be download even if already in cache-dir" 33 | ) 34 | download_parser.add_argument("model", type=str, help="Name of the model to download") 35 | download_parser.set_defaults(func=download_command_factory) 36 | 37 | def __init__(self, model: str, cache: str, force: bool): 38 | self._model = model 39 | self._cache = cache 40 | self._force = force 41 | 42 | def run(self): 43 | from ..models.auto import AutoModel, AutoTokenizer 44 | 45 | AutoModel.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 46 | AutoTokenizer.from_pretrained(self._model, cache_dir=self._cache, force_download=self._force) 47 | -------------------------------------------------------------------------------- /transformers/commands/transformers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2020 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from argparse import ArgumentParser 17 | 18 | from .add_new_model import AddNewModelCommand 19 | from .add_new_model_like import AddNewModelLikeCommand 20 | from .convert import ConvertCommand 21 | from .download import DownloadCommand 22 | from .env import EnvironmentCommand 23 | from .lfs import LfsCommands 24 | from .pt_to_tf import PTtoTFCommand 25 | from .run import RunCommand 26 | from .serving import ServeCommand 27 | from .user import UserCommands 28 | 29 | 30 | def main(): 31 | parser = ArgumentParser("Transformers CLI tool", usage="transformers-cli []") 32 | commands_parser = parser.add_subparsers(help="transformers-cli command helpers") 33 | 34 | # Register commands 35 | ConvertCommand.register_subcommand(commands_parser) 36 | DownloadCommand.register_subcommand(commands_parser) 37 | EnvironmentCommand.register_subcommand(commands_parser) 38 | RunCommand.register_subcommand(commands_parser) 39 | ServeCommand.register_subcommand(commands_parser) 40 | UserCommands.register_subcommand(commands_parser) 41 | AddNewModelCommand.register_subcommand(commands_parser) 42 | AddNewModelLikeCommand.register_subcommand(commands_parser) 43 | LfsCommands.register_subcommand(commands_parser) 44 | PTtoTFCommand.register_subcommand(commands_parser) 45 | 46 | # Let's go 47 | args = parser.parse_args() 48 | 49 | if not hasattr(args, "func"): 50 | parser.print_help() 51 | exit(1) 52 | 53 | # Run 54 | service = args.func(args) 55 | service.run() 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Convert Seq2Seq TF Hub checkpoint.""" 16 | 17 | 18 | import argparse 19 | 20 | from . 
import ( 21 | BertConfig, 22 | BertGenerationConfig, 23 | BertGenerationDecoder, 24 | BertGenerationEncoder, 25 | load_tf_weights_in_bert_generation, 26 | logging, 27 | ) 28 | 29 | 30 | logging.set_verbosity_info() 31 | 32 | 33 | def convert_tf_checkpoint_to_pytorch(tf_hub_path, pytorch_dump_path, is_encoder_named_decoder, vocab_size, is_encoder): 34 | # Initialise PyTorch model 35 | bert_config = BertConfig.from_pretrained( 36 | "bert-large-cased", 37 | vocab_size=vocab_size, 38 | max_position_embeddings=512, 39 | is_decoder=True, 40 | add_cross_attention=True, 41 | ) 42 | bert_config_dict = bert_config.to_dict() 43 | del bert_config_dict["type_vocab_size"] 44 | config = BertGenerationConfig(**bert_config_dict) 45 | if is_encoder: 46 | model = BertGenerationEncoder(config) 47 | else: 48 | model = BertGenerationDecoder(config) 49 | print(f"Building PyTorch model from configuration: {config}") 50 | 51 | # Load weights from tf checkpoint 52 | load_tf_weights_in_bert_generation( 53 | model, 54 | tf_hub_path, 55 | model_class="bert", 56 | is_encoder_named_decoder=is_encoder_named_decoder, 57 | is_encoder=is_encoder, 58 | ) 59 | 60 | # Save pytorch-model 61 | print(f"Save PyTorch model and config to {pytorch_dump_path}") 62 | model.save_pretrained(pytorch_dump_path) 63 | 64 | 65 | if __name__ == "__main__": 66 | parser = argparse.ArgumentParser() 67 | # Required parameters 68 | parser.add_argument( 69 | "--tf_hub_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path." 70 | ) 71 | parser.add_argument( 72 | "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model." 73 | ) 74 | parser.add_argument( 75 | "--is_encoder_named_decoder", 76 | action="store_true", 77 | help="If decoder has to be renamed to encoder in PyTorch model.", 78 | ) 79 | parser.add_argument("--is_encoder", action="store_true", help="If model is an encoder.") 80 | parser.add_argument("--vocab_size", default=50358, type=int, help="Vocab size of model") 81 | args = parser.parse_args() 82 | convert_tf_checkpoint_to_pytorch( 83 | args.tf_hub_path, 84 | args.pytorch_dump_path, 85 | args.is_encoder_named_decoder, 86 | args.vocab_size, 87 | is_encoder=args.is_encoder, 88 | ) 89 | -------------------------------------------------------------------------------- /transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .data_collator import ( 16 | DataCollatorForLanguageModeling, 17 | DataCollatorForPermutationLanguageModeling, 18 | DataCollatorForSeq2Seq, 19 | DataCollatorForSOP, 20 | DataCollatorForTokenClassification, 21 | DataCollatorForWholeWordMask, 22 | DataCollatorWithPadding, 23 | DefaultDataCollator, 24 | default_data_collator, 25 | ) 26 | from .metrics import glue_compute_metrics, xnli_compute_metrics 27 | from .processors import ( 28 | DataProcessor, 29 | InputExample, 30 | InputFeatures, 31 | SingleSentenceClassificationProcessor, 32 | SquadExample, 33 | SquadFeatures, 34 | SquadV1Processor, 35 | SquadV2Processor, 36 | glue_convert_examples_to_features, 37 | glue_output_modes, 38 | glue_processors, 39 | glue_tasks_num_labels, 40 | squad_convert_examples_to_features, 41 | xnli_output_modes, 42 | xnli_processors, 43 | xnli_tasks_num_labels, 44 | ) 45 | -------------------------------------------------------------------------------- /transformers/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .glue import GlueDataset, GlueDataTrainingArguments 16 | from .language_modeling import ( 17 | LineByLineTextDataset, 18 | LineByLineWithRefDataset, 19 | LineByLineWithSOPTextDataset, 20 | TextDataset, 21 | TextDatasetForNextSentencePrediction, 22 | ) 23 | from .squad import SquadDataset, SquadDataTrainingArguments 24 | -------------------------------------------------------------------------------- /transformers/data/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import warnings 14 | 15 | from ...utils import is_sklearn_available, requires_backends 16 | 17 | 18 | if is_sklearn_available(): 19 | from scipy.stats import pearsonr, spearmanr 20 | from sklearn.metrics import f1_score, matthews_corrcoef 21 | 22 | 23 | DEPRECATION_WARNING = ( 24 | "This metric will be removed from the library soon, metrics should be handled with the 🤗 Evaluate " 25 | "library. 
You can have a look at this example script for pointers: " 26 | "https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.py" 27 | ) 28 | 29 | 30 | def simple_accuracy(preds, labels): 31 | warnings.warn(DEPRECATION_WARNING, FutureWarning) 32 | requires_backends(simple_accuracy, "sklearn") 33 | return (preds == labels).mean() 34 | 35 | 36 | def acc_and_f1(preds, labels): 37 | warnings.warn(DEPRECATION_WARNING, FutureWarning) 38 | requires_backends(acc_and_f1, "sklearn") 39 | acc = simple_accuracy(preds, labels) 40 | f1 = f1_score(y_true=labels, y_pred=preds) 41 | return { 42 | "acc": acc, 43 | "f1": f1, 44 | "acc_and_f1": (acc + f1) / 2, 45 | } 46 | 47 | 48 | def pearson_and_spearman(preds, labels): 49 | warnings.warn(DEPRECATION_WARNING, FutureWarning) 50 | requires_backends(pearson_and_spearman, "sklearn") 51 | pearson_corr = pearsonr(preds, labels)[0] 52 | spearman_corr = spearmanr(preds, labels)[0] 53 | return { 54 | "pearson": pearson_corr, 55 | "spearmanr": spearman_corr, 56 | "corr": (pearson_corr + spearman_corr) / 2, 57 | } 58 | 59 | 60 | def glue_compute_metrics(task_name, preds, labels): 61 | warnings.warn(DEPRECATION_WARNING, FutureWarning) 62 | requires_backends(glue_compute_metrics, "sklearn") 63 | assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}" 64 | if task_name == "cola": 65 | return {"mcc": matthews_corrcoef(labels, preds)} 66 | elif task_name == "sst-2": 67 | return {"acc": simple_accuracy(preds, labels)} 68 | elif task_name == "mrpc": 69 | return acc_and_f1(preds, labels) 70 | elif task_name == "sts-b": 71 | return pearson_and_spearman(preds, labels) 72 | elif task_name == "qqp": 73 | return acc_and_f1(preds, labels) 74 | elif task_name == "mnli": 75 | return {"mnli/acc": simple_accuracy(preds, labels)} 76 | elif task_name == "mnli-mm": 77 | return {"mnli-mm/acc": simple_accuracy(preds, labels)} 78 | elif task_name == "qnli": 79 | return {"acc": simple_accuracy(preds, labels)} 80 | elif task_name == "rte": 81 | return {"acc": simple_accuracy(preds, labels)} 82 | elif task_name == "wnli": 83 | return {"acc": simple_accuracy(preds, labels)} 84 | elif task_name == "hans": 85 | return {"acc": simple_accuracy(preds, labels)} 86 | else: 87 | raise KeyError(task_name) 88 | 89 | 90 | def xnli_compute_metrics(task_name, preds, labels): 91 | warnings.warn(DEPRECATION_WARNING, FutureWarning) 92 | requires_backends(xnli_compute_metrics, "sklearn") 93 | assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}" 94 | if task_name == "xnli": 95 | return {"acc": simple_accuracy(preds, labels)} 96 | else: 97 | raise KeyError(task_name) 98 | -------------------------------------------------------------------------------- /transformers/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .glue import glue_convert_examples_to_features, glue_output_modes, glue_processors, glue_tasks_num_labels 16 | from .squad import SquadExample, SquadFeatures, SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features 17 | from .utils import DataProcessor, InputExample, InputFeatures, SingleSentenceClassificationProcessor 18 | from .xnli import xnli_output_modes, xnli_processors, xnli_tasks_num_labels 19 | -------------------------------------------------------------------------------- /transformers/data/processors/xnli.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ XNLI utils (dataset loading and evaluation)""" 17 | 18 | 19 | import os 20 | 21 | from ...utils import logging 22 | from .utils import DataProcessor, InputExample 23 | 24 | 25 | logger = logging.get_logger(__name__) 26 | 27 | 28 | class XnliProcessor(DataProcessor): 29 | """ 30 | Processor for the XNLI dataset. 
Adapted from 31 | https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/run_classifier.py#L207 32 | """ 33 | 34 | def __init__(self, language, train_language=None): 35 | self.language = language 36 | self.train_language = train_language 37 | 38 | def get_train_examples(self, data_dir): 39 | """See base class.""" 40 | lg = self.language if self.train_language is None else self.train_language 41 | lines = self._read_tsv(os.path.join(data_dir, f"XNLI-MT-1.0/multinli/multinli.train.{lg}.tsv")) 42 | examples = [] 43 | for i, line in enumerate(lines): 44 | if i == 0: 45 | continue 46 | guid = f"train-{i}" 47 | text_a = line[0] 48 | text_b = line[1] 49 | label = "contradiction" if line[2] == "contradictory" else line[2] 50 | if not isinstance(text_a, str): 51 | raise ValueError(f"Training input {text_a} is not a string") 52 | if not isinstance(text_b, str): 53 | raise ValueError(f"Training input {text_b} is not a string") 54 | if not isinstance(label, str): 55 | raise ValueError(f"Training label {label} is not a string") 56 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 57 | return examples 58 | 59 | def get_test_examples(self, data_dir): 60 | """See base class.""" 61 | lines = self._read_tsv(os.path.join(data_dir, "XNLI-1.0/xnli.test.tsv")) 62 | examples = [] 63 | for i, line in enumerate(lines): 64 | if i == 0: 65 | continue 66 | language = line[0] 67 | if language != self.language: 68 | continue 69 | guid = f"test-{i}" 70 | text_a = line[6] 71 | text_b = line[7] 72 | label = line[1] 73 | if not isinstance(text_a, str): 74 | raise ValueError(f"Training input {text_a} is not a string") 75 | if not isinstance(text_b, str): 76 | raise ValueError(f"Training input {text_b} is not a string") 77 | if not isinstance(label, str): 78 | raise ValueError(f"Training label {label} is not a string") 79 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 80 | return examples 81 | 82 | def get_labels(self): 83 | """See base class.""" 84 | return ["contradiction", "entailment", "neutral"] 85 | 86 | 87 | xnli_processors = { 88 | "xnli": XnliProcessor, 89 | } 90 | 91 | xnli_output_modes = { 92 | "xnli": "classification", 93 | } 94 | 95 | xnli_tasks_num_labels = { 96 | "xnli": 3, 97 | } 98 | -------------------------------------------------------------------------------- /transformers/data/test_generation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import random 16 | import unittest 17 | 18 | import timeout_decorator 19 | 20 | from ..testing_utils import require_torch 21 | from ..utils import cached_property, is_torch_available 22 | 23 | 24 | if is_torch_available(): 25 | import torch 26 | 27 | from ..models.marian import MarianConfig, MarianMTModel 28 | 29 | 30 | @require_torch 31 | class GenerationUtilsTest(unittest.TestCase): 32 | @cached_property 33 | def config(self): 34 | config = MarianConfig.from_pretrained("sshleifer/tiny-marian-en-de") 35 | return config 36 | 37 | @cached_property 38 | def model(self): 39 | return MarianMTModel(self.config) 40 | 41 | def test_postprocess_next_token_scores(self): 42 | config = self.config 43 | model = self.model 44 | # Initialize an input id tensor with batch size 8 and sequence length 12 45 | input_ids = torch.arange(0, 96, 1).view((8, 12)) 46 | eos = config.eos_token_id 47 | bad_words_ids_test_cases = [[[299]], [[23, 24], [54]], [[config.eos_token_id]], []] 48 | masked_scores = [ 49 | [(0, 299), (1, 299), (2, 299), (3, 299), (4, 299), (5, 299), (6, 299), (7, 299)], 50 | [(1, 24), (0, 54), (1, 54), (2, 54), (3, 54), (4, 54), (5, 54), (6, 54), (7, 54)], 51 | [(0, eos), (1, eos), (2, eos), (3, eos), (4, eos), (5, eos), (6, eos), (7, eos)], 52 | [], 53 | ] 54 | 55 | for test_case_index, bad_words_ids in enumerate(bad_words_ids_test_cases): 56 | # Initialize a scores tensor with batch size 8 and vocabulary size 300 57 | scores = torch.rand((8, 300)) 58 | output = model.postprocess_next_token_scores( 59 | scores, 60 | input_ids, 61 | 0, 62 | bad_words_ids, 63 | 13, 64 | 15, 65 | config.max_length, 66 | config.eos_token_id, 67 | config.repetition_penalty, 68 | 32, 69 | 5, 70 | ) 71 | for masked_score in masked_scores[test_case_index]: 72 | self.assertTrue(output[masked_score[0], masked_score[1]] == -float("inf")) 73 | 74 | @timeout_decorator.timeout(10) 75 | def test_postprocess_next_token_scores_large_bad_words_list(self): 76 | config = self.config 77 | model = self.model 78 | # Initialize an input id tensor with batch size 8 and sequence length 12 79 | input_ids = torch.arange(0, 96, 1).view((8, 12)) 80 | 81 | bad_words_ids = [] 82 | for _ in range(100): 83 | length_bad_word = random.randint(1, 4) 84 | bad_words_ids.append(random.sample(range(1, 300), length_bad_word)) 85 | 86 | scores = torch.rand((8, 300)) 87 | _ = model.postprocess_next_token_scores( 88 | scores, 89 | input_ids, 90 | 0, 91 | bad_words_ids, 92 | 13, 93 | 15, 94 | config.max_length, 95 | config.eos_token_id, 96 | config.repetition_penalty, 97 | 32, 98 | 5, 99 | ) 100 | -------------------------------------------------------------------------------- /transformers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
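The module that follows checks a handful of core dependency pins at import time; as a rough illustration of the helper it also exposes (the package name is taken from the table further below, and the import path assumes the stock `transformers` layout rather than this vendored copy):

from transformers.dependency_versions_check import dep_version_check

# Resolves the "numpy>=1.17" pin from dependency_versions_table.deps and raises if it is not satisfied.
dep_version_check("numpy")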
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /transformers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.17.0", 7 | "av": "av==9.2.0", 8 | "beautifulsoup4": "beautifulsoup4", 9 | "black": "black~=23.1", 10 | "codecarbon": "codecarbon==1.2.0", 11 | "cookiecutter": "cookiecutter==1.7.3", 12 | "dataclasses": "dataclasses", 13 | "datasets": "datasets!=2.5.0", 14 | "decord": "decord==0.6.0", 15 | "deepspeed": "deepspeed>=0.8.3", 16 | "dill": "dill<0.3.5", 17 | "evaluate": "evaluate>=0.2.0", 18 | "fairscale": "fairscale>0.3", 19 | "faiss-cpu": "faiss-cpu", 20 | "fastapi": "fastapi", 21 | "filelock": "filelock", 22 | "flax": "flax>=0.4.1,<=0.6.9", 23 | "ftfy": "ftfy", 24 | "fugashi": "fugashi>=1.0", 25 | "GitPython": "GitPython<3.1.19", 26 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 27 | "huggingface-hub": "huggingface-hub>=0.11.0,<1.0", 28 | "importlib_metadata": "importlib_metadata", 29 | "ipadic": "ipadic>=1.0.0,<2.0", 30 | "isort": "isort>=5.5.4", 31 | "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6", 32 | "jaxlib": "jaxlib>=0.1.65,<=0.3.6", 33 | "jieba": "jieba", 34 | "kenlm": "kenlm", 35 | "keras-nlp": "keras-nlp>=0.3.1", 36 | "librosa": "librosa", 37 | "nltk": "nltk", 38 | "natten": "natten>=0.14.6", 39 | "numba": "numba<0.57.0", 40 | "numpy": "numpy>=1.17", 41 | "onnxconverter-common": "onnxconverter-common", 42 | "onnxruntime-tools": "onnxruntime-tools>=1.4.2", 43 | "onnxruntime": "onnxruntime>=1.4.0", 44 | "optuna": "optuna", 45 | "optax": "optax>=0.0.8,<=0.1.4", 46 | "packaging": "packaging>=20.0", 47 | "parameterized": "parameterized", 48 | "phonemizer": "phonemizer", 49 | "protobuf": "protobuf<=3.20.2", 50 | "psutil": "psutil", 51 | "pyyaml": "pyyaml>=5.1", 52 | "pydantic": "pydantic", 53 | "pytest": "pytest", 54 | "pytest-timeout": "pytest-timeout", 55 | "pytest-xdist": "pytest-xdist", 56 | "python": "python>=3.7.0", 57 | "ray[tune]": "ray[tune]", 58 | "regex": "regex!=2019.12.17", 59 | "requests": "requests", 60 | "rhoknp": "rhoknp>=1.1.0", 61 | "rjieba": "rjieba", 62 | "rouge-score": 
"rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1", 63 | "ruff": "ruff>=0.0.241,<=0.0.259", 64 | "sacrebleu": "sacrebleu>=1.4.12,<2.0.0", 65 | "sacremoses": "sacremoses", 66 | "safetensors": "safetensors>=0.2.1", 67 | "sagemaker": "sagemaker>=2.31.0", 68 | "scikit-learn": "scikit-learn", 69 | "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92", 70 | "sigopt": "sigopt", 71 | "starlette": "starlette", 72 | "sudachipy": "sudachipy>=0.6.6", 73 | "sudachidict_core": "sudachidict_core>=20220729", 74 | "tensorflow-cpu": "tensorflow-cpu>=2.4,<2.13", 75 | "tensorflow": "tensorflow>=2.4,<2.13", 76 | "tensorflow-text": "tensorflow-text<2.13", 77 | "tf2onnx": "tf2onnx", 78 | "timeout-decorator": "timeout-decorator", 79 | "timm": "timm", 80 | "tokenizers": "tokenizers>=0.11.1,!=0.11.3,<0.14", 81 | "torch": "torch>=1.9,!=1.12.0", 82 | "torchaudio": "torchaudio", 83 | "torchvision": "torchvision", 84 | "pyctcdecode": "pyctcdecode>=0.4.0", 85 | "tqdm": "tqdm>=4.27", 86 | "unidic": "unidic>=1.0.2", 87 | "unidic_lite": "unidic_lite>=1.0.7", 88 | "urllib3": "urllib3<2.0.0", 89 | "uvicorn": "uvicorn", 90 | } 91 | -------------------------------------------------------------------------------- /transformers/file_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | File utilities: utilities related to download and cache models 16 | 17 | This module should not be update anymore and is only left for backward compatibility. 18 | """ 19 | 20 | from . 
import __version__ 21 | 22 | # Backward compatibility imports, to make sure all those objects can be found in file_utils 23 | from .utils import ( 24 | CLOUDFRONT_DISTRIB_PREFIX, 25 | CONFIG_NAME, 26 | DISABLE_TELEMETRY, 27 | DUMMY_INPUTS, 28 | DUMMY_MASK, 29 | ENV_VARS_TRUE_AND_AUTO_VALUES, 30 | ENV_VARS_TRUE_VALUES, 31 | FEATURE_EXTRACTOR_NAME, 32 | FLAX_WEIGHTS_NAME, 33 | HF_MODULES_CACHE, 34 | HUGGINGFACE_CO_PREFIX, 35 | HUGGINGFACE_CO_RESOLVE_ENDPOINT, 36 | MODEL_CARD_NAME, 37 | MULTIPLE_CHOICE_DUMMY_INPUTS, 38 | PYTORCH_PRETRAINED_BERT_CACHE, 39 | PYTORCH_TRANSFORMERS_CACHE, 40 | S3_BUCKET_PREFIX, 41 | SENTENCEPIECE_UNDERLINE, 42 | SPIECE_UNDERLINE, 43 | TF2_WEIGHTS_NAME, 44 | TF_WEIGHTS_NAME, 45 | TORCH_FX_REQUIRED_VERSION, 46 | TRANSFORMERS_CACHE, 47 | TRANSFORMERS_DYNAMIC_MODULE_NAME, 48 | USE_JAX, 49 | USE_TF, 50 | USE_TORCH, 51 | WEIGHTS_INDEX_NAME, 52 | WEIGHTS_NAME, 53 | ContextManagers, 54 | DummyObject, 55 | EntryNotFoundError, 56 | ExplicitEnum, 57 | ModelOutput, 58 | PaddingStrategy, 59 | PushToHubMixin, 60 | RepositoryNotFoundError, 61 | RevisionNotFoundError, 62 | TensorType, 63 | _LazyModule, 64 | add_code_sample_docstrings, 65 | add_end_docstrings, 66 | add_start_docstrings, 67 | add_start_docstrings_to_model_forward, 68 | cached_property, 69 | copy_func, 70 | default_cache_path, 71 | define_sagemaker_information, 72 | get_cached_models, 73 | get_file_from_repo, 74 | get_full_repo_name, 75 | has_file, 76 | http_user_agent, 77 | is_apex_available, 78 | is_bs4_available, 79 | is_coloredlogs_available, 80 | is_datasets_available, 81 | is_detectron2_available, 82 | is_faiss_available, 83 | is_flax_available, 84 | is_ftfy_available, 85 | is_in_notebook, 86 | is_ipex_available, 87 | is_librosa_available, 88 | is_offline_mode, 89 | is_onnx_available, 90 | is_pandas_available, 91 | is_phonemizer_available, 92 | is_protobuf_available, 93 | is_psutil_available, 94 | is_py3nvml_available, 95 | is_pyctcdecode_available, 96 | is_pytesseract_available, 97 | is_pytorch_quantization_available, 98 | is_rjieba_available, 99 | is_sagemaker_dp_enabled, 100 | is_sagemaker_mp_enabled, 101 | is_scipy_available, 102 | is_sentencepiece_available, 103 | is_sklearn_available, 104 | is_soundfile_availble, 105 | is_spacy_available, 106 | is_speech_available, 107 | is_tensor, 108 | is_tensorflow_probability_available, 109 | is_tf2onnx_available, 110 | is_tf_available, 111 | is_timm_available, 112 | is_tokenizers_available, 113 | is_torch_available, 114 | is_torch_bf16_available, 115 | is_torch_cuda_available, 116 | is_torch_fx_available, 117 | is_torch_fx_proxy, 118 | is_torch_tf32_available, 119 | is_torch_tpu_available, 120 | is_torchaudio_available, 121 | is_training_run_on_sagemaker, 122 | is_vision_available, 123 | replace_return_docstrings, 124 | requires_backends, 125 | to_numpy, 126 | to_py_obj, 127 | torch_only_method, 128 | torch_version, 129 | ) 130 | -------------------------------------------------------------------------------- /transformers/generation/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/beam_constraints.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/beam_constraints.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/beam_search.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/beam_search.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/configuration_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/configuration_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/logits_process.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/logits_process.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/stopping_criteria.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/stopping_criteria.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/generation/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/generation_flax_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Google AI Flax Team Authors, and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import FlaxGenerationMixin 20 | 21 | 22 | class FlaxGenerationMixin(FlaxGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `FlaxGenerationMixin` from `src/transformers/generation_flax_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. 
Import as `from transformers import FlaxGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /transformers/generation_tf_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import TFGenerationMixin 20 | 21 | 22 | class TFGenerationMixin(TFGenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /transformers/generation_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google AI Language Team Authors, Facebook AI Research authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | 19 | from .generation import GenerationMixin 20 | 21 | 22 | class GenerationMixin(GenerationMixin): 23 | # warning at import time 24 | warnings.warn( 25 | "Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will " 26 | "be removed in Transformers v5. 
Import as `from transformers import GenerationMixin` instead.", 27 | FutureWarning, 28 | ) 29 | -------------------------------------------------------------------------------- /transformers/models/auto/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/auto/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/auto/__pycache__/auto_factory.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/auto/__pycache__/auto_factory.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/auto/__pycache__/configuration_auto.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/auto/__pycache__/configuration_auto.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/auto/__pycache__/modeling_auto.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/auto/__pycache__/modeling_auto.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clip/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clip/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clip/__pycache__/tokenization_clip.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clip/__pycache__/tokenization_clip.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clip/__pycache__/tokenization_clip_fast.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clip/__pycache__/tokenization_clip_fast.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clip/feature_extraction_clip.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for CLIP.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_clip import CLIPImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class CLIPFeatureExtractor(CLIPImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use CLIPImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /transformers/models/clipseg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
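The `_import_structure` / `_LazyModule` boilerplate in the `__init__.py` that follows (and in the other package inits further down) defers the heavy submodule imports until an attribute is first accessed; a rough sketch of the effect, assuming torch is installed:

import transformers.models.clipseg as clipseg  # cheap: only the lazy shim is registered

# First attribute access triggers the import of modeling_clipseg behind the scenes.
seg_model_cls = clipseg.CLIPSegForImageSegmentation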
14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available 17 | 18 | 19 | _import_structure = { 20 | "configuration_clipseg": [ 21 | "CLIPSEG_PRETRAINED_CONFIG_ARCHIVE_MAP", 22 | "CLIPSegConfig", 23 | "CLIPSegTextConfig", 24 | "CLIPSegVisionConfig", 25 | ], 26 | "processing_clipseg": ["CLIPSegProcessor"], 27 | } 28 | 29 | try: 30 | if not is_torch_available(): 31 | raise OptionalDependencyNotAvailable() 32 | except OptionalDependencyNotAvailable: 33 | pass 34 | else: 35 | _import_structure["modeling_clipseg"] = [ 36 | "CLIPSEG_PRETRAINED_MODEL_ARCHIVE_LIST", 37 | "CLIPSegModel", 38 | "CLIPSegPreTrainedModel", 39 | "CLIPSegTextModel", 40 | "CLIPSegVisionModel", 41 | "CLIPSegForImageSegmentation", 42 | ] 43 | 44 | if TYPE_CHECKING: 45 | from .configuration_clipseg import ( 46 | CLIPSEG_PRETRAINED_CONFIG_ARCHIVE_MAP, 47 | CLIPSegConfig, 48 | CLIPSegTextConfig, 49 | CLIPSegVisionConfig, 50 | ) 51 | from .processing_clipseg import CLIPSegProcessor 52 | 53 | try: 54 | if not is_torch_available(): 55 | raise OptionalDependencyNotAvailable() 56 | except OptionalDependencyNotAvailable: 57 | pass 58 | else: 59 | from .modeling_clipseg import ( 60 | CLIPSEG_PRETRAINED_MODEL_ARCHIVE_LIST, 61 | CLIPSegForImageSegmentation, 62 | CLIPSegModel, 63 | CLIPSegPreTrainedModel, 64 | CLIPSegTextModel, 65 | CLIPSegVisionModel, 66 | ) 67 | 68 | else: 69 | import sys 70 | 71 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 72 | -------------------------------------------------------------------------------- /transformers/models/clipseg/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clipseg/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clipseg/__pycache__/configuration_clipseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clipseg/__pycache__/configuration_clipseg.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clipseg/__pycache__/modeling_clipseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clipseg/__pycache__/modeling_clipseg.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/clipseg/__pycache__/processing_clipseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/clipseg/__pycache__/processing_clipseg.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/vit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from ...utils import ( 17 | OptionalDependencyNotAvailable, 18 | _LazyModule, 19 | is_flax_available, 20 | is_tf_available, 21 | is_torch_available, 22 | is_vision_available, 23 | ) 24 | 25 | 26 | _import_structure = {"configuration_vit": ["VIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTConfig", "ViTOnnxConfig"]} 27 | 28 | try: 29 | if not is_vision_available(): 30 | raise OptionalDependencyNotAvailable() 31 | except OptionalDependencyNotAvailable: 32 | pass 33 | else: 34 | _import_structure["feature_extraction_vit"] = ["ViTFeatureExtractor"] 35 | _import_structure["image_processing_vit"] = ["ViTImageProcessor"] 36 | 37 | try: 38 | if not is_torch_available(): 39 | raise OptionalDependencyNotAvailable() 40 | except OptionalDependencyNotAvailable: 41 | pass 42 | else: 43 | _import_structure["modeling_vit"] = [ 44 | "VIT_PRETRAINED_MODEL_ARCHIVE_LIST", 45 | "ViTForImageClassification", 46 | "ViTForMaskedImageModeling", 47 | "ViTModel", 48 | "ViTPreTrainedModel", 49 | ] 50 | 51 | try: 52 | if not is_tf_available(): 53 | raise OptionalDependencyNotAvailable() 54 | except OptionalDependencyNotAvailable: 55 | pass 56 | else: 57 | _import_structure["modeling_tf_vit"] = [ 58 | "TFViTForImageClassification", 59 | "TFViTModel", 60 | "TFViTPreTrainedModel", 61 | ] 62 | 63 | try: 64 | if not is_flax_available(): 65 | raise OptionalDependencyNotAvailable() 66 | except OptionalDependencyNotAvailable: 67 | pass 68 | else: 69 | _import_structure["modeling_flax_vit"] = [ 70 | "FlaxViTForImageClassification", 71 | "FlaxViTModel", 72 | "FlaxViTPreTrainedModel", 73 | ] 74 | 75 | if TYPE_CHECKING: 76 | from .configuration_vit import VIT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTConfig, ViTOnnxConfig 77 | 78 | try: 79 | if not is_vision_available(): 80 | raise OptionalDependencyNotAvailable() 81 | except OptionalDependencyNotAvailable: 82 | pass 83 | else: 84 | from .feature_extraction_vit import ViTFeatureExtractor 85 | from .image_processing_vit import ViTImageProcessor 86 | 87 | try: 88 | if not is_torch_available(): 89 | raise OptionalDependencyNotAvailable() 90 | except OptionalDependencyNotAvailable: 91 | pass 92 | else: 93 | from .modeling_vit import ( 94 | VIT_PRETRAINED_MODEL_ARCHIVE_LIST, 95 | ViTForImageClassification, 96 | ViTForMaskedImageModeling, 97 | ViTModel, 98 | ViTPreTrainedModel, 99 | ) 100 | 101 | try: 102 | if not is_tf_available(): 103 | raise OptionalDependencyNotAvailable() 104 | except OptionalDependencyNotAvailable: 105 | pass 106 | else: 107 | from .modeling_tf_vit import TFViTForImageClassification, TFViTModel, TFViTPreTrainedModel 108 | 109 | try: 110 | if not is_flax_available(): 111 | raise OptionalDependencyNotAvailable() 112 | except OptionalDependencyNotAvailable: 113 | pass 114 | else: 115 | from .modeling_flax_vit import FlaxViTForImageClassification, FlaxViTModel, FlaxViTPreTrainedModel 116 | 117 | 118 | else: 119 | import sys 120 | 121 | 
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 122 | -------------------------------------------------------------------------------- /transformers/models/vit/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/vit/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/vit/__pycache__/image_processing_vit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/models/vit/__pycache__/image_processing_vit.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/models/vit/feature_extraction_vit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Feature extractor class for ViT.""" 16 | 17 | import warnings 18 | 19 | from ...utils import logging 20 | from .image_processing_vit import ViTImageProcessor 21 | 22 | 23 | logger = logging.get_logger(__name__) 24 | 25 | 26 | class ViTFeatureExtractor(ViTImageProcessor): 27 | def __init__(self, *args, **kwargs) -> None: 28 | warnings.warn( 29 | "The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please" 30 | " use ViTImageProcessor instead.", 31 | FutureWarning, 32 | ) 33 | super().__init__(*args, **kwargs) 34 | -------------------------------------------------------------------------------- /transformers/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
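The `transformers.onnx` package defined next is the programmatic counterpart of the `python -m transformers.onnx` exporter; a condensed sketch of the documented flow (the checkpoint name and output path are only illustrative):

from pathlib import Path

from transformers import AutoModel, AutoTokenizer
from transformers.onnx import FeaturesManager, export, validate_model_outputs

ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModel.from_pretrained(ckpt)

# Look up the OnnxConfig registered for this architecture/feature pair.
model_kind, config_ctor = FeaturesManager.check_supported_model_or_raise(model, feature="default")
onnx_config = config_ctor(model.config)

onnx_path = Path("model.onnx")
onnx_inputs, onnx_outputs = export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_path)
validate_model_outputs(onnx_config, tokenizer, model, onnx_path, onnx_outputs, onnx_config.atol_for_validation)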
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from ..utils import _LazyModule 18 | 19 | 20 | _import_structure = { 21 | "config": [ 22 | "EXTERNAL_DATA_FORMAT_SIZE_LIMIT", 23 | "OnnxConfig", 24 | "OnnxConfigWithPast", 25 | "OnnxSeq2SeqConfigWithPast", 26 | "PatchingSpec", 27 | ], 28 | "convert": ["export", "validate_model_outputs"], 29 | "features": ["FeaturesManager"], 30 | "utils": ["ParameterFormat", "compute_serialized_parameters_size"], 31 | } 32 | 33 | 34 | if TYPE_CHECKING: 35 | from .config import ( 36 | EXTERNAL_DATA_FORMAT_SIZE_LIMIT, 37 | OnnxConfig, 38 | OnnxConfigWithPast, 39 | OnnxSeq2SeqConfigWithPast, 40 | PatchingSpec, 41 | ) 42 | from .convert import export, validate_model_outputs 43 | from .features import FeaturesManager 44 | from .utils import ParameterFormat, compute_serialized_parameters_size 45 | 46 | else: 47 | import sys 48 | 49 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 50 | -------------------------------------------------------------------------------- /transformers/onnx/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ctypes import c_float, sizeof 16 | from enum import Enum 17 | from typing import TYPE_CHECKING, Optional, Union 18 | 19 | 20 | if TYPE_CHECKING: 21 | from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer # tests_ignore 22 | 23 | 24 | class ParameterFormat(Enum): 25 | Float = c_float 26 | 27 | @property 28 | def size(self) -> int: 29 | """ 30 | Number of byte required for this data type 31 | 32 | Returns: 33 | Integer > 0 34 | """ 35 | return sizeof(self.value) 36 | 37 | 38 | def compute_effective_axis_dimension(dimension: int, fixed_dimension: int, num_token_to_add: int = 0) -> int: 39 | """ 40 | 41 | Args: 42 | dimension: 43 | fixed_dimension: 44 | num_token_to_add: 45 | 46 | Returns: 47 | 48 | """ 49 | # < 0 is possible if using a dynamic axis 50 | if dimension <= 0: 51 | dimension = fixed_dimension 52 | 53 | dimension -= num_token_to_add 54 | return dimension 55 | 56 | 57 | def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterFormat) -> int: 58 | """ 59 | Compute the size taken by all the parameters in the given the storage format when serializing the model 60 | 61 | Args: 62 | num_parameters: Number of parameters to be saved 63 | dtype: The data format each parameter will be saved 64 | 65 | Returns: 66 | Size (in byte) taken to save all the parameters 67 | """ 68 | return num_parameters * dtype.size 69 | 70 | 71 | def get_preprocessor(model_name: str) -> Optional[Union["AutoTokenizer", "AutoFeatureExtractor", "AutoProcessor"]]: 72 | """ 73 | Gets a preprocessor (tokenizer, feature extractor or processor) that is available for `model_name`. 
74 | 75 | Args: 76 | model_name (`str`): Name of the model for which a preprocessor are loaded. 77 | 78 | Returns: 79 | `Optional[Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]]`: 80 | If a processor is found, it is returned. Otherwise, if a tokenizer or a feature extractor exists, it is 81 | returned. If both a tokenizer and a feature extractor exist, an error is raised. The function returns 82 | `None` if no preprocessor is found. 83 | """ 84 | # Avoid circular imports by only importing this here. 85 | from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer # tests_ignore 86 | 87 | try: 88 | return AutoProcessor.from_pretrained(model_name) 89 | except (ValueError, OSError, KeyError): 90 | tokenizer, feature_extractor = None, None 91 | try: 92 | tokenizer = AutoTokenizer.from_pretrained(model_name) 93 | except (OSError, KeyError): 94 | pass 95 | try: 96 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_name) 97 | except (OSError, KeyError): 98 | pass 99 | 100 | if tokenizer is not None and feature_extractor is not None: 101 | raise ValueError( 102 | f"Couldn't auto-detect preprocessor for {model_name}. Found both a tokenizer and a feature extractor." 103 | ) 104 | elif tokenizer is None and feature_extractor is None: 105 | return None 106 | elif tokenizer is not None: 107 | return tokenizer 108 | else: 109 | return feature_extractor 110 | -------------------------------------------------------------------------------- /transformers/sagemaker/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .trainer_sm import SageMakerTrainer 16 | from .training_args_sm import SageMakerTrainingArguments, is_sagemaker_dp_enabled 17 | -------------------------------------------------------------------------------- /transformers/sagemaker/trainer_sm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import warnings 15 | 16 | from ..trainer import Trainer 17 | from ..utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class SageMakerTrainer(Trainer): 24 | def __init__(self, args=None, **kwargs): 25 | warnings.warn( 26 | "`SageMakerTrainer` is deprecated and will be removed in v5 of Transformers. You can use `Trainer` " 27 | "instead.", 28 | FutureWarning, 29 | ) 30 | super().__init__(args=args, **kwargs) 31 | -------------------------------------------------------------------------------- /transformers/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/constants.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/constants.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/doc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/doc.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_flax_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_flax_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_keras_nlp_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_keras_nlp_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_sentencepiece_and_tokenizers_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_sentencepiece_and_tokenizers_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_speech_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_speech_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_tensorflow_text_objects.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_tensorflow_text_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_tf_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_tf_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/dummy_tokenizers_objects.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/dummy_tokenizers_objects.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/generic.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/generic.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/hub.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/hub.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/import_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/import_utils.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/logging.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/logging.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/quantization_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/quantization_config.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/__pycache__/versions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenRobotLab/OV_PARTS/939b1a56e9d42ff8b631b0aa46ee757e06d444af/transformers/utils/__pycache__/versions.cpython-38.pyc -------------------------------------------------------------------------------- /transformers/utils/constants.py: -------------------------------------------------------------------------------- 1 | IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] 2 | IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225] 3 | IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5] 4 | 
--------------------------------------------------------------------------------
/transformers/utils/dummy_detectron2_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import requires_backends
3 | 
4 | 
5 | LAYOUTLM_V2_PRETRAINED_MODEL_ARCHIVE_LIST = None
6 | 
7 | 
8 | class LayoutLMv2Model:
9 |     def __init__(self, *args, **kwargs):
10 |         requires_backends(self, ["detectron2"])
11 | 
12 |     @classmethod
13 |     def from_pretrained(cls, *args, **kwargs):
14 |         requires_backends(cls, ["detectron2"])
15 | 
--------------------------------------------------------------------------------
/transformers/utils/dummy_keras_nlp_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 | 
4 | 
5 | class TFGPT2Tokenizer(metaclass=DummyObject):
6 |     _backends = ["keras_nlp"]
7 | 
8 |     def __init__(self, *args, **kwargs):
9 |         requires_backends(self, ["keras_nlp"])
10 | 
--------------------------------------------------------------------------------
/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 | 
4 | 
5 | SLOW_TO_FAST_CONVERTERS = None
6 | 
7 | 
8 | def convert_slow_tokenizer(*args, **kwargs):
9 |     requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"])
10 | 
--------------------------------------------------------------------------------
/transformers/utils/dummy_speech_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 | 
4 | 
5 | class ASTFeatureExtractor(metaclass=DummyObject):
6 |     _backends = ["speech"]
7 | 
8 |     def __init__(self, *args, **kwargs):
9 |         requires_backends(self, ["speech"])
10 | 
11 | 
12 | class MCTCTFeatureExtractor(metaclass=DummyObject):
13 |     _backends = ["speech"]
14 | 
15 |     def __init__(self, *args, **kwargs):
16 |         requires_backends(self, ["speech"])
17 | 
18 | 
19 | class Speech2TextFeatureExtractor(metaclass=DummyObject):
20 |     _backends = ["speech"]
21 | 
22 |     def __init__(self, *args, **kwargs):
23 |         requires_backends(self, ["speech"])
24 | 
25 | 
26 | class SpeechT5FeatureExtractor(metaclass=DummyObject):
27 |     _backends = ["speech"]
28 | 
29 |     def __init__(self, *args, **kwargs):
30 |         requires_backends(self, ["speech"])
31 | 
32 | 
33 | class TvltFeatureExtractor(metaclass=DummyObject):
34 |     _backends = ["speech"]
35 | 
36 |     def __init__(self, *args, **kwargs):
37 |         requires_backends(self, ["speech"])
38 | 
--------------------------------------------------------------------------------
/transformers/utils/dummy_tensorflow_text_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 | 
4 | 
5 | class TFBertTokenizer(metaclass=DummyObject):
6 |     _backends = ["tensorflow_text"]
7 | 
8 |     def __init__(self, *args, **kwargs):
9 |         requires_backends(self, ["tensorflow_text"])
10 | 
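All of the `dummy_*_objects.py` modules follow one pattern: when an optional backend is not installed, the real class is replaced by a placeholder whose constructor (and, through the `DummyObject` metaclass, any public attribute access such as `from_pretrained`) calls `requires_backends`, so importing the name always succeeds and a clear `ImportError` is raised only when the object is actually used. The sketch below is a reduced, self-contained reimplementation of that mechanism for illustration only; the library's actual `DummyObject` and `requires_backends` live in `transformers/utils` and differ in detail:

import importlib.util


def requires_backends(obj, backends):
    """Raise an informative ImportError naming every backend that cannot be imported."""
    name = getattr(obj, "__name__", type(obj).__name__)
    missing = [b for b in backends if importlib.util.find_spec(b) is None]
    if missing:
        raise ImportError(f"{name} requires backend(s) that are not installed: {', '.join(missing)}")


class DummyObject(type):
    """Metaclass: any public attribute access on the placeholder class re-checks the backends."""

    def __getattribute__(cls, key):
        if key.startswith("_"):
            return super().__getattribute__(key)
        requires_backends(cls, cls._backends)


class TFBertTokenizer(metaclass=DummyObject):
    _backends = ["tensorflow_text"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["tensorflow_text"])


# In an environment without `tensorflow_text`, both instantiation and classmethod-style
# access fail with a readable ImportError instead of an obscure failure later on.
if importlib.util.find_spec("tensorflow_text") is None:
    try:
        TFBertTokenizer()
    except ImportError as err:
        print(err)
    try:
        TFBertTokenizer.from_pretrained("bert-base-uncased")
    except ImportError as err:
        print(err)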
--------------------------------------------------------------------------------
/transformers/utils/model_parallel_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The HuggingFace Team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from math import ceil
17 | 
18 | 
19 | def assert_device_map(device_map, num_blocks):
20 |     blocks = list(range(0, num_blocks))
21 | 
22 |     device_map_blocks = [item for sublist in list(device_map.values()) for item in sublist]
23 | 
24 |     # Duplicate check
25 |     duplicate_blocks = []
26 |     for i in device_map_blocks:
27 |         if device_map_blocks.count(i) > 1 and i not in duplicate_blocks:
28 |             duplicate_blocks.append(i)
29 |     # Missing blocks
30 |     missing_blocks = [i for i in blocks if i not in device_map_blocks]
31 |     extra_blocks = [i for i in device_map_blocks if i not in blocks]
32 | 
33 |     if len(duplicate_blocks) != 0:
34 |         raise ValueError(
35 |             "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device."
36 |             " These attention blocks were specified more than once: " + str(duplicate_blocks)
37 |         )
38 |     if len(missing_blocks) != 0:
39 |         raise ValueError(
40 |             "There are attention blocks for this model that are not specified in the device_map. Add these attention "
41 |             "blocks to a device on the device_map: " + str(missing_blocks)
42 |         )
43 |     if len(extra_blocks) != 0:
44 |         raise ValueError(
45 |             "The device_map contains more attention blocks than this model has. Remove these from the device_map:"
46 |             + str(extra_blocks)
47 |         )
48 | 
49 | 
50 | def get_device_map(n_layers, devices):
51 |     """Returns a dictionary of layers distributed evenly across all devices."""
52 |     layers = list(range(n_layers))
53 |     n_blocks = int(ceil(n_layers / len(devices)))
54 |     layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)]
55 | 
56 |     return dict(zip(devices, layers_list))
57 | 
--------------------------------------------------------------------------------
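`get_device_map` chunks the layer indices into contiguous blocks of `ceil(n_layers / len(devices))`, so the last device may receive fewer layers, and `assert_device_map` checks that a user-supplied map covers every block exactly once (no duplicates, no gaps, no out-of-range indices). A short usage sketch, assuming the vendored `transformers/` directory at the repository root (or an installed copy providing the same module path) is importable:

from transformers.utils.model_parallel_utils import assert_device_map, get_device_map

# 12 layers over 2 devices -> contiguous blocks of ceil(12 / 2) = 6 layers each.
device_map = get_device_map(n_layers=12, devices=[0, 1])
print(device_map)  # {0: [0, 1, 2, 3, 4, 5], 1: [6, 7, 8, 9, 10, 11]}

# A valid map passes silently; num_blocks is the model's total layer count.
assert_device_map(device_map, num_blocks=12)

# An uneven split simply leaves the remainder on the last device: 10 layers over
# 3 devices gives blocks of 4, 4 and 2.
print(get_device_map(n_layers=10, devices=[0, 1, 2]))  # {0: [0, 1, 2, 3], 1: [4, 5, 6, 7], 2: [8, 9]}

# Dropping a block makes assert_device_map raise a ValueError naming the missing index.
bad_map = {0: [0, 1, 2], 1: [4, 5, 6, 7, 8, 9, 10, 11]}
try:
    assert_device_map(bad_map, num_blocks=12)
except ValueError as err:
    print(err)  # reports that block 3 is not assigned to any device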