├── .gitignore ├── MUSK.png ├── README.md ├── assets └── lungaca1014.jpeg ├── benchmarks ├── .gitignore ├── __init__.py ├── clip_benchmark │ ├── __init__.py │ ├── cli.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builder.py │ │ └── histopathology_datasets.py │ ├── metrics │ │ ├── __init__.py │ │ ├── image_caption_selection.py │ │ ├── image_retrieval.py │ │ ├── linear_probe.py │ │ ├── zeroshot_classification.py │ │ └── zeroshot_retrieval.py │ ├── model_collection.py │ ├── models │ │ ├── __init__.py │ │ ├── japanese_clip.py │ │ ├── open_clip.py │ │ └── torchscale │ │ │ ├── __init__.py │ │ │ ├── architecture │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── encoder_decoder.py │ │ │ ├── retnet.py │ │ │ └── utils.py │ │ │ ├── component │ │ │ ├── __init__.py │ │ │ ├── dilated_attention.py │ │ │ ├── droppath.py │ │ │ ├── embedding.py │ │ │ ├── feedforward_network.py │ │ │ ├── flash_attention.py │ │ │ ├── gate_linear_unit.py │ │ │ ├── multihead_attention.py │ │ │ ├── multiscale_retention.py │ │ │ ├── multiway_network.py │ │ │ ├── relative_position_bias.py │ │ │ ├── rms_norm.py │ │ │ ├── utils.py │ │ │ ├── xmoe │ │ │ │ ├── __init__.py │ │ │ │ ├── global_groups.py │ │ │ │ ├── moe_layer.py │ │ │ │ └── routing.py │ │ │ └── xpos_relative_position.py │ │ │ └── model │ │ │ ├── BEiT3.py │ │ │ ├── LongNet.py │ │ │ └── __init__.py │ └── webdataset_builder.py ├── demo.ipynb └── models.txt ├── demo.ipynb ├── musk ├── __init__.py ├── modeling.py ├── models │ └── tokenizer.spm ├── torchscale │ ├── __init__.py │ ├── architecture │ │ ├── __init__.py │ │ ├── config.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── encoder_decoder.py │ │ ├── retnet.py │ │ └── utils.py │ ├── component │ │ ├── __init__.py │ │ ├── dilated_attention.py │ │ ├── droppath.py │ │ ├── embedding.py │ │ ├── feedforward_network.py │ │ ├── flash_attention.py │ │ ├── gate_linear_unit.py │ │ ├── multihead_attention.py │ │ ├── multiscale_retention.py │ │ ├── multiway_network.py │ │ ├── relative_position_bias.py │ │ ├── rms_norm.py │ │ ├── utils.py │ │ ├── xmoe │ │ │ ├── __init__.py │ │ │ ├── global_groups.py │ │ │ ├── moe_layer.py │ │ │ └── routing.py │ │ └── xpos_relative_position.py │ └── model │ │ ├── BEiT3.py │ │ ├── LongNet.py │ │ └── __init__.py └── utils.py ├── pyproject.toml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/.gitignore -------------------------------------------------------------------------------- /MUSK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/MUSK.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/README.md -------------------------------------------------------------------------------- /assets/lungaca1014.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/assets/lungaca1014.jpeg -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/.gitignore -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/cli.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/datasets/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/datasets/builder.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/datasets/histopathology_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/datasets/histopathology_datasets.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/image_caption_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/image_caption_selection.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/image_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/image_retrieval.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/linear_probe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/linear_probe.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/zeroshot_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/zeroshot_classification.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/metrics/zeroshot_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/zeroshot_retrieval.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/model_collection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/model_collection.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/japanese_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/japanese_clip.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/open_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/open_clip.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/config.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/decoder.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/encoder.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/encoder_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/encoder_decoder.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/retnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/retnet.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/architecture/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/utils.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/dilated_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/dilated_attention.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/droppath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/droppath.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/embedding.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/feedforward_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/feedforward_network.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/flash_attention.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/gate_linear_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/gate_linear_unit.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/multihead_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multihead_attention.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/multiscale_retention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multiscale_retention.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/multiway_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multiway_network.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/relative_position_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/relative_position_bias.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/rms_norm.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/utils.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/xmoe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/xmoe/global_groups.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/global_groups.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/xmoe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/moe_layer.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/xmoe/routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/routing.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/component/xpos_relative_position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xpos_relative_position.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/model/BEiT3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/BEiT3.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/model/LongNet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/LongNet.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/models/torchscale/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/__init__.py -------------------------------------------------------------------------------- /benchmarks/clip_benchmark/webdataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/webdataset_builder.py -------------------------------------------------------------------------------- /benchmarks/demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/demo.ipynb -------------------------------------------------------------------------------- /benchmarks/models.txt: -------------------------------------------------------------------------------- 1 | musk_large_patch16_384,hf_hub:xiangjx/musk -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/demo.ipynb -------------------------------------------------------------------------------- /musk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/__init__.py -------------------------------------------------------------------------------- /musk/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/modeling.py -------------------------------------------------------------------------------- /musk/models/tokenizer.spm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/models/tokenizer.spm -------------------------------------------------------------------------------- /musk/torchscale/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/__init__.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/__init__.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/config.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/decoder.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/encoder.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/encoder_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/encoder_decoder.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/retnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/retnet.py -------------------------------------------------------------------------------- /musk/torchscale/architecture/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/utils.py -------------------------------------------------------------------------------- /musk/torchscale/component/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/__init__.py -------------------------------------------------------------------------------- /musk/torchscale/component/dilated_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/dilated_attention.py -------------------------------------------------------------------------------- /musk/torchscale/component/droppath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/droppath.py -------------------------------------------------------------------------------- /musk/torchscale/component/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/embedding.py -------------------------------------------------------------------------------- /musk/torchscale/component/feedforward_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/feedforward_network.py -------------------------------------------------------------------------------- /musk/torchscale/component/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/flash_attention.py -------------------------------------------------------------------------------- /musk/torchscale/component/gate_linear_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/gate_linear_unit.py -------------------------------------------------------------------------------- /musk/torchscale/component/multihead_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multihead_attention.py -------------------------------------------------------------------------------- /musk/torchscale/component/multiscale_retention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multiscale_retention.py -------------------------------------------------------------------------------- /musk/torchscale/component/multiway_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multiway_network.py -------------------------------------------------------------------------------- /musk/torchscale/component/relative_position_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/relative_position_bias.py -------------------------------------------------------------------------------- /musk/torchscale/component/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/rms_norm.py -------------------------------------------------------------------------------- /musk/torchscale/component/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/utils.py -------------------------------------------------------------------------------- /musk/torchscale/component/xmoe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/__init__.py -------------------------------------------------------------------------------- /musk/torchscale/component/xmoe/global_groups.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/global_groups.py -------------------------------------------------------------------------------- /musk/torchscale/component/xmoe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/moe_layer.py -------------------------------------------------------------------------------- /musk/torchscale/component/xmoe/routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/routing.py -------------------------------------------------------------------------------- /musk/torchscale/component/xpos_relative_position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xpos_relative_position.py -------------------------------------------------------------------------------- /musk/torchscale/model/BEiT3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/BEiT3.py -------------------------------------------------------------------------------- /musk/torchscale/model/LongNet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/LongNet.py -------------------------------------------------------------------------------- /musk/torchscale/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/__init__.py -------------------------------------------------------------------------------- /musk/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/requirements.txt --------------------------------------------------------------------------------