├── .gitignore
├── MUSK.png
├── README.md
├── assets
    └── lungaca1014.jpeg
├── benchmarks
    ├── .gitignore
    ├── __init__.py
    ├── clip_benchmark
    │   ├── __init__.py
    │   ├── cli.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── builder.py
    │   │   └── histopathology_datasets.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── image_caption_selection.py
    │   │   ├── image_retrieval.py
    │   │   ├── linear_probe.py
    │   │   ├── zeroshot_classification.py
    │   │   └── zeroshot_retrieval.py
    │   ├── model_collection.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── japanese_clip.py
    │   │   ├── open_clip.py
    │   │   └── torchscale
    │   │   │   ├── __init__.py
    │   │   │   ├── architecture
    │   │   │       ├── __init__.py
    │   │   │       ├── config.py
    │   │   │       ├── decoder.py
    │   │   │       ├── encoder.py
    │   │   │       ├── encoder_decoder.py
    │   │   │       ├── retnet.py
    │   │   │       └── utils.py
    │   │   │   ├── component
    │   │   │       ├── __init__.py
    │   │   │       ├── dilated_attention.py
    │   │   │       ├── droppath.py
    │   │   │       ├── embedding.py
    │   │   │       ├── feedforward_network.py
    │   │   │       ├── flash_attention.py
    │   │   │       ├── gate_linear_unit.py
    │   │   │       ├── multihead_attention.py
    │   │   │       ├── multiscale_retention.py
    │   │   │       ├── multiway_network.py
    │   │   │       ├── relative_position_bias.py
    │   │   │       ├── rms_norm.py
    │   │   │       ├── utils.py
    │   │   │       ├── xmoe
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── global_groups.py
    │   │   │       │   ├── moe_layer.py
    │   │   │       │   └── routing.py
    │   │   │       └── xpos_relative_position.py
    │   │   │   └── model
    │   │   │       ├── BEiT3.py
    │   │   │       ├── LongNet.py
    │   │   │       └── __init__.py
    │   └── webdataset_builder.py
    ├── demo.ipynb
    └── models.txt
├── demo.ipynb
├── musk
    ├── __init__.py
    ├── modeling.py
    ├── models
    │   └── tokenizer.spm
    ├── torchscale
    │   ├── __init__.py
    │   ├── architecture
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── decoder.py
    │   │   ├── encoder.py
    │   │   ├── encoder_decoder.py
    │   │   ├── retnet.py
    │   │   └── utils.py
    │   ├── component
    │   │   ├── __init__.py
    │   │   ├── dilated_attention.py
    │   │   ├── droppath.py
    │   │   ├── embedding.py
    │   │   ├── feedforward_network.py
    │   │   ├── flash_attention.py
    │   │   ├── gate_linear_unit.py
    │   │   ├── multihead_attention.py
    │   │   ├── multiscale_retention.py
    │   │   ├── multiway_network.py
    │   │   ├── relative_position_bias.py
    │   │   ├── rms_norm.py
    │   │   ├── utils.py
    │   │   ├── xmoe
    │   │   │   ├── __init__.py
    │   │   │   ├── global_groups.py
    │   │   │   ├── moe_layer.py
    │   │   │   └── routing.py
    │   │   └── xpos_relative_position.py
    │   └── model
    │   │   ├── BEiT3.py
    │   │   ├── LongNet.py
    │   │   └── __init__.py
    └── utils.py
├── pyproject.toml
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/.gitignore


--------------------------------------------------------------------------------
/MUSK.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/MUSK.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/README.md


--------------------------------------------------------------------------------
/assets/lungaca1014.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/assets/lungaca1014.jpeg


--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/.gitignore


--------------------------------------------------------------------------------
/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/cli.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/datasets/builder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/datasets/builder.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/datasets/histopathology_datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/datasets/histopathology_datasets.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/image_caption_selection.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/image_caption_selection.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/image_retrieval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/image_retrieval.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/linear_probe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/linear_probe.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/zeroshot_classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/zeroshot_classification.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/metrics/zeroshot_retrieval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/metrics/zeroshot_retrieval.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/model_collection.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/model_collection.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/japanese_clip.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/japanese_clip.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/open_clip.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/open_clip.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/config.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/decoder.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/encoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/encoder.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/encoder_decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/encoder_decoder.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/retnet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/retnet.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/architecture/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/architecture/utils.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/dilated_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/dilated_attention.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/droppath.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/droppath.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/embedding.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/feedforward_network.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/feedforward_network.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/flash_attention.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/gate_linear_unit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/gate_linear_unit.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/multihead_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multihead_attention.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/multiscale_retention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multiscale_retention.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/multiway_network.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/multiway_network.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/relative_position_bias.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/relative_position_bias.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/rms_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/rms_norm.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/utils.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/xmoe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/xmoe/global_groups.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/global_groups.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/xmoe/moe_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/moe_layer.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/xmoe/routing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xmoe/routing.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/component/xpos_relative_position.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/component/xpos_relative_position.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/model/BEiT3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/BEiT3.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/model/LongNet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/LongNet.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/models/torchscale/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/models/torchscale/model/__init__.py


--------------------------------------------------------------------------------
/benchmarks/clip_benchmark/webdataset_builder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/clip_benchmark/webdataset_builder.py


--------------------------------------------------------------------------------
/benchmarks/demo.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/benchmarks/demo.ipynb


--------------------------------------------------------------------------------
/benchmarks/models.txt:
--------------------------------------------------------------------------------
1 | musk_large_patch16_384,hf_hub:xiangjx/musk


--------------------------------------------------------------------------------
/demo.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/demo.ipynb


--------------------------------------------------------------------------------
/musk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/__init__.py


--------------------------------------------------------------------------------
/musk/modeling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/modeling.py


--------------------------------------------------------------------------------
/musk/models/tokenizer.spm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/models/tokenizer.spm


--------------------------------------------------------------------------------
/musk/torchscale/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/__init__.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/__init__.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/config.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/decoder.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/encoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/encoder.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/encoder_decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/encoder_decoder.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/retnet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/retnet.py


--------------------------------------------------------------------------------
/musk/torchscale/architecture/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/architecture/utils.py


--------------------------------------------------------------------------------
/musk/torchscale/component/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/__init__.py


--------------------------------------------------------------------------------
/musk/torchscale/component/dilated_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/dilated_attention.py


--------------------------------------------------------------------------------
/musk/torchscale/component/droppath.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/droppath.py


--------------------------------------------------------------------------------
/musk/torchscale/component/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/embedding.py


--------------------------------------------------------------------------------
/musk/torchscale/component/feedforward_network.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/feedforward_network.py


--------------------------------------------------------------------------------
/musk/torchscale/component/flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/flash_attention.py


--------------------------------------------------------------------------------
/musk/torchscale/component/gate_linear_unit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/gate_linear_unit.py


--------------------------------------------------------------------------------
/musk/torchscale/component/multihead_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multihead_attention.py


--------------------------------------------------------------------------------
/musk/torchscale/component/multiscale_retention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multiscale_retention.py


--------------------------------------------------------------------------------
/musk/torchscale/component/multiway_network.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/multiway_network.py


--------------------------------------------------------------------------------
/musk/torchscale/component/relative_position_bias.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/relative_position_bias.py


--------------------------------------------------------------------------------
/musk/torchscale/component/rms_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/rms_norm.py


--------------------------------------------------------------------------------
/musk/torchscale/component/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/utils.py


--------------------------------------------------------------------------------
/musk/torchscale/component/xmoe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/__init__.py


--------------------------------------------------------------------------------
/musk/torchscale/component/xmoe/global_groups.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/global_groups.py


--------------------------------------------------------------------------------
/musk/torchscale/component/xmoe/moe_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/moe_layer.py


--------------------------------------------------------------------------------
/musk/torchscale/component/xmoe/routing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xmoe/routing.py


--------------------------------------------------------------------------------
/musk/torchscale/component/xpos_relative_position.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/component/xpos_relative_position.py


--------------------------------------------------------------------------------
/musk/torchscale/model/BEiT3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/BEiT3.py


--------------------------------------------------------------------------------
/musk/torchscale/model/LongNet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/LongNet.py


--------------------------------------------------------------------------------
/musk/torchscale/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/torchscale/model/__init__.py


--------------------------------------------------------------------------------
/musk/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/musk/utils.py


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lilab-stanford/MUSK/HEAD/requirements.txt


--------------------------------------------------------------------------------