├── .github ├── CODEOWNERS ├── DOC.md ├── ISSUE_TEMPLATE │ ├── bug.md │ ├── devops.md │ ├── 使用问题.md │ ├── 功能需求.md │ ├── 工作任务.md │ └── 系统改进.md ├── linters │ ├── .golangci.yml │ ├── .jscpd.json │ ├── .markdown-lint.yml │ ├── .yaml-lint.yml │ ├── README.md │ └── pylint_config ├── reviewers.yaml ├── scripts │ ├── .shellcheckrc │ ├── check_all.sh │ ├── check_dir_naming.sh │ ├── check_dockerfile.sh │ ├── check_go_tests.sh │ ├── check_markdown_filename.sh │ ├── check_readme.sh │ ├── gpu_task.py │ └── inference.py ├── super_linter.env └── workflows │ ├── README.md │ ├── additional_lint.yaml │ ├── base_image.yaml │ ├── chatui.yaml │ ├── ci.yaml │ ├── cpodmanager.yaml │ ├── docker_image.yaml │ ├── generate-sdk.yml │ ├── go.yml │ ├── go_service.yaml │ ├── golangci_lint.yaml │ ├── gpu_task_ci.yaml │ ├── helm_chart_release.yaml │ ├── preload-model.yaml │ ├── python.yaml │ ├── sxcloud.yaml │ └── sxcloud_dev.yaml ├── .gitignore ├── .gitmodules ├── 3kctl ├── 3kctl.py ├── README.md ├── __init__.py ├── conf │ ├── config-sample.yaml │ ├── config.ini │ └── softwares.yaml ├── deploy │ ├── __init__.py │ ├── deploy.py │ ├── kubernetes.py │ ├── registry.py │ ├── software.py │ ├── test_software.py │ ├── test_utils.py │ └── utils.py ├── download │ ├── __init__.py │ ├── download.py │ ├── hugging_face.py │ ├── k8s.py │ ├── model_scope.py │ ├── oss.py │ └── resource.py ├── requirements.txt ├── serve │ ├── __init__.py │ └── service.py └── upload │ ├── __init__.py │ ├── test_utils.py │ ├── upload.py │ └── utils.py ├── DEV.md ├── LANGUAGES ├── LICENSE ├── README.md ├── chatui ├── Dockerfile ├── README.md ├── api │ ├── api.py │ ├── app.py │ ├── dist │ │ ├── assets │ │ │ ├── assistant-avatar.png │ │ │ ├── chatui.png │ │ │ ├── delete.png │ │ │ ├── ic_delete.png │ │ │ ├── marked.min.js │ │ │ ├── script.js │ │ │ ├── send-icon.png │ │ │ ├── styles.css │ │ │ ├── upload_image.png │ │ │ ├── user-avatar.png │ │ │ └── voice-icon.png │ │ └── index.html │ ├── model.py │ ├── poetry.lock │ ├── pyproject.toml │ 
└── requirements.txt └── start.sh ├── cmd ├── README.md ├── downloader │ ├── Dockerfile │ ├── cmd │ │ ├── git.go │ │ ├── oss.go │ │ └── root.go │ ├── internal │ │ ├── consts │ │ │ └── consts.go │ │ ├── download │ │ │ ├── config │ │ │ │ └── config.go │ │ │ ├── download.go │ │ │ ├── downloader.go │ │ │ ├── git │ │ │ │ └── git.go │ │ │ └── oss │ │ │ │ └── oss.go │ │ ├── errors │ │ │ └── errors.go │ │ ├── oss │ │ │ ├── config.go │ │ │ ├── tools.go │ │ │ └── tools_test.go │ │ └── record │ │ │ ├── crd │ │ │ └── crd.go │ │ │ ├── none │ │ │ └── none.go │ │ │ └── record.go │ ├── main.go │ └── yaml │ │ └── downloader-rbac.yaml ├── gateway │ ├── Dockerfile │ ├── README.md │ ├── etc │ │ ├── gateway-api.yaml │ │ ├── gateway-api_k8s.yaml │ │ ├── gateway-api_prod.yaml │ │ └── gateway-api_test.yaml │ └── gateway.go ├── modeluploader │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── buildimage.sh │ ├── mannualstart.yaml │ └── modeluploadjob.go ├── scheduler │ ├── Dockerfile │ ├── README.md │ ├── etc │ │ ├── scheduler-api.yaml │ │ ├── scheduler-api_k8s.yaml │ │ ├── scheduler-api_prod.yaml │ │ └── scheduler-api_test.yaml │ ├── ftl │ │ ├── README.md │ │ ├── email.ftl │ │ ├── token.ftl │ │ └── token_pass.ftl │ ├── scheduler.api │ ├── scheduler.go │ ├── scheduler.json │ ├── sdk.api │ ├── sdk.json │ └── types.api ├── sxwlctl │ ├── build_and_push.sh │ ├── cmd │ │ ├── download.go │ │ ├── load.go │ │ ├── root.go │ │ └── upload.go │ ├── install.sh │ ├── internal │ │ ├── consts │ │ │ └── consts.go │ │ └── sxy │ │ │ ├── auth.go │ │ │ └── resource.go │ └── main.go └── uploader │ ├── Dockerfile │ ├── README.md │ ├── cmd │ ├── oss.go │ └── root.go │ └── main.go ├── cpodoperator ├── .gitignore ├── Makefile ├── PROJECT ├── README.md ├── api │ ├── v1 │ │ ├── README.md │ │ ├── datasetstorage_types.go │ │ ├── groupversion_info.go │ │ ├── modelstorage_types.go │ │ └── zz_generated.deepcopy.go │ └── v1beta1 │ │ ├── constants.go │ │ ├── cpodjob_types.go │ │ ├── finetune_types.go │ │ ├── 
groupversion_info.go │ │ ├── inference_type.go │ │ ├── jupyterlab_types.go │ │ ├── llamafactory_types.go │ │ ├── modelstorage_types.go │ │ ├── predictor.go │ │ ├── yamlresource_types.go │ │ └── zz_generated.deepcopy.go ├── cmd │ ├── main.go │ ├── operator │ │ ├── Dockerfile │ │ └── main.go │ └── portalsynch │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── client-config │ │ └── clientconfig.go │ │ └── main.go ├── config │ ├── crd │ │ ├── bases │ │ │ ├── cpod.cpod_cpodjobs.yaml │ │ │ ├── cpod.cpod_datasetstorages.yaml │ │ │ ├── cpod.cpod_finetunes.yaml │ │ │ ├── cpod.cpod_inferences.yaml │ │ │ ├── cpod.cpod_jupyterlabs.yaml │ │ │ ├── cpod.cpod_juypterlabs.yaml │ │ │ ├── cpod.cpod_llamafactories.yaml │ │ │ ├── cpod.cpod_modelstorages.yaml │ │ │ └── cpod.cpod_yamlresources.yaml │ │ ├── kustomization.yaml │ │ └── kustomizeconfig.yaml │ ├── default │ │ ├── kustomization.yaml │ │ ├── manager_auth_proxy_patch.yaml │ │ └── manager_config_patch.yaml │ ├── manager │ │ ├── kustomization.yaml │ │ ├── manager.yaml │ │ └── portalsynch.yaml │ ├── prometheus │ │ ├── kustomization.yaml │ │ └── monitor.yaml │ ├── rbac │ │ ├── auth_proxy_client_clusterrole.yaml │ │ ├── auth_proxy_role.yaml │ │ ├── auth_proxy_role_binding.yaml │ │ ├── auth_proxy_service.yaml │ │ ├── cpodjob_editor_role.yaml │ │ ├── cpodjob_viewer_role.yaml │ │ ├── datasetstorage_editor_role.yaml │ │ ├── datasetstorage_viewer_role.yaml │ │ ├── finetune_editor_role.yaml │ │ ├── finetune_viewer_role.yaml │ │ ├── jupyterlab_editor_role.yaml │ │ ├── jupyterlab_viewer_role.yaml │ │ ├── juypterlab_editor_role.yaml │ │ ├── juypterlab_viewer_role.yaml │ │ ├── kustomization.yaml │ │ ├── leader_election_role.yaml │ │ ├── leader_election_role_binding.yaml │ │ ├── llamafactory_editor_role.yaml │ │ ├── llamafactory_viewer_role.yaml │ │ ├── modelstorage_editor_role.yaml │ │ ├── modelstorage_viewer_role.yaml │ │ ├── public.yaml │ │ ├── role.yaml │ │ ├── role_binding.yaml │ │ └── service_account.yaml │ └── samples │ │ ├── 
cpod_v1_datasetstorage.yaml │ │ ├── cpod_v1_modelstorage.yaml │ │ ├── cpod_v1beta1_cpodjob_bert.yaml │ │ ├── cpod_v1beta1_cpodjob_llama27b.yaml │ │ ├── cpod_v1beta1_cpodjob_mpi.yaml │ │ ├── cpod_v1beta1_cpodjob_pytorch_gpt1.3b.yaml │ │ ├── cpod_v1beta1_cpodjob_pytorch_modihand.yaml │ │ ├── cpod_v1beta1_finetune.yaml │ │ ├── cpod_v1beta1_jupyterlab.yaml │ │ ├── cpod_v1beta1_juypterlab.yaml │ │ ├── cpod_v1beta1_llamafactory.yaml │ │ ├── cpod_v1beta1_modelstorage.yaml │ │ ├── cpod_v1betav1_cpodjob.yaml │ │ ├── cpod_v1betav1_inference.yaml │ │ ├── cpod_v1betav1_inference_llama27b_vllm.yaml │ │ └── kustomization.yaml ├── go.mod ├── go.sum ├── hack │ └── boilerplate.go.txt ├── internal │ ├── controller │ │ ├── cpodjob_controller.go │ │ ├── cpodjob_controller_test.go │ │ ├── finetune_controller.go │ │ ├── inference_controller.go │ │ ├── inference_controller_test.go │ │ ├── jupyterlab_controller.go │ │ ├── jupyterlab_controller_test.go │ │ ├── llamafactory_controller.go │ │ ├── llamafactory_controller_test.go │ │ ├── modelstorage_controller.go │ │ ├── suite_test.go │ │ └── yamlresource_controller.go │ └── synchronizer │ │ ├── README.md │ │ ├── cpodobserver.go │ │ ├── manager.go │ │ ├── playground.go │ │ ├── syncjob.go │ │ └── uploader.go ├── pkg │ ├── finetune │ │ └── model.go │ ├── modelhub │ │ ├── interface.go │ │ ├── modelscope.go │ │ └── modelscope_test.go │ ├── provider │ │ ├── litellm │ │ │ ├── litellm.go │ │ │ └── litellm_test.go │ │ └── sxwl │ │ │ ├── README.md │ │ │ ├── interface.go │ │ │ ├── sxwl.go │ │ │ └── sxwl_test.go │ ├── resource │ │ ├── README.md │ │ └── resource.go │ └── util │ │ ├── metrics.go │ │ ├── oss.go │ │ └── status.go └── test │ └── cases │ └── modihand_pytorchjob.json ├── deployment ├── PRICING ├── README.md ├── add_nodes ├── api_test │ ├── README.md │ └── api_test.py ├── build_k8s ├── ceph │ ├── README.md │ ├── ceph-admin-secret.yaml │ ├── ceph-rbd-sc.yaml │ ├── rook_ceph_node_cleanup.sh │ ├── secret.yaml │ ├── secrets-default.yaml │ ├── 
secrets.yaml │ └── test-pod.yaml ├── charts │ └── sx3k │ │ ├── .helmignore │ │ ├── Chart.yaml │ │ ├── charts │ │ ├── cpodoperator │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ │ ├── NOTES.txt │ │ │ │ ├── _helpers.tpl │ │ │ │ ├── configmap.yaml │ │ │ │ ├── crds.yaml │ │ │ │ ├── deployment.yaml │ │ │ │ ├── namespace.yaml │ │ │ │ ├── rbac.yaml │ │ │ │ └── service.yaml │ │ │ └── values.yaml │ │ └── sxcloud │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ ├── NOTES.txt │ │ │ ├── _helpers.tpl │ │ │ ├── configmap.yaml │ │ │ ├── configmap.yaml.bak │ │ │ ├── deployment.yaml │ │ │ ├── ingress.yaml │ │ │ ├── namespace.yaml │ │ │ ├── persistence.yaml │ │ │ ├── secret.yaml │ │ │ └── service.yaml │ │ │ └── values.yaml │ │ ├── cufile.log │ │ ├── templates │ │ ├── NOTES.txt │ │ └── _helpers.tpl │ │ └── values.yaml ├── cpodmanager │ └── Dockerfile ├── datasets │ └── pre_loaded.txt ├── docker │ └── images.txt ├── fix_sample.py ├── gpumon │ ├── README.md │ ├── dcgm_exporter.yaml │ └── promethues-gpumonitor.yaml ├── jupyterlab │ └── Dockerfile ├── kubeadm_init.sh ├── manifest │ ├── Makefile │ ├── README.md │ ├── make-package.sh │ ├── manifest-local.yaml │ └── manifest.yaml ├── models │ ├── README.md │ ├── pre_loaded.yaml │ └── sync_model.py ├── modeluploader │ └── Dockerfile ├── nfd │ ├── README.md │ └── values.yaml ├── online │ ├── Makefile │ ├── README.md │ └── make-install.sh ├── operator │ ├── README.md │ ├── fix_pvc.py │ ├── install_kubeflow.sh │ ├── local-path-config.yaml │ ├── local-path-storage.yaml │ ├── local-sc.yaml │ └── verification_single_gpu.yaml ├── sxcloud │ ├── Dockerfile │ └── database │ │ └── init.sql ├── values │ ├── README.md │ ├── ceph-csi-cephfs.yaml │ ├── gpu-operator.yaml │ ├── infiniband-exporter.yaml │ ├── juicefs-csi.yaml │ ├── kruise.yaml │ ├── kube-prometheus-stack.yaml │ ├── loki-stack.yaml │ ├── mpi-operator.yaml │ ├── network-operator.yaml │ ├── node-feature-discovery.yaml │ ├── rook-ceph-cluster-single.yaml 
│ ├── rook-ceph-cluster.yaml │ ├── rook-ceph.yaml │ ├── sxcloud.yaml │ ├── training-operator.yaml │ └── volcano-development.yaml ├── values_online │ ├── README.md │ ├── ceph-csi-cephfs.yaml │ ├── cpodoperator.yaml │ ├── gpu-operator.yaml │ ├── infiniband-exporter.yaml │ ├── juicefs-csi.yaml │ ├── kruise.yaml │ ├── kube-prometheus-stack.yaml │ ├── loki-stack.yaml │ ├── mpi-operator.yaml │ ├── network-operator.yaml │ ├── node-feature-discovery.yaml │ ├── rook-ceph-cluster-single.yaml │ ├── rook-ceph-cluster.yaml │ ├── rook-ceph.yaml │ ├── sxcloud.yaml │ ├── training-operator.yaml │ └── volcano-development.yaml ├── yaml_apps │ ├── cert-manager.yaml │ ├── cpodoperator.yaml │ ├── dashboard-admin-sa.yaml │ ├── downloader-rbac.yaml │ ├── image-preload.yaml │ ├── infiniband-exporter.yaml │ ├── ingress-nginx.yaml │ ├── jupyterlab.yaml │ ├── kserve.yaml │ ├── kubernetes-dashboard.yaml │ ├── metrics-server.yaml │ ├── milvus.yaml │ ├── milvus.yaml.bak │ ├── mpi-operator.yaml │ ├── opencost.yaml │ ├── shared-pv.yaml │ ├── tensorboard.yaml │ ├── training-operator.yaml │ └── volcano-development.yaml └── yaml_apps_online │ ├── cert-manager.yaml │ ├── cpodoperator.yaml │ ├── dashboard-admin-sa.yaml │ ├── downloader-rbac.yaml │ ├── image-preload.yaml │ ├── infiniband-exporter.yaml │ ├── ingress-nginx.yaml │ ├── jupyterlab.yaml │ ├── kserve.yaml │ ├── kubernetes-dashboard.yaml │ ├── metrics-server.yaml │ ├── milvus.yaml │ ├── mpi-operator.yaml │ ├── opencost.yaml │ ├── shared-pv.yaml │ ├── tensorboard.yaml │ ├── training-operator.yaml │ └── volcano-development.yaml ├── devops ├── CLI_LIST.md ├── DOCKER.md ├── GPU_MODELS ├── IB.md ├── README.md ├── ansible │ ├── README.md │ ├── go.yaml │ ├── python.yaml │ └── user.yaml ├── docker │ ├── alpine-git-lfs │ │ └── Dockerfile │ ├── cpodmanager │ │ └── README.md │ ├── go.dockerfile.tpl │ └── ib-exporter │ │ ├── Dockerfile │ │ └── README.md ├── force_delete_ns.sh ├── gcp │ ├── ADD_DATA_DISK.md │ └── README.md ├── go_proxy.sh └── python │ └── 
CONDA.md ├── docs ├── BILLING.md ├── CPOD_OPERATOR.md ├── NAMESPACEISOLATION.md └── README.md ├── e2e ├── README.md ├── a_ib_test.go ├── b_mpijob_test.go ├── b_pytorchjob_test.go ├── c_gpt3_test.go ├── ib │ └── ib.yaml ├── main_test.go ├── model │ └── gpt3-1.3b │ │ └── 1h1g.yaml ├── mpijob │ └── mnist.yaml └── pytorchjob │ └── multinode.yaml ├── examples ├── README.md ├── airplane-cv │ ├── ocr_paddle_multi_matting.py │ └── requirements.txt ├── airplane-demo │ ├── .gitignore │ ├── README.md │ ├── app.py │ ├── config.py │ ├── data │ │ └── images.db │ ├── frontend │ │ ├── .env.development │ │ ├── .env.production │ │ ├── .gitignore │ │ ├── index.html │ │ ├── package.json │ │ ├── src │ │ │ ├── App.vue │ │ │ ├── assets │ │ │ │ └── logo.png │ │ │ ├── components │ │ │ │ └── SearchPage.vue │ │ │ └── main.js │ │ ├── vite.config.js │ │ └── yarn.lock │ ├── preprocess.py │ ├── requirements.txt │ └── search.py ├── basic-transformer │ ├── Dockerfile │ ├── README.md │ ├── model │ │ └── basic_transformer.py │ ├── requirements.txt │ ├── run_onnx_inference.sh │ ├── scripts │ │ ├── onnx_inference.py │ │ ├── to_onnx.py │ │ └── train_script.py │ ├── train.sh │ ├── transform_to_onnx.sh │ └── utils │ │ ├── model_config.py │ │ └── train_utils.py ├── bert │ ├── Dockerfile │ ├── README.md │ ├── base.Dockerfile │ ├── build_and_run_mpijob.sh │ ├── deepspeed_env │ ├── k8s │ │ ├── README.md │ │ ├── bert_cpu.yaml │ │ ├── bert_gpu.yaml │ │ ├── debug.yaml │ │ ├── mpi_bert_ds.yaml │ │ ├── mpi_bert_ds_for_cephcsi.yaml │ │ └── train_bert_ds_single_host.yaml │ ├── model_roberta_base │ │ ├── README.md │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ ├── requirements.txt │ ├── train_bert.py │ └── train_bert_ds.py ├── cmmlu │ ├── categories.py │ ├── mp_utils.py │ └── openchat.py ├── dataset │ ├── Dockerfile │ ├── dataset.json │ └── demo.py ├── docker │ ├── README.md │ ├── cuda-base.Dockerfile │ └── torch-base.Dockerfile ├── 
env-verify │ ├── README.md │ ├── hello_modelscope_inference.py │ ├── hello_modelscope_train_multi.py │ ├── hello_modelscope_train_single.py │ ├── test_cuda.cmd │ ├── test_cuda.cu │ ├── test_cudnn.cmd │ └── test_cudnn.cu ├── exporter │ └── exporter │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── exporter │ │ ├── go.mod │ │ ├── go.sum │ │ ├── image.png │ │ ├── main.go │ │ └── pod.yaml ├── face-demo │ ├── .gitignore │ ├── README.md │ ├── backend │ │ ├── .python-version │ │ ├── app.py │ │ ├── config.py │ │ ├── db.py │ │ ├── face_processor.py │ │ ├── requirements.txt │ │ └── util.py │ ├── docker-compose.yml │ └── frontend │ │ ├── .env.development │ │ ├── .env.production │ │ ├── index.html │ │ ├── package.json │ │ ├── src │ │ ├── App.vue │ │ ├── assets │ │ │ └── logo.png │ │ ├── components │ │ │ └── ImagePreview.vue │ │ ├── main.js │ │ ├── router.js │ │ └── views │ │ │ ├── ImageManagement.vue │ │ │ └── Search.vue │ │ ├── vite.config.js │ │ └── yarn.lock ├── fintune-infer-eval │ ├── README.md │ ├── calculate_score.py │ ├── config.py │ ├── evaluation.json │ ├── main.py │ ├── requirements.txt │ └── tools │ │ ├── chaoyang.py │ │ └── process_json.py ├── gemma │ ├── README.md │ └── train.py ├── gpt3 │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── configuration.json │ ├── finetune_poetry.py │ ├── gpt3_pvc.yaml │ ├── ptjob_gpt3_1.3b_1h1g.yaml │ ├── ptjob_gpt3_1.3b_1h8g.yaml │ ├── ptjob_gpt3_1.3b_2h16g.yaml │ └── ptjob_sample.yaml ├── infiniband │ ├── Dockerfile │ ├── README.md │ ├── all_reduce.py │ ├── ib-check.yaml │ ├── sriov-ib-network-deployment.yaml │ ├── sriov-ib-network-node-policy.yaml │ ├── sriov-ib-network-pod.yaml │ └── sriov-ib-network.yaml ├── jax │ ├── speedup.ipynb │ └── xla.py ├── litellm-stack │ ├── README.md │ └── chat_test.py ├── llama2-pt │ ├── Dockerfile │ ├── README.md │ ├── finetune.py │ └── main.py ├── llama2 │ ├── Dockerfile │ ├── README.md │ ├── config │ │ └── dp_zero3_config.json │ ├── cpodjob │ │ ├── Dockerfile │ │ └── train.py │ ├── 
finetuning-and-deploy │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── cpodjob.yaml │ │ └── inferenceservice.yaml │ ├── k8s │ │ └── llama2_deepspeed_mpi.yaml │ ├── llama2_demo.py │ └── requirements.txt ├── llamafactory │ ├── Dockerfile │ ├── README.md │ ├── cpodjob.yaml │ └── dataset_info.json ├── llm-agent │ ├── Dockerfile │ ├── README.md │ ├── __init__.py │ ├── agent_core.py │ ├── app.py │ ├── buildimage.sh │ ├── city.py │ ├── config.py │ ├── domain.py │ ├── llm.py │ ├── memory.py │ ├── plan.py │ ├── registry.py │ ├── stylish.py │ ├── test_plan.py │ ├── test_tools.py │ ├── tools.py │ └── weather-agent.yaml ├── nccl │ ├── 0.build_and_push_docker_image.sh │ ├── 0001-solve-compilation-error.patch │ ├── 1.k8s_apply_yaml.sh │ ├── 2.docker_run.sh │ ├── DDP_MNIST_demo.py │ ├── Dockerfile │ ├── README.md │ ├── base.Dockerfile │ ├── dist_nccl_demo.py │ ├── entrypoint.sh │ ├── go_worker4.sh │ ├── go_worker5.sh │ ├── k8s_nccl_test.yaml │ └── nccl_test_locally.py ├── nim │ ├── Dockerfile │ ├── README.md │ └── run.sh ├── p-eval │ └── eval.py ├── pytorch-multinode │ ├── Dockerfile │ ├── README.md │ ├── datautils.py │ ├── main.py │ └── pytorch_multinode_linertrain.yaml ├── qanything │ ├── README.md │ └── import_knowledge.py ├── rag-h5 │ ├── 21book.png │ ├── Dockerfile │ ├── assistant-avatar.png │ ├── index.html │ ├── script.js │ ├── send-icon.png │ ├── styles.css │ ├── user-avatar.png │ └── voice-icon.png ├── rag │ ├── Dockerfile │ ├── README.md │ ├── config.py │ ├── id_text_map.json │ ├── image-1.png │ ├── image-2.png │ ├── rag-service-deployment.yaml │ ├── rag-service-service.yaml │ ├── requirements.txt │ ├── save_to_milvus.py │ ├── service.py │ ├── trump_news.json │ ├── trump_news.txt │ └── truncate_milvus.py ├── ray │ ├── Dockerfile │ ├── README.md │ ├── locustfile.py │ ├── rayservice-example.yaml │ └── va │ │ ├── README.md │ │ └── vllm_app.py └── rocm-images │ ├── Dockerfile.llamafactory │ ├── Dockerfile.vllm │ └── README.md ├── experimental ├── README.md ├── ceph-csi │ ├── 
README.md │ ├── deployment.yaml │ ├── pvc.yaml │ ├── sc.yaml │ ├── secret.yaml │ └── statefulset.yaml ├── cy │ └── README.md ├── demo_test.go └── qmapper_prototype │ ├── Dockerfile │ ├── README.md │ ├── example │ └── basic_transformer │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── model │ │ └── basic_transformer.py │ │ ├── requirements.txt │ │ ├── scripts │ │ └── train_script_qmapper.py │ │ ├── test_qmapper.sh │ │ └── utils │ │ ├── model_config.py │ │ ├── train_utils.py │ │ └── train_utils_qmapper.py │ ├── qmapper │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── meta_ir │ │ │ ├── __init__.py │ │ │ ├── cluster_info.py │ │ │ ├── definitions.py │ │ │ ├── shard_annotation.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ └── graph_operations.py │ │ │ └── visual.py │ │ ├── qcompile.py │ │ ├── ray_connect.py │ │ └── shard_discovery.py │ ├── bridge │ │ ├── __init__.py │ │ └── torch_bridge │ │ │ ├── __init__.py │ │ │ ├── comp_graph.py │ │ │ ├── environment_variables.py │ │ │ └── third_party_utils.py │ ├── codegen │ │ ├── __init__.py │ │ ├── internal_meta_ir │ │ │ ├── internal_meta_ir.py │ │ │ ├── internal_operators │ │ │ │ ├── add.py │ │ │ │ ├── config │ │ │ │ ├── div.py │ │ │ │ ├── expand.py │ │ │ │ ├── internal_operators.py │ │ │ │ ├── log_softmax.py │ │ │ │ ├── matmul.py │ │ │ │ ├── mul.py │ │ │ │ ├── neg.py │ │ │ │ ├── pow.py │ │ │ │ ├── reciprocal.py │ │ │ │ ├── sqrt.py │ │ │ │ ├── sub.py │ │ │ │ ├── sum.py │ │ │ │ ├── transpose.py │ │ │ │ └── view.py │ │ │ └── ir_transform.py │ │ ├── test.py │ │ └── tvm_transform.py │ ├── config.py │ ├── csrc │ │ ├── CMakeLists.txt │ │ ├── CMakeLists.txt.old │ │ ├── CMakeLists.txt.old.old │ │ ├── autoparallel │ │ │ ├── autoparallel.cc │ │ │ ├── autoparallel.h │ │ │ └── passes │ │ │ │ ├── shard_optimize.cc │ │ │ │ ├── shard_optimize.h │ │ │ │ ├── stage_split.cc │ │ │ │ └── stage_split.h │ │ ├── comm │ │ │ ├── comm_iface.cc │ │ │ ├── comm_iface.h │ │ │ ├── components │ │ │ │ ├── cpu_iface │ │ │ │ │ ├── cpu_iface.cc │ │ │ │ │ └── 
cpu_iface.h │ │ │ │ ├── cuda_iface │ │ │ │ │ ├── cuda_iface.cc │ │ │ │ │ └── cuda_iface.h │ │ │ │ └── rocm_iface │ │ │ │ │ ├── rocm_iface.cc │ │ │ │ │ └── rocm_iface.h │ │ │ ├── config.h │ │ │ ├── execute_context │ │ │ │ ├── cpu │ │ │ │ │ ├── execute_engine_cpu.cc │ │ │ │ │ └── execute_engine_cpu.h │ │ │ │ ├── cuda │ │ │ │ │ ├── execute_engine_cuda.cc │ │ │ │ │ ├── execute_engine_cuda.h │ │ │ │ │ ├── qmap_cuda_executor.cu │ │ │ │ │ └── qmap_cuda_reduce.cu │ │ │ │ ├── execute_engine.cc │ │ │ │ ├── execute_engine.h │ │ │ │ └── rocm │ │ │ │ │ ├── execute_engine_rocm.cc │ │ │ │ │ └── execute_engine_rocm.h │ │ │ ├── memory_pool │ │ │ │ ├── cpu │ │ │ │ │ ├── memory_pool_cpu.cc │ │ │ │ │ └── memory_pool_cpu.h │ │ │ │ ├── cuda │ │ │ │ │ ├── memory_pool_cuda.cc │ │ │ │ │ └── memory_pool_cuda.h │ │ │ │ ├── memory_pool.cc │ │ │ │ ├── memory_pool.h │ │ │ │ └── rocm │ │ │ │ │ ├── memory_pool_rocm.cc │ │ │ │ │ └── memory_pool_rocm.h │ │ │ └── ucx_iface │ │ │ │ ├── ucx_iface.cc │ │ │ │ ├── ucx_iface.h │ │ │ │ ├── ucx_impls.cc │ │ │ │ └── ucx_impls.h │ │ ├── meta │ │ │ ├── meta_ir.cc │ │ │ ├── meta_ir.h │ │ │ ├── sharding_info.cc │ │ │ └── sharding_info.h │ │ ├── simulator │ │ │ ├── simulator.cc │ │ │ └── simulator.h │ │ ├── tests │ │ │ └── comm │ │ │ │ ├── components │ │ │ │ ├── cuda_iface_tests.cc │ │ │ │ └── kernels │ │ │ │ │ ├── vector_add.cu │ │ │ │ │ └── vector_add.h │ │ │ │ └── memory_pool │ │ │ │ └── memory_pool_tests.cc │ │ └── utils │ │ │ ├── qmap_compiler_defs.cc │ │ │ ├── qmap_compiler_defs.h │ │ │ ├── qmap_locks.cc │ │ │ └── qmap_locks.h │ ├── executable │ │ ├── __init__.py │ │ └── executable.py │ ├── optimization │ │ ├── cost_model │ │ │ ├── __init__.py │ │ │ └── cost_model.py │ │ └── search │ │ │ ├── __init__.py │ │ │ ├── inter_strategy_search.py │ │ │ └── intra_strategy_search.py │ └── simulator │ │ └── simulator.py │ └── test │ ├── cuda │ └── cudrv_test.cu │ ├── dynamo │ ├── condition.py │ ├── mlp_example.py │ └── pytree.py │ ├── qmapper │ ├── config │ ├── 
test_auto_search.py │ ├── test_dyamo_export.py │ ├── test_new_internal_ir.py │ ├── test_search.py │ ├── test_simlulator.py │ └── test_split.py │ └── torch │ └── eq.py ├── go.mod ├── go.sum ├── home ├── README.md ├── cairong │ ├── README.md │ ├── bubble_sort.go │ ├── bubble_sort_test.go │ └── game-of-live │ │ ├── main.go │ │ └── main_test.go ├── chenshu │ ├── bubble │ │ ├── README.md │ │ ├── bubble_sort.go │ │ └── bubble_sort_test.go │ └── gameoflife │ │ ├── main.go │ │ └── main_test.go ├── congpeiqing │ ├── README.md │ └── code-retreat │ │ ├── code_retreat.go │ │ ├── code_retreat_test.go │ │ └── main.go ├── donggang │ ├── README.md │ ├── bubble_sort.go │ ├── bubble_sort_test.go │ ├── cmd │ │ └── main.go │ ├── conwaygame.go │ └── conwaygame_test.go ├── steven │ └── README.md ├── tianyu │ ├── README.md │ ├── bubble_sort.go │ └── ty │ │ ├── README.md │ │ └── bubble_sort.go ├── wenhua │ └── README.md ├── yzhao │ ├── README.md │ ├── bubble_sort.go │ ├── bubble_sort_test.go │ ├── code-retreat-20240119 │ │ ├── README.md │ │ ├── main.go │ │ └── main_test.go │ ├── code-retreat-20240126 │ │ ├── README.md │ │ ├── bard │ │ │ └── main.go │ │ ├── glm4 │ │ │ └── main.go │ │ └── gpt4 │ │ │ ├── main.go │ │ │ └── main_test.go │ └── code-retreat-20240419 │ │ ├── fast_median.go │ │ ├── fast_median_test.go │ │ ├── heap_media.go │ │ └── heap_media_test.go ├── zhaoyan │ └── README.md └── zhongcheng │ ├── README.md │ ├── bubble_sort.go │ └── bubble_sort_test.go ├── internal ├── README.md ├── gateway │ ├── config │ │ └── config.go │ ├── gateway │ │ ├── config.go │ │ ├── handler.go │ │ └── match.go │ └── svc │ │ └── service_context.go └── scheduler │ ├── config │ ├── config.go │ └── hyperparameter.go │ ├── handler │ ├── adapter_by_name_handler.go │ ├── app_job_add_handler.go │ ├── app_job_delete_handler.go │ ├── app_job_get_handler.go │ ├── app_list_handler.go │ ├── app_register_handler.go │ ├── app_unregister_handler.go │ ├── balance_add_handler.go │ ├── balance_get_handler.go │ ├── 
base_image_list_handler.go │ ├── billing_list_handler.go │ ├── billing_tasks_handler.go │ ├── chat_completions_handler.go │ ├── cluster_cpod_name_put_handler.go │ ├── cluster_cpods_handler.go │ ├── cpod_job_handler.go │ ├── cpod_status_handler.go │ ├── create_new_user_i_d_handler.go │ ├── custom_routes.go │ ├── dataset_by_name_handler.go │ ├── dingtalk_callback_handler.go │ ├── dingtalk_userinfo_handler.go │ ├── error.go │ ├── finetune_handler.go │ ├── finetune_status_handler.go │ ├── gpu_job_status_handler.go │ ├── gpu_job_stop_handler.go │ ├── gpu_type_handler.go │ ├── inference_delete_handler.go │ ├── inference_deploy_handler.go │ ├── inference_info_handler.go │ ├── inference_playground_handler.go │ ├── inference_status_handler.go │ ├── inference_stop_handler.go │ ├── job_create_handler.go │ ├── job_delete_handler.go │ ├── job_get_handler.go │ ├── job_status_handler.go │ ├── job_stop_handler.go │ ├── jobs_del_handler.go │ ├── jupyterlab_create_handler.go │ ├── jupyterlab_del_handler.go │ ├── jupyterlab_image_create_handler.go │ ├── jupyterlab_image_del_handler.go │ ├── jupyterlab_image_list_handler.go │ ├── jupyterlab_image_version_list_handler.go │ ├── jupyterlab_list_handler.go │ ├── jupyterlab_pause_handler.go │ ├── jupyterlab_resume_handler.go │ ├── jupyterlab_update_handler.go │ ├── model_by_name_handler.go │ ├── node_add_handler.go │ ├── node_list_handler.go │ ├── oss_sync_handler.go │ ├── quota_add_handler.go │ ├── quota_delete_handler.go │ ├── quota_list_handler.go │ ├── quota_update_handler.go │ ├── recharge_list_handler.go │ ├── register_handler.go │ ├── resource_adapters_handler.go │ ├── resource_datasets_handler.go │ ├── resource_load_handler.go │ ├── resource_meta_add_handler.go │ ├── resource_meta_del_handler.go │ ├── resource_models_handler.go │ ├── resource_task_get_handler.go │ ├── resource_task_status_handler.go │ ├── resource_task_update_handler.go │ ├── routes.go │ ├── send_email_handler.go │ ├── upload_status_handler.go │ ├── 
uploader_access_handler.go │ ├── user_info_handler.go │ ├── user_list_handler.go │ └── user_login_handler.go │ ├── job │ └── quota.go │ ├── logic │ ├── adapter_by_name_logic.go │ ├── app_job_add_logic.go │ ├── app_job_delete_logic.go │ ├── app_job_get_logic.go │ ├── app_list_logic.go │ ├── app_register_logic.go │ ├── app_unregister_logic.go │ ├── balance_add_logic.go │ ├── balance_get_logic.go │ ├── base_image_list_logic.go │ ├── billing_list_logic.go │ ├── billing_tasks_logic.go │ ├── chat_completions_logic.go │ ├── cluster_cpod_name_put_logic.go │ ├── cluster_cpods_logic.go │ ├── consts.go │ ├── cpod_job_logic.go │ ├── cpod_status_logic.go │ ├── create_new_user_i_d_logic.go │ ├── dataset_by_name_logic.go │ ├── dingtalk_callback_logic.go │ ├── dingtalk_userinfo_logic.go │ ├── finetune_logic.go │ ├── finetune_status_logic.go │ ├── gpu_job_status_logic.go │ ├── gpu_job_stop_logic.go │ ├── gpu_type_logic.go │ ├── inference_delete_logic.go │ ├── inference_deploy_logic.go │ ├── inference_info_logic.go │ ├── inference_playground_logic.go │ ├── inference_status_logic.go │ ├── inference_stop_logic.go │ ├── job_create_logic.go │ ├── job_delete_logic.go │ ├── job_get_logic.go │ ├── job_status_logic.go │ ├── job_stop_logic.go │ ├── jobs_del_logic.go │ ├── jupyterlab_create_logic.go │ ├── jupyterlab_del_logic.go │ ├── jupyterlab_image_create_logic.go │ ├── jupyterlab_image_del_logic.go │ ├── jupyterlab_image_list_logic.go │ ├── jupyterlab_image_version_list_logic.go │ ├── jupyterlab_list_logic.go │ ├── jupyterlab_pause_logic.go │ ├── jupyterlab_resume_logic.go │ ├── jupyterlab_update_logic.go │ ├── model_by_name_logic.go │ ├── node_add_logic.go │ ├── node_list_logic.go │ ├── oss_sync_logic.go │ ├── quota_add_logic.go │ ├── quota_delete_logic.go │ ├── quota_list_logic.go │ ├── quota_update_logic.go │ ├── recharge_list_logic.go │ ├── register_logic.go │ ├── resource_adapters_logic.go │ ├── resource_datasets_logic.go │ ├── resource_load_logic.go │ ├── resource_meta_add_logic.go 
│ ├── resource_meta_del_logic.go │ ├── resource_models_logic.go │ ├── resource_task_get_logic.go │ ├── resource_task_status_logic.go │ ├── resource_task_update_logic.go │ ├── send_email_logic.go │ ├── upload_status_logic.go │ ├── uploader_access_logic.go │ ├── user_info_logic.go │ ├── user_list_logic.go │ └── user_login_logic.go │ ├── model │ ├── README.md │ ├── consts.go │ ├── resource_sync_task_model.go │ ├── resource_sync_task_model_gen.go │ ├── sys_app_job_model.go │ ├── sys_app_job_model_gen.go │ ├── sys_app_model.go │ ├── sys_app_model_gen.go │ ├── sys_cpod_cache_model.go │ ├── sys_cpod_cache_model_gen.go │ ├── sys_cpod_node_model.go │ ├── sys_cpod_node_model_gen.go │ ├── sys_fileurl_model.go │ ├── sys_fileurl_model_gen.go │ ├── sys_inference_model.go │ ├── sys_inference_model_gen.go │ ├── sys_jupyterlab_model.go │ ├── sys_jupyterlab_model_gen.go │ ├── sys_oss_resource_model.go │ ├── sys_oss_resource_model_gen.go │ ├── sys_price_model.go │ ├── sys_price_model_gen.go │ ├── sys_quota_model.go │ ├── sys_quota_model_gen.go │ ├── sys_user_job_model.go │ ├── sys_user_job_model_gen.go │ ├── sys_user_model.go │ ├── sys_user_model_gen.go │ ├── user_balance_model.go │ ├── user_balance_model_gen.go │ ├── user_billing_model.go │ ├── user_billing_model_gen.go │ ├── user_recharge_model.go │ ├── user_recharge_model_gen.go │ ├── vars.go │ ├── verify_code_model.go │ └── verify_code_model_gen.go │ ├── pay │ ├── balance.go │ └── billing.go │ ├── resource │ └── manager.go │ ├── svc │ └── service_context.go │ ├── types │ ├── custom_types.go │ └── types.go │ └── user │ └── user.go ├── openapi └── README.md ├── pkg ├── README.md ├── bcrypt │ ├── bcrypt.go │ └── bcrypt_test.go ├── cluster │ ├── README.md │ └── client-go │ │ ├── README.md │ │ ├── client.go │ │ └── clientgo.go ├── config │ ├── const.go │ └── fromenv.go ├── consts │ ├── job.go │ ├── k8s.go │ └── model.go ├── email │ ├── email.go │ ├── smtpclient.go │ └── template.go ├── fs │ ├── dir.go │ ├── dir_test.go │ ├── file.go │ 
└── json.go ├── log │ └── log.go ├── math │ ├── math.go │ └── math_test.go ├── model-uploader │ ├── README.md │ ├── model_uploader.go │ └── model_uploader_test.go ├── orm │ └── string.go ├── rsa │ ├── rsa.go │ └── rsa_test.go ├── storage │ ├── README.md │ ├── format.go │ ├── format_test.go │ ├── oss.go │ ├── oss_test.go │ ├── pack.go │ ├── pack_test.go │ ├── resource.go │ └── resource_test.go ├── strings │ ├── strings.go │ └── strings_test.go ├── testing │ ├── fs.go │ └── fs_test.go ├── time │ └── time.go ├── utils │ ├── config │ │ ├── README.md │ │ ├── configmap.go │ │ ├── configmap_test.go │ │ ├── env_var.go │ │ ├── env_var_test.go │ │ ├── file.go │ │ └── file_test.go │ ├── consts │ │ ├── http.go │ │ └── k8s.go │ ├── errors │ │ ├── errors.go │ │ └── errors_test.go │ └── fs │ │ └── fs.go └── uuid │ └── uuid.go ├── tools ├── .shellcheckrc ├── README.md ├── dingtalk-sync │ ├── README.md │ ├── main.py │ └── requirements.txt ├── download-hf-datasets │ └── main.py ├── go-zero-template │ ├── api │ │ ├── config.tpl │ │ ├── context.tpl │ │ ├── etc.tpl │ │ ├── handler.tpl │ │ ├── logic.tpl │ │ ├── main.tpl │ │ ├── middleware.tpl │ │ ├── route-addition.tpl │ │ ├── routes.tpl │ │ ├── template.tpl │ │ └── types.tpl │ ├── docker │ │ └── docker.tpl │ ├── gateway │ │ ├── etc.tpl │ │ └── main.tpl │ ├── kube │ │ ├── deployment.tpl │ │ └── job.tpl │ ├── model │ │ ├── delete.tpl │ │ ├── err.tpl │ │ ├── field.tpl │ │ ├── find-one-by-field-extra-method.tpl │ │ ├── find-one-by-field.tpl │ │ ├── find-one.tpl │ │ ├── import-no-cache.tpl │ │ ├── import.tpl │ │ ├── insert.tpl │ │ ├── interface-delete.tpl │ │ ├── interface-find-one-by-field.tpl │ │ ├── interface-find-one.tpl │ │ ├── interface-insert.tpl │ │ ├── interface-update.tpl │ │ ├── model-gen.tpl │ │ ├── model-new.tpl │ │ ├── model.tpl │ │ ├── table-name.tpl │ │ ├── tag.tpl │ │ ├── types.tpl │ │ ├── update.tpl │ │ └── var.tpl │ ├── newapi │ │ └── newtemplate.tpl │ └── rpc │ │ ├── call.tpl │ │ ├── config.tpl │ │ ├── etc.tpl │ │ ├── 
logic-func.tpl │ │ ├── logic.tpl │ │ ├── main.tpl │ │ ├── server-func.tpl │ │ ├── server.tpl │ │ ├── svc.tpl │ │ └── template.tpl ├── hf-model │ ├── README.md │ └── import_model.py ├── lint.sh ├── super_linter.sh └── torch_check_cuda.py └── ui ├── .editorconfig ├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .prettierignore ├── .prettierrc.js ├── Dockerfile ├── README.md ├── config ├── config.ts ├── defaultSettings.ts ├── proxy.ts └── routes.ts ├── jest.config.ts ├── jsconfig.json ├── nginx.conf ├── package.json ├── pkg └── sumdb │ └── sum.golang.org │ └── latest ├── public ├── CNAME ├── favicon.ico ├── icons │ └── icon.ico └── scripts │ └── loading.js ├── src ├── access.ts ├── app.tsx ├── components │ ├── AsyncButton │ │ └── index.tsx │ ├── Footer │ │ └── index.tsx │ ├── HeaderDropdown │ │ └── index.tsx │ ├── MyAccount │ │ ├── BillHistory │ │ │ └── index.tsx │ │ ├── DepositHistory │ │ │ └── index.tsx │ │ └── index.tsx │ ├── RightContent │ │ ├── AvatarDropdown.tsx │ │ └── index.tsx │ └── index.ts ├── global.less ├── global.tsx ├── locales │ ├── en-US.ts │ ├── en-US │ │ ├── JupyterLab.ts │ │ ├── adapter.ts │ │ ├── applicationMenu.ts │ │ ├── clusterCpods.ts │ │ ├── clusterInformation.ts │ │ ├── dataset.ts │ │ ├── global.ts │ │ ├── inferenceState.ts │ │ ├── jobDetail.ts │ │ ├── login.ts │ │ ├── modelRepository.ts │ │ ├── myAccount.ts │ │ ├── oem.ts │ │ ├── playground.ts │ │ ├── userJob.ts │ │ ├── userJobCommit.ts │ │ └── userQuota.ts │ ├── zh-CN.ts │ └── zh-CN │ │ ├── JupyterLab.ts │ │ ├── adapter.ts │ │ ├── applicationMenu.ts │ │ ├── clusterCpods.ts │ │ ├── clusterInformation.ts │ │ ├── dataset.ts │ │ ├── global.ts │ │ ├── inferenceState.ts │ │ ├── jobDetail.ts │ │ ├── login.ts │ │ ├── modelRepository.ts │ │ ├── myAccount.ts │ │ ├── oem.ts │ │ ├── playground.ts │ │ ├── userJob.ts │ │ ├── userJobCommit.ts │ │ └── userQuota.ts ├── manifest.json ├── models │ └── chat-h5-model.ts ├── pages │ ├── 404.tsx │ ├── Adapter │ │ └── index.tsx │ ├── ApplicationManagement │ 
│ ├── AddAppForm │ │ │ └── index.tsx │ │ └── index.tsx │ ├── ApplicationMenu │ │ ├── AddAppForm │ │ │ └── index.tsx │ │ ├── AppCard │ │ │ └── index.tsx │ │ └── index.tsx │ ├── ChatTrial │ │ ├── ChatContainer │ │ │ ├── index.less │ │ │ └── index.tsx │ │ ├── MarkdownContent │ │ │ └── index.tsx │ │ ├── MessageInput │ │ │ ├── assets │ │ │ │ ├── ic_delete.png │ │ │ │ └── send-icon.png │ │ │ ├── index.less │ │ │ └── index.tsx │ │ ├── MessageItem │ │ │ ├── RobotMessageItem │ │ │ │ ├── ImageList │ │ │ │ │ └── index.tsx │ │ │ │ └── index.tsx │ │ │ ├── UserMessageItem │ │ │ │ └── index.tsx │ │ │ ├── assets │ │ │ │ ├── assistant-avatar.png │ │ │ │ └── user-avatar.png │ │ │ ├── index.less │ │ │ └── index.tsx │ │ ├── index.less │ │ ├── index.tsx │ │ ├── services │ │ │ └── index.ts │ │ └── utils │ │ │ ├── chatSettingConfigured.ts │ │ │ ├── commonSettingConfigured.ts │ │ │ ├── index.ts │ │ │ ├── interface.ts │ │ │ └── store.ts │ ├── ClusterCpods │ │ └── index.tsx │ ├── ClusterInformation │ │ ├── AddNodeDrawer │ │ │ └── index.tsx │ │ └── index.tsx │ ├── Dataset │ │ └── index.tsx │ ├── Grafana │ │ └── index.tsx │ ├── InferenceState │ │ └── index.tsx │ ├── JobDetail │ │ └── index.tsx │ ├── JupyterLab │ │ ├── AddJupyterLab │ │ │ └── index.tsx │ │ ├── ImageManagementTab │ │ │ ├── ImageDetail │ │ │ │ └── index.tsx │ │ │ └── index.tsx │ │ ├── JupyterLabTab │ │ │ ├── BuildingImage │ │ │ │ └── index.tsx │ │ │ ├── PauseButton.tsx │ │ │ ├── RunButton.tsx │ │ │ └── index.tsx │ │ └── index.tsx │ ├── Jupyterlalb │ │ └── index.tsx │ ├── LLaMAFactory │ │ └── index.tsx │ ├── ModelRepository │ │ ├── FineTuningDrawer │ │ │ └── index.tsx │ │ ├── InferenceDrawer │ │ │ └── index.tsx │ │ └── index.tsx │ ├── Playground │ │ ├── index.less │ │ └── index.tsx │ ├── RunpodHome │ │ ├── components │ │ │ ├── FeatureCards.tsx │ │ │ ├── FeatureCards │ │ │ │ └── index.tsx │ │ │ ├── Layout │ │ │ │ ├── Header.tsx │ │ │ │ ├── Sidebar.tsx │ │ │ │ └── index.tsx │ │ │ ├── ResourceSection.tsx │ │ │ ├── UsageChart.tsx │ │ 
│ └── UsageSection.tsx │ │ └── index.tsx │ ├── Tensorboard │ │ └── index.tsx │ ├── User │ │ ├── DingTalkCallback │ │ │ └── index.tsx │ │ └── Login │ │ │ ├── LoginForm │ │ │ └── index.tsx │ │ │ ├── RegistrationForm │ │ │ ├── SourceRegist.tsx │ │ │ ├── UserRegist.tsx │ │ │ └── index.tsx │ │ │ └── index.tsx │ ├── UserJob │ │ ├── DetailModel.tsx │ │ ├── DetailModel │ │ │ ├── assets │ │ │ │ └── GPUpic.png │ │ │ └── index.tsx │ │ └── index.tsx │ ├── UserJobCommit │ │ └── index.tsx │ ├── UserQuota │ │ ├── EditDrawer │ │ │ └── index.tsx │ │ └── index.tsx │ └── Welcome.tsx ├── requestErrorConfig.ts ├── service-worker.js ├── services │ ├── api.ts │ └── index.ts ├── typings.d.ts └── utils │ ├── index.ts │ └── rsaEncrypt.ts ├── tests └── setupTests.jsx └── tsconfig.json /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | @cairong-ai 2 | @guochenshu 3 | @yzhao-2023 4 | @sxwl-donggang 5 | @congpeiqing 6 | @cysxwl 7 | @jimgreat666 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: 报告系统 Bug 4 | title: "【BUG】" 5 | labels: bug, enhancement 6 | assignees: yzhao-2023 7 | 8 | --- 9 | 10 | **复现步骤** 11 | 复现步骤如下: 12 | 1. Go to '...' 13 | 2. Click on '....' 14 | 3. Scroll down to '....' 15 | 4. See error 16 | 17 | **预期的正确/合理行为** 18 | 简要描述你预期的行为 19 | 20 | **提供截图** 21 | 截图请标注出需要注意的文字/内容 22 | 23 | **提供其他资料** 24 | 日志、网页链接、文档链接等 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/devops.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: DevOps 3 | about: 创建 DevOps 相关任务和工作 4 | title: "【DevOps】" 5 | labels: DevOps 6 | assignees: '' 7 | 8 | --- 9 | 10 | **背景** 11 | 该任务创建之前的状态如何? 12 | 13 | **改动** 14 | 改动是什么?目标、价值如何? 15 | 16 | **其他备选** 17 | 其他还有什么方案? 
18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/使用问题.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 使用问题 3 | about: 关于三千平台的问题 4 | title: "【提问】" 5 | labels: question 6 | assignees: yzhao-2023 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe your question** 14 | A clear and concise description of what information you are seeking by asking this question 15 | 16 | **Describe other sources you have consulted** 17 | A clear and concise description of all the additional information you have collected before asking the question 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the question here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/功能需求.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 功能需求 3 | about: 来自用户的新的功能、能力的要求 4 | title: "【功能需求】" 5 | labels: requirement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **背景** 11 | > 描述系统现有的状况和相关的具体信息 12 | 13 | **这个功能需求是解决什么问题?** 14 | > 描述问题的具体场景,以及想要达到的效果 15 | 16 | **解决方案** 17 | > 列举目前考虑的可选解决方案 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/工作任务.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 工作任务 3 | about: 创建一个一般性的任务 4 | title: "【任务】" 5 | labels: task 6 | assignees: '' 7 | 8 | --- 9 | 10 | **目标** 11 | > 本提议所希望看到的改动的目标是什么? 12 | > 这些改动能带来什么价值? 
13 | 14 | **实施方案** 15 | > 详细提供该系统改进的实施方案 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/系统改进.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 系统改进 3 | about: 对系统内部设计进行改善,此类改善不直接影响用户可见的功能和行为 4 | title: "【系统改进】" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **现状** 11 | > 当前的现状是什么(当前的行为、设计、架构)? 12 | 13 | **系统改进的目标** 14 | > 本提议所希望看到的改动的目标是什么? 15 | > 这些改动能带来什么价值? 16 | 17 | **系统改进的实施方案** 18 | > 详细提供该系统改进的实施方案 19 | -------------------------------------------------------------------------------- /.github/linters/.jscpd.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": [ 3 | "tools/go-zero-template/" 4 | ] 5 | } -------------------------------------------------------------------------------- /.github/linters/README.md: -------------------------------------------------------------------------------- 1 | # Linters 2 | 3 | This is the default location for storing super-linter's config files for 4 | [individual linters](https://github.com/github/super-linter#template-rules-files). 5 | 6 | The names of these config files should not be changed. 7 | 8 | All of them are dot-files that are hidden by default. This also follows the 9 | official guideline. 10 | 11 | .shellcheckrc does not work for super-linter; 12 | see [super-linter/issues/4645](https://github.com/super-linter/super-linter/issues/4645). 13 | To ignore a certain error, place a .shellcheckrc file under the target directory; 14 | the error is then ignored for files under that directory (non-recursively).
15 | -------------------------------------------------------------------------------- /.github/reviewers.yaml: -------------------------------------------------------------------------------- 1 | reviewers: 2 | # The default reviewers 3 | # https://github.com/marketplace/actions/auto-request-review#reviewers-configuration 4 | defaults: 5 | - glen-nascentcoreai 6 | -------------------------------------------------------------------------------- /.github/scripts/.shellcheckrc: -------------------------------------------------------------------------------- 1 | disable=SC2086,SC2044 2 | -------------------------------------------------------------------------------- /.github/scripts/check_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | .github/scripts/check_markdown_filename.sh 4 | .github/scripts/check_dir_naming.sh 5 | .github/scripts/check_dockerfile.sh 6 | -------------------------------------------------------------------------------- /.github/scripts/check_dir_naming.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Checking directory names include only lower case chars and '-'" 4 | found_breakage=false 5 | # shellcheck disable=SC2044 6 | for dirname in $(find . -type d); do 7 | fname=$(basename "${dirname}") 8 | if [[ ${fname} == "ISSUE_TEMPLATE" ]]; then 9 | # Skip GitHub ISSUE_TEMPLATE 10 | continue 11 | fi 12 | if ! 
[[ ${fname} =~ [.0-9a-z-]+ ]]; then 13 | found_breakage=true 14 | echo "${dirname}" 15 | fi 16 | done 17 | 18 | if [[ "${found_breakage}" == "true" ]]; then 19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /.github/scripts/check_dockerfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Checking dockerfile naming" 4 | found_breakage=false 5 | # shellcheck disable=SC2044 6 | for fpath in $(find . -type f -iname dockerfile); do 7 | fname=$(basename "${fpath}") 8 | if [[ "${fname}" != "Dockerfile" ]]; then 9 | found_breakage=true 10 | echo "${fpath}" 11 | fi 12 | done 13 | 14 | if [[ "${found_breakage}" == "true" ]]; then 15 | exit 1 16 | fi 17 | -------------------------------------------------------------------------------- /.github/scripts/check_markdown_filename.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Checking markdown files are named with uppercase chars" 4 | found_md_not_upper=false 5 | # shellcheck disable=SC2044 6 | for mdfile in $(find . -name '*.md'); do 7 | # Skip .github directory 8 | if [[ ${mdfile} =~ ^\.\/\.github ]]; then 9 | continue 10 | fi 11 | fname=$(basename "${mdfile}") 12 | if ! [[ ${fname%%.md} =~ ^[A-Z_]+$ ]]; then 13 | found_md_not_upper=true 14 | echo "${mdfile}" 15 | fi 16 | done 17 | 18 | if [[ "${found_md_not_upper}" == "true" ]]; then 19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /.github/scripts/check_readme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Checking README.md exists in all directories" 4 | found_dirs_missing_readme=false 5 | # shellcheck disable=SC2044 6 | for dir in $(find cli manager tools home -type d); do 7 | dir_name=$(basename "${dir}") 8 | if [[ "${dir_name}" != "testdata" && ! 
-f "${dir}/README.md" ]]; then 9 | found_dirs_missing_readme=true 10 | echo "${dir}" 11 | fi 12 | done 13 | 14 | if [[ "${found_dirs_missing_readme}" == "true" ]]; then 15 | exit 1 16 | fi 17 | -------------------------------------------------------------------------------- /.github/super_linter.env: -------------------------------------------------------------------------------- 1 | VALIDATE_ALL_CODEBASE=false 2 | PYTHON_PYLINT_CONFIG_FILE=pylint_config 3 | VALIDATE_PYTHON_BLACK=false 4 | VALIDATE_PYTHON_FLAKE8=false 5 | VALIDATE_PYTHON_ISORT=false 6 | VALIDATE_PYTHON_MYPY=false 7 | VALIDATE_PYTHON_PYLINT=false 8 | VALIDATE_DOCKERFILE_HADOLINT=false 9 | VALIDATE_JSCPD_ALL_CODEBASE=false 10 | KUBERNETES_KUBECONFORM_OPTIONS=--ignore-missing-schemas 11 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # Workflows 2 | 3 | Github Actions workflows. 
4 | -------------------------------------------------------------------------------- /.github/workflows/additional_lint.yaml: -------------------------------------------------------------------------------- 1 | name: Additional lint 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | permissions: 7 | contents: read 8 | jobs: 9 | check-all: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - run: .github/scripts/check_all.sh 14 | shell: bash 15 | -------------------------------------------------------------------------------- /.github/workflows/chatui.yaml: -------------------------------------------------------------------------------- 1 | name: ChatUI Docker Build 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "chatui/Dockerfile" 7 | 8 | jobs: 9 | docker-build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v3 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v2 18 | 19 | - name: Build Docker image 20 | uses: docker/build-push-action@v4 21 | with: 22 | context: ./chatui 23 | file: ./chatui/Dockerfile 24 | push: false 25 | tags: chatui:pr-${{ github.event.pull_request.number }} 26 | -------------------------------------------------------------------------------- /.github/workflows/gpu_task_ci.yaml: -------------------------------------------------------------------------------- 1 | name: GPU Task CI 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | cron-job: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v4 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v4 15 | with: 16 | python-version: "3.x" 17 | 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install requests # 添加必要的依赖 22 | 23 | - name: Run task 24 | run: python .github/scripts/gpu_task.py 25 | env: 26 | SXWL_TOKEN: ${{ secrets.AUTHORIZATION_PRD }} 27 | 
-------------------------------------------------------------------------------- /.github/workflows/helm_chart_release.yaml: -------------------------------------------------------------------------------- 1 | name: Release Helm Chart 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Helm 17 | uses: azure/setup-helm@v3 18 | with: 19 | helm-version: 'v3.0.0' 20 | 21 | - name: Package Helm chart 22 | run: helm package ./deployment/charts/sx3k 23 | 24 | - name: Create Helm chart release 25 | uses: softprops/action-gh-release@v1 26 | with: 27 | files: sx3k-*.tgz 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | -------------------------------------------------------------------------------- /3kctl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/3kctl/__init__.py -------------------------------------------------------------------------------- /3kctl/conf/config.ini: -------------------------------------------------------------------------------- 1 | [deploy] 2 | work_dir = /opt/3k 3 | kk_bin = bin/kk 4 | cluster_config = conf/config-sample.yaml 5 | package = packages/3k.tar.gz 6 | 7 | [registry] 8 | harbor_addr = https://dockerhub.kubekey.local 9 | ca_file = /etc/docker/certs.d/dockerhub.kubekey.local/ca.crt 10 | cert_file = /etc/docker/certs.d/dockerhub.kubekey.local/dockerhub.kubekey.local.cert 11 | key_file = /etc/docker/certs.d/dockerhub.kubekey.local/dockerhub.kubekey.local.key 12 | harbor_user = admin 13 | harbor_pass = Harbor12345 14 | -------------------------------------------------------------------------------- /3kctl/deploy/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/3kctl/deploy/__init__.py -------------------------------------------------------------------------------- /3kctl/download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/3kctl/download/__init__.py -------------------------------------------------------------------------------- /3kctl/requirements.txt: -------------------------------------------------------------------------------- 1 | plumbum==1.8.2 2 | kubernetes==28.1.0 3 | modelscope==1.9.5 4 | colorama==0.4.6 5 | gunicorn==21.2.0 6 | paramiko==3.4.0 7 | PyYAML==6.0.1 8 | requests==2.31.0 9 | flask==3.0.2 -------------------------------------------------------------------------------- /3kctl/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/3kctl/serve/__init__.py -------------------------------------------------------------------------------- /3kctl/upload/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/3kctl/upload/__init__.py -------------------------------------------------------------------------------- /LANGUAGES: -------------------------------------------------------------------------------- 1 | golang 2 | python 3 | shell 4 | yaml -------------------------------------------------------------------------------- /chatui/Dockerfile: -------------------------------------------------------------------------------- 1 | # This builds the Docker image for the ChatUI 2 | FROM python:3.11-slim 3 | WORKDIR /app 4 | COPY api/requirements.txt /app/api/requirements.txt 5 | RUN pip install --no-cache-dir -r 
api/requirements.txt 6 | COPY start.sh /app 7 | COPY api /app/api 8 | CMD ["/bin/bash", "start.sh"] 9 | -------------------------------------------------------------------------------- /chatui/README.md: -------------------------------------------------------------------------------- 1 | ## AiChat 2 | - 使用 fastapi 搭建本地 server,转发前端会话请求以及完成图片上传 3 | - 前端为 H5 页面 4 | 5 | ## 构建镜像 6 | ```bash 7 | git clone https://github.com/NascentCore/3k.git 8 | cd 3k/chatui 9 | docker buildx build --platform linux/amd64 -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/chatui:latest . 10 | ``` -------------------------------------------------------------------------------- /chatui/api/dist/assets/assistant-avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/assistant-avatar.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/chatui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/chatui.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/delete.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/ic_delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/ic_delete.png --------------------------------------------------------------------------------
/chatui/api/dist/assets/send-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/send-icon.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/upload_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/upload_image.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/user-avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/user-avatar.png -------------------------------------------------------------------------------- /chatui/api/dist/assets/voice-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/chatui/api/dist/assets/voice-icon.png -------------------------------------------------------------------------------- /chatui/api/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "aichat" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["cairong "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8" 9 | fastapi = {extras = ["all"], version = "^0.90.1"} 10 | httpx = "^0.23.3" 11 | jinja2 = "^3.1.2" 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /chatui/start.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | echo 'app start!'; 3 | 4 | 5 | python3 /app/api/app.py -------------------------------------------------------------------------------- /cmd/README.md: -------------------------------------------------------------------------------- 1 | # cmd 2 | 3 | 三千平台范畴内的各个组件的可执行文件 -------------------------------------------------------------------------------- /cmd/downloader/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.21-alpine AS builder 2 | 3 | ENV GOPROXY https://goproxy.cn,direct 4 | WORKDIR /build 5 | ADD go.mod . 6 | ADD go.sum . 7 | ADD ./cmd/downloader ./cmd/downloader 8 | ADD ./pkg ./pkg 9 | 10 | WORKDIR /build/cmd/downloader 11 | RUN go build -o app main.go 12 | 13 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/alpine-git-lfs:v1.0.2 14 | 15 | ENV TZ Asia/Shanghai 16 | 17 | WORKDIR /app 18 | COPY --from=builder /build/cmd/downloader/app /app/app 19 | 20 | ENTRYPOINT ["./app"] 21 | CMD ["-o","/data", "-s", ""] -------------------------------------------------------------------------------- /cmd/downloader/cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | // rootCmd represents the base command when called without any subcommands 10 | var rootCmd = &cobra.Command{ 11 | Use: "downloader", 12 | Short: "downloader for model or dataset", 13 | Long: `downloader for model or dataset`, 14 | } 15 | 16 | // Execute adds all child commands to the root command and sets flags appropriately. 17 | // This is called by main.main(). It only needs to happen once to the rootCmd. 
18 | func Execute() { 19 | err := rootCmd.Execute() 20 | if err != nil { 21 | os.Exit(1) 22 | } 23 | } 24 | 25 | func init() { 26 | } 27 | -------------------------------------------------------------------------------- /cmd/downloader/internal/consts/consts.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | const ( 4 | GitDownloader = "git_downloader" 5 | OSSDownloader = "oss_downloader" 6 | ) 7 | 8 | const ( 9 | CRD = "crd" 10 | None = "none" 11 | ) 12 | 13 | const ( 14 | Phase = "phase" 15 | PhaseInit = "" 16 | PhaseDownloading = "downloading" 17 | PhaseFail = "fail" 18 | PhaseComplete = "done" 19 | ) 20 | -------------------------------------------------------------------------------- /cmd/downloader/internal/download/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | oss2 "sxwl/3k/cmd/downloader/internal/oss" 5 | "sxwl/3k/cmd/downloader/internal/record" 6 | ) 7 | 8 | type Config struct { 9 | record.RecordConfig 10 | oss2.OSSConfig 11 | GitUrl string 12 | Total int64 13 | OutDir string 14 | Record string 15 | IsCRD bool 16 | } 17 | -------------------------------------------------------------------------------- /cmd/downloader/internal/download/downloader.go: -------------------------------------------------------------------------------- 1 | package download 2 | 3 | import ( 4 | "fmt" 5 | "sxwl/3k/cmd/downloader/internal/consts" 6 | "sxwl/3k/cmd/downloader/internal/download/config" 7 | "sxwl/3k/cmd/downloader/internal/download/git" 8 | "sxwl/3k/cmd/downloader/internal/download/oss" 9 | ) 10 | 11 | type Downloader interface { 12 | Download() error 13 | } 14 | 15 | func NewDownloader(typ string, c config.Config) Downloader { 16 | switch typ { 17 | case consts.GitDownloader: 18 | return git.NewDownloader(c) 19 | case consts.OSSDownloader: 20 | return oss.NewDownloader(c) 21 | default: 22 | 
panic(fmt.Sprintf("downloader type: %s not support", typ)) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /cmd/downloader/internal/errors/errors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrCrdNotExist = errors.New("crd not exists") 7 | ErrJobDownloading = errors.New("job downloading") 8 | ErrJobComplete = errors.New("job has completed") 9 | ErrCrdDataType = errors.New("crd content data type error") 10 | ErrUnsupportedPhase = errors.New("unsupported phase") 11 | ) 12 | -------------------------------------------------------------------------------- /cmd/downloader/internal/oss/config.go: -------------------------------------------------------------------------------- 1 | package oss 2 | 3 | type OSSConfig struct { 4 | Endpoint string 5 | AccessID string 6 | AccessKey string 7 | Bucket string 8 | Object string 9 | } 10 | -------------------------------------------------------------------------------- /cmd/downloader/internal/record/none/none.go: -------------------------------------------------------------------------------- 1 | package none 2 | 3 | import ( 4 | "sxwl/3k/cmd/downloader/internal/consts" 5 | ) 6 | 7 | type Recorder struct { 8 | } 9 | 10 | func NewRecorder() *Recorder { 11 | return &Recorder{} 12 | } 13 | 14 | func (r *Recorder) Check() error { 15 | return nil 16 | } 17 | 18 | func (r *Recorder) Begin() error { 19 | return nil 20 | } 21 | 22 | func (r *Recorder) Fail() error { 23 | return nil 24 | } 25 | 26 | func (r *Recorder) Complete() error { 27 | return nil 28 | } 29 | 30 | func (r *Recorder) Name() string { 31 | return consts.None 32 | } 33 | -------------------------------------------------------------------------------- /cmd/downloader/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "sxwl/3k/cmd/downloader/cmd" 4 | 5 
| func main() { 6 | cmd.Execute() 7 | } 8 | -------------------------------------------------------------------------------- /cmd/modeluploader/.gitignore: -------------------------------------------------------------------------------- 1 | # Executable 2 | modeluploader 3 | -------------------------------------------------------------------------------- /cmd/modeluploader/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 alpine:3.18.4 2 | WORKDIR /app 3 | COPY ./modeluploadjob /app/ 4 | CMD /app/modeluploadjob -------------------------------------------------------------------------------- /cmd/modeluploader/README.md: -------------------------------------------------------------------------------- 1 | # modeluploader 2 | 3 | 此目录是为了构建上传模型文件的可执行文件。 4 | 5 | CPod Manager在创建一个MPIJob的同时,会创建一个K8S Job, 此Job会执行这个可执行文件。 6 | 7 | ## modeluploadjob的工作流程 8 | 1. 监控MPIJob的工作状态。 9 | 2. 如果状态转为完成(代表模型已经训练完成), 从Ceph中读取训练结果,上传至S3存储。 10 | 3. 上传完成后Job结束。 11 | 12 | # 部署 13 | ## 首先要创建Secret 14 | kubectl create secret generic akas4oss -n cpod --from-literal=AK=[AccessKey] --from-literal=AS=[AccessSecret] 15 | -------------------------------------------------------------------------------- /cmd/modeluploader/buildimage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build modeluploadjob.go 3 | docker build -f ./Dockerfile . 
-t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/modeluploader:$(git rev-parse --short HEAD) 4 | docker push sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/modeluploader:$(git rev-parse --short HEAD) 5 | -------------------------------------------------------------------------------- /cmd/scheduler/ftl/README.md: -------------------------------------------------------------------------------- 1 | # FTL 2 | 3 | TODO: Add content 4 | -------------------------------------------------------------------------------- /cmd/sxwlctl/internal/consts/consts.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | const ( 4 | FileCanFinetune = "sxwl-can-fine-tune.md" 5 | FileCanInference = "sxwl-can-inference.md" 6 | FileInferTemplate = "sxwl-infer-template-%s.md" 7 | ) 8 | -------------------------------------------------------------------------------- /cmd/sxwlctl/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2024 NAME HERE 3 | 4 | */ 5 | package main 6 | 7 | import "sxwl/3k/cmd/sxwlctl/cmd" 8 | 9 | func main() { 10 | cmd.Execute() 11 | } 12 | -------------------------------------------------------------------------------- /cmd/uploader/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=$BUILDPLATFORM golang:1.21-alpine AS builder 2 | 3 | ARG TARGETARCH 4 | ARG BUILDPLATFORM 5 | 6 | ENV GOPROXY https://goproxy.cn,direct 7 | WORKDIR /build 8 | ADD go.mod . 9 | ADD go.sum . 
10 | ADD ./cmd/uploader ./cmd/uploader 11 | ADD ./pkg ./pkg 12 | 13 | WORKDIR /build/cmd/uploader 14 | RUN GOOS=linux GOARCH=$TARGETARCH go build -o app main.go 15 | 16 | FROM alpine:3.19 17 | 18 | ENV TZ Asia/Shanghai 19 | 20 | WORKDIR /app 21 | COPY --from=builder /build/cmd/uploader/app /app/app 22 | 23 | ENTRYPOINT ["./app"] -------------------------------------------------------------------------------- /cmd/uploader/README.md: -------------------------------------------------------------------------------- 1 | # 上传工具 2 | 3 | 上传工具被打包成容器镜像,通过 Operator 动态创建上传任务,将各类数据资产(用户微调模型、用户上传模型/数据集等)上传到 4 | 算想云对象存储。 5 | -------------------------------------------------------------------------------- /cmd/uploader/cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | // rootCmd represents the base command when called without any subcommands 10 | var rootCmd = &cobra.Command{ 11 | Use: "uploader", 12 | Short: "uploader for resource uploading in sxwl", 13 | Long: `uploader for resource uploading in sxwl`, 14 | } 15 | 16 | // Execute adds all child commands to the root command and sets flags appropriately. 17 | // This is called by main.main(). It only needs to happen once to the rootCmd. 
18 | func Execute() { 19 | err := rootCmd.Execute() 20 | if err != nil { 21 | os.Exit(1) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cmd/uploader/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "sxwl/3k/cmd/uploader/cmd" 4 | 5 | func main() { 6 | cmd.Execute() 7 | } 8 | -------------------------------------------------------------------------------- /cpodoperator/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | bin/* 8 | Dockerfile.cross 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Kubernetes Generated files - skip generated files, except for vendored files 17 | 18 | !vendor/**/zz_generated.* 19 | 20 | # editor and IDE paraphernalia 21 | .idea 22 | .vscode/ 23 | *.swp 24 | *.swo 25 | *~ 26 | 27 | # Unit test / coverage reports 28 | htmlcov/ 29 | .tox/ 30 | .nox/ 31 | .coverage 32 | .coverage.* 33 | coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *.cover 38 | .hypothesis/ 39 | .pytest_cache/ -------------------------------------------------------------------------------- /cpodoperator/api/v1/README.md: -------------------------------------------------------------------------------- 1 | # V1 2 | 3 | Do not use V1; all clients should use v1beta1. 4 | 5 | These API objects were generated by Kubebuilder. 6 | SXWL.AI is currently in beta phase; we do not release a stable API. 7 | -------------------------------------------------------------------------------- /cpodoperator/cmd/portalsynch/README.md: -------------------------------------------------------------------------------- 1 | # portal synch 2 | 1. synch cpodjobs with portal 3 | 2.
upload cpod info to portal 4 | 5 | ## depends on 6 | configmap named "cpod-info" 7 | it should be created like 8 | ```yaml 9 | apiVersion: v1 10 | data: 11 | access_key: xxx 12 | cpod_id: xxx 13 | kind: ConfigMap 14 | metadata: 15 | name: cpod-info 16 | namespace: cpodjob-system 17 | ``` -------------------------------------------------------------------------------- /cpodoperator/config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | version: v1 8 | group: apiextensions.k8s.io 9 | path: spec/conversion/webhook/clientConfig/service/name 10 | 11 | namespace: 12 | - kind: CustomResourceDefinition 13 | version: v1 14 | group: apiextensions.k8s.io 15 | path: spec/conversion/webhook/clientConfig/service/namespace 16 | create: false 17 | 18 | varReference: 19 | - path: metadata/annotations 20 | -------------------------------------------------------------------------------- /cpodoperator/config/default/manager_config_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | -------------------------------------------------------------------------------- /cpodoperator/config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | - portalsynch.yaml 4 | apiVersion: kustomize.config.k8s.io/v1beta1 5 | kind: Kustomization 6 | images: 7 | - name: controller 8 | newName: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/cpodoperator 9 | newTag: 0ce99ad 10 | - name: portalsynch 11 | newName: 
sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/portalsynch 12 | newTag: 0ce99ad 13 | -------------------------------------------------------------------------------- /cpodoperator/config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/auth_proxy_client_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: clusterrole 6 | app.kubernetes.io/instance: metrics-reader 7 | app.kubernetes.io/component: kube-rbac-proxy 8 | app.kubernetes.io/created-by: cpodjob 9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: metrics-reader 12 | rules: 13 | - nonResourceURLs: 14 | - "/metrics" 15 | verbs: 16 | - get 17 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: clusterrole 6 | app.kubernetes.io/instance: proxy-role 7 | app.kubernetes.io/component: kube-rbac-proxy 8 | app.kubernetes.io/created-by: cpodjob 9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: proxy-role 12 | rules: 13 | - apiGroups: 14 | - authentication.k8s.io 15 | resources: 16 | - tokenreviews 17 | verbs: 18 | - create 19 | - apiGroups: 20 | - authorization.k8s.io 21 | resources: 22 | - subjectaccessreviews 23 | verbs: 24 | - create 25 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/auth_proxy_role_binding.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: clusterrolebinding 6 | app.kubernetes.io/instance: proxy-rolebinding 7 | app.kubernetes.io/component: kube-rbac-proxy 8 | app.kubernetes.io/created-by: cpodjob 9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: proxy-rolebinding 12 | roleRef: 13 | apiGroup: rbac.authorization.k8s.io 14 | kind: ClusterRole 15 | name: proxy-role 16 | subjects: 17 | - kind: ServiceAccount 18 | name: controller-manager 19 | namespace: system 20 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | app.kubernetes.io/name: service 7 | app.kubernetes.io/instance: controller-manager-metrics-service 8 | app.kubernetes.io/component: kube-rbac-proxy 9 | app.kubernetes.io/created-by: cpodjob 10 | app.kubernetes.io/part-of: cpodjob 11 | app.kubernetes.io/managed-by: kustomize 12 | name: controller-manager-metrics-service 13 | namespace: system 14 | spec: 15 | ports: 16 | - name: https 17 | port: 8443 18 | protocol: TCP 19 | targetPort: https 20 | selector: 21 | control-plane: controller-manager 22 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: rolebinding 6 | app.kubernetes.io/instance: leader-election-rolebinding 7 | app.kubernetes.io/component: rbac 8 | app.kubernetes.io/created-by: cpodjob 
9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: leader-election-rolebinding 12 | roleRef: 13 | apiGroup: rbac.authorization.k8s.io 14 | kind: Role 15 | name: leader-election-role 16 | subjects: 17 | - kind: ServiceAccount 18 | name: controller-manager 19 | namespace: system 20 | -------------------------------------------------------------------------------- /cpodoperator/config/rbac/public.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: public 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cluster-admin 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: public -------------------------------------------------------------------------------- /cpodoperator/config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: clusterrolebinding 6 | app.kubernetes.io/instance: manager-rolebinding 7 | app.kubernetes.io/component: rbac 8 | app.kubernetes.io/created-by: cpodjob 9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: manager-rolebinding 12 | roleRef: 13 | apiGroup: rbac.authorization.k8s.io 14 | kind: ClusterRole 15 | name: manager-role 16 | subjects: 17 | - kind: ServiceAccount 18 | name: controller-manager 19 | namespace: system -------------------------------------------------------------------------------- /cpodoperator/config/rbac/service_account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: serviceaccount 6 | app.kubernetes.io/instance: controller-manager-sa 7 | 
app.kubernetes.io/component: rbac 8 | app.kubernetes.io/created-by: cpodjob 9 | app.kubernetes.io/part-of: cpodjob 10 | app.kubernetes.io/managed-by: kustomize 11 | name: controller-manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1_datasetstorage.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1 2 | kind: DataSetStorage 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: datasetstorage 6 | app.kubernetes.io/instance: datasetstorage-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: datasetstorage-sample 11 | spec: 12 | # TODO(user): Add fields here 13 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1_modelstorage.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1 2 | kind: ModelStorage 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: modelstorage 6 | app.kubernetes.io/instance: modelstorage-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: modelstorage-sample 11 | spec: 12 | # TODO(user): Add fields here 13 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1beta1_cpodjob_mpi.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: CPodJob 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: cpodjob 6 | app.kubernetes.io/instance: cpodjob-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: cpodjob-sample 11 | namespace: cpodoperator 12 | spec: 13 | 
image: dockerhub.kubekey.local/kubesphereio/mpi-horovod-mnist:latest 14 | command: 15 | - mpirun 16 | jobType: mpi 17 | gpuRequiredPerReplica: 1 18 | gpuType: NVIDIA-GeForce-RTX-3090 19 | ckptPath: /workspace/ckpt 20 | ckptVolumeSize: 100 21 | workerReplicas: 1 22 | 23 | 24 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1beta1_finetune.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: FineTune 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: finetune 6 | app.kubernetes.io/instance: finetune-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: finetune-sample 11 | namespace: cpod 12 | spec: 13 | model: "LLaMA-2-7B" 14 | dataset: "llama-2-7b-dataset" 15 | hyperParameters: 16 | n_epochs: "3" 17 | batch_size: "4" 18 | learning_rate_multiplier: "5e-5" 19 | 20 | 21 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1beta1_juypterlab.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: JuypterLab 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: juypterlab 6 | app.kubernetes.io/instance: juypterlab-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: juypterlab-sample 11 | spec: 12 | # TODO(user): Add fields here 13 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1beta1_llamafactory.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: LlamaFactory 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: llamafactory 6 | app.kubernetes.io/instance: 
llamafactory-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: llamafactory-sample 11 | spec: 12 | # TODO(user): Add fields here 13 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1beta1_modelstorage.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: ModelStorage 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: modelstorage 6 | app.kubernetes.io/instance: modelstorage-sample 7 | app.kubernetes.io/part-of: cpodjob 8 | app.kubernetes.io/managed-by: kustomize 9 | app.kubernetes.io/created-by: cpodjob 10 | name: modelstorage-sample 11 | spec: 12 | name: "ZhipuAI/chatglm3-6b" 13 | type: "modelscope" -------------------------------------------------------------------------------- /cpodoperator/config/samples/cpod_v1betav1_inference.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cpod.cpod/v1beta1 2 | kind: Inference 3 | metadata: 4 | name: sklearn-iris 5 | namespace: cpod 6 | spec: 7 | predictor: 8 | sklearn: 9 | storageUri: "gs://kfserving-examples/models/sklearn/1.0/model" 10 | name: "" 11 | -------------------------------------------------------------------------------- /cpodoperator/config/samples/kustomization.yaml: -------------------------------------------------------------------------------- 1 | ## Append samples of your project ## 2 | resources: 3 | - cpod_v1beta1_cpodjob.yaml 4 | - cpod_v1beta1_modelstorage.yaml 5 | - cpod_v1_modelstorage.yaml 6 | - cpod_v1_datasetstorage.yaml 7 | - cpod_v1beta1_finetune.yaml 8 | - cpod_v1beta1_juypterlab.yaml 9 | - cpod_v1beta1_llamafactory.yaml 10 | - cpod_v1beta1_jupyterlab.yaml 11 | #+kubebuilder:scaffold:manifestskustomizesamples 12 | -------------------------------------------------------------------------------- 
/cpodoperator/hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /cpodoperator/internal/synchronizer/README.md: -------------------------------------------------------------------------------- 1 | # Synchronizer 2 | 3 | 这里的代码用于在三千平台与算想云之间同步各类信息。 4 | 5 | For PortalSync only. 6 | -------------------------------------------------------------------------------- /cpodoperator/pkg/provider/sxwl/README.md: -------------------------------------------------------------------------------- 1 | # SXWL 2 | 3 | Abstract API to communicate with `Cloud`, including fetching assigned jobs, and reporting CPod status. 
4 | 5 | For PortalSync 6 | -------------------------------------------------------------------------------- /cpodoperator/pkg/resource/README.md: -------------------------------------------------------------------------------- 1 | # resource 2 | collect cpod resource info -------------------------------------------------------------------------------- /cpodoperator/test/cases/modihand_pytorchjob.json: -------------------------------------------------------------------------------- 1 | { 2 | "jobName": "job1", 3 | "gpuNumber": 1, 4 | "gpuType": "NVIDIA-GeForce-RTX-3090", 5 | "ckptPath": "/data", 6 | "ckptVol": "10", 7 | "modelPath": "/sixpen_models/modihand_outputs/test_10059997", 8 | "modelVol": "100", 9 | "imagePath": "dockerhub.kubekey.local/kubesphereio/modihand:test", 10 | "datasetPath": "/tmp/ds", 11 | "jobType": "Pytorch", 12 | "stopType": "0", 13 | "pretrainedModelName": "model-storage-8bfc0ffceca0f0ce", 14 | "runCommand": "torchrun run.py", 15 | "pretrainedModelPath": "/sixpen_models/chatlm3", 16 | "datasetName": "modihand-dataset" 17 | } -------------------------------------------------------------------------------- /deployment/PRICING: -------------------------------------------------------------------------------- 1 | NVIDIA-GeForce-RTX-3090 2.2元/时 2 | NVIDIA-A100-SXM4-40GB 9.4元/时 3 | NVIDIA-A100-SXM4-80GB 13.4元/时 4 | NVIDIA-V100-SXM2-16GB 9.9元/时 5 | -------------------------------------------------------------------------------- /deployment/add_nodes: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# < 4 ]]; then 4 | echo "Add a new node to K8S Cluster: $0 ..." 5 | echo "Got $0 $@" 6 | echo "Exiting ..." 
7 | exit 1 8 | fi 9 | 10 | workpath="${HOME}/kubernetes" 11 | mv fix_sample.py $workpath/fix_sample.py 12 | cd ${workpath} 13 | 14 | echo "请输入sudo密码:" 15 | read pwd 16 | echo $pwd | sudo -S ./kk create config --from-cluster <<< yes 17 | echo $pwd | sudo -S chmod a+w sample.yaml 18 | 19 | python fix_sample.py ${workpath} $1 $2 $3 $4 20 | 21 | echo $pwd | sudo -S ./kk add nodes -f sample.yaml 22 | echo "add k8s node successfully !" 23 | echo $pwd | sudo -S kubectl get nodes 24 | 25 | -------------------------------------------------------------------------------- /deployment/api_test/README.md: -------------------------------------------------------------------------------- 1 | # API Test 2 | 3 | This is a test for API. 4 | 5 | ## Usage 6 | 1. 下载 3k 仓库 7 | ```bash 8 | git clone https://github.com/NascentCore/3k.git 9 | cd 3k 10 | ``` 11 | 12 | 2. 安装 python 依赖 13 | ```bash 14 | pip install requests 15 | ``` 16 | 17 | 3. 配置环境变量 18 | 19 | API_URL 和 TOKEN 根据测试的环境填写相应的值 20 | 21 | ```bash 22 | export SXCLOUD_API_URL='' 23 | export SXCLOUD_API_TOKEN='' 24 | export FEISHU_WEBHOOK='' 25 | ``` 26 | 27 | 4. 执行测试脚本 28 | ```bash 29 | python api_test.py 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /deployment/ceph/README.md: -------------------------------------------------------------------------------- 1 | # CEPH 2 | 3 | Code related to managing CEPH cluster on Kubernetes. 
4 | -------------------------------------------------------------------------------- /deployment/ceph/ceph-admin-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | key: QVFEblBlTmt5dU9hR3hBQUNYMEg1RzFaTEdJaEZNbHlSNlUxaEE9PQ== #为 base64 之后的结果 4 | kind: Secret 5 | metadata: 6 | name: ceph-admin-secret 7 | namespace: kube-system 8 | type: kubernetes.io/rbd 9 | -------------------------------------------------------------------------------- /deployment/ceph/ceph-rbd-sc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: ceph-rbd 5 | annotations: 6 | storageclass.beta.kubernetes.io/is-default-class: "false" 7 | provisioner: ceph.com/rbd 8 | parameters: 9 | monitors: 214.2.5.1,214.2.5.2,214.2.5.3 10 | adminId: admin 11 | adminSecretName: ceph-admin-secret 12 | adminSecretNamespace: kube-system 13 | pool: rbd 14 | userId: kube 15 | userSecretName: ceph-secret 16 | fsType: ext4 17 | imageFormat: "2" 18 | imageFeatures: "layering" 19 | -------------------------------------------------------------------------------- /deployment/ceph/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: ceph-secret 5 | data: 6 | key: 7 | -------------------------------------------------------------------------------- /deployment/ceph/secrets-default.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: ceph-secret 5 | type: "kubernetes.io/rbd" 6 | data: 7 | # ceph auth add client.kube mon 'allow r' osd 'allow rwx pool=kube'# ceph auth get-key client.kube | base64 8 | key: QVFEblBlTmt5dU9hR3hBQUNYMEg1RzFaTEdJaEZNbHlSNlUxaEE9PQ== 9 | -------------------------------------------------------------------------------- 
/deployment/ceph/secrets.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: ceph-admin-secret 5 | namespace: kube-system 6 | type: "kubernetes.io/rbd" 7 | data: 8 | # ceph auth get-key client.admin | base64 9 | key: QVFEblBlTmt5dU9hR3hBQUNYMEg1RzFaTEdJaEZNbHlSNlUxaEE9PQ== 10 | --- 11 | apiVersion: v1 12 | kind: Secret 13 | metadata: 14 | name: ceph-secret 15 | namespace: kube-system 16 | type: "kubernetes.io/rbd" 17 | data: 18 | # ceph auth add client.kube mon 'allow r' osd 'allow rwx pool=kube' 19 | # ceph auth get-key client.kube | base64 20 | key: QVFEblBlTmt5dU9hR3hBQUNYMEg1RzFaTEdJaEZNbHlSNlUxaEE9PQ== 21 | -------------------------------------------------------------------------------- /deployment/ceph/test-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: ceph-pod1 5 | spec: 6 | containers: 7 | - name: ceph-busybox 8 | image: busybox 9 | command: ["sleep", "60000"] 10 | volumeMounts: 11 | - name: ceph-vol1 12 | mountPath: /usr/share/busybox 13 | readOnly: false 14 | volumes: 15 | - name: ceph-vol1 16 | persistentVolumeClaim: 17 | claimName: ceph-claim 18 | --- 19 | kind: PersistentVolumeClaim 20 | apiVersion: v1 21 | metadata: 22 | name: ceph-claim 23 | spec: 24 | storageClassName: ceph-rbd 25 | accessModes: 26 | - ReadWriteOnce 27 | resources: 28 | requests: 29 | storage: 2Gi 30 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/cpodoperator/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/cpodoperator/templates/NOTES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/deployment/charts/sx3k/charts/cpodoperator/templates/NOTES.txt -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/cpodoperator/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.enabled }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: cpod-info 6 | namespace: {{ .Values.namespace }} 7 | data: 8 | access_key: {{ .Values.sxcloud.accessKey | quote }} 9 | api_address: {{ .Values.sxcloud.apiAddress | quote }} 10 | cpod_id: {{ .Values.sxcloud.cpodId| quote }} 11 | log_level: {{ .Values.sxcloud.logLevel | quote }} 12 | oss_bucket: {{ .Values.sxcloud.ossBucket | quote }} 
13 | storage_class: {{ .Values.sxcloud.storageClass | quote }} 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/cpodoperator/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.enabled }} 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: {{ .Values.namespace }} 6 | {{- end }} 7 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/templates/NOTES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/deployment/charts/sx3k/charts/sxcloud/templates/NOTES.txt -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "sxcloud.name" -}} 5 | {{- .Chart.Name -}} 6 | {{- end -}} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 
10 | */}} 11 | {{- define "sxcloud.fullname" -}} 12 | {{- printf "%s-%s" (include "sxcloud.name" .) .Release.Name -}} 13 | {{- end -}} 14 | 15 | {{/* 16 | Create common labels. 17 | */}} 18 | {{- define "sxcloud.labels" -}} 19 | helm.sh/chart: {{ include "sxcloud.name" . }}-{{ .Chart.Version | replace "+" "_" }} 20 | {{- if .Chart.AppVersion }} 21 | app.kubernetes.io/version: {{ .Chart.AppVersion }} 22 | {{- end }} 23 | app.kubernetes.io/managed-by: {{ .Release.Service }} 24 | {{- end -}} 25 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.enabled }} 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | annotations: 6 | kubernetes.io/ingress.class: nginx 7 | name: sxcloud-ingress 8 | namespace: {{ .Values.namespace }} 9 | spec: 10 | rules: 11 | - host: {{ .Values.ingress.domain }} 12 | http: 13 | paths: 14 | - backend: 15 | service: 16 | name: webui-service 17 | port: 18 | number: 80 19 | path: / 20 | pathType: Prefix 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.enabled }} 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: {{ .Values.namespace }} 6 | {{- end }} 7 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/charts/sxcloud/templates/persistence.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.enabled }} 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: mysql-pv-claim 6 | namespace: {{ .Values.namespace }} 7 | annotations: 8 | "helm.sh/resource-policy": keep 9 | spec: 10 | 
accessModes: 11 | - {{ .Values.persistence.accessMode }} 12 | resources: 13 | requests: 14 | storage: {{ .Values.persistence.size }} 15 | storageClassName: {{ .Values.persistence.storageClass }} 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /deployment/charts/sx3k/cufile.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/deployment/charts/sx3k/cufile.log -------------------------------------------------------------------------------- /deployment/charts/sx3k/templates/NOTES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/deployment/charts/sx3k/templates/NOTES.txt -------------------------------------------------------------------------------- /deployment/cpodmanager/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | WORKDIR /app 3 | COPY ./cpodmanager /app/ 4 | CMD /app/cpodmanager 5 | -------------------------------------------------------------------------------- /deployment/docker/images.txt: -------------------------------------------------------------------------------- 1 | juicedata/juicefs-csi-driver:v0.24.0 2 | -------------------------------------------------------------------------------- /deployment/jupyterlab/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.cn-beijing.aliyuncs.com/sxwl-ai/torch-base:latest 2 | 3 | # 安装 JupyterLab 4 | RUN pip install jupyterlab jupyterlab-language-pack-zh-CN numpy matplotlib graphviz 5 | RUN mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/translation-extension && \ 6 | echo '{"locale": "zh_CN"}' > /root/.jupyter/lab/user-settings/@jupyterlab/translation-extension/plugin.jupyterlab-settings 7 
| 8 | # 设置环境变量以支持中文 9 | ENV LANG zh_CN.UTF-8 10 | ENV JUPYTERLAB_LANGUAGE zh-CN 11 | 12 | # 设置工作目录 13 | WORKDIR /workspace 14 | 15 | # 启动 JupyterLab 16 | CMD ["sh", "-c", "jupyter lab --ip=0.0.0.0 --allow-root --NotebookApp.token=$JUPYTER_TOKEN"] 17 | -------------------------------------------------------------------------------- /deployment/manifest/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | # 定义脚本文件名 4 | SCRIPT = make-package.sh 5 | 6 | # 定义默认目标 7 | .PHONY: all 8 | all: generate 9 | 10 | # 定义 generate 目标,执行脚本 11 | .PHONY: generate 12 | generate: $(SCRIPT) 13 | @echo "Executing generate script..." 14 | @sh $(SCRIPT) 15 | 16 | # 确保脚本文件存在 17 | $(SCRIPT): 18 | @echo "Error: $(SCRIPT) not found!" 19 | @exit 1 20 | 21 | # 允许用户清理生成的文件(如果需要) 22 | .PHONY: clean 23 | clean: 24 | @echo "Cleaning up..." 25 | rm -rf 3k-artifacts.tar.gz -------------------------------------------------------------------------------- /deployment/models/README.md: -------------------------------------------------------------------------------- 1 | # 同步模型 2 | `sync_model.py` 用于从 huggingface 或 modelscope 同步开源模型到算想云。 3 | 4 | ## 功能 5 | - 从算想云 API 获取同步模型列表 6 | - 从 huggingface 或 modelscope 下载模型 7 | - 同步模型至算想云 OSS 8 | - 更新算想云模型状态 9 | 10 | ## 部署 11 | - 在新加坡节点部署该脚本 12 | ```crontab 13 | * * * * * cd /data/cairong && /home/cairong/.venv/bin/python sync_model.py 14 | ``` -------------------------------------------------------------------------------- /deployment/modeluploader/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | WORKDIR /app 3 | COPY ./modeluploadjob /app/ 4 | CMD /app/modeluploadjob 5 | -------------------------------------------------------------------------------- /deployment/nfd/README.md: -------------------------------------------------------------------------------- 1 | # NFD 2 | 3 | Values.yaml file for installing [Node Feature Discovery]( 4 | 
https://github.com/kubernetes-sigs/node-feature-discovery) (NFD) components with 5 | HELM. 6 | 7 | In order to be compatible with NVIDIA GPU & Network operator, we also merge the 8 | nfd values.yaml file from 9 | [GPU](https://github.com/NVIDIA/gpu-operator/blob/master/deployments/gpu-operator/values.yaml) 10 | [Network](https://github.com/Mellanox/network-operator/blob/master/deployment/network-operator/values.yaml) 11 | operator and use them here. 12 | 13 | Primarily the `deviceClassWhiteList` config is changed to include all of the GPU 14 | and InfiniBand devices. 15 | -------------------------------------------------------------------------------- /deployment/online/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | # 定义脚本文件名 4 | SCRIPT = make-install.sh 5 | 6 | # 定义默认目标 7 | .PHONY: operators 8 | operators: operators 9 | 10 | # 定义 operators 目标,执行脚本 11 | .PHONY: operators 12 | operators: $(SCRIPT) 13 | @echo "Executing online install script..." 14 | @sh $(SCRIPT) 15 | 16 | # 确保脚本文件存在 17 | $(SCRIPT): 18 | @echo "Error: $(SCRIPT) not found!" 19 | @exit 1 20 | 21 | # 允许用户清理生成的文件(如果需要) 22 | .PHONY: clean 23 | clean: 24 | @echo "Cleaning up..." 
-------------------------------------------------------------------------------- /deployment/operator/local-sc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: local-path 5 | annotations: 6 | storageclass.kubernetes.io/is-default-class: "true" 7 | provisioner: rancher.io/local-path 8 | volumeBindingMode: WaitForFirstConsumer 9 | reclaimPolicy: Delete 10 | -------------------------------------------------------------------------------- /deployment/sxcloud/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | ARG JAR_FILE=aiadmin/aiadmin-system/target/*.jar 3 | COPY ${JAR_FILE} app.jar 4 | ENTRYPOINT ["java","-jar","/app.jar"] 5 | -------------------------------------------------------------------------------- /deployment/values/README.md: -------------------------------------------------------------------------------- 1 | # 说明 2 | 3 | 此目录下为交付物各 operator 安装时的配置选项 4 | -------------------------------------------------------------------------------- /deployment/values/kruise.yaml: -------------------------------------------------------------------------------- 1 | featureGates: "ImagePullJobGate=true" 2 | manager: 3 | replicas: 2 4 | image: 5 | repository: dockerhub.kubekey.local/kubesphereio/kruise-manager 6 | tag: v1.5.1 7 | -------------------------------------------------------------------------------- /deployment/values/loki-stack.yaml: -------------------------------------------------------------------------------- 1 | loki: 2 | enabled: true 3 | image: 4 | repository: dockerhub.kubekey.local/kubesphereio/loki 5 | tag: 2.6.1 6 | persistence: 7 | enabled: true 8 | storageClassName: juicefs-sc 9 | accessModes: 10 | - ReadWriteOnce 11 | size: 100Gi 12 | config: 13 | table_manager: 14 | retention_deletes_enabled: true 15 | retention_period: 720h 16 | 17 | promtail: 18 | enabled: true 19 | 
image: 20 | registry: dockerhub.kubekey.local 21 | repository: kubesphereio/promtail 22 | tag: 2.8.3 23 | 24 | test_pod: 25 | image: dockerhub.kubekey.local/kubesphereio/bats:1.8.2 26 | -------------------------------------------------------------------------------- /deployment/values/node-feature-discovery.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: dockerhub.kubekey.local/kubesphereio/node-feature-discovery 3 | 4 | worker: 5 | config: 6 | sources: 7 | pci: 8 | deviceClassWhitelist: 9 | - "02" 10 | - "0200" 11 | - "0207" 12 | - "0300" 13 | - "0302" 14 | 15 | master: 16 | config: 17 | extraLabelNs: 18 | - "nvidia.com" 19 | -------------------------------------------------------------------------------- /deployment/values/rook-ceph-cluster.yaml: -------------------------------------------------------------------------------- 1 | cephClusterSpec: 2 | cephVersion: 3 | image: dockerhub.kubekey.local/kubesphereio/ceph:v17.2.6 4 | placement: 5 | all: 6 | nodeAffinity: 7 | requiredDuringSchedulingIgnoredDuringExecution: 8 | nodeSelectorTerms: 9 | - matchExpressions: 10 | - key: role 11 | operator: In 12 | values: 13 | - ceph 14 | 15 | toolbox: 16 | enabled: true 17 | image: dockerhub.kubekey.local/kubesphereio/ceph:v17.2.6 18 | -------------------------------------------------------------------------------- /deployment/values_online/README.md: -------------------------------------------------------------------------------- 1 | # 说明 2 | 3 | 此目录下为交付物各 operator 安装时的配置选项 4 | -------------------------------------------------------------------------------- /deployment/values_online/kruise.yaml: -------------------------------------------------------------------------------- 1 | featureGates: "ImagePullJobGate=true" 2 | manager: 3 | replicas: 2 4 | image: 5 | repository: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/kruise-manager 6 | tag: v1.5.1 7 | 
-------------------------------------------------------------------------------- /deployment/values_online/loki-stack.yaml: -------------------------------------------------------------------------------- 1 | loki: 2 | enabled: true 3 | image: 4 | repository: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/loki 5 | tag: 2.6.1 6 | persistence: 7 | enabled: true 8 | storageClassName: juicefs-sc 9 | accessModes: 10 | - ReadWriteOnce 11 | size: 100Gi 12 | config: 13 | table_manager: 14 | retention_deletes_enabled: true 15 | retention_period: 720h 16 | 17 | promtail: 18 | enabled: true 19 | image: 20 | registry: sxwl-registry.cn-beijing.cr.aliyuncs.com 21 | repository: sxwl-ai/promtail 22 | tag: 2.8.3 23 | 24 | test_pod: 25 | image: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/bats:1.8.2 26 | -------------------------------------------------------------------------------- /deployment/values_online/node-feature-discovery.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/node-feature-discovery 3 | 4 | worker: 5 | config: 6 | sources: 7 | pci: 8 | deviceClassWhitelist: 9 | - "02" 10 | - "0200" 11 | - "0207" 12 | - "0300" 13 | - "0302" 14 | 15 | master: 16 | config: 17 | extraLabelNs: 18 | - "nvidia.com" 19 | -------------------------------------------------------------------------------- /deployment/values_online/rook-ceph-cluster.yaml: -------------------------------------------------------------------------------- 1 | cephClusterSpec: 2 | cephVersion: 3 | image: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/ceph:v17.2.6 4 | placement: 5 | all: 6 | nodeAffinity: 7 | requiredDuringSchedulingIgnoredDuringExecution: 8 | nodeSelectorTerms: 9 | - matchExpressions: 10 | - key: role 11 | operator: In 12 | values: 13 | - ceph 14 | 15 | toolbox: 16 | enabled: true 17 | image: sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/ceph:v17.2.6 18 | 
-------------------------------------------------------------------------------- /deployment/yaml_apps/image-preload.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps.kruise.io/v1alpha1 2 | kind: ImageListPullJob 3 | metadata: 4 | name: job-with-never 5 | spec: 6 | images: 7 | - sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/torch-base:v2024-01-12-01 8 | parallelism: 3 9 | completionPolicy: 10 | type: Never 11 | pullPolicy: 12 | backoffLimit: 3 13 | timeoutSeconds: 600 14 | -------------------------------------------------------------------------------- /deployment/yaml_apps_online/image-preload.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps.kruise.io/v1alpha1 2 | kind: ImageListPullJob 3 | metadata: 4 | name: job-with-never 5 | spec: 6 | images: 7 | - sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/torch-base:v2024-01-12-01 8 | parallelism: 3 9 | completionPolicy: 10 | type: Never 11 | pullPolicy: 12 | backoffLimit: 3 13 | timeoutSeconds: 600 14 | -------------------------------------------------------------------------------- /devops/CLI_LIST.md: -------------------------------------------------------------------------------- 1 | # List of command-line tools 2 | 3 | ## Common 4 | * `lstopo`: show the topology of the system. It gives information about the NUMA memory nodes, shared caches, CPU packages, processor cores and threads and much more. 5 | * `lspci -t`: list PCI devices in a tree, shows their connections. [Seems the same as above] 6 | 7 | ## IB 8 | * `ibping`: Pings an InfiniBand address. Issued on the Linux InfiniBand host. 9 | * `mst`: MST (Mellanox Software Tools) service 10 | -------------------------------------------------------------------------------- /devops/DOCKER.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | Do the following to overcome the GFW: 4 | 5 | 1. 
Build image on GCP 6 | 2. Push the image onto Aliyun Singapore 7 | 3. Pull image from China Mainland 8 | 9 | ```bash 10 | sudo apt install git docker.io 11 | sudo usermod -aG docker $USER 12 | newgrp docker 13 | ``` 14 | -------------------------------------------------------------------------------- /devops/GPU_MODELS: -------------------------------------------------------------------------------- 1 | # This lists the models as discovered by NVIDIA GPU Feature Discovery (GFD), 2 | # and populated by K8s Node Feature Discovery (NFD) 3 | # These values are copied from actual hardware and a complete NFD & GFD installation on K8s, 4 | # and then use kubectl describe node to get the actual model 5 | # These models are populated onto portal for customers to do selection 6 | # TODO(peiqing,jiming): Let the cluster report GPU models 7 | NVIDIA-A100-SXM4-80GB 8 | NVIDIA-GeForce-RTX-3090 9 | -------------------------------------------------------------------------------- /devops/IB.md: -------------------------------------------------------------------------------- 1 | # IB 2 | 3 | ``` 4 | numactl -N 0 ib_read_bw -F 1.1.1.177 -d mlx5_0 -b --report_gbits --run_infinitely 5 | cat << EOF > /etc/modules-load.d/custom.conf 6 | nvidia_peermem 7 | ib_umad 8 | rdma_ucm 9 | rdma_cm 10 | ib_core 11 | EOF 12 | ``` 13 | 硬件、交换机、物理拓扑设计不合理、驱动固件版本、抖动、内核模块 14 | -------------------------------------------------------------------------------- /devops/README.md: -------------------------------------------------------------------------------- 1 | # DevOps 2 | 3 | Scripts and tools and documentation for working with the codebase in this repo. 4 | Including CI/CD, release, deployment etc. 5 | 6 | For example, anyone who wants to contribute to or use 3k, can consult content 7 | here to setup the correct environment. 
8 | 9 | ## Helm Charts release 10 | - 在 3k repo 中加入了 deployment/charts 目录 11 | - 其中 `sx3k` 为父 chart ,其中包含 `sxcloud` 和 `cpodoperator` 两个子 chart 12 | - 子 chart 可通过 `values.yaml` 中的 `enabled` 来控制是否安装 13 | - 发布 release 时只需修改父 chart 下的 `values.yaml`,将镜像更新为新的版本,然后添加相应的 tag 即可自动发布 14 | ```bash 15 | git tag -a v1.0.0 -m "Release version 1.0.0" 16 | git push origin v1.0.0 17 | ``` -------------------------------------------------------------------------------- /devops/ansible/README.md: -------------------------------------------------------------------------------- 1 | # Ansible 2 | 3 | Ansible files to setup development environment on Linux and Mac. 4 | 5 | ## user.yaml 6 | ### 功能 7 | - 在 managed_nodes 机器上创建用户 8 | - 为该用户授予sudo权限 9 | - 将用户加入 docker 用户组,如果该用户组存在 10 | - 为该用户创建ssh密钥对 11 | - 将公钥复制到所有节点上,使节点间ssh互信 12 | 13 | ### 用法 14 | 1. 创建你的 inventory 文件,示例如下: 15 | ```bash 16 | $ cat my-hosts 17 | [managed_nodes] 18 | worker1 19 | worker2 20 | ``` 21 | 22 | 2. 执行如下命令 23 | ```bash 24 | USER_NAME=my-user-name USER_PASSWORD=my-password ansible-playbook -i my-hosts user.yml 25 | ``` 26 | -------------------------------------------------------------------------------- /devops/ansible/go.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Install go related packages 3 | # TODO(yzhao/cairong): Add more actions to install full Golang environment 4 | - name: Dev machine playbook 5 | hosts: localhost 6 | connection: local 7 | vars: 8 | # Add more variables 9 | tasks: 10 | - name: install golangci-lint 11 | shell: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b /usr/local/go/bin v1.54.2 12 | sudo: true 13 | tags: go 14 | -------------------------------------------------------------------------------- /devops/docker/alpine-git-lfs/Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用 Alpine 基础镜像 2 | FROM alpine:latest 3 | 4 | # 安装 Git 和 Git 
LFS 5 | RUN apk add --no-cache git git-lfs 6 | 7 | # 设置 Git LFS 8 | RUN git lfs install -------------------------------------------------------------------------------- /devops/docker/cpodmanager/README.md: -------------------------------------------------------------------------------- 1 | # CpodManager 2 | 3 | Build cpod manager docker container image 4 | TODO(yzhao): Change to use bazel 5 | 6 | ```shell 7 | cd "$(git rev-parse --show-toplevel)" 8 | docker build -f devops/docker/cpodmanager/Dockerfile . -t cpodmanager 9 | docker run cpodmanager /app/cpodmanager 10 | ``` 11 | -------------------------------------------------------------------------------- /devops/docker/ib-exporter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | WORKDIR /infiniband-exporter 3 | RUN apt-get update --fix-missing && apt-get install -y python3 pip infiniband-diags --fix-missing && \ 4 | pip install prometheus-client==0.7.1 5 | ADD https://github.com/guilbaults/infiniband-exporter/blob/master/infiniband-exporter.py ./ 6 | CMD python3 infiniband-exporter.py 7 | -------------------------------------------------------------------------------- /devops/docker/ib-exporter/README.md: -------------------------------------------------------------------------------- 1 | # Infiniband Exporter 2 | 3 | An open source Infiniband Exporter to expose IB-related metrics. 4 | -------------------------------------------------------------------------------- /devops/force_delete_ns.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | if [[ $# < 1 ]]; then 4 | echo "Need namespace, $0 ..." 
5 | exit 1 6 | fi 7 | 8 | ns="$1" 9 | 10 | kubectl get namespace "${ns}" -o json |\ 11 | tr -d "\n" |\ 12 | sed "s/\"finalizers\": \[[^]]\+\]/\"finalizers\": []/" |\ 13 | kubectl replace --raw /api/v1/namespaces/"${ns}"/finalize -f - 14 | -------------------------------------------------------------------------------- /devops/gcp/README.md: -------------------------------------------------------------------------------- 1 | # GCP 2 | 3 | Gcp related operational docs 4 | -------------------------------------------------------------------------------- /devops/go_proxy.sh: -------------------------------------------------------------------------------- 1 | # Based on https://goproxy.cn/ 2 | # You need to `source go_proxy.sh` for the change to take effect 3 | go env -w GO111MODULE=on 4 | go env -w GOPROXY=https://goproxy.cn,direct 5 | -------------------------------------------------------------------------------- /docs/BILLING.md: -------------------------------------------------------------------------------- 1 | # Billing 2 | 3 | TODO: @chenshu 4 | * 介绍如何充值 5 | * 提交任务如何扣费 6 | * 欠费会发生什么情况 7 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/docs/README.md -------------------------------------------------------------------------------- /e2e/model/gpt3-1.3b/1h1g.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "kubeflow.org/v1" 2 | kind: PyTorchJob 3 | metadata: 4 | name: gpt3-1h1g 5 | namespace: training-operator 6 | spec: 7 | pytorchReplicaSpecs: 8 | Worker: 9 | replicas: 1 10 | restartPolicy: OnFailure 11 | template: 12 | spec: 13 | containers: 14 | - name: pytorch 15 | image: dockerhub.kubekey.local/kubesphereio/modelscope_gpt3_1h1g1dp:latest 16 | imagePullPolicy: Always 17 | command: 18 | - 
"torchrun" 19 | - "finetune_poetry.py" -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Various testing and verification workloads. 4 | -------------------------------------------------------------------------------- /examples/airplane-demo/.gitignore: -------------------------------------------------------------------------------- 1 | # 操作系统生成的文件 2 | .DS_Store 3 | Thumbs.db 4 | 5 | # IDE 和编辑器文件 6 | .idea/ 7 | .vscode/ 8 | *.swp 9 | *.swo 10 | *~ 11 | 12 | # 依赖目录 13 | node_modules/ 14 | vendor/ 15 | .env 16 | .venv/ 17 | .python-version 18 | 19 | # 编译输出 20 | dist/ 21 | build/ 22 | out/ 23 | *.class 24 | *.o 25 | *.pyc 26 | 27 | # 日志文件 28 | *.log 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | 33 | # 本地配置文件 34 | .env.local 35 | .env.development.local 36 | .env.test.local 37 | .env.production.local 38 | 39 | # 缓存目录 40 | .cache/ 41 | .temp/ 42 | .tmp/ 43 | 44 | # 数据目录 45 | data/images/ 46 | 47 | # 测试覆盖率报告 48 | coverage/ 49 | -------------------------------------------------------------------------------- /examples/airplane-demo/data/images.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/airplane-demo/data/images.db -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/.env.development: -------------------------------------------------------------------------------- 1 | VITE_API_BASE_URL=http://localhost:8000/api -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/.env.production: -------------------------------------------------------------------------------- 1 | VITE_API_BASE_URL=https://your-production-domain.com/api 
-------------------------------------------------------------------------------- /examples/airplane-demo/frontend/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 飞机图片搜索 7 | 8 | 9 |
10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite", 7 | "build": "vite build", 8 | "preview": "vite preview" 9 | }, 10 | "dependencies": { 11 | "vue": "^3.3.0", 12 | "axios": "^1.6.0", 13 | "element-plus": "^2.4.0" 14 | }, 15 | "devDependencies": { 16 | "@vitejs/plugin-vue": "^4.5.0", 17 | "vite": "^5.0.0" 18 | } 19 | } -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/src/App.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/airplane-demo/frontend/src/assets/logo.png -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/src/main.js: -------------------------------------------------------------------------------- 1 | import { createApp } from 'vue' 2 | import ElementPlus from 'element-plus' 3 | import 'element-plus/dist/index.css' 4 | import App from './App.vue' 5 | 6 | const app = createApp(App) 7 | app.use(ElementPlus) 8 | app.mount('#app') -------------------------------------------------------------------------------- /examples/airplane-demo/frontend/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import vue from '@vitejs/plugin-vue' 3 | 4 | export default defineConfig({ 5 | plugins: [vue()] 6 | }) 
-------------------------------------------------------------------------------- /examples/airplane-demo/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==24.1.0 2 | annotated-types==0.7.0 3 | anyio==4.7.0 4 | certifi==2024.8.30 5 | charset-normalizer==3.4.0 6 | click==8.1.7 7 | exceptiongroup==1.2.2 8 | fastapi==0.115.6 9 | h11==0.14.0 10 | idna==3.10 11 | jieba==0.42.1 12 | pydantic==2.10.3 13 | pydantic_core==2.27.1 14 | python-multipart==0.0.19 15 | requests==2.32.3 16 | sniffio==1.3.1 17 | starlette==0.41.3 18 | typing_extensions==4.12.2 19 | urllib3==2.2.3 20 | uvicorn==0.32.1 21 | -------------------------------------------------------------------------------- /examples/basic-transformer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y build-essential 5 | 6 | WORKDIR /basic_transformer 7 | 8 | COPY . ./ 9 | 10 | ENV PIP_ROOT_USER_ACTION=ignore 11 | 12 | RUN python3 -m pip config \ 13 | set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ 14 | python3 -m pip install --no-cache-dir --upgrade pip && \ 15 | python3 -m pip install --no-cache-dir -r requirements.txt -------------------------------------------------------------------------------- /examples/basic-transformer/README.md: -------------------------------------------------------------------------------- 1 | # Basic Transformer 2 | 3 | A basic transformer written in PyTorch, with only the network definition, which 4 | is used as a convenience for performing Transformer training in a heterogeneous 5 | cluster. 6 | 7 | It has no rich integrations with tooling. 8 | 9 | Future extension might include inference. 
10 | 11 | ## 运行 12 | 13 | ```bash 14 | 15 | pip install -r requirements.txt 16 | 17 | bash train.sh 18 | 19 | ``` 20 | -------------------------------------------------------------------------------- /examples/basic-transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch == 2.1.0 2 | torchtext 3 | onnxruntime 4 | numpy -------------------------------------------------------------------------------- /examples/basic-transformer/run_onnx_inference.sh: -------------------------------------------------------------------------------- 1 | cp ./scripts/onnx_inference.py . 2 | mkdir -p train_out 3 | python onnx_inference.py 4 | rm -rf onnx_inference.py -------------------------------------------------------------------------------- /examples/basic-transformer/train.sh: -------------------------------------------------------------------------------- 1 | cp ./scripts/train_script.py . 2 | mkdir -p train_out 3 | python train_script.py 4 | rm -rf train_script.py -------------------------------------------------------------------------------- /examples/basic-transformer/transform_to_onnx.sh: -------------------------------------------------------------------------------- 1 | cp ./scripts/to_onnx.py . 
2 | mkdir -p train_out 3 | python to_onnx.py 4 | rm -rf to_onnx.py -------------------------------------------------------------------------------- /examples/basic-transformer/utils/model_config.py: -------------------------------------------------------------------------------- 1 | model_config = { 2 | 'vocab_size':20000, # 3 | 'num_layers':6, # 4 | 'd_model':512, # 5 | 'd_ff':2048, # 6 | 'n_head': 8, # 7 | 'dropout':0.1, # 8 | 'train_length': 512 9 | } -------------------------------------------------------------------------------- /examples/bert/Dockerfile: -------------------------------------------------------------------------------- 1 | # cuda_base was built from base.Dockerfile 2 | # You must change this to the right one you built from base.Dockerfile 3 | FROM registry.ap-southeast-1.aliyuncs.com/sxwl-ai/cuda_base:2023-10-23 4 | WORKDIR /workspace 5 | # Include dataset for hermetic testing 6 | RUN wget --progress=dot:giga https://sxwl-ai.oss-cn-beijing.aliyuncs.com/hf-datasets/hf_wikitext.tar.gz && \ 7 | mkdir dataset && tar zxvf hf_wikitext.tar.gz --directory dataset && \ 8 | rm hf_wikitext.tar.gz 9 | # Allow editing source file when exec inside the container 10 | RUN apt-get update --fix-missing && apt-get install -y vim --fix-missing 11 | COPY *.py ./ 12 | COPY model_roberta_base ./model_roberta_base 13 | -------------------------------------------------------------------------------- /examples/bert/deepspeed_env: -------------------------------------------------------------------------------- 1 | NCCL_SOCKET_IFNAME=eth0 2 | NCCL_DEBUG=INFO 3 | NCCL_IB_DISABLE=1 4 | NCCL_IGNORE_DISABLED_P2P=1 5 | -------------------------------------------------------------------------------- /examples/bert/k8s/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | To launch Bert training MPI Job, run below command: 4 | 5 | ``` 6 | kubectl apply -f mpi_bert_ds.yaml -n bert-training 7 | # Check pods are created 8 | 
kubectl get pods -n bert-training 9 | ``` 10 | 11 | This will create a MPIJob in the `bert-training` namespace. 12 | -------------------------------------------------------------------------------- /examples/bert/k8s/bert_cpu.yaml: -------------------------------------------------------------------------------- 1 | # Single CPU bert training on Kubernetes 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: bert-cpu 6 | spec: 7 | restartPolicy: Never 8 | containers: 9 | - name: bert-container 10 | image: swr.cn-east-3.myhuaweicloud.com/sxwl/train_bert_deepspeed:v1 11 | imagePullPolicy: IfNotPresent 12 | args: 13 | - python3 14 | - train_bert.py 15 | - --checkpoint_dir 16 | - ./experiments/single/ 17 | -------------------------------------------------------------------------------- /examples/bert/k8s/bert_gpu.yaml: -------------------------------------------------------------------------------- 1 | # Single GPU training on K8s 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: bert-gpu 6 | spec: 7 | restartPolicy: Never 8 | containers: 9 | - name: bert-container 10 | image: swr.cn-east-3.myhuaweicloud.com/sxwl/train_bert_deepspeed:v1 11 | resources: 12 | limits: 13 | nvidia.com/gpu: 1 # requesting 1 GPU 14 | imagePullPolicy: IfNotPresent 15 | args: 16 | - python3 17 | - train_bert.py 18 | - --local_rank 19 | - "0" 20 | - --checkpoint_dir 21 | - ./experiments/single/ 22 | tolerations: 23 | - key: nvidia.com/gpu 24 | operator: Exists 25 | effect: NoSchedule 26 | -------------------------------------------------------------------------------- /examples/bert/model_roberta_base/README.md: -------------------------------------------------------------------------------- 1 | # Model Roberta Base 2 | 3 | Here are the metadata files for the bert model's pre-trained weights. 4 | These are not used in any way, but kept for records. 
5 | -------------------------------------------------------------------------------- /examples/bert/model_roberta_base/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": "", 3 | "cls_token": "", 4 | "eos_token": "", 5 | "mask_token": { 6 | "content": "", 7 | "lstrip": true, 8 | "normalized": false, 9 | "rstrip": false, 10 | "single_word": false 11 | }, 12 | "pad_token": "", 13 | "sep_token": "", 14 | "unk_token": "" 15 | } 16 | -------------------------------------------------------------------------------- /examples/bert/model_roberta_base/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": "", 4 | "cls_token": "", 5 | "eos_token": "", 6 | "errors": "replace", 7 | "mask_token": "", 8 | "model_max_length": 512, 9 | "pad_token": "", 10 | "sep_token": "", 11 | "special_tokens_map_file": null, 12 | "tokenizer_class": "RobertaTokenizer", 13 | "trim_offsets": true, 14 | "unk_token": "" 15 | } 16 | -------------------------------------------------------------------------------- /examples/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==2.12.0 2 | deepspeed==0.10.0 3 | fire==0.4.0 4 | loguru==0.5.3 5 | numpy==1.24.3 6 | pytz==2023.3 7 | sh==1.14.2 8 | torch==2.0.1 9 | transformers==4.27.1 10 | tensorboard 11 | mpi4py 12 | -------------------------------------------------------------------------------- /examples/dataset/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/llamafactory:finetune-v1alpha6 2 | WORKDIR /demo 3 | COPY demo.py /workspace 4 | CMD ["python", "demo.py"] -------------------------------------------------------------------------------- /examples/env-verify/README.md: 
-------------------------------------------------------------------------------- 1 | # 介绍 2 | 检测机器是否安装完成了model scope运行所需的环境 3 | 4 | ## cuda 5 | nvcc test_cuda.cu -o test_cuda 6 | ./test_cuda 7 | 8 | ## cuDNN 9 | nvcc -I/usr/local/cuda/include -L/usr/local/cuda/lib64 test_cudnn.cu -o test_cudnn -lcudnn 10 | ./test_cudnn 11 | 12 | ## model scope 运行 13 | 进入相应的python环境然后开始检测. 14 | 15 | ### model scope 推理 16 | python hello_modelscope_inference.py 17 | 18 | ### model scope 单机单卡 19 | python hello_modelscope_train_single.py 20 | 21 | ### model scope 单机多卡 22 | torchrun --nproc_per_node=4 --master_port=9527 ./hello_modelscope_train_multi.py 23 | 24 | -------------------------------------------------------------------------------- /examples/env-verify/hello_modelscope_inference.py: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | from modelscope.pipelines import pipeline 4 | word_segmentation = pipeline('word-segmentation',model='damo/nlp_structbert_word-segmentation_chinese-base') 5 | 6 | input_str = '今天天气不错,适合出去游玩' 7 | print(word_segmentation(input_str)) 8 | -------------------------------------------------------------------------------- /examples/env-verify/test_cuda.cmd: -------------------------------------------------------------------------------- 1 | nvcc test_cuda.cu -o test_cuda 2 | ./test_cuda 3 | -------------------------------------------------------------------------------- /examples/env-verify/test_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __global__ void add(int a, int b, int *c) { 4 | *c = a + b; 5 | } 6 | 7 | int main() { 8 | int c; 9 | int *dev_c; 10 | cudaMalloc((void **)&dev_c, sizeof(int)); 11 | add<<<1,1>>>(2, 7, dev_c); 12 | cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost); 13 | cudaFree(dev_c); 14 | printf("2 + 7 = %d\n", c); 15 | return 0; 16 | } 17 | 
-------------------------------------------------------------------------------- /examples/env-verify/test_cudnn.cmd: -------------------------------------------------------------------------------- 1 | nvcc -I/usr/local/cuda/include -L/usr/local/cuda/lib64 test_cudnn.cu -o test_cudnn -lcudnn 2 | ./test_cudnn 3 | -------------------------------------------------------------------------------- /examples/env-verify/test_cudnn.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | cudnnHandle_t cudnn; 6 | cudnnCreate(&cudnn); 7 | 8 | // 输出cuDNN版本信息 9 | std::cout << "cuDNN version: " << CUDNN_VERSION << std::endl; 10 | 11 | // 验证cuDNN的一些功能,可以根据需要添加其他操作 12 | cudnnTensorDescriptor_t tensorDesc; 13 | cudnnCreateTensorDescriptor(&tensorDesc); 14 | cudnnSetTensor4dDescriptor(tensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 3, 224, 224); 15 | 16 | std::cout << "cuDNN validation successful." << std::endl; 17 | 18 | // 释放资源 19 | cudnnDestroyTensorDescriptor(tensorDesc); 20 | cudnnDestroy(cudnn); 21 | 22 | return 0; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /examples/exporter/exporter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/ubuntu:20.04 2 | WORKDIR /workspace 3 | COPY ./exporter ./ 4 | CMD ["./exporter"] -------------------------------------------------------------------------------- /examples/exporter/exporter/README.md: -------------------------------------------------------------------------------- 1 | 1. 创建测试pod 2 | 3 | ```bash 4 | 5 | kubectl create -f pod.yaml 6 | 7 | ``` 8 | 9 | 2. 
查看pod日志 10 | 11 | ```bash 12 | 13 | kubectl -n gpu-operator logs exporter 14 | 15 | ``` 16 | 17 | ![alt text](image.png) -------------------------------------------------------------------------------- /examples/exporter/exporter/exporter: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/exporter/exporter/exporter -------------------------------------------------------------------------------- /examples/exporter/exporter/go.mod: -------------------------------------------------------------------------------- 1 | module sxwl.ai/exporter 2 | 3 | go 1.23.0 4 | 5 | require ( 6 | github.com/sirupsen/logrus v1.9.3 7 | google.golang.org/grpc v1.65.0 8 | k8s.io/kubelet v0.30.2 9 | ) 10 | 11 | require ( 12 | github.com/gogo/protobuf v1.3.2 // indirect 13 | golang.org/x/net v0.25.0 // indirect 14 | golang.org/x/sys v0.20.0 // indirect 15 | golang.org/x/text v0.15.0 // indirect 16 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect 17 | google.golang.org/protobuf v1.34.1 // indirect 18 | ) 19 | -------------------------------------------------------------------------------- /examples/exporter/exporter/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/exporter/exporter/image.png -------------------------------------------------------------------------------- /examples/face-demo/.gitignore: -------------------------------------------------------------------------------- 1 | # 数据目录 2 | data/ 3 | 4 | # docker数据目录 5 | volumes/ 6 | 7 | # Python 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # 数据库文件 13 | *.db 14 | *.sqlite 15 | *.sqlite3 16 | 17 | # 模型文件 18 | *.pth 19 | *.pt 20 | *.h5 21 | 22 | # 日志文件 23 | *.log 24 | logs/ 25 | 26 | # 环境配置 27 | .env 28 | venv/ 29 | 30 
| # Milvus数据 31 | milvus/ 32 | 33 | # Vue.js 项目 34 | frontend/node_modules/ 35 | frontend/dist/ 36 | frontend/.env.local 37 | frontend/.env.*.local 38 | 39 | # 日志文件 40 | frontend/npm-debug.log* 41 | frontend/yarn-debug.log* 42 | frontend/yarn-error.log* 43 | frontend/pnpm-debug.log* 44 | 45 | # 编辑器目录和文件 46 | .idea/ 47 | .vscode/ 48 | *.suo 49 | *.ntvs* 50 | *.njsproj 51 | *.sln 52 | *.sw? -------------------------------------------------------------------------------- /examples/face-demo/backend/.python-version: -------------------------------------------------------------------------------- 1 | 3.10.7 2 | -------------------------------------------------------------------------------- /examples/face-demo/backend/util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | # 创建logger 5 | logger = logging.getLogger('face_processor') 6 | logger.setLevel(logging.INFO) 7 | 8 | # 创建控制台处理器 9 | console_handler = logging.StreamHandler(sys.stdout) 10 | console_handler.setLevel(logging.INFO) 11 | 12 | # 创建格式化器 13 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 14 | console_handler.setFormatter(formatter) 15 | 16 | # 将处理器添加到logger 17 | logger.addHandler(console_handler) -------------------------------------------------------------------------------- /examples/face-demo/frontend/.env.development: -------------------------------------------------------------------------------- 1 | VITE_API_BASE_URL=http://localhost:8000 -------------------------------------------------------------------------------- /examples/face-demo/frontend/.env.production: -------------------------------------------------------------------------------- 1 | VITE_API_BASE_URL=https://你的生产环境域名 -------------------------------------------------------------------------------- /examples/face-demo/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 
3 | 4 | 5 | 6 | 算想云人像图库 7 | 8 | 9 |
10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/face-demo/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite", 7 | "build": "vite build", 8 | "preview": "vite preview" 9 | }, 10 | "dependencies": { 11 | "axios": "^1.7.9", 12 | "element-plus": "^2.9.1", 13 | "vue": "^3.3.0", 14 | "vue-router": "^4.0.0" 15 | }, 16 | "devDependencies": { 17 | "@vitejs/plugin-vue": "^4.5.0", 18 | "vite": "^5.0.0" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/face-demo/frontend/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/face-demo/frontend/src/assets/logo.png -------------------------------------------------------------------------------- /examples/face-demo/frontend/src/main.js: -------------------------------------------------------------------------------- 1 | import { createApp } from 'vue' 2 | import ElementPlus from 'element-plus' 3 | import 'element-plus/dist/index.css' 4 | import App from './App.vue' 5 | import router from './router' 6 | import axios from 'axios' 7 | 8 | const baseURL = import.meta.env.VITE_API_BASE_URL 9 | if (!baseURL) { 10 | console.warn('VITE_API_BASE_URL 未设置') 11 | } 12 | axios.defaults.baseURL = baseURL 13 | 14 | const app = createApp(App) 15 | app.use(ElementPlus) 16 | app.use(router) 17 | app.mount('#app') -------------------------------------------------------------------------------- /examples/face-demo/frontend/src/router.js: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHistory } from 'vue-router' 2 | import ImageManagement from 
'./views/ImageManagement.vue' 3 | import Search from './views/Search.vue' 4 | 5 | const routes = [ 6 | { 7 | path: '/', 8 | redirect: '/image-management' 9 | }, 10 | { 11 | path: '/image-management', 12 | name: 'ImageManagement', 13 | component: ImageManagement 14 | }, 15 | { 16 | path: '/search', 17 | name: 'Search', 18 | component: Search 19 | } 20 | ] 21 | 22 | const router = createRouter({ 23 | history: createWebHistory(), 24 | routes 25 | }) 26 | 27 | export default router -------------------------------------------------------------------------------- /examples/face-demo/frontend/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig, loadEnv } from 'vite' 2 | import vue from '@vitejs/plugin-vue' 3 | import path from 'path' 4 | 5 | export default defineConfig(({ mode }) => { 6 | const env = loadEnv(mode, process.cwd()) 7 | 8 | return { 9 | plugins: [vue()], 10 | server: { 11 | proxy: { 12 | '/api': { 13 | target: env.VITE_API_BASE_URL || 'http://localhost:5000', 14 | changeOrigin: true, 15 | rewrite: (path) => path 16 | } 17 | } 18 | }, 19 | resolve: { 20 | alias: { 21 | '@': path.resolve(__dirname, 'src') 22 | } 23 | } 24 | } 25 | }) -------------------------------------------------------------------------------- /examples/fintune-infer-eval/README.md: -------------------------------------------------------------------------------- 1 | # 算想云微调推理以及评测示例 2 | 本项目展示了如何使用算想云平台进行大语言模型的微调训练和评测。主要包含以下内容: 3 | 4 | - 使用中文医疗数据集对Gemma-2b模型进行微调 5 | - 基于Rouge评分对微调后的模型进行评测 6 | - 提供完整的训练和评测流程示例代码 7 | 8 | 项目使用算想云提供的API接口,支持模型训练、部署和推理等功能。评测数据集包含医疗健康领域的问答数据,可用于验证模型在特定领域的表现。 9 | 10 | ## 运行步骤 11 | 1. 安装依赖 12 | ```bash 13 | pip install -r requirements.txt 14 | ``` 15 | 2. 配置config.py文件 16 | 3. 
运行脚本 17 | ```bash 18 | python main.py 19 | ``` -------------------------------------------------------------------------------- /examples/fintune-infer-eval/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | certifi==2024.8.30 3 | charset-normalizer==3.4.0 4 | idna==3.10 5 | jieba==0.42.1 6 | pydantic==2.9.2 7 | pydantic_core==2.23.4 8 | python-dateutil==2.9.0.post0 9 | requests==2.32.3 10 | six==1.16.0 11 | sxwl-client==1.0.2 12 | typing_extensions==4.12.2 13 | urllib3==2.2.3 14 | -------------------------------------------------------------------------------- /examples/gpt3/.gitignore: -------------------------------------------------------------------------------- 1 | datasets 2 | tmp 3 | nlp_gpt3_text-generation_1.3B -------------------------------------------------------------------------------- /examples/gpt3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.cn-beijing.aliyuncs.com/sxwl-ai/torch-base:v1 2 | WORKDIR /workspace 3 | # Put the layer that will not change frequently on it 4 | RUN python3 -m pip install --no-cache-dir modelscope megatron_util jieba \ 5 | -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html \ 6 | && pip3 install --no-cache-dir -U datasets 7 | COPY chinese-poetry-collection ./chinese-poetry-collection 8 | COPY nlp_gpt3_text-generation_1.3B ./nlp_gpt3_text-generation_1.3B 9 | COPY finetune_dureader.py finetune_poetry.py ./ 10 | # https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor 11 | -------------------------------------------------------------------------------- /examples/infiniband/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.cn-beijing.aliyuncs.com/sxwl-ai/torch-base:v1 2 | WORKDIR /workspace 3 | RUN apt-get -y install 
ninja-build=1.10.0-1build1 4 | COPY test.py ./ -------------------------------------------------------------------------------- /examples/infiniband/all_reduce.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | import torch.multiprocessing as mp 5 | 6 | dist.init_process_group(backend="nccl") 7 | 8 | print("LOCAL_RANK:{}".format(os.environ["LOCAL_RANK"])) 9 | print("GLOBAL_RANK:{}".format(os.environ["RANK"])) 10 | print("GROUP_RANK:{}".format(os.environ["GROUP_RANK"])) 11 | print("ROLE_RANK:{}".format(os.environ["ROLE_RANK"])) 12 | print("LOCAL_WORLD_SIZE:{}".format(os.environ["LOCAL_WORLD_SIZE"])) 13 | 14 | tensor = torch.ones(1,device="cuda:{}".format(os.environ["LOCAL_RANK"])) 15 | dist.all_reduce(tensor,op=dist.ReduceOp.SUM) 16 | print('ALL_REDUCE_SUM_OF_ALL_WORKER:{}'.format(tensor)) -------------------------------------------------------------------------------- /examples/infiniband/sriov-ib-network-node-policy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: sriovnetwork.openshift.io/v1 3 | kind: SriovNetworkNodePolicy 4 | metadata: 5 | name: infiniband-sriov 6 | namespace: nvidia-network-operator 7 | spec: 8 | deviceType: netdevice 9 | mtu: 1500 10 | nodeSelector: 11 | feature.node.kubernetes.io/pci-15b3.present: "true" 12 | nicSelector: 13 | vendor: 15b3 14 | linkType: ib 15 | isRdma: true 16 | numVfs: 8 17 | priority: 90 18 | resourceName: mlnxnics 19 | -------------------------------------------------------------------------------- /examples/infiniband/sriov-ib-network-pod.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: test-sriov-ib-pod 6 | annotations: 7 | k8s.v1.cni.cncf.io/networks: example-sriov-ib-network 8 | spec: 9 | containers: 10 | - name: test-sriov-ib-pod 11 | image: 
mellanox/centos_7_4_mofed_4_2_1_2_0_0_60 12 | imagePullPolicy: IfNotPresent 13 | command: 14 | - sh 15 | - - 16 | - sleep inf 17 | securityContext: 18 | capabilities: 19 | add: 20 | - IPC_LOCK 21 | resources: 22 | requests: 23 | nvidia.com/mlnxnics: "1" 24 | limits: 25 | nvidia.com/mlnxnics: "1" 26 | -------------------------------------------------------------------------------- /examples/jax/xla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/jax/xla.py -------------------------------------------------------------------------------- /examples/llama2-pt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/torch-base:v2024-01-12-01 2 | 3 | RUN pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 llama-recipes 4 | 5 | WORKDIR /workspace 6 | COPY *.py ./ 7 | -------------------------------------------------------------------------------- /examples/llama2-pt/main.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import finetune 3 | 4 | if __name__ == "__main__": 5 | fire.Fire(finetune.main) 6 | 7 | -------------------------------------------------------------------------------- /examples/llama2/cpodjob/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/pytorch:2.3.1-cuda12.1-cudnn8-runtime 2 | 3 | WORKDIR /workspace 4 | 5 | RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple accelerate peft bitsandbytes transformers trl tensorboard 6 | 7 | COPY train.py ./ 8 | 9 | CMD ["python", "train.py"] -------------------------------------------------------------------------------- /examples/llama2/finetuning-and-deploy/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base:latest 2 | WORKDIR /workspace 3 | RUN pip3 install --upgrade pip 4 | RUN pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple packaging 5 | RUN pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu118 6 | #RUN MAX_JOBS=4 pip install flash-attn==2.3.0 --no-build-isolation 7 | COPY Fastchat ./ 8 | RUN pip3 install "fschat[model_worker,train]" -------------------------------------------------------------------------------- /examples/llama2/requirements.txt: -------------------------------------------------------------------------------- 1 | # Latest packages as of 20231014. 2 | 3 | #deepspeed==0.11.1 4 | deepspeed 5 | #torch==2.0.1 6 | #torch==2.1.0 7 | torch 8 | #transformers==4.34.0 9 | #transformers 10 | # Newer versions seem to have issues importing LlamaConfig, LlamaTokenizer, etc. 
11 | transformers==4.31.0 12 | accelerate 13 | sentencepiece 14 | #loguru==0.5.3 15 | loguru 16 | -------------------------------------------------------------------------------- /examples/llamafactory/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base:latest 2 | WORKDIR /workspace 3 | COPY LLaMA-Factory ./ 4 | #COPY acclerate_config.yaml /root/.cache/huggingface/accelerate/default_config.yaml 5 | COPY dataset_info.json /data/dataset/ 6 | RUN pip3 install --upgrade pip 7 | RUN pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt deepspeed -U transformers 8 | RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 9 | RUN pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorboard -------------------------------------------------------------------------------- /examples/llamafactory/README.md: -------------------------------------------------------------------------------- 1 | # llama factory 无代码微调 2 | 3 | ## 镜像编译 4 | 5 | ```bash 6 | 7 | # 克隆 llama factory 8 | git clone git@github.com:hiyouga/LLaMA-Factory.git 9 | 10 | # 国内可以用这个代理 git clone https://mirror.ghproxy.com/https://github.com/hiyouga/LLaMA-Factory.git 11 | 12 | # 编译 13 | docker build -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/llamafactory:latest . 
14 | 15 | ``` 16 | 17 | ## -------------------------------------------------------------------------------- /examples/llamafactory/cpodjob.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/llamafactory/cpodjob.yaml -------------------------------------------------------------------------------- /examples/llamafactory/dataset_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "file_name": "custom/dataset.json" 4 | } 5 | } -------------------------------------------------------------------------------- /examples/llm-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用官方Python运行环境作为基础镜像 2 | FROM --platform=linux/amd64 python:3.8-slim 3 | 4 | # 设置工作目录为/app 5 | WORKDIR /app 6 | 7 | # 将当前目录下的所有Python文件复制到容器中的/app目录 8 | COPY ./*.py /app/ 9 | 10 | # 安装Flask及其他可能需要的库 11 | RUN pip install Flask requests 12 | 13 | # 配置环境变量 14 | ENV FLASK_APP=app.py 15 | ENV FLASK_RUN_HOST=0.0.0.0 16 | 17 | # 暴露端口5000供外部访问 18 | EXPOSE 5000 19 | 20 | # 容器启动时执行的命令,运行Flask服务 21 | CMD ["flask", "run"] 22 | -------------------------------------------------------------------------------- /examples/llm-agent/README.md: -------------------------------------------------------------------------------- 1 | # LLM Agent 2 | 3 | A basic LLM agent demo that reads a weather forecast webpage to answer weather 4 | forecast questions. 
5 | -------------------------------------------------------------------------------- /examples/llm-agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/llm-agent/__init__.py -------------------------------------------------------------------------------- /examples/llm-agent/buildimage.sh: -------------------------------------------------------------------------------- 1 | docker build -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/llmagent:$(git rev-parse --short HEAD) . -------------------------------------------------------------------------------- /examples/llm-agent/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | LLM_URL = os.getenv('LLM_URL') 4 | 5 | STYLISH_DESC = "你是一名AI生活助手,要用最生活化、活泼的语言回答问题。" 6 | 7 | SERVICE_LIST = [ "天气服务" , "生活服务" ] 8 | 9 | TOOL_LIST = [ 10 | { 11 | "name": "weather_assistant", 12 | "describe": "I can query the weather by city and date", 13 | "input_example": "北京 2024-02-22", 14 | "output_example": "晴天" 15 | }, 16 | { 17 | "name": "common_sense_assistant", 18 | "describe": "I have a lot of common sense knowledge about daily life", 19 | "input_example": "明天是晴天,适合洗车吗?", 20 | "output_example": "是的,明天是晴天的话,非常适合洗车" 21 | } 22 | ] -------------------------------------------------------------------------------- /examples/llm-agent/domain.py: -------------------------------------------------------------------------------- 1 | from llm import call_llm 2 | from config import SERVICE_LIST 3 | 4 | def domain_check(ipt) : 5 | domains = ",".join(SERVICE_LIST) 6 | s = f"假设你是一个机器人,以下你的服务的范围【 {domains} 】, 请判断此问题 【 {ipt} 】是否在你的服务范围内,如果是请说YES,如果不是请说NO。" 7 | res = call_llm(s) 8 | return res == "YES" 9 | 10 | 11 | if __name__ == "__main__" : 12 | print(domain_check("明天适合洗车吗?")) 13 | print(domain_check("明天天气怎么样?")) 14 | 15 | 16 | 
-------------------------------------------------------------------------------- /examples/llm-agent/memory.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import OpenAI 2 | from langchain.chains import ConversationChain 3 | 4 | # first initialize the large language model 5 | llm = OpenAI( 6 | temperature=0, 7 | openai_api_key="OPENAI_API_KEY", 8 | model_name="text-davinci-003" 9 | ) 10 | 11 | # now initialize the conversation chain 12 | conversation = ConversationChain(llm=llm) 13 | 14 | print(conversation.prompt.template) -------------------------------------------------------------------------------- /examples/llm-agent/stylish.py: -------------------------------------------------------------------------------- 1 | from llm import call_llm 2 | from config import STYLISH_DESC 3 | 4 | 5 | def stylish_output(ipt) : 6 | return call_llm(ipt= f"【{STYLISH_DESC}】请按照此风格,重新描述以下的信息:【{ipt}】") 7 | 8 | 9 | if __name__ == "__main__" : 10 | print(stylish_output("明天天气睛转多云,适合洗车。")) -------------------------------------------------------------------------------- /examples/llm-agent/test_plan.py: -------------------------------------------------------------------------------- 1 | from plan import planning 2 | 3 | tool_list = [ 4 | { 5 | "name": "weather_assistant", 6 | "describe": "I can query the weather by city and date", 7 | "input_example": "北京,2024-02-22", 8 | "output_example": "晴天" 9 | }, 10 | { 11 | "name": "common_sense_assistant", 12 | "describe": "I have a lot of common sense knowledge about daily life", 13 | "input_example": "明天是晴天,适合洗车吗?", 14 | "output_example": "是的,明天是晴天的话,非常适合洗车" 15 | } 16 | ] 17 | 18 | print(planning("下周二适合洗车吗?", tool_list)) -------------------------------------------------------------------------------- /examples/llm-agent/weather-agent.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | 
name: weather-agent 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: weather-agent 10 | template: 11 | metadata: 12 | labels: 13 | app: weather-agent 14 | spec: 15 | containers: 16 | - name: weather-agent 17 | image: dockerhub.kubekey.local/kubesphereio/weather-agent:v3 18 | imagePullPolicy: IfNotPresent 19 | ports: 20 | - containerPort: 5000 21 | env: 22 | - name: WEATHER_KEY 23 | value: "" 24 | - name: LLM_URL 25 | value: "" -------------------------------------------------------------------------------- /examples/nccl/0.build_and_push_docker_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # 该脚本用于构造image,并上传华为云镜像服务 3 | # Usage: ./0.build_and_push_docker_image.sh 4 | #------------------------------------------------------------------- 5 | 6 | # 这几句先构造base镜像上传,base镜像不会轻易改动 7 | #docker rmi swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base 8 | #docker build . -f base.Dockerfile -t swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base 9 | #docker push swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base 10 | 11 | # 在base镜像基础上,加上nccl的小demo 12 | docker rmi -f swr.cn-east-3.myhuaweicloud.com/sxwl/for_nccl_test 13 | docker build -t swr.cn-east-3.myhuaweicloud.com/sxwl/for_nccl_test . 
14 | docker push swr.cn-east-3.myhuaweicloud.com/sxwl/for_nccl_test 15 | -------------------------------------------------------------------------------- /examples/nccl/1.k8s_apply_yaml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # 该脚本新建一个k8s容器,用来做NCCL连通性测试 3 | # bash ./1.k8s_apply_yaml.sh 4 | #------------------------------------------------------------ 5 | 6 | kubectl delete pod/for-nccl-test 7 | sudo ctr -n k8s.io images rm swr.cn-east-3.myhuaweicloud.com/sxwl/for_nccl_test:latest 8 | kubectl apply -f ./k8s_nccl_test.yaml 9 | -------------------------------------------------------------------------------- /examples/nccl/2.docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # 该脚本用来启动docker容器 3 | # Usage: ./2.docker_run.sh 4 | #----------------------------------------------------------- 5 | 6 | IMAGE_NAME="swr.cn-east-3.myhuaweicloud.com/sxwl/for_nccl_test:latest" 7 | DOCKER_NAME="lwn_for_nccl_test" 8 | 9 | docker rm -f ${DOCKER_NAME} 10 | docker run -it --rm \ 11 | --shm-size=1g \ 12 | --runtime=nvidia \ 13 | --name ${DOCKER_NAME} \ 14 | --cap-add=IPC_LOCK \ 15 | --network host \ 16 | --device=/dev/infiniband/uverbs0 \ 17 | --hostname ${DOCKER_NAME} \ 18 | ${IMAGE_NAME} 19 | -------------------------------------------------------------------------------- /examples/nccl/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM swr.cn-east-3.myhuaweicloud.com/sxwl/torch-base:latest 2 | 3 | RUN DEBIAN_FRONTEND=noninteractive apt install -y tzdata 4 | 5 | #安装ssh服务 6 | RUN apt-get install -y passwd openssh-server 7 | 8 | #设置初始密码 9 | RUN echo "root:root"|chpasswd 10 | 11 | RUN mkdir /var/run/sshd 12 | RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config 13 | RUN sed -i "s/.*PermitRootLogin.*/PermitRootLogin yes/" /etc/ssh/sshd_config 14 | RUN sed -i 's/^.*Port 22$/Port 14343/g' 
/etc/ssh/sshd_config 15 | 16 | ADD . /workspace 17 | WORKDIR /workspace 18 | 19 | ENTRYPOINT /workspace/entrypoint.sh 20 | -------------------------------------------------------------------------------- /examples/nccl/go_worker4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | WORLD_SIZE=2 \ 4 | RANK=0 \ 5 | MASTER_PORT=29501 \ 6 | MASTER_ADDR=214.2.5.4 \ 7 | TORCH_CPP_LOG_LEVEL=INFO \ 8 | TORCH_DISTRIBUTED_DEBUG=INFO \ 9 | NCCL_SOCKET_IFNAME=bond0 \ 10 | python ./dist_nccl_demo.py 11 | -------------------------------------------------------------------------------- /examples/nccl/go_worker5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | WORLD_SIZE=2 \ 4 | RANK=1 \ 5 | MASTER_PORT=29501 \ 6 | MASTER_ADDR=214.2.5.4 \ 7 | TORCH_CPP_LOG_LEVEL=INFO \ 8 | TORCH_DISTRIBUTED_DEBUG=INFO \ 9 | NCCL_SOCKET_IFNAME=ens22f0 \ 10 | python ./dist_nccl_demo.py 11 | -------------------------------------------------------------------------------- /examples/nim/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.cn-hangzhou.aliyuncs.com/miclon/py-nodejs:latest as build 2 | WORKDIR /app 3 | COPY . 
/app 4 | RUN pnpm config set registry https://registry.npmmirror.com/ 5 | RUN cd /app/chat-ui/web && pnpm install && pnpm run build 6 | 7 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/llamafactory:inference-v2.1 8 | WORKDIR /app 9 | COPY chat-ui/api/requirements.txt /app/web/requirements.txt 10 | RUN pip install --no-cache-dir -r web/requirements.txt 11 | COPY --from=build /app/chat-ui/web/dist /app/web/api/dist 12 | COPY run.sh /app 13 | COPY chat-ui/api /app/web/api 14 | COPY model/meta-llama-3.1-8b-instruct /mnt/models 15 | CMD ["/bin/bash", "run.sh"] 16 | 17 | -------------------------------------------------------------------------------- /examples/nim/README.md: -------------------------------------------------------------------------------- 1 | 2 | 运行镜像,该镜像不包含模型,需要将模型挂载到 /mnt/models 目录下 3 | 4 | ```bash 5 | docker run -it --rm --gpus '"device=1"' --ipc=host -p 8080:8080 -p 8000:8000 -v /data2/dg/models/meta-llama-3.1-8b-instruct:/mnt/models sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/sxwl-nim:v1.2 6 | ``` 7 | 8 | 运行镜像,该镜像包含模型,模型在镜像中 9 | 10 | ```bash 11 | docker run -it --rm --gpus '"device=1"' --ipc=host -p 8080:8080 -p 8000:8000 sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/sxwl-nim-with-model:v1.2 12 | ``` 13 | -------------------------------------------------------------------------------- /examples/nim/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo 'app start!'; 3 | 4 | API_PORT=8080 python /app/src/api.py --model_name_or_path /mnt/models --infer_backend vllm --template llama3 --vllm_maxlen=8192 & 5 | 6 | # 等待 vllm 启动完成 7 | sleep 20 8 | 9 | API_URL=http://localhost:8080/v1/chat/completions python3 /app/web/api/app.py -------------------------------------------------------------------------------- /examples/pytorch-multinode/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
registry.ap-southeast-1.aliyuncs.com/sxwl-ai/cuda_base:2023-10-23 2 | WORKDIR /workspace 3 | RUN pip3 install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 4 | COPY main.py datautils.py ./ 5 | -------------------------------------------------------------------------------- /examples/pytorch-multinode/datautils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | class MyTrainDataset(Dataset): 5 | def __init__(self, size): 6 | self.size = size 7 | self.data = [(torch.rand(20), torch.rand(1)) for _ in range(size)] 8 | 9 | def __len__(self): 10 | return self.size 11 | 12 | def __getitem__(self, index): 13 | return self.data[index] -------------------------------------------------------------------------------- /examples/qanything/README.md: -------------------------------------------------------------------------------- 1 | # 导入本地文档至知识库 2 | 3 | ## 导入步骤 4 | 1. 按如下目录结构组织文档 5 | ```bash 6 | data 7 | ├── 知识库一 8 | │   ├── 文档一.pdf 9 | │   └── 文档二.pdf 10 | ├── 知识库二 11 | │   ├── 文档三.docx 12 | │   └── 文档四.docx 13 | └── 知识库三 14 | ├── 文档五.docx 15 | └── 文档六.pdf 16 | ``` 17 | 18 | 2. 执行脚本 19 | ```bash 20 | python import_knowledge.py --datadir=/path/to/data 21 | ``` 22 | 23 | 3. 
该脚本会以 data 目录下的子目录名称创建知识库,并导入该子目录下的文档到该知识库 24 | -------------------------------------------------------------------------------- /examples/rag-h5/21book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag-h5/21book.png -------------------------------------------------------------------------------- /examples/rag-h5/Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用官方的 Nginx 镜像作为基础镜像 2 | FROM nginx:alpine 3 | 4 | # 将本地的网页文件复制到 Nginx 的默认静态文件目录 5 | COPY index.html /usr/share/nginx/html/ 6 | COPY styles.css /usr/share/nginx/html/ 7 | COPY script.js /usr/share/nginx/html/ 8 | COPY 21book.png /usr/share/nginx/html/ 9 | 10 | # 如果有其他静态资源(如图标),请确保也复制到容器中 11 | COPY voice-icon.png /usr/share/nginx/html/ 12 | COPY send-icon.png /usr/share/nginx/html/ 13 | COPY user-avatar.png /usr/share/nginx/html/ 14 | COPY assistant-avatar.png /usr/share/nginx/html/ 15 | 16 | # 暴露 Nginx 的默认端口 17 | EXPOSE 80 18 | 19 | # 启动 Nginx 20 | CMD ["nginx", "-g", "daemon off;"] -------------------------------------------------------------------------------- /examples/rag-h5/assistant-avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag-h5/assistant-avatar.png -------------------------------------------------------------------------------- /examples/rag-h5/send-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag-h5/send-icon.png -------------------------------------------------------------------------------- /examples/rag-h5/user-avatar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag-h5/user-avatar.png -------------------------------------------------------------------------------- /examples/rag-h5/voice-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag-h5/voice-icon.png -------------------------------------------------------------------------------- /examples/rag/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Config: 4 | MILVUS_HOST = os.getenv('MILVUS_HOST', '127.0.0.1') 5 | MILVUS_PORT = os.getenv('MILVUS_PORT', '19530') 6 | MILVUS_COLLECTION_NAME = 'text_collection' 7 | LLAMA2_CHAT_URL = os.getenv('LLAMA2_CHAT_URL', 'http://10.233.50.150/v1/chat/completions') 8 | OPENCHAT_URL = os.getenv('OPENCHAT_URL', 'http://openchat.llm.sxwl.ai:30005/v1/chat/completions') 9 | ID_TEXT_DIR = os.getenv('ID_TEXT_DIR', '/data/id_text') -------------------------------------------------------------------------------- /examples/rag/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag/image-1.png -------------------------------------------------------------------------------- /examples/rag/image-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/examples/rag/image-2.png -------------------------------------------------------------------------------- /examples/rag/rag-service-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: rag-service 5 | spec: 6 | selector: 7 | app: 
rag-service 8 | ports: 9 | - protocol: TCP 10 | port: 80 11 | targetPort: 5000 12 | nodePort: 32000 13 | type: NodePort 14 | -------------------------------------------------------------------------------- /examples/ray/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/ray:2.37.0.1b620f-py310-cu121 2 | RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple vllm==0.6.3.post1 3 | COPY vllm_app.py /app/vllm_app.py 4 | COPY __init__.py /app/__init__.py -------------------------------------------------------------------------------- /examples/ray/README.md: -------------------------------------------------------------------------------- 1 | ## 编译 vllm ray 镜像 2 | 3 | 该镜像是 rayService 的 HEAD 和 Worker 镜像,包含 ray 和 vllm 以及一个 APPlication 4 | 5 | ```bash 6 | docker build -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/vllm-ray:latest -f Dockerfile . 7 | ``` 8 | 9 | ## 生成 ray serve 配置文件 10 | 11 | ## 使用 locust 压力测试 12 | 13 | ```bash 14 | locust -f locustfile.py --host=http://10.10.10.10:8000 15 | ``` 16 | -------------------------------------------------------------------------------- /examples/ray/locustfile.py: -------------------------------------------------------------------------------- 1 | from locust import HttpUser, task 2 | 3 | class HelloWorldUser(HttpUser): 4 | @task 5 | def hello_world(self): 6 | self.client.post(url="/v1/chat/completions",json={"model":"/data2/dg/models/meta-llama-3.1-8b-instruct","messages":[{"role":"user","content":"hello world"}]}) -------------------------------------------------------------------------------- /examples/ray/va/README.md: -------------------------------------------------------------------------------- 1 | # vLLM Ray App 2 | 3 | Implement the Ray App for launching vLLM inference serving. 4 | This Ray app is built based on [Ray vLLM example](https://docs.ray.io/en/latest/serve/tutorials/vllm-example.html). 
5 | -------------------------------------------------------------------------------- /examples/rocm-images/README.md: -------------------------------------------------------------------------------- 1 | # llamafactory/vllm for ROCm 镜像构建 2 | 3 | ## 克隆代码仓库 4 | ```shell 5 | git clone https://github.com/NascentCore/3k.git 6 | cd 3k/examples/rocm-images 7 | ``` 8 | 9 | ## llamafactory 镜像构建 10 | ```shell 11 | docker buildx build --platform linux/amd64 -f Dockerfile.llamafactory -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/llamafactory-rocm:main . 12 | ``` 13 | 14 | ## vllm 镜像构建 15 | ```shell 16 | docker buildx build --platform linux/amd64 -f Dockerfile.vllm -t sxwl-registry.cn-beijing.cr.aliyuncs.com/sxwl-ai/vllm-rocm:main . 17 | ``` 18 | -------------------------------------------------------------------------------- /experimental/README.md: -------------------------------------------------------------------------------- 1 | # Experimental 2 | 3 | Stores code that is not ready for production but is meaningful for reference. 4 | For example, if you want to comment out a block of code, but don't want to lose it, 5 | that code should be put inside experimental. 
6 | -------------------------------------------------------------------------------- /experimental/ceph-csi/README.md: -------------------------------------------------------------------------------- 1 | # Ceph-CSI 2 | 3 | For testing Ceph-CSI 4 | @cairong 5 | -------------------------------------------------------------------------------- /experimental/ceph-csi/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: cephfs-test 5 | spec: 6 | accessModes: 7 | - ReadWriteMany 8 | resources: 9 | requests: 10 | storage: 1Gi 11 | storageClassName: rook-cephfs 12 | -------------------------------------------------------------------------------- /experimental/ceph-csi/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: csi-cephfs-secret 5 | stringData: 6 | userID: admin 7 | userKey: 8 | adminID: admin 9 | adminKey: 10 | -------------------------------------------------------------------------------- /experimental/cy/README.md: -------------------------------------------------------------------------------- 1 | git clone git@github.com:NascentCore/3k.git 2 | cd 3k 3 | 4 | # 如果已经有本地 repo,则可以先与远程主干同步 5 | git pull 6 | 7 | # 为自己的改动创建新的 branch 8 | git checkout -b first_pr 9 | 10 | # 假设 username 是 xueyou 11 | mkdir -p home/xueyou 12 | # 用你最习惯的编辑器,创建 README.md,写一段自我介绍 13 | vi home/xueyou/README.md 14 | 15 | # Xueyou // 标题 16 | 17 | 我是张学友;。。。 18 | 19 | # 完成编辑后 commit 20 | git add . 
21 | git commit -m "'s first pr" 22 | 23 | # Push 到远程目录 24 | git push -u origin HEAD 25 | -------------------------------------------------------------------------------- /experimental/demo_test.go: -------------------------------------------------------------------------------- 1 | package experimental 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | // TestEmptyTest is an example of a Go test but does nothing. 8 | func TestEmptyTest(t *testing.T) { 9 | } 10 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/README.md: -------------------------------------------------------------------------------- 1 | ## qmapper的代码仓库 2 | 3 | ### 目标 4 | 实现用户训练和推理脚本到异构分布式计算程序的一键转化 5 | 6 | ### MetaIR 7 | qmapper使用的计算图中间表示 8 | 9 | ### 可视化Qmapper 10 | 见./example/basic_transformer/Readme.md 11 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/example/basic_transformer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y build-essential 5 | 6 | WORKDIR /basic_transformer 7 | 8 | COPY . 
./ 9 | 10 | ENV PIP_ROOT_USER_ACTION=ignore 11 | 12 | RUN python3 -m pip config \ 13 | set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ 14 | python3 -m pip install --no-cache-dir --upgrade pip && \ 15 | python3 -m pip install --no-cache-dir -r requirements.txt -------------------------------------------------------------------------------- /experimental/qmapper_prototype/example/basic_transformer/README.md: -------------------------------------------------------------------------------- 1 | ## 模型简介 2 | 原始版本的Transformer,用于测试qmapper功能和性能 3 | 4 | ## 测试qmapper导出MetaIR功能 5 | 6 | 需要修改pytorch的代码: 7 | torch/optim/adam.py下的def _single_tensor_adam函数需要替换为qmapper/api/third_party_utils.py下的 _traceable_single_tensor_adam函数 8 | 9 | 10 | 然后,在basic_transformer目录下运行test_qmapper.sh脚本,可以得到关于这个transformer模型的meta ir的计算图,在当前目录下会保存为svg和pdf两种可视化格式 11 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/example/basic_transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | torch == 2.1.0 2 | torchtext 3 | onnxruntime 4 | numpy -------------------------------------------------------------------------------- /experimental/qmapper_prototype/example/basic_transformer/test_qmapper.sh: -------------------------------------------------------------------------------- 1 | cp ./scripts/train_script_qmapper.py . 
2 | mkdir -p train_out 3 | python train_script_qmapper.py 4 | rm -rf train_script_qmapper.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/example/basic_transformer/utils/model_config.py: -------------------------------------------------------------------------------- 1 | model_config = { 2 | 'vocab_size':2000, # 3 | 'num_layers':2, # 4 | 'd_model':512, # 5 | 'd_ff':512, # 6 | 'n_head': 8, # 7 | 'dropout':0.1, # 8 | 'train_length': 512 9 | } -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/api/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/api/meta_ir/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/api/meta_ir/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/api/meta_ir/utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/api/meta_ir/utils/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/api/meta_ir/utils/graph_operations.py: -------------------------------------------------------------------------------- 1 | import community 2 | import networkx as nx -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/api/ray_connect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/api/ray_connect.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/bridge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/bridge/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/bridge/torch_bridge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/bridge/torch_bridge/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/bridge/torch_bridge/environment_variables.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | dist.get_group_rank() 3 | dist.get_backend_config() 
-------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/codegen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/codegen/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/codegen/test.py: -------------------------------------------------------------------------------- 1 | import qmapper.codegen.tvm_transform as tvm_transform 2 | import tvm 3 | 4 | tvm_transform.operator_export('mm', (1024,1024,1024,'float32'), tvm.target.Target(target='llvm', host='llvm')) -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/autoparallel/autoparallel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/autoparallel/autoparallel.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/autoparallel/autoparallel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/shard_optimize.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/shard_optimize.cc -------------------------------------------------------------------------------- 
/experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/shard_optimize.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/shard_optimize.h -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/stage_split.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/stage_split.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/stage_split.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/autoparallel/passes/stage_split.h -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/components/cpu_iface/cpu_iface.cc: -------------------------------------------------------------------------------- 1 | #include "components/cpu_iface/cpu_iface.h" -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/components/cpu_iface/cpu_iface.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef QMAP_CPU_IFACE_H 4 | #define QMAP_CPU_IFACE_H 5 | 6 | #define CACHE_LINE_SIZE 128 7 | #define memory_bus_store_fence() asm volatile ("sfence" ::: "memory") 8 | #define memory_bus_load_fence() asm volatile ("lfence" ::: "memory") 9 | #define cpu_fence() asm volatile("" 
::: "memory") 10 | #define cpu_load_fence() asm volatile("" ::: "memory") 11 | #define cpu_mfence() asm volatile("" ::: "memory") 12 | 13 | #endif -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/components/cuda_iface/cuda_iface.cc: -------------------------------------------------------------------------------- 1 | #include "components/cuda_iface/cuda_iface.h" -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/components/rocm_iface/rocm_iface.cc: -------------------------------------------------------------------------------- 1 | #include "components/cuda_iface/rocm_iface.h" -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifndef QMAP_COMM_CONFIG 7 | #define QMAP_COMM_CONFIG 8 | 9 | namespace qmap { 10 | namespace comm { 11 | 12 | typedef struct register_info { 13 | void *elem; 14 | size_t elem_size; 15 | } register_info_t; 16 | 17 | class CommConfig { 18 | public: 19 | static std::vector mc_libs; 20 | }; 21 | 22 | }; // namespace comm 23 | }; // namespace qmap 24 | 25 | #endif -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/cpu/execute_engine_cpu.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/cpu/execute_engine_cpu.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/cpu/execute_engine_cpu.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "comm_iface.h" 4 | #include 5 | 6 | #ifndef QMAP_COMM_EXECUTE_ENGINE_H 7 | #define QMAP_COMM_EXECUTE_ENGINE_H 8 | 9 | namespace qmap { 10 | namespace comm { 11 | namespace ec_cpu { 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | }; // namespace ec_cpu 22 | }; // namespace comm 23 | }; // namespace qmap 24 | 25 | #endif -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/cuda/execute_engine_cuda.cc: -------------------------------------------------------------------------------- 1 | #include "execute_context/cuda/execute_engine_cuda.h" 2 | 3 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/cuda/qmap_cuda_executor.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" { 4 | 5 | } -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/execute_engine.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/execute_engine.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/rocm/execute_engine_rocm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/rocm/execute_engine_rocm.cc 
-------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/rocm/execute_engine_rocm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/execute_context/rocm/execute_engine_rocm.h -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/memory_pool/rocm/memory_pool_rocm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/memory_pool/rocm/memory_pool_rocm.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/comm/memory_pool/rocm/memory_pool_rocm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/comm/memory_pool/rocm/memory_pool_rocm.h -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/meta/meta_ir.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/meta/meta_ir.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/meta/sharding_info.cc: -------------------------------------------------------------------------------- 1 | #include "sharding_info.h" 2 | 3 | int qmap::ir::ShardDim::global_shard_dim_cnt = 0; 4 | 5 | 6 | 7 | 8 
| 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/simulator/simulator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/simulator/simulator.cc -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/simulator/simulator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/csrc/simulator/simulator.h -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/tests/comm/components/kernels/vector_add.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | extern "C" cudaError_t matAdd(float *a,float *b, float *c, int length); -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/utils/qmap_compiler_defs.cc: -------------------------------------------------------------------------------- 1 | #include "qmap_compiler_defs.h" -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/utils/qmap_compiler_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef QMAP_COMPILER_DEFS 2 | #define QMAP_COMPILER_DEFS 3 | 4 | namespace qmap { 5 | namespace utils { 6 | 7 | #define qmap_compiler_fence() asm volatile("":: "memory") 8 | #define class_offset_of(class, member) (reinterpret_cast(&static_cast(0)->member)) 9 | #define class_container_of(class, member, member_p) 
(reinterpret_cast(reinterpret_cast(member_p) - class_offset_of(class, member))) 10 | 11 | }; // namespace utils 12 | }; // namespace qmap 13 | 14 | #endif -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/utils/qmap_locks.cc: -------------------------------------------------------------------------------- 1 | #include "qmap_locks.h" 2 | #include 3 | 4 | void qmap::utils::SpinLock::lock() { 5 | while (flag.test_and_set(std::memory_order_acquire)) {} 6 | } 7 | 8 | void qmap::utils::SpinLock::unlock() { 9 | flag.clear(std::memory_order_release); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/csrc/utils/qmap_locks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #ifndef QMAP_LOCKS 6 | #define QMAP_LOCKS 7 | 8 | namespace qmap { 9 | namespace utils { 10 | 11 | class SpinLock { 12 | public: 13 | void lock(); 14 | void unlock(); 15 | 16 | private: 17 | std::atomic_flag flag = ATOMIC_FLAG_INIT; 18 | }; 19 | 20 | }; // namespace utils 21 | }; // namespace qmap 22 | 23 | #endif -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/executable/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/executable/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/executable/executable.py: -------------------------------------------------------------------------------- 1 | from ..codegen.internal_meta_ir.internal_meta_ir import InternalMetaGraph, InternalDtype, InternalMetaOperator, InternalMetaVariable, InternalType 2 | from 
..api.meta_ir.cluster_info import ClusterInfo 3 | from typing import Dict, List 4 | 5 | class Executable: 6 | def __init__(self,cluster_info: ClusterInfo, internal_graph: InternalMetaGraph, shard_options: Dict[int, List[int]]): 7 | self.internal_graph = internal_graph 8 | self.cluster_info = cluster_info 9 | self.shard_options = shard_options 10 | 11 | -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/optimization/cost_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/optimization/cost_model/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/optimization/search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/experimental/qmapper_prototype/qmapper/optimization/search/__init__.py -------------------------------------------------------------------------------- /experimental/qmapper_prototype/qmapper/optimization/search/inter_strategy_search.py: -------------------------------------------------------------------------------- 1 | from ..cost_model.cost_model import CostModel 2 | from ...codegen.internal_meta_ir.internal_meta_ir import InternalDtype, InternalMetaGraph, InternalMetaOperator, InternalMetaVariable, InternalType, SplitPass, ReducePass, ReplicatePass 3 | from ...api.meta_ir.cluster_info import ClusterInfo 4 | from typing import List, Dict 5 | from copy import deepcopy 6 | import itertools 7 | 8 | class InterStrategySearch: 9 | def __init__(self): 10 | pass -------------------------------------------------------------------------------- /experimental/qmapper_prototype/test/torch/eq.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | x = torch.tensor(0, dtype=torch.int64) 3 | y = torch.tensor([1,2,3], dtype=torch.float32) 4 | t = torch.ops.aten._log_softmax.default(y, x, False) 5 | print(torch.ops.aten._log_softmax.default(y, x, False)) 6 | print(torch.ops.aten._log_softmax_backward_data.default(t,t,x,torch.float32)) -------------------------------------------------------------------------------- /home/README.md: -------------------------------------------------------------------------------- 1 | # Home 2 | 3 | For new team member to practice creating Pull Request to 3k repo. 4 | Each new team member should create a dir named after their ldap. 5 | -------------------------------------------------------------------------------- /home/cairong/README.md: -------------------------------------------------------------------------------- 1 | My name is licairong, I am a devops, this is my first PR. 2 | -------------------------------------------------------------------------------- /home/cairong/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package cairong 2 | 3 | func BubbleSort(arr []int) { 4 | for i := 0; i < len(arr)-1; i++ { 5 | for j := 0; j< len(arr)-i-1; j++ { 6 | if arr[j] > arr[j+1] { 7 | arr[j], arr[j+1] = arr[j+1], arr[j] 8 | } 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /home/chenshu/bubble/README.md: -------------------------------------------------------------------------------- 1 | # Introduce 2 | Hi everyone I am guochenshu, this is my first pr. 
3 | -------------------------------------------------------------------------------- /home/chenshu/bubble/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package bubble 2 | 3 | func BubbleSort(nums []int) { 4 | if len(nums) <= 1 { 5 | return 6 | } 7 | 8 | for i := 0; i < len(nums); i++ { 9 | flag := false 10 | for j := 0; j < len(nums)-i-1; j++ { 11 | if nums[j] > nums[j+1] { 12 | nums[j], nums[j+1] = nums[j+1], nums[j] 13 | flag = true 14 | } 15 | } 16 | if !flag { 17 | break 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /home/chenshu/bubble/bubble_sort_test.go: -------------------------------------------------------------------------------- 1 | package bubble 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func Test_bubbleSort(t *testing.T) { 9 | type args struct { 10 | nums []int 11 | } 12 | tests := []struct { 13 | name string 14 | args args 15 | want []int 16 | }{ 17 | { 18 | args: args{nums: []int{3, 2, 1, 4}}, 19 | want: []int{1, 2, 3, 4}, 20 | }, 21 | } 22 | for _, tt := range tests { 23 | t.Run(tt.name, func(t *testing.T) { 24 | BubbleSort(tt.args.nums) 25 | if !reflect.DeepEqual(tt.args.nums, tt.want) { 26 | t.Errorf("BubbleSort() = %v, want %v", tt.args.nums, tt.want) 27 | } 28 | }) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /home/congpeiqing/README.md: -------------------------------------------------------------------------------- 1 | # I'm CongPeiqing 2 | Hello , I'm CongPeiqing , a coder, 3 | Interested in AI cross CloudNative 4 | -------------------------------------------------------------------------------- /home/congpeiqing/code-retreat/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func printBoard(board [][]bool) { 6 | for _, line := range board { 7 | for _, b := range line { 8 | 
if b { 9 | fmt.Print("O ") 10 | } else { 11 | fmt.Print("X ") 12 | } 13 | } 14 | fmt.Print("\n") 15 | } 16 | } 17 | 18 | func main() { 19 | board := [][]bool{ 20 | {false, false, false, false, false}, 21 | {false, false, true, false, false}, 22 | {false, false, true, false, false}, 23 | {false, false, true, false, false}, 24 | {false, false, false, false, false}} 25 | 26 | for i := 0; i < 10; i++ { 27 | fmt.Printf("-- %d ----------\n", i) 28 | board = next(board) 29 | printBoard(board) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /home/donggang/README.md: -------------------------------------------------------------------------------- 1 | # DongGang 2 | 3 | My name is donggang. I'm a software engineer, 4 | a hacker, a fitness enthusiast, a cooking enthusiast, 5 | and a lifelong learner. 6 | -------------------------------------------------------------------------------- /home/donggang/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package donggang 2 | 3 | func BubbleSort(arr []int) { 4 | n := len(arr) 5 | 6 | for i := 0; i < n; i++ { 7 | for j := 0; j < n-i-1; j++ { 8 | if arr[j] > arr[j+1] { 9 | arr[j], arr[j+1] = arr[j+1], arr[j] 10 | } 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /home/donggang/cmd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "sxwl/3k/home/donggang" 5 | "time" 6 | ) 7 | 8 | func main() { 9 | var grid donggang.Grid 10 | 11 | // 设置一些初始的活细胞 12 | grid.Set(5, 5, true) 13 | grid.Set(5, 6, true) 14 | grid.Set(5, 7, true) 15 | grid.Set(6, 7, true) 16 | grid.Set(7, 6, true) 17 | 18 | for { 19 | grid.Print() 20 | time.Sleep(200 * time.Millisecond) 21 | grid = grid.NextGeneration() 22 | } 23 | } 24 | -------------------------------------------------------------------------------- 
/home/steven/README.md: -------------------------------------------------------------------------------- 1 | # my first_pr 2 | -------------------------------------------------------------------------------- /home/tianyu/README.md: -------------------------------------------------------------------------------- 1 | # First PR 2 | -------------------------------------------------------------------------------- /home/tianyu/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func bubbleSort(arr[] int) []int { 8 | length := len(arr) 9 | if length <=1 { 10 | return arr 11 | } 12 | for i :=0;i < length - 1;i++ { 13 | for j :=0;j < length - i -1;j++ { 14 | if arr[j+1] > arr[j] { 15 | arr[j],arr[j+1] = arr[j+1],arr[j] 16 | } 17 | } 18 | } 19 | return arr 20 | } 21 | 22 | func main() { 23 | arr := []int{11,8,2,5,7,10,3,6} 24 | fmt.Println(bubbleSort(arr)) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /home/tianyu/ty/README.md: -------------------------------------------------------------------------------- 1 | # First PR 2 | -------------------------------------------------------------------------------- /home/tianyu/ty/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func bubbleSort(arr[] int) []int { 8 | length := len(arr) 9 | if length <=1 { 10 | return arr 11 | } 12 | for i :=0;i < length - 1;i++ { 13 | for j :=0;j < length - i -1;j++ { 14 | if arr[j+1] > arr[j] { 15 | arr[j],arr[j+1] = arr[j+1],arr[j] 16 | } 17 | } 18 | } 19 | return arr 20 | } 21 | 22 | func main() { 23 | arr := []int{11,8,2,5,7,10,3,6} 24 | fmt.Println(bubbleSort(arr)) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /home/wenhua/README.md: 
-------------------------------------------------------------------------------- 1 | this is my first PR 2 | -------------------------------------------------------------------------------- /home/yzhao/README.md: -------------------------------------------------------------------------------- 1 | # Yzhao 2 | 3 | My name is Yaxiong Zhao. 4 | -------------------------------------------------------------------------------- /home/yzhao/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package bubblesort 2 | 3 | func BubbleSort(ints []int) { 4 | for i := len(ints); i >= 0; i -= 1 { 5 | for j := 0; j < i - 1; j += 1 { 6 | if ints[j] > ints[j+1] { 7 | ints[j], ints[j+1] = ints[j+1], ints[j] 8 | } 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /home/yzhao/bubble_sort_test.go: -------------------------------------------------------------------------------- 1 | package bubblesort 2 | 3 | import ( 4 | "testing" 5 | "reflect" 6 | ) 7 | 8 | func TestBubbleSort(t *testing.T) { 9 | ints := []int{5, 4, 3, 2, 1} 10 | BubbleSort(ints) 11 | expInts := []int{1, 2, 3, 4, 5} 12 | if !reflect.DeepEqual(ints, expInts) { 13 | t.Fatalf("expected %v got %v", expInts, ints) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /home/yzhao/code-retreat-20240119/README.md: -------------------------------------------------------------------------------- 1 | # Code Retreat @2024-01-19 2 | 3 | Conway game of life 4 | -------------------------------------------------------------------------------- /home/yzhao/code-retreat-20240419/fast_median.go: -------------------------------------------------------------------------------- 1 | package median 2 | 3 | import "sort" 4 | 5 | type FastMedian []int 6 | 7 | func (this *FastMedian) AddNum(v int) { 8 | *this = append(*this, v) 9 | } 10 | 11 | func (this FastMedian) GetMedian() float64 { 12 | if 
len(this) == 0 { 13 | panic("Empty!") 14 | } 15 | if len(this) == 1 { 16 | return float64(this[0]) 17 | } 18 | sort.Ints(this) 19 | midIdx := len(this) / 2 20 | if len(this)%2 == 0 { 21 | return (float64(this[midIdx]) + float64(this[midIdx-1])) / 2 22 | } else { 23 | return float64(this[midIdx]) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /home/yzhao/code-retreat-20240419/fast_median_test.go: -------------------------------------------------------------------------------- 1 | package median 2 | 3 | import "testing" 4 | 5 | func TestGetMedian(t *testing.T) { 6 | var m FastMedian 7 | m.AddNum(1) 8 | m.AddNum(2) 9 | if m.GetMedian() != 1.5 { 10 | t.Fail() 11 | } 12 | m.AddNum(3) 13 | if m.GetMedian() != 2.0 { 14 | t.Fail() 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /home/zhaoyan/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Hello, everyone. My Chinese name is 吴兆岩, and my English name is Ryan. 3 | I'm a developer. This is my first PR. 4 | -------------------------------------------------------------------------------- /home/zhongcheng/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Hello, everyone. My Chinese name is 钟成, and my English name is Hardy Simpson. 3 | I'm a developer. This is my first PR. 
-------------------------------------------------------------------------------- /home/zhongcheng/bubble_sort.go: -------------------------------------------------------------------------------- 1 | package cairong 2 | 3 | func BubbleSort(arr []int) { 4 | for i := 0; i < len(arr)-1; i++ { 5 | for j := 0; j< len(arr)-i-1; j++ { 6 | if arr[j] > arr[j+1] { 7 | arr[j], arr[j+1] = arr[j+1], arr[j] 8 | } 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /internal/README.md: -------------------------------------------------------------------------------- 1 | # Internal 2 | 3 | TODO: Add content 4 | -------------------------------------------------------------------------------- /internal/gateway/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "sxwl/3k/internal/gateway/gateway" 5 | 6 | "github.com/zeromicro/go-zero/rest" 7 | ) 8 | 9 | type Config struct { 10 | rest.RestConf 11 | Gateway gateway.Config 12 | } 13 | -------------------------------------------------------------------------------- /internal/gateway/gateway/config.go: -------------------------------------------------------------------------------- 1 | package gateway 2 | 3 | type Router struct { 4 | Path string 5 | ToPath string `json:",optional"` //nolint:staticcheck 6 | Auth bool `json:",optional"` //nolint:staticcheck 7 | Server string 8 | } 9 | 10 | type Config struct { 11 | Servers map[string]string 12 | Routers []Router 13 | } 14 | -------------------------------------------------------------------------------- /internal/gateway/svc/service_context.go: -------------------------------------------------------------------------------- 1 | package svc 2 | 3 | import ( 4 | "sxwl/3k/internal/gateway/config" 5 | ) 6 | 7 | type ServiceContext struct { 8 | Config config.Config 9 | } 10 | 11 | func NewServiceContext(c config.Config) *ServiceContext { 12 | return 
&ServiceContext{ 13 | Config: c, 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /internal/scheduler/config/hyperparameter.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | const ( 4 | ParamEpochs = "n_epochs" 5 | ParamBatchSize = "batch_size" 6 | ParamLearningRate = "learning_rate_multiplier" 7 | ) 8 | -------------------------------------------------------------------------------- /internal/scheduler/handler/create_new_user_i_d_handler.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/zeromicro/go-zero/rest/httpx" 7 | "sxwl/3k/internal/scheduler/logic" 8 | "sxwl/3k/internal/scheduler/svc" 9 | ) 10 | 11 | func CreateNewUserIDHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { 12 | return func(w http.ResponseWriter, r *http.Request) { 13 | l := logic.NewCreateNewUserIDLogic(r.Context(), svcCtx) 14 | resp, err := l.CreateNewUserID() 15 | if err != nil { 16 | httpx.ErrorCtx(r.Context(), w, err) 17 | } else { 18 | httpx.OkJsonCtx(r.Context(), w, resp) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /internal/scheduler/handler/custom_routes.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "net/http" 5 | 6 | "sxwl/3k/internal/scheduler/svc" 7 | 8 | "github.com/zeromicro/go-zero/rest" 9 | ) 10 | 11 | func RegisterCustomHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { 12 | server.AddRoutes( 13 | []rest.Route{ 14 | //{ 15 | // Method: http.MethodGet, 16 | // Path: "/cpod/job", 17 | // Handler: CpodJobHandler(serverCtx), 18 | //}, 19 | { 20 | Method: http.MethodDelete, 21 | Path: "/job/job", 22 | Handler: JobDeleteHandler(serverCtx), 23 | }, 24 | }, 25 | ) 26 | } 27 | 
-------------------------------------------------------------------------------- /internal/scheduler/handler/error.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | 7 | "github.com/zeromicro/go-zero/core/logx" 8 | "github.com/zeromicro/go-zero/rest/httpx" 9 | ) 10 | 11 | type response struct { 12 | Message string `json:"message"` 13 | } 14 | 15 | func InitErrorHandler() { 16 | httpx.SetErrorHandlerCtx(func(ctx context.Context, err error) (int, any) { 17 | errMsg := err.Error() 18 | 19 | logx.WithContext(ctx).Errorf("[api error] errMsg=%s", errMsg) 20 | 21 | return http.StatusBadRequest, response{ 22 | Message: errMsg, 23 | } 24 | }) 25 | } 26 | -------------------------------------------------------------------------------- /internal/scheduler/handler/user_list_handler.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "net/http" 5 | 6 | "sxwl/3k/internal/scheduler/logic" 7 | "sxwl/3k/internal/scheduler/svc" 8 | 9 | "github.com/zeromicro/go-zero/rest/httpx" 10 | ) 11 | 12 | func UserListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { 13 | return func(w http.ResponseWriter, r *http.Request) { 14 | l := logic.NewUserListLogic(r.Context(), svcCtx) 15 | resp, err := l.UserList() 16 | if err != nil { 17 | httpx.ErrorCtx(r.Context(), w, err) 18 | } else { 19 | httpx.OkJsonCtx(r.Context(), w, resp) 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /internal/scheduler/model/README.md: -------------------------------------------------------------------------------- 1 | # Model 2 | 3 | Data modeling code for storing data to and retrieving data from the database. 4 | 5 | All `*_gen.go` files, i.e. files with the `_gen.go` suffix, are generated by `goctl` (the go-zero code generator). 
6 | -------------------------------------------------------------------------------- /internal/scheduler/model/vars.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import "github.com/zeromicro/go-zero/core/stores/sqlx" 4 | 5 | var ErrNotFound = sqlx.ErrNotFound 6 | -------------------------------------------------------------------------------- /internal/scheduler/types/custom_types.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | //type CpodJobReq struct { 4 | // CPODID string `form:"cpodid"` 5 | //} 6 | -------------------------------------------------------------------------------- /internal/scheduler/user/user.go: -------------------------------------------------------------------------------- 1 | package user 2 | 3 | import ( 4 | "sxwl/3k/pkg/uuid" 5 | ) 6 | 7 | func NewUserID() (string, error) { 8 | return uuid.WithPrefix("user") 9 | } 10 | -------------------------------------------------------------------------------- /openapi/README.md: -------------------------------------------------------------------------------- 1 | # OpenAPI 2 | 3 | OpenAPI definition of the NascentCore Cloud. 
4 | -------------------------------------------------------------------------------- /pkg/README.md: -------------------------------------------------------------------------------- 1 | # pkg 2 | packages -------------------------------------------------------------------------------- /pkg/bcrypt/bcrypt.go: -------------------------------------------------------------------------------- 1 | package bcrypt 2 | 3 | import ( 4 | "golang.org/x/crypto/bcrypt" 5 | ) 6 | 7 | // GeneratePasswordHash takes a plaintext password and generates a bcrypt hashed password 8 | func GeneratePasswordHash(password string) (string, error) { 9 | bytes, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost) 10 | if err != nil { 11 | return "", err 12 | } 13 | return string(bytes), nil 14 | } 15 | 16 | // CheckPasswordHash compares a plaintext password with a hashed password to see if they match 17 | func CheckPasswordHash(password, hash string) bool { 18 | err := bcrypt.CompareHashAndPassword([]byte(hash), []byte(password)) 19 | return err == nil 20 | } 21 | -------------------------------------------------------------------------------- /pkg/cluster/README.md: -------------------------------------------------------------------------------- 1 | # cluster 2 | 3 | handle things related to k8s 4 | 1. get resource info 5 | 2. create apiobject 6 | 3. get object status 7 | 4. ...... 
8 | 9 | -------------------------------------------------------------------------------- /pkg/cluster/client-go/README.md: -------------------------------------------------------------------------------- 1 | # clientgo 2 | interact with cluster with clientgo -------------------------------------------------------------------------------- /pkg/consts/k8s.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | const ( 4 | ApiGroup = "cpod.cpod" 5 | ) 6 | -------------------------------------------------------------------------------- /pkg/consts/model.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | const ( 4 | ModelCategoryChat = "chat" 5 | ModelCategoryEmbedding = "embedding" 6 | ) 7 | -------------------------------------------------------------------------------- /pkg/email/email.go: -------------------------------------------------------------------------------- 1 | package email 2 | 3 | import "io" 4 | 5 | // Emailer defines the interface for sending emails. 6 | type Emailer interface { 7 | AddTemplate(name string, tmpl io.Reader) error 8 | SendPlainText(to []string, subject, body string) error 9 | SendTemplateEmail(to []string, subject, templateName string, data interface{}) error 10 | } 11 | 12 | // Config holds configuration for any SMTP client 13 | type Config struct { 14 | Host string 15 | Port int 16 | Username string 17 | SenderName string 18 | Password string 19 | } 20 | -------------------------------------------------------------------------------- /pkg/email/template.go: -------------------------------------------------------------------------------- 1 | package email 2 | 3 | import ( 4 | "bytes" 5 | "html/template" 6 | "io" 7 | ) 8 | 9 | // ParseTemplate parses an HTML template from an io.Reader and injects data. 
10 | func ParseTemplate(tmpl io.Reader, data interface{}) (string, error) { 11 | templateData, err := io.ReadAll(tmpl) 12 | if err != nil { 13 | return "", err 14 | } 15 | t, err := template.New("email").Parse(string(templateData)) 16 | if err != nil { 17 | return "", err 18 | } 19 | var buf bytes.Buffer 20 | if err := t.Execute(&buf, data); err != nil { 21 | return "", err 22 | } 23 | return buf.String(), nil 24 | } 25 | -------------------------------------------------------------------------------- /pkg/log/log.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "sxwl/3k/pkg/config" 5 | 6 | "go.uber.org/zap" 7 | ) 8 | 9 | // NO_TEST_NEEDED 10 | 11 | var Logger *zap.Logger = initLogger() 12 | var SLogger *zap.SugaredLogger = Logger.Sugar() 13 | 14 | func initLogger() *zap.Logger { 15 | var logger *zap.Logger 16 | var err error 17 | // TODO: 根据环境变量生成不同的Logger 18 | deploy := config.DEPLOY 19 | if deploy == "DEBUG" || deploy == "DEV" || deploy == "TEST" { 20 | logger, err = zap.NewDevelopment() 21 | } else { 22 | logger, err = zap.NewProduction() 23 | } 24 | if err != nil { 25 | panic("logger init err") 26 | } 27 | return logger 28 | } 29 | -------------------------------------------------------------------------------- /pkg/math/math.go: -------------------------------------------------------------------------------- 1 | package math 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | // Round rounds val to the nearest multiple of 1 / factor 8 | func Round(val float64, places int) float64 { 9 | factor := math.Pow(10, float64(places)) 10 | return math.Round(val*factor) / factor 11 | } 12 | -------------------------------------------------------------------------------- /pkg/model-uploader/README.md: -------------------------------------------------------------------------------- 1 | # model-uploader 2 | upload model after mpijob finish 
-------------------------------------------------------------------------------- /pkg/orm/string.go: -------------------------------------------------------------------------------- 1 | package orm 2 | 3 | import ( 4 | "database/sql" 5 | "time" 6 | ) 7 | 8 | func NullString(s string) sql.NullString { 9 | return sql.NullString{String: s, Valid: true} 10 | } 11 | 12 | func NullTime(t time.Time) sql.NullTime { 13 | return sql.NullTime{Time: t, Valid: true} 14 | } 15 | 16 | func NullInt64(i int64) sql.NullInt64 { 17 | return sql.NullInt64{Int64: i, Valid: true} 18 | } 19 | -------------------------------------------------------------------------------- /pkg/storage/README.md: -------------------------------------------------------------------------------- 1 | # storage 2 | handle things related to storage -------------------------------------------------------------------------------- /pkg/storage/pack_test.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestPack(t *testing.T) { 9 | err := Pack(".", []string{}) 10 | if err != nil { 11 | t.Error(err) 12 | } 13 | os.Remove("./data.zip") 14 | } 15 | -------------------------------------------------------------------------------- /pkg/testing/fs_test.go: -------------------------------------------------------------------------------- 1 | package testing 2 | 3 | import ( 4 | "testing" 5 | 6 | "sxwl/3k/pkg/utils/fs" 7 | ) 8 | 9 | // TestCreateTmpFile checks that the path returned by CreateTmpFile exists and that 10 | // the file returned by CreateTmpFileWithContent reads back as the content passed in. 11 | func TestCreateTmpFile(t *testing.T) { 12 | p := CreateTmpFile() 13 | if !fs.Exists(p) { 14 | t.Errorf("%s should be created, but does not exist", p) 15 | } 16 | 17 | p = CreateTmpFileWithContent("content") 18 | // Report what was actually read so a failure explains the mismatch. 19 | if got := MustReadFile(p); got != "content" { 20 | t.Errorf("%s should contain %q, got %q", p, "content", got) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /pkg/time/time.go: -------------------------------------------------------------------------------- 1 | 
package time 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func GetNearestMinute(t time.Time) time.Time { 8 | nearestMinute := time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), 0, 0, t.Location()) 9 | return nearestMinute 10 | } 11 | -------------------------------------------------------------------------------- /pkg/utils/config/README.md: -------------------------------------------------------------------------------- 1 | # Config 2 | 3 | Config reading APIs. 4 | -------------------------------------------------------------------------------- /pkg/utils/config/env_var.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | // Reading environment variables as config entries 4 | 5 | import ( 6 | "os" 7 | "strings" 8 | ) 9 | 10 | // Deprecated: Moved to src/utils/sys/env_var.go:EnvVars. 11 | func GetEnvVars() map[string]string { 12 | envVars := make(map[string]string) 13 | for _, e := range os.Environ() { 14 | pair := strings.SplitN(e, "=", 2) 15 | varName := pair[0] 16 | varValue := "" 17 | if len(pair) > 1 { 18 | varValue = pair[1] 19 | } 20 | envVars[varName] = varValue 21 | } 22 | return envVars 23 | } 24 | -------------------------------------------------------------------------------- /pkg/utils/config/env_var_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestGetEnvVar(t *testing.T) { 9 | os.Setenv("FOO", "1") 10 | envVars := GetEnvVars() 11 | fooVal, found := envVars["FOO"] 12 | if !found { 13 | t.Errorf("Could not find environment variable FOO") 14 | } 15 | if fooVal != "1" { 16 | t.Errorf("Env var FOO's value is wrong, expected '1', got %s", fooVal) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /pkg/utils/config/file_test.go: -------------------------------------------------------------------------------- 1 | package 
config 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestParse(t *testing.T) { 9 | got := parse("a: foo\nb:bar \n c:baz") 10 | expected := map[string]string { 11 | "a": "foo", 12 | "b": "bar", 13 | "c": "baz", 14 | } 15 | if !reflect.DeepEqual(got, expected) { 16 | t.Errorf("got %v expect %v", got, expected) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /pkg/utils/consts/http.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | // NO_TEST_NEEDED 4 | 5 | // Defines HTTP-related consts used in the codebase 6 | 7 | const ( 8 | GET = "GET" 9 | POST = "POST" 10 | Authorization = "Authorization" 11 | ) 12 | -------------------------------------------------------------------------------- /pkg/utils/consts/k8s.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | // NO_TEST_NEEDED 4 | 5 | // Defines K8s related consts used in the codebase 6 | 7 | const ( 8 | K8S_LABEL_NV_GPU_PRODUCT = "nvidia.com/gpu.product" 9 | K8S_LABEL_NV_GPU_PRESENT = "nvidia.com/gpu.present" 10 | ) 11 | -------------------------------------------------------------------------------- /pkg/utils/errors/errors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // UnImpl returns an error that indicate the function named as the input 8 | // is not implemented yet. 9 | func UnImpl(name string) error { 10 | return fmt.Errorf("%s is not implemented yet", name) 11 | } 12 | -------------------------------------------------------------------------------- /pkg/utils/errors/errors_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | // TestUnImpl tests UnImpl returns an error with expected error message. 
8 | func TestUnImpl(t *testing.T) { 9 | if UnImpl("test").Error() != "test is not implemented yet" { 10 | t.Fatal(UnImpl("test")) 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pkg/utils/fs/fs.go: -------------------------------------------------------------------------------- 1 | package fs 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | func Exists(path string) bool { 8 | _, err := os.Stat(path) 9 | return !os.IsNotExist(err) 10 | } 11 | -------------------------------------------------------------------------------- /pkg/uuid/uuid.go: -------------------------------------------------------------------------------- 1 | package uuid 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/google/uuid" 7 | ) 8 | 9 | func WithPrefix(prefix string) (string, error) { 10 | newUUID, err := uuid.NewRandom() 11 | if err != nil { 12 | return "", err 13 | } 14 | 15 | return fmt.Sprintf("%s-%s", prefix, newUUID.String()), nil 16 | } 17 | -------------------------------------------------------------------------------- /tools/.shellcheckrc: -------------------------------------------------------------------------------- 1 | disable=SC2086 2 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | 各类小工具的代码。 4 | -------------------------------------------------------------------------------- /tools/dingtalk-sync/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2024.8.30 2 | charset-normalizer==3.4.0 3 | idna==3.10 4 | PyMySQL==1.1.1 5 | requests==2.32.3 6 | urllib3==2.2.3 7 | -------------------------------------------------------------------------------- /tools/download-hf-datasets/main.py: -------------------------------------------------------------------------------- 1 | import datasets 2 | 3 | dataset = 
datasets.load_dataset("wikitext","wikitext-2-v1",split="train") 4 | dataset.save_to_disk('wikitext') 5 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/config.tpl: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import {{.authImport}} 4 | 5 | type Config struct { 6 | rest.RestConf 7 | {{.auth}} 8 | {{.jwtTrans}} 9 | } 10 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/context.tpl: -------------------------------------------------------------------------------- 1 | package svc 2 | 3 | import ( 4 | {{.configImport}} 5 | ) 6 | 7 | type ServiceContext struct { 8 | Config {{.config}} 9 | {{.middleware}} 10 | } 11 | 12 | func NewServiceContext(c {{.config}}) *ServiceContext { 13 | return &ServiceContext{ 14 | Config: c, 15 | {{.middlewareAssignment}} 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/etc.tpl: -------------------------------------------------------------------------------- 1 | Name: {{.serviceName}} 2 | Host: {{.host}} 3 | Port: {{.port}} 4 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/logic.tpl: -------------------------------------------------------------------------------- 1 | package {{.pkgName}} 2 | 3 | import ( 4 | {{.imports}} 5 | ) 6 | 7 | type {{.logic}} struct { 8 | logx.Logger 9 | ctx context.Context 10 | svcCtx *svc.ServiceContext 11 | } 12 | 13 | func New{{.logic}}(ctx context.Context, svcCtx *svc.ServiceContext) *{{.logic}} { 14 | return &{{.logic}}{ 15 | Logger: logx.WithContext(ctx), 16 | ctx: ctx, 17 | svcCtx: svcCtx, 18 | } 19 | } 20 | 21 | func (l *{{.logic}}) {{.function}}({{.request}}) {{.responseType}} { 22 | // todo: add your logic here and delete this line 23 | 24 | {{.returnString}} 25 | } 26 | 
-------------------------------------------------------------------------------- /tools/go-zero-template/api/main.tpl: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | 7 | {{.importPackages}} 8 | ) 9 | 10 | var configFile = flag.String("f", "etc/{{.serviceName}}.yaml", "the config file") 11 | 12 | func main() { 13 | flag.Parse() 14 | 15 | var c config.Config 16 | conf.MustLoad(*configFile, &c) 17 | 18 | server := rest.MustNewServer(c.RestConf) 19 | defer server.Stop() 20 | 21 | ctx := svc.NewServiceContext(c) 22 | handler.RegisterHandlers(server, ctx) 23 | 24 | fmt.Printf("Starting server at %s:%d...\n", c.Host, c.Port) 25 | server.Start() 26 | } 27 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/middleware.tpl: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import "net/http" 4 | 5 | type {{.name}} struct { 6 | } 7 | 8 | func New{{.name}}() *{{.name}} { 9 | return &{{.name}}{} 10 | } 11 | 12 | func (m *{{.name}})Handle(next http.HandlerFunc) http.HandlerFunc { 13 | return func(w http.ResponseWriter, r *http.Request) { 14 | // TODO generate middleware implement function, delete after code implementation 15 | 16 | // Passthrough to next handler if need 17 | next(w, r) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/route-addition.tpl: -------------------------------------------------------------------------------- 1 | 2 | server.AddRoutes( 3 | {{.routes}} {{.jwt}}{{.signature}} {{.prefix}} {{.timeout}} {{.maxBytes}} 4 | ) 5 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/routes.tpl: -------------------------------------------------------------------------------- 1 | // Code generated by goctl. DO NOT EDIT. 
2 | package handler 3 | 4 | import ( 5 | "net/http"{{if .hasTimeout}} 6 | "time"{{end}} 7 | 8 | {{.importPackages}} 9 | ) 10 | 11 | func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { 12 | {{.routesAdditions}} 13 | } 14 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/template.tpl: -------------------------------------------------------------------------------- 1 | syntax = "v1" 2 | 3 | info ( 4 | title: // TODO: add title 5 | desc: // TODO: add description 6 | author: "{{.gitUser}}" 7 | email: "{{.gitEmail}}" 8 | ) 9 | 10 | type request { 11 | // TODO: add members here and delete this comment 12 | } 13 | 14 | type response { 15 | // TODO: add members here and delete this comment 16 | } 17 | 18 | service {{.serviceName}} { 19 | @handler GetUser // TODO: set handler name and delete this comment 20 | get /users/id/:userId(request) returns(response) 21 | 22 | @handler CreateUser // TODO: set handler name and delete this comment 23 | post /users/create(request) 24 | } 25 | -------------------------------------------------------------------------------- /tools/go-zero-template/api/types.tpl: -------------------------------------------------------------------------------- 1 | // Code generated by goctl. DO NOT EDIT. 
2 | package types{{if .containsTime}} 3 | import ( 4 | "time" 5 | ){{end}} 6 | {{.types}} 7 | -------------------------------------------------------------------------------- /tools/go-zero-template/gateway/main.tpl: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | 6 | "github.com/zeromicro/go-zero/core/conf" 7 | "github.com/zeromicro/go-zero/gateway" 8 | ) 9 | 10 | var configFile = flag.String("f", "etc/gateway.yaml", "config file") 11 | 12 | func main() { 13 | flag.Parse() 14 | 15 | var c gateway.GatewayConf 16 | conf.MustLoad(*configFile, &c) 17 | gw := gateway.MustNewServer(c) 18 | defer gw.Stop() 19 | gw.Start() 20 | } 21 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/err.tpl: -------------------------------------------------------------------------------- 1 | package {{.pkg}} 2 | 3 | import "github.com/zeromicro/go-zero/core/stores/sqlx" 4 | 5 | var ErrNotFound = sqlx.ErrNotFound 6 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/field.tpl: -------------------------------------------------------------------------------- 1 | {{.name}} {{.type}} {{.tag}} {{if .hasComment}}// {{.comment}}{{end}} -------------------------------------------------------------------------------- /tools/go-zero-template/model/find-one-by-field-extra-method.tpl: -------------------------------------------------------------------------------- 1 | func (m *default{{.upperStartCamelObject}}Model) formatPrimary(primary any) string { 2 | return fmt.Sprintf("%s%v", {{.primaryKeyLeft}}, primary) 3 | } 4 | 5 | func (m *default{{.upperStartCamelObject}}Model) queryPrimary(ctx context.Context, conn sqlx.SqlConn, v, primary any) error { 6 | query := fmt.Sprintf("select %s from %s where {{.originalPrimaryField}} = {{if .postgreSql}}$1{{else}}?{{end}} limit 1", 
{{.lowerStartCamelObject}}Rows, m.table ) 7 | return conn.QueryRowCtx(ctx, v, query, primary) 8 | } 9 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/import-no-cache.tpl: -------------------------------------------------------------------------------- 1 | import ( 2 | "context" 3 | "database/sql" 4 | "fmt" 5 | "strings" 6 | {{if .time}}"time"{{end}} 7 | 8 | {{if .containsPQ}}"github.com/lib/pq"{{end}} 9 | "github.com/zeromicro/go-zero/core/stores/builder" 10 | "github.com/zeromicro/go-zero/core/stores/sqlc" 11 | "github.com/zeromicro/go-zero/core/stores/sqlx" 12 | "github.com/zeromicro/go-zero/core/stringx" 13 | ) 14 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/import.tpl: -------------------------------------------------------------------------------- 1 | import ( 2 | "context" 3 | "database/sql" 4 | "fmt" 5 | "strings" 6 | {{if .time}}"time"{{end}} 7 | 8 | {{if .containsPQ}}"github.com/lib/pq"{{end}} 9 | "github.com/zeromicro/go-zero/core/stores/builder" 10 | "github.com/zeromicro/go-zero/core/stores/cache" 11 | "github.com/zeromicro/go-zero/core/stores/sqlc" 12 | "github.com/zeromicro/go-zero/core/stores/sqlx" 13 | "github.com/zeromicro/go-zero/core/stringx" 14 | ) 15 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/interface-delete.tpl: -------------------------------------------------------------------------------- 1 | Delete(ctx context.Context, {{.lowerStartCamelPrimaryKey}} {{.dataType}}) error -------------------------------------------------------------------------------- /tools/go-zero-template/model/interface-find-one-by-field.tpl: -------------------------------------------------------------------------------- 1 | FindOneBy{{.upperField}}(ctx context.Context, {{.in}}) (*{{.upperStartCamelObject}}, error) 
-------------------------------------------------------------------------------- /tools/go-zero-template/model/interface-find-one.tpl: -------------------------------------------------------------------------------- 1 | FindOne(ctx context.Context, {{.lowerStartCamelPrimaryKey}} {{.dataType}}) (*{{.upperStartCamelObject}}, error) -------------------------------------------------------------------------------- /tools/go-zero-template/model/interface-insert.tpl: -------------------------------------------------------------------------------- 1 | Insert(ctx context.Context, data *{{.upperStartCamelObject}}) (sql.Result,error) -------------------------------------------------------------------------------- /tools/go-zero-template/model/interface-update.tpl: -------------------------------------------------------------------------------- 1 | Update(ctx context.Context, {{if .containsIndexCache}}newData{{else}}data{{end}} *{{.upperStartCamelObject}}) error -------------------------------------------------------------------------------- /tools/go-zero-template/model/model-gen.tpl: -------------------------------------------------------------------------------- 1 | // Code generated by goctl. DO NOT EDIT. 
2 | 3 | package {{.pkg}} 4 | {{.imports}} 5 | {{.vars}} 6 | {{.types}} 7 | {{.new}} 8 | {{.delete}} 9 | {{.find}} 10 | {{.insert}} 11 | {{.update}} 12 | {{.extraMethod}} 13 | {{.tableName}} 14 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/model-new.tpl: -------------------------------------------------------------------------------- 1 | func new{{.upperStartCamelObject}}Model(conn sqlx.SqlConn{{if .withCache}}, c cache.CacheConf, opts ...cache.Option{{end}}) *default{{.upperStartCamelObject}}Model { 2 | return &default{{.upperStartCamelObject}}Model{ 3 | {{if .withCache}}CachedConn: sqlc.NewConn(conn, c, opts...){{else}}conn:conn{{end}}, 4 | table: {{.table}}, 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/table-name.tpl: -------------------------------------------------------------------------------- 1 | func (m *default{{.upperStartCamelObject}}Model) tableName() string { 2 | return m.table 3 | } 4 | -------------------------------------------------------------------------------- /tools/go-zero-template/model/tag.tpl: -------------------------------------------------------------------------------- 1 | `db:"{{.field}}"` -------------------------------------------------------------------------------- /tools/go-zero-template/model/types.tpl: -------------------------------------------------------------------------------- 1 | type ( 2 | {{.lowerStartCamelObject}}Model interface{ 3 | {{.method}} 4 | } 5 | 6 | default{{.upperStartCamelObject}}Model struct { 7 | {{if .withCache}}sqlc.CachedConn{{else}}conn sqlx.SqlConn{{end}} 8 | table string 9 | } 10 | 11 | {{.upperStartCamelObject}} struct { 12 | {{.fields}} 13 | } 14 | ) 15 | -------------------------------------------------------------------------------- /tools/go-zero-template/newapi/newtemplate.tpl: 
-------------------------------------------------------------------------------- 1 | type Request { 2 | Name string `path:"name,options=you|me"` 3 | } 4 | 5 | type Response { 6 | Message string `json:"message"` 7 | } 8 | 9 | service {{.name}}-api { 10 | @handler {{.handler}}Handler 11 | get /from/:name(Request) returns (Response) 12 | } 13 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/call.tpl: -------------------------------------------------------------------------------- 1 | {{.head}} 2 | 3 | package {{.filePackage}} 4 | 5 | import ( 6 | "context" 7 | 8 | {{.pbPackage}} 9 | {{if ne .pbPackage .protoGoPackage}}{{.protoGoPackage}}{{end}} 10 | 11 | "github.com/zeromicro/go-zero/zrpc" 12 | "google.golang.org/grpc" 13 | ) 14 | 15 | type ( 16 | {{.alias}} 17 | 18 | {{.serviceName}} interface { 19 | {{.interface}} 20 | } 21 | 22 | default{{.serviceName}} struct { 23 | cli zrpc.Client 24 | } 25 | ) 26 | 27 | func New{{.serviceName}}(cli zrpc.Client) {{.serviceName}} { 28 | return &default{{.serviceName}}{ 29 | cli: cli, 30 | } 31 | } 32 | 33 | {{.functions}} 34 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/config.tpl: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "github.com/zeromicro/go-zero/zrpc" 4 | 5 | type Config struct { 6 | zrpc.RpcServerConf 7 | } 8 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/etc.tpl: -------------------------------------------------------------------------------- 1 | Name: {{.serviceName}}.rpc 2 | ListenOn: 0.0.0.0:8080 3 | Etcd: 4 | Hosts: 5 | - 127.0.0.1:2379 6 | Key: {{.serviceName}}.rpc 7 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/logic-func.tpl: 
-------------------------------------------------------------------------------- 1 | {{if .hasComment}}{{.comment}}{{end}} 2 | func (l *{{.logicName}}) {{.method}} ({{if .hasReq}}in {{.request}}{{if .stream}},stream {{.streamBody}}{{end}}{{else}}stream {{.streamBody}}{{end}}) ({{if .hasReply}}{{.response}},{{end}} error) { 3 | // todo: add your logic here and delete this line 4 | 5 | return {{if .hasReply}}&{{.responseType}}{},{{end}} nil 6 | } 7 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/logic.tpl: -------------------------------------------------------------------------------- 1 | package {{.packageName}} 2 | 3 | import ( 4 | "context" 5 | 6 | {{.imports}} 7 | 8 | "github.com/zeromicro/go-zero/core/logx" 9 | ) 10 | 11 | type {{.logicName}} struct { 12 | ctx context.Context 13 | svcCtx *svc.ServiceContext 14 | logx.Logger 15 | } 16 | 17 | func New{{.logicName}}(ctx context.Context,svcCtx *svc.ServiceContext) *{{.logicName}} { 18 | return &{{.logicName}}{ 19 | ctx: ctx, 20 | svcCtx: svcCtx, 21 | Logger: logx.WithContext(ctx), 22 | } 23 | } 24 | {{.functions}} 25 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/server-func.tpl: -------------------------------------------------------------------------------- 1 | 2 | {{if .hasComment}}{{.comment}}{{end}} 3 | func (s *{{.server}}Server) {{.method}} ({{if .notStream}}ctx context.Context,{{if .hasReq}} in {{.request}}{{end}}{{else}}{{if .hasReq}} in {{.request}},{{end}}stream {{.streamBody}}{{end}}) ({{if .notStream}}{{.response}},{{end}}error) { 4 | l := {{.logicPkg}}.New{{.logicName}}({{if .notStream}}ctx,{{else}}stream.Context(),{{end}}s.svcCtx) 5 | return l.{{.method}}({{if .hasReq}}in{{if .stream}} ,stream{{end}}{{else}}{{if .stream}}stream{{end}}{{end}}) 6 | } 7 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/server.tpl: 
-------------------------------------------------------------------------------- 1 | {{.head}} 2 | 3 | package server 4 | 5 | import ( 6 | {{if .notStream}}"context"{{end}} 7 | 8 | {{.imports}} 9 | ) 10 | 11 | type {{.server}}Server struct { 12 | svcCtx *svc.ServiceContext 13 | {{.unimplementedServer}} 14 | } 15 | 16 | func New{{.server}}Server(svcCtx *svc.ServiceContext) *{{.server}}Server { 17 | return &{{.server}}Server{ 18 | svcCtx: svcCtx, 19 | } 20 | } 21 | 22 | {{.funcs}} 23 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/svc.tpl: -------------------------------------------------------------------------------- 1 | package svc 2 | 3 | import {{.imports}} 4 | 5 | type ServiceContext struct { 6 | Config config.Config 7 | } 8 | 9 | func NewServiceContext(c config.Config) *ServiceContext { 10 | return &ServiceContext{ 11 | Config:c, 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tools/go-zero-template/rpc/template.tpl: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package {{.package}}; 4 | option go_package="./{{.package}}"; 5 | 6 | message Request { 7 | string ping = 1; 8 | } 9 | 10 | message Response { 11 | string pong = 1; 12 | } 13 | 14 | service {{.serviceName}} { 15 | rpc Ping(Request) returns(Response); 16 | } 17 | -------------------------------------------------------------------------------- /tools/hf-model/README.md: -------------------------------------------------------------------------------- 1 | # HF Model 2 | 3 | Code for testing HF Model API. 4 | -------------------------------------------------------------------------------- /tools/hf-model/import_model.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | 3 | # This meant to download the requested pretrained model from HuggingFace. 
4 | # Should be replaced by a git lfs command instead. 5 | tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 6 | -------------------------------------------------------------------------------- /tools/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function print_divider() { 4 | echo "============================" 5 | } 6 | 7 | echo 8 | print_divider 9 | echo "Running golangci-lint ..." 10 | print_divider 11 | golangci-lint run --fix --config=.github/linters/.golangci.yml 12 | 13 | echo 14 | print_divider 15 | echo "Running check_all ..." 16 | print_divider 17 | .github/scripts/check_all.sh 18 | -------------------------------------------------------------------------------- /tools/super_linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# -lt 1 ]]; then 4 | echo "Usage: $0 <target>" 5 | exit 1 6 | fi 7 | 8 | target="$1" 9 | tot=$(git rev-parse --show-toplevel) 10 | tmp_lint="/tmp/lint" 11 | 12 | docker run --rm --env-file ${tot}/.github/super_linter.env \ 13 | -e USE_FIND_ALGORITHM=true -e RUN_LOCAL=true \ 14 | -v ${tot}/.github/super_linter.env:${tmp_lint}/.github/super_linter.env \ 15 | -v ${tot}/.github/linters:${tmp_lint}/.github/linters \ 16 | -v ${tot}/.git:${tmp_lint}/.git \ 17 | -v ${tot}/${target}:${tmp_lint}/${target} \ 18 | --workdir ${tmp_lint} \ 19 | registry.ap-southeast-1.aliyuncs.com/sxwl-ai/super-linter:slim-v5 20 | -------------------------------------------------------------------------------- /tools/torch_check_cuda.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # python torch_check_cuda.py 4 | # Prints the info about the GPU 5 | print("Is CUDA available:", torch.cuda.is_available(), 6 | "\nCurrent device:", torch.cuda.current_device(), 7 | "\nDevice[0] name:", torch.cuda.get_device_name(0)) 8 | 
-------------------------------------------------------------------------------- /ui/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [*.md] 13 | trim_trailing_whitespace = false 14 | 15 | [Makefile] 16 | indent_style = tab 17 | -------------------------------------------------------------------------------- /ui/.eslintignore: -------------------------------------------------------------------------------- 1 | /lambda/ 2 | /scripts 3 | /config 4 | .history 5 | public 6 | dist 7 | .umi 8 | mock -------------------------------------------------------------------------------- /ui/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: [require.resolve('@umijs/lint/dist/config/eslint')], 3 | globals: { 4 | page: true, 5 | REACT_APP_ENV: true, 6 | }, 7 | }; 8 | -------------------------------------------------------------------------------- /ui/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | **/node_modules 5 | # roadhog-api-doc ignore 6 | /src/utils/request-temp.js 7 | _roadhog-api-doc 8 | 9 | # production 10 | /dist 11 | 12 | # misc 13 | .DS_Store 14 | npm-debug.log* 15 | yarn-error.log 16 | 17 | /coverage 18 | .idea 19 | yarn.lock 20 | package-lock.json 21 | *bak 22 | .vscode 23 | 24 | 25 | # visual studio code 26 | .history 27 | *.log 28 | functions/* 29 | .temp/** 30 | 31 | # umi 32 | .umi 33 | .umi-production 34 | .umi-test 35 | 36 | # screenshot 37 | screenshot 38 | .firebase 39 | .eslintcache 40 | 41 | build 42 | pnpm-lock.yaml 43 | -------------------------------------------------------------------------------- /ui/.prettierignore: -------------------------------------------------------------------------------- 1 | **/*.svg 2 | .umi 3 | .umi-production 4 | /dist 5 | .dockerignore 6 | .DS_Store 7 | .eslintignore 8 | *.png 9 | *.toml 10 | docker 11 | .editorconfig 12 | Dockerfile* 13 | .gitignore 14 | .prettierignore 15 | LICENSE 16 | .eslintcache 17 | *.lock 18 | yarn-error.log 19 | .history 20 | CNAME 21 | /build 22 | /public 23 | -------------------------------------------------------------------------------- /ui/.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | singleQuote: true, 3 | trailingComma: 'all', 4 | printWidth: 100, 5 | proseWrap: 'never', 6 | endOfLine: 'lf', 7 | overrides: [ 8 | { 9 | files: '.prettierrc', 10 | options: { 11 | parser: 'json', 12 | }, 13 | }, 14 | { 15 | files: 'document.ejs', 16 | options: { 17 | parser: 'html', 18 | }, 19 | }, 20 | ], 21 | }; 22 | -------------------------------------------------------------------------------- /ui/Dockerfile: -------------------------------------------------------------------------------- 1 | # 阶段 1: 构建 2 | # 使用Node官方镜像作为构建环境 3 | FROM node:18 as build-stage 4 | 5 | # 设置工作目录 6 | WORKDIR /app 7 | 8 | # 复制package.json和yarn.lock文件到工作目录 9 | COPY package.json ./ 10 | 11 | # 安装项目依赖 12 | 
RUN yarn install 13 | 14 | # 复制剩余的项目文件到工作目录 15 | COPY . . 16 | 17 | # 构建项目 18 | RUN yarn build 19 | 20 | # 阶段 2: 部署 21 | # 使用官方Nginx镜像作为运行环境 22 | FROM nginx:latest as production-stage 23 | 24 | # 将构建好的文件从构建阶段复制到Nginx容器 25 | COPY --from=build-stage /app/dist /usr/share/nginx/html 26 | 27 | # (可选)如果你有自定义的Nginx配置文件,可以用它来替换默认的配置 28 | COPY --from=build-stage /app/nginx.conf /etc/nginx/conf.d/default.conf 29 | 30 | # 暴露容器的80端口 31 | EXPOSE 80 32 | 33 | # 使用Nginx镜像的默认启动命令启动Nginx 34 | CMD ["nginx", "-g", "daemon off;"] 35 | -------------------------------------------------------------------------------- /ui/README.md: -------------------------------------------------------------------------------- 1 | # 算想云 Web UI 2 | 3 | 这里是算想云 Web UI 代码 4 | 5 | ## 国际化配置目录 6 | 7 | src/locales 8 | 9 | ## 编译构建 10 | 11 | \*\*推荐 node 版本:18 及以上 12 | 13 | ``` 14 | # 安装依赖 15 | yarn install 16 | 17 | # 启动服务 localhost:8000 18 | yarn start 19 | 20 | # 打包项目 21 | # 不管是将项目部署到 nginx 还是其他服务器,都需要先将项目打包 22 | 23 | yarn build 24 | 25 | # 打包完成后会在根目录生成 dist 文件夹,我们需要将他上传到服务器中, 26 | # 举例上传到/home/aiadmin中 27 | # 在 nginx/conf/nginx.conf 添加配置 28 | server { 29 | listen 80; 30 | server_name 域名; 31 | location / { 32 | root /home/aiadmin/dist; #dist上传的路径 33 | index index.html; 34 | } 35 | } 36 | 37 | #重启Nginx 38 | systemctl restart nginx 39 | 40 | 重启 nginx 后,访问你的域名 41 | ``` 42 | -------------------------------------------------------------------------------- /ui/jest.config.ts: -------------------------------------------------------------------------------- 1 | import { configUmiAlias, createConfig } from '@umijs/max/test'; 2 | 3 | export default async () => { 4 | const config = await configUmiAlias({ 5 | ...createConfig({ 6 | target: 'browser', 7 | }), 8 | }); 9 | console.log(JSON.stringify(config)); 10 | 11 | return { 12 | ...config, 13 | testEnvironmentOptions: { 14 | ...(config?.testEnvironmentOptions || {}), 15 | url: 'http://localhost:8000', 16 | }, 17 | setupFiles: [...(config.setupFiles || []), 
'./tests/setupTests.jsx'], 18 | globals: { 19 | ...config.globals, 20 | localStorage: null, 21 | }, 22 | }; 23 | }; 24 | -------------------------------------------------------------------------------- /ui/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "jsx": "react-jsx", 4 | "emitDecoratorMetadata": true, 5 | "experimentalDecorators": true, 6 | "baseUrl": ".", 7 | "paths": { 8 | "@/*": ["./src/*"] 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /ui/pkg/sumdb/sum.golang.org/latest: -------------------------------------------------------------------------------- 1 | go.sum database tree 2 | 30427680 3 | Nqi9bSYvzcucRDlafkhGSpCqd0QP5sTd8cd4KPcGYuw= 4 | 5 | — sum.golang.org Az3grmZWaBQsrDbMWFXJYGat9aXIvSfehEXKLoKTD6lY9ztfmwYYo783EEPEqS0+r55tbjxFrWVuQh6SizAQYhIYaAc= 6 | -------------------------------------------------------------------------------- /ui/public/CNAME: -------------------------------------------------------------------------------- 1 | preview.pro.ant.design -------------------------------------------------------------------------------- /ui/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/public/favicon.ico -------------------------------------------------------------------------------- /ui/public/icons/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/public/icons/icon.ico -------------------------------------------------------------------------------- /ui/src/access.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @see https://umijs.org/docs/max/access#access 3 | * */ 4 | export default 
function access(initialState: { currentUser?: API.CurrentUser } | undefined) { 5 | const { currentUser } = initialState ?? {}; 6 | 7 | return { 8 | adminRouteFilter: currentUser && currentUser?.isAdmin, 9 | }; 10 | } 11 | -------------------------------------------------------------------------------- /ui/src/components/Footer/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | const Footer: React.FC = () => { 4 | return <>; 5 | }; 6 | 7 | export default Footer; 8 | -------------------------------------------------------------------------------- /ui/src/components/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 这个文件作为组件的目录 3 | * 目的是统一管理对外输出的组件,方便分类 4 | */ 5 | /** 6 | * 布局组件 7 | */ 8 | import Footer from './Footer'; 9 | import { Question, SelectLang } from './RightContent'; 10 | import { AvatarDropdown, AvatarName } from './RightContent/AvatarDropdown'; 11 | 12 | export { Footer, Question, SelectLang, AvatarDropdown, AvatarName }; 13 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/adapter.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 适配器 国际化 3 | */ 4 | export default { 5 | 'pages.adapter.tabs.title.public': 'Public Adapters', 6 | 'pages.adapter.tabs.title.user': 'User Adapters', 7 | 'pages.adapter.table.column.name': 'Adapter Name', 8 | 'pages.adapter.table.column.size': 'Adapter Size', 9 | }; 10 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/applicationMenu.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 大模型应用 3 | */ 4 | export default { 5 | 'pages.applicationMenu.drawer.title.deployApp': 'Deploy Application', 6 | 'pages.applicationMenu.appCard.status': 'Status', 7 | 'pages.applicationMenu.appCard.access': 'Access 
Knowledge Base', 8 | 'pages.applicationMenu.appCard.deployment': 'Deployment', 9 | 'pages.applicationMenu.appCard.onDeployment': 'Deploying', 10 | 'pages.applicationMenu.appAddForm.form.app_name': 'Application Name', 11 | 'pages.applicationMenu.appAddForm.form.inference_select': 'Inference Instance', 12 | 'pages.applicationMenu.appAddForm.form.inference_select_placeholder': 'Please select an inference instance', 13 | }; 14 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/dataset.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 数据集 国际化 3 | */ 4 | export default { 5 | 'pages.dataset.tabs.title.public': 'Public Dataset', 6 | 'pages.dataset.tabs.title.user': 'User Dataset', 7 | 'pages.dataset.table.column.name': 'Dataset Name', 8 | 'pages.dataset.table.column.size': 'Dataset Size', 9 | 'pages.dataset.table.column.desc': 'Dataset Description', 10 | 'pages.dataset.table.column.samples': 'Total Samples', 11 | 'pages.dataset.table.column.actions': 'Actions', 12 | 'pages.dataset.table.action.preview': 'Preview', 13 | 'pages.dataset.preview.title': 'Dataset Preview', 14 | 'pages.dataset.preview.title.with.count': 'Dataset Preview (First 5 Records)', 15 | }; 16 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/jobDetail.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | 'jobDetail.title': 'Job Details', 3 | 'jobDetail.description': 'This is the job details page.', 4 | 'pages.jobDetail.table.column.jobId': 'Job ID', 5 | 'pages.jobDetail.table.column.modelName': 'Model', 6 | 'pages.jobDetail.table.column.jobType': 'Type', 7 | 'pages.jobDetail.table.column.gpuModel': 'GPU Model', 8 | 'pages.jobDetail.table.column.gpuCount': 'GPU Count', 9 | 'pages.jobDetail.table.column.status': 'Status', 10 | 'pages.jobDetail.table.column.startTime': 'Start Time', 11 | 
'pages.jobDetail.table.column.endTime': 'End Time', 12 | 'pages.jobDetail.table.column.action': 'Action', 13 | // 添加其他需要的国际化 ID 14 | }; 15 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/oem.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * oem 相关配置 3 | */ 4 | export default { 5 | 'app.title': 'SXWL.AI', 6 | 'app.logo': 'https://sxwl.ai/img/favicon.ico', 7 | 'pages.login.title': 'SXWL.AI', 8 | 'pages.login.servicePhone': 'Service phone:15910236560', 9 | 'pages.login.serviceEmail': 'Service email:help@sxwl.ai', 10 | }; 11 | -------------------------------------------------------------------------------- /ui/src/locales/en-US/playground.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | 'playground.copy.success': 'Code copied to clipboard', 3 | 'playground.copy.failed': 'Failed to copy', 4 | 'playground.copy.button': 'Copy', 5 | }; -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/adapter.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 适配器 国际化 3 | */ 4 | export default { 5 | 'pages.adapter.tabs.title.public': '公共适配器', 6 | 'pages.adapter.tabs.title.user': '私有适配器', 7 | 'pages.adapter.table.column.name': '适配器名称', 8 | 'pages.adapter.table.column.size': '适配器体积', 9 | }; 10 | -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/applicationMenu.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 大模型应用 3 | */ 4 | export default { 5 | 'pages.applicationMenu.darwer.title.deployApp': '部署应用', 6 | 'pages.applicationMenu.appCard.status': '状态', 7 | 'pages.applicationMenu.appCard.access': '访问知识库', 8 | 'pages.applicationMenu.appCard.deployment': '部署', 9 | 'pages.applicationMenu.appCard.onDeployment': '部署中', 10 | 
'pages.applicationMenu.appAddForm.form.app_name': '应用名称', 11 | 'pages.applicationMenu.appAddForm.form.inference_select': '推理实例', 12 | 'pages.applicationMenu.appAddForm.form.inference_select_placeholder': '请选择推理服务实例', 13 | }; 14 | -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/dataset.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 数据集 国际化 3 | */ 4 | export default { 5 | 'pages.dataset.tabs.title.public': '公共数据集', 6 | 'pages.dataset.tabs.title.user': '私有数据集', 7 | 'pages.dataset.table.column.name': '数据集名称', 8 | 'pages.dataset.table.column.size': '数据集大小', 9 | 'pages.dataset.table.column.desc': '数据集说明', 10 | 'pages.dataset.table.column.samples': '样本总数', 11 | 'pages.dataset.table.column.actions': '操作', 12 | 'pages.dataset.table.action.preview': '预览', 13 | 'pages.dataset.preview.title': '数据集预览', 14 | 'pages.dataset.preview.title.with.count': '数据集预览(前5条记录)', 15 | }; 16 | -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/jobDetail.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | 'jobDetail.title': '任务详情', 3 | 'jobDetail.description': '这是任务的详细信息页面。', 4 | 'pages.jobDetail.table.column.jobId': '任务ID', 5 | 'pages.jobDetail.table.column.modelName': '模型', 6 | 'pages.jobDetail.table.column.jobType': '类型', 7 | 'pages.jobDetail.table.column.gpuModel': 'GPU型号', 8 | 'pages.jobDetail.table.column.gpuCount': 'GPU数量', 9 | 'pages.jobDetail.table.column.status': '状态', 10 | 'pages.jobDetail.table.column.startTime': '启动时间', 11 | 'pages.jobDetail.table.column.endTime': '终止时间', 12 | 'pages.jobDetail.table.column.action': '操作', 13 | // 添加其他需要的国际化 ID 14 | }; 15 | -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/oem.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * oem 相关配置 3 | 
*/ 4 | 5 | export default { 6 | 'app.title': '算想云', 7 | 'app.logo': 'https://sxwl.ai/img/favicon.ico', 8 | 'pages.login.title': '算想云', 9 | 'pages.login.servicePhone': '客服电话:15910236560', 10 | 'pages.login.serviceEmail': '客服邮箱:help@sxwl.ai', 11 | }; 12 | -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/playground.ts: -------------------------------------------------------------------------------- 1 | export default { 2 | 'playground.copy.success': '代码已复制到剪贴板', 3 | 'playground.copy.failed': '复制失败', 4 | 'playground.copy.button': '复制', 5 | }; -------------------------------------------------------------------------------- /ui/src/locales/zh-CN/userQuota.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * 用户配额 国际化 3 | */ 4 | export default { 5 | 'pages.userQuota.table.column.name': '用户', 6 | 'pages.userQuota.table.column.role': '资源类型', 7 | 'pages.userQuota.table.column.gpu_product': '资源配额', 8 | 'pages.userQuota.table.column.action': '操作', 9 | 'pages.userQuota.edit.form.success': '操作成功', 10 | 'pages.userQuota.edit.form.name': '用户', 11 | 'pages.userQuota.edit.form.role': '资源类型', 12 | 'pages.userQuota.edit.form.gpu_product': '资源配额', 13 | 'pages.userQuota.edit.form.confirm': '确定', 14 | 'pages.userQuota.edit.form.cancel': '放弃', 15 | 'pages.userQuota.add.form.title': '添加配额', 16 | 'pages.userQuota.edit.form.title': '修改配额', 17 | }; 18 | -------------------------------------------------------------------------------- /ui/src/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "算想云", 3 | "short_name": "算想未来-算想云", 4 | "display": "standalone", 5 | "start_url": "./?utm_source=homescreen", 6 | "theme_color": "#002140", 7 | "background_color": "#001529", 8 | "icons": [ 9 | { 10 | "src": "icons/icon-192x192.png", 11 | "sizes": "192x192" 12 | }, 13 | { 14 | "src": "icons/icon-128x128.png", 15 | "sizes": "128x128" 16 | }, 17 | 
{ 18 | "src": "icons/icon-512x512.png", 19 | "sizes": "512x512" 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /ui/src/pages/404.tsx: -------------------------------------------------------------------------------- 1 | import { history } from '@umijs/max'; 2 | import { Button, Result } from 'antd'; 3 | import React from 'react'; 4 | 5 | const NoFoundPage: React.FC = () => ( 6 | history.push('/')}> 12 | 返回首页 13 | 14 | } 15 | /> 16 | ); 17 | 18 | export default NoFoundPage; 19 | -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/ChatContainer/index.less: -------------------------------------------------------------------------------- 1 | .messageContainer { 2 | max-width: 760px; 3 | height: calc(100vh - 90px); 4 | padding: 20px 16px 0 16px; 5 | overflow-y: auto; 6 | margin: auto; 7 | &::-webkit-scrollbar { 8 | display: none; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/MessageInput/assets/ic_delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/src/pages/ChatTrial/MessageInput/assets/ic_delete.png -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/MessageInput/assets/send-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/src/pages/ChatTrial/MessageInput/assets/send-icon.png -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/MessageItem/assets/assistant-avatar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/src/pages/ChatTrial/MessageItem/assets/assistant-avatar.png -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/MessageItem/assets/user-avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NascentCore/3k/e48caff664ad721a9b4cc47e55646b6f59818fe1/ui/src/pages/ChatTrial/MessageItem/assets/user-avatar.png -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/MessageItem/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import RobotMessageItem from './RobotMessageItem'; 3 | import UserMessageItem from './UserMessageItem'; 4 | import { IChatItemMsg } from '@/models/chat-h5-model'; 5 | 6 | interface IProps { 7 | messageItem: IChatItemMsg; 8 | } 9 | 10 | const Index: React.FC = ({ messageItem }) => { 11 | const MessageComponent = messageItem.role === 'user' ? 
UserMessageItem : RobotMessageItem; 12 | 13 | return ; 14 | }; 15 | 16 | export default Index; 17 | -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/index.less: -------------------------------------------------------------------------------- 1 | .container { 2 | background-color: #1e1e1e; 3 | width: 100%; 4 | height: 100vh; 5 | } 6 | -------------------------------------------------------------------------------- /ui/src/pages/ChatTrial/utils/interface.ts: -------------------------------------------------------------------------------- 1 | export enum SubmitKey { 2 | Enter = 'Enter', 3 | CtrlEnter = 'Ctrl + Enter', 4 | ShiftEnter = 'Shift + Enter', 5 | AltEnter = 'Alt + Enter', 6 | MetaEnter = 'Meta + Enter', 7 | } 8 | -------------------------------------------------------------------------------- /ui/src/pages/Grafana/index.tsx: -------------------------------------------------------------------------------- 1 | import { PageContainer } from '@ant-design/pro-components'; 2 | import React, { useEffect } from 'react'; 3 | 4 | const Admin: React.FC = () => { 5 | useEffect(() => { 6 | (document as any).querySelector('main.ant-layout-content').style.padding = 0; 7 | }, []); 8 | const url = `${window.location.protocol}//${window.location.hostname}:30006/d/Oxed_c6Wz/nvidia-dcgm-exporter-dashboard?orgId=1`; 9 | return ( 10 |
11 |