├── runtime ├── datamate-python │ ├── app │ │ ├── __init__.py │ │ ├── db │ │ │ └── __init__.py │ │ ├── module │ │ │ ├── ratio │ │ │ │ ├── __init__.py │ │ │ │ ├── schema │ │ │ │ │ └── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ └── interface │ │ │ │ │ └── __init__.py │ │ │ ├── shared │ │ │ │ ├── __init__.py │ │ │ │ ├── util │ │ │ │ │ └── __init__.py │ │ │ │ ├── common │ │ │ │ │ └── __init__.py │ │ │ │ └── schema │ │ │ │ │ └── __init__.py │ │ │ ├── system │ │ │ │ ├── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ ├── schema │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── health.py │ │ │ │ └── interface │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── about.py │ │ │ ├── annotation │ │ │ │ ├── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ ├── client │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── labelstudio │ │ │ │ │ │ └── __init__.py │ │ │ │ ├── config │ │ │ │ │ └── __init__.py │ │ │ │ ├── utils │ │ │ │ │ └── __init__.py │ │ │ │ ├── interface │ │ │ │ │ └── __init__.py │ │ │ │ └── schema │ │ │ │ │ └── tag.py │ │ │ ├── evaluation │ │ │ │ ├── __init__.py │ │ │ │ ├── schema │ │ │ │ │ └── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ └── interface │ │ │ │ │ └── __init__.py │ │ │ ├── generation │ │ │ │ ├── __init__.py │ │ │ │ ├── schema │ │ │ │ │ └── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ └── interface │ │ │ │ │ └── __init__.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── service │ │ │ │ │ └── __init__.py │ │ │ │ └── schema │ │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ └── core │ │ │ ├── __init__.py │ │ │ └── exception.py │ ├── .gitignore │ ├── uvicorn_start.sh │ └── .env.example ├── python-executor │ └── datamate │ │ ├── common │ │ └── __init__.py │ │ ├── core │ │ ├── __init__.py │ │ └── constant.py │ │ ├── sql_manager │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── wrappers │ │ ├── __init__.py │ │ ├── data_juicer_wrapper.py │ │ └── datamate_wrapper.py │ │ ├── scheduler │ │ 
└── __init__.py │ │ └── ops │ │ └── __init__.py ├── datax │ ├── nfsreader │ │ └── src │ │ │ └── main │ │ │ └── resources │ │ │ ├── plugin_job_template.json │ │ │ └── plugin.json │ ├── nfswriter │ │ └── src │ │ │ └── main │ │ │ └── resources │ │ │ ├── plugin_job_template.json │ │ │ └── plugin.json │ ├── obsreader │ │ └── src │ │ │ └── main │ │ │ └── resources │ │ │ ├── plugin.json │ │ │ └── plugin_job_template.json │ └── obswriter │ │ └── src │ │ └── main │ │ └── resources │ │ ├── plugin.json │ │ └── plugin_job_template.json └── ops │ ├── mapper │ ├── pii_ner_detection │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_resize │ │ └── __init__.py │ ├── img_denoise │ │ ├── __init__.py │ │ └── metadata.yml │ ├── text_to_word │ │ ├── __init__.py │ │ └── metadata.yml │ ├── emoji_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_type_unify │ │ └── __init__.py │ ├── content_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── email_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── legend_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── url_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── xml_tag_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── html_tag_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_shadow_remove │ │ ├── __init__.py │ │ └── metadata.yml │ ├── extra_space_cleaner │ │ ├── __init__.py │ │ ├── resources │ │ │ └── special_token.txt │ │ └── metadata.yml │ ├── id_number_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_enhanced_contrast │ │ ├── __init__.py │ │ └── metadata.yml │ ├── ip_address_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_enhanced_brightness │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_enhanced_saturation │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_enhanced_sharpness │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_direction_correct │ │ ├── __init__.py │ │ └── metadata.yml │ ├── phone_number_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── 
political_word_cleaner │ │ ├── __init__.py │ │ ├── resources │ │ │ └── special_symbols.txt │ │ └── metadata.yml │ ├── traditional_chinese │ │ ├── __init__.py │ │ └── metadata.yml │ ├── unicode_space_cleaner │ │ └── __init__.py │ ├── knowledge_relation_slice │ │ ├── __init__.py │ │ └── metadata.yml │ ├── garble_characters_cleaner │ │ ├── __init__.py │ │ ├── metadata.yml │ │ └── resources │ │ │ └── charset.json │ ├── credit_card_number_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── remove_duplicate_sentences │ │ ├── __init__.py │ │ └── metadata.yml │ ├── full_width_characters_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_perspective_transformation │ │ ├── __init__.py │ │ └── metadata.yml │ ├── invisible_characters_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ └── sexual_and_violent_word_cleaner │ │ ├── __init__.py │ │ ├── resources │ │ └── special_symbols.txt │ │ └── metadata.yml │ ├── examples │ └── test_operator │ │ ├── __init__.py │ │ └── process.py │ ├── slicer │ ├── segmentation │ │ ├── __init__.py │ │ └── metadata.yml │ ├── slide_simple_slicer │ │ └── __init__.py │ ├── slide_annotation_slicer │ │ ├── __init__.py │ │ └── metadata.yml │ └── __init__.py │ ├── formatter │ ├── slide_formatter │ │ ├── __init__.py │ │ └── metadata.yml │ ├── mineru_formatter │ │ └── __init__.py │ └── __init__.py │ ├── filter │ ├── remove_duplicate_file │ │ ├── __init__.py │ │ ├── metadata.yml │ │ └── sql │ │ │ └── sql_config.json │ ├── img_blurred_images_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── img_similar_images_cleaner │ │ ├── __init__.py │ │ ├── metadata.yml │ │ └── sql │ │ │ └── sql_config.json │ ├── img_duplicated_images_cleaner │ │ ├── __init__.py │ │ ├── metadata.yml │ │ └── sql │ │ │ └── sql_config.json │ ├── img_advertisement_images_cleaner │ │ ├── __init__.py │ │ └── metadata.yml │ ├── remove_file_with_short_or_long_length │ │ └── __init__.py │ ├── file_with_high_repeat_word_rate_filter │ │ ├── __init__.py │ │ └── metadata.yml │ ├── 
remove_file_with_many_sensitive_words │ │ ├── __init__.py │ │ ├── resources │ │ │ └── special_symbols.txt │ │ └── metadata.yml │ ├── file_with_high_repeat_phrase_rate_filter │ │ └── __init__.py │ └── file_with_high_special_char_rate_filter │ │ ├── __init__.py │ │ ├── resources │ │ └── special_token.txt │ │ └── metadata.yml │ └── llms │ ├── text_quality_evaluation │ ├── __init__.py │ ├── resources │ │ └── template.txt │ └── metadata.yml │ ├── qa_condition_evaluator │ ├── __init__.py │ ├── resources │ │ └── template.txt │ └── metadata.yml │ └── __init__.py ├── frontend ├── src │ ├── pages │ │ ├── DataAnnotation │ │ │ ├── Detail │ │ │ │ └── TaskDetail.tsx │ │ │ └── Template │ │ │ │ ├── components │ │ │ │ └── index.ts │ │ │ │ └── index.ts │ │ ├── OperatorMarket │ │ │ ├── operator.const.tsx │ │ │ └── Detail │ │ │ │ └── components │ │ │ │ └── Documentation.tsx │ │ ├── RatioTask │ │ │ └── ratio.api.ts │ │ └── Layout │ │ │ └── MainLayout.tsx │ ├── vite-env.d.ts │ ├── store │ │ ├── hooks.ts │ │ ├── index.ts │ │ └── slices │ │ │ └── settingsSlice.ts │ ├── mock │ │ ├── mock-middleware │ │ │ ├── index.cjs │ │ │ ├── error-handle-middleware.cjs │ │ │ ├── strong-match-middleware.cjs │ │ │ └── send-json-middleawre.cjs │ │ └── nodemon.json │ └── hooks │ │ ├── useDebouncedEffect.ts │ │ ├── useSearchParams.tsx │ │ └── useStyle.ts ├── public │ └── huawei-logo.webp ├── tsconfig.json ├── .editorconfig ├── .gitignore └── index.html ├── scripts ├── db │ ├── 00-database-init.sql │ └── data-common-init.sql └── images │ ├── gateway │ └── start.sh │ ├── backend │ └── start.sh │ ├── runtime │ └── start.sh │ ├── database │ ├── Dockerfile │ └── utf8.cnf │ └── frontend │ └── Dockerfile ├── deployment ├── helm │ ├── milvus │ │ ├── charts │ │ │ ├── pulsar │ │ │ │ ├── charts │ │ │ │ │ └── kube-prometheus-stack │ │ │ │ │ │ ├── charts │ │ │ │ │ │ ├── grafana │ │ │ │ │ │ │ ├── dashboards │ │ │ │ │ │ │ │ └── custom-dashboard.json │ │ │ │ │ │ │ ├── ci │ │ │ │ │ │ │ │ ├── with-persistence.yaml │ │ │ │ │ │ 
│ │ ├── default-values.yaml │ │ │ │ │ │ │ │ ├── with-extraconfigmapmounts-values.yaml │ │ │ │ │ │ │ │ ├── with-image-renderer-values.yaml │ │ │ │ │ │ │ │ └── with-dashboard-values.yaml │ │ │ │ │ │ │ ├── templates │ │ │ │ │ │ │ │ ├── extra-manifests.yaml │ │ │ │ │ │ │ │ ├── secret-env.yaml │ │ │ │ │ │ │ │ ├── configmap.yaml │ │ │ │ │ │ │ │ ├── tests │ │ │ │ │ │ │ │ │ └── test-serviceaccount.yaml │ │ │ │ │ │ │ │ ├── configmap-dashboard-provider.yaml │ │ │ │ │ │ │ │ └── serviceaccount.yaml │ │ │ │ │ │ │ └── .helmignore │ │ │ │ │ │ ├── crds │ │ │ │ │ │ │ ├── Chart.yaml │ │ │ │ │ │ │ └── README.md │ │ │ │ │ │ ├── prometheus-node-exporter │ │ │ │ │ │ │ ├── ci │ │ │ │ │ │ │ │ └── port-values.yaml │ │ │ │ │ │ │ ├── templates │ │ │ │ │ │ │ │ ├── extra-manifests.yaml │ │ │ │ │ │ │ │ ├── endpoints.yaml │ │ │ │ │ │ │ │ └── psp-clusterrole.yaml │ │ │ │ │ │ │ └── .helmignore │ │ │ │ │ │ ├── kube-state-metrics │ │ │ │ │ │ │ ├── templates │ │ │ │ │ │ │ │ ├── extra-manifests.yaml │ │ │ │ │ │ │ │ ├── kubeconfig-secret.yaml │ │ │ │ │ │ │ │ └── crs-configmap.yaml │ │ │ │ │ │ │ └── .helmignore │ │ │ │ │ │ └── prometheus-windows-exporter │ │ │ │ │ │ │ ├── .helmignore │ │ │ │ │ │ │ ├── templates │ │ │ │ │ │ │ └── config.yaml │ │ │ │ │ │ │ └── Chart.yaml │ │ │ │ │ │ ├── templates │ │ │ │ │ │ ├── extra-objects.yaml │ │ │ │ │ │ ├── NOTES.txt │ │ │ │ │ │ ├── prometheus-operator │ │ │ │ │ │ │ ├── admission-webhooks │ │ │ │ │ │ │ │ └── _prometheus-operator-webhook.tpl │ │ │ │ │ │ │ ├── _prometheus-operator.tpl │ │ │ │ │ │ │ └── serviceaccount.yaml │ │ │ │ │ │ └── prometheus │ │ │ │ │ │ │ └── csi-secret.yaml │ │ │ │ │ │ └── .helmignore │ │ │ │ ├── NOTICE │ │ │ │ ├── Chart.lock │ │ │ │ └── Chart.yaml │ │ │ ├── etcd │ │ │ │ ├── templates │ │ │ │ │ ├── extra-list.yaml │ │ │ │ │ └── token-secrets.yaml │ │ │ │ ├── charts │ │ │ │ │ └── common │ │ │ │ │ │ ├── values.yaml │ │ │ │ │ │ ├── .helmignore │ │ │ │ │ │ ├── templates │ │ │ │ │ │ ├── _tplvalues.tpl │ │ │ │ │ │ └── _warnings.tpl │ │ │ │ │ │ └── 
Chart.yaml │ │ │ │ ├── Chart.lock │ │ │ │ └── .helmignore │ │ │ ├── kafka │ │ │ │ ├── templates │ │ │ │ │ └── extra-list.yaml │ │ │ │ ├── charts │ │ │ │ │ ├── common │ │ │ │ │ │ ├── values.yaml │ │ │ │ │ │ ├── .helmignore │ │ │ │ │ │ ├── templates │ │ │ │ │ │ │ ├── _tplvalues.tpl │ │ │ │ │ │ │ └── _warnings.tpl │ │ │ │ │ │ └── Chart.yaml │ │ │ │ │ └── zookeeper │ │ │ │ │ │ ├── templates │ │ │ │ │ │ └── extra-list.yaml │ │ │ │ │ │ ├── charts │ │ │ │ │ │ └── common │ │ │ │ │ │ │ ├── values.yaml │ │ │ │ │ │ │ ├── .helmignore │ │ │ │ │ │ │ └── templates │ │ │ │ │ │ │ ├── _tplvalues.tpl │ │ │ │ │ │ │ └── _warnings.tpl │ │ │ │ │ │ ├── Chart.lock │ │ │ │ │ │ ├── .helmignore │ │ │ │ │ │ └── ci │ │ │ │ │ │ └── values-with-auth-tls-and-metrics.yaml │ │ │ │ ├── ci │ │ │ │ │ ├── values-with-tls-pem-auth.yaml │ │ │ │ │ ├── values-with-metrics.yaml │ │ │ │ │ ├── values-with-tls-jks-auth.yaml │ │ │ │ │ └── values-with-external-tls.yaml │ │ │ │ ├── Chart.lock │ │ │ │ └── .helmignore │ │ │ ├── tei │ │ │ │ ├── ci │ │ │ │ │ └── tei-mininum-values.yaml │ │ │ │ ├── templates │ │ │ │ │ ├── headless-service.yaml │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ │ ├── .helmignore │ │ │ │ └── Chart.yaml │ │ │ ├── pulsarv2 │ │ │ │ ├── Chart.yaml │ │ │ │ └── .helmignore │ │ │ └── minio │ │ │ │ ├── templates │ │ │ │ ├── serviceaccount.yaml │ │ │ │ ├── configmap.yaml │ │ │ │ ├── poddisruptionbudget.yaml │ │ │ │ └── post-install-prometheus-metrics-serviceaccount.yaml │ │ │ │ ├── Chart.yaml │ │ │ │ └── .helmignore │ │ ├── .helmignore │ │ ├── templates │ │ │ ├── proxy-tls-secret.yaml │ │ │ ├── configmap.yaml │ │ │ ├── serviceaccount.yaml │ │ │ ├── datanode-svc.yaml │ │ │ └── querynode-svc.yaml │ │ └── Chart.yaml │ ├── deer-flow │ │ ├── charts │ │ │ ├── public │ │ │ │ ├── values.yaml │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ └── secret.yaml │ │ │ ├── backend │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ 
│ └── frontend │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ ├── serviceaccount.yaml │ │ │ │ └── service.yaml │ │ └── .helmignore │ ├── datamate │ │ ├── charts │ │ │ ├── ray-cluster │ │ │ │ ├── Chart.yaml │ │ │ │ └── templates │ │ │ │ │ └── service.yaml │ │ │ ├── kuberay-operator │ │ │ │ ├── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ ├── role.yaml │ │ │ │ │ ├── service.yaml │ │ │ │ │ ├── ray_rayjob_viewer_role.yaml │ │ │ │ │ ├── ray_rayservice_viewer_role.yaml │ │ │ │ │ ├── multiple_namespaces_role.yaml │ │ │ │ │ ├── rolebinding.yaml │ │ │ │ │ └── leader_election_role_binding.yaml │ │ │ │ └── Chart.yaml │ │ │ ├── backend │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ ├── database │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ ├── frontend │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ ├── gateway │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ ├── public │ │ │ │ └── .helmignore │ │ │ ├── runtime │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ │ ├── serviceaccount.yaml │ │ │ │ │ └── service.yaml │ │ │ └── backend-python │ │ │ │ ├── .helmignore │ │ │ │ └── templates │ │ │ │ ├── serviceaccount.yaml │ │ │ │ └── service.yaml │ │ └── .helmignore │ └── label-studio │ │ └── Chart.yaml └── docker │ └── datamate │ └── backend.conf ├── backend ├── services │ ├── data-collection-service │ │ ├── image.png │ │ ├── image1.png │ │ ├── image2.png │ │ ├── image3.png │ │ └── src │ │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── datamate │ │ │ └── collection │ │ │ ├── infrastructure │ │ │ └── datax │ │ │ │ └── config │ │ │ │ └── BaseConfig.java │ │ │ ├── common │ │ │ └── enums │ │ │ │ ├── TemplateType.java │ │ │ │ ├── SyncMode.java │ │ │ │ └── TaskStatus.java │ │ │ ├── domain │ │ │ ├── process │ │ 
│ │ └── ProcessRunner.java │ │ │ └── repository │ │ │ │ └── CollectionTaskRepository.java │ │ │ └── interfaces │ │ │ └── dto │ │ │ └── CollectionTaskPagingQuery.java │ ├── data-management-service │ │ └── src │ │ │ └── main │ │ │ ├── java │ │ │ └── com │ │ │ │ └── datamate │ │ │ │ └── datamanagement │ │ │ │ ├── common │ │ │ │ └── enums │ │ │ │ │ └── DuplicateMethod.java │ │ │ │ ├── domain │ │ │ │ ├── contants │ │ │ │ │ └── DatasetConstant.java │ │ │ │ └── model │ │ │ │ │ └── dataset │ │ │ │ │ └── DatasetFileUploadCheckInfo.java │ │ │ │ ├── interfaces │ │ │ │ └── dto │ │ │ │ │ ├── CopyFilesRequest.java │ │ │ │ │ ├── CreateTagRequest.java │ │ │ │ │ ├── UpdateTagRequest.java │ │ │ │ │ ├── AllDatasetStatisticsResponse.java │ │ │ │ │ ├── AddFilesRequest.java │ │ │ │ │ ├── TagResponse.java │ │ │ │ │ ├── UploadFilesPreRequest.java │ │ │ │ │ └── DatasetTypeResponse.java │ │ │ │ └── infrastructure │ │ │ │ └── client │ │ │ │ └── dto │ │ │ │ └── LocalCollectionConfig.java │ │ │ └── resources │ │ │ └── config │ │ │ └── application-datamanagement.yml │ ├── operator-market-service │ │ └── src │ │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── datamate │ │ │ └── operator │ │ │ ├── interfaces │ │ │ └── dto │ │ │ │ ├── LabelDto.java │ │ │ │ ├── CategoryRelationDto.java │ │ │ │ ├── CategoryDto.java │ │ │ │ ├── CategoryTreeResponse.java │ │ │ │ └── OperatorsListPostRequest.java │ │ │ ├── infrastructure │ │ │ ├── persistence │ │ │ │ └── mapper │ │ │ │ │ ├── CategoryMapper.java │ │ │ │ │ └── CategoryRelationMapper.java │ │ │ └── converter │ │ │ │ ├── CategoryConverter.java │ │ │ │ └── CategoryRelationConverter.java │ │ │ └── domain │ │ │ ├── repository │ │ │ └── CategoryRepository.java │ │ │ └── model │ │ │ ├── CategoryRelation.java │ │ │ └── Category.java │ ├── data-cleaning-service │ │ └── src │ │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── datamate │ │ │ └── cleaning │ │ │ ├── interfaces │ │ │ └── dto │ │ │ │ ├── CleaningTaskLog.java │ │ │ │ ├── 
CreateCleaningTemplateRequest.java │ │ │ │ ├── UpdateCleaningTemplateRequest.java │ │ │ │ ├── OperatorInstanceDto.java │ │ │ │ └── CleaningResultDto.java │ │ │ ├── infrastructure │ │ │ ├── persistence │ │ │ │ └── mapper │ │ │ │ │ ├── CleaningTaskMapper.java │ │ │ │ │ └── CleaningResultMapper.java │ │ │ └── converter │ │ │ │ ├── CleaningResultConverter.java │ │ │ │ └── CleaningTemplateConverter.java │ │ │ ├── domain │ │ │ └── model │ │ │ │ ├── entity │ │ │ │ ├── OperatorInstance.java │ │ │ │ └── TemplateWithInstance.java │ │ │ │ └── TaskProcess.java │ │ │ ├── common │ │ │ └── exception │ │ │ │ └── CleanErrorCode.java │ │ │ └── DataCleaningServiceConfiguration.java │ ├── rag-indexer-service │ │ └── src │ │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── datamate │ │ │ └── rag │ │ │ └── indexer │ │ │ ├── domain │ │ │ └── model │ │ │ │ ├── RagChunk.java │ │ │ │ └── FileStatus.java │ │ │ ├── infrastructure │ │ │ ├── event │ │ │ │ └── DataInsertedEvent.java │ │ │ └── persistence │ │ │ │ └── mapper │ │ │ │ ├── RagFileMapper.java │ │ │ │ └── KnowledgeBaseMapper.java │ │ │ └── interfaces │ │ │ └── dto │ │ │ ├── DeleteFilesReq.java │ │ │ ├── RagFileReq.java │ │ │ ├── RetrieveReq.java │ │ │ ├── KnowledgeBaseResp.java │ │ │ ├── ProcessType.java │ │ │ ├── AddFilesReq.java │ │ │ └── KnowledgeBaseQueryReq.java │ └── main-application │ │ └── src │ │ └── main │ │ └── resources │ │ └── config │ │ └── application-datamanagement.yml └── shared │ └── domain-common │ └── src │ └── main │ └── java │ └── com │ └── datamate │ └── common │ ├── setting │ ├── interfaces │ │ └── rest │ │ │ └── dto │ │ │ └── ParamRequest.java │ ├── domain │ │ ├── entity │ │ │ └── ModelType.java │ │ └── repository │ │ │ └── SysParamRepository.java │ └── infrastructure │ │ └── persistence │ │ └── mapper │ │ ├── SysParamMapper.java │ │ └── ModelConfigMapper.java │ ├── domain │ ├── ValueObject.java │ ├── model │ │ └── FileUploadResult.java │ ├── AggregateRoot.java │ └── utils │ │ └── CommonUtils.java │ └── 
infrastructure │ ├── common │ └── IgnoreResponseWrap.java │ └── exception │ ├── ErrorCode.java │ ├── CommonErrorCode.java │ └── ErrorCodeImpl.java └── .editorconfig /runtime/datamate-python/app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/ratio/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/shared/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/system/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/src/pages/DataAnnotation/Detail/TaskDetail.tsx: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/generation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/shared/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/ratio/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/ratio/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/system/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/runtime/datamate-python/app/module/evaluation/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/evaluation/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/generation/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/generation/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/sql_manager/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /scripts/db/00-database-init.sql: -------------------------------------------------------------------------------- 1 | create database if not exists datamate; 2 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/shared/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 公共模块 3 | """ 4 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/core/__init__.py: 
-------------------------------------------------------------------------------- 1 | # app/core/__init__.py 2 | 3 | """ 4 | Core module 5 | 6 | 7 | """ -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/dashboards/custom-dashboard.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /scripts/images/gateway/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "Starting main application..." 6 | exec "$@" -------------------------------------------------------------------------------- /frontend/public/huawei-logo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/DataMate/HEAD/frontend/public/huawei-logo.webp -------------------------------------------------------------------------------- /runtime/datamate-python/.gitignore: -------------------------------------------------------------------------------- 1 | # Local Development Environment Files 2 | .env 3 | .dev.env 4 | 5 | # logs 6 | logs/ 7 | 8 | doc/ -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/system/schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .health import HealthResponse 2 | 3 | __all__ = ["HealthResponse"] -------------------------------------------------------------------------------- /scripts/images/backend/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | rpcbind 6 | 7 | echo "Starting main application..." 
8 | exec "$@" -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/crds/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: crds 3 | version: 0.0.0 4 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/client/__init__.py: -------------------------------------------------------------------------------- 1 | from .labelstudio import LabelStudioClient 2 | 3 | __all__ = ["LabelStudioClient"] -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .service import DatasetManagementService 2 | 3 | __all__ = ["DatasetManagementService"] -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/ci/with-persistence.yaml: -------------------------------------------------------------------------------- 1 | persistence: 2 | type: pvc 3 | enabled: true 4 | -------------------------------------------------------------------------------- /backend/services/data-collection-service/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/DataMate/HEAD/backend/services/data-collection-service/image.png -------------------------------------------------------------------------------- /backend/services/data-collection-service/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/DataMate/HEAD/backend/services/data-collection-service/image1.png 
-------------------------------------------------------------------------------- /backend/services/data-collection-service/image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/DataMate/HEAD/backend/services/data-collection-service/image2.png -------------------------------------------------------------------------------- /backend/services/data-collection-service/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/DataMate/HEAD/backend/services/data-collection-service/image3.png -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/client/labelstudio/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import Client as LabelStudioClient 2 | 3 | __all__ = ["LabelStudioClient"] -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/dataset/service/__init__.py: -------------------------------------------------------------------------------- 1 | from .service import Service as DatasetManagementService 2 | 3 | __all__ = ["DatasetManagementService"] -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-node-exporter/ci/port-values.yaml: -------------------------------------------------------------------------------- 1 | service: 2 | targetPort: 9102 3 | port: 9102 4 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ] 7 | } 8 | 
-------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/public/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for datamate. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/extra-objects.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraManifests }} 2 | --- 3 | {{ tpl (toYaml .) $ }} 4 | {{ end }} 5 | -------------------------------------------------------------------------------- /runtime/datamate-python/uvicorn_start.sh: -------------------------------------------------------------------------------- 1 | export LOG_LEVEL=DEBUG 2 | export DEBUG=true 3 | 4 | uvicorn app.main:app \ 5 | --host 0.0.0.0 \ 6 | --port 18000 \ 7 | --reload -------------------------------------------------------------------------------- /runtime/datax/nfsreader/src/main/resources/plugin_job_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nfsreader", 3 | "parameter": { 4 | "ip": "127.0.0.1", 5 | "path": "/test" 6 | } 7 | } -------------------------------------------------------------------------------- /scripts/images/runtime/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cp -r /opt/runtime/user/* /opt/runtime/datamate/ops/user 6 | 7 | echo "Starting main application..." 
8 | exec "$@" 9 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/templates/extra-list.yaml: -------------------------------------------------------------------------------- 1 | {{- range .Values.extraDeploy }} 2 | --- 3 | {{ include "common.tplvalues.render" (dict "value" . "context" $) }} 4 | {{- end }} 5 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/config/__init__.py: -------------------------------------------------------------------------------- 1 | """Tag configuration package""" 2 | from .tag_config import LabelStudioTagConfig 3 | 4 | __all__ = ['LabelStudioTagConfig'] 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/charts/common/values.yaml: -------------------------------------------------------------------------------- 1 | ## bitnami/common 2 | ## It is required by CI/CD tools and processes. 3 | ## @skip exampleValue 4 | ## 5 | exampleValue: common-chart 6 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/templates/extra-list.yaml: -------------------------------------------------------------------------------- 1 | {{- range .Values.extraDeploy }} 2 | --- 3 | {{ include "common.tplvalues.render" (dict "value" . "context" $) }} 4 | {{- end }} 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/extra-manifests.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraObjects }} 2 | --- 3 | {{ tpl (toYaml .) 
$ }} 4 | {{ end }} 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/common/values.yaml: -------------------------------------------------------------------------------- 1 | ## bitnami/common 2 | ## It is required by CI/CD tools and processes. 3 | ## @skip exampleValue 4 | ## 5 | exampleValue: common-chart 6 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/extra-manifests.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraManifests }} 2 | --- 3 | {{ tpl . $ }} 4 | {{ end }} 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/templates/extra-list.yaml: -------------------------------------------------------------------------------- 1 | {{- range .Values.extraDeploy }} 2 | --- 3 | {{ include "common.tplvalues.render" (dict "value" . "context" $) }} 4 | {{- end }} 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/ci/default-values.yaml: -------------------------------------------------------------------------------- 1 | # Leave this file empty to ensure that CI runs builds against the default configuration in values.yaml. 2 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/extra-manifests.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraManifests }} 2 | --- 3 | {{ tpl (toYaml .) 
$ }} 4 | {{ end }} 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/charts/common/values.yaml: -------------------------------------------------------------------------------- 1 | ## bitnami/common 2 | ## It is required by CI/CD tools and processes. 3 | ## @skip exampleValue 4 | ## 5 | exampleValue: common-chart 6 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/system/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from .about import router as about_router 4 | 5 | router = APIRouter() 6 | 7 | router.include_router(about_router) -------------------------------------------------------------------------------- /runtime/python-executor/datamate/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import data_juicer_wrapper, datamate_wrapper 2 | 3 | WRAPPERS = { 4 | "data_juicer": data_juicer_wrapper, 5 | "datamate": datamate_wrapper 6 | } 7 | -------------------------------------------------------------------------------- /runtime/datax/nfswriter/src/main/resources/plugin_job_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nfswriter", 3 | "parameter": { 4 | "ip": "127.0.0.1", 5 | "path": "/test", 6 | "destPath": "" 7 | } 8 | } -------------------------------------------------------------------------------- /scripts/images/database/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mysql:8 2 | 3 | COPY scripts/images/database/utf8.cnf /etc/mysql/conf.d/utf8.cnf 4 | COPY scripts/db/ /docker-entrypoint-initdb.d/ 5 | 6 | RUN chmod 644 /etc/mysql/conf.d/utf8.cnf -------------------------------------------------------------------------------- /runtime/python-executor/datamate/core/constant.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | class Fields(object): 4 | result = 'execute_result' 5 | instance_id = 'instance_id' 6 | export_path = 'export_path' 7 | 8 | 9 | -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/infrastructure/datax/config/BaseConfig.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.infrastructure.datax.config; 2 | 3 | public interface BaseConfig { 4 | } 5 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/NOTICE: -------------------------------------------------------------------------------- 1 | Apache Pulsar 2 | Copyright 2017-2022 The Apache Software Foundation 3 | 4 | This product includes software 
developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | -------------------------------------------------------------------------------- /frontend/src/pages/DataAnnotation/Template/components/index.ts: -------------------------------------------------------------------------------- 1 | export { default as TagSelector } from "./TagSelector"; 2 | export { default as TagBrowser } from "./TagBrowser"; 3 | export { TagInfoPanel } from "./TagSelector"; 4 | -------------------------------------------------------------------------------- /runtime/datax/obsreader/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "obsreader", 3 | "class": "com.datamate.plugin.reader.obsreader.ObsReader", 4 | "description": "read from obs file system", 5 | "developer": "datamate" 6 | } 7 | -------------------------------------------------------------------------------- /runtime/datax/obswriter/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "obswriter", 3 | "class": "com.datamate.plugin.writer.obswriter.ObsWriter", 4 | "description": "writer obs file to local", 5 | "developer": "datamate" 6 | } 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/pii_ner_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from datamate.core.base_op import OPERATORS 2 | 3 | OPERATORS.register_module(module_name='PiiDetector', 4 | module_path='ops.mapper.pii_ner_detection.process') -------------------------------------------------------------------------------- /runtime/datax/nfswriter/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nfswriter", 3 | "class": "com.modelengine.edatamate.plugin.writer.nfswriter.NfsWriter", 4 | "description": 
"write to local", 5 | "developer": "modelengine" 6 | } -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/setting/interfaces/rest/dto/ParamRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.setting.interfaces.rest.dto; 2 | 3 | public record ParamRequest(String paramValue, boolean isEnabled) { 4 | } 5 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/ray-cluster/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: A Helm chart for Kubernetes 3 | name: ray-cluster 4 | version: 1.4.2 5 | icon: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png 6 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/crds/README.md: -------------------------------------------------------------------------------- 1 | # crds subchart 2 | 3 | See: [https://github.com/prometheus-community/helm-charts/issues/3548](https://github.com/prometheus-community/helm-charts/issues/3548) 4 | -------------------------------------------------------------------------------- /runtime/datax/nfsreader/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nfsreader", 3 | "class": "com.modelengine.edatamate.plugin.reader.nfsreader.NfsReader", 4 | "description": "read from nas file system", 5 | "developer": "modelengine" 6 | } -------------------------------------------------------------------------------- /runtime/ops/mapper/pii_ner_detection/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '高级匿名化' 2 | language: 'Python' 3 | vendor: 
'others' 4 | raw_id: 'PiiDetector' 5 | version: '1.0.0' 6 | description: '高级匿名化算子,检测命名实体并匿名化。' 7 | modal: 'text' 8 | inputs: 'text' 9 | outputs: 'text' -------------------------------------------------------------------------------- /runtime/ops/mapper/img_resize/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgResize', 6 | module_path="ops.mapper.img_resize.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_denoise/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgDenoise', 6 | module_path="ops.mapper.img_denoise.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/text_to_word/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='TextToWord', 6 | module_path="ops.mapper.text_to_word.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/examples/test_operator/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='TestMapper', 6 | module_path="ops.user.test_operator.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/emoji_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='EmojiCleaner', 6 | module_path="ops.mapper.emoji_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_type_unify/__init__.py: -------------------------------------------------------------------------------- 1 | # -- encoding: utf-8 -- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgTypeUnify', 6 | module_path="ops.mapper.img_type_unify.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/slicer/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='Segmentation', 6 | module_path="ops.slicer.segmentation.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/content_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ContentCleaner', 6 | module_path="ops.mapper.content_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/email_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='EmailNumberCleaner', 6 | module_path="ops.mapper.email_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/legend_cleaner/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='LegendCleaner', 6 | module_path="ops.mapper.legend_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/url_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnonymizedUrlCleaner', 6 | module_path="ops.mapper.url_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/xml_tag_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='XMLTagCleaner', 6 | module_path="ops.mapper.xml_tag_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/wrappers/data_juicer_wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datamate.scheduler import cmd_scheduler 3 | 4 | 5 | async def submit(task_id, config_path): 6 | await cmd_scheduler.submit(task_id, f"dj-process --config {config_path}") -------------------------------------------------------------------------------- /runtime/ops/formatter/slide_formatter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='SlideFormatter', 6 | module_path="ops.formatter.slide_formatter.process") 7 | 
-------------------------------------------------------------------------------- /runtime/ops/mapper/html_tag_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='HtmlTagCleaner', 6 | module_path="ops.mapper.html_tag_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_shadow_remove/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgShadowRemove', 6 | module_path="ops.mapper.img_shadow_remove.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/slicer/slide_simple_slicer/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='SimpleSlicer', 6 | module_path="ops.slicer.slide_simple_slicer.process") 7 | -------------------------------------------------------------------------------- /frontend/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_style = space 7 | indent_size = 2 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.{md}] 12 | trim_trailing_whitespace = false 13 | 14 | -------------------------------------------------------------------------------- /runtime/ops/formatter/mineru_formatter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | 
OPERATORS.register_module(module_name='MineruFormatter', 6 | module_path="ops.formatter.mineru_formatter.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/extra_space_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ExtraSpaceCleaner', 6 | module_path="ops.mapper.extra_space_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/id_number_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnonymizedIdNumber', 6 | module_path="ops.mapper.id_number_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_contrast/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgContrast', 6 | module_path="ops.mapper.img_enhanced_contrast.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/ip_address_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnonymizedIpAddress', 6 | module_path="ops.mapper.ip_address_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/scheduler/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .cmd_task_scheduler import CommandScheduler 2 | from .func_task_scheduler import CallableScheduler 3 | 4 | 5 | cmd_scheduler = CommandScheduler(max_concurrent=5) 6 | func_scheduler = CallableScheduler(max_concurrent=5) -------------------------------------------------------------------------------- /runtime/ops/filter/remove_duplicate_file/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='DuplicateFilesFilter', 6 | module_path="ops.filter.remove_duplicate_file.process") -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_brightness/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgBrightness', 6 | module_path="ops.mapper.img_enhanced_brightness.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_saturation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgSaturation', 6 | module_path="ops.mapper.img_enhanced_saturation.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_sharpness/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgSharpness', 6 | 
module_path="ops.mapper.img_enhanced_sharpness.process") 7 | -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/common/enums/TemplateType.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.common.enums; 2 | 3 | /** 4 | * 模板类型枚举 5 | * 6 | */ 7 | public enum TemplateType { 8 | NAS, 9 | OBS, 10 | MYSQL 11 | } 12 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/ci/tei-mininum-values.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | requests: 3 | cpu: "0.1" 4 | memory: "2Gi" 5 | limits: 6 | cpu: "8" 7 | memory: "16Gi" 8 | 9 | persistence: 10 | enabled: true 11 | persistentVolumeClaim: 12 | size: 10Gi 13 | -------------------------------------------------------------------------------- /runtime/datax/obsreader/src/main/resources/plugin_job_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "obsreader", 3 | "parameter": { 4 | "endpoint": "127.0.0.1", 5 | "bucket": "test", 6 | "accessKey": "ak-xxx", 7 | "secretKey": "sk-xxx", 8 | "prefix": "/test" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /runtime/ops/llms/text_quality_evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='TextQualityEvaluation', 6 | module_path="ops.llms.text_quality_evaluation.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_direction_correct/__init__.py: -------------------------------------------------------------------------------- 1 | # 
-*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgDirectionCorrect', 6 | module_path="ops.mapper.img_direction_correct.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/phone_number_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnonymizedPhoneNumber', 6 | module_path="ops.mapper.phone_number_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/political_word_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='PoliticalWordCleaner', 6 | module_path="ops.mapper.political_word_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/traditional_chinese/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='TraditionalChineseCleaner', 6 | module_path="ops.mapper.traditional_chinese.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/unicode_space_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='UnicodeSpaceCleaner', 6 | module_path="ops.mapper.unicode_space_cleaner.process") 7 | 
-------------------------------------------------------------------------------- /runtime/ops/slicer/slide_annotation_slicer/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnnotationSlicer', 6 | module_path="ops.slicer.slide_annotation_slicer.process") 7 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: common 3 | repository: oci://registry-1.docker.io/bitnamicharts 4 | version: 2.4.0 5 | digest: sha256:8c1a5dc923412d11d4d841420494b499cb707305c8b9f87f45ea1a8bf3172cb3 6 | generated: "2023-05-21T14:12:59.250402885Z" 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/knowledge_relation_slice/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='KnowledgeRelationSlice', 6 | module_path="ops.mapper.knowledge_relation_slice.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_blurred_images_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgBlurredImagesCleaner', 6 | module_path="ops.filter.img_blurred_images_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_similar_images_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | 
# -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgSimilarImagesCleaner', 6 | module_path="ops.filter.img_similar_images_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/garble_characters_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='GrableCharactersCleaner', 6 | module_path="ops.mapper.garble_characters_cleaner.process") 7 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: common 3 | repository: https://charts.bitnami.com/bitnami 4 | version: 1.12.0 5 | digest: sha256:7e484480451778c273e7a165dbfaa5594ec1c9a63a114ce9d458626cadd28893 6 | generated: "2022-03-16T15:37:48.808974487Z" 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/credit_card_number_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='AnonymizedCreditCardNumber', 6 | module_path="ops.mapper.credit_card_number_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/remove_duplicate_sentences/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='DuplicateSentencesFilter', 6 | 
module_path="ops.mapper.remove_duplicate_sentences.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_duplicated_images_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgDuplicatedImagesCleaner', 6 | module_path="ops.filter.img_duplicated_images_cleaner.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/mapper/full_width_characters_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FullWidthCharacterCleaner', 6 | module_path="ops.mapper.full_width_characters_cleaner.process") 7 | -------------------------------------------------------------------------------- /frontend/src/pages/DataAnnotation/Template/index.ts: -------------------------------------------------------------------------------- 1 | export { default as TemplateList } from "./TemplateList"; 2 | export { default as TemplateForm } from "./TemplateForm"; 3 | export { default as TemplateDetail } from "./TemplateDetail"; 4 | export { TagBrowser, TagSelector, TagInfoPanel } from "./components"; 5 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_perspective_transformation/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgPerspectiveTransformation', 6 | module_path="ops.mapper.img_perspective_transformation.process") 7 | -------------------------------------------------------------------------------- 
/runtime/ops/mapper/invisible_characters_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='InvisibleCharactersCleaner', 6 | module_path="ops.mapper.invisible_characters_cleaner.process") 7 | 8 | -------------------------------------------------------------------------------- /runtime/ops/mapper/sexual_and_violent_word_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='SexualAndViolentWordCleaner', 6 | module_path="ops.mapper.sexual_and_violent_word_cleaner.process") 7 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/common/enums/DuplicateMethod.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.common.enums; 2 | 3 | /** 4 | * 文件重名时的处理方式 5 | * 6 | * @since 2025/11/18 7 | */ 8 | public enum DuplicateMethod { 9 | ERROR, 10 | COVER 11 | } 12 | -------------------------------------------------------------------------------- /runtime/datax/obswriter/src/main/resources/plugin_job_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "obswriter", 3 | "parameter": { 4 | "endpoint": "127.0.0.1", 5 | "bucket": "test", 6 | "accessKey": "ak-xxx", 7 | "secretKey": "sk-xxx", 8 | "prefix": "/test", 9 | "destPath": "/test" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_advertisement_images_cleaner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: 
utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='ImgAdvertisementImagesCleaner', 6 | module_path="ops.filter.img_advertisement_images_cleaner.process") 7 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: kube-prometheus-stack 3 | repository: https://prometheus-community.github.io/helm-charts 4 | version: 56.9.0 5 | digest: sha256:4d83a1bf6fe9b53b8bca45bb240d2e4c1979e068cdc67af53cdadc0885093dcf 6 | generated: "2024-02-23T21:26:50.977495+02:00" 7 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/shared/schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import ( 2 | BaseResponseModel, 3 | StandardResponse, 4 | PaginatedData, 5 | TaskStatus 6 | ) 7 | 8 | __all__ = [ 9 | "BaseResponseModel", 10 | "StandardResponse", 11 | "PaginatedData", 12 | "TaskStatus" 13 | ] 14 | -------------------------------------------------------------------------------- /runtime/ops/filter/remove_file_with_short_or_long_length/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FileWithShortOrLongLengthFilter', 6 | module_path="ops.filter.remove_file_with_short_or_long_length.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/llms/qa_condition_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | since: 5 | """ 6 | 7 | from datamate.core.base_op import OPERATORS 8 | 9 | 
OPERATORS.register_module(module_name='QAConditionEvaluator', 10 | module_path="ops.llms.qa_condition_evaluator.process") 11 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/ci/values-with-tls-pem-auth.yaml: -------------------------------------------------------------------------------- 1 | # Test values file for generating hidden section of the yaml 2 | # and check that the rendering is correct 3 | replicaCount: 3 4 | auth: 5 | clientProtocol: tls 6 | interBrokerProtocol: mtls 7 | tls: 8 | type: pem 9 | autoGenerated: true 10 | -------------------------------------------------------------------------------- /frontend/src/store/hooks.ts: -------------------------------------------------------------------------------- 1 | import { useDispatch, useSelector, TypedUseSelectorHook } from 'react-redux'; 2 | import type { RootState, AppDispatch } from './index'; 3 | 4 | // 类型化的 hooks 5 | export const useAppDispatch = () => useDispatch(); 6 | export const useAppSelector: TypedUseSelectorHook = useSelector; -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/ratio/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | router = APIRouter( 4 | prefix="/synthesis", 5 | tags = ["synthesis"] 6 | ) 7 | 8 | # Include sub-routers 9 | from .ratio_task import router as ratio_task_router 10 | 11 | router.include_router(ratio_task_router) 12 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_repeat_word_rate_filter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FileWithHighRepeatWordRateFilter', 6 | 
module_path="ops.filter.file_with_high_repeat_word_rate_filter.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/filter/remove_file_with_many_sensitive_words/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FileWithManySensitiveWordsFilter', 6 | module_path="ops.filter.remove_file_with_many_sensitive_words.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/examples/test_operator/process.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Dict, Any 3 | 4 | from datamate.core.base_op import Mapper 5 | 6 | 7 | class TestMapper(Mapper): 8 | def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: 9 | sample[self.text_key] += "\n####################\n" 10 | return sample 11 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_repeat_phrase_rate_filter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FileWithHighRepeatPhraseRateFilter', 6 | module_path="ops.filter.file_with_high_repeat_phrase_rate_filter.process") 7 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_special_char_rate_filter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from datamate.core.base_op import OPERATORS 4 | 5 | OPERATORS.register_module(module_name='FileWithHighSpecialCharRateFilter', 6 | module_path="ops.filter.file_with_high_special_char_rate_filter.process") 7 | 
-------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/common/enums/SyncMode.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.common.enums; 2 | 3 | /** 4 | * 同步方式:一次性(ONCE) 或 定时(SCHEDULED) 5 | */ 6 | public enum SyncMode { 7 | /** 一次性(ONCE) */ 8 | ONCE, 9 | /// 定时(SCHEDULED) 10 | SCHEDULED 11 | } 12 | 13 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/evaluation/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | router = APIRouter( 4 | prefix="/evaluation", 5 | tags = ["evaluation"] 6 | ) 7 | 8 | # Include sub-routers 9 | from .evaluation import router as evaluation_router 10 | 11 | router.include_router(evaluation_router) 12 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/contants/DatasetConstant.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.domain.contants; 2 | 3 | /** 4 | * 数据集常量 5 | */ 6 | public interface DatasetConstant { 7 | /** 8 | * 服务ID 9 | */ 10 | String SERVICE_ID = "DATA_MANAGEMENT"; 11 | } 12 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/LabelDto.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | @Getter 7 | @Setter 8 | public class LabelDto { 9 | 10 | private String id; 11 | 12 | private String name; 13 | } 14 | 
-------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/ci/values-with-metrics.yaml: -------------------------------------------------------------------------------- 1 | # Test values file for generating hidden section of the yaml 2 | # and check that the rendering is correct 3 | replicaCount: 3 4 | metrics: 5 | kafka: 6 | enabled: true 7 | jmx: 8 | enabled: true 9 | serviceMonitor: 10 | enabled: true 11 | namespace: monitoring 12 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/generation/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | router = APIRouter( 4 | prefix="/synthesis", 5 | tags = ["synthesis"] 6 | ) 7 | 8 | # Include sub-routers 9 | from .generation_api import router as generation_router_router 10 | 11 | router.include_router(generation_router_router) 12 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskLog.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | @Getter 7 | @Setter 8 | public class CleaningTaskLog { 9 | private String level; 10 | 11 | private String message; 12 | } 13 | -------------------------------------------------------------------------------- /deployment/helm/label-studio/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: label-studio 3 | version: 0.1.0 4 | description: A Helm chart for deploying Label Studio with PostgreSQL on Kubernetes 5 | 6 | # Application metadata 7 | appVersion: "latest" 8 | 9 | # This chart is designed to mirror the behavior of 
deployment/docker/label-studio/docker-compose.yml 10 | 11 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/ray-cluster/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: datamate-runtime 5 | labels: 6 | ray.io/node-type: head 7 | spec: 8 | type: ClusterIP 9 | ports: 10 | - port: 8081 11 | targetPort: 8081 12 | protocol: TCP 13 | selector: 14 | ray.io/node-type: head 15 | 16 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.domain.model; 2 | 3 | /** 4 | * RAG 文档块实体类 5 | * 6 | * @author dallas 7 | * @since 2025-10-29 8 | */ 9 | 10 | public record RagChunk( 11 | String id, 12 | String text, 13 | String metadata 14 | ) { 15 | } -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryRelationDto.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | @Setter 7 | @Getter 8 | public class CategoryRelationDto { 9 | private String categoryId; 10 | 11 | private String operatorId; 12 | } 13 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.{java,kt}] 12 | 
indent_size = 4 13 | 14 | [*.{py}] 15 | indent_size = 4 16 | 17 | [*.{md}] 18 | trim_trailing_whitespace = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ template "kuberay-operator.serviceAccount.name" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "kuberay-operator.labels" . | nindent 4 }} 9 | {{- end -}} 10 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: zookeeper 3 | repository: https://charts.bitnami.com/bitnami 4 | version: 8.1.2 5 | - name: common 6 | repository: https://charts.bitnami.com/bitnami 7 | version: 1.12.0 8 | digest: sha256:903762070537232f45dacf1d3ab09c43a9fec56656c2c66c15fbb35b64b6678c 9 | generated: "2022-03-17T20:28:29.202310166Z" 10 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsarv2/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: 2.7.4 3 | description: Apache Pulsar Helm chart for Kubernetes 4 | home: https://pulsar.apache.org 5 | icon: http://pulsar.apache.org/img/pulsar.svg 6 | maintainers: 7 | - email: contact@milvus.io 8 | name: contact 9 | name: pulsarv2 10 | sources: 11 | - https://github.com/apache/pulsar 12 | version: 2.7.8 13 | -------------------------------------------------------------------------------- /runtime/ops/llms/qa_condition_evaluator/resources/template.txt: 
-------------------------------------------------------------------------------- 1 | 你将会获得一个问答对,判断问答对是否满足以下标准: 2 | 标准:"{criterion}" 3 | 4 | 要求: 5 | 1. 结合以上标准,一步一步的分析问答对是否满足标准,按照模板输出你的回答。 6 | 2. 如果你对自己的判断没有较强的信心,直接算作不满足标准。 7 | 3. 你的最终裁定应该是'Y'表示是(符合标准)或'N'表示否(不符合标准)。 8 | 4. 如果你的回答不符合模板格式和规范,重新思考回答。 9 | {examples} 10 | 问答对: 11 | 问题:"{question}" 12 | 答案:"{answer}" 13 | 14 | 模板: 15 | 结果:[插入结果N或Y] 16 | 分析思路:XXX 17 | """ -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/setting/domain/entity/ModelType.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.setting.domain.entity; 2 | 3 | /** 4 | * 模型类型枚举类 5 | * 6 | * @author dallas 7 | * @since 2025-10-27 8 | */ 9 | public enum ModelType { 10 | /** 11 | * 语言模型 12 | */ 13 | CHAT, 14 | /** 15 | * 嵌入模型 16 | */ 17 | EMBEDDING 18 | } 19 | -------------------------------------------------------------------------------- /runtime/ops/slicer/segmentation/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文本切分' 2 | name_en: 'Text Segmentation' 3 | description: '将文本切分成多个切片。' 4 | description_en: 'Text Segmentation.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'Segmentation' 8 | version: '1.0.0' 9 | types: 10 | - 'consolidate' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/ci/values-with-tls-jks-auth.yaml: -------------------------------------------------------------------------------- 1 | # Test values file for generating hidden section of the yaml 2 | # and check that the rendering is correct 3 | replicaCount: 3 4 | auth: 5 | clientProtocol: tls 6 | interBrokerProtocol: mtls 7 | tls: 8 | type: jks 9 | existingSecrets: 10 | - kafka-secret-0 11 | - kafka-secret-1 12 | - kafka-secret-2 13 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Annotation Module Utilities 3 | """ 4 | from .config_validator import LabelStudioConfigValidator 5 | from .tag_converter import TagFormatConverter, create_converter_from_template_config 6 | 7 | __all__ = [ 8 | 'LabelStudioConfigValidator', 9 | 'TagFormatConverter', 10 | 'create_converter_from_template_config' 11 | ] 12 | -------------------------------------------------------------------------------- /runtime/ops/mapper/political_word_cleaner/resources/special_symbols.txt: -------------------------------------------------------------------------------- 1 | ! 2 | 3 | . 4 | , 5 | # 6 | $ 7 | % 8 | & 9 | * 10 | ( 11 | ) 12 | | 13 | ? 14 | / 15 | @ 16 | " 17 | ' 18 | ; 19 | [ 20 | ] 21 | { 22 | } 23 | + 24 | ~ 25 | - 26 | _ 27 | = 28 | ^ 29 | < 30 | > 31 | ! 32 | 。 33 | , 34 | ¥ 35 | ( 36 | ) 37 | ? 
38 | 、 39 | “ 40 | ‘ 41 | ; 42 | 【 43 | 】 44 | —— 45 | … 46 | …… 47 | 《 48 | 》 49 | : 50 | : -------------------------------------------------------------------------------- /frontend/src/mock/mock-middleware/index.cjs: -------------------------------------------------------------------------------- 1 | const setHeader = require('./set-header-middleware.cjs'); 2 | const strongMatch = require('./strong-match-middleware.cjs'); 3 | const sendJSON = require('./send-json-middleawre.cjs'); 4 | const errorHandle = require('./error-handle-middleware.cjs'); 5 | 6 | module.exports = { 7 | setHeader, 8 | strongMatch, 9 | sendJSON, 10 | errorHandle, 11 | }; -------------------------------------------------------------------------------- /runtime/ops/mapper/sexual_and_violent_word_cleaner/resources/special_symbols.txt: -------------------------------------------------------------------------------- 1 | ! 2 | 3 | . 4 | , 5 | # 6 | $ 7 | % 8 | & 9 | * 10 | ( 11 | ) 12 | | 13 | ? 14 | / 15 | @ 16 | " 17 | ' 18 | ; 19 | [ 20 | ] 21 | { 22 | } 23 | + 24 | ~ 25 | - 26 | _ 27 | = 28 | ^ 29 | < 30 | > 31 | ! 32 | 。 33 | , 34 | ¥ 35 | ( 36 | ) 37 | ? 
38 | 、 39 | “ 40 | ‘ 41 | ; 42 | 【 43 | 】 44 | —— 45 | … 46 | …… 47 | 《 48 | 》 49 | : 50 | : -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/domain/ValueObject.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.domain; 2 | 3 | /** 4 | * DDD值对象基类 5 | */ 6 | public abstract class ValueObject { 7 | 8 | @Override 9 | public abstract boolean equals(Object obj); 10 | 11 | @Override 12 | public abstract int hashCode(); 13 | 14 | @Override 15 | public abstract String toString(); 16 | } 17 | -------------------------------------------------------------------------------- /frontend/src/mock/mock-middleware/error-handle-middleware.cjs: -------------------------------------------------------------------------------- 1 | const errorHandle = (err, req, res, next) => { 2 | if(res.headersSent) { 3 | return next(err); 4 | } 5 | console.error('Server Error:', err.message); 6 | res.status(500).json({ 7 | code: '500', 8 | msg: 'Internal Server Error', 9 | data: null, 10 | }); 11 | }; 12 | 13 | module.exports = errorHandle; 14 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_special_char_rate_filter/resources/special_token.txt: -------------------------------------------------------------------------------- 1 | ~ 2 | · 3 | ! 4 | @ 5 | # 6 | ¥ 7 | % 8 | … 9 | & 10 | * 11 | ( 12 | ) 13 | — 14 | + 15 | - 16 | = 17 | { 18 | } 19 | | 20 | 【 21 | 】 22 | 、 23 | : 24 | “ 25 | ; 26 | ‘ 27 | 《 28 | 》 29 | ? 30 | , 31 | 。 32 | ` 33 | ! 34 | $ 35 | ^ 36 | ( 37 | ) 38 | _ 39 | [ 40 | ] 41 | \ 42 | : 43 | " 44 | ; 45 | ' 46 | < 47 | > 48 | ? 
49 | , 50 | / -------------------------------------------------------------------------------- /runtime/ops/filter/remove_file_with_many_sensitive_words/resources/special_symbols.txt: -------------------------------------------------------------------------------- 1 | ! 2 | 3 | . 4 | , 5 | # 6 | $ 7 | % 8 | & 9 | * 10 | ( 11 | ) 12 | | 13 | ? 14 | / 15 | @ 16 | " 17 | ' 18 | ; 19 | [ 20 | ] 21 | { 22 | } 23 | + 24 | ~ 25 | - 26 | _ 27 | = 28 | ^ 29 | < 30 | > 31 | ! 32 | 。 33 | , 34 | ¥ 35 | ( 36 | ) 37 | ? 38 | 、 39 | “ 40 | ‘ 41 | ; 42 | 【 43 | 】 44 | —— 45 | … 46 | …… 47 | 《 48 | 》 49 | : 50 | : -------------------------------------------------------------------------------- /runtime/ops/mapper/extra_space_cleaner/resources/special_token.txt: -------------------------------------------------------------------------------- 1 | ~ 2 | · 3 | ! 4 | @ 5 | # 6 | ¥ 7 | % 8 | … 9 | & 10 | * 11 | ( 12 | ) 13 | — 14 | + 15 | - 16 | = 17 | { 18 | } 19 | | 20 | 【 21 | 】 22 | 、 23 | : 24 | “ 25 | ” 26 | ‘ 27 | ’ 28 | ; 29 | 《 30 | 》 31 | ? 32 | , 33 | 。 34 | ` 35 | ! 36 | $ 37 | ^ 38 | ( 39 | ) 40 | _ 41 | [ 42 | ] 43 | \ 44 | : 45 | " 46 | ; 47 | ' 48 | < 49 | > 50 | ? 51 | , 52 | / 53 | . -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | src/mock/sessions/* 16 | 17 | # Editor directories and files 18 | .vscode/* 19 | !.vscode/extensions.json 20 | .idea 21 | .DS_Store 22 | *.suo 23 | *.ntvs* 24 | *.njsproj 25 | *.sln 26 | *.sw? 
27 | 28 | .vite -------------------------------------------------------------------------------- /frontend/src/hooks/useDebouncedEffect.ts: -------------------------------------------------------------------------------- 1 | import { useEffect } from "react"; 2 | 3 | export function useDebouncedEffect( 4 | cb: () => void, 5 | deps: any[] = [], 6 | delay: number = 300 7 | ) { 8 | useEffect(() => { 9 | const handler = setTimeout(() => { 10 | cb(); 11 | }, delay); 12 | 13 | return () => { 14 | clearTimeout(handler); 15 | }; 16 | }, [...(deps || []), delay]); 17 | } 18 | -------------------------------------------------------------------------------- /runtime/ops/mapper/xml_tag_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: 'XML标签去除' 2 | name_en: 'XML Tag Removal' 3 | description: '去除XML中的标签。' 4 | description_en: 'Removes tags from XML files.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'XMLTagCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: ' <性别>男' 14 | after: '性别: 男' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/datamate-python/.env.example: -------------------------------------------------------------------------------- 1 | # Dev settings 2 | HOST=0.0.0.0 3 | PORT=18000 4 | 5 | DEBUG=true 6 | LOG_LEVEL=DEBUG 7 | LOG_FILE_DIR=./logs 8 | 9 | # DataBase 10 | MYSQL_HOST=localhost 11 | MYSQL_PORT=3306 12 | MYSQL_USER=root 13 | MYSQL_PASSWORD=password 14 | MYSQL_DATABASE=datamate 15 | 16 | # Label Studio settings 17 | LABEL_STUDIO_BASE_URL=http://localhost:8080 18 | 19 | LABEL_STUDIO_USER_TOKEN="demo_dev_token" 20 | -------------------------------------------------------------------------------- /runtime/ops/mapper/content_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档目录去除' 2 | 
name_en: 'Document Contents Removal' 3 | description: '去除文档中的目录。' 4 | description_en: 'Removes tables of contents from documents.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ContentCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/formatter/slide_formatter/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '病理图片内容抽取' 2 | name_en: 'Pathology Image Content Extraction' 3 | description: '解析病理图片。' 4 | description_en: 'Analyze pathological images.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'SlideFormatter' 8 | version: '1.0.0' 9 | types: 10 | - 'collect' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import jakarta.validation.constraints.NotEmpty; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * 复制文件请求DTO 9 | * 10 | * @author dallas 11 | * @since 2025-11-13 12 | */ 13 | public record CopyFilesRequest(@NotEmpty List sourcePaths) { 14 | } 15 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "minio.serviceAccountName" . 
| quote }} 6 | namespace: {{ .Release.Namespace | quote }} 7 | labels: 8 | app: {{ template "minio.name" . }} 9 | chart: {{ template "minio.chart" . }} 10 | release: "{{ .Release.Name }}" 11 | {{- end -}} 12 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | {{ $.Chart.Name }} has been installed. Check its status by running: 2 | kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} get pods -l "release={{ $.Release.Name }}" 3 | 4 | Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator. 5 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | DataMate 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/system/schema/health.py: -------------------------------------------------------------------------------- 1 | from pydantic import Field 2 | 3 | from app.module.shared.schema import BaseResponseModel 4 | from app.module.shared.schema import StandardResponse 5 | 6 | class HealthResponse(BaseResponseModel): 7 | """健康检查响应模型""" 8 | status: str = Field(..., description="服务状态") 9 | service: str = Field(..., description="服务名称") 10 | version: str = Field(..., description="应用版本") -------------------------------------------------------------------------------- /runtime/ops/filter/img_duplicated_images_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '重复图片去除' 2 | name_en: 'Duplicate Image Removal' 3 | description: '去除重复的图片。' 4 | description_en: 'Removes duplicate images.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgDuplicatedImagesCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_brightness/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片亮度增强' 2 | name_en: 'Image Brightness Enhancement' 3 | description: '自适应调节图片的亮度。' 4 | description_en: 'Adapts and adjusts image brightness.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgBrightness' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_contrast/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片对比度增强' 2 | name_en: 'Image Contrast Enhancement' 3 | description: '自适应调节图片的对比度。' 4 | description_en: 'Adapts and adjusts the image contrast.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgContrast' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/text_to_word/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '转换为Word' 2 | name_en: 'Convert-to-Word' 3 | description: '将抽取结果转换为docx的word文件。' 4 | description_en: 'Converts extraction results to Word files in DOCX format.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'TextToWord' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_advertisement_images_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '广告图片过滤' 2 | name_en: 'Ad Image Filter' 3 | description: '去除包含二维码的图片。' 4 | description_en: 'Removes images containing QR codes.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgAdvertisementImagesCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/ip_address_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: 'IP地址匿名化' 2 | name_en: 'IP Address Anonymization' 3 | description: 'IP地址匿名化' 4 | description_en: 'Anonymizes IP addresses.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnonymizedIpAddress' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这个是IP地址:10.x.x.10' 14 | after: '这个是IP地址:' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/knowledge_relation_slice/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '知识库关系切片' 2 | name_en: 'Knowledge base relationship slicing' 3 | description: '知识库关系切片' 4 | description_en: 'Knowledge base relationship slicing.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'KnowledgeRelationSlice' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/llms/text_quality_evaluation/resources/template.txt: -------------------------------------------------------------------------------- 1 | 你将会获得一个问答对,判断问答对是否满足以下标准: 2 | 标准:"{criterion}" 3 | 4 | 要求: 5 | 1. 结合以上标准,一步一步的分析question文本是否满足标准,这里的question不是指一个问题,只是输入的文本,按照模板输出每个维度的分数,你的result就是分数。额外输入一个维度平均分 6 | 2. 如果你对自己的判断没有较强的信心,直接算作不满足标准。 7 | 3. 你的最终裁定应该是1-5的评分,严格按照examples中打分的标准。 8 | 4. 
如果你的回答不符合模板格式和规范,重新思考回答。 9 | {examples} 10 | 问答对: 11 | 问题:"{question}" 12 | 答案:"{answer}" 13 | 14 | 模板: 15 | 结果:[1或2或3或4或5] 16 | 分析思路:XXX 17 | """ -------------------------------------------------------------------------------- /runtime/ops/mapper/political_word_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '政治文本匿名化' 2 | name_en: 'Political Text Anonymization' 3 | description: '将政治文本进行匿名化。' 4 | description_en: 'Anonymizes political texts.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'PoliticalWordCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '特别字符:改革历程' 14 | after: '特别字符:***' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.operator.domain.model.Category; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | @Mapper 8 | public interface CategoryMapper extends BaseMapper { 9 | } 10 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: master 3 | description: High Performance, Kubernetes Native Object Storage 4 | home: https://min.io 5 | icon: https://min.io/resources/img/logo/MINIO_wordmark.png 6 | keywords: 7 | - storage 8 | - object-storage 9 | - S3 10 | maintainers: 11 | - email: contact@milvus.io 12 | name: contact 13 | name: minio 14 | sources: 15 | - https://github.com/minio/minio 16 | 
version: 8.0.17 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ template "minio.fullname" . }} 5 | labels: 6 | app: {{ template "minio.name" . }} 7 | chart: {{ template "minio.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | data: 11 | initialize: |- 12 | {{ include (print $.Template.BasePath "/_helper_create_bucket.txt") . | indent 4 }} 13 | -------------------------------------------------------------------------------- /runtime/ops/mapper/email_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '邮件地址匿名化' 2 | name_en: 'Email Address Anonymization' 3 | description: '邮件地址匿名化' 4 | description_en: 'Anonymizes email addresses.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'EmailNumberCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这个是邮箱号:test_email@gmail.com' 14 | after: '这个是邮箱号:' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/id_number_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '身份证号匿名化' 2 | name_en: 'ID Card Number Anonymization' 3 | description: '身份证号匿名化。' 4 | description_en: 'Anonymizes ID card numbers.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnonymizedIdNumber' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这个是身份证号110101190001011009' 14 | after: '这个是身份证号' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/phone_number_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '电话号码匿名化' 2 | name_en: 'Phone Number Anonymization' 3 | description: '电话号码匿名化' 4 | description_en: 'Anonymizes phone numbers.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnonymizedPhoneNumber' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这个是电话号码:13111111111' 14 | after: '这个是电话号码:' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/ci/values-with-external-tls.yaml: -------------------------------------------------------------------------------- 1 | # Test values file for generating hidden section of the yaml 2 | # and check that the rendering is correct 3 | replicaCount: 3 4 | auth: 5 | externalClientProtocol: tls 6 | tls: 7 | type: jks 8 | existingSecrets: 9 | - kafka-secret-0 10 | - kafka-secret-1 11 | - kafka-secret-2 12 | externalAccess: 13 | enabled: true 14 | autoDiscovery: 15 | enabled: true 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/templates/headless-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "tei.fullname" . }}-headless 5 | labels: 6 | {{- include "tei.labels" . 
| nindent 4 }} 7 | spec: 8 | clusterIP: None 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: http 12 | protocol: TCP 13 | name: http 14 | selector: 15 | {{- include "tei.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /runtime/ops/mapper/url_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: 'URL网址匿名化' 2 | name_en: 'URL Anonymization' 3 | description: '将文档中的url网址匿名化。' 4 | description_en: 'Anonymizes URLs in documents.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnonymizedUrlCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '需要被屏蔽的url:https://www.huawei.com' 14 | after: '需要被屏蔽的url:' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/legend_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图注表注去除' 2 | name_en: 'Figure and Table Description Removal' 3 | description: '去除文档中的图注、表注等内容。' 4 | description_en: 'Removes figure and table description from documents.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'LegendCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '图1.1.1 图注名称' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/common/IgnoreResponseWrap.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.infrastructure.common; 2 | 3 | import java.lang.annotation.*; 4 | 5 | /** 6 | * 忽略响应包装注解 7 | *

8 | * 在使用全局响应包装时,如果某个接口或类不需要进行响应包装,可以使用此注解进行标记 9 | *

10 | */ 11 | @Target({ElementType.METHOD, ElementType.TYPE}) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | @Documented 14 | public @interface IgnoreResponseWrap { 15 | } 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/templates/poddisruptionbudget.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.podDisruptionBudget.enabled }} 2 | apiVersion: {{ template "minio.pdb.apiVersion" . }} 3 | kind: PodDisruptionBudget 4 | metadata: 5 | name: minio 6 | labels: 7 | app: {{ template "minio.name" . 
}} 8 | spec: 9 | maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} 10 | selector: 11 | matchLabels: 12 | app: {{ template "minio.name" . }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /frontend/src/store/index.ts: -------------------------------------------------------------------------------- 1 | import { configureStore } from "@reduxjs/toolkit"; 2 | import authSlice from "./slices/authSlice"; 3 | import settingsSlice from "./slices/settingsSlice"; 4 | 5 | // 创建 Store 6 | export const store = configureStore({ 7 | reducer: { 8 | auth: authSlice, 9 | settings: settingsSlice, 10 | }, 11 | }); 12 | 13 | // 导出类型 14 | export type RootState = ReturnType; 15 | export type AppDispatch = typeof store.dispatch; -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTaskMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.cleaning.domain.model.entity.CleaningTask; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | @Mapper 8 | public interface CleaningTaskMapper extends BaseMapper { 9 | } 10 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/setting/domain/repository/SysParamRepository.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.setting.domain.repository; 2 | 3 | import com.baomidou.mybatisplus.extension.repository.IRepository; 4 | import com.datamate.common.setting.domain.entity.SysParam; 5 | 6 | /** 7 | * 系统参数仓库接口 8 | * 9 | * @author dallas 10 | * @since 2025-11-04 11 | */ 12 | public interface 
SysParamRepository extends IRepository { 13 | } 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsarv2/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /runtime/ops/llms/qa_condition_evaluator/metadata.yml: -------------------------------------------------------------------------------- 1 | name: 'QA评估' 2 | name_en: 'QA Assessment' 3 | description: '通过用户维度和相应描述进行QA对评估。' 4 | description_en: 'Perform QA assessment based on the user dimension and corresponding description.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'QAConditionEvaluator' 8 | version: '1.0.0' 9 | types: 10 | - 'consolidate' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/slicer/slide_annotation_slicer/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '病理图片标注切片' 2 | name_en: 'Pathological Image Annotation Slicing' 3 | description: '根据标注文件对病理图片进行切片。' 4 | description_en: 'Slices pathology images based on the annotation file.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnnotationSlicer' 8 | version: '1.0.0' 9 | types: 10 | - 'consolidate' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | -------------------------------------------------------------------------------- /scripts/images/database/utf8.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | # 设置服务器默认字符集为 utf8mb4 (推荐,支持完整的 UTF-8,包括 emoji) 3 | character-set-server = utf8mb4 4 | # 设置默认排序规则 5 | collation-server = utf8mb4_unicode_ci 6 | # 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松) 7 | default-time-zone = 'Asia/Shanghai' 8 | log_error=/var/log/datamate/database/error.log 9 | 10 | [client] 11 | # 设置客户端连接默认字符集 12 | default-character-set = utf8mb4 13 | 14 | [mysql] 15 | # 设置 mysql 命令行客户端默认字符集 16 | default-character-set = utf8mb4 17 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.cleaning.domain.model.entity.CleaningResult; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | @Mapper 8 | public interface CleaningResultMapper extends BaseMapper { 9 | } 10 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_denoise/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片噪点去除' 2 | name_en: 'Image Noise Removal' 3 | description: '去除图片中的噪点,主要适用于自然场景。' 4 | description_en: 'Removes noises from images, which is mainly applicable to natural 5 | scenery image scenarios.' 
6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgDenoise' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_shadow_remove/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片阴影去除' 2 | name_en: 'Image Shadow Removal' 3 | description: '去除图片中的阴影,主要适用于文档场景。' 4 | description_en: 'Removes shadows from images, which is mainly applicable to document 5 | scenarios.' 6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgShadowRemove' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryRelationMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.operator.domain.model.CategoryRelation; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | @Mapper 8 | public interface CategoryRelationMapper extends BaseMapper { 9 | } 10 | -------------------------------------------------------------------------------- /deployment/helm/datamate/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | /** 7 | * 创建标签请求DTO 8 | */ 9 | @Getter 10 | @Setter 11 | public class CreateTagRequest { 12 | /** 标签名称 */ 13 | private String name; 14 | /** 标签颜色 */ 15 | private String color; 16 | /** 标签描述 */ 17 | private String description; 18 | } 19 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.infrastructure.event; 2 | 3 | import com.datamate.rag.indexer.domain.model.KnowledgeBase; 4 | import com.datamate.rag.indexer.interfaces.dto.AddFilesReq; 5 | 6 | /** 7 | * 数据插入事件 8 | * 9 | * @author dallas 10 | * @since 2025-10-29 11 | */ 12 | public record DataInsertedEvent(KnowledgeBase knowledgeBase, AddFilesReq addFilesReq) { 13 | } 14 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 
删除文件请求 10 | * 11 | * @author dallas 12 | * @since 2025-10-29 13 | */ 14 | @Setter 15 | @Getter 16 | public class DeleteFilesReq { 17 | /** 18 | * Rag文件表主键ID列表 19 | */ 20 | private List ids; 21 | } 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | # OWNERS file for Kubernetes 23 | OWNERS -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/charts/common/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "tei.serviceAccountName" . }} 6 | labels: 7 | {{- include "tei.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /frontend/src/pages/OperatorMarket/operator.const.tsx: -------------------------------------------------------------------------------- 1 | import { Code } from "lucide-react"; 2 | import { OperatorI } from "./operator.model"; 3 | import {formatDateTime} from "@/utils/unit.ts"; 4 | 5 | export const mapOperator = (op: OperatorI) => { 6 | return { 7 | ...op, 8 | icon: , 9 | createdAt: formatDateTime(op?.createdAt) || "--", 10 | updatedAt: formatDateTime(op?.updatedAt) || formatDateTime(op?.createdAt) || "--", 11 | }; 12 | }; 13 | -------------------------------------------------------------------------------- /runtime/ops/llms/text_quality_evaluation/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文本质量评估' 2 | name_en: 'Text Quality Evaluation' 3 | description: '通过用户维度和相应描述进行文本评估。' 4 | description_en: 'Text evaluation is performed based on user dimensions and corresponding descriptions.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'TextQualityEvaluation' 8 | version: '1.0.0' 9 | types: 10 | - 'consolidate' 11 | modal: 'text' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/credit_card_number_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '信用卡号匿名化' 2 | name_en: 'Credit Card Number Anonymization' 3 | description: '信用卡号匿名化' 4 | description_en: 'Anonymizes credit card numbers.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'AnonymizedCreditCardNumber' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这个是信用卡号:4111111111111111' 14 | after: '这个是信用卡号:' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/sexual_and_violent_word_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '暴力色情文本匿名化' 2 | name_en: 'Violent and Pornographic Text Anonymization' 3 | description: '将暴力、色情文本进行匿名化。' 4 | description_en: 'Anonymizes violent and pornographic texts.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'SexualAndViolentWordCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '特别字符:炸药' 14 | after: '特别字符:***' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /scripts/images/frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine AS builder 2 | 3 | WORKDIR /app 4 | 5 | COPY frontend ./ 6 | 7 | RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \ 8 | npm run build 9 | 10 | FROM nginx:1.29 AS runner 11 | 12 | COPY --from=builder /app/dist /opt/frontend 13 | 14 | RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ 15 | && rm -f /etc/nginx/conf.d/default.conf 16 | 17 | EXPOSE 80 18 | 19 | CMD ["nginx", "-g", "daemon off;"] 20 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/database/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/frontend/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/gateway/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/public/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 
2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/runtime/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/backend/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/frontend/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/public/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/common/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 
2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl: -------------------------------------------------------------------------------- 1 | {{/* Generate basic labels for prometheus-operator-webhook */}} 2 | {{- define "kube-prometheus-stack.prometheus-operator-webhook.labels" }} 3 | {{- include "kube-prometheus-stack.labels" . }} 4 | app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus-operator 5 | app.kubernetes.io/component: prometheus-operator-webhook 6 | {{- end }} 7 | -------------------------------------------------------------------------------- /frontend/src/pages/OperatorMarket/Detail/components/Documentation.tsx: -------------------------------------------------------------------------------- 1 | import { Card } from "antd"; 2 | 3 | export default function Documentation({ operator }) { 4 | return ( 5 |
6 | 7 |
8 |
9 | {operator.documentation} 10 |
11 |
12 |
13 |
14 | ); 15 | } 16 | -------------------------------------------------------------------------------- /backend/services/main-application/src/main/resources/config/application-datamanagement.yml: -------------------------------------------------------------------------------- 1 | datamate: 2 | datamanagement: 3 | file-storage: 4 | upload-dir: ${FILE_UPLOAD_DIR:./uploads} 5 | max-file-size: 10485760 # 10MB 6 | max-request-size: 52428800 # 50MB 7 | cache: 8 | ttl: 3600 9 | max-size: 1000 10 | # MyBatis is configured centrally in main-application (mapper-locations & aliases) 11 | # to avoid list overriding issues when importing multiple module configs. 12 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/ErrorCode.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.infrastructure.exception; 2 | 3 | /** 4 | * 错误码接口 5 | * 6 | * @author dallas 7 | * @since 2025-10-17 8 | */ 9 | public interface ErrorCode { 10 | /** 11 | * 获取错误码 12 | * 13 | * @return 错误码 14 | */ 15 | String getCode(); 16 | 17 | /** 18 | * 获取错误信息 19 | * 20 | * @return 错误信息 21 | */ 22 | String getMessage(); 23 | } 24 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend-python/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/ci/with-extraconfigmapmounts-values.yaml: -------------------------------------------------------------------------------- 1 | extraConfigmapMounts: 2 | - name: '{{ include "grafana.fullname" . }}' 3 | configMap: '{{ include "grafana.fullname" . }}' 4 | mountPath: /var/lib/grafana/dashboards/test-dashboard.json 5 | # This is not a realistic test, but for this we only care about extraConfigmapMounts not being empty and pointing to an existing ConfigMap 6 | subPath: grafana.ini 7 | readOnly: true 8 | -------------------------------------------------------------------------------- /deployment/helm/milvus/templates/proxy-tls-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if and (.Values.proxy.tls) (.Values.proxy.tls.enabled) }} 2 | 3 | {{- if and (.Values.proxy.tls.crt) (.Values.proxy.tls.key) }} 4 | --- 5 | apiVersion: v1 6 | kind: Secret 7 | metadata: 8 | name: {{ .Values.proxy.tls.secretName }} 9 | namespace: {{ .Release.Namespace }} 10 | data: 11 | tls.crt: {{ .Values.proxy.tls.crt }} 12 | tls.key: {{ .Values.proxy.tls.key }} 13 | type: kubernetes.io/tls 14 | {{- end }} 15 | 16 | {{- end -}} 17 | 18 | -------------------------------------------------------------------------------- /frontend/src/mock/nodemon.json: -------------------------------------------------------------------------------- 1 | { 2 | "restartable": "rs", 3 | "ignore": [ 4 | ".git", 5 | "node_modules/**/node_modules", 6 | "dist", 7 | "build", 8 | "*.test.js", 9 | "*.spec.js" 10 | ], 11 | "verbose": true, 12 
| "watch": ["*.cjs"], 13 | "exec": "node --inspect=0.0.0.0:9229 mock.cjs", 14 | "ext": "js,cjs,json", 15 | "execMap": { 16 | "js": "node --harmony" 17 | }, 18 | "env": { 19 | "NODE_ENV": "development" 20 | }, 21 | "signal": "SIGTERM" 22 | } 23 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "backend.serviceAccountName" . }} 6 | labels: 7 | {{- include "backend.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/database/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "database.serviceAccountName" . }} 6 | labels: 7 | {{- include "database.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/frontend/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "frontend.serviceAccountName" . 
}} 6 | labels: 7 | {{- include "frontend.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/gateway/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "gateway.serviceAccountName" . }} 6 | labels: 7 | {{- include "gateway.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/runtime/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "runtime.serviceAccountName" . }} 6 | labels: 7 | {{- include "runtime.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/backend/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "backend.serviceAccountName" . 
}} 6 | labels: 7 | {{- include "backend.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/frontend/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "frontend.serviceAccountName" . }} 6 | labels: 7 | {{- include "frontend.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | # OWNERS file for Kubernetes 23 | OWNERS -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/prometheus-operator/_prometheus-operator.tpl: -------------------------------------------------------------------------------- 1 | {{/* Generate basic labels for prometheus-operator */}} 2 | {{- define "kube-prometheus-stack.prometheus-operator.labels" }} 3 | {{- include "kube-prometheus-stack.labels" . }} 4 | app: {{ template "kube-prometheus-stack.name" . }}-operator 5 | app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus-operator 6 | app.kubernetes.io/component: prometheus-operator 7 | {{- end }} 8 | -------------------------------------------------------------------------------- /frontend/src/mock/mock-middleware/strong-match-middleware.cjs: -------------------------------------------------------------------------------- 1 | const API = require('../mock-apis.cjs'); 2 | 3 | const strongMatch = (req, res, next) => { 4 | res.strongMatch = () => { 5 | const { url } = req; 6 | const index = url.indexOf('?'); 7 | const targetUrl = index !== -1 ? 
url.substring(0, index) : url; 8 | const isExistedUrl = Object.values(API).includes(targetUrl); 9 | return isExistedUrl; 10 | }; 11 | next(); 12 | }; 13 | module.exports = strongMatch; -------------------------------------------------------------------------------- /runtime/ops/mapper/garble_characters_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档乱码去除' 2 | name_en: 'Garbled Character Removal' 3 | description: '去除文档中的乱码和无意义的unicode。' 4 | description_en: 'Removes garbled characters and meaningless Unicode characters from 5 | documents.' 6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'GrableCharactersCleaner' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'text' 13 | effect: 14 | before: '文档乱码����' 15 | after: '文档乱码' 16 | inputs: 'text' 17 | outputs: 'text' 18 | -------------------------------------------------------------------------------- /runtime/ops/mapper/garble_characters_cleaner/resources/charset.json: -------------------------------------------------------------------------------- 1 | { 2 | "注音符号东亚": [ 3 | "\u3100,\u312F" 4 | ], 5 | "拉丁文补充1": [ 6 | "\u00C0,\u00D6", 7 | "\u00D8,\u00F6", 8 | "\u00F8,\u00FF" 9 | ], 10 | "拉丁文扩展,A": [ 11 | "\u0100,\u017F" 12 | ], 13 | "拉丁文扩展,B": [ 14 | "\u0180,\u024F" 15 | ], 16 | "私人使用区域": [ 17 | "\uE000,\uF8FF", 18 | "\\U000f0000,\\U000ffffd", 19 | "\\U00100000,\\U0010fffd" 20 | ], 21 | "占位符": [ 22 | "\uFFFD,\uFFFD" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /runtime/ops/mapper/remove_duplicate_sentences/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档局部内容去重' 2 | name_en: 'Partial Content Deduplication' 3 | description: '文档局部内容去重。' 4 | description_en: 'Deduplicates partial file content.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'DuplicateSentencesFilter' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这是一个重复的句子。 这是一个重复的句子。 这是一个重复的句子。 这是一个重复的句子。 这是一个重复的句子。' 14 | after: '这是一个重复的句子。' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/resources/config/application-datamanagement.yml: -------------------------------------------------------------------------------- 1 | dataMate: 2 | datamanagement: 3 | file-storage: 4 | upload-dir: ${FILE_UPLOAD_DIR:./uploads} 5 | max-file-size: 10485760 # 10MB 6 | max-request-size: 52428800 # 50MB 7 | cache: 8 | ttl: 3600 9 | max-size: 1000 10 | # MyBatis is configured centrally in main-application (mapper-locations & aliases) 11 | # to avoid list overriding issues when importing multiple module configs. 12 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/charts/common/templates/_tplvalues.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Renders a value that contains template. 4 | Usage: 5 | {{ include "common.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $) }} 6 | */}} 7 | {{- define "common.tplvalues.render" -}} 8 | {{- if typeIs "string" .value }} 9 | {{- tpl .value .context }} 10 | {{- else }} 11 | {{- tpl (.value | toYaml) .context }} 12 | {{- end }} 13 | {{- end -}} 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/common/templates/_tplvalues.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Renders a value that contains template. 
4 | Usage: 5 | {{ include "common.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $) }} 6 | */}} 7 | {{- define "common.tplvalues.render" -}} 8 | {{- if typeIs "string" .value }} 9 | {{- tpl .value .context }} 10 | {{- else }} 11 | {{- tpl (.value | toYaml) .context }} 12 | {{- end }} 13 | {{- end -}} 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/charts/common/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/kubeconfig-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.kubeconfig.enabled -}} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ template "kube-state-metrics.fullname" . }}-kubeconfig 6 | namespace: {{ template "kube-state-metrics.namespace" . }} 7 | labels: 8 | {{- include "kube-state-metrics.labels" . 
| indent 4 }} 9 | type: Opaque 10 | data: 11 | config: '{{ .Values.kubeconfig.secret }}' 12 | {{- end -}} 13 | -------------------------------------------------------------------------------- /runtime/ops/mapper/emoji_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档表情去除' 2 | name_en: 'Emoticon Removal' 3 | description: '去除文档中表情字符或者emoji符号。' 4 | description_en: 'Removes emoticons or emojis from documents.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'EmojiCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '使用方式很简单,只需要将代码放入Markdown文本中即可,富文本格式可直接复制表情😀使用。' 14 | after: '使用方式很简单,只需要将代码放入Markdown文本中即可,富文本格式可直接复制表情使用。' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/client/dto/LocalCollectionConfig.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.infrastructure.client.dto; 2 | 3 | import lombok.Data; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * 本地归集任务配置 9 | */ 10 | @Data 11 | public class LocalCollectionConfig { 12 | /** 13 | * 归集类型 14 | */ 15 | private String type; 16 | 17 | /** 18 | * 文件路径列表 19 | */ 20 | private List filePaths; 21 | } 22 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRepository.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.domain.repository; 2 | 3 | import com.baomidou.mybatisplus.extension.repository.IRepository; 4 | import com.datamate.operator.domain.model.Category; 5 | import com.datamate.operator.interfaces.dto.CategoryDto; 6 | 7 | import java.util.List; 
8 | 9 | public interface CategoryRepository extends IRepository { 10 | List findAllCategories(); 11 | } 12 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import com.datamate.common.interfaces.PagingQuery; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | /** 8 | * RAG 文件请求 9 | * 10 | * @author dallas 11 | * @since 2025-10-29 12 | */ 13 | @Setter 14 | @Getter 15 | public class RagFileReq extends PagingQuery { 16 | private String fileName; 17 | private String knowledgeBaseId; 18 | } 19 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "backend.fullname" . }} 5 | labels: 6 | {{- include "backend.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "backend.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/database/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "database.fullname" . }} 5 | labels: 6 | {{- include "database.labels" . 
| nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "database.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/gateway/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "gateway.fullname" . }} 5 | labels: 6 | {{- include "gateway.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "gateway.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/runtime/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "runtime.fullname" . }} 5 | labels: 6 | {{- include "runtime.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "runtime.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/backend/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "backend.fullname" . }} 5 | labels: 6 | {{- include "backend.labels" . 
| nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "backend.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_sharpness/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片锐度增强' 2 | name_en: 'Image Sharpness Enhancement' 3 | description: '自适应调节图片的锐度,主要适用于自然场景图片。' 4 | description_en: 'Adapts and adjusts the image sharpness, which is mainly applicable 5 | to natural scenery image scenarios.' 6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgSharpness' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/role.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.rbacEnable (not .Values.singleNamespaceInstall) }} 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: {{ include "kuberay-operator.clusterRole.name" . }} 6 | labels: 7 | {{- include "kuberay-operator.labels" . | nindent 4 }} 8 | {{ include "role.consistentRules" (dict "batchSchedulerEnabled" .Values.batchScheduler.enabled "batchSchedulerName" .Values.batchScheduler.name) }} 9 | {{- end }} 10 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/frontend/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "frontend.fullname" . 
}} 5 | labels: 6 | {{- include "frontend.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "frontend.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/kube-state-metrics/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.domain.model; 2 | 3 | /** 4 | * 文件状态枚举 5 | * 6 | * @author dallas 7 | * @since 2025-10-29 8 | */ 9 | public enum FileStatus { 10 | /** 11 | * 未处理 12 | */ 13 | UNPROCESSED, 14 | /** 15 | * 处理中 16 | */ 17 | PROCESSING, 18 | /** 19 | * 已处理 20 | */ 21 | PROCESSED, 22 | /** 23 | * 处理失败 24 | */ 25 | PROCESS_FAILED 26 | } 27 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend-python/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | 
metadata: 5 | name: {{ include "backend-python.serviceAccountName" . }} 6 | labels: 7 | {{- include "backend-python.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/charts/common/templates/_tplvalues.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Renders a value that contains template. 4 | Usage: 5 | {{ include "common.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $) }} 6 | */}} 7 | {{- define "common.tplvalues.render" -}} 8 | {{- if typeIs "string" .value }} 9 | {{- tpl .value .context }} 10 | {{- else }} 11 | {{- tpl (.value | toYaml) .context }} 12 | {{- end }} 13 | {{- end -}} 14 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-node-exporter/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | # If customConfigMap is not set, this ConfigMap will be rendered. 2 | {{- if not .Values.customConfigMap }} 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: {{ template "milvus.fullname" . }} 7 | namespace: {{ .Release.Namespace }} 8 | data: 9 | default.yaml: |+ 10 | {{- include "milvus.config" . | nindent 4 }} 11 | 12 | {{- range $key, $value := .Values.extraConfigFiles }} 13 | {{ $key }}: |- 14 | {{ $value | indent 4 }} 15 | {{- end }} 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_enhanced_saturation/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片饱和度增强' 2 | name_en: 'Image Saturation Enhancement' 3 | description: '自适应调节图片的饱和度,主要适用于自然场景图片。' 4 | description_en: 'Adapts and adjusts the saturation of images, which is mainly applicable 5 | to natural scenery image scenarios.' 
6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgSaturation' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /runtime/python-executor/datamate/wrappers/datamate_wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | 4 | from datamate.scheduler import cmd_scheduler 5 | 6 | 7 | async def submit(task_id, config_path): 8 | current_dir = os.path.dirname(__file__) 9 | 10 | await cmd_scheduler.submit(task_id, f"python {os.path.join(current_dir, 'datamate_executor.py')} " 11 | f"--config_path={config_path}") 12 | 13 | 14 | def cancel(task_id): 15 | return cmd_scheduler.cancel_task(task_id) 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-windows-exporter/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_direction_correct/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片方向校正' 2 | name_en: 'Image Orientation Correction' 3 | description: '将含有文字的图片校正到文字水平方向,主要适用于文档场景。' 4 | description_en: 'Corrects images to ensure text is presented horizontally, which is 5 | mainly applicable to document scenarios.' 6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgDirectionCorrect' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RetrieveReq.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 检索请求 10 | * 11 | * @author dallas 12 | * @since 2025-11-20 13 | */ 14 | @Getter 15 | @Setter 16 | public class RetrieveReq { 17 | private String query; 18 | private int topK; 19 | private Float threshold; 20 | private List knowledgeBaseIds; 21 | } 22 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/backend-python/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "backend-python.fullname" . 
}} 5 | labels: 6 | {{- include "backend-python.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | selector: 15 | {{- include "backend.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .vscode 20 | .project 21 | .idea/ 22 | *.tmproj 23 | OWNERS 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/secret-env.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.envRenderSecret }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "grafana.fullname" . }}-env 6 | namespace: {{ include "grafana.namespace" . }} 7 | labels: 8 | {{- include "grafana.labels" . 
| nindent 4 }} 9 | type: Opaque 10 | data: 11 | {{- range $key, $val := .Values.envRenderSecret }} 12 | {{ $key }}: {{ tpl ($val | toString) $ | b64enc | quote }} 13 | {{- end }} 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: 1.6.0 3 | description: A text-embeddings-inference Helm chart for Kubernetes 4 | home: https://github.com/huggingface/text-embeddings-inference 5 | icon: https://avatars.githubusercontent.com/u/25720743?s=200&v=4 6 | keywords: 7 | - embeddings 8 | - inference 9 | maintainers: 10 | - email: devops@zilliz.com 11 | name: zilliz 12 | name: tei 13 | sources: 14 | - https://github.com/huggingface/text-embeddings-inference 15 | type: application 16 | version: 1.6.0 17 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | /** 7 | * 更新标签请求DTO 8 | */ 9 | @Getter 10 | @Setter 11 | public class UpdateTagRequest { 12 | /** 标签 ID */ 13 | private String id; 14 | /** 标签名称 */ 15 | private String name; 16 | /** 标签颜色 */ 17 | private String color; 18 | /** 标签描述 */ 19 | private String description; 20 | } 21 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/CommonErrorCode.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.infrastructure.exception; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Getter; 5 | 
6 | /** 7 | * CommonErrorCode 8 | * 9 | * @since 2025/12/5 10 | */ 11 | @Getter 12 | @AllArgsConstructor 13 | public enum CommonErrorCode implements ErrorCode{ 14 | PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在"); 15 | private final String code; 16 | private final String message; 17 | } 18 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/ErrorCodeImpl.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.infrastructure.exception; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Getter; 5 | 6 | @Getter 7 | @AllArgsConstructor 8 | public class ErrorCodeImpl implements ErrorCode { 9 | private final String code; 10 | private final String message; 11 | 12 | public static ErrorCodeImpl of(String code, String message) { 13 | return new ErrorCodeImpl(code, message); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /runtime/ops/mapper/img_perspective_transformation/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '图片透视变换' 2 | name_en: 'Image Perspective Transformation' 3 | description: '自适应校正图片的视角,主要适用于文档校正场景。' 4 | description_en: 'Adapts and corrects image perspectives, which is mainly applicable 5 | to document correction scenarios.' 
6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ImgPerspectiveTransformation' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'image' 13 | effect: 14 | before: '' 15 | after: '' 16 | inputs: 'image' 17 | outputs: 'image' 18 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/setting/infrastructure/persistence/mapper/SysParamMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.setting.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.common.setting.domain.entity.SysParam; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | /** 8 | * 系统参数映射器 9 | * 10 | * @author dallas 11 | * @since 2025-11-04 12 | */ 13 | @Mapper 14 | public interface SysParamMapper extends BaseMapper { 15 | } 16 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.infrastructure.persistence.mapper; 2 | 3 | 4 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 5 | import com.datamate.rag.indexer.domain.model.RagFile; 6 | import org.apache.ibatis.annotations.Mapper; 7 | 8 | /** 9 | * RAG文件映射器接口 10 | * 11 | * @author dallas 12 | * @since 2025-10-24 13 | */ 14 | @Mapper 15 | public interface RagFileMapper extends BaseMapper { 16 | } 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.createConfigmap }} 2 | apiVersion: v1 
3 | kind: ConfigMap 4 | metadata: 5 | name: {{ include "grafana.fullname" . }} 6 | namespace: {{ include "grafana.namespace" . }} 7 | labels: 8 | {{- include "grafana.labels" . | nindent 4 }} 9 | {{- with .Values.annotations }} 10 | annotations: 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | data: 14 | {{- include "grafana.configData" . | nindent 2 }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /frontend/src/hooks/useSearchParams.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo } from "react"; 2 | import { useLocation } from "react-router"; 3 | 4 | interface AnyObject { 5 | [key: string]: any; 6 | } 7 | 8 | export function useSearchParams(): AnyObject { 9 | const { search } = useLocation(); 10 | return useMemo(() => { 11 | const urlParams = new URLSearchParams(search); 12 | const params: AnyObject = {}; 13 | for (const [key, value] of urlParams.entries()) { 14 | params[key] = value; 15 | } 16 | return params; 17 | }, [search]); 18 | } 19 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AllDatasetStatisticsResponse.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | /** 7 | * Response DTO for statistics across all datasets 8 | */ 9 | @Getter 10 | @Setter 11 | public class AllDatasetStatisticsResponse { 12 | /** Total number of datasets */ 13 | private Integer totalDatasets = 0; 14 | 15 | /** Total size (bytes) */ 16 | private Long totalSize = 0L; 17 | 18 | /** Total number of files */ 19 | private Long totalFiles = 0L; 20 | } 21 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/CategoryRelation.java: 
-------------------------------------------------------------------------------- 1 | package com.datamate.operator.domain.model; 2 | 3 | import com.baomidou.mybatisplus.annotation.TableName; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Getter; 6 | import lombok.Setter; 7 | 8 | @Setter 9 | @Getter 10 | @AllArgsConstructor 11 | @TableName(value = "t_operator_category_relation", autoResultMap = true) 12 | public class CategoryRelation { 13 | private String categoryId; 14 | 15 | private String operatorId; 16 | } 17 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/setting/infrastructure/persistence/mapper/ModelConfigMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.setting.infrastructure.persistence.mapper; 2 | 3 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 4 | import com.datamate.common.setting.domain.entity.ModelConfig; 5 | import org.apache.ibatis.annotations.Mapper; 6 | 7 | /** 8 | * 模型配置映射器接口 9 | * 10 | * @author dallas 11 | * @since 2025-10-27 12 | */ 13 | @Mapper 14 | public interface ModelConfigMapper extends BaseMapper { 15 | } -------------------------------------------------------------------------------- /deployment/helm/milvus/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create }} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "milvus.serviceAccount" . }} 6 | namespace: {{ .Release.Namespace }} 7 | {{- if .Values.serviceAccount.annotations }} 8 | annotations: 9 | {{ toYaml .Values.serviceAccount.annotations | nindent 4 }} 10 | {{- end }} 11 | labels: 12 | {{ include "milvus.labels" . | indent 4 }} 13 | {{- with .Values.serviceAccount.labels }} 14 | {{- toYaml . 
| nindent 4 }} 15 | {{- end }} 16 | {{- end }} -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from .config import router as about_router 4 | from .project import router as project_router 5 | from .task import router as task_router 6 | from .template import router as template_router 7 | 8 | router = APIRouter( 9 | prefix="/annotation", 10 | tags = ["annotation"] 11 | ) 12 | 13 | router.include_router(about_router) 14 | router.include_router(project_router) 15 | router.include_router(task_router) 16 | router.include_router(template_router) -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import jakarta.validation.constraints.NotEmpty; 4 | import jakarta.validation.constraints.NotNull; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 添加文件请求DTO(仅创建DB记录,不执行文件系统操作) 10 | * 11 | * @author datamate 12 | * @since 2025-11-29 13 | */ 14 | public record AddFilesRequest( 15 | @NotEmpty List sourcePaths, 16 | @NotNull Boolean softAdd 17 | ) { 18 | } 19 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/domain/model/FileUploadResult.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.domain.model; 2 | 3 | import lombok.Builder; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.io.File; 8 | 9 | @Getter 10 | @Setter 11 | @Builder 12 | public class FileUploadResult { 13 | /** 切片是否已经全部上传 
*/ 14 | boolean isAllFilesUploaded; 15 | 16 | /** 业务上传信息 */ 17 | String checkInfo; 18 | 19 | /** 保存的文件 */ 20 | File savedFile; 21 | 22 | /** 真实文件名 */ 23 | String fileName; 24 | } 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/minio/templates/post-install-prometheus-metrics-serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if or .Values.metrics.serviceMonitor.enabled .Values.metrics.podMonitor.enabled }} 2 | {{- $fullName := include "minio.fullname" . -}} 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: {{ $fullName }}-update-prometheus-secret 7 | labels: 8 | app: {{ template "minio.name" . }}-update-prometheus-secret 9 | chart: {{ template "minio.chart" . }} 10 | release: {{ .Release.Name }} 11 | heritage: {{ .Release.Service }} 12 | {{- end -}} -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/ci/with-image-renderer-values.yaml: -------------------------------------------------------------------------------- 1 | podLabels: 2 | customLableA: Aaaaa 3 | imageRenderer: 4 | enabled: true 5 | env: 6 | RENDERING_ARGS: --disable-gpu,--window-size=1280x758 7 | RENDERING_MODE: clustered 8 | podLabels: 9 | customLableB: Bbbbb 10 | networkPolicy: 11 | limitIngress: true 12 | limitEgress: true 13 | resources: 14 | limits: 15 | cpu: 1000m 16 | memory: 1000Mi 17 | requests: 18 | cpu: 500m 19 | memory: 50Mi 20 | -------------------------------------------------------------------------------- /frontend/src/mock/mock-middleware/send-json-middleawre.cjs: -------------------------------------------------------------------------------- 1 | const sendJSON = (req, res, next) => { 2 | res.sendJSON = ( 3 | data = null, 4 | { code = '0', msg = 'success', statusCode = 200, timeout = 0 } = {} 5 | ) => { 6 | const timer = setTimeout(() 
=> { 7 | res.status(statusCode).json({ 8 | code, 9 | msg, 10 | data, 11 | }); 12 | clearTimeout(timer); 13 | }, timeout); 14 | }; 15 | next(); 16 | }; 17 | 18 | module.exports = sendJSON; -------------------------------------------------------------------------------- /runtime/ops/filter/img_duplicated_images_cleaner/sql/sql_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_sql": "SELECT * FROM operator_duplicate_img_features WHERE task_uuid = :task_uuid AND file_feature = :file_feature", 3 | "insert_sql": "INSERT INTO operator_duplicate_img_features (task_uuid, file_feature, file_name, timestamp) VALUES (:task_uuid, :file_feature, :file_name, :timestamp)", 4 | "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_duplicate_img_features (id INT AUTO_INCREMENT PRIMARY KEY,task_uuid VARCHAR(255),file_feature TEXT,file_name TEXT,timestamp DATETIME);" 5 | } -------------------------------------------------------------------------------- /runtime/ops/llms/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | since: 5 | """ 6 | 7 | import sys 8 | from pathlib import Path 9 | from datamate.common.utils.custom_importer import CustomImporter 10 | 11 | 12 | def _configure_importer(): 13 | base_path = Path(__file__).resolve().parent 14 | sys.meta_path.append(CustomImporter(base_path)) 15 | 16 | 17 | _configure_importer() 18 | 19 | 20 | def _import_operators(): 21 | from . import qa_condition_evaluator 22 | from . 
import text_quality_evaluation 23 | 24 | 25 | _import_operators() 26 | -------------------------------------------------------------------------------- /runtime/ops/mapper/traditional_chinese/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '繁体转简体' 2 | name_en: 'Traditional-Simplified Chinese Conversion' 3 | description: '将繁体转换为简体。' 4 | description_en: 'Converts traditional Chinese characters to simplified Chinese characters.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'TraditionalChineseCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '華為的業務涵蓋了從通信設備到智能手機、企業網絡解決方案以及雲計算等多個領域。' 14 | after: '华为的业务涵盖了从通信设备到智能手机、企业网络解决方案以及云计算等多个领域。' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /runtime/ops/slicer/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | from pathlib import Path 5 | from datamate.common.utils.custom_importer import CustomImporter 6 | 7 | 8 | def _configure_importer(): 9 | base_path = Path(__file__).resolve().parent 10 | sys.meta_path.append(CustomImporter(base_path)) 11 | 12 | 13 | _configure_importer() 14 | 15 | 16 | def _import_operators(): 17 | from . import slide_simple_slicer 18 | from . import slide_annotation_slicer 19 | from . 
import segmentation 20 | 21 | 22 | _import_operators() 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/ci/values-with-auth-tls-and-metrics.yaml: -------------------------------------------------------------------------------- 1 | # Test values file for generating all of the yaml and check that 2 | # the rendering is correct 3 | replicaCount: 3 4 | auth: 5 | enabled: true 6 | clientUser: foo 7 | clientPassword: baz 8 | serverUsers: foo,bar 9 | serverPasswords: baz,qux 10 | metrics: 11 | enabled: true 12 | serviceMonitor: 13 | enabled: true 14 | tls: 15 | client: 16 | enabled: true 17 | autoGenerated: true 18 | quorum: 19 | enabled: true 20 | autoGenerated: true 21 | -------------------------------------------------------------------------------- /runtime/ops/formatter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | from pathlib import Path 5 | from datamate.core.base_op import OPERATORS 6 | from datamate.common.utils.custom_importer import CustomImporter 7 | 8 | 9 | def _configure_importer(): 10 | base_path = Path(__file__).resolve().parent 11 | sys.meta_path.append(CustomImporter(base_path)) 12 | 13 | 14 | _configure_importer() 15 | 16 | 17 | def _import_operators(): 18 | from . import slide_formatter 19 | from . 
import mineru_formatter 20 | 21 | 22 | _import_operators() 23 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.infrastructure.persistence.mapper; 2 | 3 | 4 | import com.baomidou.mybatisplus.core.mapper.BaseMapper; 5 | import com.datamate.rag.indexer.domain.model.KnowledgeBase; 6 | import org.apache.ibatis.annotations.Mapper; 7 | 8 | /** 9 | * 知识库映射器接口 10 | * 11 | * @author dallas 12 | * @since 2025-10-24 13 | */ 14 | @Mapper 15 | public interface KnowledgeBaseMapper extends BaseMapper { 16 | } 17 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryDto.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.time.LocalDateTime; 7 | 8 | @Setter 9 | @Getter 10 | public class CategoryDto { 11 | private String id; 12 | 13 | private String name; 14 | 15 | private String value; 16 | 17 | private long count; 18 | 19 | private String type; 20 | 21 | private String parentId; 22 | 23 | private LocalDateTime createdAt; 24 | } 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/tei/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "tei.fullname" . }} 5 | labels: 6 | {{- include "tei.labels" . | nindent 4 }} 7 | {{- with .Values.service.annotations }} 8 | annotations: 9 | {{- toYaml . 
| nindent 4 }} 10 | {{- end }} 11 | spec: 12 | type: {{ .Values.service.type }} 13 | ports: 14 | - port: {{ .Values.service.port }} 15 | targetPort: http 16 | protocol: TCP 17 | name: http 18 | selector: 19 | {{- include "tei.selectorLabels" . | nindent 4 }} 20 | -------------------------------------------------------------------------------- /runtime/ops/mapper/html_tag_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: 'HTML标签去除' 2 | name_en: 'HTML Tag Removal' 3 | description: '移除文档中HTML标签,如 、

等。' 4 | description_en: 'Removes HTML tags from documents, such as , , and

.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'HtmlTagCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '

机器学习人工智能的一个分支。

' 14 | after: '机器学习是人工智能的一个分支。' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/OperatorInstance.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.domain.model.entity; 2 | 3 | import com.baomidou.mybatisplus.annotation.TableName; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | @Getter 8 | @Setter 9 | @TableName(value = "t_operator_instance", autoResultMap = true) 10 | public class OperatorInstance { 11 | private String instanceId; 12 | 13 | private String operatorId; 14 | 15 | private int opIndex; 16 | 17 | private String settingsOverride; 18 | } 19 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/tests/test-serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.testFramework.enabled .Values.serviceAccount.create }} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | labels: 6 | {{- include "grafana.labels" . | nindent 4 }} 7 | name: {{ include "grafana.serviceAccountNameTest" . }} 8 | namespace: {{ include "grafana.namespace" . 
}} 9 | annotations: 10 | "helm.sh/hook": test-success 11 | "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" 12 | {{- end }} 13 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/TagResponse.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | /** 7 | * 标签响应DTO 8 | */ 9 | @Getter 10 | @Setter 11 | public class TagResponse { 12 | /** 标签ID */ 13 | private String id; 14 | /** 标签名称 */ 15 | private String name; 16 | /** 标签颜色 */ 17 | private String color; 18 | /** 标签描述 */ 19 | private String description; 20 | /** 使用次数 */ 21 | private Integer usageCount; 22 | } 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-windows-exporter/templates/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "prometheus-windows-exporter.fullname" . }} 5 | namespace: {{ include "prometheus-windows-exporter.namespace" . }} 6 | labels: 7 | {{- include "prometheus-windows-exporter.labels" $ | nindent 4 }} 8 | {{- with .Values.service.annotations }} 9 | annotations: 10 | {{- toYaml . 
| nindent 4 }} 11 | {{- end }} 12 | data: 13 | config.yml: | 14 | {{- .Values.config | nindent 4 }} 15 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTemplateRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.interfaces.dto; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import lombok.Getter; 7 | import lombok.Setter; 8 | 9 | /** 10 | * CreateCleaningTemplateRequest 11 | */ 12 | 13 | @Getter 14 | @Setter 15 | public class CreateCleaningTemplateRequest { 16 | 17 | private String name; 18 | 19 | private String description; 20 | 21 | private List instance = new ArrayList<>(); 22 | } 23 | 24 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import jakarta.validation.constraints.Min; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | /** 8 | * 切片上传预上传请求 9 | */ 10 | @Getter 11 | @Setter 12 | public class UploadFilesPreRequest { 13 | /** 是否为压缩包上传 */ 14 | private boolean hasArchive; 15 | 16 | /** 总文件数量 */ 17 | @Min(1) 18 | private int totalFileNum; 19 | 20 | /** 总文件大小 */ 21 | private long totalSize; 22 | } 23 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-windows-exporter/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | appVersion: 0.25.1 3 | description: A Helm chart for prometheus windows-exporter 4 | home: 
https://github.com/prometheus-community/windows_exporter/ 5 | keywords: 6 | - windows-exporter 7 | - windows 8 | - prometheus 9 | - exporter 10 | maintainers: 11 | - email: github@jkroepke.de 12 | name: jkroepke 13 | name: prometheus-windows-exporter 14 | sources: 15 | - https://github.com/prometheus-community/windows_exporter/ 16 | type: application 17 | version: 0.3.1 18 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | # helm/charts 23 | OWNERS 24 | hack/ 25 | ci/ 26 | kube-prometheus-*.tgz 27 | 28 | unittests/ 29 | -------------------------------------------------------------------------------- /frontend/src/hooks/useStyle.ts: -------------------------------------------------------------------------------- 1 | import { createStyles } from "antd-style"; 2 | 3 | const useStyle = createStyles(({ css, token }) => { 4 | const { antCls } = token; 5 | return { 6 | customTable: css` 7 | ${antCls}-table { 8 | ${antCls}-table-container { 9 | ${antCls}-table-body, ${antCls}-table-content { 10 | scrollbar-width: thin; 11 | scrollbar-color: ${token.colorBorder} transparent; 12 | scrollbar-gutter: stable; 13 | } 14 | } 15 | } 16 | `, 17 | }; 18 | }); 19 | 20 | export default useStyle; 21 | -------------------------------------------------------------------------------- /runtime/ops/mapper/invisible_characters_cleaner/metadata.yml: 
-------------------------------------------------------------------------------- 1 | name: '不可见字符去除' 2 | name_en: 'Invisible Character Removal' 3 | description: '去除文档中的不可见字符,例如 0-31 号字符中的部分字符。' 4 | description_en: 'Removes invisible characters from documents, for example, removing invisible characters from characters numbered 0 to 31.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'InvisibleCharactersCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: "对“材料”怎样下\x04定义才臻于 严格和科学?" 14 | after: '对“材料”怎样下定义才臻于严格和科学?' 15 | inputs: 'text' 16 | outputs: 'text' 17 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreeResponse.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.NoArgsConstructor; 5 | import lombok.Setter; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | 11 | @Getter 12 | @Setter 13 | @NoArgsConstructor 14 | public class CategoryTreeResponse { 15 | private String id; 16 | 17 | private String name; 18 | 19 | private Integer count; 20 | 21 | private List categories = new ArrayList<>(); 22 | } 23 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/TaskProcess.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.domain.model; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | 10 | @Getter 11 | @Setter 12 | public class TaskProcess { 13 | private String instanceId; 14 | 15 | private String datasetId; 16 | 17 | private String datasetPath; 18 | 19 | private String 
exportPath; 20 | 21 | private String executorType; 22 | 23 | private List>> process; 24 | } 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/ci/with-dashboard-values.yaml: -------------------------------------------------------------------------------- 1 | dashboards: 2 | my-provider: 3 | my-awesome-dashboard: 4 | gnetId: 10000 5 | revision: 1 6 | datasource: Prometheus 7 | dashboardProviders: 8 | dashboardproviders.yaml: 9 | apiVersion: 1 10 | providers: 11 | - name: 'my-provider' 12 | orgId: 1 13 | folder: '' 14 | type: file 15 | updateIntervalSeconds: 10 16 | disableDeletion: true 17 | editable: true 18 | options: 19 | path: /var/lib/grafana/dashboards/my-provider 20 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_similar_images_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '相似图片去除' 2 | name_en: 'Similar Image Removal' 3 | description: '去除相似的图片。' 4 | description_en: 'Removes similar images.' 
5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgSimilarImagesCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | settings: 18 | similarThreshold: 19 | name: 相似度 20 | description: 相似度取值越大,图片相似度越高。 21 | type: slider 22 | defaultVal: 0.8 23 | min: 0 24 | max: 1 25 | step: 0.01 -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFileUploadCheckInfo.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.domain.model.dataset; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Getter; 5 | import lombok.NoArgsConstructor; 6 | import lombok.Setter; 7 | 8 | /** 9 | * 数据集文件上传检查信息 10 | */ 11 | @Getter 12 | @Setter 13 | @NoArgsConstructor 14 | @AllArgsConstructor 15 | public class DatasetFileUploadCheckInfo { 16 | /** 数据集id */ 17 | private String datasetId; 18 | 19 | /** 是否为压缩包上传 */ 20 | private boolean hasArchive; 21 | } 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: 2.6.5 3 | description: Milvus is an open-source vector database built to power AI applications 4 | and vector similarity search. 
5 | home: https://milvus.io/ 6 | icon: https://raw.githubusercontent.com/milvus-io/docs/master/v1.0.0/assets/milvus_logo.png 7 | keywords: 8 | - milvus 9 | - elastic 10 | - vector 11 | - search 12 | - deploy 13 | kubeVersion: ^1.10.0-0 14 | maintainers: 15 | - email: contact@milvus.io 16 | name: contact 17 | url: milvus.io 18 | name: milvus 19 | sources: 20 | - https://github.com/zilliztech/milvus 21 | version: 5.0.7 22 | -------------------------------------------------------------------------------- /runtime/ops/filter/img_blurred_images_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '模糊图片过滤' 2 | name_en: 'Fuzzy Image Filter' 3 | description: '去除模糊的图片。' 4 | description_en: 'Filters out fuzzy images.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'ImgBlurredImagesCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'image' 12 | effect: 13 | before: '' 14 | after: '' 15 | inputs: 'image' 16 | outputs: 'image' 17 | settings: 18 | blurredThreshold: 19 | name: 梯度函数值 20 | description: 梯度函数值取值越小,图片模糊度越高。 21 | type: slider 22 | defaultVal: 1000 23 | min: 1 24 | max: 10000 25 | step: 1 -------------------------------------------------------------------------------- /scripts/db/data-common-init.sql: -------------------------------------------------------------------------------- 1 | -- Common database initialization script: creates the chunked file upload request table 2 | -- Applies to the datamate database 3 | 4 | USE datamate; 5 | 6 | CREATE TABLE IF NOT EXISTS `t_chunk_upload_request` 7 | ( 8 | `id` VARCHAR(36) PRIMARY KEY COMMENT 'UUID', 9 | `total_file_num` INT COMMENT '总文件数', 10 | `uploaded_file_num` INT COMMENT '已上传文件数', 11 | `upload_path` VARCHAR(256) COMMENT '文件路径', 12 | `timeout` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '上传请求超时时间', 13 | `service_id` VARCHAR(64) COMMENT '上传请求所属服务:DATA-MANAGEMENT(数据管理);', 14 | `check_info` TEXT COMMENT '业务信息' 15 | ) COMMENT ='文件切片上传请求表'; 16 | -------------------------------------------------------------------------------- 
/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryConverter.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.infrastructure.converter; 2 | 3 | import com.datamate.operator.domain.model.Category; 4 | import com.datamate.operator.interfaces.dto.CategoryDto; 5 | import org.mapstruct.Mapper; 6 | import org.mapstruct.factory.Mappers; 7 | 8 | import java.util.List; 9 | 10 | @Mapper 11 | public interface CategoryConverter { 12 | CategoryConverter INSTANCE = Mappers.getMapper(CategoryConverter.class); 13 | 14 | List fromEntityToDto (List dto); 15 | } 16 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | 3 | name: kuberay-operator 4 | 5 | description: A Helm chart for deploying the Kuberay operator on Kubernetes. 6 | 7 | version: 1.4.2 8 | 9 | type: application 10 | 11 | keywords: 12 | - ray 13 | - ray operator 14 | - distributed computing 15 | - data processing 16 | - machine learning 17 | - deep learning 18 | - hyperparameter tuning 19 | - reinforcement learning 20 | - model serving 21 | 22 | home: https://github.com/ray-project/kuberay 23 | 24 | icon: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png 25 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "kuberay-operator.service.name" . }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "kuberay-operator.labels" . 
| nindent 4 }} 8 | spec: 9 | type: {{ .Values.service.type }} 10 | ports: 11 | - port: {{ .Values.service.port }} 12 | targetPort: http 13 | protocol: TCP 14 | name: http 15 | selector: 16 | app.kubernetes.io/name: {{ include "kuberay-operator.name" . }} 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | -------------------------------------------------------------------------------- /deployment/helm/deer-flow/charts/public/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: deer-flow-env 5 | stringData: 6 | {{- range .Files.Lines ".env" }} 7 | {{- $line := trim . }} 8 | {{- if and $line (not (hasPrefix "#" $line)) }} 9 | {{- $pair := regexSplit "=" $line 2 }} 10 | {{- if eq (len $pair) 2 }} 11 | {{ index $pair 0 | trim }}: {{ index $pair 1 | trim | quote }} 12 | {{- end }} 13 | {{- end }} 14 | {{- end }} 15 | 16 | --- 17 | apiVersion: v1 18 | kind: Secret 19 | metadata: 20 | name: deer-flow-conf 21 | stringData: 22 | {{ (.Files.Glob "conf.yaml").AsConfig | indent 2 }} -------------------------------------------------------------------------------- /runtime/ops/mapper/extra_space_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '多余空格去除' 2 | name_en: 'Redundant Space Removal' 3 | description: '移除文档首尾、句中或标点符号附近多余空格和 tab 等。' 4 | description_en: 'Removes redundant spaces and tabs at the beginning and end of documents, 5 | in sentences, or near punctuations.' 
6 | language: 'python' 7 | vendor: 'huawei' 8 | raw_id: 'ExtraSpaceCleaner' 9 | version: '1.0.0' 10 | types: 11 | - 'cleanse' 12 | modal: 'text' 13 | effect: 14 | before: ' 人工智能的研究历史有着一条从以“推理”为重 点,到以“知识”为重点,再到以“学习”为重点的自然、清晰的脉络。 ' 15 | after: '人工智能的研究历史有着一条从以“推理”为重点,到以“知识”为重点,再到以“学习”为重点的自然、清晰的脉络。' 16 | inputs: 'text' 17 | outputs: 'text' 18 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/UpdateCleaningTemplateRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.interfaces.dto; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | 7 | import lombok.Getter; 8 | import lombok.Setter; 9 | 10 | /** 11 | * UpdateCleaningTemplateRequest 12 | */ 13 | 14 | @Getter 15 | @Setter 16 | public class UpdateCleaningTemplateRequest { 17 | 18 | private String id; 19 | 20 | private String name; 21 | 22 | private String description; 23 | 24 | private List instance = new ArrayList<>(); 25 | } 26 | 27 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/common/exception/CleanErrorCode.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.common.exception; 2 | 3 | import com.datamate.common.infrastructure.exception.ErrorCode; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Getter; 6 | 7 | @Getter 8 | @AllArgsConstructor 9 | public enum CleanErrorCode implements ErrorCode { 10 | /** 11 | * 清洗任务名称重复 12 | */ 13 | DUPLICATE_TASK_NAME("clean.0001", "清洗任务名称重复"), 14 | 15 | IN_AND_OUT_NOT_MATCH("clean.0002", "算子输入输出不匹配"); 16 | 17 | private final String code; 18 | private final String message; 19 | } 20 | -------------------------------------------------------------------------------- 
/runtime/datamate-python/app/core/exception.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class BusinessErrorCode: 4 | def __init__(self, message: str, error_code: str): 5 | self.message = message 6 | self.error_code = error_code 7 | 8 | 9 | class BusinessException(RuntimeError): 10 | def __init__(self, business_error_code: BusinessErrorCode): 11 | self.message = business_error_code.message 12 | self.error_code = business_error_code.error_code 13 | super().__init__(self.message) 14 | 15 | 16 | class BusinessErrorCodeEnum(Enum): 17 | TASK_TYPE_ERROR = BusinessErrorCode("任务类型错误", "evaluation.0001") 18 | -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/common/enums/TaskStatus.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.common.enums; 2 | 3 | /** 4 | * 统一的任务和执行状态枚举 5 | * 任务和执行状态枚举: - DRAFT: 草稿状态 - READY: 就绪状态 - RUNNING: 运行中 - SUCCESS: 执行成功 (对应原来的COMPLETED/SUCCESS) - FAILED: 执行失败 - STOPPED: 已停止 6 | * 7 | * @author Data Mate Platform Team 8 | */ 9 | public enum TaskStatus { 10 | /** 草稿状态 */ 11 | DRAFT, 12 | /** 就绪状态 */ 13 | READY, 14 | /** 运行中 */ 15 | RUNNING, 16 | /** 执行成功(对应原来的COMPLETED) */ 17 | SUCCESS, 18 | /** 执行失败 */ 19 | FAILED, 20 | /** 已停止 */ 21 | STOPPED 22 | } 23 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/frontend/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "frontend.fullname" . }} 5 | labels: 6 | {{- include "frontend.labels" . 
| nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: {{ .Values.service.port }} 12 | protocol: TCP 13 | name: {{ .Chart.Name }} 14 | {{- if eq .Values.service.type "NodePort" }} 15 | nodePort: {{ .Values.service.nodePort }} 16 | {{- end }} 17 | selector: 18 | {{- include "frontend.selectorLabels" . | nindent 4 }} 19 | -------------------------------------------------------------------------------- /backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetTypeResponse.java: -------------------------------------------------------------------------------- 1 | package com.datamate.datamanagement.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 数据集类型响应DTO 10 | */ 11 | @Getter 12 | @Setter 13 | public class DatasetTypeResponse { 14 | /** 类型编码 */ 15 | private String code; 16 | /** 类型名称 */ 17 | private String name; 18 | /** 类型描述 */ 19 | private String description; 20 | /** 支持的文件格式 */ 21 | private List supportedFormats; 22 | /** 图标 */ 23 | private String icon; 24 | } 25 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Category.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.domain.model; 2 | 3 | import com.baomidou.mybatisplus.annotation.TableName; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.time.LocalDateTime; 8 | 9 | @Setter 10 | @Getter 11 | @TableName(value = "t_operator_category", autoResultMap = true) 12 | public class Category { 13 | private String id; 14 | 15 | private String name; 16 | 17 | private String value; 18 | 19 | private String type; 20 | 21 | private String parentId; 22 | 23 | private LocalDateTime createdAt; 24 | } 25 | 
-------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/ray_rayjob_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | {{- /* ClusterRole for end users to view RayJob. */ -}} 2 | {{- if and .Values.rbacEnable (not .Values.singleNamespaceInstall) }} 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: rayjob-viewer-role 7 | labels: 8 | {{- include "kuberay-operator.labels" . | nindent 4 }} 9 | rules: 10 | - apiGroups: 11 | - ray.io 12 | resources: 13 | - rayjobs 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - ray.io 20 | resources: 21 | - rayjobs/status 22 | verbs: 23 | - get 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.sidecar.dashboards.enabled .Values.sidecar.dashboards.SCProvider }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | labels: 6 | {{- include "grafana.labels" . | nindent 4 }} 7 | {{- with .Values.annotations }} 8 | annotations: 9 | {{- toYaml . | nindent 4 }} 10 | {{- end }} 11 | name: {{ include "grafana.fullname" . }}-config-dashboards 12 | namespace: {{ include "grafana.namespace" . }} 13 | data: 14 | {{- include "grafana.configDashboardProviderData" . 
| nindent 2 }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/prometheus/csi-secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.prometheus.prometheusSpec.thanos .Values.prometheus.prometheusSpec.thanos.secretProviderClass }} 2 | --- 3 | apiVersion: secrets-store.csi.x-k8s.io/v1alpha1 4 | kind: SecretProviderClass 5 | metadata: 6 | name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus 7 | namespace: {{ template "kube-prometheus-stack.namespace" . }} 8 | labels: 9 | app: {{ template "kube-prometheus-stack.name" . }}-prometheus 10 | spec: 11 | {{ toYaml .Values.prometheus.prometheusSpec.thanos.secretProviderClass | indent 2 }} 12 | {{- end }} 13 | -------------------------------------------------------------------------------- /frontend/src/store/slices/settingsSlice.ts: -------------------------------------------------------------------------------- 1 | // Settings Slice 2 | import { createSlice } from '@reduxjs/toolkit'; 3 | 4 | const settingsSlice = createSlice({ 5 | name: "settings", 6 | initialState: { 7 | visible: false, 8 | }, 9 | reducers: { 10 | showSettings: (state) => { 11 | state.visible = true; 12 | }, 13 | hideSettings: (state) => { 14 | state.visible = false; 15 | }, 16 | toggleSettings: (state) => { 17 | state.visible = !state.visible; 18 | }, 19 | }, 20 | }); 21 | 22 | export const { showSettings, hideSettings, toggleSettings } = settingsSlice.actions; 23 | export default settingsSlice.reducer; 24 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/dataset/schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_file import ( 2 | DatasetFileResponse, 3 | PagedDatasetFileResponse, 4 | BatchUpdateFileTagsRequest, 5 | 
BatchUpdateFileTagsResponse, 6 | FileTagUpdateResult, 7 | FileTagUpdate, 8 | ) 9 | 10 | from .dataset import ( 11 | DatasetResponse, 12 | DatasetTypeResponse, 13 | ) 14 | 15 | __all__ = [ 16 | "DatasetResponse", 17 | "DatasetFileResponse", 18 | "PagedDatasetFileResponse", 19 | "DatasetTypeResponse", 20 | "BatchUpdateFileTagsRequest", 21 | "BatchUpdateFileTagsResponse", 22 | "FileTagUpdateResult", 23 | "FileTagUpdate", 24 | ] -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/domain/process/ProcessRunner.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.domain.process; 2 | 3 | import com.datamate.collection.domain.model.entity.CollectionTask; 4 | 5 | /** 6 | * 归集执行器接口 7 | * 8 | * @since 2025/10/23 9 | */ 10 | public interface ProcessRunner { 11 | /** 12 | * 执行归集任务 13 | * 14 | * @param task 任务 15 | * @param executionId 执行ID 16 | * @param timeoutSeconds 超时时间(秒) 17 | * @return 执行结果 18 | * @throws Exception 执行异常 19 | */ 20 | int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception; 21 | } 22 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseResp.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import com.datamate.common.setting.domain.entity.ModelConfig; 4 | import com.datamate.rag.indexer.domain.model.KnowledgeBase; 5 | import lombok.Getter; 6 | import lombok.Setter; 7 | 8 | /** 9 | * 知识库响应实体 10 | * 11 | * @author dallas 12 | * @since 2025-11-17 13 | */ 14 | @Setter 15 | @Getter 16 | public class KnowledgeBaseResp extends KnowledgeBase { 17 | private Long fileCount; 18 | private Long chunkCount; 19 | private ModelConfig 
embedding; 20 | private ModelConfig chat; 21 | } -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | /** 4 | * 分块处理类型 5 | * 6 | * @author dallas 7 | * @since 2025-10-29 8 | */ 9 | public enum ProcessType { 10 | /** 11 | * 段落分块 12 | */ 13 | PARAGRAPH_CHUNK, 14 | 15 | /** 16 | * 按句子分块 17 | */ 18 | SENTENCE_CHUNK, 19 | 20 | /** 21 | * 按长度分块,字符串分块 22 | */ 23 | LENGTH_CHUNK, 24 | 25 | /** 26 | * 默认分块,按单词分块 27 | */ 28 | DEFAULT_CHUNK, 29 | 30 | /** 31 | * 自定义分割符分块 32 | */ 33 | CUSTOM_SEPARATOR_CHUNK, 34 | } 35 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/domain/AggregateRoot.java: -------------------------------------------------------------------------------- 1 | package com.datamate.common.domain; 2 | 3 | import com.datamate.common.domain.model.base.BaseEntity; 4 | 5 | /** 6 | * DDD聚合根基类 7 | */ 8 | public abstract class AggregateRoot extends BaseEntity { 9 | 10 | protected AggregateRoot() { 11 | super(); 12 | } 13 | 14 | protected AggregateRoot(ID id) { 15 | super(id); 16 | } 17 | 18 | /** 19 | * 获取聚合版本号(用于乐观锁) 20 | */ 21 | public abstract Long getVersion(); 22 | 23 | /** 24 | * 设置聚合版本号 25 | */ 26 | public abstract void setVersion(Long version); 27 | } 28 | -------------------------------------------------------------------------------- /deployment/docker/datamate/backend.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | server_name 0.0.0.0; 4 | 5 | access_log /var/log/datamate/frontend/access.log main; 6 | error_log /var/log/datamate/frontend/error.log notice; 7 | 8 | client_max_body_size 1024M; 9 | 10 | location /api/ { 11 
| proxy_pass http://datamate-gateway:8080/api/; 12 | proxy_set_header Host $host; 13 | proxy_set_header X-Real-IP $remote_addr; 14 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 15 | } 16 | 17 | location / { 18 | root /opt/frontend; 19 | try_files $uri $uri/ /index.html; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/grafana/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create }} 2 | {{- $root := . -}} 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | labels: 7 | {{- include "grafana.labels" . | nindent 4 }} 8 | {{- with .Values.serviceAccount.labels }} 9 | {{- toYaml . | nindent 4 }} 10 | {{- end }} 11 | {{- with .Values.serviceAccount.annotations }} 12 | annotations: 13 | {{- tpl (toYaml . | nindent 4) $root }} 14 | {{- end }} 15 | name: {{ include "grafana.serviceAccountName" . }} 16 | namespace: {{ include "grafana.namespace" . 
}} 17 | {{- end }} 18 | -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/interfaces/dto/CollectionTaskPagingQuery.java: -------------------------------------------------------------------------------- 1 | package com.datamate.collection.interfaces.dto; 2 | 3 | import com.datamate.collection.common.enums.TaskStatus; 4 | import com.datamate.common.interfaces.PagingQuery; 5 | import lombok.Getter; 6 | import lombok.Setter; 7 | 8 | /** 9 | * 归集任务分页查询参数 10 | * 11 | * @since 2025/10/23 12 | */ 13 | @Getter 14 | @Setter 15 | public class CollectionTaskPagingQuery extends PagingQuery { 16 | /** 17 | * 任务状态 18 | */ 19 | private TaskStatus status; 20 | 21 | /** 22 | * 任务名称 23 | */ 24 | private String keyword; 25 | } 26 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/endpoints.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.endpoints }} 2 | apiVersion: v1 3 | kind: Endpoints 4 | metadata: 5 | name: {{ include "prometheus-node-exporter.fullname" . }} 6 | namespace: {{ include "prometheus-node-exporter.namespace" . }} 7 | labels: 8 | {{- include "prometheus-node-exporter.labels" . | nindent 4 }} 9 | subsets: 10 | - addresses: 11 | {{- range .Values.endpoints }} 12 | - ip: {{ . 
}} 13 | {{- end }} 14 | ports: 15 | - name: {{ .Values.service.portName }} 16 | port: 9100 17 | protocol: TCP 18 | {{- end }} 19 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/DataCleaningServiceConfiguration.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning; 2 | 3 | import org.springframework.context.annotation.ComponentScan; 4 | import org.springframework.scheduling.annotation.EnableAsync; 5 | import org.springframework.scheduling.annotation.EnableScheduling; 6 | 7 | /** 8 | * Data cleaning service configuration class. 9 | * Enables asynchronous execution and scheduling, and component-scans the com.datamate.cleaning package. 10 | */ 11 | @EnableAsync 12 | @EnableScheduling 13 | @ComponentScan(basePackages = { 14 | "com.datamate.cleaning" 15 | }) 16 | public class DataCleaningServiceConfiguration { 17 | // Configuration class for JAR packaging - no main method needed 18 | } 19 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/ray_rayservice_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | {{- /* ClusterRole for end users to view RayService. */ -}} 2 | {{- if and .Values.rbacEnable (not .Values.singleNamespaceInstall) }} 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: rayservice-viewer-role 7 | labels: 8 | {{- include "kuberay-operator.labels" . 
| nindent 4 }} 9 | rules: 10 | - apiGroups: 11 | - ray.io 12 | resources: 13 | - rayservices 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - ray.io 20 | resources: 21 | - rayservices/status 22 | verbs: 23 | - get 24 | {{- end }} 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | appVersion: 3.0.2 3 | dependencies: 4 | - condition: kube-prometheus-stack.enabled 5 | name: kube-prometheus-stack 6 | repository: https://prometheus-community.github.io/helm-charts 7 | version: 56.x.x 8 | description: Apache Pulsar Helm chart for Kubernetes 9 | home: https://pulsar.apache.org 10 | icon: https://pulsar.apache.org/img/pulsar.svg 11 | kubeVersion: '>=1.21.0-0' 12 | maintainers: 13 | - email: dev@pulsar.apache.org 14 | name: The Apache Pulsar Team 15 | name: pulsar 16 | sources: 17 | - https://github.com/apache/pulsar 18 | - https://github.com/apache/pulsar-helm-chart 19 | version: 3.3.0 20 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/TemplateWithInstance.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.domain.model.entity; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.time.LocalDateTime; 7 | 8 | 9 | @Getter 10 | @Setter 11 | public class TemplateWithInstance { 12 | private String id; 13 | 14 | private String name; 15 | 16 | private String description; 17 | 18 | private LocalDateTime createdAt; 19 | 20 | private LocalDateTime updatedAt; 21 | 22 | private String operatorId; 23 | 24 | private Integer opIndex; 25 | 26 | private String settingsOverride; 27 | } 28 | 
-------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorsListPostRequest.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.interfaces.dto; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | 7 | import com.datamate.common.interfaces.PagingQuery; 8 | import lombok.Getter; 9 | import lombok.Setter; 10 | 11 | /** 12 | * OperatorsListPostRequest 13 | */ 14 | 15 | @Getter 16 | @Setter 17 | public class OperatorsListPostRequest extends PagingQuery { 18 | private List categories = new ArrayList<>(); 19 | 20 | private String keyword; 21 | 22 | private String labelName; 23 | 24 | private Boolean isStar; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryRelationConverter.java: -------------------------------------------------------------------------------- 1 | package com.datamate.operator.infrastructure.converter; 2 | 3 | import com.datamate.operator.domain.model.CategoryRelation; 4 | import com.datamate.operator.interfaces.dto.CategoryRelationDto; 5 | import org.mapstruct.Mapper; 6 | import org.mapstruct.factory.Mappers; 7 | 8 | import java.util.List; 9 | 10 | @Mapper 11 | public interface CategoryRelationConverter { 12 | CategoryRelationConverter INSTANCE = Mappers.getMapper(CategoryRelationConverter.class); 13 | 14 | List fromEntityToDto (List dto); 15 | } 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/charts/common/Chart.yaml: -------------------------------------------------------------------------------- 1 | annotations: 2 | category: Infrastructure 3 | licenses: Apache-2.0 4 | apiVersion: v2 5 | appVersion: 2.4.0 6 | 
description: A Library Helm Chart for grouping common logic between bitnami charts. 7 | This chart is not deployable by itself. 8 | home: https://bitnami.com 9 | icon: https://bitnami.com/downloads/logos/bitnami-mark.png 10 | keywords: 11 | - common 12 | - helper 13 | - template 14 | - function 15 | - bitnami 16 | maintainers: 17 | - name: VMware, Inc. 18 | url: https://github.com/bitnami/charts 19 | name: common 20 | sources: 21 | - https://github.com/bitnami/charts 22 | type: library 23 | version: 2.4.0 24 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/templates/token-secrets.yaml: -------------------------------------------------------------------------------- 1 | {{- if (include "etcd.token.createSecret" .) }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ printf "%s-jwt-token" (include "common.names.fullname" .) | trunc 63 | trimSuffix "-" }} 6 | namespace: {{ .Release.Namespace | quote }} 7 | labels: {{- include "common.labels.standard" . | nindent 4 }} 8 | {{- if .Values.commonAnnotations }} 9 | annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} 10 | {{- end }} 11 | type: Opaque 12 | data: 13 | jwt-token.pem: {{ include "etcd.token.jwtToken" . | b64enc | quote }} 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/crs-configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.customResourceState.enabled}} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ template "kube-state-metrics.fullname" . }}-customresourcestate-config 6 | namespace: {{ template "kube-state-metrics.namespace" . }} 7 | labels: 8 | {{- include "kube-state-metrics.labels" . 
| indent 4 }} 9 | {{- if .Values.annotations }} 10 | annotations: 11 | {{ toYaml .Values.annotations | nindent 4 }} 12 | {{- end }} 13 | data: 14 | config.yaml: | 15 | {{- toYaml .Values.customResourceState.config | nindent 4 }} 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/templates/datanode-svc.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.metrics.enabled }} 2 | {{- if and .Values.dataNode.enabled .Values.cluster.enabled }} 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: {{ template "milvus.datanode.fullname" . }} 7 | namespace: {{ .Release.Namespace }} 8 | labels: 9 | {{ include "milvus.labels" . | indent 4 }} 10 | component: "datanode" 11 | spec: 12 | type: ClusterIP 13 | clusterIP: None 14 | ports: 15 | - name: metrics 16 | protocol: TCP 17 | port: 9091 18 | targetPort: metrics 19 | selector: 20 | {{ include "milvus.matchLabels" . | indent 4 }} 21 | component: "datanode" 22 | {{- end }} 23 | {{- end }} -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningResultConverter.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.infrastructure.converter; 2 | 3 | import com.datamate.cleaning.domain.model.entity.CleaningResult; 4 | import com.datamate.cleaning.interfaces.dto.CleaningResultDto; 5 | import org.mapstruct.Mapper; 6 | import org.mapstruct.factory.Mappers; 7 | 8 | import java.util.List; 9 | 10 | @Mapper 11 | public interface CleaningResultConverter { 12 | CleaningResultConverter INSTANCE = Mappers.getMapper(CleaningResultConverter.class); 13 | 14 | List convertEntityToDto(List cleaningResult); 15 | } 16 | -------------------------------------------------------------------------------- 
/deployment/helm/datamate/charts/kuberay-operator/templates/multiple_namespaces_role.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.rbacEnable .Values.singleNamespaceInstall .Values.crNamespacedRbacEnable }} 2 | {{- $watchNamespaces := default (list .Release.Namespace) .Values.watchNamespace }} 3 | {{- range $namespace := $watchNamespaces }} 4 | --- 5 | apiVersion: rbac.authorization.k8s.io/v1 6 | kind: Role 7 | metadata: 8 | name: {{ include "kuberay-operator.fullname" $ }} 9 | namespace: {{ $namespace }} 10 | labels: {{ include "kuberay-operator.labels" $ | nindent 4 }} 11 | {{ include "role.consistentRules" (dict "batchSchedulerEnabled" $.Values.batchScheduler.enabled) }} 12 | {{- end }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.rbacEnable (not .Values.singleNamespaceInstall) }} 2 | kind: ClusterRoleBinding 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: {{ include "kuberay-operator.clusterRoleBinding.name" . }} 6 | labels: 7 | {{- include "kuberay-operator.labels" . | nindent 4 }} 8 | subjects: 9 | - kind: ServiceAccount 10 | name: {{ include "kuberay-operator.serviceAccount.name" . }} 11 | namespace: {{ .Release.Namespace }} 12 | roleRef: 13 | apiGroup: rbac.authorization.k8s.io 14 | kind: ClusterRole 15 | name: {{ include "kuberay-operator.clusterRole.name" . 
}} 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /deployment/helm/milvus/templates/querynode-svc.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.metrics.enabled }} 2 | {{- if and .Values.queryNode.enabled .Values.cluster.enabled }} 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: {{ template "milvus.querynode.fullname" . }} 7 | namespace: {{ .Release.Namespace }} 8 | labels: 9 | {{ include "milvus.labels" . | indent 4 }} 10 | component: "querynode" 11 | spec: 12 | type: ClusterIP 13 | clusterIP: None 14 | ports: 15 | - name: metrics 16 | protocol: TCP 17 | port: 9091 18 | targetPort: metrics 19 | selector: 20 | {{ include "milvus.matchLabels" . | indent 4 }} 21 | component: "querynode" 22 | {{- end }} 23 | {{- end }} -------------------------------------------------------------------------------- /runtime/ops/mapper/full_width_characters_cleaner/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '全角转半角' 2 | name_en: 'Full-to-Half Width Character' 3 | description: '将文档中的所有全角字符转换成半角字符。' 4 | description_en: 'Converts all full-width characters in documents to half-width characters.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'FullWidthCharacterCleaner' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: 'Residential and commercial design, site inspections, working drawings, 14 | Minicad, renderings.' 15 | after: 'Residential and commercial design, site inspections, working drawings, MiniCad, 16 | renderings.' 
17 | inputs: 'text' 18 | outputs: 'text' 19 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: psp-{{ include "prometheus-node-exporter.fullname" . }} 6 | labels: 7 | {{- include "prometheus-node-exporter.labels" . | nindent 4 }} 8 | rules: 9 | - apiGroups: ['extensions'] 10 | resources: ['podsecuritypolicies'] 11 | verbs: ['use'] 12 | resourceNames: 13 | - {{ include "prometheus-node-exporter.fullname" . }} 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/pulsar/charts/kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.serviceAccount.create }} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} 6 | namespace: {{ template "kube-prometheus-stack.namespace" . }} 7 | labels: 8 | {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} 9 | {{- if .Values.global.imagePullSecrets }} 10 | imagePullSecrets: 11 | {{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 2 }} 12 | {{- end }} 13 | {{- end }} 14 | -------------------------------------------------------------------------------- /runtime/ops/filter/remove_duplicate_file/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '相似文档去除' 2 | name_en: 'Similar Document Removal' 3 | description: '相似文档去除。' 4 | description_en: 'Removes similar documents.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'DuplicateFilesFilter' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '这篇文档跟数据集中的另一篇文档内容几乎一样,执行该算子后,这篇文档会被去除。' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | settings: 18 | fileDuplicateThreshold: 19 | name: 文档相似度 20 | description: 基于MinHash算法和Jaccard相似度,计算当前文档与数据集中其它文档相似性,超过设定值,该文档被去除。 21 | type: slider 22 | defaultVal: 0.5 23 | min: 0 24 | max: 1 25 | step: 0.1 26 | -------------------------------------------------------------------------------- /runtime/ops/filter/remove_duplicate_file/sql/sql_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_sql": "SELECT * FROM operators_similar_text_features WHERE task_uuid = :task_uuid AND file_name != :file_name ORDER BY timestamp LIMIT :ge OFFSET :le", 3 | "create_tables_sql": "CREATE TABLE IF NOT EXISTS operators_similar_text_features (id INT AUTO_INCREMENT PRIMARY KEY, task_uuid VARCHAR(255),file_feature TEXT,file_name TEXT,timestamp DATETIME);", 4 | "insert_sql": "INSERT INTO operators_similar_text_features (task_uuid, file_feature, file_name, timestamp) VALUES (:task_uuid, :file_feature, :file_name, :timestamp)", 5 | "query_task_uuid_sql": "SELECT * FROM operators_similar_text_features WHERE task_uuid = :task_uuid" 6 | } -------------------------------------------------------------------------------- /backend/services/data-collection-service/src/main/java/com/datamate/collection/domain/repository/CollectionTaskRepository.java: 
-------------------------------------------------------------------------------- 1 | package com.datamate.collection.domain.repository; 2 | 3 | import com.baomidou.mybatisplus.extension.repository.IRepository; 4 | import com.datamate.collection.domain.model.entity.CollectionTask; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 归集任务仓储层 10 | * 11 | * @since 2025/10/23 12 | */ 13 | public interface CollectionTaskRepository extends IRepository { 14 | List selectActiveTasks(); 15 | 16 | void updateStatus(String id, String status); 17 | 18 | void updateLastExecution(String id, String lastExecutionId); 19 | } 20 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/etcd/charts/common/templates/_warnings.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Warning about using rolling tag. 4 | Usage: 5 | {{ include "common.warnings.rollingTag" .Values.path.to.the.imageRoot }} 6 | */}} 7 | {{- define "common.warnings.rollingTag" -}} 8 | 9 | {{- if and (contains "bitnami/" .repository) (not (.tag | toString | regexFind "-r\\d+$|sha256:")) }} 10 | WARNING: Rolling tag detected ({{ .repository }}:{{ .tag }}), please note that it is strongly recommended to avoid using rolling tags in a production environment. 
11 | +info https://docs.bitnami.com/containers/how-to/understand-rolling-tags-containers/ 12 | {{- end }} 13 | 14 | {{- end -}} 15 | -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/OperatorInstanceDto.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.interfaces.dto; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | 8 | import lombok.Getter; 9 | import lombok.Setter; 10 | 11 | /** 12 | * OperatorInstance 13 | */ 14 | 15 | @Getter 16 | @Setter 17 | public class OperatorInstanceDto { 18 | 19 | private String id; 20 | 21 | private String name; 22 | 23 | private String inputs; 24 | 25 | private String outputs; 26 | 27 | private List categories; 28 | 29 | private Map overrides = new HashMap<>(); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * 添加文件请求 10 | * 11 | * @author dallas 12 | * @since 2025-10-29 13 | */ 14 | @Getter 15 | @Setter 16 | public class AddFilesReq { 17 | private String knowledgeBaseId; 18 | private ProcessType processType; 19 | private Integer chunkSize; 20 | private Integer overlapSize; 21 | private String delimiter; 22 | private List files; 23 | 24 | public record FileInfo(String id, String fileName) { 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/CommonUtils.java: 
-------------------------------------------------------------------------------- 1 | package com.datamate.common.domain.utils; 2 | 3 | import java.io.File; 4 | 5 | /** 6 | * 通用工具类 7 | */ 8 | public class CommonUtils { 9 | /** 10 | * 从文件路径中获取文件名(带后缀) 11 | * 12 | * @param filePath 文件路径 13 | * @return 文件名(带后缀) 14 | */ 15 | public static String trimFilePath(String filePath) { 16 | int lastSlashIndex = filePath.lastIndexOf(File.separator); 17 | 18 | String filename = filePath; 19 | if (lastSlashIndex != -1) { 20 | filename = filePath.substring(lastSlashIndex + 1); 21 | } 22 | return filename; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/common/templates/_warnings.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Warning about using rolling tag. 4 | Usage: 5 | {{ include "common.warnings.rollingTag" .Values.path.to.the.imageRoot }} 6 | */}} 7 | {{- define "common.warnings.rollingTag" -}} 8 | 9 | {{- if and (contains "bitnami/" .repository) (not (.tag | toString | regexFind "-r\\d+$|sha256:")) }} 10 | WARNING: Rolling tag detected ({{ .repository }}:{{ .tag }}), please note that it is strongly recommended to avoid using rolling tags in a production environment. 
11 | +info https://docs.bitnami.com/containers/how-to/understand-rolling-tags-containers/ 12 | {{- end }} 13 | 14 | {{- end -}} 15 | -------------------------------------------------------------------------------- /frontend/src/pages/RatioTask/ratio.api.ts: -------------------------------------------------------------------------------- 1 | import { get, post, put, del, download } from "@/utils/request"; 2 | 3 | // 查询配比任务列表(分页) 4 | export function queryRatioTasksUsingGet(params?: any) { 5 | return get("/api/synthesis/ratio-task", params); 6 | } 7 | 8 | // 查询配比任务详情 9 | export function getRatioTaskByIdUsingGet(id: string) { 10 | return get(`/api/synthesis/ratio-task/${id}`); 11 | } 12 | 13 | // 创建配比任务 14 | export function createRatioTaskUsingPost(data: any) { 15 | return post("/api/synthesis/ratio-task", data); 16 | } 17 | 18 | // 删除配比任务(支持批量) 19 | export function deleteRatioTasksUsingDelete(id: string) { 20 | const url = `/api/synthesis/ratio-task?ids=${id}`; 21 | return del(url); 22 | } 23 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from .system.interface import router as system_router 4 | from .annotation.interface import router as annotation_router 5 | from .ratio.interface import router as ratio_router 6 | from .generation.interface import router as generation_router 7 | from .evaluation.interface import router as evaluation_router 8 | 9 | router = APIRouter( 10 | prefix="/api" 11 | ) 12 | 13 | router.include_router(system_router) 14 | router.include_router(annotation_router) 15 | router.include_router(ratio_router) 16 | router.include_router(generation_router) 17 | router.include_router(evaluation_router) 18 | 19 | __all__ = ["router"] 20 | -------------------------------------------------------------------------------- 
/runtime/datamate-python/app/module/system/interface/about.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | from typing import Dict, Any 3 | from app.core.config import settings 4 | from app.module.shared.schema import StandardResponse 5 | 6 | from ..schema import HealthResponse 7 | 8 | router = APIRouter() 9 | 10 | @router.get("/health", response_model=StandardResponse[HealthResponse]) 11 | async def health_check(): 12 | """健康检查端点""" 13 | 14 | return StandardResponse( 15 | code=200, 16 | message="success", 17 | data=HealthResponse( 18 | status="healthy", 19 | service="Label Studio Adapter", 20 | version=settings.app_version 21 | ) 22 | ) -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningResultDto.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.interfaces.dto; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | @Getter 7 | @Setter 8 | public class CleaningResultDto { 9 | private String instanceId; 10 | 11 | private String srcFileId; 12 | 13 | private String destFileId; 14 | 15 | private String srcName; 16 | 17 | private String destName; 18 | 19 | private String srcType; 20 | 21 | private String destType; 22 | 23 | private long srcSize; 24 | 25 | private long destSize; 26 | 27 | private String status; 28 | 29 | private String result; 30 | } 31 | -------------------------------------------------------------------------------- /deployment/helm/datamate/charts/kuberay-operator/templates/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbacEnable -}} 2 | kind: RoleBinding 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: {{ include "kuberay-operator.leaderElectionRoleBinding.name" . 
}} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "kuberay-operator.labels" . | nindent 4 }} 9 | roleRef: 10 | apiGroup: rbac.authorization.k8s.io 11 | kind: Role 12 | name: {{ include "kuberay-operator.leaderElectionRole.name" . }} 13 | subjects: 14 | - kind: ServiceAccount 15 | name: {{ include "kuberay-operator.serviceAccount.name" . }} 16 | namespace: {{ .Release.Namespace }} 17 | {{- end }} 18 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_special_char_rate_filter/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档特殊字符率检查' 2 | name_en: 'Special Character Rate Check' 3 | description: '去除特殊字符过多的文档。' 4 | description_en: 'Filters out files that contain excessive special characters.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'FileWithHighSpecialCharRateFilter' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '你好!@!@#!¥!@#' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | settings: 18 | specialCharRatio: 19 | name: 文档特殊字符率 20 | description: 特殊字符的统计数/文档总字数 > 设定值,该文档被去除。 21 | type: slider 22 | defaultVal: 0.3 23 | min: 0 24 | max: 1 25 | step: 0.1 -------------------------------------------------------------------------------- /backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java: -------------------------------------------------------------------------------- 1 | package com.datamate.rag.indexer.interfaces.dto; 2 | 3 | import com.datamate.common.interfaces.PagingQuery; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.time.LocalDateTime; 8 | 9 | /** 10 | * 11 | * 12 | * @author dallas 13 | * @since 2025-10-29 14 | */ 15 | @Setter 16 | @Getter 17 | public class KnowledgeBaseQueryReq extends PagingQuery { 18 | private String name; 19 | private String description; 20 | 
private LocalDateTime createdAt; 21 | private LocalDateTime updatedAt; 22 | private String createdBy; 23 | private String updatedBy; 24 | } 25 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/common/Chart.yaml: -------------------------------------------------------------------------------- 1 | annotations: 2 | category: Infrastructure 3 | apiVersion: v2 4 | appVersion: 1.12.0 5 | description: A Library Helm Chart for grouping common logic between bitnami charts. 6 | This chart is not deployable by itself. 7 | home: https://github.com/bitnami/charts/tree/master/bitnami/common 8 | icon: https://bitnami.com/downloads/logos/bitnami-mark.png 9 | keywords: 10 | - common 11 | - helper 12 | - template 13 | - function 14 | - bitnami 15 | maintainers: 16 | - email: containers@bitnami.com 17 | name: Bitnami 18 | name: common 19 | sources: 20 | - https://github.com/bitnami/charts 21 | - https://www.bitnami.com/ 22 | type: library 23 | version: 1.12.0 24 | -------------------------------------------------------------------------------- /runtime/datamate-python/app/module/annotation/schema/tag.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import List, Dict, Any 3 | from pydantic import BaseModel, Field 4 | 5 | from app.module.shared.schema import BaseResponseModel 6 | 7 | class UpdateFileTagsRequest(BaseModel): 8 | """更新文件标签请求""" 9 | tags: List[Dict[str, Any]] = Field(..., description="要更新的标签列表(部分更新)") 10 | 11 | 12 | class UpdateFileTagsResponse(BaseResponseModel): 13 | """更新文件标签响应""" 14 | file_id: str = Field(..., alias="fileId", description="文件ID") 15 | tags: List[Dict[str, Any]] = Field(..., description="更新后的完整标签列表") 16 | tags_updated_at: datetime = Field(..., alias="tagsUpdatedAt", description="标签更新时间") 17 | -------------------------------------------------------------------------------- 
/runtime/ops/filter/remove_file_with_many_sensitive_words/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档敏感词率检查' 2 | name_en: 'Sensitive Word Rate Check' 3 | description: '去除敏感词过多的文档。' 4 | description_en: 'Filters out files that contain excessive sensitive phrases.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'FileWithManySensitiveWordsFilter' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '出售硝酸甘油出售硝酸甘油出售硝酸甘油出售硝酸甘油' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | settings: 18 | sensitiveWordsRate: 19 | name: 文档敏感词率 20 | description: 敏感词的字数/文档总字数 > 设定值,该文档被去除。 21 | type: slider 22 | defaultVal: 0.01 23 | min: 0 24 | max: 1 25 | step: 0.01 -------------------------------------------------------------------------------- /backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTemplateConverter.java: -------------------------------------------------------------------------------- 1 | package com.datamate.cleaning.infrastructure.converter; 2 | 3 | import com.datamate.cleaning.domain.model.entity.CleaningTemplate; 4 | import com.datamate.cleaning.interfaces.dto.CleaningTemplateDto; 5 | import org.mapstruct.Mapper; 6 | import org.mapstruct.factory.Mappers; 7 | 8 | @Mapper 9 | public interface CleaningTemplateConverter { 10 | CleaningTemplateConverter INSTANCE = Mappers.getMapper(CleaningTemplateConverter.class); 11 | 12 | CleaningTemplate fromDtoToEntity(CleaningTemplateDto dto); 13 | 14 | CleaningTemplateDto fromEntityToDto(CleaningTemplate entity); 15 | } 16 | -------------------------------------------------------------------------------- /deployment/helm/milvus/charts/kafka/charts/zookeeper/charts/common/templates/_warnings.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Warning about using 
rolling tag. 4 | Usage: 5 | {{ include "common.warnings.rollingTag" .Values.path.to.the.imageRoot }} 6 | */}} 7 | {{- define "common.warnings.rollingTag" -}} 8 | 9 | {{- if and (contains "bitnami/" .repository) (not (.tag | toString | regexFind "-r\\d+$|sha256:")) }} 10 | WARNING: Rolling tag detected ({{ .repository }}:{{ .tag }}), please note that it is strongly recommended to avoid using rolling tags in a production environment. 11 | +info https://docs.bitnami.com/containers/how-to/understand-rolling-tags-containers/ 12 | {{- end }} 13 | 14 | {{- end -}} 15 | -------------------------------------------------------------------------------- /frontend/src/pages/Layout/MainLayout.tsx: -------------------------------------------------------------------------------- 1 | import React, { memo } from "react"; 2 | import { Outlet } from "react-router"; 3 | import Sidebar from "./Sidebar"; 4 | 5 | const MainLayout = () => { 6 | return ( 7 |
8 |
9 | {/* Sidebar */} 10 | 11 | {/* Main Content */} 12 |
13 | {/* Content Area */} 14 |
15 | 16 |
17 |
18 |
19 |
20 | ); 21 | }; 22 | 23 | export default memo(MainLayout); 24 | -------------------------------------------------------------------------------- /runtime/ops/filter/file_with_high_repeat_word_rate_filter/metadata.yml: -------------------------------------------------------------------------------- 1 | name: '文档字重复率检查' 2 | name_en: 'Word Repetition Rate Check' 3 | description: '去除重复字过多的文档。' 4 | description_en: 'Filters out files that contain excessive repeated words.' 5 | language: 'python' 6 | vendor: 'huawei' 7 | raw_id: 'FileWithHighRepeatWordRateFilter' 8 | version: '1.0.0' 9 | types: 10 | - 'cleanse' 11 | modal: 'text' 12 | effect: 13 | before: '机器学学学学学学学学学学学学学学学学学学学学学学学学学学学学学学习' 14 | after: '' 15 | inputs: 'text' 16 | outputs: 'text' 17 | settings: 18 | repeatWordRatio: 19 | name: 文档字重复率 20 | description: 某个字的统计数/文档总字数 > 设定值,该文档被去除。 21 | type: slider 22 | defaultVal: 0.5 23 | min: 0 24 | max: 1 25 | step: 0.1 -------------------------------------------------------------------------------- /runtime/ops/filter/img_similar_images_cleaner/sql/sql_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "query_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid ORDER BY timestamp LIMIT :ge OFFSET :le", 3 | "insert_sql": "INSERT INTO operator_similar_img_features (task_uuid,p_hash,des_matrix,matrix_shape,file_name,timestamp) VALUES (:task_uuid,:p_hash,:des_matrix,:matrix_shape,:file_name,:timestamp)", 4 | "query_task_uuid_sql": "SELECT * FROM operator_similar_img_features WHERE task_uuid = :task_uuid", 5 | "create_tables_sql": "CREATE TABLE IF NOT EXISTS operator_similar_img_features (id INT AUTO_INCREMENT PRIMARY KEY,task_uuid VARCHAR(255),p_hash TEXT,des_matrix BLOB,matrix_shape TEXT,file_name TEXT,timestamp DATETIME);" 6 | } -------------------------------------------------------------------------------- /runtime/python-executor/datamate/ops/__init__.py: 
# -*- coding: utf-8 -*-
"""Import every operator sub-package that sits next to this file.

Each immediate sub-directory containing an ``__init__.py`` is imported as
``<this package>.<directory name>`` when this package is first imported.
A sub-package that fails to import is logged and skipped so that one broken
operator does not prevent the remaining ones from loading.
"""
import importlib
import os
import sys
from pathlib import Path

from loguru import logger


# Directory that holds this package's sub-packages.
current_dir = os.path.dirname(__file__)

# Walk the immediate children of the package directory.
for module_name in os.listdir(current_dir):
    module_path = os.path.join(current_dir, module_name)
    # Only directories that carry an __init__.py are importable packages.
    if os.path.isdir(module_path) and os.path.isfile(
        os.path.join(module_path, '__init__.py')
    ):
        # Import the sub-package relative to this package.
        try:
            importlib.import_module(f".{module_name}", package=__name__)
        except Exception:
            # Best-effort loading: keep going, but record the traceback so
            # the reason for the failure is visible in the logs instead of
            # being silently discarded.
            logger.exception(f"Failed to load Ops {module_name}")