├── ds ├── default.go ├── sql_options.go ├── mssql_test.go ├── options.go └── sqlite.go ├── utils ├── stats.go ├── docker.go ├── spider.go ├── system.go ├── uuid.go ├── os.go ├── node.go ├── json.go ├── task.go ├── bool.go ├── rpc.go ├── debug.go ├── time.go ├── args.go ├── di.go ├── backoff.go ├── result.go ├── init.go ├── sql.go ├── encrypt_test.go ├── helpers.go ├── chan.go ├── http.go ├── array.go ├── sqlite.go ├── git.go ├── kafka.go └── cache.go ├── event ├── options.go └── func.go ├── constants ├── delegate.go ├── database.go ├── signal.go ├── sort.go ├── data_collection.go ├── filer.go ├── common.go ├── encrypt.go ├── file.go ├── export.go ├── pagination.go ├── config_spider.go ├── log.go ├── scrapy.go ├── auth.go ├── event.go ├── cache.go ├── results.go ├── channels.go ├── variable.go ├── node.go ├── anchor.go ├── register.go ├── action.go ├── user.go ├── http.go ├── message.go ├── notification.go ├── schedule.go ├── git.go ├── data_field.go ├── grpc.go ├── rpc.go ├── system.go ├── filter.go ├── task.go └── ds.go ├── interfaces ├── options.go ├── provide.go ├── entity.go ├── node_service_option.go ├── injectable.go ├── list.go ├── translation.go ├── grpc_model_binder.go ├── grpc_base_service_params.go ├── grpc_model_list_binder.go ├── model_role.go ├── address.go ├── color.go ├── event_data.go ├── test.go ├── model_user_group.go ├── with_model_id.go ├── controller_params.go ├── model_tag.go ├── with_address.go ├── with_config_path.go ├── i18n_service.go ├── model_binder.go ├── grpc_client_model_delegate.go ├── module.go ├── model_list_binder.go ├── grpc_client_model_base_service.go ├── node_service.go ├── color_service.go ├── grpc_model_base_service_message.go ├── grpc_subscribe.go ├── grpc_client_model_service.go ├── grpc_base.go ├── grpc_client_pool.go ├── model_user.go ├── model_environment.go ├── grpc_model_delegate_message.go ├── task_hook_service.go ├── node_worker_service.go ├── filter.go ├── filter_condition.go ├── export_service.go ├── 
user_service_options.go ├── node_master_service.go ├── event_service.go ├── export.go ├── model_node_delegate.go ├── grpc_stream.go ├── node_config_service.go ├── task_stats_service.go ├── task_base_service.go ├── stats_service.go ├── model_artifact.go ├── model_result.go ├── task_runner.go ├── model_permission.go ├── fs_file_info.go ├── process_daemon.go ├── data_source_service.go ├── result_service_registry.go ├── model_extra_value.go ├── result_service.go ├── fs_service_v2.go ├── result_service_mongo.go ├── grpc_client_model_task_service.go ├── task_scheduler_service.go ├── grpc_client_model_spider_service.go ├── model_git.go ├── grpc_client_model_task_stat_service.go ├── grpc_server.go ├── grpc_client_model_node_service.go ├── grpc_client_model_environment_service.go ├── spider_service_options.go ├── model_artifact_sys.go ├── model_delegate.go ├── model_node.go ├── model_task_stat.go ├── fs_service_options.go ├── schedule_service.go ├── model_task.go ├── user_service.go ├── model_spider.go ├── model_schedule.go ├── spider_admin_service.go ├── grpc_client.go ├── model_service_v2.go ├── fs_service.go └── model_base_service.go ├── main.go ├── README.md ├── docs ├── .gitignore ├── api │ └── index.html └── package.json ├── controllers ├── git.go ├── role.go ├── tag.go ├── permission.go ├── system_info_v2.go ├── http.go ├── delegate_action.go ├── version.go ├── delegate_list_action.go ├── binder.go ├── system_info.go ├── utils_context.go ├── utils_pagination.go ├── token_v2.go ├── login_v2.go ├── stats_v2.go ├── test │ └── main_test.go ├── setting_v2.go └── sync.go ├── task ├── log │ ├── default.go │ ├── errors.go │ ├── constants.go │ ├── interface.go │ ├── driver.go │ └── entity.go ├── stats │ └── options.go ├── scheduler │ └── options.go └── handler │ └── options.go ├── grpc ├── test │ └── main_test.go ├── client │ ├── utils_proto.go │ └── options.go ├── payload │ └── model_service_v2_payload.go └── server │ ├── options.go │ ├── dependencies_server_v2.go │ └── 
utils_handle.go ├── .gitignore ├── entity ├── sort.go ├── data_field.go ├── model_info.go ├── filter_select_option.go ├── system_info.go ├── doc.go ├── grpc_event_service_message.go ├── translation.go ├── event.go ├── git.go ├── color.go ├── stats.go ├── common.go ├── model_delegate.go ├── spider.go ├── rpc.go ├── pagination.go ├── node.go ├── version.go ├── grpc_subscribe.go ├── grpc_base_service_message.go ├── task.go ├── grpc_base_service_params.go ├── grpc_delegate_message.go ├── ttl_map.go ├── export.go ├── address.go ├── http.go └── es.go ├── errors ├── result.go ├── stats.go ├── schedule.go ├── git.go ├── filter.go ├── http.go ├── spider.go ├── process.go ├── store.go ├── event.go ├── fs.go ├── node.go ├── ds.go ├── user.go ├── model.go ├── task.go ├── base.go ├── grpc.go └── controller.go ├── stats └── options.go ├── notification ├── mail_theme.go ├── constants.go ├── payload.go ├── service_test.go └── mobile.go ├── container └── container.go ├── config ├── config_test.go ├── version.go ├── path.go ├── base.go └── default_config.go ├── sys_exec ├── sys_exec_windows.go ├── sys_exec_darwin.go └── sys_exec_linux.go ├── models ├── models │ ├── test.go │ ├── token_v2.go │ ├── base.go │ ├── environment_v2.go │ ├── utils_col.go │ ├── role_v2.go │ ├── setting_v2.go │ ├── variable_v2.go │ ├── project_v2.go │ ├── user_v2.go │ ├── user_role_v2.go │ ├── task_queue_item_v2.go │ ├── role_permission_v2.go │ ├── git_v2.go │ ├── permission_v2.go │ ├── data_collection_v2.go │ ├── job.go │ ├── password.go │ ├── token.go │ ├── user_role.go │ ├── setting.go │ ├── dependency_setting_v2.go │ ├── task_queue_item.go │ ├── variable.go │ ├── role_permission.go │ ├── utils_tag.go │ ├── task_stat_v2.go │ ├── data_collection.go │ ├── tag.go │ ├── environment.go │ ├── node_v2.go │ ├── dependency_setting.go │ ├── project.go │ ├── schedule_v2.go │ ├── data_source_v2.go │ ├── role.go │ ├── spider_stat_v2.go │ ├── result.go │ └── artifact.go ├── delegate │ ├── base_test.go │ ├── 
utils_event.go │ └── model_node.go ├── config_spider │ └── common.go ├── service │ ├── options.go │ ├── job_service.go │ ├── task_service.go │ ├── token_service.go │ ├── spider_service.go │ ├── git_service.go │ └── project_service.go └── client │ ├── options.go │ ├── model_node_delegate.go │ └── model_service.go ├── .editorconfig ├── user └── options.go ├── spider └── admin │ └── options.go ├── cmd ├── server_test.go ├── server.go └── root.go ├── node ├── config │ └── options.go └── service │ └── options.go ├── middlewares ├── middlewares.go ├── cors.go ├── filer_auth.go ├── auth.go └── auth_v2.go ├── docker-compose.yml ├── result ├── options.go └── service_registry.go ├── process ├── daemon_test.go └── options.go ├── routes ├── router_test.go └── group.go ├── fs └── default.go ├── apps ├── server_test.go └── interfaces.go ├── schedule ├── options.go ├── logger.go └── test │ └── schedule_service_test.go ├── .github └── workflows │ └── test.yml └── i18n └── service.go /ds/default.go: -------------------------------------------------------------------------------- 1 | package ds 2 | -------------------------------------------------------------------------------- /utils/stats.go: -------------------------------------------------------------------------------- 1 | package utils 2 | -------------------------------------------------------------------------------- /event/options.go: -------------------------------------------------------------------------------- 1 | package event 2 | -------------------------------------------------------------------------------- /constants/delegate.go: -------------------------------------------------------------------------------- 1 | package constants 2 | -------------------------------------------------------------------------------- /interfaces/options.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | } 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # crawlab-core 2 | Backend core modules for Crawlab 3 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules 3 | dist 4 | build 5 | tmp 6 | yarn.lock 7 | -------------------------------------------------------------------------------- /interfaces/provide.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Provide func(env string) 4 | -------------------------------------------------------------------------------- /constants/database.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | ColJob = "jobs" 5 | ) 6 | -------------------------------------------------------------------------------- /constants/signal.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | SignalQuit = iota 5 | ) 6 | -------------------------------------------------------------------------------- /controllers/git.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | var GitController ListController 4 | -------------------------------------------------------------------------------- /controllers/role.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | var RoleController ListController 4 | 
-------------------------------------------------------------------------------- /controllers/tag.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | var TagController ListController 4 | -------------------------------------------------------------------------------- /constants/sort.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | SortQueryField = "sort" 5 | ) 6 | -------------------------------------------------------------------------------- /controllers/permission.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | var PermissionController ListController 4 | -------------------------------------------------------------------------------- /constants/data_collection.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | DataCollectionKey = "_col" 5 | ) 6 | -------------------------------------------------------------------------------- /constants/filer.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | DefaultFilerAuthKey = "Crawlab2021!" 
5 | ) 6 | -------------------------------------------------------------------------------- /interfaces/entity.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Entity interface { 4 | Value() interface{} 5 | } 6 | -------------------------------------------------------------------------------- /interfaces/node_service_option.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type NodeServiceOption interface { 4 | } 5 | -------------------------------------------------------------------------------- /task/log/default.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "time" 4 | 5 | var DefaultLogTtl = 30 * 24 * time.Hour 6 | -------------------------------------------------------------------------------- /constants/common.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | ASCENDING = "asc" 5 | DESCENDING = "dsc" 6 | ) 7 | -------------------------------------------------------------------------------- /interfaces/injectable.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Injectable interface { 4 | Inject() error 5 | } 6 | -------------------------------------------------------------------------------- /interfaces/list.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type List interface { 4 | GetModels() (res []Model) 5 | } 6 | -------------------------------------------------------------------------------- /constants/encrypt.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | DefaultEncryptServerKey = "0123456789abcdef" 5 | ) 6 | 
-------------------------------------------------------------------------------- /constants/file.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const EmptyFileData = " " 4 | 5 | const FsKeepFileName = ".gitkeep" 6 | -------------------------------------------------------------------------------- /interfaces/translation.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Translation interface { 4 | GetLang() (l string) 5 | } 6 | -------------------------------------------------------------------------------- /utils/docker.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | func IsDocker() (ok bool) { 4 | return EnvIsTrue("docker", false) 5 | } 6 | -------------------------------------------------------------------------------- /constants/export.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | ExportTypeCsv = "csv" 5 | ExportTypeJson = "json" 6 | ) 7 | -------------------------------------------------------------------------------- /constants/pagination.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | var PaginationDefaultPage = 1 4 | var PaginationDefaultSize = 10 5 | -------------------------------------------------------------------------------- /ds/sql_options.go: -------------------------------------------------------------------------------- 1 | package ds 2 | 3 | type SqlOptions struct { 4 | DefaultHost string 5 | DefaultPort string 6 | } 7 | -------------------------------------------------------------------------------- /grpc/test/main_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import "testing" 4 | 5 | func TestMain(m *testing.M) { 6 | m.Run() 7 | } 8 | 
-------------------------------------------------------------------------------- /interfaces/grpc_model_binder.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcModelBinder interface { 4 | ModelBinder 5 | } 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store 3 | vendor/ 4 | tmp/ 5 | build/ 6 | dist/ 7 | *.log 8 | gen/ 9 | *.exe 10 | *.txt 11 | -------------------------------------------------------------------------------- /constants/config_spider.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | EngineScrapy = "scrapy" 5 | EngineColly = "colly" 6 | ) 7 | -------------------------------------------------------------------------------- /interfaces/grpc_base_service_params.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcBaseServiceParams interface { 4 | Entity 5 | } 6 | -------------------------------------------------------------------------------- /interfaces/grpc_model_list_binder.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcModelListBinder interface { 4 | ModelListBinder 5 | } 6 | -------------------------------------------------------------------------------- /interfaces/model_role.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Role interface { 4 | ModelWithKey 5 | ModelWithNameDescription 6 | } 7 | -------------------------------------------------------------------------------- /entity/sort.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type Sort struct { 4 | Key string 
`json:"key"` 5 | Direction string `json:"d"` 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/address.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Address interface { 4 | Entity 5 | String() string 6 | IsEmpty() bool 7 | } 8 | -------------------------------------------------------------------------------- /interfaces/color.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Color interface { 4 | Entity 5 | GetHex() string 6 | GetName() string 7 | } 8 | -------------------------------------------------------------------------------- /errors/result.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewResultError(msg string) (err error) { 4 | return NewError(ErrorPrefixResult, msg) 5 | } 6 | -------------------------------------------------------------------------------- /interfaces/event_data.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type EventData interface { 4 | GetEvent() string 5 | GetData() interface{} 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/test.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "testing" 4 | 5 | type Test interface { 6 | Setup(*testing.T) 7 | Cleanup() 8 | } 9 | -------------------------------------------------------------------------------- /constants/log.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | ErrorRegexPattern = "(?:[ :,.]|^)((?:error|exception|traceback)s?)(?:[ :,.]|$)" 5 | ) 6 | -------------------------------------------------------------------------------- /constants/scrapy.go: 
-------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ScrapyProtectedStageNames = "" 4 | 5 | const ScrapyProtectedFieldNames = "_id,task_id,ts" 6 | -------------------------------------------------------------------------------- /interfaces/model_user_group.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type UserGroup interface { 4 | Model 5 | GetUsers() (users []User, err error) 6 | } 7 | -------------------------------------------------------------------------------- /constants/auth.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | OwnerTypeAll = "all" 5 | OwnerTypeMe = "me" 6 | OwnerTypePublic = "public" 7 | ) 8 | -------------------------------------------------------------------------------- /constants/event.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | GrpcEventServiceTypeRegister = "register" 5 | GrpcEventServiceTypeSend = "send" 6 | ) 7 | -------------------------------------------------------------------------------- /grpc/client/utils_proto.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import grpc2 "github.com/crawlab-team/crawlab-grpc" 4 | 5 | var EmptyRequest = &grpc2.Request{} 6 | -------------------------------------------------------------------------------- /interfaces/with_model_id.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type WithModelId interface { 4 | GetModelId() (id ModelId) 5 | SetModelId(id ModelId) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/controller_params.go: -------------------------------------------------------------------------------- 1 | package 
interfaces 2 | 3 | type ControllerParams interface { 4 | IsZero() (ok bool) 5 | IsDefault() (ok bool) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/model_tag.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Tag interface { 4 | Model 5 | GetName() string 6 | GetColor() string 7 | SetCol(string) 8 | } 9 | -------------------------------------------------------------------------------- /stats/options.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type Option func(svc interfaces.StatsService) 6 | -------------------------------------------------------------------------------- /entity/data_field.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type DataField struct { 4 | Key string `json:"key" bson:"key"` 5 | Type string `json:"type" bson:"type"` 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/with_address.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type WithAddress interface { 4 | GetAddress() (address Address) 5 | SetAddress(address Address) 6 | } 7 | -------------------------------------------------------------------------------- /constants/cache.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | CacheColName = "cache" 5 | CacheColKey = "k" 6 | CacheColValue = "v" 7 | CacheColTime = "t" 8 | ) 9 | -------------------------------------------------------------------------------- /interfaces/with_config_path.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type WithConfigPath interface { 4 | 
GetConfigPath() (path string) 5 | SetConfigPath(path string) 6 | } 7 | -------------------------------------------------------------------------------- /ds/mssql_test.go: -------------------------------------------------------------------------------- 1 | package ds 2 | 3 | import "testing" 4 | 5 | func TestNewDataSourceMssqlService(t *testing.T) { 6 | t.Run("insert", func(t *testing.T) { 7 | }) 8 | } 9 | -------------------------------------------------------------------------------- /event/func.go: -------------------------------------------------------------------------------- 1 | package event 2 | 3 | func SendEvent(eventName string, data ...interface{}) { 4 | svc := NewEventService() 5 | svc.SendEvent(eventName, data...) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/i18n_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type I18nService interface { 4 | AddTranslations(t []Translation) 5 | GetTranslations() (t []Translation) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/model_binder.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ModelBinder interface { 4 | Bind() (res Model, err error) 5 | Process(d Model) (res Model, err error) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_delegate.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcClientModelDelegate interface { 4 | ModelDelegate 5 | WithConfigPath 6 | Close() error 7 | } 8 | -------------------------------------------------------------------------------- /interfaces/module.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ModuleId int 
4 | 5 | type Module interface { 6 | Init() error 7 | Start() 8 | Wait() 9 | Stop() 10 | } 11 | -------------------------------------------------------------------------------- /utils/spider.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | func GetSpiderCol(col string, name string) string { 4 | if col == "" { 5 | return "results_" + name 6 | } 7 | return col 8 | } 9 | -------------------------------------------------------------------------------- /utils/system.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "github.com/spf13/viper" 4 | 5 | func IsPro() bool { 6 | return viper.GetString("info.edition") == "global.edition.pro" 7 | } 8 | -------------------------------------------------------------------------------- /utils/uuid.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "github.com/google/uuid" 4 | 5 | func NewUUIDString() (res string) { 6 | id, _ := uuid.NewUUID() 7 | return id.String() 8 | } 9 | -------------------------------------------------------------------------------- /interfaces/model_list_binder.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ModelListBinder interface { 4 | Bind() (l List, err error) 5 | Process(d interface{}) (l List, err error) 6 | } 7 | -------------------------------------------------------------------------------- /notification/mail_theme.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | import "github.com/matcornic/hermes/v2" 4 | 5 | type MailTheme interface { 6 | hermes.Theme 7 | GetStyle() string 8 | } 9 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_base_service.go: 
-------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcClientModelBaseService interface { 4 | WithModelId 5 | WithConfigPath 6 | ModelBaseService 7 | } 8 | -------------------------------------------------------------------------------- /interfaces/node_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type NodeService interface { 4 | Module 5 | WithConfigPath 6 | WithAddress 7 | GetConfigService() NodeConfigService 8 | } 9 | -------------------------------------------------------------------------------- /task/log/errors.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrInvalidType = errors.New("invalid type") 7 | ErrNotImplemented = errors.New("not implemented") 8 | ) 9 | -------------------------------------------------------------------------------- /constants/results.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | HashKey = "_h" 5 | ) 6 | 7 | const ( 8 | DedupTypeIgnore = "ignore" 9 | DedupTypeOverwrite = "overwrite" 10 | ) 11 | -------------------------------------------------------------------------------- /constants/channels.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | ChannelAllNode = "nodes:public" 5 | 6 | ChannelWorkerNode = "nodes:" 7 | 8 | ChannelMasterNode = "nodes:master" 9 | ) 10 | -------------------------------------------------------------------------------- /constants/variable.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | String = "string" 5 | Number = "number" 6 | Boolean = "boolean" 7 | Array = "array" 8 | Object = "object" 9 | ) 10 | 
-------------------------------------------------------------------------------- /container/container.go: -------------------------------------------------------------------------------- 1 | package container 2 | 3 | import ( 4 | "go.uber.org/dig" 5 | ) 6 | 7 | var c = dig.New() 8 | 9 | func GetContainer() *dig.Container { 10 | return c 11 | } 12 | -------------------------------------------------------------------------------- /entity/model_info.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type ModelInfo struct { 6 | Id interfaces.ModelId 7 | ColName string 8 | } 9 | -------------------------------------------------------------------------------- /constants/node.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | NodeStatusUnregistered = "u" 5 | NodeStatusRegistered = "r" 6 | NodeStatusOnline = "on" 7 | NodeStatusOffline = "off" 8 | ) 9 | -------------------------------------------------------------------------------- /entity/filter_select_option.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type FilterSelectOption struct { 4 | Value interface{} `json:"value" bson:"value"` 5 | Label string `json:"label" bson:"label"` 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/color_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ColorService interface { 4 | Injectable 5 | GetByName(name string) (res Color, err error) 6 | GetRandom() (res Color, err error) 7 | } 8 | -------------------------------------------------------------------------------- /constants/anchor.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | 
const ( 4 | AnchorStartStage = "START_STAGE" 5 | AnchorStartUrl = "START_URL" 6 | AnchorItems = "ITEMS" 7 | AnchorParsers = "PARSERS" 8 | ) 9 | -------------------------------------------------------------------------------- /errors/stats.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewStatsError(msg string) (err error) { 4 | return NewError(ErrorPrefixStats, msg) 5 | } 6 | 7 | var ErrorStatsInvalidType = NewStatsError("invalid type") 8 | -------------------------------------------------------------------------------- /interfaces/grpc_model_base_service_message.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcModelBaseServiceMessage interface { 4 | GetModelId() ModelId 5 | GetData() []byte 6 | ToBytes() (data []byte) 7 | } 8 | -------------------------------------------------------------------------------- /interfaces/grpc_subscribe.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcSubscribe interface { 4 | GetStream() GrpcStream 5 | GetStreamBidirectional() GrpcStreamBidirectional 6 | GetFinished() chan bool 7 | } 8 | -------------------------------------------------------------------------------- /notification/constants.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | const ( 4 | TypeMail = "mail" 5 | TypeMobile = "mobile" 6 | ) 7 | 8 | const ( 9 | SettingsColName = "notification_settings" 10 | ) 11 | -------------------------------------------------------------------------------- /entity/system_info.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type SystemInfo struct { 4 | Edition string `json:"edition"` // edition. e.g. community / pro 5 | Version string `json:"version"` // version. e.g. 
v0.6.0 6 | } 7 | -------------------------------------------------------------------------------- /errors/schedule.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewScheduleError(msg string) (err error) { 4 | return NewError(ErrorPrefixSchedule, msg) 5 | } 6 | 7 | //var ErrorSchedule = NewScheduleError("unregistered") 8 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcClientModelService interface { 4 | WithConfigPath 5 | NewBaseServiceDelegate(id ModelId) (GrpcClientModelBaseService, error) 6 | } 7 | -------------------------------------------------------------------------------- /interfaces/grpc_base.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcBase interface { 4 | WithConfigPath 5 | Init() (err error) 6 | Start() (err error) 7 | Stop() (err error) 8 | Register() (err error) 9 | } 10 | -------------------------------------------------------------------------------- /constants/register.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | RegisterTypeMac = "mac" 5 | RegisterTypeIp = "ip" 6 | RegisterTypeHostname = "hostname" 7 | RegisterTypeCustomName = "customName" 8 | ) 9 | -------------------------------------------------------------------------------- /entity/doc.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type DocItem struct { 4 | Title string `json:"title"` 5 | Url string `json:"url"` 6 | Path string `json:"path"` 7 | Children []DocItem `json:"children"` 8 | } 9 | -------------------------------------------------------------------------------- /interfaces/grpc_client_pool.go: 
-------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcClientPool interface { 4 | WithConfigPath 5 | Init() error 6 | NewClient() error 7 | GetClient() (GrpcClient, error) 8 | SetSize(int) 9 | } 10 | -------------------------------------------------------------------------------- /interfaces/model_user.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type User interface { 4 | Model 5 | GetUsername() (name string) 6 | GetPassword() (p string) 7 | GetRole() (r string) 8 | GetEmail() (email string) 9 | } 10 | -------------------------------------------------------------------------------- /interfaces/model_environment.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Environment interface { 4 | Model 5 | GetKey() (key string) 6 | SetKey(key string) 7 | GetValue() (value string) 8 | SetValue(value string) 9 | } 10 | -------------------------------------------------------------------------------- /config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/stretchr/testify/require" 5 | "testing" 6 | ) 7 | 8 | func TestInitConfig(t *testing.T) { 9 | err := InitConfig() 10 | require.Nil(t, err) 11 | } 12 | -------------------------------------------------------------------------------- /interfaces/grpc_model_delegate_message.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type GrpcModelDelegateMessage interface { 4 | GetModelId() ModelId 5 | GetMethod() ModelDelegateMethod 6 | GetData() []byte 7 | ToBytes() (data []byte) 8 | } 9 | -------------------------------------------------------------------------------- /sys_exec/sys_exec_windows.go: 
// Version is the version string reported by GetVersion.
const Version = "v0.6.3"

// GetVersion returns Version, prepending "v" when Version does not already
// start with it.
func GetVersion() (v string) {
	// Stripping at most one leading "v" and re-adding it yields Version
	// unchanged when the prefix is present and "v"+Version otherwise.
	return "v" + strings.TrimPrefix(Version, "v")
}
-------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type GrpcEventServiceMessage struct { 4 | Type string `json:"type"` 5 | Events []string `json:"events"` 6 | Key string `json:"key"` 7 | Data []byte `json:"data"` 8 | } 9 | -------------------------------------------------------------------------------- /interfaces/task_hook_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type TaskHookService interface { 4 | PreActions(Task, Spider, FsServiceV2, TaskHandlerService) (err error) 5 | PostActions(Task, Spider, FsServiceV2, TaskHandlerService) (err error) 6 | } 7 | -------------------------------------------------------------------------------- /utils/os.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "os" 5 | "os/signal" 6 | "syscall" 7 | ) 8 | 9 | func DefaultWait() { 10 | quit := make(chan os.Signal, 1) 11 | signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) 12 | <-quit 13 | } 14 | -------------------------------------------------------------------------------- /interfaces/node_worker_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "time" 4 | 5 | type NodeWorkerService interface { 6 | NodeService 7 | Register() 8 | Recv() 9 | ReportStatus() 10 | SetHeartbeatInterval(duration time.Duration) 11 | } 12 | -------------------------------------------------------------------------------- /utils/node.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | func IsMaster() bool { 4 | return EnvIsTrue("node.master", false) 5 | } 6 | 7 | func GetNodeType() string { 8 | if IsMaster() { 9 | return "master" 10 | } else { 11 | return "worker" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- 
// JsonToBytes converts d into a byte slice.
//
// When d is already a []byte it is returned as-is (no copy, no encoding);
// every other value is encoded with json.Marshal, whose error is returned
// unchanged.
func JsonToBytes(d interface{}) (bytes []byte, err error) {
	// A single comma-ok assertion replaces the former type switch that
	// asserted the same type a second time inside its case body.
	if raw, ok := d.([]byte); ok {
		return raw, nil
	}
	return json.Marshal(d)
}
// EventData pairs an event name with an arbitrary payload.
type EventData struct {
	// Event is the event name.
	Event string
	// Data is the payload; any type may be stored.
	Data any
}

// GetEvent returns the Event field.
func (e *EventData) GetEvent() string { return e.Event }

// GetData returns the Data field.
func (e *EventData) GetData() interface{} { return e.Data }
| end_of_line = lf 6 | indent_size = 4 7 | indent_style = tab 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [{*.yaml,*.yml,package.json}] 12 | indent_size = 2 13 | indent_style = space 14 | -------------------------------------------------------------------------------- /errors/git.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewGitError(msg string) (err error) { 4 | return NewError(ErrorPrefixGit, msg) 5 | } 6 | 7 | var ( 8 | ErrorGitInvalidAuthType = NewGitError("invalid auth type") 9 | ErrorGitNoMainBranch = NewGitError("no main branch") 10 | ) 11 | -------------------------------------------------------------------------------- /interfaces/user_service_options.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type UserCreateOptions struct { 4 | Username string 5 | Password string 6 | Email string 7 | Role string 8 | } 9 | 10 | type UserLoginOptions struct { 11 | Username string 12 | Password string 13 | } 14 | -------------------------------------------------------------------------------- /models/models/token_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type TokenV2 struct { 4 | any `collection:"tokens"` 5 | BaseModelV2[TokenV2] `bson:",inline"` 6 | Name string `json:"name" bson:"name"` 7 | Token string `json:"token" bson:"token"` 8 | } 9 | -------------------------------------------------------------------------------- /constants/notification.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | NotificationTriggerTaskFinish = "task_finish" 5 | NotificationTriggerTaskError = "task_error" 6 | NotificationTriggerTaskEmptyResults = "task_empty_results" 7 | NotificationTriggerTaskNever = "task_never" 8 | ) 9 | 
-------------------------------------------------------------------------------- /errors/filter.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewFilterError(msg string) (err error) { 4 | return NewError(ErrorPrefixFilter, msg) 5 | } 6 | 7 | var ErrorFilterInvalidOperation = NewFilterError("invalid operation") 8 | var ErrorFilterUnableToParseQuery = NewFilterError("unable to parse query") 9 | -------------------------------------------------------------------------------- /interfaces/node_master_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type NodeMasterService interface { 8 | NodeService 9 | Monitor() 10 | SetMonitorInterval(duration time.Duration) 11 | Register() error 12 | StopOnError() 13 | GetServer() GrpcServer 14 | } 15 | -------------------------------------------------------------------------------- /models/models/base.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type BaseModel struct { 8 | Id primitive.ObjectID `json:"_id" bson:"_id"` 9 | } 10 | 11 | func (d *BaseModel) GetId() (id primitive.ObjectID) { 12 | return d.Id 13 | } 14 | -------------------------------------------------------------------------------- /user/options.go: -------------------------------------------------------------------------------- 1 | package user 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type Option func(svc interfaces.UserService) 6 | 7 | func WithJwtSecret(secret string) Option { 8 | return func(svc interfaces.UserService) { 9 | svc.SetJwtSecret(secret) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /interfaces/event_service.go: 
// Export is a getter-only view of an export. It is the type returned by
// ExportService.GetExport (interfaces/export_service.go).
//
// NOTE(review): the per-method meanings below are read off the method names
// and return types only; confirm against the implementing type.
type Export interface {
	// GetId returns the export's id as a string.
	GetId() string
	// GetType returns the export's type as a string.
	GetType() string
	// GetTarget returns the export's target as a string.
	GetTarget() string
	// GetFilter returns the Filter associated with the export.
	GetFilter() Filter
	// GetStatus returns the export's status as a string.
	GetStatus() string
	// GetStartTs returns a time.Time; presumably when the export started.
	GetStartTs() time.Time
	// GetEndTs returns a time.Time; presumably when the export ended.
	GetEndTs() time.Time
	// GetDownloadPath returns a path string; presumably where the exported
	// output can be fetched.
	GetDownloadPath() string
}
"http" 5 | GitAuthTypeSsh = "ssh" 6 | ) 7 | 8 | const ( 9 | GitRemoteNameUpstream = "upstream" 10 | GitRemoteNameOrigin = "origin" 11 | ) 12 | 13 | const ( 14 | GitBranchMaster = "master" 15 | GitBranchMain = "main" 16 | ) 17 | -------------------------------------------------------------------------------- /errors/http.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewHttpError(msg string) (err error) { 4 | return NewError(ErrorPrefixHttp, msg) 5 | } 6 | 7 | var ErrorHttpBadRequest = NewHttpError("bad request") 8 | var ErrorHttpUnauthorized = NewHttpError("unauthorized") 9 | var ErrorHttpNotFound = NewHttpError("not found") 10 | -------------------------------------------------------------------------------- /errors/spider.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewSpiderError(msg string) (err error) { 4 | return NewError(ErrorPrefixSpider, msg) 5 | } 6 | 7 | var ( 8 | ErrorSpiderMissingRequiredOption = NewSpiderError("missing required option") 9 | ErrorSpiderForbidden = NewSpiderError("forbidden") 10 | ) 11 | -------------------------------------------------------------------------------- /utils/task.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "github.com/crawlab-team/crawlab-core/constants" 4 | 5 | func IsCancellable(status string) bool { 6 | switch status { 7 | case constants.TaskStatusPending, 8 | constants.TaskStatusRunning: 9 | return true 10 | default: 11 | return false 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /spider/admin/options.go: -------------------------------------------------------------------------------- 1 | package admin 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type Option func(svc interfaces.SpiderAdminService) 6 | 7 | func 
WithConfigPath(path string) Option { 8 | return func(svc interfaces.SpiderAdminService) { 9 | svc.SetConfigPath(path) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /task/log/constants.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | const ( 4 | MetadataName = "metadata.json" 5 | ) 6 | 7 | const ( 8 | DriverTypeFile = "file" // raw file 9 | DriverTypeFs = "fs" // file system (SeaweedFS) 10 | DriverTypeMongo = "mongo" // mongodb 11 | DriverTypeEs = "es" // elastic search 12 | ) 13 | -------------------------------------------------------------------------------- /task/stats/options.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type Option func(service interfaces.TaskStatsService) 6 | 7 | func WithConfigPath(path string) Option { 8 | return func(svc interfaces.TaskStatsService) { 9 | svc.SetConfigPath(path) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /cmd/server_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/apps" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | func TestCmdServer(t *testing.T) { 10 | _ = os.Setenv("CRAWLAB_PPROF", "true") 11 | 12 | // app 13 | svr := apps.GetServerV2() 14 | 15 | // start 16 | apps.Start(svr) 17 | } 18 | -------------------------------------------------------------------------------- /errors/process.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewProcessError(msg string) (err error) { 4 | return NewError(ErrorPrefixProcess, msg) 5 | } 6 | 7 | var ( 8 | ErrorProcessReachedMaxErrors = NewProcessError("reached max errors") 9 | ErrorProcessDaemonProcessExited = 
NewProcessError("daemon process exited") 10 | ) 11 | -------------------------------------------------------------------------------- /interfaces/grpc_stream.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import grpc "github.com/crawlab-team/crawlab-grpc" 4 | 5 | type GrpcStream interface { 6 | Send(msg *grpc.StreamMessage) (err error) 7 | } 8 | 9 | type GrpcStreamBidirectional interface { 10 | GrpcStream 11 | Recv() (msg *grpc.StreamMessage, err error) 12 | } 13 | -------------------------------------------------------------------------------- /interfaces/node_config_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type NodeConfigService interface { 4 | WithConfigPath 5 | Init() error 6 | Reload() error 7 | GetBasicNodeInfo() Entity 8 | GetNodeKey() string 9 | GetNodeName() string 10 | IsMaster() bool 11 | GetAuthKey() string 12 | GetMaxRunners() int 13 | } 14 | -------------------------------------------------------------------------------- /models/models/environment_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type EnvironmentV2 struct { 4 | any `collection:"environments"` 5 | BaseModelV2[EnvironmentV2] `bson:",inline"` 6 | Key string `json:"key" bson:"key"` 7 | Value string `json:"value" bson:"value"` 8 | } 9 | -------------------------------------------------------------------------------- /models/models/utils_col.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "github.com/crawlab-team/crawlab-core/utils/binders" 6 | ) 7 | 8 | func GetModelColName(id interfaces.ModelId) (colName string) { 9 | return binders.NewColNameBinder(id).MustBindString() 10 | } 11 | 
-------------------------------------------------------------------------------- /interfaces/task_stats_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "go.mongodb.org/mongo-driver/bson/primitive" 4 | 5 | type TaskStatsService interface { 6 | TaskBaseService 7 | InsertData(id primitive.ObjectID, records ...interface{}) (err error) 8 | InsertLogs(id primitive.ObjectID, logs ...string) (err error) 9 | } 10 | -------------------------------------------------------------------------------- /utils/bool.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "github.com/spf13/viper" 4 | 5 | func EnvIsTrue(key string, defaultOk bool) bool { 6 | isTrueBool := viper.GetBool(key) 7 | isTrueString := viper.GetString(key) 8 | if isTrueString == "" { 9 | return defaultOk 10 | } 11 | return isTrueBool || isTrueString == "Y" 12 | } 13 | -------------------------------------------------------------------------------- /errors/store.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewInjectError(msg string) (err error) { 4 | return NewError(ErrorPrefixInject, msg) 5 | } 6 | 7 | var ErrorInjectEmptyValue = NewInjectError("empty value") 8 | var ErrorInjectNotExists = NewInjectError("not exists") 9 | var ErrorInjectInvalidType = NewInjectError("invalid type") 10 | -------------------------------------------------------------------------------- /interfaces/task_base_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "go.mongodb.org/mongo-driver/bson/primitive" 4 | 5 | type TaskBaseService interface { 6 | WithConfigPath 7 | Module 8 | SaveTask(t Task, status string) (err error) 9 | IsStopped() (res bool) 10 | GetQueue(nodeId primitive.ObjectID) (queue string) 11 | } 12 | 
// Color is a named color with a hex string, serialized to JSON under the
// keys "name" and "hex".
type Color struct {
	Name string `json:"name"`
	Hex  string `json:"hex"`
}

// GetHex returns the Hex field.
func (col *Color) GetHex() string { return col.Hex }

// GetName returns the Name field.
func (col *Color) GetName() string { return col.Name }

// Value returns the receiver itself (the *Color) wrapped in an interface.
func (col *Color) Value() interface{} { return col }
// GetRpcParam gets an RPC parameter: it returns params[key], or the empty
// string when key is absent (including when params is nil).
func GetRpcParam(key string, params map[string]string) string {
	if value, found := params[key]; found {
		return value
	}
	return ""
}
// Result exposes a key/value record together with a task id.
//
// NOTE(review): the per-method meanings below are read off the signatures
// only; confirm against the implementing type.
type Result interface {
	// Value returns the whole record as a map from string keys to values.
	Value() map[string]interface{}
	// SetValue stores value under key.
	SetValue(key string, value interface{})
	// GetValue returns the value stored under key.
	GetValue(key string) (value interface{})
	// GetTaskId returns the task's ObjectID associated with this result.
	GetTaskId() (id primitive.ObjectID)
	// SetTaskId sets the task's ObjectID associated with this result.
	SetTaskId(id primitive.ObjectID)
}
struct { 4 | Date string `json:"date" bson:"_id"` 5 | Tasks int64 `json:"tasks" bson:"tasks"` 6 | Results int64 `json:"results" bson:"results"` 7 | } 8 | 9 | type StatsTasksByStatusItem struct { 10 | Status string `json:"status" bson:"_id"` 11 | Tasks int64 `json:"tasks" bson:"tasks"` 12 | } 13 | -------------------------------------------------------------------------------- /interfaces/task_runner.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | "time" 6 | ) 7 | 8 | type TaskRunner interface { 9 | Init() (err error) 10 | Run() (err error) 11 | Cancel() (err error) 12 | SetSubscribeTimeout(timeout time.Duration) 13 | GetTaskId() (id primitive.ObjectID) 14 | CleanUp() (err error) 15 | } 16 | -------------------------------------------------------------------------------- /models/models/setting_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson" 5 | ) 6 | 7 | type SettingV2 struct { 8 | any `collection:"settings"` 9 | BaseModelV2[SettingV2] `bson:",inline"` 10 | Key string `json:"key" bson:"key"` 11 | Value bson.M `json:"value" bson:"value"` 12 | } 13 | -------------------------------------------------------------------------------- /utils/debug.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "github.com/spf13/viper" 6 | "time" 7 | ) 8 | 9 | func IsDebug() bool { 10 | return viper.GetBool("debug") 11 | } 12 | 13 | func LogDebug(msg string) { 14 | if !IsDebug() { 15 | return 16 | } 17 | fmt.Println(fmt.Sprintf("[DEBUG] %s: %s", time.Now().Format("2006-01-02 15:04:05"), msg)) 18 | } 19 | -------------------------------------------------------------------------------- /docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: '3.3' 2 | services: 3 | mongo: 4 | image: mongo:latest 5 | container_name: mongo 6 | restart: always 7 | ports: 8 | - "27017:27017" 9 | redis: 10 | image: redis:latest 11 | container_name: redis 12 | restart: always 13 | ports: 14 | - "6379:6379" 15 | -------------------------------------------------------------------------------- /models/models/variable_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type VariableV2 struct { 4 | any `collection:"variables"` 5 | BaseModelV2[VariableV2] `bson:",inline"` 6 | Key string `json:"key" bson:"key"` 7 | Value string `json:"value" bson:"value"` 8 | Remark string `json:"remark" bson:"remark"` 9 | } 10 | -------------------------------------------------------------------------------- /task/log/interface.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | type Driver interface { 4 | Init() (err error) 5 | Close() (err error) 6 | WriteLine(id string, line string) (err error) 7 | WriteLines(id string, lines []string) (err error) 8 | Find(id string, pattern string, skip int, limit int) (lines []string, err error) 9 | Count(id string, pattern string) (n int, err error) 10 | } 11 | -------------------------------------------------------------------------------- /utils/time.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func GetLocalTime(t time.Time) time.Time { 8 | return t.In(time.Local) 9 | } 10 | 11 | func GetTimeString(t time.Time) string { 12 | return t.Format("2006-01-02 15:04:05") 13 | } 14 | 15 | func GetLocalTimeString(t time.Time) string { 16 | t = GetLocalTime(t) 17 | return GetTimeString(t) 18 | } 19 | -------------------------------------------------------------------------------- /models/models/project_v2.go: 
-------------------------------------------------------------------------------- 1 | package models 2 | 3 | type ProjectV2 struct { 4 | any `collection:"projects"` 5 | BaseModelV2[ProjectV2] `bson:",inline"` 6 | Name string `json:"name" bson:"name"` 7 | Description string `json:"description" bson:"description"` 8 | Spiders int `json:"spiders" bson:"-"` 9 | } 10 | -------------------------------------------------------------------------------- /interfaces/model_permission.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type Permission interface { 4 | ModelWithKey 5 | ModelWithNameDescription 6 | GetType() (t string) 7 | SetType(t string) 8 | GetTarget() (target []string) 9 | SetTarget(target []string) 10 | GetAllow() (allow []string) 11 | SetAllow(allow []string) 12 | GetDeny() (deny []string) 13 | SetDeny(deny []string) 14 | } 15 | -------------------------------------------------------------------------------- /ds/options.go: -------------------------------------------------------------------------------- 1 | package ds 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type DataSourceServiceOption func(svc interfaces.DataSourceService) 9 | 10 | func WithMonitorInterval(duration time.Duration) DataSourceServiceOption { 11 | return func(svc interfaces.DataSourceService) { 12 | svc.SetMonitorInterval(duration) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /errors/event.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewEventError(msg string) (err error) { 4 | return NewError(ErrorPrefixEvent, msg) 5 | } 6 | 7 | var ErrorEventNotFound = NewEventError("not found") 8 | var ErrorEventInvalidType = NewEventError("invalid type") 9 | var ErrorEventAlreadyExists = NewEventError("already exists") 10 | var ErrorEventUnknownAction = 
// Page holds pagination parameters in both page-based (PageNum, PageSize)
// and offset-based (Skip, Limit) form.
type Page struct {
	Skip     int
	Limit    int
	PageNum  int
	PageSize int
}

// GetPage parses pageNum and pageSize from their string form, stores them in
// PageNum and PageSize, and derives Limit (= PageSize) and Skip
// (= PageSize * (PageNum - 1)) from them.
//
// A value that cannot be parsed as an int is stored as 0. Skip is never
// negative: when the computed offset would fall below zero (for example for
// page number 0 or an unparsable page number) it is set to 0.
func (p *Page) GetPage(pageNum string, pageSize string) {
	var err error
	// On a range error strconv.Atoi returns the saturated int rather than 0,
	// so the value is reset explicitly on every kind of parse failure.
	if p.PageNum, err = strconv.Atoi(pageNum); err != nil {
		p.PageNum = 0
	}
	if p.PageSize, err = strconv.Atoi(pageSize); err != nil {
		p.PageSize = 0
	}

	p.Limit = p.PageSize
	p.Skip = p.PageSize * (p.PageNum - 1)
	if p.Skip < 0 {
		// A negative offset is meaningless; start from the first record.
		p.Skip = 0
	}
}
Start() (err error) 10 | Stop() 11 | GetMaxErrors() (maxErrors int) 12 | SetMaxErrors(maxErrors int) 13 | GetExitTimeout() (timeout time.Duration) 14 | SetExitTimeout(timeout time.Duration) 15 | GetCmd() (cmd *exec.Cmd) 16 | GetCh() (ch chan int) 17 | } 18 | -------------------------------------------------------------------------------- /entity/model_delegate.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type ModelDelegate struct { 6 | Id interfaces.ModelId `json:"id"` 7 | ColName string `json:"col_name"` 8 | Doc interfaces.Model `json:"doc"` 9 | Artifact interfaces.ModelArtifact `json:"a"` 10 | User interfaces.User `json:"u"` 11 | } 12 | -------------------------------------------------------------------------------- /result/options.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import "go.mongodb.org/mongo-driver/bson/primitive" 4 | 5 | type Option func(opts *Options) 6 | 7 | type Options struct { 8 | registryKey string // registry key 9 | SpiderId primitive.ObjectID // data source id 10 | } 11 | 12 | func WithRegistryKey(key string) Option { 13 | return func(opts *Options) { 14 | opts.registryKey = key 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /controllers/system_info_v2.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/entity" 5 | "github.com/gin-gonic/gin" 6 | "github.com/spf13/viper" 7 | ) 8 | 9 | func GetSystemInfo(c *gin.Context) { 10 | info := &entity.SystemInfo{ 11 | Edition: viper.GetString("info.edition"), 12 | Version: viper.GetString("info.version"), 13 | } 14 | HandleSuccessWithData(c, info) 15 | } 16 | -------------------------------------------------------------------------------- 
/models/models/user_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type UserV2 struct { 4 | any `collection:"users"` 5 | BaseModelV2[UserV2] `bson:",inline"` 6 | Username string `json:"username" bson:"username"` 7 | Password string `json:"-,omitempty" bson:"password"` 8 | Role string `json:"role" bson:"role"` 9 | Email string `json:"email" bson:"email"` 10 | } 11 | -------------------------------------------------------------------------------- /utils/di.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/go-trace" 5 | "github.com/spf13/viper" 6 | "go.uber.org/dig" 7 | "os" 8 | ) 9 | 10 | func VisualizeContainer(c *dig.Container) (err error) { 11 | if !viper.GetBool("debug.di.visualize") { 12 | return nil 13 | } 14 | if err := dig.Visualize(c, os.Stdout); err != nil { 15 | return trace.TraceError(err) 16 | } 17 | return nil 18 | } 19 | -------------------------------------------------------------------------------- /interfaces/data_source_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | "time" 6 | ) 7 | 8 | type DataSourceService interface { 9 | ChangePassword(id primitive.ObjectID, password string) (err error) 10 | Monitor() 11 | CheckStatus(id primitive.ObjectID) (err error) 12 | SetTimeout(duration time.Duration) 13 | SetMonitorInterval(duration time.Duration) 14 | } 15 | -------------------------------------------------------------------------------- /interfaces/result_service_registry.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "go.mongodb.org/mongo-driver/bson/primitive" 4 | 5 | type ResultServiceRegistry interface { 6 | Register(key string, fn ResultServiceRegistryFn) 7 | 
Unregister(key string) 8 | Get(key string) (fn ResultServiceRegistryFn) 9 | } 10 | 11 | type ResultServiceRegistryFn func(colId primitive.ObjectID, dsId primitive.ObjectID) (ResultService, error) 12 | -------------------------------------------------------------------------------- /models/models/user_role_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type UserRoleV2 struct { 8 | any `collection:"user_roles"` 9 | BaseModelV2[UserRoleV2] `bson:",inline"` 10 | RoleId primitive.ObjectID `json:"role_id" bson:"role_id"` 11 | UserId primitive.ObjectID `json:"user_id" bson:"user_id"` 12 | } 13 | -------------------------------------------------------------------------------- /constants/grpc.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | DefaultGrpcServerHost = "" 5 | DefaultGrpcServerPort = "9666" 6 | DefaultGrpcClientRemoteHost = "localhost" 7 | DefaultGrpcClientRemotePort = DefaultGrpcServerPort 8 | DefaultGrpcAuthKey = "Crawlab2021!" 
9 | ) 10 | 11 | const ( 12 | GrpcHeaderAuthorization = "authorization" 13 | ) 14 | 15 | const ( 16 | GrpcSubscribeTypeNode = "node" 17 | ) 18 | -------------------------------------------------------------------------------- /errors/fs.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewFsError(msg string) (err error) { 4 | return NewError(ErrorPrefixFs, msg) 5 | } 6 | 7 | var ErrorFsForbidden = NewFsError("forbidden") 8 | var ErrorFsEmptyWorkspacePath = NewFsError("empty workspace path") 9 | var ErrorFsInvalidType = NewFsError("invalid type") 10 | var ErrorFsAlreadyExists = NewFsError("already exists") 11 | var ErrorFsInvalidContent = NewFsError("invalid content") 12 | -------------------------------------------------------------------------------- /interfaces/model_extra_value.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type ExtraValue interface { 8 | Model 9 | GetValue() (v interface{}) 10 | SetValue(v interface{}) 11 | GetObjectId() (oid primitive.ObjectID) 12 | SetObjectId(oid primitive.ObjectID) 13 | GetModel() (m string) 14 | SetModel(m string) 15 | GetType() (t string) 16 | SetType(t string) 17 | } 18 | -------------------------------------------------------------------------------- /notification/service_test.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | import ( 4 | "net/http" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestService_sendMobile(t *testing.T) { 10 | T.Setup(t) 11 | e := T.NewExpect(t) 12 | time.Sleep(1 * time.Second) 13 | 14 | data := map[string]interface{}{ 15 | "task_id": T.TestTask.GetId().Hex(), 16 | } 17 | e.POST("/send/mobile").WithJSON(data). 
18 | Expect().Status(http.StatusOK) 19 | } 20 | -------------------------------------------------------------------------------- /utils/backoff.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/apex/log" 5 | "github.com/cenkalti/backoff/v4" 6 | "github.com/crawlab-team/go-trace" 7 | "time" 8 | ) 9 | 10 | func BackoffErrorNotify(prefix string) backoff.Notify { 11 | return func(err error, duration time.Duration) { 12 | log.Errorf("%s error: %v. reattempt in %.1f seconds...", prefix, err, duration.Seconds()) 13 | trace.PrintError(err) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /constants/rpc.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | RpcInstallLang = "install_lang" 5 | RpcInstallDep = "install_dep" 6 | RpcUninstallDep = "uninstall_dep" 7 | RpcGetInstalledDepList = "get_installed_dep_list" 8 | RpcGetLang = "get_lang" 9 | RpcCancelTask = "cancel_task" 10 | RpcGetSystemInfoService = "get_system_info" 11 | RpcRemoveSpider = "remove_spider" 12 | ) 13 | -------------------------------------------------------------------------------- /entity/spider.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type SpiderType struct { 4 | Type string `json:"type" bson:"_id"` 5 | Count int `json:"count" bson:"count"` 6 | } 7 | 8 | type ScrapySettingParam struct { 9 | Key string `json:"key"` 10 | Value interface{} `json:"value"` 11 | Type string `json:"type"` 12 | } 13 | 14 | type ScrapyItem struct { 15 | Name string `json:"name"` 16 | Fields []string `json:"fields"` 17 | } 18 | -------------------------------------------------------------------------------- /entity/rpc.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type RpcMessage struct { 4 
| Id string `json:"id"` // 消息ID 5 | Method string `json:"method"` // 消息方法 6 | NodeId string `json:"node_id"` // 节点ID 7 | Params map[string]string `json:"params"` // 参数 8 | Timeout int `json:"timeout"` // 超时 9 | Result string `json:"result"` // 结果 10 | Error string `json:"error"` // 错误 11 | } 12 | -------------------------------------------------------------------------------- /models/models/task_queue_item_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type TaskQueueItemV2 struct { 8 | any `collection:"task_queue"` 9 | BaseModelV2[TaskQueueItemV2] `bson:",inline"` 10 | Priority int `json:"p" bson:"p"` 11 | NodeId primitive.ObjectID `json:"nid,omitempty" bson:"nid,omitempty"` 12 | } 13 | -------------------------------------------------------------------------------- /controllers/http.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | type Response[T any] struct { 4 | Status string `json:"status"` 5 | Message string `json:"message"` 6 | Data T `json:"data"` 7 | Error string `json:"error"` 8 | } 9 | 10 | type ListResponse[T any] struct { 11 | Status string `json:"status"` 12 | Message string `json:"message"` 13 | Total int `json:"total"` 14 | Data []T `json:"data"` 15 | Error string `json:"error"` 16 | } 17 | -------------------------------------------------------------------------------- /interfaces/result_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/generic" 5 | "time" 6 | ) 7 | 8 | type ResultService interface { 9 | Insert(records ...interface{}) (err error) 10 | List(query generic.ListQuery, opts *generic.ListOptions) (results []interface{}, err error) 11 | Count(query generic.ListQuery) (n int, err error) 12 | Index(fields []string) 
13 | SetTime(t time.Time) 14 | GetTime() (t time.Time) 15 | } 16 | -------------------------------------------------------------------------------- /controllers/delegate_action.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | func NewActionControllerDelegate(id ControllerId, actions []Action) (d *ActionControllerDelegate) { 4 | return &ActionControllerDelegate{ 5 | id: id, 6 | actions: actions, 7 | } 8 | } 9 | 10 | type ActionControllerDelegate struct { 11 | id ControllerId 12 | actions []Action 13 | } 14 | 15 | func (ctr *ActionControllerDelegate) Actions() (actions []Action) { 16 | return ctr.actions 17 | } 18 | -------------------------------------------------------------------------------- /interfaces/fs_service_v2.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type FsServiceV2 interface { 4 | List(path string) (files []FsFileInfo, err error) 5 | GetFile(path string) (data []byte, err error) 6 | GetFileInfo(path string) (file FsFileInfo, err error) 7 | Save(path string, data []byte) (err error) 8 | CreateDir(path string) (err error) 9 | Rename(path, newPath string) (err error) 10 | Delete(path string) (err error) 11 | Copy(path, newPath string) (err error) 12 | } 13 | -------------------------------------------------------------------------------- /models/models/role_permission_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type RolePermissionV2 struct { 8 | any `collection:"role_permissions"` 9 | BaseModelV2[RolePermissionV2] `bson:",inline"` 10 | RoleId primitive.ObjectID `json:"role_id" bson:"role_id"` 11 | PermissionId primitive.ObjectID `json:"permission_id" bson:"permission_id"` 12 | } 13 | -------------------------------------------------------------------------------- 
/task/log/driver.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | func GetLogDriver(logDriverType string) (driver Driver, err error) { 4 | switch logDriverType { 5 | case DriverTypeFile: 6 | driver, err = GetFileLogDriver() 7 | if err != nil { 8 | return driver, err 9 | } 10 | case DriverTypeMongo: 11 | return driver, ErrNotImplemented 12 | case DriverTypeEs: 13 | return driver, ErrNotImplemented 14 | default: 15 | return driver, ErrInvalidType 16 | } 17 | return driver, nil 18 | } 19 | -------------------------------------------------------------------------------- /process/daemon_test.go: -------------------------------------------------------------------------------- 1 | package process 2 | 3 | import ( 4 | "github.com/stretchr/testify/require" 5 | "os/exec" 6 | "testing" 7 | ) 8 | 9 | func TestDaemon(t *testing.T) { 10 | d := NewProcessDaemon(func() *exec.Cmd { 11 | return exec.Command("echo", "hello") 12 | }) 13 | err := d.Start() 14 | require.Nil(t, err) 15 | 16 | d = NewProcessDaemon(func() *exec.Cmd { 17 | return exec.Command("return", "1") 18 | }) 19 | err = d.Start() 20 | require.NotNil(t, err) 21 | } 22 | -------------------------------------------------------------------------------- /process/options.go: -------------------------------------------------------------------------------- 1 | package process 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type DaemonOption func(d interfaces.ProcessDaemon) 9 | 10 | func WithDaemonMaxErrors(maxErrors int) DaemonOption { 11 | return func(d interfaces.ProcessDaemon) { 12 | d.SetMaxErrors(maxErrors) 13 | } 14 | } 15 | 16 | func WithExitTimeout(timeout time.Duration) DaemonOption { 17 | return func(d interfaces.ProcessDaemon) { 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /entity/pagination.go: 
-------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import "github.com/crawlab-team/crawlab-core/constants" 4 | 5 | type Pagination struct { 6 | Page int `form:"page" url:"page"` 7 | Size int `form:"size" url:"size"` 8 | } 9 | 10 | func (p *Pagination) IsZero() (ok bool) { 11 | return p.Page == 0 && 12 | p.Size == 0 13 | } 14 | 15 | func (p *Pagination) IsDefault() (ok bool) { 16 | return p.Page == constants.PaginationDefaultPage && 17 | p.Size == constants.PaginationDefaultSize 18 | } 19 | -------------------------------------------------------------------------------- /sys_exec/sys_exec_darwin.go: -------------------------------------------------------------------------------- 1 | //go:build darwin 2 | // +build darwin 3 | 4 | package sys_exec 5 | 6 | import ( 7 | "os/exec" 8 | "syscall" 9 | ) 10 | 11 | func BuildCmd(cmdStr string) *exec.Cmd { 12 | return exec.Command("sh", "-c", cmdStr) 13 | } 14 | 15 | func SetPgid(cmd *exec.Cmd) { 16 | if cmd == nil { 17 | return 18 | } 19 | if cmd.SysProcAttr == nil { 20 | cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 21 | } else { 22 | cmd.SysProcAttr.Setpgid = true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /sys_exec/sys_exec_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package sys_exec 5 | 6 | import ( 7 | "os/exec" 8 | "syscall" 9 | ) 10 | 11 | func BuildCmd(cmdStr string) *exec.Cmd { 12 | return exec.Command("sh", "-c", cmdStr) 13 | } 14 | 15 | func SetPgid(cmd *exec.Cmd) { 16 | if cmd == nil { 17 | return 18 | } 19 | if cmd.SysProcAttr == nil { 20 | cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 21 | } else { 22 | cmd.SysProcAttr.Setpgid = true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /entity/node.go: 
-------------------------------------------------------------------------------- 1 | package entity 2 | 3 | type NodeInfo struct { 4 | Key string `json:"key"` 5 | IsMaster bool `json:"is_master"` 6 | Name string `json:"name"` 7 | Ip string `json:"ip"` 8 | Mac string `json:"mac"` 9 | Hostname string `json:"hostname"` 10 | Description string `json:"description"` 11 | AuthKey string `json:"auth_key"` 12 | MaxRunners int `json:"max_runners"` 13 | } 14 | 15 | func (n NodeInfo) Value() interface{} { 16 | return n 17 | } 18 | -------------------------------------------------------------------------------- /controllers/version.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/config" 5 | "github.com/gin-gonic/gin" 6 | "net/http" 7 | ) 8 | 9 | func GetVersion(c *gin.Context) { 10 | HandleSuccessWithData(c, config.GetVersion()) 11 | } 12 | 13 | func getVersionActions() []Action { 14 | return []Action{ 15 | { 16 | Method: http.MethodGet, 17 | Path: "", 18 | HandlerFunc: GetVersion, 19 | }, 20 | } 21 | } 22 | 23 | var VersionController ActionController 24 | -------------------------------------------------------------------------------- /interfaces/result_service_mongo.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type ResultServiceMongo interface { 10 | GetId() (id primitive.ObjectID) 11 | SetId(id primitive.ObjectID) 12 | List(query bson.M, opts *mongo.FindOptions) (results []Result, err error) 13 | Count(query bson.M) (total int, err error) 14 | Insert(docs ...interface{}) (err error) 15 | } 16 | -------------------------------------------------------------------------------- /task/scheduler/options.go: 
-------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type Option func(svc interfaces.TaskSchedulerService) 9 | 10 | func WithConfigPath(path string) Option { 11 | return func(svc interfaces.TaskSchedulerService) { 12 | svc.SetConfigPath(path) 13 | } 14 | } 15 | 16 | func WithInterval(interval time.Duration) Option { 17 | return func(svc interfaces.TaskSchedulerService) { 18 | svc.SetInterval(interval) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_task_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type GrpcClientModelTaskService interface { 10 | ModelBaseService 11 | GetTaskById(id primitive.ObjectID) (s Task, err error) 12 | GetTask(query bson.M, opts *mongo.FindOptions) (s Task, err error) 13 | GetTaskList(query bson.M, opts *mongo.FindOptions) (res []Task, err error) 14 | } 15 | -------------------------------------------------------------------------------- /task/log/entity.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "time" 4 | 5 | type Message struct { 6 | Id int64 `json:"id" bson:"id"` 7 | Msg string `json:"msg" bson:"msg"` 8 | Ts time.Time `json:"ts" bson:"ts"` 9 | } 10 | 11 | type Metadata struct { 12 | Size int64 `json:"size,omitempty" bson:"size"` 13 | TotalLines int64 `json:"total_lines,omitempty" bson:"total_lines"` 14 | TotalBytes int64 `json:"total_bytes,omitempty" bson:"total_bytes"` 15 | Md5 string `json:"md5,omitempty" bson:"md5"` 16 | } 17 | -------------------------------------------------------------------------------- 
/config/path.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | ) 6 | 7 | type PathService struct { 8 | cfgPath string 9 | } 10 | 11 | func (svc *PathService) GetConfigPath() (path string) { 12 | return svc.cfgPath 13 | } 14 | 15 | func (svc *PathService) SetConfigPath(path string) { 16 | svc.cfgPath = path 17 | } 18 | 19 | func NewConfigPathService() (svc interfaces.WithConfigPath) { 20 | svc = &PathService{} 21 | svc.SetConfigPath(GetConfigPath()) 22 | return svc 23 | } 24 | -------------------------------------------------------------------------------- /interfaces/task_scheduler_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | "time" 6 | ) 7 | 8 | type TaskSchedulerService interface { 9 | TaskBaseService 10 | // Enqueue task into the task queue 11 | Enqueue(t Task) (t2 Task, err error) 12 | // Cancel task to corresponding node 13 | Cancel(id primitive.ObjectID, args ...interface{}) (err error) 14 | // SetInterval set the interval or duration between two adjacent fetches 15 | SetInterval(interval time.Duration) 16 | } 17 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_spider_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type GrpcClientModelSpiderService interface { 10 | ModelBaseService 11 | GetSpiderById(id primitive.ObjectID) (s Spider, err error) 12 | GetSpider(query bson.M, opts *mongo.FindOptions) (s Spider, err error) 13 | GetSpiderList(query bson.M, opts *mongo.FindOptions) (res []Spider, err error) 
14 | } 15 | -------------------------------------------------------------------------------- /routes/router_test.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/stretchr/testify/require" 6 | "net/http" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestInitRoutes(t *testing.T) { 12 | app := gin.New() 13 | err := InitRoutes(app) 14 | require.Nil(t, err) 15 | 16 | srv := &http.Server{ 17 | Handler: app, 18 | Addr: "localhost:8000", 19 | } 20 | go func() { 21 | err = srv.ListenAndServe() 22 | require.Nil(t, err) 23 | }() 24 | 25 | time.Sleep(5 * time.Second) 26 | } 27 | -------------------------------------------------------------------------------- /controllers/delegate_list_action.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | ) 6 | 7 | func NewListPostActionControllerDelegate(id ControllerId, svc interfaces.ModelBaseService, actions []Action) (d *ListActionControllerDelegate) { 8 | return &ListActionControllerDelegate{ 9 | NewListControllerDelegate(id, svc), 10 | NewActionControllerDelegate(id, actions), 11 | } 12 | } 13 | 14 | type ListActionControllerDelegate struct { 15 | ListController 16 | ActionController 17 | } 18 | -------------------------------------------------------------------------------- /interfaces/model_git.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | // Git interface 4 | type Git interface { 5 | Model 6 | GetUrl() (url string) 7 | SetUrl(url string) 8 | GetAuthType() (authType string) 9 | SetAuthType(authType string) 10 | GetUsername() (username string) 11 | SetUsername(username string) 12 | GetPassword() (password string) 13 | SetPassword(password string) 14 | GetCurrentBranch() (currentBranch string) 15 | 
SetCurrentBranch(currentBranch string) 16 | GetAutoPull() (autoPull bool) 17 | SetAutoPull(autoPull bool) 18 | } 19 | -------------------------------------------------------------------------------- /config/base.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/mitchellh/go-homedir" 5 | "github.com/spf13/viper" 6 | "path/filepath" 7 | ) 8 | 9 | var HomeDirPath, _ = homedir.Dir() 10 | 11 | const configDirName = ".crawlab" 12 | 13 | const configName = "config.json" 14 | 15 | func GetConfigPath() string { 16 | if viper.GetString("metadata") != "" { 17 | MetadataPath := viper.GetString("metadata") 18 | return filepath.Join(MetadataPath, configName) 19 | } 20 | return filepath.Join(HomeDirPath, configDirName, configName) 21 | } 22 | -------------------------------------------------------------------------------- /constants/system.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const ( 4 | Windows = "windows" 5 | Linux = "linux" 6 | Darwin = "darwin" 7 | ) 8 | 9 | const ( 10 | Python = "python" 11 | Nodejs = "node" 12 | Java = "java" 13 | ) 14 | 15 | const ( 16 | InstallStatusNotInstalled = "not-installed" 17 | InstallStatusInstalling = "installing" 18 | InstallStatusInstallingOther = "installing-other" 19 | InstallStatusInstalled = "installed" 20 | ) 21 | 22 | const ( 23 | LangTypeLang = "lang" 24 | LangTypeWebDriver = "webdriver" 25 | ) 26 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_task_stat_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type GrpcClientModelTaskStatService interface { 10 | ModelBaseService 11 | 
GetTaskStatById(id primitive.ObjectID) (s TaskStat, err error) 12 | GetTaskStat(query bson.M, opts *mongo.FindOptions) (s TaskStat, err error) 13 | GetTaskStatList(query bson.M, opts *mongo.FindOptions) (res []TaskStat, err error) 14 | } 15 | -------------------------------------------------------------------------------- /interfaces/grpc_server.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | grpc "github.com/crawlab-team/crawlab-grpc" 5 | ) 6 | 7 | type GrpcServer interface { 8 | GrpcBase 9 | SetAddress(Address) 10 | GetSubscribe(key string) (sub GrpcSubscribe, err error) 11 | SetSubscribe(key string, sub GrpcSubscribe) 12 | DeleteSubscribe(key string) 13 | SendStreamMessage(key string, code grpc.StreamMessageCode) (err error) 14 | SendStreamMessageWithData(nodeKey string, code grpc.StreamMessageCode, d interface{}) (err error) 15 | IsStopped() (res bool) 16 | } 17 | -------------------------------------------------------------------------------- /models/models/git_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type GitV2 struct { 4 | any `collection:"gits"` 5 | BaseModelV2[GitV2] `bson:",inline"` 6 | Url string `json:"url" bson:"url"` 7 | AuthType string `json:"auth_type" bson:"auth_type"` 8 | Username string `json:"username" bson:"username"` 9 | Password string `json:"password" bson:"password"` 10 | CurrentBranch string `json:"current_branch" bson:"current_branch"` 11 | AutoPull bool `json:"auto_pull" bson:"auto_pull"` 12 | } 13 | -------------------------------------------------------------------------------- /utils/result.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | ) 7 | 8 | func GetResultHash(value interface{}, keys []string) (res string, err error) { 9 | m := 
make(map[string]interface{}) 10 | for _, k := range keys { 11 | _value, ok := value.(interfaces.Result) 12 | if !ok { 13 | continue 14 | } 15 | v := _value.GetValue(k) 16 | m[k] = v 17 | } 18 | data, err := json.Marshal(m) 19 | if err != nil { 20 | return "", err 21 | } 22 | return EncryptMd5(string(data)), nil 23 | } 24 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_node_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type GrpcClientModelNodeService interface { 10 | ModelBaseService 11 | GetNodeById(id primitive.ObjectID) (n Node, err error) 12 | GetNode(query bson.M, opts *mongo.FindOptions) (n Node, err error) 13 | GetNodeByKey(key string) (n Node, err error) 14 | GetNodeList(query bson.M, opts *mongo.FindOptions) (res []Node, err error) 15 | } 16 | -------------------------------------------------------------------------------- /interfaces/grpc_client_model_environment_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type GrpcClientModelEnvironmentService interface { 10 | ModelBaseService 11 | GetEnvironmentById(id primitive.ObjectID) (s Environment, err error) 12 | GetEnvironment(query bson.M, opts *mongo.FindOptions) (s Environment, err error) 13 | GetEnvironmentList(query bson.M, opts *mongo.FindOptions) (res []Environment, err error) 14 | } 15 | -------------------------------------------------------------------------------- /entity/version.go: -------------------------------------------------------------------------------- 1 | 
// Release holds the JSON-decoded description of a single release.
type Release struct {
	Name        string `json:"name"`
	Draft       bool   `json:"draft"`
	PreRelease  bool   `json:"pre_release"`
	PublishedAt string `json:"published_at"`
	Body        string `json:"body"`
}

// ReleaseSlices is a list of releases that can be ordered by PublishedAt
// through the Len/Less/Swap method set.
type ReleaseSlices []Release

// Len returns the number of releases in the list.
func (rs ReleaseSlices) Len() int {
	n := len(rs)
	return n
}

// Less reports whether release i was published before release j, using plain
// string comparison of the PublishedAt values.
func (rs ReleaseSlices) Less(i, j int) bool {
	left, right := rs[i].PublishedAt, rs[j].PublishedAt
	return left < right
}

// Swap exchanges the releases at positions i and j.
func (rs ReleaseSlices) Swap(i, j int) {
	tmp := rs[i]
	rs[i] = rs[j]
	rs[j] = tmp
}
// BinderInterface groups the binding operations that turn a gin request
// context into models or batch request payloads.
// NOTE(review): which part of the request (body, query, ...) is read is
// decided by the implementations and is not visible from this declaration.
type BinderInterface interface {
	// Bind produces a single model from the request context.
	Bind(c *gin.Context) (res interfaces.Model, err error)
	// BindList produces a list of models from the request context.
	BindList(c *gin.Context) (res []interfaces.Model, err error)
	// BindBatchRequestPayload produces a batch request payload from the
	// request context.
	BindBatchRequestPayload(c *gin.Context) (payload entity.BatchRequestPayload, err error)
	// BindBatchRequestPayloadWithStringData produces a batch request payload
	// carrying string data, together with a model.
	// NOTE(review): presumably res is decoded from the payload's string data —
	// confirm against the implementation.
	BindBatchRequestPayloadWithStringData(c *gin.Context) (payload entity.BatchRequestPayloadWithStringData, res interfaces.Model, err error)
}
*gin.RouterGroup 12 | } 13 | 14 | func NewRouterGroups(app *gin.Engine) (groups *RouterGroups) { 15 | return &RouterGroups{ 16 | AuthGroup: app.Group("/", middlewares.AuthorizationMiddleware()), 17 | AnonymousGroup: app.Group("/"), 18 | FilerGroup: app.Group("/filer", middlewares.FilerAuthorizationMiddleware()), 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /cmd/server.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/apps" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | func init() { 9 | rootCmd.AddCommand(serverCmd) 10 | } 11 | 12 | var serverCmd = &cobra.Command{ 13 | Use: "server", 14 | Aliases: []string{"s"}, 15 | Short: "Start Crawlab server", 16 | Long: `Start Crawlab node server that can serve as API, task scheduler, task runner, etc.`, 17 | Run: func(cmd *cobra.Command, args []string) { 18 | // app 19 | //svr := apps.GetServer(opts...) 
20 | svr := apps.GetServerV2() 21 | 22 | // start 23 | apps.Start(svr) 24 | }, 25 | } 26 | -------------------------------------------------------------------------------- /models/delegate/base_test.go: -------------------------------------------------------------------------------- 1 | package delegate_test 2 | 3 | import ( 4 | "context" 5 | "github.com/crawlab-team/crawlab-db/mongo" 6 | "go.mongodb.org/mongo-driver/bson" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func SetupTest(t *testing.T) { 12 | CleanupTest() 13 | t.Cleanup(CleanupTest) 14 | } 15 | 16 | func CleanupTest() { 17 | db := mongo.GetMongoDb("") 18 | names, _ := db.ListCollectionNames(context.Background(), bson.M{}) 19 | for _, n := range names { 20 | _, _ = db.Collection(n).DeleteMany(context.Background(), bson.M{}) 21 | } 22 | 23 | // avoid caching 24 | time.Sleep(200 * time.Millisecond) 25 | } 26 | -------------------------------------------------------------------------------- /interfaces/model_delegate.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ModelDelegateMethod string 4 | 5 | type ModelDelegate interface { 6 | Add() error 7 | Save() error 8 | Delete() error 9 | GetArtifact() (ModelArtifact, error) 10 | GetModel() Model 11 | Refresh() error 12 | ToBytes(interface{}) ([]byte, error) 13 | } 14 | 15 | const ( 16 | ModelDelegateMethodAdd = "add" 17 | ModelDelegateMethodSave = "save" 18 | ModelDelegateMethodDelete = "delete" 19 | ModelDelegateMethodGetArtifact = "get-artifact" 20 | ModelDelegateMethodRefresh = "refresh" 21 | ModelDelegateMethodChange = "change" 22 | ) 23 | -------------------------------------------------------------------------------- /interfaces/model_node.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "time" 4 | 5 | type Node interface { 6 | ModelWithNameDescription 7 | GetKey() (key string) 8 | GetIsMaster() (ok bool) 9 | 
GetActive() (active bool) 10 | SetActive(active bool) 11 | SetActiveTs(activeTs time.Time) 12 | GetStatus() (status string) 13 | SetStatus(status string) 14 | GetEnabled() (enabled bool) 15 | SetEnabled(enabled bool) 16 | GetAvailableRunners() (runners int) 17 | SetAvailableRunners(runners int) 18 | GetMaxRunners() (runners int) 19 | SetMaxRunners(runners int) 20 | IncrementAvailableRunners() 21 | DecrementAvailableRunners() 22 | } 23 | -------------------------------------------------------------------------------- /utils/init.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "sync" 6 | ) 7 | 8 | var moduleInitializedMap = sync.Map{} 9 | 10 | func InitModule(id interfaces.ModuleId, fn func() error) (err error) { 11 | res, ok := moduleInitializedMap.Load(id) 12 | if ok { 13 | initialized, _ := res.(bool) 14 | if initialized { 15 | return nil 16 | } 17 | } 18 | 19 | if err := fn(); err != nil { 20 | return err 21 | } 22 | 23 | moduleInitializedMap.Store(id, true) 24 | 25 | return nil 26 | } 27 | 28 | func ForceInitModule(fn func() error) (err error) { 29 | return fn() 30 | } 31 | -------------------------------------------------------------------------------- /errors/node.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewNodeError(msg string) (err error) { 4 | return NewError(ErrorPrefixNode, msg) 5 | } 6 | 7 | var ErrorNodeUnregistered = NewNodeError("unregistered") 8 | var ErrorNodeServiceNotExists = NewNodeError("service not exists") 9 | var ErrorNodeInvalidType = NewNodeError("invalid type") 10 | var ErrorNodeInvalidStatus = NewNodeError("invalid status") 11 | var ErrorNodeInvalidCode = NewNodeError("invalid code") 12 | var ErrorNodeInvalidNodeKey = NewNodeError("invalid node key") 13 | var ErrorNodeMonitorError = NewNodeError("monitor error") 14 | var 
ErrorNodeNotExists = NewNodeError("not exists") 15 | -------------------------------------------------------------------------------- /models/delegate/utils_event.go: -------------------------------------------------------------------------------- 1 | package delegate 2 | 3 | import ( 4 | "fmt" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | ) 7 | 8 | func GetEventName(d *ModelDelegate, method interfaces.ModelDelegateMethod) (eventName string) { 9 | return getEventName(d, method) 10 | } 11 | 12 | func getEventName(d *ModelDelegate, method interfaces.ModelDelegateMethod) (eventName string) { 13 | if method == interfaces.ModelDelegateMethodSave { 14 | hasChange := d.hasChange() 15 | if hasChange { 16 | method = interfaces.ModelDelegateMethodChange 17 | } 18 | } 19 | return fmt.Sprintf("model:%s:%s", d.colName, method) 20 | } 21 | -------------------------------------------------------------------------------- /interfaces/model_task_stat.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import "time" 4 | 5 | type TaskStat interface { 6 | Model 7 | GetCreateTs() (ts time.Time) 8 | SetCreateTs(ts time.Time) 9 | GetStartTs() (ts time.Time) 10 | SetStartTs(ts time.Time) 11 | GetEndTs() (ts time.Time) 12 | SetEndTs(ts time.Time) 13 | GetWaitDuration() (d int64) 14 | SetWaitDuration(d int64) 15 | GetRuntimeDuration() (d int64) 16 | SetRuntimeDuration(d int64) 17 | GetTotalDuration() (d int64) 18 | SetTotalDuration(d int64) 19 | GetResultCount() (c int64) 20 | SetResultCount(c int64) 21 | GetErrorLogCount() (c int64) 22 | SetErrorLogCount(c int64) 23 | } 24 | -------------------------------------------------------------------------------- /utils/sql.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/generic" 5 | "github.com/upper/db/v4" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 
| 9 | func GetSqlQuery(query generic.ListQuery) (res db.Cond) { 10 | res = db.Cond{} 11 | for _, c := range query { 12 | switch c.Value.(type) { 13 | case primitive.ObjectID: 14 | c.Value = c.Value.(primitive.ObjectID).Hex() 15 | } 16 | switch c.Op { 17 | case generic.OpEqual: 18 | res[c.Key] = c.Value 19 | default: 20 | res[c.Key] = db.Cond{ 21 | c.Op: c.Value, 22 | } 23 | } 24 | } 25 | // TODO: sort 26 | return res 27 | } 28 | -------------------------------------------------------------------------------- /interfaces/fs_service_options.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | type ServiceCrudOptions struct { 4 | IsAbsolute bool // whether the path is absolute 5 | OnlyFromWorkspace bool // whether only sync from workspace 6 | NotSyncToWorkspace bool // whether not sync to workspace 7 | } 8 | 9 | type ServiceCrudOption func(o *ServiceCrudOptions) 10 | 11 | func WithOnlyFromWorkspace() ServiceCrudOption { 12 | return func(o *ServiceCrudOptions) { 13 | o.OnlyFromWorkspace = true 14 | } 15 | } 16 | 17 | func WithNotSyncToWorkspace() ServiceCrudOption { 18 | return func(o *ServiceCrudOptions) { 19 | o.NotSyncToWorkspace = true 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /interfaces/schedule_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/robfig/cron/v3" 5 | "time" 6 | ) 7 | 8 | type ScheduleService interface { 9 | WithConfigPath 10 | Module 11 | GetLocation() (loc *time.Location) 12 | SetLocation(loc *time.Location) 13 | GetDelay() (delay bool) 14 | SetDelay(delay bool) 15 | GetSkip() (skip bool) 16 | SetSkip(skip bool) 17 | GetUpdateInterval() (interval time.Duration) 18 | SetUpdateInterval(interval time.Duration) 19 | Enable(s Schedule, args ...interface{}) (err error) 20 | Disable(s Schedule, args ...interface{}) (err error) 21 | 
Update() 22 | GetCron() (c *cron.Cron) 23 | } 24 | -------------------------------------------------------------------------------- /entity/grpc_subscribe.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | ) 6 | 7 | type GrpcSubscribe struct { 8 | Stream interfaces.GrpcStream 9 | Finished chan bool 10 | } 11 | 12 | func (sub *GrpcSubscribe) GetStream() interfaces.GrpcStream { 13 | return sub.Stream 14 | } 15 | 16 | func (sub *GrpcSubscribe) GetStreamBidirectional() interfaces.GrpcStreamBidirectional { 17 | stream, ok := sub.Stream.(interfaces.GrpcStreamBidirectional) 18 | if !ok { 19 | return nil 20 | } 21 | return stream 22 | } 23 | 24 | func (sub *GrpcSubscribe) GetFinished() chan bool { 25 | return sub.Finished 26 | } 27 | -------------------------------------------------------------------------------- /models/config_spider/common.go: -------------------------------------------------------------------------------- 1 | package config_spider 2 | 3 | import "github.com/crawlab-team/crawlab-core/entity" 4 | 5 | func GetAllFields(data entity.ConfigSpiderData) []entity.Field { 6 | var fields []entity.Field 7 | for _, stage := range data.Stages { 8 | fields = append(fields, stage.Fields...) 
9 | } 10 | return fields 11 | } 12 | 13 | func GetStartStageName(data entity.ConfigSpiderData) string { 14 | // 如果 start_stage 设置了且在 stages 里,则返回 15 | if data.StartStage != "" { 16 | return data.StartStage 17 | } 18 | 19 | // 否则返回第一个 stage 20 | for _, stage := range data.Stages { 21 | return stage.Name 22 | } 23 | return "" 24 | } 25 | -------------------------------------------------------------------------------- /utils/encrypt_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/require" 6 | "testing" 7 | ) 8 | 9 | func TestEncryptAesPassword(t *testing.T) { 10 | plainText := "crawlab" 11 | encryptedText, err := EncryptAES(plainText) 12 | require.Nil(t, err) 13 | decryptedText, err := DecryptAES(encryptedText) 14 | require.Nil(t, err) 15 | fmt.Println(fmt.Sprintf("plainText: %s", plainText)) 16 | fmt.Println(fmt.Sprintf("encryptedText: %s", encryptedText)) 17 | fmt.Println(fmt.Sprintf("decryptedText: %s", decryptedText)) 18 | require.Equal(t, decryptedText, plainText) 19 | require.NotEqual(t, decryptedText, encryptedText) 20 | } 21 | -------------------------------------------------------------------------------- /models/models/permission_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type PermissionV2 struct { 4 | any `collection:"permissions"` 5 | BaseModelV2[PermissionV2] `bson:",inline"` 6 | Key string `json:"key" bson:"key"` 7 | Name string `json:"name" bson:"name"` 8 | Description string `json:"description" bson:"description"` 9 | Type string `json:"type" bson:"type"` 10 | Target []string `json:"target" bson:"target"` 11 | Allow []string `json:"allow" bson:"allow"` 12 | Deny []string `json:"deny" bson:"deny"` 13 | } 14 | -------------------------------------------------------------------------------- /models/models/data_collection_v2.go: 
-------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/entity" 5 | ) 6 | 7 | type DataCollectionV2 struct { 8 | any `collection:"data_collections"` 9 | BaseModelV2[DataCollection] `bson:",inline"` 10 | Name string `json:"name" bson:"name"` 11 | Fields []entity.DataField `json:"fields" bson:"fields"` 12 | Dedup struct { 13 | Enabled bool `json:"enabled" bson:"enabled"` 14 | Keys []string `json:"keys" bson:"keys"` 15 | Type string `json:"type" bson:"type"` 16 | } `json:"dedup" bson:"dedup"` 17 | } 18 | -------------------------------------------------------------------------------- /models/service/options.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "github.com/crawlab-team/crawlab-db/mongo" 6 | ) 7 | 8 | type Option func(ModelService) 9 | 10 | type BaseServiceOption func(svc interfaces.ModelBaseService) 11 | 12 | func WithBaseServiceModelId(id interfaces.ModelId) BaseServiceOption { 13 | return func(svc interfaces.ModelBaseService) { 14 | svc.SetModelId(id) 15 | } 16 | } 17 | 18 | func WithBaseServiceCol(col *mongo.Col) BaseServiceOption { 19 | return func(svc interfaces.ModelBaseService) { 20 | _svc, ok := svc.(*BaseService) 21 | if ok { 22 | _svc.SetCol(col) 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /apps/server_test.go: -------------------------------------------------------------------------------- 1 | package apps 2 | 3 | import ( 4 | "fmt" 5 | "github.com/imroc/req" 6 | "github.com/spf13/viper" 7 | "github.com/stretchr/testify/require" 8 | "os" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func init() { 14 | _ = os.Setenv("CRAWLAB_DEMO", "false") 15 | } 16 | 17 | func TestServer_Start(t *testing.T) { 18 | svr := GetServer() 19 | 20 | // start 21 | go Start(svr) 22 
| time.Sleep(1 * time.Second) 23 | 24 | res, err := req.Get(fmt.Sprintf("http://localhost:%s/system-info", viper.GetString("server.port"))) 25 | require.Nil(t, err) 26 | resStr, err := res.ToString() 27 | require.Nil(t, err) 28 | require.Contains(t, resStr, "success") 29 | } 30 | -------------------------------------------------------------------------------- /controllers/system_info.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/entity" 5 | "github.com/gin-gonic/gin" 6 | "github.com/spf13/viper" 7 | "net/http" 8 | ) 9 | 10 | func getSystemInfo(c *gin.Context) { 11 | info := &entity.SystemInfo{ 12 | Edition: viper.GetString("info.edition"), 13 | Version: viper.GetString("info.version"), 14 | } 15 | HandleSuccessWithData(c, info) 16 | } 17 | 18 | func getSystemInfoActions() []Action { 19 | return []Action{ 20 | { 21 | Path: "", 22 | Method: http.MethodGet, 23 | HandlerFunc: getSystemInfo, 24 | }, 25 | } 26 | } 27 | 28 | var SystemInfoController ActionController 29 | -------------------------------------------------------------------------------- /models/models/job.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Job struct { 9 | Id primitive.ObjectID `bson:"_id" json:"_id"` 10 | TaskId primitive.ObjectID `bson:"task_id" json:"task_id"` 11 | } 12 | 13 | func (j *Job) GetId() (id primitive.ObjectID) { 14 | return j.Id 15 | } 16 | 17 | func (j *Job) SetId(id primitive.ObjectID) { 18 | j.Id = id 19 | } 20 | 21 | type JobList []Job 22 | 23 | func (l *JobList) GetModels() (res []interfaces.Model) { 24 | for i := range *l { 25 | d := (*l)[i] 26 | res = append(res, &d) 27 | } 28 | return res 29 | } 30 | 
// ModelServiceV2Payload is the JSON payload exchanged for model service (V2)
// calls. Every field is optional in the JSON encoding (omitempty).
// NOTE(review): which fields are meaningful presumably depends on the
// operation being requested — confirm against the gRPC server/client code.
type ModelServiceV2Payload struct {
	// Type is a string identifying the model type (exact format not visible here).
	Type string `json:"type,omitempty"`
	// Id is the id of a single document.
	// NOTE(review): primitive.ObjectID appears to be a fixed-size array type,
	// for which encoding/json's omitempty has no effect, so a zero id is
	// presumably still emitted — confirm.
	Id primitive.ObjectID `json:"_id,omitempty"`
	// Query is a bson filter document.
	Query bson.M `json:"query,omitempty"`
	// FindOptions carries optional find options for query operations.
	FindOptions *mongo.FindOptions `json:"find_options,omitempty"`
	// Model is a single model value.
	Model any `json:"model,omitempty"`
	// Update is a bson update document.
	Update bson.M `json:"update,omitempty"`
	// Models is a list of model values.
	Models []any `json:"models,omitempty"`
}
primitive.ObjectID `json:"_id" bson:"_id"` 10 | Password string `json:"password" bson:"p"` 11 | } 12 | 13 | func (p *Password) GetId() (id primitive.ObjectID) { 14 | return p.Id 15 | } 16 | 17 | func (p *Password) SetId(id primitive.ObjectID) { 18 | p.Id = id 19 | } 20 | 21 | type PasswordList []Password 22 | 23 | func (l *PasswordList) GetModels() (res []interfaces.Model) { 24 | for i := range *l { 25 | d := (*l)[i] 26 | res = append(res, &d) 27 | } 28 | return res 29 | } 30 | -------------------------------------------------------------------------------- /middlewares/cors.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | func CORSMiddleware() gin.HandlerFunc { 6 | return func(c *gin.Context) { 7 | c.Writer.Header().Set("Access-Control-Allow-Origin", "*") 8 | c.Writer.Header().Set("Access-Control-Allow-Credentials", "true") 9 | c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With") 10 | c.Writer.Header().Set("Access-Control-Allow-Methods", "DELETE, POST, OPTIONS, GET, PUT") 11 | 12 | if c.Request.Method == "OPTIONS" { 13 | c.AbortWithStatus(204) 14 | return 15 | } 16 | 17 | c.Next() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /errors/ds.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewDataSourceError(msg string) (err error) { 4 | return NewError(ErrorPrefixDataSource, msg) 5 | } 6 | 7 | var ( 8 | ErrorDataSourceInvalidType = NewDataSourceError("invalid type") 9 | ErrorDataSourceNotExists = NewDataSourceError("not exists") 10 | ErrorDataSourceNotExistsInContext = NewDataSourceError("not exists in context") 11 | ErrorDataSourceAlreadyExists = NewDataSourceError("already exists") 12 | ErrorDataSourceMismatch = 
NewDataSourceError("mismatch") 13 | ErrorDataSourceMissingRequiredFields = NewDataSourceError("missing required fields") 14 | ErrorDataSourceUnauthorized = NewDataSourceError("unauthorized") 15 | ) 16 | -------------------------------------------------------------------------------- /interfaces/user_service.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/golang-jwt/jwt/v5" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type UserService interface { 10 | Init() (err error) 11 | SetJwtSecret(secret string) 12 | SetJwtSigningMethod(method jwt.SigningMethod) 13 | Create(opts *UserCreateOptions, args ...interface{}) (err error) 14 | Login(opts *UserLoginOptions) (token string, u User, err error) 15 | CheckToken(token string) (u User, err error) 16 | ChangePassword(id primitive.ObjectID, password string, args ...interface{}) (err error) 17 | MakeToken(user User) (tokenStr string, err error) 18 | GetCurrentUser(c *gin.Context) (u User, err error) 19 | } 20 | -------------------------------------------------------------------------------- /models/models/token.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Token struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Name string `json:"name" bson:"name"` 11 | Token string `json:"token" bson:"token"` 12 | } 13 | 14 | func (t *Token) GetId() (id primitive.ObjectID) { 15 | return t.Id 16 | } 17 | 18 | func (t *Token) SetId(id primitive.ObjectID) { 19 | t.Id = id 20 | } 21 | 22 | type TokenList []Token 23 | 24 | func (l *TokenList) GetModels() (res []interfaces.Model) { 25 | for i := range *l { 26 | d := (*l)[i] 27 | res = append(res, &d) 28 | } 29 | return res 30 | } 31 | 
// Contains reports whether val is an element of array.
//
// array may be a slice or an array of any element type; each element is
// compared against val with reflect.DeepEqual. Any other kind of argument,
// including an untyped nil, yields false instead of panicking.
func Contains(array interface{}, val interface{}) (fla bool) {
	// reflect.ValueOf(nil) is the zero Value whose Kind is Invalid, so a nil
	// argument falls through the switch. (reflect.TypeOf(nil) would be a nil
	// Type, and calling Kind on it panics.)
	s := reflect.ValueOf(array)
	switch s.Kind() {
	case reflect.Slice, reflect.Array:
		for i := 0; i < s.Len(); i++ {
			if reflect.DeepEqual(val, s.Index(i).Interface()) {
				return true
			}
		}
	}
	return false
}
| import "go.mongodb.org/mongo-driver/bson/primitive" 4 | 5 | type Spider interface { 6 | ModelWithNameDescription 7 | GetType() (ty string) 8 | GetMode() (mode string) 9 | SetMode(mode string) 10 | GetNodeIds() (ids []primitive.ObjectID) 11 | SetNodeIds(ids []primitive.ObjectID) 12 | GetCmd() (cmd string) 13 | SetCmd(cmd string) 14 | GetParam() (param string) 15 | SetParam(param string) 16 | GetPriority() (p int) 17 | SetPriority(p int) 18 | GetColId() (id primitive.ObjectID) 19 | SetColId(id primitive.ObjectID) 20 | GetIncrementalSync() (incrementalSync bool) 21 | SetIncrementalSync(incrementalSync bool) 22 | GetAutoInstall() (autoInstall bool) 23 | SetAutoInstall(autoInstall bool) 24 | } 25 | -------------------------------------------------------------------------------- /entity/task.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "encoding/json" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type TaskMessage struct { 9 | Id primitive.ObjectID `json:"id"` 10 | Key string `json:"key"` 11 | Cmd string `json:"cmd"` 12 | Param string `json:"param"` 13 | } 14 | 15 | func (m *TaskMessage) ToString() (string, error) { 16 | data, err := json.Marshal(&m) 17 | if err != nil { 18 | return "", err 19 | } 20 | return string(data), err 21 | } 22 | 23 | type TaskRunOptions struct { 24 | } 25 | 26 | type StreamMessageTaskData struct { 27 | TaskId primitive.ObjectID `json:"task_id"` 28 | Records []Result `json:"data"` 29 | Logs []string `json:"logs"` 30 | } 31 | -------------------------------------------------------------------------------- /apps/interfaces.go: -------------------------------------------------------------------------------- 1 | package apps 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "github.com/gin-gonic/gin" 6 | "net/http" 7 | ) 8 | 9 | type App interface { 10 | Init() 11 | Start() 12 | Wait() 13 | Stop() 14 | } 15 | 16 | type ApiApp 
// NodeApp is an App that additionally exposes the config-path accessors
// declared by interfaces.WithConfigPath.
type NodeApp interface {
	App
	interfaces.WithConfigPath
}

// ServerApp is a NodeApp that also gives access to its API application and
// its node service.
type ServerApp interface {
	NodeApp
	// GetApi returns the ApiApp owned by this server app.
	GetApi() (api ApiApp)
	// GetNodeService returns the node service used by this server app.
	GetNodeService() (masterSvc interfaces.NodeService)
}

// DockerApp is an App that is attached to a parent NodeApp and can report
// whether it is ready.
type DockerApp interface {
	App
	// GetParent returns the parent NodeApp previously set with SetParent.
	GetParent() (parent NodeApp)
	// SetParent sets the parent NodeApp.
	SetParent(parent NodeApp)
	// Ready reports whether the app is ready.
	Ready() (ok bool)
}
GetSpiderId() (id primitive.ObjectID) 17 | SetSpiderId(id primitive.ObjectID) 18 | GetMode() (mode string) 19 | SetMode(mode string) 20 | GetNodeIds() (ids []primitive.ObjectID) 21 | SetNodeIds(ids []primitive.ObjectID) 22 | GetCmd() (cmd string) 23 | SetCmd(cmd string) 24 | GetParam() (param string) 25 | SetParam(param string) 26 | GetPriority() (p int) 27 | SetPriority(p int) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var ( 8 | // Used for flags. 9 | cfgFile string 10 | 11 | rootCmd = &cobra.Command{ 12 | Use: "crawlab", 13 | Short: "CLI tool for Crawlab", 14 | Long: `The CLI tool is for controlling against Crawlab. 15 | Crawlab is a distributed web crawler and task admin platform 16 | aimed at making web crawling and task management easier. 17 | `, 18 | } 19 | ) 20 | 21 | // Execute executes the root command. 
22 | func Execute() error { 23 | return rootCmd.Execute() 24 | } 25 | 26 | // GetRootCmd get rootCmd instance 27 | func GetRootCmd() *cobra.Command { 28 | return rootCmd 29 | } 30 | 31 | func init() { 32 | rootCmd.PersistentFlags().StringVar(&cfgFile, "c", "", "Use Custom Config File") 33 | } 34 | -------------------------------------------------------------------------------- /controllers/utils_context.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | func GetUserFromContext(c *gin.Context) (u interfaces.User) { 11 | value, ok := c.Get(constants.UserContextKey) 12 | if !ok { 13 | return nil 14 | } 15 | u, ok = value.(interfaces.User) 16 | if !ok { 17 | return nil 18 | } 19 | return u 20 | } 21 | 22 | func GetUserFromContextV2(c *gin.Context) (u *models.UserV2) { 23 | value, ok := c.Get(constants.UserContextKey) 24 | if !ok { 25 | return nil 26 | } 27 | u, ok = value.(*models.UserV2) 28 | if !ok { 29 | return nil 30 | } 31 | return u 32 | } 33 | -------------------------------------------------------------------------------- /entity/grpc_base_service_params.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "github.com/crawlab-team/crawlab-db/mongo" 6 | "go.mongodb.org/mongo-driver/bson" 7 | "go.mongodb.org/mongo-driver/bson/primitive" 8 | ) 9 | 10 | type GrpcBaseServiceParams struct { 11 | Query bson.M `json:"q"` 12 | Id primitive.ObjectID `json:"id"` 13 | Update bson.M `json:"u"` 14 | Doc interfaces.Model `json:"d"` 15 | Fields []string `json:"f"` 16 | FindOptions *mongo.FindOptions `json:"o"` 17 | Docs []interface{} `json:"dl"` 18 | User interfaces.User 
// Filter operator codes. Each constant maps an operator name to its short
// string code.
// NOTE(review): presumably these are the codes carried in a filter
// condition's operator field — confirm against the filter parsing code.
const (
	FilterOpNotSet           = "ns"
	FilterOpContains         = "c"
	FilterOpNotContains      = "nc"
	FilterOpRegex            = "r"
	FilterOpEqual            = "eq"
	FilterOpNotEqual         = "ne"
	FilterOpIn               = "in"
	FilterOpNotIn            = "nin"
	FilterOpGreaterThan      = "gt"
	FilterOpLessThan         = "lt"
	FilterOpGreaterThanEqual = "gte"
	FilterOpLessThanEqual    = "lte"
	FilterOpSearch           = "s"
)
// DependencySettingV2 is the model for one dependency setting entry.
//
// The embedded `any` field exists only to carry the `collection` struct tag
// ("dependency_settings"); BaseModelV2 is inlined into the BSON document.
//
// NOTE(review): BaseModelV2 is instantiated with DependencySetting rather
// than DependencySettingV2 — confirm this type argument is intentional.
type DependencySettingV2 struct {
	any                            `collection:"dependency_settings"`
	BaseModelV2[DependencySetting] `bson:",inline"`
	Key                            string    `json:"key" bson:"key"`
	Name                           string    `json:"name" bson:"name"`
	Description                    string    `json:"description" bson:"description"`
	Enabled                        bool      `json:"enabled" bson:"enabled"`
	Cmd                            string    `json:"cmd" bson:"cmd"`
	Proxy                          string    `json:"proxy" bson:"proxy"`
	LastUpdateTs                   time.Time `json:"last_update_ts" bson:"last_update_ts"`
}
of the spider 11 | Schedule(id primitive.ObjectID, opts *SpiderRunOptions) (taskIds []primitive.ObjectID, err error) 12 | // Clone the spider 13 | Clone(id primitive.ObjectID, opts *SpiderCloneOptions) (err error) 14 | // Delete the spider 15 | Delete(id primitive.ObjectID) (err error) 16 | // SyncGit syncs all git repositories 17 | SyncGit() (err error) 18 | // SyncGitOne syncs one git repository 19 | SyncGitOne(g Git) (err error) 20 | // Export exports the spider and return zip file path 21 | Export(id primitive.ObjectID) (filePath string, err error) 22 | } 23 | -------------------------------------------------------------------------------- /models/models/task_queue_item.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type TaskQueueItem struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Priority int `json:"p" bson:"p"` 11 | NodeId primitive.ObjectID `json:"nid,omitempty" bson:"nid,omitempty"` 12 | } 13 | 14 | func (t *TaskQueueItem) GetId() (id primitive.ObjectID) { 15 | return t.Id 16 | } 17 | 18 | func (t *TaskQueueItem) SetId(id primitive.ObjectID) { 19 | t.Id = id 20 | } 21 | 22 | type TaskQueueItemList []TaskQueueItem 23 | 24 | func (l *TaskQueueItemList) GetModels() (res []interfaces.Model) { 25 | for i := range *l { 26 | d := (*l)[i] 27 | res = append(res, &d) 28 | } 29 | return res 30 | } 31 | -------------------------------------------------------------------------------- /models/models/variable.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Variable struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Key string `json:"key" bson:"key"` 11 | Value 
string `json:"value" bson:"value"` 12 | Remark string `json:"remark" bson:"remark"` 13 | } 14 | 15 | func (v *Variable) GetId() (id primitive.ObjectID) { 16 | return v.Id 17 | } 18 | 19 | func (v *Variable) SetId(id primitive.ObjectID) { 20 | v.Id = id 21 | } 22 | 23 | type VariableList []Variable 24 | 25 | func (l *VariableList) GetModels() (res []interfaces.Model) { 26 | for i := range *l { 27 | d := (*l)[i] 28 | res = append(res, &d) 29 | } 30 | return res 31 | } 32 | -------------------------------------------------------------------------------- /config/default_config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | var DefaultConfigYaml = ` 4 | info: 5 | version: v0.6.3 6 | edition: global.edition.community 7 | mongo: 8 | host: localhost 9 | port: 27017 10 | db: crawlab_test 11 | username: "" 12 | password: "" 13 | authSource: "admin" 14 | server: 15 | host: 0.0.0.0 16 | port: 8000 17 | spider: 18 | fs: "/spiders" 19 | workspace: "/workspace" 20 | repo: "/repo" 21 | task: 22 | workers: 16 23 | cancelWaitSeconds: 30 24 | grpc: 25 | address: localhost:9666 26 | server: 27 | address: 0.0.0.0:9666 28 | authKey: Crawlab2021! 29 | fs: 30 | filer: 31 | proxy: http://localhost:8888 32 | url: http://localhost:8000/filer 33 | authKey: Crawlab2021! 
// TaskSignal is an integer code identifying a task signal.
type TaskSignal int

// TaskSignal values, numbered from 0 via iota in declaration order.
const (
	TaskSignalFinish TaskSignal = iota // 0
	TaskSignalCancel                   // 1
	TaskSignalError                    // 2
	TaskSignalLost                     // 3
)
[]interfaces.Model) { 25 | for i := range *l { 26 | d := (*l)[i] 27 | res = append(res, &d) 28 | } 29 | return res 30 | } 31 | -------------------------------------------------------------------------------- /controllers/utils_pagination.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/entity" 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | func GetDefaultPagination() (p *entity.Pagination) { 10 | return &entity.Pagination{ 11 | Page: constants.PaginationDefaultPage, 12 | Size: constants.PaginationDefaultSize, 13 | } 14 | } 15 | 16 | func GetPagination(c *gin.Context) (p *entity.Pagination, err error) { 17 | var _p entity.Pagination 18 | if err := c.ShouldBindQuery(&_p); err != nil { 19 | return GetDefaultPagination(), err 20 | } 21 | return &_p, nil 22 | } 23 | 24 | func MustGetPagination(c *gin.Context) (p *entity.Pagination) { 25 | p, err := GetPagination(c) 26 | if err != nil || p == nil { 27 | return GetDefaultPagination() 28 | } 29 | return p 30 | } 31 | -------------------------------------------------------------------------------- /errors/model.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import "errors" 4 | 5 | func NewModelError(msg string) (err error) { 6 | return NewError(ErrorPrefixModel, msg) 7 | } 8 | 9 | var ErrorModelInvalidType = NewModelError("invalid type") 10 | var ErrorModelInvalidModelId = NewModelError("invalid model id") 11 | var ErrorModelNotImplemented = NewModelError("not implemented") 12 | var ErrorModelNotFound = NewModelError("not found") 13 | var ErrorModelAlreadyExists = NewModelError("already exists") 14 | var ErrorModelNotExists = NewModelError("not exists") 15 | var ErrorModelMissingRequiredData = NewModelError("missing required data") 16 | var ErrorModelMissingId = errors.New("missing _id") 17 | 
// TaskExecChanMap is the shared package-level ChanMap instance.
var TaskExecChanMap = NewChanMap()

// ChanMap is a registry of string channels keyed by name. It is safe for
// concurrent use: for a given key every caller receives the same channel.
type ChanMap struct {
	m sync.Map
}

// NewChanMap returns an empty ChanMap.
func NewChanMap() *ChanMap {
	return &ChanMap{}
}

// Chan returns the channel registered under key, creating a channel with a
// buffer of 10 if the key has no channel yet.
func (cm *ChanMap) Chan(key string) chan string {
	return cm.loadOrCreate(key, 10)
}

// ChanBlocked returns the channel registered under key, creating an
// unbuffered channel if the key has no channel yet.
func (cm *ChanMap) ChanBlocked(key string) chan string {
	return cm.loadOrCreate(key, 0)
}

// HasChanKey reports whether a channel is registered under key.
func (cm *ChanMap) HasChanKey(key string) bool {
	_, ok := cm.m.Load(key)
	return ok
}

// loadOrCreate returns the channel stored under key, or atomically registers
// a new channel with the given buffer size.
func (cm *ChanMap) loadOrCreate(key string, size int) chan string {
	// Fast path: avoid allocating a channel when one already exists.
	if ch, ok := cm.m.Load(key); ok {
		return ch.(chan string)
	}
	// LoadOrStore makes check-and-insert atomic; a separate Load followed by
	// Store lets two goroutines each install and return a different channel.
	ch, _ := cm.m.LoadOrStore(key, make(chan string, size))
	return ch.(chan string)
}
DefaultElasticsearchPort = "9200" 25 | DefaultKafkaPort = "9092" 26 | ) 27 | 28 | const ( 29 | DataSourceStatusOnline = "on" 30 | DataSourceStatusOffline = "off" 31 | ) 32 | -------------------------------------------------------------------------------- /middlewares/filer_auth.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/utils" 6 | "github.com/gin-gonic/gin" 7 | "github.com/spf13/viper" 8 | ) 9 | 10 | func FilerAuthorizationMiddleware() gin.HandlerFunc { 11 | return func(c *gin.Context) { 12 | // auth key 13 | authKey := c.GetHeader("Authorization") 14 | 15 | // server auth key 16 | svrAuthKey := viper.GetString("fs.filer.authKey") 17 | 18 | // skip to next if no server auth key is provided 19 | if svrAuthKey == "" { 20 | c.Next() 21 | return 22 | } 23 | 24 | // validate 25 | if authKey != svrAuthKey { 26 | // validation failed, return error response 27 | utils.HandleErrorUnauthorized(c, errors.ErrorHttpUnauthorized) 28 | return 29 | } 30 | 31 | // validation success 32 | c.Next() 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /schedule/options.go: -------------------------------------------------------------------------------- 1 | package schedule 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type Option func(svc interfaces.ScheduleService) 9 | 10 | func WithConfigPath(path string) Option { 11 | return func(svc interfaces.ScheduleService) { 12 | svc.SetConfigPath(path) 13 | } 14 | } 15 | 16 | func WithLocation(loc *time.Location) Option { 17 | return func(svc interfaces.ScheduleService) { 18 | svc.SetLocation(loc) 19 | } 20 | } 21 | 22 | func WithDelayIfStillRunning() Option { 23 | return func(svc interfaces.ScheduleService) { 24 | svc.SetDelay(true) 25 | } 26 | } 27 | 28 | func 
WithSkipIfStillRunning() Option { 29 | return func(svc interfaces.ScheduleService) { 30 | svc.SetSkip(true) 31 | } 32 | } 33 | 34 | func WithUpdateInterval(interval time.Duration) Option { 35 | return func(svc interfaces.ScheduleService) { 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /errors/task.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewTaskError(msg string) (err error) { 4 | return NewError(ErrorPrefixTask, msg) 5 | } 6 | 7 | var ( 8 | ErrorTaskNotExists = NewTaskError("not exists") 9 | ErrorTaskAlreadyExists = NewTaskError("already exists") 10 | ErrorTaskInvalidType = NewTaskError("invalid type") 11 | ErrorTaskProcessStillExists = NewTaskError("process still exists") 12 | ErrorTaskUnableToCancel = NewTaskError("unable to cancel") 13 | ErrorTaskForbidden = NewTaskError("forbidden") 14 | ErrorTaskNoAvailableRunners = NewTaskError("no available runner") 15 | ErrorTaskEmptySpiderId = NewTaskError("empty spider id") 16 | ErrorTaskNoNodeId = NewTaskError("no node id") 17 | ErrorTaskNodeNotFound = NewTaskError("node not found") 18 | ErrorTaskMissingRequiredOption = NewSpiderError("missing required option") 19 | ) 20 | -------------------------------------------------------------------------------- /models/models/utils_tag.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/apex/log" 5 | "github.com/crawlab-team/crawlab-core/errors" 6 | "github.com/crawlab-team/crawlab-core/interfaces" 7 | "github.com/crawlab-team/go-trace" 8 | ) 9 | 10 | func convertInterfacesToTags(tags []interfaces.Tag) (res []Tag) { 11 | if tags == nil { 12 | return nil 13 | } 14 | for _, t := range tags { 15 | tag, ok := t.(*Tag) 16 | if !ok { 17 | log.Warnf("%v: cannot convert tag", trace.TraceError(errors.ErrorModelInvalidType)) 18 | return nil 19 | } 20 | if tag == nil { 21 | 
log.Warnf("%v: cannot convert tag", trace.TraceError(errors.ErrorModelInvalidType)) 22 | return nil 23 | } 24 | res = append(res, *tag) 25 | } 26 | return res 27 | } 28 | 29 | func convertTagsToInterfaces(tags []Tag) (res []interfaces.Tag) { 30 | for _, t := range tags { 31 | res = append(res, &t) 32 | } 33 | return res 34 | } 35 | -------------------------------------------------------------------------------- /entity/grpc_delegate_message.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "github.com/crawlab-team/go-trace" 7 | ) 8 | 9 | type GrpcDelegateMessage struct { 10 | ModelId interfaces.ModelId `json:"id"` 11 | Method interfaces.ModelDelegateMethod `json:"m"` 12 | Data []byte `json:"d"` 13 | } 14 | 15 | func (msg *GrpcDelegateMessage) GetModelId() interfaces.ModelId { 16 | return msg.ModelId 17 | } 18 | 19 | func (msg *GrpcDelegateMessage) GetMethod() interfaces.ModelDelegateMethod { 20 | return msg.Method 21 | } 22 | 23 | func (msg *GrpcDelegateMessage) GetData() []byte { 24 | return msg.Data 25 | } 26 | 27 | func (msg *GrpcDelegateMessage) ToBytes() (data []byte) { 28 | data, err := json.Marshal(*msg) 29 | if err != nil { 30 | _ = trace.TraceError(err) 31 | return data 32 | } 33 | return data 34 | } 35 | -------------------------------------------------------------------------------- /schedule/logger.go: -------------------------------------------------------------------------------- 1 | package schedule 2 | 3 | import ( 4 | "fmt" 5 | "github.com/apex/log" 6 | "github.com/crawlab-team/go-trace" 7 | "github.com/robfig/cron/v3" 8 | "strings" 9 | ) 10 | 11 | type Logger struct { 12 | } 13 | 14 | func (l *Logger) Info(msg string, keysAndValues ...interface{}) { 15 | p := l.getPlaceholder(len(keysAndValues)) 16 | log.Infof(fmt.Sprintf("cron: %s %s", msg, p), keysAndValues...) 
// ErrorPrefix names the subsystem an error originates from.
type ErrorPrefix string

// NewError creates an error whose message has the form
// "<prefix> error: <msg>".
func NewError(prefix ErrorPrefix, msg string) (err error) {
	text := fmt.Sprintf("%s error: %s", prefix, msg)
	err = errors.New(text)
	return err
}
GetModelBaseServiceClient() grpc.ModelBaseServiceClient 14 | GetNodeClient() grpc.NodeServiceClient 15 | GetTaskClient() grpc.TaskServiceClient 16 | GetMessageClient() grpc.MessageServiceClient 17 | SetAddress(Address) 18 | SetTimeout(time.Duration) 19 | SetSubscribeType(string) 20 | SetHandleMessage(bool) 21 | Context() (context.Context, context.CancelFunc) 22 | NewRequest(interface{}) *grpc.Request 23 | GetMessageChannel() chan *grpc.StreamMessage 24 | Restart() error 25 | NewModelBaseServiceRequest(ModelId, GrpcBaseServiceParams) (*grpc.Request, error) 26 | IsStarted() bool 27 | IsClosed() bool 28 | Err() error 29 | GetStream() grpc.NodeService_SubscribeClient 30 | } 31 | -------------------------------------------------------------------------------- /utils/http.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/entity" 6 | "github.com/crawlab-team/go-trace" 7 | "github.com/gin-gonic/gin" 8 | "net/http" 9 | ) 10 | 11 | func handleError(statusCode int, c *gin.Context, err error, print bool) { 12 | if print { 13 | trace.PrintError(err) 14 | } 15 | c.AbortWithStatusJSON(statusCode, entity.Response{ 16 | Status: constants.HttpResponseStatusOk, 17 | Message: constants.HttpResponseMessageError, 18 | Error: err.Error(), 19 | }) 20 | } 21 | 22 | func HandleError(statusCode int, c *gin.Context, err error) { 23 | handleError(statusCode, c, err, true) 24 | } 25 | 26 | func HandleErrorUnauthorized(c *gin.Context, err error) { 27 | HandleError(http.StatusUnauthorized, c, err) 28 | } 29 | 30 | func HandleErrorInternalServerError(c *gin.Context, err error) { 31 | HandleError(http.StatusInternalServerError, c, err) 32 | } 33 | -------------------------------------------------------------------------------- /controllers/token_v2.go: -------------------------------------------------------------------------------- 1 | 
package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/models/models" 5 | "github.com/crawlab-team/crawlab-core/models/service" 6 | "github.com/crawlab-team/crawlab-core/user" 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | func PostToken(c *gin.Context) { 11 | var t models.TokenV2 12 | if err := c.ShouldBindJSON(&t); err != nil { 13 | HandleErrorBadRequest(c, err) 14 | return 15 | } 16 | svc, err := user.GetUserServiceV2() 17 | if err != nil { 18 | HandleErrorInternalServerError(c, err) 19 | return 20 | } 21 | u := GetUserFromContextV2(c) 22 | t.SetCreated(u.Id) 23 | t.SetUpdated(u.Id) 24 | t.Token, err = svc.MakeToken(u) 25 | if err != nil { 26 | HandleErrorInternalServerError(c, err) 27 | return 28 | } 29 | _, err = service.NewModelServiceV2[models.TokenV2]().InsertOne(t) 30 | if err != nil { 31 | HandleErrorInternalServerError(c, err) 32 | return 33 | } 34 | HandleSuccess(c) 35 | } 36 | -------------------------------------------------------------------------------- /models/models/task_stat_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type TaskStatV2 struct { 8 | any `collection:"task_stats"` 9 | BaseModelV2[TaskStatV2] `bson:",inline"` 10 | CreateTs time.Time `json:"create_ts" bson:"create_ts,omitempty"` 11 | StartTs time.Time `json:"start_ts" bson:"start_ts,omitempty"` 12 | EndTs time.Time `json:"end_ts" bson:"end_ts,omitempty"` 13 | WaitDuration int64 `json:"wait_duration" bson:"wait_duration,omitempty"` // in millisecond 14 | RuntimeDuration int64 `json:"runtime_duration" bson:"runtime_duration,omitempty"` // in millisecond 15 | TotalDuration int64 `json:"total_duration" bson:"total_duration,omitempty"` // in millisecond 16 | ResultCount int64 `json:"result_count" bson:"result_count"` 17 | ErrorLogCount int64 `json:"error_log_count" bson:"error_log_count"` 18 | } 19 | 
-------------------------------------------------------------------------------- /errors/grpc.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewGrpcError(msg string) (err error) { 4 | return NewError(ErrorPrefixGrpc, msg) 5 | } 6 | 7 | var ( 8 | ErrorGrpcClientFailedToStart = NewGrpcError("client failed to start") 9 | ErrorGrpcServerFailedToListen = NewGrpcError("server failed to listen") 10 | ErrorGrpcServerFailedToServe = NewGrpcError("server failed to serve") 11 | ErrorGrpcClientNotExists = NewGrpcError("client not exists") 12 | ErrorGrpcClientAlreadyExists = NewGrpcError("client already exists") 13 | ErrorGrpcInvalidType = NewGrpcError("invalid type") 14 | ErrorGrpcNotAllowed = NewGrpcError("not allowed") 15 | ErrorGrpcSubscribeNotExists = NewGrpcError("subscribe not exists") 16 | ErrorGrpcStreamNotFound = NewGrpcError("stream not found") 17 | ErrorGrpcInvalidCode = NewGrpcError("invalid code") 18 | ErrorGrpcUnauthorized = NewGrpcError("unauthorized") 19 | ErrorGrpcInvalidNodeKey = NewGrpcError("invalid node key") 20 | ) 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ main, develop ] 9 | 10 | jobs: 11 | test: 12 | name: Test 13 | runs-on: ubuntu-20.04 14 | services: 15 | mongo: 16 | image: mongo:5 17 | ports: 18 | - 27017:27017 19 | env: 20 | CRAWLAB_SERVER_PORT: 9999 21 | steps: 22 | - name: Checkout repository 23 | uses: actions/checkout@v2 24 | - uses: actions/setup-go@v3 25 | with: 26 | go-version: '^1.22' 27 | - name: Run unit tests 28 | run: | 29 | mods=(\ 30 | "github.com/crawlab-team/crawlab-core/controllers" \ 31 | "github.com/crawlab-team/crawlab-core/models/client" \ 32 
| "github.com/crawlab-team/crawlab-core/models/service" \ 33 | ) 34 | for pkg in ${mods[@]}; do 35 | go test ${pkg} 36 | done 37 | -------------------------------------------------------------------------------- /controllers/login_v2.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/errors" 6 | "github.com/crawlab-team/crawlab-core/user" 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | func PostLogin(c *gin.Context) { 11 | var payload struct { 12 | Username string `json:"username"` 13 | Password string `json:"password"` 14 | } 15 | if err := c.ShouldBindJSON(&payload); err != nil { 16 | HandleErrorBadRequest(c, err) 17 | return 18 | } 19 | userSvc, err := user.GetUserServiceV2() 20 | if err != nil { 21 | HandleErrorInternalServerError(c, err) 22 | return 23 | } 24 | token, loggedInUser, err := userSvc.Login(payload.Username, payload.Password) 25 | if err != nil { 26 | HandleErrorUnauthorized(c, errors.ErrorUserUnauthorized) 27 | return 28 | } 29 | c.Set(constants.UserContextKey, loggedInUser) 30 | HandleSuccessWithData(c, token) 31 | } 32 | 33 | func PostLogout(c *gin.Context) { 34 | c.Set(constants.UserContextKey, nil) 35 | HandleSuccess(c) 36 | } 37 | -------------------------------------------------------------------------------- /models/models/data_collection.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/entity" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type DataCollection struct { 10 | Id primitive.ObjectID `json:"_id" bson:"_id"` 11 | Name string `json:"name" bson:"name"` 12 | Fields []entity.DataField `json:"fields" bson:"fields"` 13 | Dedup struct { 14 | Enabled bool `json:"enabled" bson:"enabled"` 15 | 
Keys []string `json:"keys" bson:"keys"` 16 | Type string `json:"type" bson:"type"` 17 | } `json:"dedup" bson:"dedup"` 18 | } 19 | 20 | func (dc *DataCollection) GetId() (id primitive.ObjectID) { 21 | return dc.Id 22 | } 23 | 24 | func (dc *DataCollection) SetId(id primitive.ObjectID) { 25 | dc.Id = id 26 | } 27 | 28 | type DataCollectionList []DataCollection 29 | 30 | func (l *DataCollectionList) GetModels() (res []interfaces.Model) { 31 | for i := range *l { 32 | d := (*l)[i] 33 | res = append(res, &d) 34 | } 35 | return res 36 | } 37 | -------------------------------------------------------------------------------- /models/delegate/model_node.go: -------------------------------------------------------------------------------- 1 | package delegate 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "time" 7 | ) 8 | 9 | type ModelNodeDelegate struct { 10 | n interfaces.Node 11 | interfaces.ModelDelegate 12 | } 13 | 14 | func (d *ModelNodeDelegate) UpdateStatus(active bool, activeTs *time.Time, status string) (err error) { 15 | d.n.SetActive(active) 16 | if activeTs != nil { 17 | d.n.SetActiveTs(*activeTs) 18 | } 19 | d.n.SetStatus(status) 20 | return d.Save() 21 | } 22 | 23 | func (d *ModelNodeDelegate) UpdateStatusOnline() (err error) { 24 | now := time.Now() 25 | return d.UpdateStatus(true, &now, constants.NodeStatusOnline) 26 | } 27 | 28 | func (d *ModelNodeDelegate) UpdateStatusOffline() (err error) { 29 | return d.UpdateStatus(false, nil, constants.NodeStatusOffline) 30 | } 31 | 32 | func NewModelNodeDelegate(n interfaces.Node) interfaces.ModelNodeDelegate { 33 | return &ModelNodeDelegate{ 34 | n: n, 35 | ModelDelegate: NewModelDelegate(n), 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /i18n/service.go: -------------------------------------------------------------------------------- 1 | package i18n 2 | 3 | import 
"github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | var translations []interfaces.Translation 6 | 7 | var _svc interfaces.I18nService 8 | 9 | type Service struct { 10 | } 11 | 12 | func (svc *Service) AddTranslations(t []interfaces.Translation) { 13 | translations = append(translations, t...) 14 | } 15 | 16 | func (svc *Service) GetTranslations() (t []interfaces.Translation) { 17 | return translations 18 | } 19 | 20 | func GetI18nService(cfgPath string) (svc2 interfaces.I18nService, err error) { 21 | if _svc != nil { 22 | return _svc, nil 23 | } 24 | 25 | _svc, err = NewI18nService() 26 | if err != nil { 27 | return nil, err 28 | } 29 | 30 | return _svc, nil 31 | } 32 | 33 | func ProvideGetI18nService(cfgPath string) func() (svc interfaces.I18nService, err error) { 34 | return func() (svc interfaces.I18nService, err error) { 35 | return GetI18nService(cfgPath) 36 | } 37 | } 38 | 39 | func NewI18nService() (svc2 interfaces.I18nService, err error) { 40 | svc := &Service{} 41 | 42 | return svc, nil 43 | } 44 | -------------------------------------------------------------------------------- /task/handler/options.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type Option func(svc interfaces.TaskHandlerService) 9 | 10 | func WithConfigPath(path string) Option { 11 | return func(svc interfaces.TaskHandlerService) { 12 | svc.SetConfigPath(path) 13 | } 14 | } 15 | 16 | func WithExitWatchDuration(duration time.Duration) Option { 17 | return func(svc interfaces.TaskHandlerService) { 18 | svc.SetExitWatchDuration(duration) 19 | } 20 | } 21 | 22 | func WithReportInterval(interval time.Duration) Option { 23 | return func(svc interfaces.TaskHandlerService) { 24 | svc.SetReportInterval(interval) 25 | } 26 | } 27 | 28 | func WithCancelTimeout(timeout time.Duration) Option { 29 | return func(svc interfaces.TaskHandlerService) 
{ 30 | svc.SetCancelTimeout(timeout) 31 | } 32 | } 33 | 34 | type RunnerOption func(r interfaces.TaskRunner) 35 | 36 | func WithSubscribeTimeout(timeout time.Duration) RunnerOption { 37 | return func(r interfaces.TaskRunner) { 38 | r.SetSubscribeTimeout(timeout) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /interfaces/model_service_v2.go: -------------------------------------------------------------------------------- 1 | package interfaces 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-db/mongo" 5 | "go.mongodb.org/mongo-driver/bson" 6 | "go.mongodb.org/mongo-driver/bson/primitive" 7 | ) 8 | 9 | type ModelServiceV2[T any] interface { 10 | GetById(id primitive.ObjectID) (model *T, err error) 11 | Get(query bson.M, options *mongo.FindOptions) (model *T, err error) 12 | GetList(query bson.M, options *mongo.FindOptions) (models []T, err error) 13 | DeleteById(id primitive.ObjectID) (err error) 14 | Delete(query bson.M) (err error) 15 | DeleteList(query bson.M) (err error) 16 | UpdateById(id primitive.ObjectID, update bson.M) (err error) 17 | UpdateOne(query bson.M, update bson.M) (err error) 18 | UpdateMany(query bson.M, update bson.M) (err error) 19 | ReplaceById(id primitive.ObjectID, model T) (err error) 20 | Replace(query bson.M, model T) (err error) 21 | InsertOne(model T) (id primitive.ObjectID, err error) 22 | InsertMany(models []T) (ids []primitive.ObjectID, err error) 23 | Count(query bson.M) (total int, err error) 24 | GetCol() (col *mongo.Col) 25 | } 26 | -------------------------------------------------------------------------------- /controllers/stats_v2.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/stats" 5 | "github.com/gin-gonic/gin" 6 | "go.mongodb.org/mongo-driver/bson" 7 | "time" 8 | ) 9 | 10 | var statsDefaultQuery = bson.M{ 11 | "create_ts": bson.M{ 12 | "$gte": 
time.Now().Add(-30 * 24 * time.Hour), 13 | }, 14 | } 15 | 16 | func GetStatsOverview(c *gin.Context) { 17 | data, err := stats.GetStatsService().GetOverviewStats(statsDefaultQuery) 18 | if err != nil { 19 | HandleErrorInternalServerError(c, err) 20 | return 21 | } 22 | HandleSuccessWithData(c, data) 23 | } 24 | 25 | func GetStatsDaily(c *gin.Context) { 26 | data, err := stats.GetStatsService().GetDailyStats(statsDefaultQuery) 27 | if err != nil { 28 | HandleErrorInternalServerError(c, err) 29 | return 30 | } 31 | HandleSuccessWithData(c, data) 32 | } 33 | 34 | func GetStatsTasks(c *gin.Context) { 35 | data, err := stats.GetStatsService().GetTaskStats(statsDefaultQuery) 36 | if err != nil { 37 | HandleErrorInternalServerError(c, err) 38 | return 39 | } 40 | HandleSuccessWithData(c, data) 41 | } 42 | -------------------------------------------------------------------------------- /models/models/tag.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Tag struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Name string `json:"name" bson:"name"` 11 | Color string `json:"color" bson:"color"` 12 | Description string `json:"description" bson:"description"` 13 | Col string `json:"col" bson:"col"` 14 | } 15 | 16 | func (t *Tag) GetId() (id primitive.ObjectID) { 17 | return t.Id 18 | } 19 | 20 | func (t *Tag) SetId(id primitive.ObjectID) { 21 | t.Id = id 22 | } 23 | 24 | func (t *Tag) GetName() (res string) { 25 | return t.Name 26 | } 27 | 28 | func (t *Tag) GetColor() (res string) { 29 | return t.Color 30 | } 31 | 32 | func (t *Tag) SetCol(col string) { 33 | t.Col = col 34 | } 35 | 36 | type TagList []Tag 37 | 38 | func (l *TagList) GetModels() (res []interfaces.Model) { 39 | for i := range *l { 40 | d := (*l)[i] 41 | res = append(res, &d) 42 | } 43 | return res 44 | } 45 | 
-------------------------------------------------------------------------------- /models/client/options.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import "github.com/crawlab-team/crawlab-core/interfaces" 4 | 5 | type ModelDelegateOption func(delegate interfaces.GrpcClientModelDelegate) 6 | 7 | func WithDelegateConfigPath(path string) ModelDelegateOption { 8 | return func(d interfaces.GrpcClientModelDelegate) { 9 | d.SetConfigPath(path) 10 | } 11 | } 12 | 13 | type ModelServiceDelegateOption func(delegate interfaces.GrpcClientModelService) 14 | 15 | func WithServiceConfigPath(path string) ModelServiceDelegateOption { 16 | return func(d interfaces.GrpcClientModelService) { 17 | d.SetConfigPath(path) 18 | } 19 | } 20 | 21 | type ModelBaseServiceDelegateOption func(delegate interfaces.GrpcClientModelBaseService) 22 | 23 | func WithBaseServiceModelId(id interfaces.ModelId) ModelBaseServiceDelegateOption { 24 | return func(d interfaces.GrpcClientModelBaseService) { 25 | d.SetModelId(id) 26 | } 27 | } 28 | 29 | func WithBaseServiceConfigPath(path string) ModelBaseServiceDelegateOption { 30 | return func(d interfaces.GrpcClientModelBaseService) { 31 | d.SetConfigPath(path) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /models/client/model_node_delegate.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "time" 7 | ) 8 | 9 | type ModelNodeDelegate struct { 10 | n interfaces.Node 11 | interfaces.GrpcClientModelDelegate 12 | } 13 | 14 | func (d *ModelNodeDelegate) UpdateStatus(active bool, activeTs *time.Time, status string) (err error) { 15 | d.n.SetActive(active) 16 | if activeTs != nil { 17 | d.n.SetActiveTs(*activeTs) 18 | } 19 | d.n.SetStatus(status) 20 | return d.Save() 21 
| } 22 | 23 | func (d *ModelNodeDelegate) UpdateStatusOnline() (err error) { 24 | now := time.Now() 25 | return d.UpdateStatus(true, &now, constants.NodeStatusOnline) 26 | } 27 | 28 | func (d *ModelNodeDelegate) UpdateStatusOffline() (err error) { 29 | return d.UpdateStatus(false, nil, constants.NodeStatusOffline) 30 | } 31 | 32 | func NewModelNodeDelegate(n interfaces.Node) interfaces.ModelNodeDelegate { 33 | return &ModelNodeDelegate{ 34 | n: n, 35 | GrpcClientModelDelegate: NewModelDelegate(n), 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /models/models/environment.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Environment struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Key string `json:"key" bson:"key"` 11 | Value string `json:"value" bson:"value"` 12 | } 13 | 14 | func (e *Environment) GetId() (id primitive.ObjectID) { 15 | return e.Id 16 | } 17 | 18 | func (e *Environment) SetId(id primitive.ObjectID) { 19 | e.Id = id 20 | } 21 | 22 | func (e *Environment) GetKey() (key string) { 23 | return e.Key 24 | } 25 | 26 | func (e *Environment) SetKey(key string) { 27 | e.Key = key 28 | } 29 | 30 | func (e *Environment) GetValue() (value string) { 31 | return e.Value 32 | } 33 | 34 | func (e *Environment) SetValue(value string) { 35 | e.Value = value 36 | } 37 | 38 | type EnvironmentList []Environment 39 | 40 | func (l *EnvironmentList) GetModels() (res []interfaces.Model) { 41 | for i := range *l { 42 | d := (*l)[i] 43 | res = append(res, &d) 44 | } 45 | return res 46 | } 47 | -------------------------------------------------------------------------------- /utils/array.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "errors" 5 | 
// StringArrayContains reports whether str is an element of arr.
// A nil or empty arr never contains anything.
func StringArrayContains(arr []string, str string) bool {
	for _, s := range arr {
		if s == str {
			return true
		}
	}
	return false
}

// GetArrayItems copies the elements of a slice or array value into a
// []interface{}.
//
// It returns an "invalid type" error when array is nil or is not a slice or
// array. An empty slice or array yields a nil result and a nil error.
func GetArrayItems(array interface{}) (res []interface{}, err error) {
	// reflect.TypeOf(nil) is a nil Type, and calling Kind on it panics, so a
	// nil input has to be rejected before any reflection happens.
	if array == nil {
		return nil, errors.New("invalid type")
	}

	v := reflect.ValueOf(array)
	switch v.Kind() {
	case reflect.Slice, reflect.Array:
		// supported kinds
	default:
		return nil, errors.New("invalid type")
	}

	n := v.Len()
	if n == 0 {
		return nil, nil
	}

	res = make([]interface{}, 0, n)
	for i := 0; i < n; i++ {
		res = append(res, v.Index(i).Interface())
	}
	return res, nil
}

// ShuffleArray randomly permutes slice in place and always returns nil.
//
// The generator is seeded from the current time in nanoseconds so that two
// calls made within the same second do not produce the same permutation.
func ShuffleArray(slice []interface{}) (err error) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Repeatedly move a randomly chosen element of the not-yet-fixed prefix
	// into the last unfixed position.
	for n := len(slice); n > 0; n-- {
		j := r.Intn(n)
		slice[n-1], slice[j] = slice[j], slice[n-1]
	}

	return nil
}
...ServiceCrudOption) (err error) 19 | SyncToWorkspace() (err error) 20 | GetFsPath() (path string) 21 | SetFsPath(path string) 22 | GetWorkspacePath() (path string) 23 | SetWorkspacePath(path string) 24 | GetRepoPath() (path string) 25 | SetRepoPath(path string) 26 | GetFs() (fs cfs.Manager) 27 | GetGitClient() (c *vcs.GitClient) 28 | } 29 | -------------------------------------------------------------------------------- /models/models/node_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type NodeV2 struct { 8 | any `collection:"nodes"` 9 | BaseModelV2[NodeV2] `bson:",inline"` 10 | Key string `json:"key" bson:"key"` 11 | Name string `json:"name" bson:"name"` 12 | Ip string `json:"ip" bson:"ip"` 13 | Port string `json:"port" bson:"port"` 14 | Mac string `json:"mac" bson:"mac"` 15 | Hostname string `json:"hostname" bson:"hostname"` 16 | Description string `json:"description" bson:"description"` 17 | IsMaster bool `json:"is_master" bson:"is_master"` 18 | Status string `json:"status" bson:"status"` 19 | Enabled bool `json:"enabled" bson:"enabled"` 20 | Active bool `json:"active" bson:"active"` 21 | ActiveAt time.Time `json:"active_at" bson:"active_ts"` 22 | AvailableRunners int `json:"available_runners" bson:"available_runners"` 23 | MaxRunners int `json:"max_runners" bson:"max_runners"` 24 | } 25 | -------------------------------------------------------------------------------- /grpc/server/options.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | ) 6 | 7 | type Option func(svr interfaces.GrpcServer) 8 | 9 | func WithConfigPath(path string) Option { 10 | return func(svr interfaces.GrpcServer) { 11 | svr.SetConfigPath(path) 12 | } 13 | } 14 | 15 | func WithAddress(address interfaces.Address) Option { 16 | return func(svr 
interfaces.GrpcServer) { 17 | svr.SetAddress(address) 18 | } 19 | } 20 | 21 | type NodeServerOption func(svr *NodeServer) 22 | 23 | func WithServerNodeServerService(server interfaces.GrpcServer) NodeServerOption { 24 | return func(svr *NodeServer) { 25 | svr.server = server 26 | } 27 | } 28 | 29 | type TaskServerOption func(svr *TaskServer) 30 | 31 | func WithServerTaskServerService(server interfaces.GrpcServer) TaskServerOption { 32 | return func(svr *TaskServer) { 33 | svr.server = server 34 | } 35 | } 36 | 37 | type MessageServerOption func(svr *MessageServer) 38 | 39 | func WithServerMessageServerService(server interfaces.GrpcServer) MessageServerOption { 40 | return func(svr *MessageServer) { 41 | svr.server = server 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /result/service_registry.go: -------------------------------------------------------------------------------- 1 | package result 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "sync" 6 | ) 7 | 8 | type ServiceRegistry struct { 9 | // internals 10 | services sync.Map 11 | } 12 | 13 | func (r *ServiceRegistry) Register(key string, fn interfaces.ResultServiceRegistryFn) { 14 | r.services.Store(key, fn) 15 | } 16 | 17 | func (r *ServiceRegistry) Unregister(key string) { 18 | r.services.Delete(key) 19 | } 20 | 21 | func (r *ServiceRegistry) Get(key string) (fn interfaces.ResultServiceRegistryFn) { 22 | res, ok := r.services.Load(key) 23 | if ok { 24 | fn, ok = res.(interfaces.ResultServiceRegistryFn) 25 | if !ok { 26 | return nil 27 | } 28 | return fn 29 | } 30 | return nil 31 | } 32 | 33 | func NewResultServiceRegistry() (r interfaces.ResultServiceRegistry) { 34 | r = &ServiceRegistry{ 35 | services: sync.Map{}, 36 | } 37 | return r 38 | } 39 | 40 | var _svc interfaces.ResultServiceRegistry 41 | 42 | func GetResultServiceRegistry() (r interfaces.ResultServiceRegistry) { 43 | if _svc != nil { 44 | return _svc 45 | } 46 | _svc 
// TTLMap is a string-keyed map whose entries expire TTL after they are stored.
type TTLMap struct {
	// TTL is the lifetime given to every entry at Store time.
	TTL time.Duration

	data sync.Map
}

// expireEntry is the value kept in TTLMap.data: the caller's value together
// with the instant after which it must no longer be returned.
type expireEntry struct {
	ExpiresAt time.Time
	Value     interface{}
}

// Store saves val under key with an expiry of now + t.TTL, replacing any
// previous entry for that key.
func (t *TTLMap) Store(key string, val interface{}) {
	t.data.Store(key, expireEntry{
		ExpiresAt: time.Now().Add(t.TTL),
		Value:     val,
	})
}

// Load returns the value stored under key, or nil when the key is absent or
// its entry has expired.
func (t *TTLMap) Load(key string) (val interface{}) {
	raw, ok := t.data.Load(key)
	if !ok {
		return nil
	}

	entry := raw.(expireEntry)
	// An entry is live only while its expiry still lies in the future. The
	// previous check was inverted: it hid live entries and served expired ones.
	if !entry.ExpiresAt.After(time.Now()) {
		return nil
	}

	return entry.Value
}

// NewTTLMap returns a TTLMap with the given TTL and starts a background
// goroutine that, once per second, deletes every entry whose expiry is before
// the tick time. That goroutine is never stopped.
func NewTTLMap(ttl time.Duration) (m *TTLMap) {
	m = &TTLMap{
		TTL: ttl,
	}

	go func() {
		for now := range time.Tick(time.Second) {
			m.data.Range(func(k, v interface{}) bool {
				if v.(expireEntry).ExpiresAt.Before(now) {
					m.data.Delete(k)
				}
				return true
			})
		}
	}()

	return m
}
context.WithTimeout(context.Background(), timeout) 17 | defer cancel() 18 | return getSqliteSession(ctx, ds) 19 | } 20 | 21 | func getSqliteSession(ctx context.Context, ds *models.DataSource) (s db.Session, err error) { 22 | // connect settings 23 | settings := sqlite.ConnectionURL{ 24 | Database: ds.Database, 25 | Options: nil, 26 | } 27 | 28 | // session 29 | done := make(chan struct{}) 30 | go func() { 31 | s, err = sqlite.Open(settings) 32 | close(done) 33 | }() 34 | 35 | // wait for done 36 | select { 37 | case <-ctx.Done(): 38 | if ctx.Err() != nil { 39 | err = ctx.Err() 40 | } 41 | case <-done: 42 | } 43 | 44 | return s, err 45 | } 46 | -------------------------------------------------------------------------------- /errors/controller.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | func NewControllerError(msg string) (err error) { 4 | return NewError(ErrorPrefixController, msg) 5 | } 6 | 7 | var ErrorControllerInvalidControllerId = NewControllerError("invalid controller id") 8 | var ErrorControllerInvalidType = NewControllerError("invalid type") 9 | var ErrorControllerAddError = NewControllerError("add error") 10 | var ErrorControllerUpdateError = NewControllerError("update error") 11 | var ErrorControllerDeleteError = NewControllerError("delete error") 12 | var ErrorControllerNotImplemented = NewControllerError("not implemented") 13 | var ErrorControllerNoModelService = NewControllerError("no model service") 14 | var ErrorControllerRequestPayloadInvalid = NewControllerError("request payload invalid") 15 | var ErrorControllerMissingInCache = NewControllerError("missing in cache") 16 | var ErrorControllerNotCancellable = NewControllerError("not cancellable") 17 | var ErrorControllerMissingRequestFields = NewControllerError("missing request fields") 18 | var ErrorControllerEmptyResponse = NewControllerError("empty response") 19 | var ErrorControllerFilerNotFound = NewControllerError("filer not 
found") 20 | -------------------------------------------------------------------------------- /node/service/options.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type Option func(svc interfaces.NodeService) 9 | 10 | func WithConfigPath(path string) Option { 11 | return func(svc interfaces.NodeService) { 12 | svc.SetConfigPath(path) 13 | } 14 | } 15 | 16 | func WithAddress(address interfaces.Address) Option { 17 | return func(svc interfaces.NodeService) { 18 | svc.SetAddress(address) 19 | } 20 | } 21 | 22 | func WithMonitorInterval(duration time.Duration) Option { 23 | return func(svc interfaces.NodeService) { 24 | svc2, ok := svc.(interfaces.NodeMasterService) 25 | if ok { 26 | svc2.SetMonitorInterval(duration) 27 | } 28 | } 29 | } 30 | 31 | func WithStopOnError() Option { 32 | return func(svc interfaces.NodeService) { 33 | svc2, ok := svc.(interfaces.NodeMasterService) 34 | if ok { 35 | svc2.StopOnError() 36 | } 37 | } 38 | } 39 | 40 | func WithHeartbeatInterval(duration time.Duration) Option { 41 | return func(svc interfaces.NodeService) { 42 | svc2, ok := svc.(interfaces.NodeWorkerService) 43 | if ok { 44 | svc2.SetHeartbeatInterval(duration) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /models/models/dependency_setting.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | "time" 7 | ) 8 | 9 | type DependencySetting struct { 10 | Id primitive.ObjectID `json:"_id" bson:"_id"` 11 | Key string `json:"key" bson:"key"` 12 | Name string `json:"name" bson:"name"` 13 | Description string `json:"description" bson:"description"` 14 | Enabled bool `json:"enabled" bson:"enabled"` 15 | Cmd 
string `json:"cmd" bson:"cmd"` 16 | Proxy string `json:"proxy" bson:"proxy"` 17 | LastUpdateTs time.Time `json:"last_update_ts" bson:"last_update_ts"` 18 | } 19 | 20 | func (j *DependencySetting) GetId() (id primitive.ObjectID) { 21 | return j.Id 22 | } 23 | 24 | func (j *DependencySetting) SetId(id primitive.ObjectID) { 25 | j.Id = id 26 | } 27 | 28 | type DependencySettingList []DependencySetting 29 | 30 | func (l *DependencySettingList) GetModels() (res []interfaces.Model) { 31 | for i := range *l { 32 | d := (*l)[i] 33 | res = append(res, &d) 34 | } 35 | return res 36 | } 37 | -------------------------------------------------------------------------------- /models/models/project.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Project struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Name string `json:"name" bson:"name"` 11 | Description string `json:"description" bson:"description"` 12 | Spiders int `json:"spiders" bson:"-"` 13 | } 14 | 15 | func (p *Project) GetId() (id primitive.ObjectID) { 16 | return p.Id 17 | } 18 | 19 | func (p *Project) SetId(id primitive.ObjectID) { 20 | p.Id = id 21 | } 22 | 23 | func (p *Project) GetName() (name string) { 24 | return p.Name 25 | } 26 | 27 | func (p *Project) SetName(name string) { 28 | p.Name = name 29 | } 30 | 31 | func (p *Project) GetDescription() (description string) { 32 | return p.Description 33 | } 34 | 35 | func (p *Project) SetDescription(description string) { 36 | p.Description = description 37 | } 38 | 39 | type ProjectList []Project 40 | 41 | func (l *ProjectList) GetModels() (res []interfaces.Model) { 42 | for i := range *l { 43 | d := (*l)[i] 44 | res = append(res, &d) 45 | } 46 | return res 47 | } 48 | -------------------------------------------------------------------------------- 
/controllers/test/main_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/controllers" 6 | "github.com/crawlab-team/crawlab-core/interfaces" 7 | "github.com/crawlab-team/crawlab-core/models/service" 8 | "github.com/crawlab-team/crawlab-core/user" 9 | "go.mongodb.org/mongo-driver/mongo" 10 | "testing" 11 | ) 12 | 13 | func TestMain(m *testing.M) { 14 | // init user 15 | modelSvc, err := service.GetService() 16 | if err != nil { 17 | panic(err) 18 | } 19 | _, err = modelSvc.GetUserByUsername(constants.DefaultAdminUsername, nil) 20 | if err != nil { 21 | if err.Error() != mongo.ErrNoDocuments.Error() { 22 | panic(err) 23 | } 24 | userSvc, err := user.GetUserService() 25 | if err != nil { 26 | panic(err) 27 | } 28 | if err := userSvc.Create(&interfaces.UserCreateOptions{ 29 | Username: constants.DefaultAdminUsername, 30 | Password: constants.DefaultAdminPassword, 31 | Role: constants.RoleAdmin, 32 | }); err != nil { 33 | panic(err) 34 | } 35 | } 36 | 37 | if err := controllers.InitControllers(); err != nil { 38 | panic(err) 39 | } 40 | 41 | m.Run() 42 | 43 | T.Cleanup() 44 | } 45 | -------------------------------------------------------------------------------- /utils/git.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "github.com/crawlab-team/crawlab-core/models/models" 7 | vcs "github.com/crawlab-team/crawlab-vcs" 8 | ) 9 | 10 | func InitGitClientAuth(g interfaces.Git, gitClient *vcs.GitClient) { 11 | // set auth 12 | switch g.GetAuthType() { 13 | case constants.GitAuthTypeHttp: 14 | gitClient.SetAuthType(vcs.GitAuthTypeHTTP) 15 | gitClient.SetUsername(g.GetUsername()) 16 | gitClient.SetPassword(g.GetPassword()) 17 | case 
constants.GitAuthTypeSsh: 18 | gitClient.SetAuthType(vcs.GitAuthTypeSSH) 19 | gitClient.SetUsername(g.GetUsername()) 20 | gitClient.SetPrivateKey(g.GetPassword()) 21 | } 22 | } 23 | 24 | func InitGitClientAuthV2(g *models.GitV2, gitClient *vcs.GitClient) { 25 | // set auth 26 | switch g.AuthType { 27 | case constants.GitAuthTypeHttp: 28 | gitClient.SetAuthType(vcs.GitAuthTypeHTTP) 29 | gitClient.SetUsername(g.Username) 30 | gitClient.SetPassword(g.Password) 31 | case constants.GitAuthTypeSsh: 32 | gitClient.SetAuthType(vcs.GitAuthTypeSSH) 33 | gitClient.SetUsername(g.Username) 34 | gitClient.SetPrivateKey(g.Password) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /utils/kafka.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/crawlab-team/crawlab-core/constants" 7 | "github.com/crawlab-team/crawlab-core/models/models" 8 | "github.com/segmentio/kafka-go" 9 | "time" 10 | ) 11 | 12 | func GetKafkaConnection(ds *models.DataSource) (c *kafka.Conn, err error) { 13 | return getKafkaConnection(context.Background(), ds) 14 | } 15 | 16 | func GetKafkaConnectionWithTimeout(ds *models.DataSource, timeout time.Duration) (c *kafka.Conn, err error) { 17 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 18 | defer cancel() 19 | return getKafkaConnection(ctx, ds) 20 | } 21 | 22 | func getKafkaConnection(ctx context.Context, ds *models.DataSource) (c *kafka.Conn, err error) { 23 | // normalize settings 24 | host := ds.Host 25 | port := ds.Port 26 | if ds.Host == "" { 27 | host = constants.DefaultHost 28 | } 29 | if ds.Port == "" { 30 | port = constants.DefaultKafkaPort 31 | } 32 | 33 | // kafka connection address 34 | network := "tcp" 35 | address := fmt.Sprintf("%s:%s", host, port) 36 | topic := ds.Database 37 | partition := 0 // TODO: parameterize 38 | 39 | // kafka connection 40 | return 
// ModelBaseService declares the read, delete, update, insert and count
// operations offered for one model, which is identified by a ModelId.
//
// The variadic args accepted by the delete/update methods are passed through
// as-is; their meaning is defined by the implementation and is not visible in
// this declaration.
type ModelBaseService interface {
	// GetModelId returns the ModelId this service is bound to.
	GetModelId() (id ModelId)
	// SetModelId binds the service to the given ModelId.
	SetModelId(id ModelId)
	// GetById fetches a single model by its ObjectID.
	GetById(id primitive.ObjectID) (res Model, err error)
	// Get fetches a single model for the given query and find options.
	Get(query bson.M, opts *mongo.FindOptions) (res Model, err error)
	// GetList fetches a list of models for the given query and find options.
	GetList(query bson.M, opts *mongo.FindOptions) (res List, err error)
	// DeleteById deletes the model with the given ObjectID.
	DeleteById(id primitive.ObjectID, args ...interface{}) (err error)
	// Delete deletes by query.
	Delete(query bson.M, args ...interface{}) (err error)
	// DeleteList deletes by query.
	// NOTE(review): how this differs from Delete and ForceDeleteList is not
	// visible here — confirm against the implementation.
	DeleteList(query bson.M, args ...interface{}) (err error)
	// ForceDeleteList deletes by query (see the note on DeleteList).
	ForceDeleteList(query bson.M, args ...interface{}) (err error)
	// UpdateById applies update to the model with the given ObjectID.
	UpdateById(id primitive.ObjectID, update bson.M, args ...interface{}) (err error)
	// Update applies update to what query selects; fields names the affected fields.
	Update(query bson.M, update bson.M, fields []string, args ...interface{}) (err error)
	// UpdateDoc is like Update but takes the new values from a model document.
	UpdateDoc(query bson.M, doc Model, fields []string, args ...interface{}) (err error)
	// Insert stores docs on behalf of user u.
	Insert(u User, docs ...interface{}) (err error)
	// Count returns the number of models selected by query.
	Count(query bson.M) (total int, err error)
}
// WithTimeout returns an Option that accepts a timeout but currently does
// nothing with it: the returned function has an empty body, so the client it
// is applied to is left unchanged.
//
// NOTE(review): the other options in this file forward their argument to a
// setter on the client (SetConfigPath, SetAddress, SetSubscribeType,
// SetHandleMessage). This one looks unfinished — confirm whether
// interfaces.GrpcClient exposes a timeout setter that should be called here.
func WithTimeout(timeout time.Duration) Option {
	return func(c interfaces.GrpcClient) {
	}
}
52 | -------------------------------------------------------------------------------- /models/models/data_source_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type DataSourceV2 struct { 4 | any `collection:"data_sources"` 5 | BaseModelV2[DataSource] `bson:",inline"` 6 | Name string `json:"name" bson:"name"` 7 | Type string `json:"type" bson:"type"` 8 | Description string `json:"description" bson:"description"` 9 | Host string `json:"host" bson:"host"` 10 | Port string `json:"port" bson:"port"` 11 | Url string `json:"url" bson:"url"` 12 | Hosts []string `json:"hosts" bson:"hosts"` 13 | Database string `json:"database" bson:"database"` 14 | Username string `json:"username" bson:"username"` 15 | Password string `json:"password,omitempty" bson:"-"` 16 | ConnectType string `json:"connect_type" bson:"connect_type"` 17 | Status string `json:"status" bson:"status"` 18 | Error string `json:"error" bson:"error"` 19 | Extra map[string]string `json:"extra,omitempty" bson:"extra,omitempty"` 20 | } 21 | -------------------------------------------------------------------------------- /entity/export.go: -------------------------------------------------------------------------------- 1 | package entity 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "time" 6 | ) 7 | 8 | type Export struct { 9 | Id string `json:"id"` 10 | Type string `json:"type"` 11 | Target string `json:"target"` 12 | Filter interfaces.Filter `json:"filter"` 13 | Status string `json:"status"` 14 | StartTs time.Time `json:"start_ts"` 15 | EndTs time.Time `json:"end_ts"` 16 | FileName string `json:"file_name"` 17 | DownloadPath string `json:"-"` 18 | Limit int `json:"-"` 19 | } 20 | 21 | func (e *Export) GetId() string { 22 | return e.Id 23 | } 24 | 25 | func (e *Export) GetType() string { 26 | return e.Type 27 | } 28 | 29 | func (e *Export) GetTarget() string { 30 | return e.Target 31 | } 32 | 33 | func (e *Export) 
GetFilter() interfaces.Filter { 34 | return e.Filter 35 | } 36 | 37 | func (e *Export) GetStatus() string { 38 | return e.Status 39 | } 40 | 41 | func (e *Export) GetStartTs() time.Time { 42 | return e.StartTs 43 | } 44 | 45 | func (e *Export) GetEndTs() time.Time { 46 | return e.EndTs 47 | } 48 | 49 | func (e *Export) GetDownloadPath() string { 50 | return e.DownloadPath 51 | } 52 | -------------------------------------------------------------------------------- /grpc/server/dependencies_server_v2.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | grpc "github.com/crawlab-team/crawlab-grpc" 6 | "google.golang.org/grpc/codes" 7 | "google.golang.org/grpc/status" 8 | ) 9 | 10 | type DependenciesServerV2 struct { 11 | grpc.UnimplementedDependencyServiceV2Server 12 | } 13 | 14 | func (svr DependenciesServerV2) Connect(stream grpc.DependencyServiceV2_ConnectServer) (err error) { 15 | return status.Errorf(codes.Unimplemented, "method Connect not implemented") 16 | } 17 | 18 | func (svr DependenciesServerV2) Sync(ctx context.Context, request *grpc.DependenciesServiceV2SyncRequest) (response *grpc.Response, err error) { 19 | return nil, status.Errorf(codes.Unimplemented, "method Sync not implemented") 20 | } 21 | 22 | func (svr DependenciesServerV2) Install(stream grpc.DependencyServiceV2_InstallServer) (err error) { 23 | return status.Errorf(codes.Unimplemented, "method Install not implemented") 24 | } 25 | 26 | func (svr DependenciesServerV2) UninstallDependencies(stream grpc.DependencyServiceV2_UninstallDependenciesServer) (err error) { 27 | return status.Errorf(codes.Unimplemented, "method UninstallDependencies not implemented") 28 | } 29 | 30 | func NewDependenciesServerV2() *DependenciesServerV2 { 31 | return &DependenciesServerV2{} 32 | } 33 | -------------------------------------------------------------------------------- /entity/address.go: 
// Address is a network endpoint made of a host and a port, both kept as strings.
type Address struct {
	Host string
	Port string
}

// String renders the address as "host:port".
func (a *Address) String() (res string) {
	return fmt.Sprintf("%s:%s", a.Host, a.Port)
}

// IsEmpty reports whether the host or the port is missing.
func (a *Address) IsEmpty() (res bool) {
	return a.Host == "" || a.Port == ""
}

// Value returns the address itself (as a *Address wrapped in an interface).
func (a *Address) Value() (res interface{}) {
	return a
}

// AddressOptions carries the values used by NewAddress.
type AddressOptions struct {
	Host string
	Port string
}

// NewAddress builds an Address from opts. A nil opts is treated as empty
// options, and an empty port defaults to "9666". The host is intentionally not
// defaulted (a former "localhost" fallback is disabled).
//
// opts is only read: the default port is applied to the returned Address, not
// written back into the caller's struct.
func NewAddress(opts *AddressOptions) (res *Address) {
	var host, port string
	if opts != nil {
		host, port = opts.Host, opts.Port
	}
	if port == "" {
		port = "9666"
	}
	return &Address{
		Host: host,
		Port: port,
	}
}

// NewAddressFromString parses "host" or "host:port" into an Address; a missing
// port gets the NewAddress default. Any input with more than one ':' is
// rejected with an error that names the offending input.
func NewAddressFromString(address string) (res *Address, err error) {
	parts := strings.Split(address, ":")
	switch len(parts) {
	case 1:
		return NewAddress(&AddressOptions{Host: parts[0]}), nil
	case 2:
		return NewAddress(&AddressOptions{Host: parts[0], Port: parts[1]}), nil
	default:
		// The message used to format the (always nil) named result err, which
		// produced "parsing address error: <nil>". errors.New is kept so the
		// file's existing "errors" import stays in use.
		return nil, errors.New(fmt.Sprintf("parsing address error: invalid address %q", address))
	}
}
int(T.TestSchedule.GetEntryId()), -1) 20 | e := T.scheduleSvc.GetCron().Entry(T.TestSchedule.GetEntryId()) 21 | require.Equal(t, T.TestSchedule.GetEntryId(), e.ID) 22 | time.Sleep(1 * time.Second) 23 | 24 | err = T.scheduleSvc.Disable(T.TestSchedule) 25 | require.False(t, T.TestSchedule.GetEnabled()) 26 | require.Equal(t, 0, len(T.scheduleSvc.GetCron().Entries())) 27 | } 28 | 29 | func TestScheduleService_Run(t *testing.T) { 30 | var err error 31 | T.Setup(t) 32 | 33 | time.Sleep(1 * time.Second) 34 | err = T.scheduleSvc.Enable(T.TestSchedule) 35 | require.Nil(t, err) 36 | time.Sleep(1 * time.Minute) 37 | 38 | tasks, err := T.modelSvc.GetTaskList(nil, nil) 39 | require.Nil(t, err) 40 | require.Greater(t, len(tasks), 0) 41 | for _, task := range tasks { 42 | require.False(t, task.ScheduleId.IsZero()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /models/models/role.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Role struct { 9 | Id primitive.ObjectID `json:"_id" bson:"_id"` 10 | Key string `json:"key" bson:"key"` 11 | Name string `json:"name" bson:"name"` 12 | Description string `json:"description" bson:"description"` 13 | } 14 | 15 | func (r *Role) GetId() (id primitive.ObjectID) { 16 | return r.Id 17 | } 18 | 19 | func (r *Role) SetId(id primitive.ObjectID) { 20 | r.Id = id 21 | } 22 | 23 | func (r *Role) GetKey() (key string) { 24 | return r.Key 25 | } 26 | 27 | func (r *Role) SetKey(key string) { 28 | r.Key = key 29 | } 30 | 31 | func (r *Role) GetName() (name string) { 32 | return r.Name 33 | } 34 | 35 | func (r *Role) SetName(name string) { 36 | r.Name = name 37 | } 38 | 39 | func (r *Role) GetDescription() (description string) { 40 | return r.Description 41 | } 42 | 43 | func (r *Role) SetDescription(description 
string) { 44 | r.Description = description 45 | } 46 | 47 | type RoleList []Role 48 | 49 | func (l *RoleList) GetModels() (res []interfaces.Model) { 50 | for i := range *l { 51 | d := (*l)[i] 52 | res = append(res, &d) 53 | } 54 | return res 55 | } 56 | -------------------------------------------------------------------------------- /models/models/spider_stat_v2.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/bson/primitive" 5 | ) 6 | 7 | type SpiderStatV2 struct { 8 | any `collection:"spider_stats"` 9 | BaseModelV2[SpiderStatV2] `bson:",inline"` 10 | LastTaskId primitive.ObjectID `json:"last_task_id" bson:"last_task_id,omitempty"` 11 | LastTask *TaskV2 `json:"last_task,omitempty" bson:"-"` 12 | Tasks int `json:"tasks" bson:"tasks"` 13 | Results int `json:"results" bson:"results"` 14 | WaitDuration int64 `json:"wait_duration" bson:"wait_duration,omitempty"` // in second 15 | RuntimeDuration int64 `json:"runtime_duration" bson:"runtime_duration,omitempty"` // in second 16 | TotalDuration int64 `json:"total_duration" bson:"total_duration,omitempty"` // in second 17 | AverageWaitDuration int64 `json:"average_wait_duration" bson:"-"` // in second 18 | AverageRuntimeDuration int64 `json:"average_runtime_duration" bson:"-"` // in second 19 | AverageTotalDuration int64 `json:"average_total_duration" bson:"-"` // in second 20 | } 21 | -------------------------------------------------------------------------------- /controllers/setting_v2.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/models/models" 5 | "github.com/crawlab-team/crawlab-core/models/service" 6 | "github.com/gin-gonic/gin" 7 | "go.mongodb.org/mongo-driver/bson" 8 | ) 9 | 10 | func GetSetting(c *gin.Context) { 11 | // key 12 | key := c.Param("id") 13 | 14 | // setting 15 | s, err := 
service.NewModelServiceV2[models.SettingV2]().GetOne(bson.M{"key": key}, nil) 16 | if err != nil { 17 | HandleErrorInternalServerError(c, err) 18 | return 19 | } 20 | 21 | HandleSuccessWithData(c, s) 22 | } 23 | 24 | func PutSetting(c *gin.Context) { 25 | // key 26 | key := c.Param("id") 27 | 28 | // settings 29 | var s models.Setting 30 | if err := c.ShouldBindJSON(&s); err != nil { 31 | HandleErrorInternalServerError(c, err) 32 | return 33 | } 34 | 35 | modelSvc := service.NewModelServiceV2[models.SettingV2]() 36 | 37 | // setting 38 | _s, err := modelSvc.GetOne(bson.M{"key": key}, nil) 39 | if err != nil { 40 | HandleErrorInternalServerError(c, err) 41 | return 42 | } 43 | 44 | u := GetUserFromContextV2(c) 45 | 46 | // save 47 | _s.Value = s.Value 48 | _s.SetUpdated(u.Id) 49 | err = modelSvc.ReplaceOne(bson.M{"key": key}, *_s) 50 | if err != nil { 51 | HandleErrorInternalServerError(c, err) 52 | return 53 | } 54 | 55 | HandleSuccess(c) 56 | } 57 | -------------------------------------------------------------------------------- /grpc/server/utils_handle.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/crawlab-team/crawlab-grpc" 6 | "github.com/crawlab-team/go-trace" 7 | ) 8 | 9 | func HandleError(err error) (res *grpc.Response, err2 error) { 10 | trace.PrintError(err) 11 | return &grpc.Response{ 12 | Code: grpc.ResponseCode_ERROR, 13 | Error: err.Error(), 14 | }, err 15 | } 16 | 17 | func HandleSuccess() (res *grpc.Response, err error) { 18 | return &grpc.Response{ 19 | Code: grpc.ResponseCode_OK, 20 | Message: "success", 21 | }, nil 22 | } 23 | 24 | func HandleSuccessWithData(data interface{}) (res *grpc.Response, err error) { 25 | var bytes []byte 26 | switch data.(type) { 27 | case []byte: 28 | bytes = data.([]byte) 29 | default: 30 | bytes, err = json.Marshal(data) 31 | if err != nil { 32 | return HandleError(err) 33 | } 34 | } 35 | return 
&grpc.Response{ 36 | Code: grpc.ResponseCode_OK, 37 | Message: "success", 38 | Data: bytes, 39 | }, nil 40 | } 41 | 42 | func HandleSuccessWithListData(data interface{}, total int) (res *grpc.Response, err error) { 43 | bytes, err := json.Marshal(data) 44 | if err != nil { 45 | return HandleError(err) 46 | } 47 | return &grpc.Response{ 48 | Code: grpc.ResponseCode_OK, 49 | Message: "success", 50 | Data: bytes, 51 | Total: int64(total), 52 | }, nil 53 | } 54 | -------------------------------------------------------------------------------- /models/client/model_service.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | config2 "github.com/crawlab-team/crawlab-core/config" 5 | "github.com/crawlab-team/crawlab-core/container" 6 | "github.com/crawlab-team/crawlab-core/interfaces" 7 | ) 8 | 9 | type ServiceDelegate struct { 10 | // settings 11 | cfgPath string 12 | 13 | // internals 14 | c interfaces.GrpcClient 15 | } 16 | 17 | func (d *ServiceDelegate) GetConfigPath() string { 18 | return d.cfgPath 19 | } 20 | 21 | func (d *ServiceDelegate) SetConfigPath(path string) { 22 | d.cfgPath = path 23 | } 24 | 25 | func (d *ServiceDelegate) NewBaseServiceDelegate(id interfaces.ModelId) (svc interfaces.GrpcClientModelBaseService, err error) { 26 | var opts []ModelBaseServiceDelegateOption 27 | opts = append(opts, WithBaseServiceModelId(id)) 28 | if d.cfgPath != "" { 29 | opts = append(opts, WithBaseServiceConfigPath(d.cfgPath)) 30 | } 31 | return NewBaseServiceDelegate(opts...) 
32 | } 33 | 34 | func NewServiceDelegate() (svc2 interfaces.GrpcClientModelService, err error) { 35 | // service delegate 36 | svc := &ServiceDelegate{ 37 | cfgPath: config2.GetConfigPath(), 38 | } 39 | 40 | // dependency injection 41 | if err := container.GetContainer().Invoke(func(client interfaces.GrpcClient) { 42 | svc.c = client 43 | }); err != nil { 44 | return nil, err 45 | } 46 | 47 | return svc, nil 48 | } 49 | -------------------------------------------------------------------------------- /controllers/sync.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/utils" 5 | "github.com/gin-gonic/gin" 6 | "github.com/spf13/viper" 7 | "net/http" 8 | "path/filepath" 9 | ) 10 | 11 | var SyncController ActionController 12 | 13 | func getSyncActions() []Action { 14 | var ctx = newSyncContext() 15 | return []Action{ 16 | { 17 | Method: http.MethodGet, 18 | Path: "/:id/scan", 19 | HandlerFunc: ctx.scan, 20 | }, 21 | { 22 | Method: http.MethodGet, 23 | Path: "/:id/download", 24 | HandlerFunc: ctx.download, 25 | }, 26 | } 27 | } 28 | 29 | type syncContext struct { 30 | } 31 | 32 | func (ctx *syncContext) scan(c *gin.Context) { 33 | id := c.Param("id") 34 | dir := ctx._getDir(id) 35 | files, err := utils.ScanDirectory(dir) 36 | if err != nil { 37 | HandleErrorInternalServerError(c, err) 38 | return 39 | } 40 | c.AbortWithStatusJSON(http.StatusOK, files) 41 | } 42 | 43 | func (ctx *syncContext) download(c *gin.Context) { 44 | id := c.Param("id") 45 | filePath := c.Query("path") 46 | dir := ctx._getDir(id) 47 | c.File(filepath.Join(dir, filePath)) 48 | } 49 | 50 | func (ctx *syncContext) _getDir(id string) string { 51 | workspacePath := viper.GetString("workspace") 52 | return filepath.Join(workspacePath, id) 53 | } 54 | 55 | func newSyncContext() syncContext { 56 | return syncContext{} 57 | } 58 | 
// ListRequestData carries the parameters of a list request. Every field is
// bound from a form value or a JSON property under the same snake_case key.
type ListRequestData struct {
	PageNum  int    `form:"page_num" json:"page_num"`   // requested page number
	PageSize int    `form:"page_size" json:"page_size"` // requested page size
	SortKey  string `form:"sort_key" json:"sort_key"`   // key to sort by
	Status   string `form:"status" json:"status"`       // status value sent by the client
	Keyword  string `form:"keyword" json:"keyword"`     // keyword sent by the client
}
"go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeJob(d interface{}, err error) (res *models2.Job, err2 error) { 13 | if err != nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Job) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetJobById(id primitive.ObjectID) (res *models2.Job, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdJob).GetById(id) 25 | return convertTypeJob(d, err) 26 | } 27 | 28 | func (svc *Service) GetJob(query bson.M, opts *mongo.FindOptions) (res *models2.Job, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdJob).Get(query, opts) 30 | return convertTypeJob(d, err) 31 | } 32 | 33 | func (svc *Service) GetJobList(query bson.M, opts *mongo.FindOptions) (res []models2.Job, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdJob).GetList(query, opts) 35 | for _, doc := range l.GetModels() { 36 | d := doc.(*models2.Job) 37 | res = append(res, *d) 38 | } 39 | return res, nil 40 | } 41 | -------------------------------------------------------------------------------- /ds/sqlite.go: -------------------------------------------------------------------------------- 1 | package ds 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "github.com/crawlab-team/crawlab-core/models/models" 6 | "github.com/crawlab-team/crawlab-core/models/service" 7 | utils2 "github.com/crawlab-team/crawlab-core/utils" 8 | "github.com/crawlab-team/go-trace" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | type SqliteService struct { 13 | SqlService 14 | } 15 | 16 | func NewDataSourceSqliteService(colId primitive.ObjectID, dsId primitive.ObjectID) (svc2 interfaces.ResultService, err error) { 17 | // service 18 | svc := &SqliteService{} 19 | 20 | // dependency injection 21 | svc.modelSvc, err = service.GetService() 22 | if err != nil { 23 | return nil, trace.TraceError(err) 24 | } 25 
| 26 | // data source 27 | if dsId.IsZero() { 28 | svc.ds = &models.DataSource{} 29 | } else { 30 | svc.ds, err = svc.modelSvc.GetDataSourceById(dsId) 31 | if err != nil { 32 | return nil, trace.TraceError(err) 33 | } 34 | } 35 | 36 | // data collection 37 | svc.dc, err = svc.modelSvc.GetDataCollectionById(colId) 38 | if err != nil { 39 | return nil, trace.TraceError(err) 40 | } 41 | 42 | // session 43 | svc.s, err = utils2.GetSqliteSession(svc.ds) 44 | if err != nil { 45 | return nil, trace.TraceError(err) 46 | } 47 | 48 | // collection 49 | svc.col = svc.s.Collection(svc.dc.Name) 50 | 51 | return svc, nil 52 | } 53 | -------------------------------------------------------------------------------- /models/models/result.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | "go.mongodb.org/mongo-driver/bson" 7 | "go.mongodb.org/mongo-driver/bson/primitive" 8 | ) 9 | 10 | type Result bson.M 11 | 12 | func (r *Result) GetId() (id primitive.ObjectID) { 13 | res, ok := r.Value()["_id"] 14 | if ok { 15 | id, ok = res.(primitive.ObjectID) 16 | if ok { 17 | return id 18 | } 19 | } 20 | return id 21 | } 22 | 23 | func (r *Result) SetId(id primitive.ObjectID) { 24 | (*r)["_id"] = id 25 | } 26 | 27 | func (r *Result) Value() map[string]interface{} { 28 | return *r 29 | } 30 | 31 | func (r *Result) SetValue(key string, value interface{}) { 32 | (*r)[key] = value 33 | } 34 | 35 | func (r *Result) GetValue(key string) (value interface{}) { 36 | return (*r)[key] 37 | } 38 | 39 | func (r *Result) GetTaskId() (id primitive.ObjectID) { 40 | res := r.GetValue(constants.TaskKey) 41 | if res == nil { 42 | return id 43 | } 44 | id, _ = res.(primitive.ObjectID) 45 | return id 46 | } 47 | 48 | func (r *Result) SetTaskId(id primitive.ObjectID) { 49 | r.SetValue(constants.TaskKey, id) 50 | } 51 | 52 | type 
ResultList []Result 53 | 54 | func (l *ResultList) GetModels() (res []interfaces.Model) { 55 | for i := range *l { 56 | d := (*l)[i] 57 | res = append(res, &d) 58 | } 59 | return res 60 | } 61 | -------------------------------------------------------------------------------- /models/service/task_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | models2 "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/crawlab-team/crawlab-db/mongo" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeTask(d interface{}, err error) (res *models2.Task, err2 error) { 13 | if err != nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Task) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetTaskById(id primitive.ObjectID) (res *models2.Task, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdTask).GetById(id) 25 | return convertTypeTask(d, err) 26 | } 27 | 28 | func (svc *Service) GetTask(query bson.M, opts *mongo.FindOptions) (res *models2.Task, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdTask).Get(query, opts) 30 | return convertTypeTask(d, err) 31 | } 32 | 33 | func (svc *Service) GetTaskList(query bson.M, opts *mongo.FindOptions) (res []models2.Task, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdTask).GetList(query, opts) 35 | for _, doc := range l.GetModels() { 36 | d := doc.(*models2.Task) 37 | res = append(res, *d) 38 | } 39 | return res, nil 40 | } 41 | -------------------------------------------------------------------------------- /models/service/token_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 
| import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | models2 "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/crawlab-team/crawlab-db/mongo" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeToken(d interface{}, err error) (res *models2.Token, err2 error) { 13 | if err != nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Token) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetTokenById(id primitive.ObjectID) (res *models2.Token, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdToken).GetById(id) 25 | return convertTypeToken(d, err) 26 | } 27 | 28 | func (svc *Service) GetToken(query bson.M, opts *mongo.FindOptions) (res *models2.Token, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdToken).Get(query, opts) 30 | return convertTypeToken(d, err) 31 | } 32 | 33 | func (svc *Service) GetTokenList(query bson.M, opts *mongo.FindOptions) (res []models2.Token, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdToken).GetList(query, opts) 35 | for _, doc := range l.GetModels() { 36 | d := doc.(*models2.Token) 37 | res = append(res, *d) 38 | } 39 | return res, nil 40 | } 41 | -------------------------------------------------------------------------------- /models/service/spider_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | models2 "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/crawlab-team/crawlab-db/mongo" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeSpider(d interface{}, err error) (res 
*models2.Spider, err2 error) { 13 | if err != nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Spider) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetSpiderById(id primitive.ObjectID) (res *models2.Spider, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdSpider).GetById(id) 25 | return convertTypeSpider(d, err) 26 | } 27 | 28 | func (svc *Service) GetSpider(query bson.M, opts *mongo.FindOptions) (res *models2.Spider, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdSpider).Get(query, opts) 30 | return convertTypeSpider(d, err) 31 | } 32 | 33 | func (svc *Service) GetSpiderList(query bson.M, opts *mongo.FindOptions) (res []models2.Spider, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdSpider).GetList(query, opts) 35 | for _, doc := range l.GetModels() { 36 | d := doc.(*models2.Spider) 37 | res = append(res, *d) 38 | } 39 | return res, nil 40 | } 41 | -------------------------------------------------------------------------------- /models/models/artifact.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/interfaces" 5 | "go.mongodb.org/mongo-driver/bson/primitive" 6 | ) 7 | 8 | type Artifact struct { 9 | Id primitive.ObjectID `bson:"_id" json:"_id"` 10 | Col string `bson:"_col" json:"_col"` 11 | Del bool `bson:"_del" json:"_del"` 12 | TagIds []primitive.ObjectID `bson:"_tid" json:"_tid"` 13 | Sys *ArtifactSys `bson:"_sys" json:"_sys"` 14 | Obj interface{} `bson:"_obj" json:"_obj"` 15 | } 16 | 17 | func (a *Artifact) GetId() (id primitive.ObjectID) { 18 | return a.Id 19 | } 20 | 21 | func (a *Artifact) SetId(id primitive.ObjectID) { 22 | a.Id = id 23 | } 24 | 25 | func (a *Artifact) GetSys() (sys interfaces.ModelArtifactSys) { 26 | if a.Sys == nil { 27 | a.Sys = &ArtifactSys{} 28 | } 29 | return a.Sys 30 | } 31 | 32 
| func (a *Artifact) GetTagIds() (ids []primitive.ObjectID) { 33 | return a.TagIds 34 | } 35 | 36 | func (a *Artifact) SetTagIds(ids []primitive.ObjectID) { 37 | a.TagIds = ids 38 | } 39 | 40 | func (a *Artifact) SetObj(obj interfaces.Model) { 41 | a.Obj = obj 42 | } 43 | 44 | func (a *Artifact) SetDel(del bool) { 45 | a.Del = del 46 | } 47 | 48 | type ArtifactList []Artifact 49 | 50 | func (l *ArtifactList) GetModels() (res []interfaces.Model) { 51 | for i := range *l { 52 | d := (*l)[i] 53 | res = append(res, &d) 54 | } 55 | return res 56 | } 57 | -------------------------------------------------------------------------------- /models/service/git_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | models2 "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/crawlab-team/crawlab-db/mongo" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeGit(d interface{}, err error) (res *models2.Git, err2 error) { 13 | if err != nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Git) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetGitById(id primitive.ObjectID) (res *models2.Git, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdGit).GetById(id) 25 | return convertTypeGit(d, err) 26 | } 27 | 28 | func (svc *Service) GetGit(query bson.M, opts *mongo.FindOptions) (res *models2.Git, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdGit).Get(query, opts) 30 | return convertTypeGit(d, err) 31 | } 32 | 33 | func (svc *Service) GetGitList(query bson.M, opts *mongo.FindOptions) (res []models2.Git, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdGit).GetList(query, opts) 35 | if l == 
nil { 36 | return nil, nil 37 | } 38 | for _, doc := range l.GetModels() { 39 | d := doc.(*models2.Git) 40 | res = append(res, *d) 41 | } 42 | return res, nil 43 | } 44 | -------------------------------------------------------------------------------- /notification/mobile.go: -------------------------------------------------------------------------------- 1 | package notification 2 | 3 | import ( 4 | "errors" 5 | "github.com/crawlab-team/go-trace" 6 | "github.com/imroc/req" 7 | "strings" 8 | ) 9 | 10 | type ResBody struct { 11 | ErrCode int `json:"errcode"` 12 | ErrMsg string `json:"errmsg"` 13 | } 14 | 15 | func SendMobileNotification(webhook string, title string, content string) error { 16 | // request header 17 | header := req.Header{ 18 | "Content-Type": "application/json; charset=utf-8", 19 | } 20 | 21 | // request data 22 | data := req.Param{ 23 | "msgtype": "markdown", 24 | "markdown": req.Param{ 25 | "title": title, 26 | "text": content, 27 | "content": content, 28 | }, 29 | "at": req.Param{ 30 | "atMobiles": []string{}, 31 | "isAtAll": false, 32 | }, 33 | "text": content, 34 | } 35 | if strings.Contains(strings.ToLower(webhook), "feishu") { 36 | data = req.Param{ 37 | "msg_type": "text", 38 | "content": req.Param{ 39 | "text": content, 40 | }, 41 | } 42 | } 43 | 44 | // perform request 45 | res, err := req.Post(webhook, header, req.BodyJSON(&data)) 46 | if err != nil { 47 | return trace.TraceError(err) 48 | } 49 | 50 | // parse response 51 | var resBody ResBody 52 | if err := res.ToJSON(&resBody); err != nil { 53 | return trace.TraceError(err) 54 | } 55 | 56 | // validate response code 57 | if resBody.ErrCode != 0 { 58 | return errors.New(resBody.ErrMsg) 59 | } 60 | 61 | return nil 62 | } 63 | -------------------------------------------------------------------------------- /middlewares/auth.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | 
"github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/errors" 6 | "github.com/crawlab-team/crawlab-core/models/service" 7 | "github.com/crawlab-team/crawlab-core/user" 8 | "github.com/crawlab-team/crawlab-core/utils" 9 | "github.com/gin-gonic/gin" 10 | "github.com/spf13/viper" 11 | ) 12 | 13 | func AuthorizationMiddleware() gin.HandlerFunc { 14 | userSvc, _ := user.GetUserService() 15 | return func(c *gin.Context) { 16 | // disable auth for test 17 | if viper.GetBool("auth.disabled") { 18 | modelSvc, err := service.GetService() 19 | if err != nil { 20 | utils.HandleErrorInternalServerError(c, err) 21 | return 22 | } 23 | u, err := modelSvc.GetUserByUsername(constants.DefaultAdminUsername, nil) 24 | if err != nil { 25 | utils.HandleErrorInternalServerError(c, err) 26 | return 27 | } 28 | c.Set(constants.UserContextKey, u) 29 | c.Next() 30 | return 31 | } 32 | 33 | // token string 34 | tokenStr := c.GetHeader("Authorization") 35 | 36 | // validate token 37 | u, err := userSvc.CheckToken(tokenStr) 38 | if err != nil { 39 | // validation failed, return error response 40 | utils.HandleErrorUnauthorized(c, errors.ErrorHttpUnauthorized) 41 | return 42 | } 43 | 44 | // set user in context 45 | c.Set(constants.UserContextKey, u) 46 | 47 | // validation success 48 | c.Next() 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /models/service/project_service.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/errors" 5 | "github.com/crawlab-team/crawlab-core/interfaces" 6 | models2 "github.com/crawlab-team/crawlab-core/models/models" 7 | "github.com/crawlab-team/crawlab-db/mongo" 8 | "go.mongodb.org/mongo-driver/bson" 9 | "go.mongodb.org/mongo-driver/bson/primitive" 10 | ) 11 | 12 | func convertTypeProject(d interface{}, err error) (res *models2.Project, err2 error) { 13 | if err != 
nil { 14 | return nil, err 15 | } 16 | res, ok := d.(*models2.Project) 17 | if !ok { 18 | return nil, errors.ErrorModelInvalidType 19 | } 20 | return res, nil 21 | } 22 | 23 | func (svc *Service) GetProjectById(id primitive.ObjectID) (res *models2.Project, err error) { 24 | d, err := svc.GetBaseService(interfaces.ModelIdProject).GetById(id) 25 | return convertTypeProject(d, err) 26 | } 27 | 28 | func (svc *Service) GetProject(query bson.M, opts *mongo.FindOptions) (res *models2.Project, err error) { 29 | d, err := svc.GetBaseService(interfaces.ModelIdProject).Get(query, opts) 30 | return convertTypeProject(d, err) 31 | } 32 | 33 | func (svc *Service) GetProjectList(query bson.M, opts *mongo.FindOptions) (res []models2.Project, err error) { 34 | l, err := svc.GetBaseService(interfaces.ModelIdProject).GetList(query, opts) 35 | for _, doc := range l.GetModels() { 36 | d := doc.(*models2.Project) 37 | res = append(res, *d) 38 | } 39 | return res, nil 40 | } 41 | -------------------------------------------------------------------------------- /utils/cache.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-db/mongo" 6 | "go.mongodb.org/mongo-driver/bson" 7 | mongo2 "go.mongodb.org/mongo-driver/mongo" 8 | "time" 9 | ) 10 | 11 | func GetFromDbCache(key string, getFn func() (string, error)) (res string, err error) { 12 | col := mongo.GetMongoCol(constants.CacheColName) 13 | 14 | var d bson.M 15 | if err := col.Find(bson.M{ 16 | constants.CacheColKey: key, 17 | }, nil).One(&d); err != nil { 18 | if err != mongo2.ErrNoDocuments { 19 | return "", err 20 | } 21 | 22 | // get cache value 23 | res, err = getFn() 24 | if err != nil { 25 | return "", err 26 | } 27 | 28 | // save cache 29 | d = bson.M{ 30 | constants.CacheColKey: key, 31 | constants.CacheColValue: res, 32 | constants.CacheColTime: time.Now(), 33 | } 34 | if _, 
err := col.Insert(d); err != nil { 35 | return "", err 36 | } 37 | return res, nil 38 | } 39 | 40 | // type conversion 41 | r, ok := d[constants.CacheColValue] 42 | if !ok { 43 | if err := col.Delete(bson.M{constants.CacheColKey: key}); err != nil { 44 | return "", err 45 | } 46 | return GetFromDbCache(key, getFn) 47 | } 48 | res, ok = r.(string) 49 | if !ok { 50 | if err := col.Delete(bson.M{constants.CacheColKey: key}); err != nil { 51 | return "", err 52 | } 53 | return GetFromDbCache(key, getFn) 54 | } 55 | 56 | return res, nil 57 | } 58 | -------------------------------------------------------------------------------- /middlewares/auth_v2.go: -------------------------------------------------------------------------------- 1 | package middlewares 2 | 3 | import ( 4 | "github.com/crawlab-team/crawlab-core/constants" 5 | "github.com/crawlab-team/crawlab-core/errors" 6 | "github.com/crawlab-team/crawlab-core/models/service" 7 | "github.com/crawlab-team/crawlab-core/user" 8 | "github.com/crawlab-team/crawlab-core/utils" 9 | "github.com/gin-gonic/gin" 10 | "github.com/spf13/viper" 11 | ) 12 | 13 | func AuthorizationMiddlewareV2() gin.HandlerFunc { 14 | userSvc, _ := user.GetUserServiceV2() 15 | return func(c *gin.Context) { 16 | // disable auth for test 17 | if viper.GetBool("auth.disabled") { 18 | modelSvc, err := service.GetService() 19 | if err != nil { 20 | utils.HandleErrorInternalServerError(c, err) 21 | return 22 | } 23 | u, err := modelSvc.GetUserByUsername(constants.DefaultAdminUsername, nil) 24 | if err != nil { 25 | utils.HandleErrorInternalServerError(c, err) 26 | return 27 | } 28 | c.Set(constants.UserContextKey, u) 29 | c.Next() 30 | return 31 | } 32 | 33 | // token string 34 | tokenStr := c.GetHeader("Authorization") 35 | 36 | // validate token 37 | u, err := userSvc.CheckToken(tokenStr) 38 | if err != nil { 39 | // validation failed, return error response 40 | utils.HandleErrorUnauthorized(c, errors.ErrorHttpUnauthorized) 41 | return 42 | } 43 | 44 | 
// ElasticsearchResponseData holds the decoded JSON body of an Elasticsearch
// search response. Sample body:
//
//	{
//	  "took" : 6,
//	  "timed_out" : false,
//	  "_shards" : {
//	    "total" : 1,
//	    "successful" : 1,
//	    "skipped" : 0,
//	    "failed" : 0
//	  },
//	  "hits" : {
//	    "total" : {
//	      "value" : 60,
//	      "relation" : "eq"
//	    },
//	    "max_score" : 1.0,
//	    "hits" : [
//	      {
//	        "_index" : "test_table",
//	        "_id" : "c39ad9a2-9a37-49fb-b7ea-f1b55913e0af",
//	        "_score" : 1.0,
//	        "_source" : {
//	          "_tid" : "62524ac7f5f99e7ef594de64",
//	          "author" : "James Baldwin",
//	          "tags" : [
//	            "love"
//	          ],
//	          "text" : "“Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up.”"
//	        }
//	      }
//	    ]
//	  }
//	}
//
// Keys without a matching field here, such as "_shards", are dropped on
// decode.
type ElasticsearchResponseData struct {
	Took int64 `json:"took"`
	// Timeout maps to the "timed_out" key of the response, as in the sample
	// above.
	Timeout bool `json:"timed_out"`
	Hits    struct {
		Total struct {
			Value    int64  `json:"value"`
			Relation string `json:"relation"`
		} `json:"total"`
		MaxScore float64 `json:"max_score"`
		Hits     []struct {
			Index string  `json:"_index"`
			Id    string  `json:"_id"`
			Score float64 `json:"_score"`
			// Source is left untyped; its shape depends on the indexed
			// document.
			Source interface{} `json:"_source"`
		} `json:"hits"`
	} `json:"hits"`
}