├── .dockerignore ├── .gitignore ├── Dockerfile ├── Makefile ├── README.md ├── cmd └── start-concurrent-engine.go ├── demo.png ├── engine ├── concurrent.go └── type.go ├── fetcher ├── downloader.go └── fetcher.go ├── go.mod ├── go.sum ├── model └── bilibili.go ├── parser ├── aid.go ├── cid.go └── video.go ├── persist ├── type.go ├── videodiscard.go └── videomerge.go ├── scheduler └── scheduler.go └── tool ├── key.go └── path.go /.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Example user template template 3 | ### Example user template 4 | 5 | # IntelliJ project files 6 | .idea 7 | *.iml 8 | download 9 | README.md 10 | .gitignore 11 | out 12 | gen 13 | ### Go template 14 | # Binaries for programs and plugins 15 | *.exe 16 | *.exe~ 17 | *.dll 18 | *.so 19 | *.dylib 20 | 21 | # Test binary, built with `go test -c` 22 | *.test 23 | 24 | # Output of the go coverage tool, specifically when used with LiteIDE 25 | *.out 26 | 27 | # Dependency directories (remove the comment below to include it) 28 | # vendor/ 29 | 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Example user template template 3 | ### Example user template 4 | 5 | # IntelliJ project files 6 | .idea 7 | *.iml 8 | download 9 | out 10 | gen 11 | ### Go template 12 | # Binaries for programs and plugins 13 | *.exe 14 | *.exe~ 15 | *.dll 16 | *.so 17 | *.dylib 18 | 19 | # Test binary, built with `go test -c` 20 | *.test 21 | 22 | # Output of the go coverage tool, specifically when used with LiteIDE 23 | *.out 24 | 25 | # Dependency directories (remove the comment below to include it) 26 | # vendor/ 27 | 28 | /cmd/myapp.syso 29 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM golang:1.12-alpine as build-env 2 | 3 | RUN mkdir /simple-golang-crawler 4 | WORKDIR /simple-golang-crawler 5 | COPY go.mod . 6 | COPY go.sum . 7 | ENV GOPROXY="https://goproxy.io" GO111MODULE=on 8 | RUN go mod download 9 | COPY . . 10 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o /go/bin/simple-golang-crawler cmd/start-concurrent-engine.go 11 | 12 | 13 | FROM alpine:3.7 14 | RUN apk update \ 15 | && apk upgrade \ 16 | && apk add --no-cache \ 17 | ca-certificates \ 18 | && update-ca-certificates 2>/dev/null || true \ 19 | && apk add --no-cache ffmpeg 20 | COPY --from=build-env /go/bin/simple-golang-crawler /go/bin/simple-golang-crawler 21 | ENTRYPOINT ["/go/bin/simple-golang-crawler"] 22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APP_NAME=downloader 2 | 3 | build: ## Build the container 4 | docker build -t $(APP_NAME) . 5 | 6 | build-nc: ## Build the container without caching 7 | docker build --no-cache -t $(APP_NAME) . 8 | run: 9 | ## docker run -it -v ${CURDIR}/download:/download $(APP_NAME) 10 | docker run -it -v ${CURDIR}/output:/output $(APP_NAME) 11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastestBibiliDownloader 2 | 3 | ## 原项目地址:**[ FastestBilibiliDownloader](https://github.com/sodaling/FastestBilibiliDownloader)** 4 | 5 | > 项目仅用于学习交流,请勿用于任何商业用途! 
6 | 7 | ## ⭐新增 8 | 9 | 自动解析 **想要下载的视频网址 / UP主个人主页网址**,支持: 10 | 11 | - [x] [https://www.bilibili.com/video/**旧版的av号**/](#),av号是以`av`开头的**一串数字** 12 | - [x] [https://www.bilibili.com/video/**新版的BV号**/](#),BV号是以`BV`开头的**一串字符** 13 | - [x] [https://space.bilibili.com/**UP主的ID**/](#),UP主的ID是**一串数字** 14 | 15 | ![demo.png](demo.png) 16 | 17 | ## ⚠较原项目的删减 18 | 19 | + 由于FFmpeg拼接、转化耗时太长,故移除了 `video merge`中的功能 。下载后的视频为`.flv`格式。 20 | 21 | ----- 22 | 23 | ## 👍原项目说明 24 | 25 | **东半球第二快的Bilibili.com(B站)视频下载器!** 26 | 27 | 如果你想下载b站某个up主的所有视频,而且要飞快的那种,那么你可以试试这个项目-.- 28 | 29 | 目前提供两个(三个)视频下载方案: 30 | 31 | 1. 通过视频的aid,下载单个视频. 32 | 2. 通过up主的upid(b站叫mid),下载这个up主所投稿的所有视频. 33 | 3. 通过视频的BVid,下载单个视频. **(new)** 34 | 35 | 36 | > 特性: 37 | > 38 | > Github上下载b站视频代码已经有很多了.那么本下载器的特点是啥呢? 39 | > 40 | > 因为这是用Golang写的,当然了,也就利用了Golang的特性:goroutine. 41 | > 42 | > 简单来说,特点就是: 43 | > 44 | > **快!贼快!下载的视频越多越快!** 45 | > 46 | > * 当单个aid视频分了若干个part时候,或者当你选了下载up主下所有视频时候.多个视频将会同时并行下载,跑满你的网速绝对不是问题. 47 | > * 下载与合并视频并行处理.如果视频分了多个part,下载完成的同时就会立即合并.该视频的合并处理与其他视频的下载和合并同时进行且互不影响. 48 | 49 | ### 运行 50 | 51 | 下载的临时视频会存放在运行路径下的**download**文件夹下,每个视频(aid)一个文件夹,以**aid_视频标题**为文件夹名称。 52 | 最终的视频会存放在运行路径下的**output**文件夹下,每个aid一个文件夹,以**视频标题**为文件夹名称。 53 | ```shell 54 | go run cmd/start-concurrent-engine.go -h # 获得参数 55 | ``` 56 | 57 | 58 | 59 | #### 使用Golang编译环境 60 | 61 | 1. 安装Golang编译环境 62 | * Ubuntu 63 | ```shell 64 | sudo apt install golang 65 | ``` 66 | 67 | 1.1 如果你在中国大陆,那么你大概率可能或许maybe需要配置代理才能顺利进行下一步。 68 | ```shell 69 | go env -w GO111MODULE=on #启用Go Modules 70 | go env -w GOPROXY=https://goproxy.io #使用官方代理 71 | ``` 72 | 73 | 2. 一次性运行FastestBibiliDownloader 74 | 程序入口在**cmd/start-concurrent-engine.go**,只需要 75 | ```shell 76 | go run cmd/start-concurrent-engine.go -t (aid/bvid/upid) -v (id) 77 | ``` 78 | 首次运行会花时间下一大堆东西,然后按提示操作即可。 79 | 注意,合并视频需要FFmpeg的支持。不然只会下载并不会自动合并。FFmpeg的安装教程请咨询搜索引擎。 80 | 81 | 3. 
编译FastestBibiliDownloader 82 | ```shell 83 | go build cmd/start-concurrent-engine.go -t (aid/bvid/upid) -v (id) 84 | ``` 85 | 之后直接运行./start-concurrent-engine即可。 86 | 87 | #### 如果你没有Golang编译环境,或者没有FFmeg环境。那么推荐用docker方式运行。已经写好了dockefile和makefile。你只需要: 88 | 89 | ```shell 90 | $ cd FastestBilibiliDownloader 91 | $ make build #下载镜像 92 | $ make run #运行镜像 93 | ``` 94 | 95 | 96 | 97 | #### 后续有空会打包bin文件到release的。 98 | 99 | ### 感谢 100 | 101 | 1. engine部分的框架参考**ccmouse**的思路,后面自己调整了整体架构部分,非常感谢。 102 | 2. [bilibili-downloader](https://github.com/stevenjoezhang/bilibili-downloader):b站请求视频的API等等都是从这位的代码获得,本身的py代码注释也非常清晰,非常感谢。 103 | 3. @sshwy帮忙抓虫纠错 104 | 4. @justin201802不厌其烦的帮忙修改 105 | 106 | >欢迎各位提pr或者fork或者什么都行,能帮助到你的话欢迎star!疫情无聊在家磨时间的产物,粗糙了一点,欢迎各位完善~ 107 | 108 | -------------------------------------------------------------------------------- /cmd/start-concurrent-engine.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "regexp" 8 | "simple-golang-crawler/engine" 9 | "simple-golang-crawler/parser" 10 | "simple-golang-crawler/persist" 11 | "simple-golang-crawler/scheduler" 12 | "strconv" 13 | "sync" 14 | ) 15 | 16 | func main() { 17 | itemProcessFun := persist.GetItemProcessFun() 18 | var err error 19 | var wg sync.WaitGroup 20 | wg.Add(1) 21 | itemChan, err := itemProcessFun(&wg) 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | var urlInput string 27 | 28 | var idType = "else" 29 | var aid int64 30 | var upid int64 31 | var bvid string 32 | 33 | var params []string 34 | 35 | var req *engine.Request 36 | 37 | fmt.Println("欢迎使用B站视频下载器 v1.0.1") 38 | fmt.Println("项目地址: https://github.com/laorange/FastestBilibiliDownloader") 39 | fmt.Println("原项目地址:https://github.com/sodaling/FastestBilibiliDownloader") 40 | fmt.Println("\n\n支持以下几种格式的输入:") 41 | fmt.Println("· https://www.bilibili.com/video/旧版的av号/ | av号 是以`av`开头的一串数字") 42 | fmt.Println("· https://www.bilibili.com/video/新版的BV号/ | 
BV号 是以`BV`开头的一串字符") 43 | fmt.Println("· https://space.bilibili.com/UP主的ID/ | UP主的ID 是一串数字") 44 | fmt.Print("\n\n请输入想要下载的视频网址/up主个人主页网址: ") 45 | fmt.Scan(&urlInput) 46 | 47 | // bvid 48 | bvidRegexp := regexp.MustCompile(`/?(BV\w+)[/?]?`) 49 | params = bvidRegexp.FindStringSubmatch(urlInput) 50 | if params != nil { 51 | idType = "bvid" 52 | bvid = params[1] 53 | } 54 | 55 | // aid 56 | aidRegexp := regexp.MustCompile(`/?(av\d+)/?`) 57 | params = aidRegexp.FindStringSubmatch(urlInput) 58 | if params != nil { 59 | idType = "aid" 60 | aid, _ = strconv.ParseInt(params[1], 10, 64) 61 | } 62 | 63 | // upid 64 | upidRegexp := regexp.MustCompile(`space.bilibili.com/(\d+)/?`) 65 | params = upidRegexp.FindStringSubmatch(urlInput) 66 | if params != nil { 67 | idType = "upid" 68 | upid, _ = strconv.ParseInt(params[1], 10, 64) 69 | } 70 | 71 | if idType == "aid" { 72 | req = parser.GetRequestByAid(aid) 73 | } else if idType == "bvid" { 74 | aid = parser.Bv2av(bvid) 75 | req = parser.GetRequestByAid(aid) 76 | } else if idType == "upid" { 77 | req = parser.GetRequestByUpId(upid) 78 | } else { 79 | req = nil 80 | log.Fatalln("您输入的网址无法解析,请查证后重试") 81 | os.Exit(1) 82 | } 83 | 84 | queueScheduler := scheduler.NewConcurrentScheduler() 85 | conEngine := engine.NewConcurrentEngine(30, queueScheduler, itemChan) 86 | log.Println("开始下载...") 87 | conEngine.Run(req) 88 | wg.Wait() 89 | log.Print("所有视频均已下载完成。按 Ctrl+C 来退出程序。") 90 | var eof string 91 | fmt.Scan(&eof) 92 | } 93 | -------------------------------------------------------------------------------- /demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodaling/FastestBilibiliDownloader/036fd690b4430d7ad3e9fa4c6842df4dfaa17948/demo.png -------------------------------------------------------------------------------- /engine/concurrent.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 
5 | "sync" 6 | ) 7 | 8 | var _urlVisited = make(map[string]struct{}) 9 | 10 | type ConcurrentEngine struct { 11 | WorkerCount int 12 | Scheduler Scheduler 13 | ItemChan chan *Item 14 | } 15 | 16 | func NewConcurrentEngine(workerCount int, scheduler Scheduler, itemChan chan *Item) *ConcurrentEngine { 17 | return &ConcurrentEngine{WorkerCount: workerCount, Scheduler: scheduler, ItemChan: itemChan} 18 | } 19 | 20 | type Scheduler interface { 21 | Run(context.Context) 22 | GetWorkerChan() chan *Request 23 | Submit(*Request) 24 | WorkerReadyNotifier 25 | } 26 | 27 | type WorkerReadyNotifier interface { 28 | Ready(chan *Request) 29 | } 30 | 31 | func (c *ConcurrentEngine) Run(seed ...*Request) { 32 | requestCount := 0 33 | resultChan := make(chan ParseResult) 34 | ctx, cancel := context.WithCancel(context.Background()) 35 | c.Scheduler.Run(ctx) 36 | var wg sync.WaitGroup 37 | 38 | for i := 0; i < c.WorkerCount; i++ { 39 | CreateWorker(resultChan, c.Scheduler.GetWorkerChan(), c.Scheduler) 40 | } 41 | 42 | for _, req := range seed { 43 | hasVisited(req.Url) 44 | requestCount += 1 45 | c.Scheduler.Submit(req) 46 | } 47 | 48 | for { 49 | result := <-resultChan 50 | 51 | for _, item := range result.Items { 52 | wg.Add(1) 53 | go func(item *Item) { 54 | c.ItemChan <- item 55 | wg.Done() 56 | }(item) 57 | } 58 | 59 | for _, req := range result.Requests { 60 | if hasVisited(req.Url) { 61 | continue 62 | } else { 63 | requestCount += 1 64 | c.Scheduler.Submit(req) 65 | } 66 | } 67 | requestCount -= 1 68 | if requestCount == 0 { 69 | break 70 | } 71 | } 72 | 73 | cancel() 74 | wg.Wait() 75 | close(c.ItemChan) 76 | } 77 | 78 | func hasVisited(url string) bool { 79 | if _, ok := _urlVisited[url]; ok { 80 | return true 81 | } else { 82 | _urlVisited[url] = struct{}{} 83 | } 84 | return false 85 | 86 | } 87 | 88 | func CreateWorker(out chan ParseResult, in chan *Request, notifier WorkerReadyNotifier) { 89 | go func() { 90 | for { 91 | notifier.Ready(in) 92 | req := <-in 93 | ret, err 
:= work(req) 94 | if err != nil { 95 | var errRet ParseResult 96 | out <- errRet 97 | continue 98 | } 99 | out <- ret 100 | } 101 | }() 102 | } 103 | 104 | func work(request *Request) (ParseResult, error) { 105 | content, ok := request.FetchFun(request.Url) 106 | if ok != nil { 107 | return ParseResult{}, ok 108 | } 109 | result := request.ParseFunction(content, request.Url) 110 | return result, nil 111 | } 112 | -------------------------------------------------------------------------------- /engine/type.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import "simple-golang-crawler/fetcher" 4 | 5 | type ParseFunc func(contents []byte, url string) ParseResult 6 | 7 | type Request struct { 8 | Url string 9 | ParseFunction ParseFunc 10 | FetchFun fetcher.FetchFun 11 | } 12 | 13 | func NewRequest(url string, parseFunction ParseFunc, fetchFun fetcher.FetchFun) *Request { 14 | return &Request{Url: url, ParseFunction: parseFunction, FetchFun: fetchFun} 15 | } 16 | 17 | type ParseResult struct { 18 | Requests []*Request 19 | Items []*Item 20 | } 21 | 22 | type Item struct { 23 | Payload interface{} 24 | } 25 | 26 | func NewItem(payload interface{}) *Item { 27 | return &Item{Payload: payload} 28 | } 29 | -------------------------------------------------------------------------------- /fetcher/downloader.go: -------------------------------------------------------------------------------- 1 | package fetcher 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/http" 8 | "os" 9 | "path/filepath" 10 | "simple-golang-crawler/model" 11 | "simple-golang-crawler/tool" 12 | "time" 13 | ) 14 | 15 | var _startUrlTem = "https://api.bilibili.com/x/web-interface/view?aid=%d" 16 | 17 | func GenVideoFetcher(video *model.Video) FetchFun { 18 | referer := fmt.Sprintf(_startUrlTem, video.ParCid.ParAid.Aid) 19 | for i := int64(1); i <= video.ParCid.Page; i++ { 20 | referer += fmt.Sprintf("/?p=%d", i) 21 | } 22 | 23 | return 
func(url string) (bytes []byte, err error) { 24 | <-_rateLimiter.C 25 | client := http.Client{CheckRedirect: genCheckRedirectfun(referer)} 26 | 27 | request, err := http.NewRequest("GET", url, nil) 28 | if err != nil { 29 | log.Fatalln(url, err) 30 | return nil, err 31 | } 32 | request.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0") 33 | request.Header.Set("Accept", "*/*") 34 | request.Header.Set("Accept-Language", "en-US,en;q=0.5") 35 | request.Header.Set("Accept-Encoding", "gzip, deflate, br") 36 | request.Header.Set("Range", "bytes=0-") 37 | request.Header.Set("Referer", referer) 38 | request.Header.Set("Origin", "https://www.bilibili.com") 39 | request.Header.Set("Connection", "keep-alive") 40 | 41 | resp, err := client.Do(request) 42 | if err != nil { 43 | log.Fatalf("下载 %d 时出错, 错误信息:%s", video.ParCid.Cid, err) 44 | return nil, err 45 | } 46 | 47 | if resp.StatusCode != http.StatusPartialContent { 48 | log.Fatalf("下载 %d 时出错, 错误码:%d", video.ParCid.Cid, resp.StatusCode) 49 | return nil, fmt.Errorf("错误码: %d", resp.StatusCode) 50 | } 51 | defer resp.Body.Close() 52 | 53 | aidPath := tool.GetAidFileDownloadDir(video.ParCid.ParAid.Aid, video.ParCid.ParAid.Title) 54 | filename := fmt.Sprintf("%d_%d.flv", video.ParCid.Page, video.Order) 55 | file, err := os.Create(filepath.Join(aidPath, filename)) 56 | if err != nil { 57 | log.Fatalln(err) 58 | os.Exit(1) 59 | } 60 | defer file.Close() 61 | 62 | log.Println("正在下载:" + video.ParCid.ParAid.Title + "\\" + filename) 63 | _, err = io.Copy(file, resp.Body) 64 | if err != nil { 65 | log.Printf("下载失败 aid: %d, cid: %d, title: %s, part: %s", 66 | video.ParCid.ParAid.Aid, video.ParCid.Cid, video.ParCid.ParAid.Title, video.ParCid.Part) 67 | log.Println("错误信息:", err) 68 | 69 | // request again 70 | go requestLater(file, resp, video) 71 | return nil, err 72 | } 73 | log.Println("下载完成:" + video.ParCid.ParAid.Title + "\\" + filename) 74 | 75 | return nil, nil 76 | } 77 | } 
78 | 79 | func genCheckRedirectfun(referer string) func(req *http.Request, via []*http.Request) error { 80 | return func(req *http.Request, via []*http.Request) error { 81 | req.Header.Set("Referer", referer) 82 | return nil 83 | } 84 | } 85 | 86 | func requestLater(file *os.File, resp *http.Response, video *model.Video) error { 87 | 88 | log.Println("连接失败,30秒后重试 (Unable to open the file due to the remote host, request in 30 seconds)") 89 | time.Sleep(time.Second * 30) 90 | 91 | _, err := io.Copy(file, resp.Body) 92 | if err != nil { 93 | log.Printf("下载失败 aid: %d, cid: %d, title: %s, part: %s again", 94 | video.ParCid.ParAid.Aid, video.ParCid.Cid, video.ParCid.ParAid.Title, video.ParCid.Part) 95 | } 96 | return err 97 | } 98 | -------------------------------------------------------------------------------- /fetcher/fetcher.go: -------------------------------------------------------------------------------- 1 | package fetcher 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "net/http" 9 | "time" 10 | 11 | "golang.org/x/net/html/charset" 12 | "golang.org/x/text/encoding" 13 | "golang.org/x/text/encoding/unicode" 14 | "golang.org/x/text/transform" 15 | ) 16 | 17 | var _rateLimiter = time.NewTicker(100 * time.Microsecond) 18 | 19 | type FetchFun func(url string) ([]byte, error) 20 | 21 | func DefaultFetcher(url string) ([]byte, error) { 22 | <-_rateLimiter.C 23 | client := http.DefaultClient 24 | request, err := http.NewRequest("GET", url, nil) 25 | if err != nil { 26 | log.Fatalf("fetch err while request :%s,and the err is %s", url, err) 27 | return nil, err 28 | } 29 | request.Header.Add("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0") 30 | 31 | resp, err := client.Do(request) 32 | if err != nil { 33 | log.Fatalf("fetch err while request :%s,and the err is %s", url, err) 34 | return nil, err 35 | } 36 | 37 | if resp.StatusCode != http.StatusOK { 38 | return nil, fmt.Errorf("wrong status code: %d", 
resp.StatusCode) 39 | } 40 | 41 | bodyReader := bufio.NewReader(resp.Body) 42 | 43 | e := determineEncoding(bodyReader) 44 | utf8Reader := transform.NewReader(bodyReader, e.NewDecoder()) 45 | defer resp.Body.Close() 46 | return ioutil.ReadAll(utf8Reader) 47 | } 48 | 49 | func determineEncoding(reader *bufio.Reader) encoding.Encoding { 50 | bytes, err := reader.Peek(1024) 51 | if err != nil { 52 | return unicode.UTF8 53 | } 54 | e, _, _ := charset.DetermineEncoding(bytes, "") 55 | return e 56 | } 57 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module simple-golang-crawler 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/go-cmd/cmd v1.2.0 7 | github.com/tidwall/gjson v1.5.0 8 | github.com/tidwall/pretty v1.0.1 // indirect 9 | golang.org/x/crypto v0.0.0-20200210222208-86ce3cb69678 // indirect 10 | golang.org/x/mod v0.2.0 // indirect 11 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2 12 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e // indirect 13 | golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 // indirect 14 | golang.org/x/text v0.3.2 15 | golang.org/x/tools v0.0.0-20200211045251-2de505fc5306 // indirect 16 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/go-cmd/cmd v1.2.0 h1:Aohz0ZG0nQbvT4z55Mh+fdegX48GSAXL3cSsbYxRfvI= 2 | github.com/go-cmd/cmd v1.2.0/go.mod h1:XgKkd0L6sv9WcYV0FS8RfG1RJCSTVHTsLeAD2pTgHt0= 3 | github.com/go-test/deep v1.0.5/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= 4 | github.com/tidwall/gjson v1.5.0 h1:QCssIUI7J0RStkzIcI4A7O6P8rDA5wi5IPf70uqKSxg= 5 | github.com/tidwall/gjson v1.5.0/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= 6 | github.com/tidwall/match v1.0.1 
h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc= 7 | github.com/tidwall/match v1.0.1/go.mod h1:LujAq0jyVjBy028G1WhWfIzbpQfMO8bBZ6Tyb0+pL9E= 8 | github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= 9 | github.com/tidwall/pretty v1.0.1 h1:WE4RBSZ1x6McVVC8S/Md+Qse8YUv6HRObAx6ke00NY8= 10 | github.com/tidwall/pretty v1.0.1/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= 11 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 12 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 13 | golang.org/x/crypto v0.0.0-20200210222208-86ce3cb69678/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 14 | golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= 15 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 16 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 17 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 18 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= 19 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 20 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 21 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 22 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 23 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 24 | golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 25 | golang.org/x/text v0.3.0/go.mod 
// VideoAid describes one video identified by its aid together with the cid
// parts that were registered for it.
type VideoAid struct {
	Aid       int64
	cidMap    map[int64]*VideoCid // registered parts keyed by cid; guarded by cidLock
	totalPage int64               // value stored by SetPage; guarded by pageLock
	Title     string
	Quality   int64
	cidLock   sync.RWMutex
	pageLock  sync.RWMutex
}

// AddCid registers videoCid under its Cid, replacing any previous entry with
// the same key. It also works on a VideoAid that was not created through
// NewVideoAidInfo: the map is allocated on first use.
func (videoAid *VideoAid) AddCid(videoCid *VideoCid) {
	videoAid.cidLock.Lock()
	defer videoAid.cidLock.Unlock()
	if videoAid.cidMap == nil {
		videoAid.cidMap = make(map[int64]*VideoCid)
	}
	videoAid.cidMap[videoCid.Cid] = videoCid
}

// GetCid returns the part registered under cid, or nil when there is none.
func (videoAid *VideoAid) GetCid(cid int64) *VideoCid {
	videoAid.cidLock.RLock()
	defer videoAid.cidLock.RUnlock()
	return videoAid.cidMap[cid]
}

// SetPage stores the total page count of the video.
func (videoAid *VideoAid) SetPage(num int64) {
	videoAid.pageLock.Lock()
	defer videoAid.pageLock.Unlock()
	videoAid.totalPage = num
}

// GetPage returns the value last stored with SetPage (zero if never set).
func (videoAid *VideoAid) GetPage() int64 {
	videoAid.pageLock.RLock()
	defer videoAid.pageLock.RUnlock()
	return videoAid.totalPage
}

// NewVideoAidInfo returns a VideoAid with the given aid and title and an
// empty cid registry.
func NewVideoAidInfo(aid int64, title string) *VideoAid {
	return &VideoAid{Aid: aid, Title: title, cidMap: make(map[int64]*VideoCid)}
}

// VideoCid describes one part (page) of a video.
type VideoCid struct {
	Cid      int64
	ParAid   *VideoAid // parent video
	Page     int64
	Part     string
	AllOrder int64 // left at zero by NewVideoCidInfo; filled in by callers
}

// Video describes one downloadable segment of a part.
type Video struct {
	Order  int64
	ParCid *VideoCid // parent part
}

// NewVideoCidInfo returns a VideoCid for the given cid, parent video, page
// number and part title. AllOrder is left at zero.
func NewVideoCidInfo(cid int64, parAid *VideoAid, page int64, part string) *VideoCid {
	return &VideoCid{Cid: cid, ParAid: parAid, Page: page, Part: part}
}
32 | 33 | return retParseResult 34 | 35 | } 36 | 37 | func getAidDetailReqList(pageInfo gjson.Result) ([]*engine.Request, int64) { 38 | 39 | var retRequests []*engine.Request 40 | var upid int64 41 | for _, i := range pageInfo.Array() { 42 | aid := i.Get("aid").Int() 43 | upid = i.Get("mid").Int() 44 | title := i.Get("title").String() 45 | title = tool.TitleEdit(title) // remove special characters 46 | reqUrl := fmt.Sprintf(_getCidUrlTemp, aid) 47 | videoAid := model.NewVideoAidInfo(aid, title) 48 | reqParseFunction := GenGetAidChildrenParseFun(videoAid) //子视频 49 | req := engine.NewRequest(reqUrl, reqParseFunction, fetcher.DefaultFetcher) 50 | retRequests = append(retRequests, req) 51 | } 52 | return retRequests, upid 53 | } 54 | 55 | // 访问up主的时候 需要翻页 56 | func getNewBilibiliUpSpaceReqList(pageInfo gjson.Result, upid int64) []*engine.Request { 57 | 58 | var retRequests []*engine.Request 59 | 60 | count := pageInfo.Get("count").Int() 61 | pn := pageInfo.Get("pn").Int() 62 | ps := pageInfo.Get("ps").Int() 63 | var extraPage int64 64 | if count%ps > 0 { 65 | extraPage = 1 66 | } 67 | totalPage := count/ps + extraPage 68 | for i := int64(1); i <= totalPage; i++ { 69 | if i == pn { 70 | continue 71 | } 72 | reqUrl := fmt.Sprintf(_getAidUrlTemp, upid, i) 73 | req := engine.NewRequest(reqUrl, UpSpaceParseFun, fetcher.DefaultFetcher) 74 | retRequests = append(retRequests, req) 75 | } 76 | return retRequests 77 | } 78 | 79 | func GetRequestByUpId(upid int64) *engine.Request { 80 | 81 | reqUrl := fmt.Sprintf(_getAidUrlTemp, upid, 1) 82 | return engine.NewRequest(reqUrl, UpSpaceParseFun, fetcher.DefaultFetcher) 83 | } 84 | 85 | // source code: https://blog.csdn.net/dotastar00/article/details/108805779 86 | func Bv2av(x string) int64 { 87 | tr = make(map[string]int) 88 | for i:=0; i<58; i++ { 89 | tr[string(table[i])] = i 90 | } 91 | r := 0 92 | for i:=0; i<6; i++ { 93 | r += tr[string(x[s[i]])] * int(math.Pow(float64(58), float64(i))) 94 | } 95 | return int64((r - add) ^ xor) 
96 | } 97 | -------------------------------------------------------------------------------- /parser/cid.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "crypto/md5" 5 | "fmt" 6 | "github.com/tidwall/gjson" 7 | "simple-golang-crawler/engine" 8 | "simple-golang-crawler/fetcher" 9 | "simple-golang-crawler/model" 10 | "simple-golang-crawler/tool" 11 | "strconv" 12 | ) 13 | 14 | var _entropy = "rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg" 15 | var _paramsTemp = "appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=" 16 | var _playApiTemp = "https://interface.bilibili.com/v2/playurl?%s&sign=%s" 17 | var _quality = "80" 18 | 19 | func GenGetAidChildrenParseFun(videoAid *model.VideoAid) engine.ParseFunc { 20 | return func(contents []byte, url string) engine.ParseResult { 21 | 22 | var retParseResult engine.ParseResult 23 | if videoAid.Title == strconv.FormatInt(videoAid.Aid, 10) { // call from aid-related, we need to get the title of the video 24 | title := gjson.GetBytes(contents, "data.title").String() 25 | title = tool.TitleEdit(title) // remove special characters 26 | videoAid.Title = title 27 | } 28 | data := gjson.GetBytes(contents, "data.pages").Array() 29 | fmt.Println("即将开始下载:", videoAid.Title) 30 | appKey, sec := tool.GetAppKey(_entropy) 31 | 32 | var videoTotalPage int64 33 | for _, i := range data { 34 | cid := i.Get("cid").Int() 35 | page := i.Get("page").Int() 36 | part := i.Get("part").String() 37 | part = tool.TitleEdit(part) //remove special characters 38 | videoCid := model.NewVideoCidInfo(cid, videoAid, page, part) 39 | videoTotalPage += 1 40 | cidStr := strconv.FormatInt(videoCid.Cid, 10) 41 | 42 | params := fmt.Sprintf(_paramsTemp, appKey, cidStr, _quality, _quality) 43 | chksum := fmt.Sprintf("%x", md5.Sum([]byte(params+sec))) 44 | 45 | urlApi := fmt.Sprintf(_playApiTemp, params, chksum) 46 | 47 | req := engine.NewRequest(urlApi, GenVideoDownloadParseFun(videoCid), 
fetcher.DefaultFetcher) 48 | retParseResult.Requests = append(retParseResult.Requests, req) 49 | } 50 | 51 | videoAid.SetPage(videoTotalPage) 52 | item := engine.NewItem(videoAid) 53 | retParseResult.Items = append(retParseResult.Items, item) 54 | 55 | return retParseResult 56 | } 57 | } 58 | 59 | func GetRequestByAid(aid int64) *engine.Request { 60 | reqUrl := fmt.Sprintf(_getCidUrlTemp, aid) 61 | videoAid := model.NewVideoAidInfo(aid, fmt.Sprintf("%d", aid)) 62 | reqParseFunction := GenGetAidChildrenParseFun(videoAid) 63 | req := engine.NewRequest(reqUrl, reqParseFunction, fetcher.DefaultFetcher) 64 | return req 65 | } 66 | -------------------------------------------------------------------------------- /parser/video.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | 5 | "simple-golang-crawler/engine" 6 | "simple-golang-crawler/fetcher" 7 | "simple-golang-crawler/model" 8 | 9 | "github.com/tidwall/gjson" 10 | ) 11 | 12 | // 大视频所以分成了不同部分提交,但是最终显示的只有一个视频文件 13 | func GenVideoDownloadParseFun(videoCid *model.VideoCid) engine.ParseFunc { 14 | return func(contents []byte, url string) engine.ParseResult { 15 | retParseResult := engine.ParseResult{} 16 | 17 | durlSlice := gjson.GetBytes(contents, "durl").Array() 18 | videoCid.AllOrder = int64(len(durlSlice)) 19 | item := engine.NewItem(videoCid) 20 | retParseResult.Items = append(retParseResult.Items, item) 21 | 22 | for _, i := range durlSlice { 23 | video := &model.Video{Order: i.Get("order").Int(), ParCid: videoCid} 24 | videoUrl := i.Get("url").String() 25 | req := engine.NewRequest(videoUrl, recordCidParseFun(video), fetcher.GenVideoFetcher(video)) 26 | retParseResult.Requests = append(retParseResult.Requests, req) 27 | } 28 | return retParseResult 29 | } 30 | } 31 | 32 | func recordCidParseFun(Video *model.Video) engine.ParseFunc { 33 | return func(contents []byte, url string) engine.ParseResult { 34 | var retResult engine.ParseResult 35 | 
item := engine.NewItem(Video) 36 | retResult.Items = append(retResult.Items, item) 37 | return retResult 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /persist/type.go: -------------------------------------------------------------------------------- 1 | package persist 2 | 3 | import ( 4 | "fmt" 5 | "simple-golang-crawler/engine" 6 | "simple-golang-crawler/tool" 7 | "sync" 8 | ) 9 | 10 | type GetItemChan func(wg *sync.WaitGroup) (chan *engine.Item, error) 11 | 12 | func GetItemProcessFun() GetItemChan { 13 | var itemProcessFun GetItemChan 14 | if !tool.CheckFfmegStatus() { 15 | fmt.Println("Can't locate your ffmeg.The video your download can't be merged") 16 | itemProcessFun = VideoItemCleaner 17 | } else { 18 | itemProcessFun = VideoItemProcessor 19 | } 20 | 21 | return itemProcessFun 22 | } 23 | -------------------------------------------------------------------------------- /persist/videodiscard.go: -------------------------------------------------------------------------------- 1 | package persist 2 | 3 | import ( 4 | "log" 5 | "simple-golang-crawler/engine" 6 | "sync" 7 | ) 8 | 9 | func VideoItemCleaner(wgOutside *sync.WaitGroup) (chan *engine.Item, error) { 10 | out := make(chan *engine.Item) 11 | go func() { 12 | defer wgOutside.Done() 13 | itemCount := 0 14 | for item := range out { 15 | log.Printf("Item Saver:got item "+ 16 | "#%d: %v", itemCount, item) 17 | itemCount++ 18 | } 19 | }() 20 | return out, nil 21 | } 22 | -------------------------------------------------------------------------------- /persist/videomerge.go: -------------------------------------------------------------------------------- 1 | package persist 2 | 3 | import ( 4 | "fmt" 5 | "simple-golang-crawler/engine" 6 | "simple-golang-crawler/model" 7 | "sync" 8 | ) 9 | 10 | var _videoPageMap = make(map[int64]map[int64]int64) 11 | 12 | //var _contactFile2Name = "contactCid.txt" 13 | //var _videoOutputNameExt = ".mp4" 14 | var xMap = 
make(map[int64]map[int64]*model.VideoCid) 15 | 16 | func VideoItemProcessor(wgOutside *sync.WaitGroup) (chan *engine.Item, error) { 17 | out := make(chan *engine.Item) 18 | 19 | go func() { 20 | defer wgOutside.Done() 21 | //var wgInside sync.WaitGroup 22 | for item := range out { 23 | 24 | switch x := item.Payload.(type) { 25 | case *model.VideoAid: 26 | _videoPageMap[x.Aid] = make(map[int64]int64) 27 | xMap[x.Aid] = make(map[int64]*model.VideoCid) 28 | 29 | case *model.VideoCid: 30 | _videoPageMap[x.ParAid.Aid][x.Page] = x.AllOrder 31 | xMap[x.ParAid.Aid][x.Page] = x // save as video.ParCid with type of *model.VideoCid 32 | 33 | case *model.Video: 34 | _videoPageMap[x.ParCid.ParAid.Aid][x.ParCid.Page] -= 1 35 | if _videoPageMap[x.ParCid.ParAid.Aid][x.ParCid.Page] == 0 { 36 | delete(_videoPageMap[x.ParCid.ParAid.Aid], x.ParCid.Page) 37 | } 38 | 39 | if len(_videoPageMap[x.ParCid.ParAid.Aid]) == 0 { //当整个列表是空的时执行,即当最后一个文件下载完 40 | //wgInside.Add(1) 41 | //go mergeVideo_mod(_x_map[x.ParCid.ParAid.Aid],&wgInside) 42 | } 43 | 44 | default: 45 | panic(fmt.Sprintf("Unexpected type %T: %v", x, x)) 46 | } 47 | 48 | } 49 | //wgInside.Wait() 50 | }() 51 | return out, nil 52 | } 53 | 54 | //func mergeVideo_mod(x_map map[int64]*model.VideoCid, wg *sync.WaitGroup) { 55 | // defer wg.Done() 56 | // videoTmpParCid := x_map[int64(1)] //从一个子视频中获取视频总名称和aid (assume: 子视频的cid不同但aid和标题是一致的) 57 | // 58 | // aidDirPath := tool.GetAidFileDownloadDir(videoTmpParCid.ParAid.Aid, videoTmpParCid.ParAid.Title) 59 | // contactCidTxtPath := filepath.Join(aidDirPath, _contactFile2Name) 60 | // mp4DirPath := tool.GetMp4Dir(videoTmpParCid.ParAid.Title) 61 | // 62 | // log.Println(videoTmpParCid.ParAid.Title, " download completed. 
Start to merge videos now.") 63 | // for i := int64(1); i <= videoTmpParCid.ParAid.GetPage(); i++ { 64 | // videoParCid := x_map[i] 65 | // 66 | // // merge small parts in each cid 67 | // err := createMergeCidInfoTxt(aidDirPath, videoParCid.Page, videoParCid.AllOrder) 68 | // if err != nil { 69 | // log.Printf("Something wrong while merging video %d.", videoParCid.ParAid.Aid) 70 | // return 71 | // } 72 | // cidFilename := fmt.Sprintf("%d.flv", videoParCid.Page) 73 | // cidOutput := filepath.Join(aidDirPath, cidFilename) 74 | // command := []string{"ffmpeg", "-f", "concat", "-safe", "0", "-i", contactCidTxtPath, "-c", "copy", cidOutput} 75 | // //log.Println(command) 76 | // findCmd := cmd.NewCmd(command[0], command[1:]...) 77 | // <-findCmd.Start() 78 | // 79 | // //convert from flv to mp4 80 | // mp4Filename := videoParCid.Part + ".mp4" 81 | // mp4Output := filepath.Join(mp4DirPath, mp4Filename) 82 | // log.Println(videoParCid.ParAid.Title+"/"+mp4Filename, " merge completed. Start to convert to mp4.") 83 | // command_new := []string{"ffmpeg", "-i", cidOutput, mp4Output} 84 | // //log.Println(command_new) 85 | // findCmd_new := cmd.NewCmd(command_new[0], command_new[1:]...) 
86 | // <-findCmd_new.Start() 87 | // log.Println("Video ", videoParCid.ParAid.Title+"/"+mp4Filename, " merge and conversion is complete.") 88 | // 89 | // // free the map 90 | // delete(_x_map[videoParCid.ParAid.Aid], i) 91 | // } 92 | // 93 | // // can comment out the line below for debugging 94 | // removeTempFile(aidDirPath, _contactFile2Name) 95 | //} 96 | // 97 | //func createMergeCidInfoTxt(aidPath string, cidPage int64, cidAllOrder int64) error { 98 | // videoCidPathTemp := "file '" + filepath.Join(aidPath, "%d_%d.flv") + "'\n" 99 | // txtPath := filepath.Join(aidPath, _contactFile2Name) 100 | // 101 | // file, err := os.Create(txtPath) 102 | // if err != nil { 103 | // return err 104 | // } 105 | // defer file.Close() 106 | // strBuilder := strings.Builder{} 107 | // for i := int64(1); i <= cidAllOrder; i++ { 108 | // strBuilder.WriteString(fmt.Sprintf(videoCidPathTemp, cidPage, i)) 109 | // } 110 | // _, err = fmt.Fprintln(file, strBuilder.String()) 111 | // return err 112 | //} 113 | // 114 | //func removeTempFile(dir, excludeFile string) error { 115 | // log.Println("Merge is completed, start to remove all temporary files.") 116 | // 117 | // d, err := os.Open(dir) 118 | // if err != nil { 119 | // return err 120 | // } 121 | // defer d.Close() 122 | // names, err := d.Readdirnames(-1) 123 | // if err != nil { 124 | // return err 125 | // } 126 | // for _, name := range names { 127 | // if name == excludeFile { 128 | // continue 129 | // } 130 | // err = os.RemoveAll(filepath.Join(dir, name)) 131 | // if err != nil { 132 | // return err 133 | // } 134 | // } 135 | // return nil 136 | //} 137 | -------------------------------------------------------------------------------- /scheduler/scheduler.go: -------------------------------------------------------------------------------- 1 | package scheduler 2 | 3 | import ( 4 | "context" 5 | "simple-golang-crawler/engine" 6 | ) 7 | 8 | type ConcurrentScheduler struct { 9 | RequestsChan chan *engine.Request 10 | 
// GetAppKey decodes an obfuscated "appkey:sec" pair.
//
// The input is reversed rune by rune, every rune is shifted up by 2, and the
// result is split on ":". The first segment is returned as appkey and the
// second as sec. If the decoded text contains no ":" (including the empty
// input), sec is returned as "" instead of panicking on a missing element.
func GetAppKey(entropy string) (appkey, sec string) {
	src := []rune(entropy)

	// Reverse and shift in one pass into a fresh slice.
	decoded := make([]rune, len(src))
	for i, r := range src {
		decoded[len(src)-1-i] = r + 2
	}

	// strings.Split with a non-empty separator always yields at least one
	// element, so parts[0] is safe; parts[1] exists only when a ":" was found.
	parts := strings.Split(string(decoded), ":")
	if len(parts) < 2 {
		return parts[0], ""
	}
	return parts[0], parts[1]
}
// titleSanitizer removes, in a single pass, every character that TitleEdit
// strips from a title. A *strings.Replacer is safe for concurrent use.
var titleSanitizer = strings.NewReplacer(
	":", "",
	"\\", "",
	"/", "",
	"*", "",
	"?", "",
	"\"", "",
	"<", "",
	">", "",
	"|", "",
	".", "",
)

// mustMakeDirUnderCwd joins elem onto the current working directory, creates
// that directory (and any missing parents) with mode 0777, and returns its
// path. It panics if the working directory cannot be determined or the
// directory cannot be created.
func mustMakeDirUnderCwd(elem ...string) string {
	curDir, err := os.Getwd()
	if err != nil {
		panic(err)
	}

	fullDirPath := filepath.Join(append([]string{curDir}, elem...)...)
	if err := os.MkdirAll(fullDirPath, 0777); err != nil {
		panic(err)
	}
	return fullDirPath
}

// GetAidFileDownloadDir returns "<cwd>/download/<aid>_<title>", creating the
// directory if needed. It panics on failure.
func GetAidFileDownloadDir(aid int64, title string) string {
	return mustMakeDirUnderCwd("download", fmt.Sprintf("%d_%s", aid, title))
}

// GetMp4Dir returns "<cwd>/output/<title>", creating the directory if needed.
// It panics on failure.
func GetMp4Dir(title string) string {
	return mustMakeDirUnderCwd("output", title)
}

// FileExist reports whether os.Stat succeeds for fileName. Any Stat error
// (missing file, permission problem, ...) yields false.
func FileExist(fileName string) bool {
	_, err := os.Stat(fileName)
	return err == nil
}

// CheckFfmegStatus reports whether an "ffmpeg" executable can be found via
// exec.LookPath.
func CheckFfmegStatus() bool {
	_, err := exec.LookPath("ffmpeg")
	return err == nil
}

// TitleEdit removes the characters : \ / * ? " < > | and . from title.
// It is meant to be applied before a title or part name is saved.
func TitleEdit(title string) string {
	return titleSanitizer.Replace(title)
}