├── .github ├── FUNDING.yml └── workflows │ ├── lsif.yml │ └── go.yml ├── util ├── const.go ├── cdn_test.go ├── cdn.go └── signature.go ├── .gitignore ├── LICENSE ├── go.mod ├── source ├── source.go ├── update_member.go ├── create_video.go └── update_video_meta.go ├── README.md ├── main.go └── go.sum /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: wuhan005 2 | -------------------------------------------------------------------------------- /util/const.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package util 6 | 7 | const UserAgent = `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36` 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | .idea/ 18 | .bin/ 19 | .task 20 | .envrc -------------------------------------------------------------------------------- /.github/workflows/lsif.yml: -------------------------------------------------------------------------------- 1 | name: LSIF 2 | on: 3 | push: 4 | paths: 5 | - '**.go' 6 | - '.github/workflows/lsif.yml' 7 | jobs: 8 | lsif-go: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Generate LSIF data 13 | uses: sourcegraph/lsif-go-action@master 14 | - name: Upload LSIF data to 
sourcegraph.com 15 | continue-on-error: true 16 | uses: docker://sourcegraph/src-cli:latest 17 | with: 18 | args: lsif upload -github-token=${{ secrets.GITHUB_TOKEN }} 19 | -------------------------------------------------------------------------------- /util/cdn_test.go: --------------------------------------------------------------------------------
// Copyright 2021 E99p1ant. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package util

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestConvertSignatureCDN checks that a signed CDN URL is rewritten to its
// unsigned form: the "-sign" marker is removed from the host and the whole
// query string (expiry, signature, from) is dropped.
func TestConvertSignatureCDN(t *testing.T) {
	cdnURL := "https://p26-sign.douyinpic.com/obj/tos-cn-i-0813/0c9111db102d40f89792c5aa18e14581?x-expires=1634058000&x-signature=DJ80mn6D3slXuQZWlwFKkLKDENI%3D&from=4257465056_large"
	got := ConvertSignatureCDN(cdnURL)
	want := "https://p26.douyinpic.com/obj/tos-cn-i-0813/0c9111db102d40f89792c5aa18e14581"
	assert.Equal(t, want, got)
}

// TestIsGIFImage checks IsGIFImage against one URL expected to be reported as
// a GIF and one expected to be reported as a static image.
//
// NOTE: IsGIFImage performs a real HTTP request to the CDN, so this test
// depends on network access and on both remote objects still being served.
func TestIsGIFImage(t *testing.T) {
	gifURL := "https://p6.douyinpic.com/obj/tos-cn-p-0015/a9a12bbd889f41e28fabfcd5669a266e_1632729560"
	got := IsGIFImage(gifURL)
	assert.True(t, got)

	staticURL := "https://p3.douyinpic.com/obj/tos-cn-i-0813/0c9111db102d40f89792c5aa18e14581"
	got = IsGIFImage(staticURL)
	assert.False(t, got)
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 A-SOUL Video 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and 
to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: 3 | push: 4 | branches: [ master ] 5 | paths: 6 | - '**.go' 7 | - 'go.mod' 8 | - '.github/workflows/go.yml' 9 | pull_request: 10 | paths: 11 | - '**.go' 12 | - 'go.mod' 13 | - '.github/workflows/go.yml' 14 | env: 15 | GOPROXY: "https://proxy.golang.org" 16 | 17 | jobs: 18 | lint: 19 | name: Lint 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout code 23 | uses: actions/checkout@v2 24 | - name: Run golangci-lint 25 | uses: golangci/golangci-lint-action@v2 26 | with: 27 | version: v1.37.0 28 | args: --timeout=30m 29 | 30 | test: 31 | name: Test 32 | runs-on: ubuntu-latest 33 | steps: 34 | - name: Install Go 35 | uses: actions/setup-go@v2 36 | with: 37 | go-version: 1.18.x 38 | - name: Checkout code 39 | uses: actions/checkout@v2 40 | - name: Run tests 41 | run: go test -v -race ./... 
42 | env: 43 | PGPORT: 5432 44 | PGHOST: localhost 45 | PGUSER: postgres 46 | PGPASSWORD: postgres 47 | PGSSLMODE: disable 48 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/asoul-sig/acao 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/asoul-sig/asoul-video v0.0.0-20220325111224-deaf4a826187 7 | github.com/flamego/flamego v0.0.0-20210913073131-adc6656c34a1 8 | github.com/json-iterator/go v1.1.12 9 | github.com/pkg/errors v0.9.1 10 | github.com/robertkrimen/otto v0.0.0-20210614181706-373ff5438452 11 | github.com/stretchr/testify v1.7.0 12 | github.com/thanhpk/randstr v1.0.4 13 | unknwon.dev/clog/v2 v2.2.0 14 | ) 15 | 16 | require ( 17 | github.com/alecthomas/participle/v2 v2.0.0-alpha7 // indirect 18 | github.com/davecgh/go-spew v1.1.1 // indirect 19 | github.com/fatih/color v1.13.0 // indirect 20 | github.com/mattn/go-colorable v0.1.10 // indirect 21 | github.com/mattn/go-isatty v0.0.14 // indirect 22 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 23 | github.com/modern-go/reflect2 v1.0.2 // indirect 24 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 25 | github.com/pmezard/go-difflib v1.0.0 // indirect 26 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6 // indirect 27 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect 28 | gopkg.in/sourcemap.v1 v1.0.5 // indirect 29 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect 30 | ) 31 | -------------------------------------------------------------------------------- /source/source.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package source 6 | 7 | import ( 8 | "io" 9 | "net/http" 10 | 11 | "github.com/asoul-sig/asoul-video/pkg/model" 12 | jsoniter "github.com/json-iterator/go" 13 | "github.com/pkg/errors" 14 | 15 | "github.com/asoul-sig/acao/util" 16 | ) 17 | 18 | var asoul = []model.MemberSecUID{ 19 | model.MemberSecUIDAva, 20 | model.MemberSecUIDBella, 21 | model.MemberSecUIDCarol, 22 | model.MemberSecUIDDiana, 23 | model.MemberSecUIDEileen, 24 | model.MemberSecUIDAcao, 25 | } 26 | 27 | type Result struct { 28 | Data jsoniter.RawMessage 29 | End bool 30 | } 31 | 32 | var Sources = make(map[string]Source) 33 | 34 | type Source interface { 35 | String() string 36 | Scrap(result chan Result) 37 | } 38 | 39 | func Register(source Source) { 40 | Sources[source.String()] = source 41 | } 42 | 43 | func SimpleScrap(method, url string) (jsoniter.RawMessage, error) { 44 | req, err := http.NewRequest(method, url, nil) 45 | if err != nil { 46 | return nil, errors.Wrap(err, "new request") 47 | } 48 | req.Header.Set("User-Agent", util.UserAgent) 49 | 50 | resp, err := http.DefaultClient.Do(req) 51 | if err != nil { 52 | return nil, errors.Wrap(err, "request") 53 | } 54 | defer func() { _ = resp.Body.Close() }() 55 | 56 | bodyBytes, err := io.ReadAll(resp.Body) 57 | if err != nil { 58 | return nil, errors.Wrap(err, "read response body") 59 | } 60 | 61 | if resp.StatusCode/100 != 2 { 62 | return nil, errors.Errorf("unexpected status code %d: %q", resp.StatusCode, string(bodyBytes)) 63 | } 64 | 65 | return bodyBytes, nil 66 | } 67 | -------------------------------------------------------------------------------- /util/cdn.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package util 6 | 7 | import ( 8 | "net/http" 9 | "net/url" 10 | "strings" 11 | 12 | jsoniter "github.com/json-iterator/go" 13 | ) 14 | 15 | // ConvertSignatureCDN converts the temporary CDN URL to long period URL. 16 | func ConvertSignatureCDN(cdnURL string) string { 17 | u, err := url.Parse(cdnURL) 18 | if err != nil { 19 | return cdnURL 20 | } 21 | 22 | u.Host = strings.ReplaceAll(u.Host, "-sign", "") 23 | u.RawQuery = "" // Clean the signature in query. 24 | 25 | return u.String() 26 | } 27 | 28 | type imageInfo struct { 29 | Width int `json:"width"` 30 | Height int `json:"height"` 31 | Format string `json:"format"` 32 | Size int `json:"size"` 33 | Md5 string `json:"md5"` 34 | } 35 | 36 | // IsGIFImage checks whether the given CDN image file is a GIF image. 37 | func IsGIFImage(cdnURL string) bool { 38 | cdnURL = strings.ReplaceAll(cdnURL, "/cdn/", "/") 39 | cdnURL = strings.ReplaceAll(cdnURL, "/obj/", "/") 40 | infoURL := strings.SplitN(cdnURL, "~", 1)[0] + "~info" 41 | 42 | req, err := http.NewRequest(http.MethodGet, infoURL, nil) 43 | if err != nil { 44 | return false 45 | } 46 | req.Header.Set("user-agent", UserAgent) 47 | 48 | client := http.Client{} 49 | resp, err := client.Do(req) 50 | if err != nil { 51 | return false 52 | } 53 | defer func() { _ = resp.Body.Close() }() 54 | 55 | if resp.StatusCode/100 != 2 { 56 | return false 57 | } 58 | 59 | var info imageInfo 60 | if err := jsoniter.NewDecoder(resp.Body).Decode(&info); err != nil { 61 | return false 62 | } 63 | return info.Format == "gif" || info.Format == "webp" 64 | } 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🦙 acao ![Go](https://github.com/asoul-video/acao/workflows/Go/badge.svg) [![Go Report Card](https://goreportcard.com/badge/github.com/asoul-video/acao)](https://goreportcard.com/report/github.com/asoul-video/acao) 
[![Sourcegraph](https://img.shields.io/badge/view%20on-Sourcegraph-brightgreen.svg?logo=sourcegraph)](https://sourcegraph.com/github.com/asoul-video/acao) 2 | 3 | acao(阿草), the tool man for data scraping of https://asoul.video/. 4 | 5 | ## Deploy to Aliyun serverless function with [Raika](https://github.com/serverless-moe/Raika) 6 | 7 | ### `update_member` Update A-SOUL member profile. 8 | 9 | ```bash 10 | $ GOOS=linux go build . 11 | 12 | $ Raika function create \ 13 | --name asoul_video_update_member \ 14 | --memory 128 \ 15 | --init-timeout 300 \ 16 | --runtime-timeout 600 \ 17 | --binary-file acao \ 18 | --trigger=cron \ 19 | --cron="0 30 * * * *" \ 20 | --env SOURCE_REPORT_TYPE=update_member \ 21 | --env SOURCE_REPORT_URL=https://asoul.video/source/report \ 22 | --env SOURCE_REPORT_KEY= \ 23 | --platform aliyun 24 | ``` 25 | 26 | ### `create_video` Fetch A-SOUL member's videos from Douyin. 27 | 28 | ```bash 29 | $ GOOS=linux go build . 30 | 31 | $ Raika function create \ 32 | --name asoul_video_create_video \ 33 | --memory 128 \ 34 | --init-timeout 300 \ 35 | --runtime-timeout 600 \ 36 | --binary-file acao \ 37 | --trigger=cron \ 38 | --cron="0 30 * * * *" \ 39 | --env SOURCE_REPORT_TYPE=create_video \ 40 | --env SOURCE_REPORT_URL=https://asoul.video/source/report \ 41 | --env SOURCE_REPORT_KEY= \ 42 | --platform aliyun 43 | ``` 44 | 45 | ### `update_video_meta` Update video metadata, especially the created time. 46 | 47 | ```bash 48 | $ GOOS=linux go build . 
49 | 50 | $ Raika function create \ 51 | --name asoul_video_update_video_meta \ 52 | --memory 128 \ 53 | --init-timeout 300 \ 54 | --runtime-timeout 600 \ 55 | --binary-file acao \ 56 | --trigger=cron \ 57 | --cron="0 30 * * * *" \ 58 | --env SOURCE_REPORT_TYPE=update_video_meta \ 59 | --env SOURCE_REPORT_URL=https://asoul.video/source/report \ 60 | --env SOURCE_REPORT_KEY= \ 61 | --platform aliyun 62 | ``` 63 | 64 | ## License 65 | 66 | MIT 67 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "net/http" 7 | "os" 8 | 9 | "github.com/flamego/flamego" 10 | jsoniter "github.com/json-iterator/go" 11 | "github.com/pkg/errors" 12 | log "unknwon.dev/clog/v2" 13 | 14 | "github.com/asoul-sig/asoul-video/pkg/model" 15 | 16 | "github.com/asoul-sig/acao/source" 17 | ) 18 | 19 | func main() { 20 | defer log.Stop() 21 | err := log.NewConsole() 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | reportType := os.Getenv("SOURCE_REPORT_TYPE") 27 | 28 | src, ok := source.Sources[reportType] 29 | if !ok { 30 | log.Fatal("Report type not found") 31 | } 32 | 33 | f := flamego.Classic() 34 | 35 | f.Post("/invoke", func() { 36 | resultChan := make(chan source.Result, 5) 37 | go src.Scrap(resultChan) 38 | 39 | for result := range resultChan { 40 | if result.End { 41 | close(resultChan) 42 | break 43 | } 44 | 45 | var err error 46 | for i := 1; i <= 5; i++ { // Retry 5 times. 
47 | log.Trace("Report data to backend...") 48 | if err = reportData(model.ReportType(reportType), result.Data); err != nil { 49 | log.Warn("Failed to report data: %v, retry %d / 5", err, i) 50 | continue 51 | } 52 | } 53 | if err != nil { 54 | log.Error("Failed to report data: %v", err) 55 | } 56 | } 57 | }) 58 | f.Run(9000) 59 | } 60 | 61 | func reportData(reportType model.ReportType, reportData jsoniter.RawMessage) error { 62 | reportURL := os.Getenv("SOURCE_REPORT_URL") 63 | reportKey := os.Getenv("SOURCE_REPORT_KEY") 64 | 65 | bodyBytes, err := jsoniter.Marshal(map[string]interface{}{ 66 | "type": reportType, 67 | "data": reportData, 68 | }) 69 | if err != nil { 70 | return errors.Wrap(err, "encode JSON") 71 | } 72 | 73 | req, err := http.NewRequest(http.MethodPost, reportURL, bytes.NewReader(bodyBytes)) 74 | if err != nil { 75 | return errors.Wrap(err, "new request") 76 | } 77 | 78 | req.Header.Set("Authorization", reportKey) 79 | 80 | resp, err := http.DefaultClient.Do(req) 81 | if err != nil { 82 | return errors.Wrap(err, "request") 83 | } 84 | defer func() { _ = resp.Body.Close() }() 85 | 86 | if resp.StatusCode/100 != 2 { 87 | bodyBytes, err := io.ReadAll(resp.Body) 88 | if err != nil { 89 | return errors.Wrap(err, "read response body") 90 | } 91 | return errors.Errorf("unexpected status code %d: %q", resp.StatusCode, string(bodyBytes)) 92 | } 93 | 94 | return nil 95 | } 96 | -------------------------------------------------------------------------------- /util/signature.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
package util

import (
	"github.com/robertkrimen/otto"
)

// code is the JavaScript program evaluated by MakeSignature. It defines
// make_signature(input, ua) and then calls it with the global variables
// `input` and `userAgent`; the value of that call is the script's result.
//
// The script reads the current time (`new Date()`), so its result changes
// from run to run. It has no semicolons on most statements and therefore
// depends on its line breaks; do not reflow it.
const code = `
function make_signature(input, ua){
    function str_loop(str, k){
        for(var i = 0; i < str.length; i++){
            k = (65599 * k + str.charCodeAt(i) >>> 0)
        }
        return k
    }

    function char_loop(str){
        offset = 24

        for(;;){
            v = (str >> offset) & 63
            if (v < 26){
                c = String.fromCharCode(v + 65)
                signature += c
            }else if(v < 52){
                c = String.fromCharCode(v + 71)
                signature += c
            }else if(v < 62){
                c = String.fromCharCode(v - 4)
                signature += c
            }else{
                c = String.fromCharCode(v - 17)
                signature += c
            }

            offset -= 6
            if(offset < 0){
                return v
            }
        }
    }


    signature = ''
    ts = new Date() / 1000;

    constNum = 65521;
    v0 = ts % constNum;
    v1 = ((ts ^ (v0 * constNum)) >>> 0) + ''
    v2 = (((v1 / 4294967296) << 16) | v0)
    k0 = str_loop(v1, 0)

    tmp = v1 >> 2
    char_loop(tmp)

    tmp1 = v1 << 28
    tmp2 = (v2 >>> 4)
    tmp = tmp1 | tmp2
    char_loop(tmp)

    k1 = 311735490 ^ v1 // From the canvas, it's a const.

    tmp1 = v2 << 26
    tmp2 = k1 >>> 6
    tmp = tmp1 | tmp2
    char_loop(tmp)

    char_loop(k1, 0)

    k1 = str_loop(input, k0)
    tmp1 = k1 % constNum

    k2 = k0
    k2 = str_loop(ua, k2)
    tmp2 = (k2 % constNum) << 16

    v40 = tmp1 | tmp2
    tmp = v40 >> 2
    char_loop(tmp)

    tmp1 = v40 << 28
    tmp2 = (((0 << 8) | 16) ^ v1) >>> 4
    tmp = tmp1 | tmp2
    char_loop(tmp)

    return signature
}
var input
var userAgent
make_signature(input, userAgent)
`

// MakeSignature runs the embedded JavaScript in a fresh otto VM with the
// globals `input` and `userAgent` set to the given arguments and returns the
// script's result converted to a string.
//
// It returns "" when the script fails to run.
func MakeSignature(input, userAgent string) string {
	vm := otto.New()
	// NOTE(review): errors from Set are discarded; presumably setting a plain
	// Go string cannot fail — confirm against the otto documentation.
	_ = vm.Set("input", input)
	_ = vm.Set("userAgent", userAgent)
	val, err := vm.Run(code)
	if err != nil {
		return ""
	}

	// The conversion error is discarded as well; v is returned as-is.
	v, _ := val.ToString()
	return v
}
-------------------------------------------------------------------------------- /source/update_member.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package source 6 | 7 | import ( 8 | "net/http" 9 | 10 | "github.com/asoul-sig/asoul-video/pkg/model" 11 | jsoniter "github.com/json-iterator/go" 12 | "github.com/pkg/errors" 13 | log "unknwon.dev/clog/v2" 14 | ) 15 | 16 | func init() { 17 | Register(&UpdateMember{}) 18 | } 19 | 20 | type UpdateMember struct{} 21 | 22 | func (s *UpdateMember) String() string { 23 | return "update_member" 24 | } 25 | 26 | func (s *UpdateMember) Scrap(result chan Result) { 27 | defer func() { result <- Result{End: true} }() 28 | 29 | for _, secUID := range asoul { 30 | userInfo, err := scrapMember(secUID) 31 | if err != nil { 32 | log.Error("Failed to scrap member data: %v", err) 33 | continue 34 | } 35 | 36 | var avatarURL string 37 | if len(userInfo.UserInfo.AvatarLarger.UrlList) != 0 { 38 | avatarURL = userInfo.UserInfo.AvatarLarger.UrlList[0] 39 | } else if len(userInfo.UserInfo.AvatarMedium.UrlList) != 0 { 40 | avatarURL = userInfo.UserInfo.AvatarMedium.UrlList[0] 41 | } else if len(userInfo.UserInfo.AvatarThumb.UrlList) != 0 { 42 | avatarURL = userInfo.UserInfo.AvatarThumb.UrlList[0] 43 | } 44 | 45 | updateMember := model.UpdateMember{ 46 | SecUID: secUID, 47 | UID: userInfo.UserInfo.Uid, 48 | UniqueID: userInfo.UserInfo.UniqueId, 49 | ShortUID: userInfo.UserInfo.ShortId, 50 | Name: userInfo.UserInfo.Nickname, 51 | AvatarURL: avatarURL, 52 | Signature: userInfo.UserInfo.Signature, 53 | } 54 | 55 | callback, err := jsoniter.Marshal(updateMember) 56 | if err != nil { 57 | log.Error("Failed to encode callback JSON: %v", err) 58 | continue 59 | } 60 | 61 | log.Trace("Fetch member %q", userInfo.UserInfo.Nickname) 62 | 63 | result <- Result{ 64 | Data: callback, 65 | } 66 | } 67 | } 68 | 69 | type userInfo struct { 70 | UserInfo struct { 71 | AvatarLarger struct { 72 | Uri string `json:"uri"` 73 | UrlList []string `json:"url_list"` 74 | } `json:"avatar_larger"` 75 | FollowerCount int `json:"follower_count"` 76 | TotalFavorited string `json:"total_favorited"` 77 | CustomVerify 
string `json:"custom_verify"` 78 | Secret int `json:"secret"` 79 | Signature string `json:"signature"` 80 | AwemeCount int `json:"aweme_count"` 81 | VerificationType int `json:"verification_type"` 82 | OriginalMusician struct { 83 | MusicCount int `json:"music_count"` 84 | MusicUsedCount int `json:"music_used_count"` 85 | } `json:"original_musician"` 86 | Region string `json:"region"` 87 | PolicyVersion interface{} `json:"policy_version"` 88 | ShortId string `json:"short_id"` 89 | Nickname string `json:"nickname"` 90 | AvatarMedium struct { 91 | Uri string `json:"uri"` 92 | UrlList []string `json:"url_list"` 93 | } `json:"avatar_medium"` 94 | FollowingCount int `json:"following_count"` 95 | UniqueId string `json:"unique_id"` 96 | FollowersDetail interface{} `json:"followers_detail"` 97 | PlatformSyncInfo interface{} `json:"platform_sync_info"` 98 | Geofencing interface{} `json:"geofencing"` 99 | Uid string `json:"uid"` 100 | TypeLabel interface{} `json:"type_label"` 101 | FavoritingCount int `json:"favoriting_count"` 102 | IsGovMediaVip bool `json:"is_gov_media_vip"` 103 | AvatarThumb struct { 104 | Uri string `json:"uri"` 105 | UrlList []string `json:"url_list"` 106 | } `json:"avatar_thumb"` 107 | } `json:"user_info"` 108 | } 109 | 110 | func scrapMember(secUID model.MemberSecUID) (*userInfo, error) { 111 | respBody, err := SimpleScrap(http.MethodGet, "https://www.iesdouyin.com/web/api/v2/user/info/?sec_uid="+string(secUID)) 112 | if err != nil { 113 | return nil, errors.Wrap(err, "scrap") 114 | } 115 | 116 | var userInfo userInfo 117 | if err := jsoniter.Unmarshal(respBody, &userInfo); err != nil { 118 | return nil, errors.Wrap(err, "JSON decode") 119 | } 120 | return &userInfo, nil 121 | } 122 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/participle/v2 v2.0.0-alpha7 
h1:cK4vjj0VSgb3lN1nuKA5F7dw+1s1pWBe5bx7nNCnN+c= 2 | github.com/alecthomas/participle/v2 v2.0.0-alpha7/go.mod h1:NumScqsC42o9x+dGj8/YqsIfhrIQjFEOFovxotbBirA= 3 | github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1 h1:GDQdwm/gAcJcLAKQQZGOJ4knlw+7rfEQQcmwTbt4p5E= 4 | github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= 5 | github.com/asoul-sig/asoul-video v0.0.0-20220325111224-deaf4a826187 h1:VnvjEsxDMxcB4OsxQNehBu/a/lPEjD/rIC+Z9vAbNDw= 6 | github.com/asoul-sig/asoul-video v0.0.0-20220325111224-deaf4a826187/go.mod h1:RN30JQRapZ/mfDkA4Pi/pV4b1aquEwxKaBu8MBcQR00= 7 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 9 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= 11 | github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= 12 | github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= 13 | github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= 14 | github.com/flamego/flamego v0.0.0-20210913073131-adc6656c34a1 h1:+WQYuogBWs50/qWau4AaqCTJzGyrx6du7bTyLPoVAtU= 15 | github.com/flamego/flamego v0.0.0-20210913073131-adc6656c34a1/go.mod h1:apiAxIxeHujHFX4Yr0BHmQFJuZUM07XztdQSChbeuPw= 16 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 17 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 18 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 19 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 20 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 21 | github.com/kr/text v0.1.0/go.mod 
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 22 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 23 | github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 24 | github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 25 | github.com/mattn/go-colorable v0.1.10 h1:KWqbp83oZ6YOEgIbNW3BM1Jbe2tz4jgmWA9FOuAF8bw= 26 | github.com/mattn/go-colorable v0.1.10/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 27 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 28 | github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= 29 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 30 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= 31 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 32 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 33 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 34 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 35 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 36 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 37 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 38 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 39 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 40 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 41 | github.com/pmezard/go-difflib 
v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 42 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 43 | github.com/robertkrimen/otto v0.0.0-20210614181706-373ff5438452 h1:ewTtJ72GFy2e0e8uyiDwMG3pKCS5mBh+hdSTYsPKEP8= 44 | github.com/robertkrimen/otto v0.0.0-20210614181706-373ff5438452/go.mod h1:xvqspoSXJTIpemEonrMDFq6XzwHYYgToXWj5eRX1OtY= 45 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 46 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 47 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 48 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 49 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 50 | github.com/thanhpk/randstr v1.0.4 h1:IN78qu/bR+My+gHCvMEXhR/i5oriVHcTB/BJJIRTsNo= 51 | github.com/thanhpk/randstr v1.0.4/go.mod h1:M/H2P1eNLZzlDwAzpkkkUvoyNNMbzRGhESZuEQk3r0U= 52 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 53 | golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 54 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 55 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 56 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 57 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6 h1:foEbQz/B0Oz6YIqu/69kfXPYeFQAuuMYFkjaqXzl5Wo= 58 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 59 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 60 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f 
h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 61 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 62 | gopkg.in/sourcemap.v1 v1.0.5 h1:inv58fC9f9J3TK2Y2R1NPntXEn3/wjWHkonhIUODNTI= 63 | gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78= 64 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 65 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 66 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 67 | unknwon.dev/clog/v2 v2.2.0 h1:jkPdsxux0MC04BT/9NHbT75z4prK92SH10VBNmIpVCc= 68 | unknwon.dev/clog/v2 v2.2.0/go.mod h1:zvUlyibDHI4mykYdWyWje2G9nF/nBzfDOqRo2my4mWc= 69 | -------------------------------------------------------------------------------- /source/create_video.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package source 6 | 7 | import ( 8 | "fmt" 9 | "net/http" 10 | 11 | "github.com/asoul-sig/asoul-video/pkg/model" 12 | jsoniter "github.com/json-iterator/go" 13 | "github.com/pkg/errors" 14 | "github.com/thanhpk/randstr" 15 | log "unknwon.dev/clog/v2" 16 | 17 | "github.com/asoul-sig/acao/util" 18 | ) 19 | 20 | func init() { 21 | Register(&CreateVideo{}) 22 | } 23 | 24 | type CreateVideo struct{} 25 | 26 | func (s *CreateVideo) String() string { 27 | return "create_video" 28 | } 29 | 30 | func (s *CreateVideo) Scrap(result chan Result) { 31 | defer func() { result <- Result{End: true} }() 32 | 33 | cursorSet := make(map[model.MemberSecUID]int64) 34 | 35 | for _, secUID := range asoul { 36 | cursorSet[secUID] = 0 37 | } 38 | 39 | for secUID, cursor := range cursorSet { 40 | memberVideos, _, err := scrapMemberVideos(secUID, cursor) 41 | if err != nil { 42 | log.Error("Failed to scrap member videos: %v", err) 43 | continue 44 | } 45 | 46 | for _, video := range memberVideos { 47 | log.Trace("Fetch video %q", video.Description) 48 | } 49 | 50 | callback, err := jsoniter.Marshal(memberVideos) 51 | if err != nil { 52 | log.Error("Failed to encode callback JSON: %v", err) 53 | continue 54 | } 55 | result <- Result{ 56 | Data: callback, 57 | } 58 | 59 | delete(cursorSet, secUID) 60 | if len(cursorSet) == 0 { 61 | break 62 | } 63 | } 64 | } 65 | 66 | type videoInfo struct { 67 | AwemeList []struct { 68 | ChaList interface{} `json:"cha_list"` 69 | ImageInfos interface{} `json:"image_infos"` 70 | CommentList interface{} `json:"comment_list"` 71 | Geofencing interface{} `json:"geofencing"` 72 | LabelTopText interface{} `json:"label_top_text"` 73 | Images interface{} `json:"images"` 74 | Author struct { 75 | UniqueId string `json:"unique_id"` 76 | WithCommerceEntry bool `json:"with_commerce_entry"` 77 | Nickname string `json:"nickname"` 78 | FavoritingCount int `json:"favoriting_count"` 79 | WithFusionShopEntry bool `json:"with_fusion_shop_entry"` 80 | AvatarLarger struct 
{ 81 | Uri string `json:"uri"` 82 | UrlList []string `json:"url_list"` 83 | } `json:"avatar_larger"` 84 | AwemeCount int `json:"aweme_count"` 85 | IsAdFake bool `json:"is_ad_fake"` 86 | Signature string `json:"signature"` 87 | FollowerCount int `json:"follower_count"` 88 | Region string `json:"region"` 89 | SecUid string `json:"sec_uid"` 90 | TotalFavorited string `json:"total_favorited"` 91 | CustomVerify string `json:"custom_verify"` 92 | PolicyVersion interface{} `json:"policy_version"` 93 | UserCanceled bool `json:"user_canceled"` 94 | TypeLabel []int64 `json:"type_label"` 95 | Uid string `json:"uid"` 96 | AvatarMedium struct { 97 | Uri string `json:"uri"` 98 | UrlList []string `json:"url_list"` 99 | } `json:"avatar_medium"` 100 | EnterpriseVerifyReason string `json:"enterprise_verify_reason"` 101 | PlatformSyncInfo interface{} `json:"platform_sync_info"` 102 | HasOrders bool `json:"has_orders"` 103 | VideoIcon struct { 104 | Uri string `json:"uri"` 105 | UrlList []interface{} `json:"url_list"` 106 | } `json:"video_icon"` 107 | ShortId string `json:"short_id"` 108 | FollowStatus int `json:"follow_status"` 109 | FollowingCount int `json:"following_count"` 110 | WithShopEntry bool `json:"with_shop_entry"` 111 | Secret int `json:"secret"` 112 | Geofencing interface{} `json:"geofencing"` 113 | AvatarThumb struct { 114 | Uri string `json:"uri"` 115 | UrlList []string `json:"url_list"` 116 | } `json:"avatar_thumb"` 117 | StoryOpen bool `json:"story_open"` 118 | VerificationType int `json:"verification_type"` 119 | FollowersDetail interface{} `json:"followers_detail"` 120 | IsGovMediaVip bool `json:"is_gov_media_vip"` 121 | Rate int `json:"rate"` 122 | } `json:"author"` 123 | TextExtra []struct { 124 | Start int `json:"start"` 125 | End int `json:"end"` 126 | Type int `json:"type"` 127 | HashtagName string `json:"hashtag_name"` 128 | HashtagId int64 `json:"hashtag_id"` 129 | UserId string `json:"user_id,omitempty"` 130 | } `json:"text_extra"` 131 | VideoLabels 
interface{} `json:"video_labels"` 132 | AwemeType int `json:"aweme_type"` 133 | VideoText interface{} `json:"video_text"` 134 | LongVideo interface{} `json:"long_video"` 135 | AwemeId string `json:"aweme_id"` 136 | Video struct { 137 | Ratio string `json:"ratio"` 138 | DownloadAddr struct { 139 | UrlList []string `json:"url_list"` 140 | Uri string `json:"uri"` 141 | } `json:"download_addr"` 142 | PlayAddrLowbr struct { 143 | Uri string `json:"uri"` 144 | UrlList []string `json:"url_list"` 145 | } `json:"play_addr_lowbr"` 146 | BitRate interface{} `json:"bit_rate"` 147 | Duration int64 `json:"duration"` 148 | Width int `json:"width"` 149 | DynamicCover struct { 150 | Uri string `json:"uri"` 151 | UrlList []string `json:"url_list"` 152 | } `json:"dynamic_cover"` 153 | Height int `json:"height"` 154 | OriginCover struct { 155 | Uri string `json:"uri"` 156 | UrlList []string `json:"url_list"` 157 | } `json:"origin_cover"` 158 | HasWatermark bool `json:"has_watermark"` 159 | Vid string `json:"vid"` 160 | PlayAddr struct { 161 | Uri string `json:"uri"` 162 | UrlList []string `json:"url_list"` 163 | } `json:"play_addr"` 164 | Cover struct { 165 | Uri string `json:"uri"` 166 | UrlList []string `json:"url_list"` 167 | } `json:"cover"` 168 | } `json:"video"` 169 | Promotions interface{} `json:"promotions"` 170 | Desc string `json:"desc"` 171 | Statistics struct { 172 | PlayCount int64 `json:"play_count"` 173 | ShareCount int64 `json:"share_count"` 174 | ForwardCount int64 `json:"forward_count"` 175 | AwemeId string `json:"aweme_id"` 176 | CommentCount int64 `json:"comment_count"` 177 | DiggCount int64 `json:"digg_count"` 178 | } `json:"statistics"` 179 | } `json:"aweme_list"` 180 | MaxCursor int64 `json:"max_cursor"` 181 | MinCursor int64 `json:"min_cursor"` 182 | HasMore bool `json:"has_more"` 183 | } 184 | 185 | func scrapMemberVideos(secUID model.MemberSecUID, cursor int64) (videos []*model.CreateVideo, nextCursor int64, _ error) { 186 | signature := 
util.MakeSignature("e99p1ant"+randstr.String(6), util.UserAgent) 187 | log.Trace("Signature: %v", signature) 188 | 189 | url := fmt.Sprintf("https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=%s&count=50&max_cursor=%d&_signature=%s", secUID, cursor, signature) 190 | 191 | respBody, err := SimpleScrap(http.MethodGet, url) 192 | if err != nil { 193 | return nil, 0, errors.Wrap(err, "scrap") 194 | } 195 | 196 | var videoInfo videoInfo 197 | if err := jsoniter.Unmarshal(respBody, &videoInfo); err != nil { 198 | return nil, 0, errors.Wrap(err, "decode JSON") 199 | } 200 | 201 | createVideos := make([]*model.CreateVideo, 0, len(videoInfo.AwemeList)) 202 | for _, video := range videoInfo.AwemeList { 203 | textExtra := make([]string, 0, len(video.TextExtra)) 204 | for _, extra := range video.TextExtra { 205 | textExtra = append(textExtra, extra.HashtagName) 206 | } 207 | 208 | originCoverURLs := make([]string, 0, len(video.Video.OriginCover.UrlList)) 209 | for _, url := range video.Video.OriginCover.UrlList { 210 | originCoverURLs = append(originCoverURLs, util.ConvertSignatureCDN(url)) 211 | } 212 | 213 | dynamicCoverURLs := make([]string, 0, len(video.Video.DynamicCover.UrlList)) 214 | for _, url := range video.Video.DynamicCover.UrlList { 215 | dynamicCoverURLs = append(dynamicCoverURLs, util.ConvertSignatureCDN(url)) 216 | } 217 | 218 | isDynamicCover := len(dynamicCoverURLs) > 0 && util.IsGIFImage(dynamicCoverURLs[0]) 219 | 220 | createVideos = append(createVideos, &model.CreateVideo{ 221 | ID: video.AwemeId, 222 | VID: video.Video.Vid, 223 | AuthorSecUID: model.MemberSecUID(video.Author.SecUid), 224 | Description: video.Desc, 225 | TextExtra: textExtra, 226 | OriginCoverURLs: originCoverURLs, 227 | DynamicCoverURLs: dynamicCoverURLs, 228 | IsDynamicCover: isDynamicCover, 229 | VideoHeight: video.Video.Height, 230 | VideoWidth: video.Video.Width, 231 | VideoDuration: video.Video.Duration, 232 | VideoRatio: video.Video.Ratio, 233 | VideoURLs: 
video.Video.PlayAddr.UrlList, 234 | VideoCDNURL: "", // TODO Upload to my CDN. 235 | 236 | Statistic: model.Statistic{ 237 | Share: video.Statistics.ShareCount, 238 | Forward: video.Statistics.ForwardCount, 239 | Digg: video.Statistics.DiggCount, 240 | Play: video.Statistics.PlayCount, 241 | Comment: video.Statistics.CommentCount, 242 | }, 243 | }) 244 | } 245 | 246 | return createVideos, videoInfo.MaxCursor, nil 247 | } 248 | -------------------------------------------------------------------------------- /source/update_video_meta.go: -------------------------------------------------------------------------------- 1 | // Copyright 2021 E99p1ant. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package source 6 | 7 | import ( 8 | "fmt" 9 | "net/http" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/asoul-sig/asoul-video/pkg/model" 14 | jsoniter "github.com/json-iterator/go" 15 | "github.com/pkg/errors" 16 | log "unknwon.dev/clog/v2" 17 | 18 | "github.com/asoul-sig/acao/util" 19 | ) 20 | 21 | func init() { 22 | Register(&UpdateVideoMeta{}) 23 | } 24 | 25 | type UpdateVideoMeta struct{} 26 | 27 | func (s *UpdateVideoMeta) String() string { 28 | return "update_video_meta" 29 | } 30 | 31 | func (s *UpdateVideoMeta) Scrap(result chan Result) { 32 | defer func() { result <- Result{End: true} }() 33 | 34 | page := 0 35 | for { 36 | page++ 37 | 38 | videoMeta, err := s.scrapVideoList(page) 39 | if err != nil { 40 | if err == ErrNoMoreVideos { 41 | break 42 | } 43 | } 44 | 45 | callback, err := jsoniter.Marshal(videoMeta) 46 | if err != nil { 47 | log.Error("Failed to encode callback JSON: %v", err) 48 | continue 49 | } 50 | 51 | result <- Result{ 52 | Data: callback, 53 | } 54 | } 55 | } 56 | 57 | type video struct { 58 | ID string `json:"id"` 59 | VID string `json:"vid"` 60 | OriginCoverURLs []string `json:"origin_cover_urls"` 61 | DynamicCoverURLs []string 
`json:"dynamic_cover_urls"` 62 | CreatedAt time.Time `json:"created_at"` 63 | } 64 | 65 | var ErrNoMoreVideos = errors.New("no more videos") 66 | 67 | func (s *UpdateVideoMeta) scrapVideoList(page int) ([]*model.UpdateVideoMeta, error) { 68 | resp, err := http.Get("https://asoul.cdn.n3ko.co/api/videos?page=" + strconv.Itoa(page)) 69 | if err != nil { 70 | return nil, errors.Wrap(err, "get video list from asoul-video") 71 | } 72 | defer func() { _ = resp.Body.Close() }() 73 | 74 | var respJSON struct { 75 | Data []video `json:"data"` 76 | } 77 | if err := jsoniter.NewDecoder(resp.Body).Decode(&respJSON); err != nil { 78 | log.Error("Failed to decode JSON: %v", err) 79 | return nil, errors.Wrap(err, "decode JSON") 80 | } 81 | if len(respJSON.Data) == 0 { 82 | return nil, ErrNoMoreVideos 83 | } 84 | 85 | updateVideoMetas := make([]*model.UpdateVideoMeta, 0, len(respJSON.Data)) 86 | for _, video := range respJSON.Data { 87 | id := video.ID 88 | 89 | var metaData *videoMeta 90 | var err error 91 | for i := 1; i <= 3; i++ { 92 | metaData, err = s.getVideoMeta(id) 93 | if err != nil || len(metaData.ItemList) == 0 { 94 | log.Warn("Failed to get video %q meta data [ %d / 3 ]: %v", id, i, err) 95 | continue 96 | } 97 | break 98 | } 99 | if err != nil { 100 | log.Error("Failed to get video %q meta data: %v", id, err) 101 | continue 102 | } 103 | 104 | if len(metaData.ItemList) == 0 { 105 | log.Error("Video %q not found", id) 106 | continue 107 | } 108 | 109 | meta := metaData.ItemList[0] 110 | createdAt := time.Unix(int64(meta.CreateTime), 0) 111 | 112 | originCoverURLs := make([]string, 0, len(meta.Video.OriginCover.UrlList)) 113 | for _, url := range meta.Video.OriginCover.UrlList { 114 | originCoverURLs = append(originCoverURLs, util.ConvertSignatureCDN(url)) 115 | } 116 | 117 | dynamicCoverURLs := make([]string, 0, len(meta.Video.DynamicCover.UrlList)) 118 | for _, url := range meta.Video.DynamicCover.UrlList { 119 | dynamicCoverURLs = append(dynamicCoverURLs, 
util.ConvertSignatureCDN(url)) 120 | } 121 | 122 | isDynamicCover := len(dynamicCoverURLs) > 0 && util.IsGIFImage(dynamicCoverURLs[0]) 123 | 124 | updateVideoMetas = append(updateVideoMetas, &model.UpdateVideoMeta{ 125 | ID: id, 126 | VID: meta.Video.Vid, 127 | OriginCoverURLs: originCoverURLs, 128 | DynamicCoverURLs: dynamicCoverURLs, 129 | IsDynamicCover: isDynamicCover, 130 | CreatedAt: createdAt, 131 | 132 | Statistic: model.Statistic{ 133 | Share: meta.Statistics.ShareCount, 134 | Forward: meta.Statistics.ForwardCount, 135 | Digg: meta.Statistics.DiggCount, 136 | Play: meta.Statistics.PlayCount, 137 | Comment: meta.Statistics.CommentCount, 138 | }, 139 | }) 140 | } 141 | 142 | return updateVideoMetas, nil 143 | } 144 | 145 | type videoMeta struct { 146 | StatusCode int `json:"status_code"` 147 | ItemList []struct { 148 | IsLiveReplay bool `json:"is_live_replay"` 149 | TextExtra []struct { 150 | End int `json:"end"` 151 | Type int `json:"type"` 152 | HashtagName string `json:"hashtag_name"` 153 | HashtagId int64 `json:"hashtag_id"` 154 | Start int `json:"start"` 155 | } `json:"text_extra"` 156 | AuthorUserId int64 `json:"author_user_id"` 157 | LongVideo interface{} `json:"long_video"` 158 | Images interface{} `json:"images"` 159 | ChaList []struct { 160 | ConnectMusic interface{} `json:"connect_music"` 161 | Type int `json:"type"` 162 | ViewCount int `json:"view_count"` 163 | HashTagProfile string `json:"hash_tag_profile"` 164 | Cid string `json:"cid"` 165 | Desc string `json:"desc"` 166 | IsCommerce bool `json:"is_commerce"` 167 | ChaName string `json:"cha_name"` 168 | UserCount int `json:"user_count"` 169 | } `json:"cha_list"` 170 | Statistics struct { 171 | DiggCount int64 `json:"digg_count"` 172 | PlayCount int64 `json:"play_count"` 173 | ShareCount int64 `json:"share_count"` 174 | AwemeId string `json:"aweme_id"` 175 | CommentCount int64 `json:"comment_count"` 176 | ForwardCount int64 `json:"forward_count"` 177 | } `json:"statistics"` 178 | RiskInfos 
struct { 179 | Warn bool `json:"warn"` 180 | Type int `json:"type"` 181 | Content string `json:"content"` 182 | } `json:"risk_infos"` 183 | Desc string `json:"desc"` 184 | Music struct { 185 | Mid string `json:"mid"` 186 | CoverMedium struct { 187 | UrlList []string `json:"url_list"` 188 | Uri string `json:"uri"` 189 | } `json:"cover_medium"` 190 | CoverThumb struct { 191 | UrlList []string `json:"url_list"` 192 | Uri string `json:"uri"` 193 | } `json:"cover_thumb"` 194 | Duration int `json:"duration"` 195 | Position interface{} `json:"position"` 196 | Id int64 `json:"id"` 197 | Author string `json:"author"` 198 | CoverHd struct { 199 | Uri string `json:"uri"` 200 | UrlList []string `json:"url_list"` 201 | } `json:"cover_hd"` 202 | CoverLarge struct { 203 | Uri string `json:"uri"` 204 | UrlList []string `json:"url_list"` 205 | } `json:"cover_large"` 206 | PlayUrl struct { 207 | Uri string `json:"uri"` 208 | UrlList []string `json:"url_list"` 209 | } `json:"play_url"` 210 | Status int `json:"status"` 211 | Title string `json:"title"` 212 | } `json:"music"` 213 | Video struct { 214 | PlayAddr struct { 215 | Uri string `json:"uri"` 216 | UrlList []string `json:"url_list"` 217 | } `json:"play_addr"` 218 | DynamicCover struct { 219 | Uri string `json:"uri"` 220 | UrlList []string `json:"url_list"` 221 | } `json:"dynamic_cover"` 222 | BitRate interface{} `json:"bit_rate"` 223 | Vid string `json:"vid"` 224 | Ratio string `json:"ratio"` 225 | HasWatermark bool `json:"has_watermark"` 226 | Duration int `json:"duration"` 227 | Cover struct { 228 | Uri string `json:"uri"` 229 | UrlList []string `json:"url_list"` 230 | } `json:"cover"` 231 | Height int `json:"height"` 232 | Width int `json:"width"` 233 | OriginCover struct { 234 | Uri string `json:"uri"` 235 | UrlList []string `json:"url_list"` 236 | } `json:"origin_cover"` 237 | } `json:"video"` 238 | AwemeType int `json:"aweme_type"` 239 | VideoText interface{} `json:"video_text"` 240 | GroupId int64 `json:"group_id"` 241 | 
LabelTopText interface{} `json:"label_top_text"` 242 | IsPreview int `json:"is_preview"` 243 | Author struct { 244 | PlatformSyncInfo interface{} `json:"platform_sync_info"` 245 | Geofencing interface{} `json:"geofencing"` 246 | PolicyVersion interface{} `json:"policy_version"` 247 | ShortId string `json:"short_id"` 248 | Nickname string `json:"nickname"` 249 | AvatarMedium struct { 250 | Uri string `json:"uri"` 251 | UrlList []string `json:"url_list"` 252 | } `json:"avatar_medium"` 253 | UniqueId string `json:"unique_id"` 254 | FollowersDetail interface{} `json:"followers_detail"` 255 | TypeLabel interface{} `json:"type_label"` 256 | Uid string `json:"uid"` 257 | Signature string `json:"signature"` 258 | AvatarLarger struct { 259 | Uri string `json:"uri"` 260 | UrlList []string `json:"url_list"` 261 | } `json:"avatar_larger"` 262 | AvatarThumb struct { 263 | Uri string `json:"uri"` 264 | UrlList []string `json:"url_list"` 265 | } `json:"avatar_thumb"` 266 | } `json:"author"` 267 | ForwardId string `json:"forward_id"` 268 | CreateTime int `json:"create_time"` 269 | VideoLabels interface{} `json:"video_labels"` 270 | ImageInfos interface{} `json:"image_infos"` 271 | Duration int `json:"duration"` 272 | CommentList interface{} `json:"comment_list"` 273 | Geofencing interface{} `json:"geofencing"` 274 | AwemeId string `json:"aweme_id"` 275 | ShareUrl string `json:"share_url"` 276 | ShareInfo struct { 277 | ShareWeiboDesc string `json:"share_weibo_desc"` 278 | ShareDesc string `json:"share_desc"` 279 | ShareTitle string `json:"share_title"` 280 | } `json:"share_info"` 281 | Promotions interface{} `json:"promotions"` 282 | } `json:"item_list"` 283 | Extra struct { 284 | Now int64 `json:"now"` 285 | Logid string `json:"logid"` 286 | } `json:"extra"` 287 | } 288 | 289 | func (s *UpdateVideoMeta) getVideoMeta(id string) (*videoMeta, error) { 290 | time.Sleep(500 * time.Millisecond) 291 | 292 | signature := util.MakeSignature("e99p1ant", util.UserAgent) 293 | 
log.Trace("Signature: %v for video: %q", signature, id) 294 | 295 | url := fmt.Sprintf("https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=%s&_signature=%s", id, signature) 296 | respBody, err := SimpleScrap(http.MethodGet, url) 297 | if err != nil { 298 | return nil, errors.Wrap(err, "scrap") 299 | } 300 | 301 | var videoMeta videoMeta 302 | if err := jsoniter.Unmarshal(respBody, &videoMeta); err != nil { 303 | return nil, errors.Wrap(err, "decode JSON") 304 | } 305 | return &videoMeta, nil 306 | } 307 | --------------------------------------------------------------------------------