├── .github └── workflows │ └── main.yml ├── .gitignore ├── README.md ├── cookie.png ├── go.mod ├── go.sum ├── main.go └── weixin.png /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: goreleaser 2 | 3 | on: 4 | create: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | goreleaser: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - 13 | name: Checkout 14 | uses: actions/checkout@v1 15 | - 16 | name: Set up Go 17 | uses: actions/setup-go@v1 18 | with: 19 | go-version: 1.13.x 20 | 21 | - name: Run Server GoReleaser 22 | uses: goreleaser/goreleaser-action@v1 23 | with: 24 | version: latest 25 | args: release --rm-dist 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.md 3 | *.html -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 批量导出CSDN博客 2 | > 批量导出`csdn`博客,并转化为`hexo`博客样式,如果你是用富文本编辑器写的则会导出`html`样式 3 | 4 | 注:有些文章可能获取不到造成进度条无法达到100%,如果走到99%,走不动了,直接取消即可 5 | 6 | # Quick start 7 | 8 | **安装** 9 | ```bash 10 | go get github.com/pibigstar/csdn-hexo 11 | ``` 12 | 13 | **使用** 14 | ```bash 15 | csdn-hexo -username 你的csdn用户名 -cookie 你csdn的cookie -page 1 16 | ``` 17 | > page不写,默认为下载全部页 18 | 19 | **完整示例** 20 | ```bash 21 | go run main.go -username "junmoxi" -cookie "UserName=junmoxi; UserToken=c3c29cca48be43c4884fe36d052d5851" 22 | ``` 23 | > 如果想下载别人的文章,那么将`username`更换为别人的即可,cookie还是用你的 24 | 25 | # 下载 26 | > 为了方便非Go语言用户使用,我也编译出了二进制文件,支持Windows、Linux和MAC用户 27 | 28 | [点击下载](https://github.com/pibigstar/csdn-hexo/releases/tag/v1.1) 29 | 30 | 31 | # cookie获取 32 | ![](cookie.png) 33 | 34 | # 关注 35 | > 如果对你有所帮助,请给个star,你的支持是我最大的动力,欢迎关注我微信公众号,一起学习Go语言 36 | 37 | ![](weixin.png) 
-------------------------------------------------------------------------------- /cookie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pibigstar/csdn-hexo/4547468cda79e2d678a0c9eab956333146eb5153/cookie.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pibigstar/csdn-hexo 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed 7 | github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 // indirect 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed h1:/nQRgal0OAvl64fVVo0IrwlMt8vXypxc/a+N0Is80VY= 2 | github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed/go.mod h1:4YWkn3EVkh8c1BDlVmw+Zh2QLhs+MbAg4xy4RqcKMsA= 3 | github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 h1:3EYaPrwMGOaFxBbiLlsfRGFNlSLJ3ETjkPbTfkG5IGQ= 4 | github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751/go.mod h1:HYAQIJIdgW9cGr75BDsucQMgKREt00mECJHOskH5n5k= 5 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/sha256" 6 | "encoding/base64" 7 | "encoding/json" 8 | "flag" 9 | "fmt" 10 | "io/ioutil" 11 | "math/rand" 12 | "net/http" 13 | "net/url" 14 | "os" 15 | "regexp" 16 | "strings" 17 | "sync" 18 | "time" 19 | 20 | "github.com/qianlnk/pgbar" 21 | ) 22 | 23 | // Crawl posts from csdn 24 | // build posts to hexo style 25 | 26 | const ( 27 | ListPostURL = "https://blog.csdn.net/%s/article/list/%d?" 
28 | PostDetailURL = "https://bizapi.csdn.net/blog-console-api/v3/editor/getArticle?id=%s&model_type=" 29 | HexoHeader = ` 30 | --- 31 | title: %s 32 | date: %s 33 | tags: [%s] 34 | categories: %s 35 | --- 36 | ` 37 | HtmlBody = ` 38 | 39 | %s 40 | 41 | 42 | %s 43 | 44 | ` 45 | ) 46 | 47 | 48 | type DetailData struct { 49 | Data PostDetail `json:"data"` 50 | } 51 | 52 | type PostDetail struct { 53 | Title string `json:"title"` 54 | Description string `json:"description"` 55 | Content string `json:"content"` 56 | Markdowncontent string `json:"markdowncontent"` 57 | Tags string `json:"tags"` 58 | Categories string `json:"categories"` 59 | } 60 | 61 | var ( 62 | username string 63 | page int 64 | cookie string 65 | currentPage = 1 66 | count int 67 | wg sync.WaitGroup 68 | bar *pgbar.Bar 69 | postTime = time.Now() 70 | ) 71 | 72 | const ( 73 | appSecret = "9znpamsyl2c7cdrr9sas0le9vbc3r6ba" 74 | appCaKey = "203803574" 75 | signHeaders = "x-ca-key,x-ca-nonce" 76 | ) 77 | 78 | func init() { 79 | flag.StringVar(&username, "username", "junmoxi", "your csdn username") 80 | flag.StringVar(&cookie, "cookie", "UserName=junmoxi;UserToken=34543a5e65f7cae7cb3c4;", "your csdn cookie") 81 | flag.IntVar(&page, "page", -1, "download pages") 82 | flag.Parse() 83 | rand.Seed(time.Now().Unix()) 84 | } 85 | 86 | func main() { 87 | urls, err := crawlPosts(username) 88 | if err != nil { 89 | panic(err) 90 | } 91 | bar = pgbar.NewBar(0, "下载进度", len(urls)) 92 | 93 | for _, ul := range urls { 94 | wg.Add(1) 95 | go crawlPostMarkdown(ul) 96 | } 97 | wg.Wait() 98 | } 99 | 100 | // Crawl posts by username 101 | func crawlPosts(username string) ([]string, error) { 102 | defer fmt.Println("地址抓取完成,开始下载...") 103 | 104 | var urls []string 105 | for { 106 | fmt.Printf("正在抓取第%d页文章地址... 
\n", currentPage) 107 | resp, err := http.DefaultClient.Get(fmt.Sprintf(ListPostURL, username, currentPage)) 108 | if err != nil { 109 | return nil, err 110 | } 111 | 112 | data, err := ioutil.ReadAll(resp.Body) 113 | 114 | r := regexp.MustCompile(`

\s*= 4 { 120 | urls = append(urls, ss[3]) 121 | } 122 | } 123 | 124 | if len(finds) == 0 { 125 | return urls, nil 126 | } 127 | 128 | if page != -1 && currentPage >= page { 129 | return urls, nil 130 | } 131 | currentPage++ 132 | } 133 | } 134 | 135 | func crawlPostMarkdown(url string) { 136 | defer wg.Done() 137 | defer bar.Add() 138 | defer func() { 139 | if err := recover(); err != nil { 140 | fmt.Println(err) 141 | } 142 | }() 143 | 144 | index := strings.LastIndex(url, "/") 145 | id := url[index+1:] 146 | apiUrl := fmt.Sprintf(PostDetailURL, id) 147 | 148 | uuid := createUUID() 149 | sign := createSignature(uuid, apiUrl) 150 | 151 | req, _ := http.NewRequest("GET",apiUrl, nil) 152 | req.Header.Set("cookie", cookie) 153 | req.Header.Set("x-ca-key", appCaKey) 154 | req.Header.Set("x-ca-nonce", uuid) 155 | req.Header.Set("x-ca-signature", sign) 156 | req.Header.Set("x-ca-signature-headers", signHeaders) 157 | req.Header.Set("Accept", "*/*") 158 | 159 | resp, err := http.DefaultClient.Do(req) 160 | if err != nil { 161 | return 162 | } 163 | if resp.StatusCode != http.StatusOK { 164 | return 165 | } 166 | data, err := ioutil.ReadAll(resp.Body) 167 | if err != nil { 168 | return 169 | } 170 | var post DetailData 171 | err = json.Unmarshal(data, &post) 172 | if err != nil { 173 | return 174 | } 175 | 176 | if post.Data.Markdowncontent != "" { 177 | buildMarkdownPost(post.Data) 178 | } else if post.Data.Content != "" { 179 | buildHtmlPost(post.Data) 180 | } 181 | } 182 | 183 | func buildMarkdownPost(post PostDetail) { 184 | date := postTime.Format("2006-01-02 15:03:04") 185 | header := fmt.Sprintf(HexoHeader, post.Title, date, post.Tags, post.Categories) 186 | 187 | err := ioutil.WriteFile( 188 | fmt.Sprintf("%s.md", post.Title), 189 | []byte(fmt.Sprintf("%s\n%s", header, post.Markdowncontent)), 190 | os.ModePerm) 191 | 192 | if err != nil { 193 | return 194 | } 195 | 196 | rand.Seed(time.Now().UnixNano()) 197 | d := rand.Intn(3) + 1 198 | postTime = 
postTime.AddDate(0, 0, -d).Add(time.Hour) 199 | count++ 200 | } 201 | 202 | func buildHtmlPost(post PostDetail) { 203 | html := fmt.Sprintf(HtmlBody, post.Title, post.Content) 204 | err := ioutil.WriteFile( 205 | fmt.Sprintf("%s.html", post.Title), 206 | []byte(fmt.Sprintf("%s", html)), 207 | os.ModePerm) 208 | if err != nil { 209 | return 210 | } 211 | } 212 | 213 | func createSignature(uuid, apiUrl string) string { 214 | u, err := url.Parse(apiUrl) 215 | if err != nil { 216 | panic(err) 217 | } 218 | query := u.Query().Encode() 219 | query = query[:len(query)-1] 220 | message := fmt.Sprintf("GET\n*/*\n\n\n\nx-ca-key:%s\nx-ca-nonce:%s\n%s?%s", appCaKey, uuid, u.Path, query) 221 | hc := hmac.New(sha256.New, []byte(appSecret)) 222 | hc.Write([]byte(message)) 223 | res := hc.Sum(nil) 224 | 225 | result := base64.StdEncoding.EncodeToString(res) 226 | return result 227 | } 228 | 229 | func createUUID() string { 230 | s := strings.Builder{} 231 | chars := make([]string, 0, 10) 232 | for i := 97; i < 103; i++ { 233 | chars = append(chars, string(i)) 234 | } 235 | for i := 49; i < 58; i++ { 236 | chars = append(chars, string(i)) 237 | } 238 | xs := "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx" 239 | for _, k := range xs { 240 | x := string(k) 241 | if x == "4" || x == "-" { 242 | s.WriteString(x) 243 | } else { 244 | i := rand.Intn(len(chars)) 245 | s.WriteString(chars[i]) 246 | } 247 | } 248 | return s.String() 249 | } -------------------------------------------------------------------------------- /weixin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pibigstar/csdn-hexo/4547468cda79e2d678a0c9eab956333146eb5153/weixin.png --------------------------------------------------------------------------------