• 用go把博客园博客下载到本地Hexo目录下


    找到cookie

    在浏览器中登录博客园后按 F12 打开开发者工具,在 Network(网络)面板里找到发往 i.cnblogs.com 的请求,把请求头里 Cookie 的值复制出来,填到下面代码的 cookie 常量中

    直接上代码

    用 Hexo 搭建静态博客时,把这个 go 文件放在 \source\_posts 目录下。run 之后会在该目录下生成 cnblogs 文件夹,每篇博客以博客 id 作为 md 文件名保存在其中;同时会在 \source\ 下生成一个 cnblogs 目录,按博客 id 分子目录存放博客里面的图片文件。图片地址我使用正则 https://img.*.png 来做匹配

    package main
    
    import (
    	"bufio"
    	"encoding/json"
    	"fmt"
    	"io"
    	"io/ioutil"
    	"net/http"
    	"os"
    	"strconv"
    	"strings"
    	"regexp"
    	"path"
    )
    // cookie is the raw value that getData sends in the "cookie" request header
    // of every API call. The literal below is a placeholder: replace it with the
    // cookie string copied from a logged-in browser session before running.
    const cookie = " xxxxxxxxxxxxxxxxxx"
    
    // main prints a start banner and then exports the blog, beginning with the
    // first page of the post list.
    func main() {
    	const firstPage = 1

    	fmt.Print("开始执行")
    	getBlogList(firstPage)
    }
    // geturl builds the post-list API URL for the given page number. Only the
    // "p" query parameter varies; every other parameter is sent empty or with a
    // fixed value.
    func geturl(pageno int) string {
    	const listURL = "https://i.cnblogs.com/api/posts/list?p=%s&cid=&tid=&t=1&cfg=0&search=&orderBy=&s=&scid="

    	page := strconv.Itoa(pageno)
    	return fmt.Sprintf(listURL, page)
    }
    func getBlogList(pageindex int){
    	var urlstr = geturl(pageindex)
    
    	recordbody := getData(urlstr)
    	fmt.Printf("\r\n recordbody:%s \n", recordbody)
    
    	var conf blogList
    	err := json.Unmarshal(recordbody, &conf)
    	if err != nil {
    		fmt.Println("error:", err)
    	}
    
    	fmt.Printf("\r\n PageIndex:%s,PageSize:%s,PostsCount:%s \n", strconv.Itoa(conf.PageIndex), strconv.Itoa(conf.PageSize), strconv.Itoa(conf.PostsCount))
    	for _, childval := range conf.PostList {
                 if(childval.IsPublished){
    			childbody := getData(fmt.Sprintf("https://i.cnblogs.com/api/posts/%s", strconv.Itoa(childval.Id)))
    			fmt.Printf("childbody:%s \n", childbody)
    			var jsconf blogbodyConf
    			err := json.Unmarshal(childbody, &jsconf)
    			if err != nil {
    				fmt.Println("error:", err)
    			}
    			var tagbody = ""
    			for _, tag := range jsconf.BlogPost.Tags {
    				if(tagbody!=""){
    					tagbody = fmt.Sprintf("%s,\"%s\"",tagbody,tag)
    				}else{
    					tagbody = fmt.Sprintf("\"%s\"",tag)
    				}
    			}
    			var tagstr = fmt.Sprintf("[%s]",tagbody)
    			var articleBody = fmt.Sprintf("---\r\ntitle: %s\r\ndate: %s\r\nauthor: %s\r\ntags: %s\r\n---\r\n%s",
    			    jsconf.BlogPost.Title,
    				jsconf.BlogPost.DatePublished,
    				jsconf.BlogPost.Author,
    				tagstr,
    			    string(jsconf.BlogPost.PostBody))
    			//添加文章信息
    
    			reg, _ := regexp.Compile(`https://img.*.png`)
    			imgurls := reg.FindAllString(articleBody, -1)
    			for _, imgurl := range imgurls {
    				fileName := path.Base(imgurl)
    				downloadImage(imgurl,strconv.Itoa(jsconf.BlogPost.Id),fileName)
    				articleBody = strings.Replace(articleBody, imgurl, fmt.Sprintf("/cnblogs/%s/%s",strconv.Itoa(jsconf.BlogPost.Id),fileName), -1)
    			}
    			fmt.Printf("articleBody:%s \n", articleBody)
    
    			downloadFile(strings.NewReader(articleBody), strconv.Itoa(jsconf.BlogPost.Id), fmt.Sprintf("%s.md",  strconv.Itoa(jsconf.BlogPost.Id)))
    		}
    	}
            if(conf.PageIndex>0 && conf.PageIndex*conf.PageSize<=conf.PostsCount){
    	    getBlogList(conf.PageIndex+1)
    	}
    	fmt.Println("执行完毕")
    }
    // getData issues a GET request to urlstr with the package-level cookie
    // attached as the "cookie" header and returns the response body.
    //
    // It returns nil when the request cannot be built, the round trip fails, or
    // the body cannot be read; the cause is printed. Previously those errors were
    // discarded, which led to a nil-pointer panic on the request or response. A
    // non-200 status is reported but the body is still returned, so callers see
    // the same bytes as before.
    func getData(urlstr string) []byte {
    	fmt.Printf("\r\n urlstr:%s \n", urlstr)

    	req, err := http.NewRequest("GET", urlstr, nil)
    	if err != nil {
    		fmt.Println("error:", err)
    		return nil
    	}
    	req.Header.Add("cookie", cookie)

    	client := &http.Client{}
    	resp, err := client.Do(req)
    	if err != nil {
    		fmt.Println("error:", err)
    		return nil
    	}
    	defer resp.Body.Close()

    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		fmt.Println("error:", err)
    		return nil
    	}
    	if resp.StatusCode != http.StatusOK {
    		// Surface the status so a later JSON decode failure is explainable.
    		fmt.Printf("warning: GET %s returned %s\n", urlstr, resp.Status)
    	}
    	return body
    }
    // downloadImage fetches imgurl and stores it as ../cnblogs/<rootpath>/<fileName>,
    // creating the directory when it does not exist yet.
    //
    // Network failures and non-200 responses are reported and the image is
    // skipped, so an error page is never saved under an image name. Failing to
    // create the directory or the file panics, as a failed file creation did
    // before. The output file is flushed and closed before returning.
    func downloadImage(imgurl string, rootpath string, fileName string) {
    	dir := fmt.Sprintf("../cnblogs/%s", rootpath)
    	target := fmt.Sprintf("../cnblogs/%s/%s", rootpath, fileName)

    	res, err := http.Get(imgurl)
    	if err != nil {
    		fmt.Println("A error occurred!")
    		return
    	}
    	defer res.Body.Close()
    	if res.StatusCode != http.StatusOK {
    		fmt.Printf("skip image %s: unexpected status %s\n", imgurl, res.Status)
    		return
    	}

    	if _, err := os.Stat(dir); os.IsNotExist(err) {
    		// Two steps: create the directory, then set its permissions explicitly.
    		if err := os.MkdirAll(dir, 0777); err != nil {
    			panic(err)
    		}
    		if err := os.Chmod(dir, 0777); err != nil {
    			// Permissions are best-effort; the directory itself exists.
    			fmt.Println("error:", err)
    		}
    	}
    	file, err := os.Create(target)
    	if err != nil {
    		panic(err)
    	}
    	defer file.Close()

    	reader := bufio.NewReaderSize(res.Body, 32*1024)
    	writer := bufio.NewWriter(file)

    	written, err := io.Copy(writer, reader)
    	if err == nil {
    		// Push any bytes still held in the buffer to the file.
    		err = writer.Flush()
    	}
    	if err != nil {
    		fmt.Println("error:", err)
    		return
    	}
    	fmt.Printf("Total length: %d", written)
    }
    // downloadFile streams body into ./cnblogs/<name>. When rootpath is non-empty
    // and ./cnblogs does not exist yet, the directory is created first and its
    // mode set to 0777. It panics if the file cannot be created or the copy
    // fails.
    func downloadFile(body io.Reader, rootpath string, name string) {
    	const outDir = "./cnblogs"

    	if rootpath != "" {
    		_, statErr := os.Stat(outDir)
    		if os.IsNotExist(statErr) {
    			// Two steps: create the directory, then adjust its permissions.
    			os.MkdirAll(outDir, 0777)
    			os.Chmod(outDir, 0777)
    		}
    	}

    	dst, createErr := os.Create(fmt.Sprintf("./cnblogs/%s", name))
    	if createErr != nil {
    		panic(createErr)
    	}
    	defer dst.Close()

    	// Stream the content straight into the output file.
    	if _, copyErr := io.Copy(dst, body); copyErr != nil {
    		panic(copyErr)
    	}
    }
    
    // blogList is the shape getBlogList decodes the post-list response into:
    // the paging counters plus the posts on the current page.
    type blogList struct {
    	// Paging counters; getBlogList uses all three to decide whether another
    	// page has to be requested.
    	PageIndex  int `json:"pageIndex"`
    	PageSize   int `json:"pageSize"`
    	PostsCount int `json:"postsCount"`

    	// PostList holds the post summaries of this page.
    	PostList []blogbodymsg `json:"postList"`
    }
    // blogbodymsg is one entry of blogList.PostList: the summary of a single
    // post as decoded from the list response.
    type blogbodymsg struct {
    	// Id identifies the post; getBlogList uses it to request the full post.
    	Id            int    `json:"id"`
    	DatePublished string `json:"datePublished"`
    	DateUpdated   string `json:"dateUpdated"`
    	Title         string `json:"title"`
    	// IsPublished gates the export: entries with false are skipped.
    	IsPublished bool `json:"isPublished"`
    }
    
    // blogbodyConf is the envelope of a single-post response; the post itself
    // sits under the "blogPost" key.
    type blogbodyConf struct {
    	BlogPost blogPostEntity `json:"blogPost"`
    }
    // blogPostEntity is the full post carried inside blogbodyConf. getBlogList
    // reads Id, Title, DatePublished, Author, Tags and PostBody from it to build
    // the exported Markdown file.
    type blogPostEntity struct {
    	Id            int    `json:"id"`
    	AutoDesc      string `json:"autoDesc"`
    	DatePublished string `json:"datePublished"`
    	// PostBody is the article text that follows the generated front matter.
    	PostBody string   `json:"postBody"`
    	Title    string   `json:"title"`
    	Url      string   `json:"url"`
    	Author   string   `json:"author"`
    	Tags     []string `json:"tags"`
    }
    
    
  • 相关阅读:
    java模糊关键字查询
    Asp.Net MVC 扩展 Html.ImageFor 方法详解
    mvc下ajax请求遇到session超时简单处理方式
    8天学通MongoDB——第一天 基础入门
    MVC5中使用jQuery Post 二维数组和一维数组到Action
    构建ASP.NET MVC4+EF5+EasyUI+Unity2.x注入的后台管理系统(33)-数据验证共享
    构建ASP.NET MVC4+EF5+EasyUI+Unity2.x注入的后台管理系统(31)-MVC使用RDL报表
    构建ASP.NET MVC4+EF5+EasyUI+Unity2.x注入的后台管理系统(30)-本地化(多语言)
    JQuery文件上传插件ajaxFileUpload在Asp.net MVC中的使用
    MVC 5 + EF6 入门完整教程14 -- 动态生成面包屑导航
  • 原文地址:https://www.cnblogs.com/spatxos/p/16463506.html
Copyright © 2020-2023  润新知