• Puppeteer 前端截图 爬虫


    https://zhuanlan.zhihu.com/p/76237595

    https://juejin.cn/post/6882332163052994574

    const puppeteer = require('puppeteer');
    const fs = require('fs');
    const path = require('path');

    /**
     * Computes a signed 32-bit rolling hash of the string.
     *
     * Each UTF-16 code unit is folded in as `hash * 127 + codeUnit`, with the
     * running value wrapped to a signed 32-bit integer after every step.
     *
     * @returns {number} The hash; 0 for the empty string. May be negative.
     */
    String.prototype.hashCode = function () {
      let acc = 0;
      for (let pos = 0; pos < this.length; pos += 1) {
        // (acc << 7) - acc is acc * 127 (mod 2^32); `| 0` wraps to signed 32-bit.
        acc = ((acc << 7) - acc + this.charCodeAt(pos)) | 0;
      }
      return acc;
    };

    /**
     * Loads `url` in a headless browser and saves matching image responses to disk.
     *
     * Files go to `images/<hash>/`, where `<hash>` is `url.hashCode()`. The page
     * URL itself is written to `images/<hash>/url`. Every response whose URL
     * matches `upload…/<name>.(jpg|png|svg)` is saved as `image-<name>.<ext>`.
     *
     * @param {string} url - Address of the page to open.
     * @returns {Promise<void>} Resolves once navigation has finished and the
     *   browser has been closed; rejects if the directory cannot be created or
     *   the navigation fails.
     */
    async function getWelfareImage (url) {
      console.log("start..." + url)
      const hash = url.hashCode();
      const outputDir = `images/${hash}`;

      // Create the directory before anything tries to write into it.
      await fs.promises.mkdir(outputDir, { recursive: true });
      try {
        await fs.promises.writeFile(`${outputDir}/url`, url);
      } catch (err) {
        // The marker file is best-effort; a failure must not stop the download.
        console.log(err)
      }

      const browser = await puppeteer.launch();
      try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1280, height: 926 });

        page.on('response', async (response) => {
          const matches = /upload.*\/([^\/]*)\.(jpg|png|svg)$/.exec(response.url());
          if (!matches) {
            return;
          }
          const [, name, extension] = matches;
          try {
            const buffer = await response.buffer();
            fs.writeFileSync(`${outputDir}/image-${name}.${extension}`, buffer);
          } catch (err) {
            // A body can be unavailable (e.g. redirects, or the browser closing
            // mid-flight); log it instead of raising an unhandled rejection.
            console.log(err)
          }
        });

        // timeout: 0 disables Puppeteer's navigation timeout.
        await page.goto(url, {
          waitUntil: 'networkidle2',
          timeout: 0
        });

        console.log("end..." + url)
      } finally {
        // Always release the browser process, even when navigation throws.
        await browser.close();
      }
    }


    /**
     * Opens the forum index page, collects thread links, and downloads each one.
     *
     * A link is kept when its `href` attribute starts with `thread-index-fid-`.
     * Each kept link is passed to `getWelfareImage`, one after another.
     *
     * @returns {Promise<void>} Resolves after every collected thread has been
     *   processed and the browser has been closed; rejects if navigation or any
     *   download fails.
     */
    async function getAllThreadList () {
      console.log("start...")
      await fs.promises.mkdir("./images", { recursive: true });

      const browser = await puppeteer.launch();
      try {
        const page = await browser.newPage();

        // waitUntil: 'load' makes goto resolve once the page's load event fired,
        // so the scraping can run inline instead of inside an event handler.
        await page.goto('http://www.btbtt17.com/xxxx', {
          waitUntil: 'load',
          timeout: 0
        });
        console.log('Page loaded!')

        const urls = await page.evaluate(() => {
          console.log('Page evaluate!')
          return Array.from(
            document.querySelectorAll('a'),
            (anchor) => anchor.getAttribute("href")
          )
            // getAttribute returns null for anchors without an href.
            .filter((href) => href !== null && href.startsWith('thread-index-fid-'));
        });
        console.log(urls)

        for (const threadPath of urls) {
          await getWelfareImage('http://www.btbtt17.com/' + threadPath)
        }
      } finally {
        // Close the browser even if navigation or a download throws.
        await browser.close();
      }
      console.log("end...")
    }

    // getAllThreadList()

    // Page URLs to download; fill in before running the script.
    const urls = [
    ];
    for (const pageUrl of urls) {
      // Downloads start concurrently; each failure is reported on its own so a
      // rejected promise never goes unhandled.
      getWelfareImage(pageUrl).catch((err) => {
        console.error("failed..." + pageUrl, err);
      });
    }
  • 相关阅读:
    Java 中文 乱码问题
    JQuery 操作 radio 被坑一例
    标准I/O库之打开和关闭流
    标准I/O库之缓冲
    标准I/O库之标准输入、标准输出和标准出错
    标准I/O库之流和FILE对象
    文件和目录之文件访问权限位小结
    文件和目录之设备特殊文件
    文件和目录之chdir、fchdir和getcwd函数
    文件和目录之读目录
  • 原文地址:https://www.cnblogs.com/xuxm2007/p/16731081.html
Copyright © 2020-2023  润新知