node抓取百度头像

没有头像当然首先想到的是百度咯，如何批量抓取呢，那么这里用node来做一个栗子

首先安装依赖文件

"dependencies": {
    "colors": "^1.1.2",
    "node-fetch": "^1.7.3"
}

分析百度头像接口，大概酱紫

`https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&fp=result&queryWord=${encodeURIComponent('网络头像')}&cl=2&lm=-1&ie=utf-8&oe=utf-8&word=${encodeURIComponent('非主流')}&pn=${pn.page}&width=180&height=180`

代码撸起

var https = require("https");
var fs = require("fs");
var fetch = require('node-fetch');
var colors = require('colors'); 
// Shared crawl state: `page` is sent as the `pn` query parameter of the search request,
// `count` numbers the saved files (head<count>.jpg), and `max` is the highest file
// number that will be saved.
var pn={page:1,count:1,max:900}

将文件地址写入到.txt

/**
 * Appends one URL, terminated by CRLF, to ./img.txt.
 *
 * @param {string} url - Text to record as a new line of the log file.
 * @returns {Promise<void>} Resolves once the line has been appended; rejects
 *     with the fs error (after logging it) when the append fails.
 */
function asyncAppendFile(url){
    return new Promise((resolve, reject) => {
        const line = url+'\r\n';
        fs.appendFile('./img.txt', line, (err) => {
            // Success path first; everything below handles the failure.
            if(!err){
                resolve();
                return;
            }
            console.log(err);
            reject(err);
        });
    });
}

将图片获取并保存到文件夹

/**
 * Saves downloaded image data as ./downImg/head<pn.count>.jpg.
 *
 * The ./downImg directory is created on demand, so a fresh checkout no longer
 * fails with ENOENT on the first write.
 *
 * @param {string} imgUrl - Despite its name, this is the data to write; it is
 *     handed to fs.writeFile with the "binary" encoding.
 * @returns {Promise<void>} Resolves after the file has been written; rejects
 *     with the fs error when the directory cannot be created or the write fails.
 */
function asyncWriteFile(imgUrl){
    const dir = './downImg';
    // Captured once so the path and the success message always name the same file.
    const fileName = `head${pn.count}.jpg`;
    return new Promise(function (resolve, reject){
        fs.mkdir(dir, function(mkdirErr){
            // An already existing directory is the normal case after the first image.
            if(mkdirErr && mkdirErr.code !== 'EEXIST'){
                console.log("down fail");
                reject(mkdirErr);
                return;
            }
            fs.writeFile(`${dir}/${fileName}`, imgUrl, "binary", function(err){
                if(err){
                    console.log("down fail");
                    reject(err);
                }else{
                    console.log(`文件：${fileName} 下载成功！`.green)
                    resolve()
                }
            });
        });
    });
}

循环图片数组并执行存储操作

/**
 * Downloads the thumbnail of imgArr[selfObj.now], records its URL, saves the
 * image, then continues with the next entry or the next result page.
 *
 * Entries are processed strictly one after another. Entries without a
 * `thumbURL`, failed requests and non-200 responses are skipped rather than
 * ending the crawl. The crawl stops when pn.count exceeds pn.max, when saving
 * fails, or when a whole page produced no saved image (this prevents paging
 * forever through empty results).
 *
 * @param {Array<Object>} imgArr - Result entries; `thumbURL` is the only field read.
 * @param {{length: number, now: number, saved?: number}} selfObj - Cursor for
 *     this page. `now` is the index being processed, `length` the entry count
 *     (the final entry is never processed, as in the original loop condition),
 *     `saved` counts images stored from this page and is maintained here.
 * @returns {void}
 */
function syncArrMap(imgArr, selfObj){
    // Check the quota before requesting anything, so no download is thrown away.
    if(pn.count>pn.max) return;

    let advanced = false;
    // Moves on exactly once per entry, even if several failure paths fire.
    function next(){
        if(advanced) return;
        advanced = true;
        selfObj.now += 1
        if(selfObj.now<selfObj.length-1){
            syncArrMap(imgArr, selfObj)
        }else if(selfObj.saved){
            pn.page++
            getImageData()
        }else{
            console.log(`no image saved from page ${pn.page}, stopping`)
        }
    }

    const entry = imgArr[selfObj.now];
    const thumbURL = entry && entry.thumbURL;
    if(!thumbURL){
        next();
        return;
    }

    https.get(thumbURL, function(res){
        if(res.statusCode !== 200){
            // Drain the body so the socket is released, then skip this entry.
            res.resume();
            console.log(`skip ${thumbURL}: HTTP ${res.statusCode}`);
            next();
            return;
        }
        let imgUrl = "";
        // "binary" keeps one character per byte, matching the encoding used when writing.
        res.setEncoding("binary");
        res.on("data", function(chunk){
            imgUrl += chunk;
        });
        res.on("end", function(){
            asyncAppendFile(thumbURL)
                .then(() => asyncWriteFile(imgUrl))
                .then(() => {
                    pn.count ++
                    selfObj.saved = (selfObj.saved || 0) + 1
                    next()
                })
                .catch((err) => {
                    // A failing disk write would fail again for every entry, so stop here.
                    console.log('crawl stopped:', err)
                })
        });
    }).on("error", function(err){
        // Without this handler a network error would crash the process.
        console.log(`skip ${thumbURL}: ${err.message}`);
        next();
    });
}

最后获取图片

// Start the crawl; the function declaration below is hoisted, so calling it here works.
getImageData()
/**
 * Requests one page of Baidu image-search results (using pn.page) and hands
 * the returned entries to syncArrMap for sequential download.
 *
 * Network errors, non-JSON bodies and payloads without a non-empty `data`
 * array are logged and end the crawl instead of surfacing as unhandled
 * promise rejections.
 *
 * NOTE(review): pn.page is sent as the `pn` query parameter and is incremented
 * by one per page elsewhere in this file — confirm whether the endpoint treats
 * `pn` as a page number or as an item offset.
 *
 * @returns {void}
 */
function getImageData(){
    const url = `https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&fp=result&queryWord=${encodeURIComponent('网络头像')}&cl=2&lm=-1&ie=utf-8&oe=utf-8&word=${encodeURIComponent('非主流')}&pn=${pn.page}&width=180&height=180`;
    fetch(url)
        .then(res=>res.json())
        .then(data=>{
            const imgArr = data && data.data
            if(!Array.isArray(imgArr) || imgArr.length === 0){
                console.log(`no image data on page ${pn.page}, stopping`)
                return
            }
            const selfObj = {length:imgArr.length,now:0}
            syncArrMap(imgArr,selfObj)
        })
        .catch(err=>{
            console.log(`failed to load page ${pn.page}:`, err)
        })
}

2018-07-19