Node.js 淘票票院线电影爬虫
Node.js 淘票票院线电影爬虫
环境
node.js v12.16.3
axios@0.20.0
cheerio@1.0.0-rc.3
npm安装
npm install axios cheerio
实现代码
// Scrapes the movie cards from a listing page and saves them to a JSON file.
//
// Flow: download the page with axios, parse it with cheerio, turn every
// `div.movie-card-wrap` element into a plain object, print the resulting
// list, then write it to OUTPUT_FILE.
const cheerio = require("cheerio");
const axios = require("axios");
const fs = require("fs");

// NOTE(review): this URL has no scheme or host, so it cannot be requested
// from Node.js as written. Presumably the site's domain was stripped when
// the post was published; restore the full listing URL before running.
const LIST_URL = `.htm?spm=a1z21.3046609.w2.3.32c0112aZmGlnh&n_s=new`;

// Output path, kept exactly as in the original script so existing consumers
// of the file keep working.
const OUTPUT_FILE = "MaoyanMoive.json";

// The patterns run against the serialized HTML of a single card. They are
// hoisted so they are compiled once, not once per card. None of them uses
// the `g` flag, so sharing them between calls is stateless.
const IMAGE_URL_PATTERN = /img width="160" height="224".*? src="(.*?)">/;
const UPCOMING_DETAIL_PATTERN = /<a href="(.*?)" class="movie-card-soon">/;
const CURRENT_DETAIL_PATTERN = /<a href="(.*?)" class="movie-card">/;

// Placeholder stored when a URL cannot be found. It is the string "null"
// (not the JSON null value), matching the original output format.
const MISSING_VALUE = "null";

/**
 * Returns the first capture group of `pattern` in `text`.
 *
 * @param {RegExp} pattern - Pattern with at least one capture group.
 * @param {string} text - Text to search.
 * @returns {string|null} The captured text (possibly empty), or null when
 *   the pattern does not match.
 */
function firstCapture(pattern, text) {
  const match = pattern.exec(text);
  return match === null ? null : match[1];
}

/**
 * Removes line breaks and tabs, then turns every run of spaces into a
 * single ";" separator.
 *
 * @param {string} text - Raw text content of the card's info list.
 * @returns {string} The flattened, ";"-separated text.
 */
function flattenInformation(text) {
  return text.replace(/\r?\n|\t/g, "").replace(/ +/g, ";");
}

/**
 * Builds the record for one movie card.
 *
 * @param {Function} $ - Cheerio instance loaded with the listing page.
 * @param {object} element - The `div.movie-card-wrap` element to read.
 * @returns {{title: string, information: string, image_url: string, detail_url: string}}
 *   The extracted fields; missing URLs are reported as the string "null".
 */
function parseMovieCard($, element) {
  const card = $(element);
  // Serialize once; every pattern below is matched against this markup.
  const cardHtml = String(card.html());

  const imageUrl = firstCapture(IMAGE_URL_PATTERN, cardHtml);

  // The "movie-card-soon" link is tried first, then "movie-card".
  let detailUrl = firstCapture(UPCOMING_DETAIL_PATTERN, cardHtml);
  if (detailUrl === null) {
    detailUrl = firstCapture(CURRENT_DETAIL_PATTERN, cardHtml);
  }

  return {
    title: card.find(".bt-l").text(),
    information: flattenInformation(card.find(".movie-card-list").text()),
    // Explicit null checks so an empty captured string is kept as-is.
    image_url: imageUrl === null ? MISSING_VALUE : imageUrl,
    detail_url: detailUrl === null ? MISSING_VALUE : detailUrl,
  };
}

/**
 * Extracts every movie card found in the page markup.
 *
 * @param {string} html - Markup of the listing page.
 * @returns {Array<object>} One record per `div.movie-card-wrap` element, in
 *   document order.
 */
function parseMovieList(html) {
  const $ = cheerio.load(html);
  return $("div.movie-card-wrap")
    .toArray()
    .map((element) => parseMovieCard($, element));
}

axios
  .get(LIST_URL)
  .then((response) => {
    const movieList = parseMovieList(response.data);
    console.log(movieList);

    fs.writeFile(OUTPUT_FILE, JSON.stringify(movieList, null, "\t"), (err) => {
      if (err == null) {
        console.log("Successfully!");
      } else {
        console.log(err);
      }
    });
  })
  .catch((err) => {
    // Without this handler a failed request or a parsing error would end up
    // as an unhandled promise rejection.
    console.error("Failed to fetch or parse the movie list:", err);
    process.exitCode = 1;
  });
测试结果
输出JSON
格式文件
最后
- 由于博主水平有限,不免有疏漏之处,欢迎读者随时批评指正,以免造成不必要的误解!
发布评论