概要:
这篇博文主要讲一下如何使用PhantomJS进行数据抓取,这里抓取的网站是太平洋电脑网估价的内容。主要是对笔记本电脑以及它们的属性进行抓取,然后再使用Node.js进行图片下载和数据库插入操作。
先对所有页面的内容进行抓取:
// ---- PhantomJS modules ------------------------------------------------
// Filesystem module; not referenced in the visible part of this script.
var fs = require('fs');
// The single WebPage instance used to open and evaluate pages.
var page = require('webpage').create();

// ---- Crawl configuration ----------------------------------------------
// Listing URL that loadController() hands to loadComputerList().
var address = 'http://product.pconline.com.cn/server/';
// Relative path of a text file; presumably the output file for scraped
// data -- TODO confirm against the rest of the script.
var mypath = 'version/server/server.txt';
// Paging state; neither value is read in the visible part of the script.
// NOTE(review): presumably `count` is the next page number and `pageSize`
// the total number of pages -- confirm.
var count = 2;
var pageSize = 0;

// ---- Runtime settings --------------------------------------------------
// Emit console output as GBK.
phantom.outputEncoding = "gbk";
// User-agent string the page object sends with its requests.
page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";
/**
 * Starts loading the listing page by passing the global `address`
 * to loadComputerList().
 *
 * @param {*} status - Not used by this function.
 *   NOTE(review): presumably a page-load status supplied by a caller
 *   outside the visible code -- confirm.
 */
function loadController(status){
loadComputerList(address);
}
function loadComputerList(url){
console.log('loading '+url);
page.onLoadFinished = function loadListsucc(status){
console.log("loadlistSucc ["+url+"] =======================Status:"+status);
};
page.open(url,function(status){
setTimeout(function(){
console.log(status);
var content='';
content = page.evaluate(function(){
var cont='';
var listComputer = document.querySelectorAll('div.item-title>h3>a');
var listPrice =document.querySelectorAll('div.price');
for(var j=0;jvar computer = listComputer[j].innerText;
&n
