Electron+Vue开发爬虫客户端2-自动下载网页文件
创建项目
尽量用图形化界面创建项目 安装插件也方便
vue ui
安装插件
vue-cli-plugin-electron-builder
插件官网地址: https://nklayman.github.io/vue-cli-plugin-electron-builder/
Choose Electron Version
选择
默认
即可
运行报错
INFO Launching Electron… Failed to fetch extension, trying 4 more times Failed to fetch extension, trying 3 more times Failed to fetch extension, trying 2 more times Failed to fetch extension, trying 1 more times Failed to fetch extension, trying 0 more times Vue Devtools failed to install: Error: net::ERR_CONNECTION_TIMED_OUT
这是因为Devtools的安装需要翻墙
注释掉
src/background.js
中的以下代码就行了
// Install Vue Devtools in development builds only. The extension is fetched
// over the network; when that fetch times out Electron retries several times
// and then logs the failure, so comment this whole block out if the extension
// host is unreachable.
if (isDevelopment && !process.env.IS_TEST) {
  try {
    await installVueDevtools();
  } catch (e) {
    console.error("Vue Devtools failed to install:", e.toString());
  }
}
加载页面
官方文档: https://www.electronjs.org/docs/api/webview-tag
页面添加webview
<webview
ref="mwv"
class="webview"
nodeintegration
disablewebsecurity
></webview>
配置中开启webview标签
// Create the main window with the <webview> tag switched on.
const win = new BrowserWindow({
  width: 1200,
  height: 600,
  webPreferences: {
    // Required for <webview> elements in the page to work.
    webviewTag: true,
    // NOTE(review): webSecurity is off and nodeIntegration is on, so loaded
    // pages are not sandboxed from Node — confirm this is acceptable for the
    // sites being crawled.
    webSecurity: false,
    // Needed by the renderer snippets that use require("electron").remote.
    enableRemoteModule: true,
    nodeIntegration: true
  }
});
获取页面Cookie
页面
<webview
ref="mwv"
class="webview"
partition="persist:psvmc"
nodeintegration
disablewebsecurity
></webview>
JS
// Read the cookies stored in the "persist:psvmc" partition used by the webview.
const { session } = window.require("electron").remote;
const ses = session.fromPartition("persist:psvmc");
ses.cookies
  .get({ url: "http://www.psvmc.cn" })
  .then(cookies => {
    console.log(cookies);
  })
  .catch(err => {
    // Surface lookup failures instead of leaving the rejection unhandled.
    console.error(err);
  });
也可以使用默认session
<webview
ref="mwv"
class="webview"
nodeintegration
disablewebsecurity
></webview>
js
// Read the cookies from the default session (webview without a partition).
const { session } = window.require("electron").remote;
const ses = session.defaultSession;
ses.cookies
  .get({ url: "http://www.psvmc.cn" })
  .then(cookies => {
    console.log(cookies);
  })
  .catch(err => {
    // Surface lookup failures instead of leaving the rejection unhandled.
    console.error(err);
  });
注意
webview和外层BrowserWindow内是可以共享session和cookie的。
Preload
加载要调用的JS
file:///Users/zhangjian/psvmc/app/me/web/91crawler2/public/mypreload.js
mypreload.js
文件放在了项目根目录的public文件夹下
// Preload script: installs window.showData only when Electron's remote module
// is reachable from the guest page.
if (require("electron").remote) {
  /**
   * Collect download links from every <a> on the page whose href contains a
   * "beike_id/<id>" path segment.
   *
   * @returns {{url: string, name: string}[]} One entry per matching anchor:
   *   the rewritten download URL and the anchor's visible text.
   */
  window.showData = function() {
    const a_arr = document.getElementsByTagName("a");
    console.info(a_arr);
    const href_arr = [];
    for (const a of a_arr) {
      const url = a.href;
      // The id is the path segment right after "beike_id/". Links that mention
      // "beike_id" without that segment are skipped rather than throwing.
      const tail = url.split("beike_id/")[1];
      if (tail === undefined) {
        continue;
      }
      const idstr = tail.split("/")[0];
      href_arr.push({
        url: "http://www.psvmc.cn/Lesson/down/id/" + idstr,
        name: a.innerText
      });
    }
    return href_arr;
  };
}
加载js和网页
openUrl: function() {
const mwv = this.$refs["mwv"];
mwv.src = this.weburl;
mwv.preload =
"file:///Users/zhangjian/psvmc/app/me/web/91crawler2/public/mypreload.js";
},
注意
- Electron-Vue项目在运行时页面是以URL加载的,那么加载
preload.js
就必须用file://
协议加载
- 目前还没有同时兼容开发环境和打包后获取preload.js路径的方法,所以我暂时先用dialog来选择文件路径了
- 一定要先设置preload再打开页面,当然同时设置也是可以的
调用其中的方法获取返回数据
myfun: function() {
var that = this;
const mwv = this.$refs["mwv"];
mwv.executeJavaScript("showData();").then(function(data) {
that.res_list = [];
if (data) {
that.res_list = that.res_list.concat(data);
}
弹窗选择文件
selectFile: function() {
const that = this;
dialog
.showOpenDialog({
properties: ["openFile", "openDirectory"]
.then(result => {
if (!result.canceled) {
that.outpath = result.filePaths[0];
.catch(err => {
console.log(err);
},
下载文件
下载文件有两种方式
方式1 调用浏览器下载
downloadfileByUrl: function(murl) {
session.defaultSession.downloadURL(murl);
},
监听下载进度方式
// Renderer-side listener: reports progress for downloads started through the
// default session.
session.defaultSession.on("will-download", (event, item) => {
  const filePath = path.join(
    app.getPath("documents"),
    item.getFilename()
  );
  console.info("即将下载:", filePath);
  // item.setSavePath(filePath) is intentionally not called here: from the
  // renderer it does not suppress the save dialog.
  item.on("updated", (event, state) => {
    if (state === "interrupted") {
      console.log("Download is interrupted but can be resumed");
    } else if (state === "progressing") {
      const total = item.getTotalBytes();
      // Skip the percentage when the total size is not known, to avoid
      // dividing by zero.
      if (total > 0) {
        const dp = (item.getReceivedBytes() * 100) / total;
        console.log(`${item.getFilename()}: ${dp}%`);
      }
    }
  });
  item.once("done", (event, state) => {
    if (state === "completed") {
      console.log("下载完成");
    } else {
      console.log(`下载失败: ${state}`);
    }
  });
});
官方说的设置下载位置后就不会弹出选择下载位置弹窗,但是实际并不生效(补充:在主进程中有效)
item.setSavePath(filePath);
优缺点
这种方式能保证下载文件名称中文不会乱码,但是官方给出的取消默认的下载行为再手动下载的方式行不通,后来发现是在渲染进程的session的will-download回调中不能取消下载行为或者取消弹窗,但是在主进程里是可以的。也就是说渲染进程中可以获取下载进度但是没法设置下载位置,所以在下载地址需要重定向获取的前提下可行的方案有:
- 在主进程中设置文件保存的位置,渲染进程中获取文件的下载进度。
-
主进程获取真正的下载地址后调用
event.preventDefault();
取消默认的下载,手动用NodeJS下载。
主进程中的配置
const path = require("path");
// Main-process listener: saves every download into the user's downloads
// folder and logs its progress.
win.webContents.session.on("will-download", (event, item) => {
  const filePath = path.join(app.getPath("downloads"), item.getFilename());
  // Setting the save path here, in the main process, suppresses the save dialog.
  item.setSavePath(filePath);
  item.on("updated", (event, state) => {
    if (state === "interrupted") {
      console.log("Download is interrupted but can be resumed");
    } else if (state === "progressing") {
      if (item.isPaused()) {
        console.log("Download is paused");
      } else {
        console.log(`Received bytes: ${item.getReceivedBytes()}`);
      }
    }
  });
  item.once("done", (event, state) => {
    if (state === "completed") {
      console.log("Download successfully");
    } else {
      console.log(`Download failed: ${state}`);
    }
  });
});
获取文件下载路径后取消下载,把下载地址发送到渲染进程中
// Main-process listener: read the resolved URL and file name of the download,
// then cancel the built-in download.
win.webContents.session.on("will-download", (evt, downloadItem) => {
  const fileURL = downloadItem.getURL();
  const fileName = downloadItem.getFilename();
  evt.preventDefault();
});
那会不会是session对象不一致呢
// Experiment: listen on the session of the current webContents from the
// renderer, to rule out a mismatch between session objects.
const { remote } = window.require("electron");
const webContent = remote.getCurrentWebContents();
webContent.session.on("will-download", (event, item) => {
  const filePath = path.join(
    app.getPath("downloads"),
    item.getFilename()
  );
  // Called from the renderer this still leaves the save dialog showing.
  item.setSavePath(filePath);
});
在渲染进程中获取
webContent.session
进行监听,回调中设置存储位置依旧会出现选择下载位置的弹窗,所以
event.preventDefault();
和item.setSavePath(filePath);
只能在主进程中生效。
方式2 使用NodeJS下载
目前我使用的就是这种方式,推荐使用。但是如果window中加载的是静态页面,该页面无法共享webview中的cookie。
由于下载文件的地址会重定向,所以使用了
follow-redirects
这个库。
downloadfileByUrl: function(murl) {
const fs = window.require("fs");
const iconv = require("iconv-lite");
const url = require("url");
const http = require("follow-redirects").http;
const options = url.parse(murl);
const request = http.request(options, response => {
let filename_all = response.headers["content-disposition"];
const file_length = response.headers["content-length"];
let downd_length = 0;
if (filename_all) {
let buffer = iconv.encode(filename_all, "iso-8859-1");
filename_all = iconv.decode(buffer, "utf8");
console.info(filename_all);
let filename = filename_all.split('"')[1];
const filepath = app.getPath("downloads") + "/" + filename;
console.info(filepath);
if (fs.existsSync(filepath)) {
fs.unlinkSync(filepath);
response.on("data", chunk => {
downd_length += chunk.length;
fs.writeFile(
filepath,
chunk,
{ flag: "a", encoding: "utf-8", mode: "0666" },
function(err) {
if (err) {
console.log("文件写入失败");
} else {
console.info(
"下载进度:" +
Math.ceil((downd_length * 100) / file_length) +
response.on("end", function() {
console.info(filename + "下载完成");
request.end();
},
优缺点
这种方式能够完全自己管控下载的位置及流程
文件名乱码解决方式
NodeJS获取
content-disposition
中的文件名
中文乱码
的解决方法
const iconv = require("iconv-lite");
// Turn the header string back into its raw bytes (ISO-8859-1), then decode
// those bytes as UTF-8.
const rawBytes = iconv.encode(filename_all, "iso-8859-1");
filename_all = iconv.decode(rawBytes, "utf8");
设置Cookie
如果Electron加载的是本地静态页面,页面中的请求无法携带Cookie,就需要我们自己添加Cookie请求头
getcookie: function () {
let that = this;
const ses = session.defaultSession;
ses.cookies
.get({url: "http://www.91taoke.com"})
.then(function (cookies) {
console.log(cookies);
that.mcookie = cookies;
downloadfileByUrl: function (murl) {
const fs = window.require("fs");
const iconv = require("iconv-lite");
const url = require("url");
const http = require("follow-redirects").http;
const options = url.parse(murl);
let mcookie = this.mcookie;
let cookieStr = "";
for (const mc of mcookie) {
cookieStr += mc.name + "=" + mc.value + ";"
options.headers = {
'Cookie': cookieStr,
'Accept': '/ ',
'Connection': 'keep-alive'
const request = http.request(options, response => {
let filename_all = response.headers["content-disposition"];
const file_length = response.headers["content-length"];
let downd_length = 0;
if (filename_all) {
let buffer = iconv.encode(filename_all, "iso-8859-1");
filename_all = iconv.decode(buffer, "utf8");
console.info(filename_all);
let filename = filename_all.split('"')[1];
const filepath = app.getPath("downloads") + "/" + filename;
console.info(filepath);
if (fs.existsSync(filepath)) {
fs.unlinkSync(filepath);
response.on("data", chunk => {
downd_length += chunk.length;
fs.writeFile(
filepath,
chunk,
{flag: "a", encoding: "utf-8", mode: "0666"},
function (err) {
if (err) {
console.log("文件写入失败");
} else {
console.info(
"下载进度:" +
Math.ceil((downd_length * 100) / file_length) +
response.on("end", function () {