From ebc61363c8c5afc66a43f468520f93dac54a2637 Mon Sep 17 00:00:00 2001
From: Jsbd
Date: Fri, 2 Dec 2016 10:17:46 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=BAPhantomJSDownloader=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?=E6=96=B0=E7=9A=84=E6=9E=84=E9=80=A0=E5=87=BD=E6=95=B0=EF=BC=8C?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81phantomjs=E8=87=AA=E5=AE=9A=E4=B9=89=E5=91=BD?=
 =?UTF-8?q?=E4=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

为PhantomJSDownloader添加新的构造函数,支持phantomjs自定义命令

example:
 * phantomjs.exe 支持windows环境
 * phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
 * /usr/local/bin/phantomjs 命令的绝对路径,避免因系统环境变量引起的IOException
---
 .../downloader/PhantomJSDownloader.java       | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
index 532a151d..22927884 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
@@ -20,13 +20,32 @@ import java.io.*;
 public class PhantomJSDownloader extends AbstractDownloader {
 
     private static Logger logger = LoggerFactory.getLogger(PhantomJSDownloader.class);
-    private static String phantomJSPath;
+    private String crawlJsPath;
+    private String phantomJsCommand = "phantomjs"; // default command; held per instance so downloaders do not overwrite each other
 
     private int retryNum;
     private int threadNum;
 
     public PhantomJSDownloader() {
-        PhantomJSDownloader.phantomJSPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js ";
+        this.initPhantomjsCrawlPath();
+    }
+
+    /**
+     * Creates a downloader that starts PhantomJS with a custom command.
+     *
+     * Examples:
+     *    phantomjs.exe                      for Windows environments
+     *    phantomjs --ignore-ssl-errors=yes  ignores some errors when the crawled address is https
+     *    /usr/local/bin/phantomjs           absolute path of the command, avoids an IOException caused by system environment variables
+     * @param phantomJsCommand command used to start PhantomJS; the crawl.js path and the request URL are appended to it
+     */
+    public PhantomJSDownloader(String phantomJsCommand) {
+        this.initPhantomjsCrawlPath();
+        this.phantomJsCommand = phantomJsCommand;
+    }
+
+    private void initPhantomjsCrawlPath() {
+        this.crawlJsPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js ";
     }
 
     @Override
@@ -67,7 +86,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
         try {
             String url = request.getUrl();
             Runtime runtime = Runtime.getRuntime();
-            Process process = runtime.exec("phantomjs " + phantomJSPath + url);
+            Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url);
             InputStream is = process.getInputStream();
             BufferedReader br = new BufferedReader(new InputStreamReader(is));
             StringBuffer stringBuffer = new StringBuffer();