From 4978665633ab3b165b4c58995fe5dd626b616c99 Mon Sep 17 00:00:00 2001 From: edwardsbean Date: Wed, 21 Jan 2015 13:26:56 +0800 Subject: [PATCH] add retry sleep time --- .../main/java/us/codecraft/webmagic/Site.java | 56 ++++++++++++------- .../java/us/codecraft/webmagic/Spider.java | 4 +- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java index dd9ea6b8..32118abe 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java @@ -39,6 +39,8 @@ public class Site { private int cycleRetryTimes = 0; + private int retrySleepTime = 1000; + private int timeOut = 5000; private static final Set DEFAULT_STATUS_CODE_SET = new HashSet(); @@ -49,8 +51,8 @@ public class Site { private HttpHost httpProxy; - private ProxyPool httpProxyPool; - + private ProxyPool httpProxyPool; + private boolean useGzip = true; /** @@ -359,6 +361,20 @@ public class Site { return useGzip; } + public int getRetrySleepTime() { + return retrySleepTime; + } + + /** + * Set retry sleep times when download fail, 1000 by default.
+ * + * @param retrySleepTime + */ + public Site setRetrySleepTime(int retrySleepTime) { + this.retrySleepTime = retrySleepTime; + return this; + } + /** * Whether use gzip.
* Default is true, you can set it to false to disable gzip. @@ -448,31 +464,31 @@ public class Site { * * @return this */ - public Site setHttpProxyPool(List httpProxyList) { - this.httpProxyPool=new ProxyPool(httpProxyList); - return this; - } + public Site setHttpProxyPool(List httpProxyList) { + this.httpProxyPool=new ProxyPool(httpProxyList); + return this; + } public Site enableHttpProxyPool() { this.httpProxyPool=new ProxyPool(); return this; } - public ProxyPool getHttpProxyPool() { - return httpProxyPool; - } + public ProxyPool getHttpProxyPool() { + return httpProxyPool; + } - public HttpHost getHttpProxyFromPool() { - return httpProxyPool.getProxy(); - } + public HttpHost getHttpProxyFromPool() { + return httpProxyPool.getProxy(); + } - public void returnHttpProxyToPool(HttpHost proxy,int statusCode) { - httpProxyPool.returnProxy(proxy,statusCode); - } - - public Site setProxyReuseInterval(int reuseInterval) { - this.httpProxyPool.setReuseInterval(reuseInterval); - return this; - } + public void returnHttpProxyToPool(HttpHost proxy,int statusCode) { + httpProxyPool.returnProxy(proxy,statusCode); + } + + public Site setProxyReuseInterval(int reuseInterval) { + this.httpProxyPool.setReuseInterval(reuseInterval); + return this; + } } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index dac1fdd1..7e499bed 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -407,14 +407,14 @@ public class Spider implements Runnable, Task { protected void processRequest(Request request) { Page page = downloader.download(request, this); if (page == null) { - sleep(site.getSleepTime()); + sleep(site.getRetrySleepTime()); onError(request); return; } // for cycle retry if (page.isNeedCycleRetry()) { extractAndAddRequests(page, true); - sleep(site.getSleepTime()); + sleep(site.getRetrySleepTime()); return; } pageProcessor.process(page);