From 6a828e923cb7650f60f69f200ef14061c9235f5c Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Tue, 3 Dec 2013 09:59:54 +0800 Subject: [PATCH] #46 Downloader thread hang up when timeout --- user-manual.md | 4 ++-- .../codecraft/webmagic/downloader/HttpClientDownloader.java | 1 + .../java/us/codecraft/webmagic/scripts/ScriptConsole.java | 1 + .../java/us/codecraft/webmagic/scripts/ScriptEnginePool.java | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/user-manual.md b/user-manual.md index acb955ee..5963df6a 100644 --- a/user-manual.md +++ b/user-manual.md @@ -27,12 +27,12 @@ webmagic使用maven管理依赖,在项目中添加对应的依赖即可使用w us.codecraft webmagic-core - 0.4.0 + 0.4.1 us.codecraft webmagic-extension - 0.4.0 + 0.4.1 #### 项目结构 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 1daaad96..da34c2de 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -105,6 +105,7 @@ public class HttpClientDownloader implements Downloader { } RequestConfig.Builder requestConfigBuilder = RequestConfig.custom() .setConnectionRequestTimeout(site.getTimeOut()) + .setSocketTimeout(site.getTimeOut()) .setConnectTimeout(site.getTimeOut()) .setCookieSpec(CookieSpecs.BEST_MATCH); if (site != null && site.getHttpProxy() != null) { diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java index 7d3b6365..0a2424bf 100644 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java @@ -93,6 +93,7 @@ public class ScriptConsole { .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build(); pageProcessor.getSite().setSleepTime(params.getSleepTime()); pageProcessor.getSite().setAcceptStatCode(Sets.newHashSet(200, 404, 500)); + pageProcessor.getSite().setUserAgent("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5"); Spider spider = Spider.create(pageProcessor).thread(params.getThread()); spider.clearPipeline().addPipeline(new Pipeline() { @Override diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java index 9dc74133..d1e5d7fe 100644 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java @@ -34,6 +34,7 @@ public class ScriptEnginePool { public void release(ScriptEngine scriptEngine){ scriptEngines.add(scriptEngine); + availableCount.incrementAndGet(); } }