diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 9e77ef5f..fa907a1d 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -82,14 +82,14 @@ public class HttpClientDownloader extends AbstractDownloader { } logger.info("downloading page {}", request.getUrl()); CloseableHttpResponse httpResponse = null; - int statusCode=0; + int statusCode = 0; try { HttpHost proxyHost = null; Proxy proxy = null; //TODO - if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) { + if (site != null && site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) { proxy = site.getHttpProxyFromPool(); proxyHost = proxy.getHttpHost(); - } else if(site.getHttpProxy()!= null){ + } else if (site != null && site.getHttpProxy() != null){ proxyHost = site.getHttpProxy(); } @@ -107,25 +107,21 @@ public class HttpClientDownloader extends AbstractDownloader { } } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); - if (site.getCycleRetryTimes() > 0) { + if (site != null && site.getCycleRetryTimes() > 0) { return addToCycleRetry(request, site); } onError(request); return null; } finally { + if (httpResponse != null) { + //ensure the connection is released back to pool + EntityUtils.consumeQuietly(httpResponse.getEntity()); + } request.putExtra(Request.STATUS_CODE, statusCode); - if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) { + if (site != null && site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) { site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request .getExtra(Request.STATUS_CODE)); } - try { - if (httpResponse != null) { - //ensure the connection is released back to pool - EntityUtils.consume(httpResponse.getEntity()); - } - } catch (IOException e) { - logger.warn("close response fail", e); - } } } @@ -138,19 +134,23 @@ public class HttpClientDownloader extends AbstractDownloader { return acceptStatCode.contains(statusCode); } - protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map headers,HttpHost proxy) { + protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map headers, HttpHost proxy) { RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl()); if (headers != null) { for (Map.Entry headerEntry : headers.entrySet()) { requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue()); } } - RequestConfig.Builder requestConfigBuilder = RequestConfig.custom() - .setConnectionRequestTimeout(site.getTimeOut()) - .setSocketTimeout(site.getTimeOut()) - .setConnectTimeout(site.getTimeOut()) - .setCookieSpec(CookieSpecs.BEST_MATCH); - if (proxy !=null) { + + RequestConfig.Builder requestConfigBuilder = RequestConfig.custom(); + if (site != null) { + requestConfigBuilder.setConnectionRequestTimeout(site.getTimeOut()) + .setSocketTimeout(site.getTimeOut()) + .setConnectTimeout(site.getTimeOut()) + .setCookieSpec(CookieSpecs.BEST_MATCH); + } + + if (proxy != null) { requestConfigBuilder.setProxy(proxy); request.putExtra(Request.PROXY, proxy); } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java index 1a0b2bdb..aec53091 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java @@ -100,7 +100,7 @@ public class HttpClientGenerator { CredentialsProvider credsProvider = null; HttpClientBuilder httpClientBuilder = HttpClients.custom(); - if(proxy!=null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword())) + if (proxy != null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword())) { credsProvider= new BasicCredentialsProvider(); credsProvider.setCredentials( @@ -109,7 +109,7 @@ public class HttpClientGenerator { httpClientBuilder.setDefaultCredentialsProvider(credsProvider); } - if(site!=null&&site.getHttpProxy()!=null&&site.getUsernamePasswordCredentials()!=null){ + if (site != null && site.getHttpProxy()!= null && site.getUsernamePasswordCredentials() != null){ credsProvider = new BasicCredentialsProvider(); credsProvider.setCredentials( new AuthScope(site.getHttpProxy()),//可以访问的范围 @@ -137,14 +137,19 @@ public class HttpClientGenerator { } //解决post/redirect/post 302跳转问题 httpClientBuilder.setRedirectStrategy(new CustomRedirectStrategy()); - - SocketConfig socketConfig = SocketConfig.custom().setSoTimeout(site.getTimeOut()).setSoKeepAlive(true).setTcpNoDelay(true).build(); + + SocketConfig.Builder socketConfigBuilder = SocketConfig.custom(); + socketConfigBuilder.setSoKeepAlive(true).setTcpNoDelay(true); + if (site != null) { + socketConfigBuilder.setSoTimeout(site.getTimeOut()); + } + SocketConfig socketConfig = socketConfigBuilder.build(); httpClientBuilder.setDefaultSocketConfig(socketConfig); connectionManager.setDefaultSocketConfig(socketConfig); if (site != null) { httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true)); + generateCookie(httpClientBuilder, site); } - generateCookie(httpClientBuilder, site); return httpClientBuilder.build(); } diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java index 0e442a87..5440b338 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java @@ -145,4 +145,20 @@ public class HttpClientDownloaderTest { } }); } + + @Test + public void test_download_when_task_is_null() throws Exception { + HttpServer server = httpserver(12306); + server.response("foo"); + Runner.running(server, new Runnable() { + @Override + public void run() throws Exception { + final HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); + Request request = new Request(); + request.setUrl("http://127.0.0.1:12306/"); + Page page = httpClientDownloader.download(request, null); + assertThat(page.getRawText()).isEqualTo("foo"); + } + }); + } }