Revert " Downloader 提供刷新组件的api,方便在spider中操作"

This reverts commit 2e2a0fdf3e.
pull/993/head
Sutra Zhou 4 years ago
parent 4bedd97267
commit c489647c4b

@ -40,12 +40,8 @@ public class Site {
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
private static final Set<Integer> DEFAULT_REFRESH_CODE_SET = new HashSet<>();
private Set<Integer> refreshCode = DEFAULT_REFRESH_CODE_SET;
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
private Map<String, String> headers = new HashMap<String, String>();
private boolean useGzip = true;
@ -53,7 +49,6 @@ public class Site {
private boolean disableCookieManagement = false;
// Populate the shared default code sets once, when the class is initialized.
static {
// 403 goes into the default refresh-code set.
DEFAULT_REFRESH_CODE_SET.add(HttpConstant.StatusCode.FORBIDDEN);
// 200 goes into the default accepted-status-code set.
DEFAULT_STATUS_CODE_SET.add(HttpConstant.StatusCode.CODE_200);
}
@ -202,15 +197,6 @@ public class Site {
return this;
}
/**
 * Set the status codes that make the spider refresh the downloader.
 *
 * <p>A {@code null} argument is not stored; the default refresh-code set is
 * used instead, so that callers of {@link #getRefreshCode()} can always invoke
 * {@code contains(...)} on the result without a NullPointerException.
 *
 * @param refreshCode status codes that trigger a refresh; {@code null} falls
 *                    back to the default set
 * @return this
 */
public Site setRefreshCode(Set<Integer> refreshCode) {
    // Never keep null here: the getter's result is dereferenced directly.
    this.refreshCode = (refreshCode != null) ? refreshCode : DEFAULT_REFRESH_CODE_SET;
    return this;
}
/**
 * get refreshCode
 *
 * @return the set of status codes currently configured as refresh codes
 */
public Set<Integer> getRefreshCode() {
    return this.refreshCode;
}
/**
* get acceptStatCode
*

@ -424,10 +424,7 @@ public class Spider implements Runnable, Task {
pipeline.process(page.getResultItems(), this);
}
}
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
downloader.refreshComponent(this);
}else {
} else {
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
}
sleep(site.getSleepTime());

@ -18,18 +18,14 @@ public interface Downloader {
* Downloads web pages and store in Page object.
*
* @param request request
* @param task task
* @param task task
* @return page
*/
Page download(Request request, Task task);
public Page download(Request request, Task task);
/**
* Tell the downloader how many threads the spider used.
*
* @param threadNum number of threads
*/
void setThread(int threadNum);
void refreshComponent(Task task);
public void setThread(int threadNum);
}

@ -111,17 +111,6 @@ public class HttpClientDownloader extends AbstractDownloader {
}
}
/**
 * Refreshes the components this downloader holds for the given task.
 *
 * <p>If a proxy provider is configured it is asked to refresh its proxy for
 * the task; afterwards the entry stored under the task site's domain is
 * removed from {@code httpClients} (presumably the cached HTTP client for
 * that domain - confirm against the field declaration).
 *
 * @param task the task whose site domain identifies the entry to drop
 */
@Override
public void refreshComponent(Task task) {
    // The proxy refresh happens first and only when a provider exists.
    if (proxyProvider != null) {
        proxyProvider.refreshProxy(task);
    }

    // Always drop the per-domain entry, with or without a proxy provider.
    httpClients.remove(
            task.getSite().getDomain());
}
@Override
public void setThread(int thread) {
httpClientGenerator.setPoolSize(thread);

@ -1,17 +1,13 @@
package us.codecraft.webmagic.downloader;
import java.io.File;
import java.io.IOException;
import java.security.KeyManagementException;
import java.security.KeyStore;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Map;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLContextSpi;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
@ -28,7 +24,6 @@ import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.DefaultHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
@ -37,7 +32,6 @@ import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.HttpContext;
import org.apache.http.ssl.SSLContexts;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -75,7 +69,7 @@ public class HttpClientGenerator {
return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
null,
new DefaultHostnameVerifier()); // 优先绕过安全证书
} catch (KeyManagementException | CertificateException | KeyStoreException | IOException e) {
} catch (KeyManagementException e) {
logger.error("ssl connection fail", e);
} catch (NoSuchAlgorithmException e) {
logger.error("ssl connection fail", e);
@ -83,8 +77,8 @@ public class HttpClientGenerator {
return SSLConnectionSocketFactory.getSocketFactory();
}
private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException, CertificateException, KeyStoreException, IOException {
// 实现一个X509TrustManager接口用于绕过验证不用修改里面的方法
private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
// 实现一个X509TrustManager接口用于绕过验证不用修改里面的方法
X509TrustManager trustManager = new X509TrustManager() {
@Override
@ -102,10 +96,10 @@ public class HttpClientGenerator {
};
SSLContext sc = SSLContext.getInstance("SSLv3");
SSLContext sc = SSLContext.getInstance("TLS");
sc.init(null, new TrustManager[] { trustManager }, null);
return sc;
}
}
public HttpClientGenerator setPoolSize(int poolSize) {
connectionManager.setMaxTotal(poolSize);

@ -28,7 +28,6 @@ public abstract class HttpConstant {
/**
 * HTTP status code constants.
 */
public static abstract class StatusCode {
// HTTP 200 (OK).
public static final int CODE_200 = 200;
// HTTP 403 (Forbidden).
public static final int FORBIDDEN = 403;
}

@ -57,11 +57,6 @@ public class SpiderTest {
return Site.me().setSleepTime(0);
}
}).setDownloader(new Downloader() {
@Override
public void refreshComponent(Task task) {
// Intentionally a no-op in this anonymous Downloader.
}
@Override
public Page download(Request request, Task task) {
return new Page().setRawText("");

@ -28,11 +28,6 @@ public class MockGithubDownloader implements Downloader {
return page;
}
@Override
public void refreshComponent(Task task) {
// Intentionally a no-op in this implementation.
}
@Override
public void setThread(int threadNum) {
}

@ -42,12 +42,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
this.initPhantomjsCrawlPath();
PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
}
@Override
public void refreshComponent(Task task) {
// Intentionally a no-op in this implementation.
}
/**
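* Adds a constructor that accepts a custom crawl.js path: when another project depends on this jar, runtime.exec() runs the phantomjs command with the crawl.js bundled inside the jar.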
* <pre>

@ -9,10 +9,6 @@ import us.codecraft.webmagic.selector.PlainText;
* @author code4crafter@gmail.com
*/
public class MockGithubDownloader implements Downloader{
@Override
public void refreshComponent(Task task) {
// Intentionally a no-op in this implementation.
}
private String html = "\n" +
"\n" +

@ -59,11 +59,6 @@ public class SeleniumDownloader implements Downloader, Closeable {
// "/Users/Bingo/Downloads/phantomjs-1.9.7-macosx/bin/phantomjs");
}
@Override
public void refreshComponent(Task task) {
// Intentionally a no-op in this implementation.
}
/**
* set sleep time to wait until load success
*

Loading…
Cancel
Save