diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index d342069f..87eab14c 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -2,8 +2,7 @@ package us.codecraft.webmagic;
import org.apache.http.HttpHost;
import org.apache.http.auth.UsernamePasswordCredentials;
-import us.codecraft.webmagic.proxy.ProxyPool;
-import us.codecraft.webmagic.proxy.TimerReuseProxyPool;
+import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*;
@@ -52,7 +51,7 @@ public class Site {
private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置
- private ProxyPool httpProxyPool;
+ private ProxyProvider httpProxyPool;
private boolean useGzip = true;
@@ -399,7 +398,11 @@ public class Site {
return new Task() {
@Override
public String getUUID() {
- return Site.this.getDomain();
+ String uuid = Site.this.getDomain(); // prefer the site's domain as the task id
+ if (uuid == null) {
+ uuid = UUID.randomUUID().toString(); // NOTE(review): with no domain, every call yields a different random id - confirm callers do not rely on a stable id
+ }
+ return uuid;
}
@Override
@@ -467,45 +470,4 @@ public class Site {
'}';
}
- /**
- * Set httpProxyPool, String[0]:ip, String[1]:port
- *
- * @param proxyPool proxyPool
- * @return this
- */
- public Site setHttpProxyPool(ProxyPool proxyPool) {
- this.httpProxyPool = proxyPool;
- return this;
- }
-
- /**
- * Set httpProxyPool, String[0]:ip, String[1]:port
- *
- * @param httpProxyList httpProxyList
- * @param isUseLastProxy isUseLastProxy
- * @return this
- */
- public Site setHttpProxyPool(List httpProxyList, boolean isUseLastProxy) {
- this.httpProxyPool=new TimerReuseProxyPool(httpProxyList, isUseLastProxy);
- return this;
- }
-
- public Site enableHttpProxyPool() {
- this.httpProxyPool=new TimerReuseProxyPool();
- return this;
- }
-
- public UsernamePasswordCredentials getUsernamePasswordCredentials() {
- return usernamePasswordCredentials;
- }
-
- public Site setUsernamePasswordCredentials(UsernamePasswordCredentials usernamePasswordCredentials) {
- this.usernamePasswordCredentials = usernamePasswordCredentials;
- return this;
- }
-
- public ProxyPool getHttpProxyPool() {
- return httpProxyPool;
- }
-
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 93a8a7ce..3a44af65 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -20,6 +20,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
+import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils;
@@ -45,11 +46,17 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpClientGenerator httpClientGenerator = new HttpClientGenerator();
private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
+
+ private ProxyProvider proxyProvider;
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
this.httpUriRequestConverter = httpUriRequestConverter;
}
+ public void setProxyProvider(ProxyProvider proxyProvider) {
+ this.proxyProvider = proxyProvider;
+ }
+
private CloseableHttpClient getHttpClient(Site site) {
if (site == null) {
return httpClientGenerator.getClient(null);
@@ -79,8 +86,8 @@ public class HttpClientDownloader extends AbstractDownloader {
Site site = task.getSite();
Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext();
- if (site.getHttpProxyPool() != null) {
- proxy = site.getHttpProxyPool().getProxy(task);
+ proxy = (proxyProvider != null) ? proxyProvider.getProxy(task) : null; // the provider is optional
+ if (proxy != null) { // a provider may return null; skip the proxy auth setup instead of dereferencing null
request.putExtra(Request.PROXY, proxy);
AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
@@ -111,9 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader {
//ensure the connection is released back to pool
EntityUtils.consumeQuietly(httpResponse.getEntity());
}
- if (proxy != null) {
- site.getHttpProxyPool().returnProxy(proxy, statusCode, task);
- }
}
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
index 0ec4b0e9..951d3323 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
@@ -43,7 +43,7 @@ public class HttpUriRequestConverter {
}
if (proxy != null) {
- requestConfigBuilder.setProxy(new HttpHost(proxy.getProxyHost().getHost(), proxy.getProxyHost().getPort()));
+ requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
}
requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
index 1d872d43..a38ccaa7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
@@ -6,42 +6,36 @@ package us.codecraft.webmagic.proxy;
public class Proxy {
- private ProxyHost proxyHost;
+ private String host;
+ private int port;
private String username;
private String password;
- public Proxy(ProxyHost proxyHost, String username, String password) {
- this.proxyHost = proxyHost;
- this.username = username;
- this.password = password;
+ public Proxy(String host, int port) {
+ this.host = host;
+ this.port = port;
}
- public Proxy(ProxyHost proxyHost) {
- this.proxyHost = proxyHost;
+ public Proxy(String host, int port, String username, String password) {
+ this.host = host;
+ this.port = port;
+ this.username = username;
+ this.password = password;
}
- public ProxyHost getProxyHost() {
- return proxyHost;
+ public String getHost() {
+ return host;
}
- public void setProxyHost(ProxyHost proxyHost) {
- this.proxyHost = proxyHost;
+ public int getPort() {
+ return port;
}
public String getUsername() {
return username;
}
- public void setUsername(String username) {
- this.username = username;
- }
-
public String getPassword() {
return password;
}
-
- public void setPassword(String password) {
- this.password = password;
- }
-
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyHost.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyHost.java
deleted file mode 100644
index 11e8c87b..00000000
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyHost.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package us.codecraft.webmagic.proxy;
-
-/**
- * @author code4crafter@gmail.com
- * Date: 17/3/18
- * Time: 下午12:04
- */
-public class ProxyHost {
-
- private String host;
-
- private int port;
-
- public String getHost() {
- return host;
- }
-
- public ProxyHost(String host, int port) {
- this.host = host;
- this.port = port;
- }
-
- public void setHost(String host) {
- this.host = host;
- }
-
- public int getPort() {
- return port;
- }
-
- public void setPort(int port) {
- this.port = port;
- }
-}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
similarity index 87%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
rename to webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
index fcc1f8df..4266d78c 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
@@ -5,7 +5,7 @@ import us.codecraft.webmagic.Task;
/**
* Created by edwardsbean on 15-2-28.
*/
-public interface ProxyPool {
+public interface ProxyProvider {
void returnProxy(Proxy proxy, boolean banned, Task task);
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxy.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxy.java
index 8f592527..7002df47 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxy.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxy.java
@@ -72,14 +72,10 @@ public class TimerReuseProxy extends Proxy implements Delayed, Serializable {
private List failedErrorType = new ArrayList();
- public TimerReuseProxy(ProxyHost proxyHost, String user, String password) {
- super(proxyHost, user, password);
+ public TimerReuseProxy(String host, int port, String username, String password) {
+ super(host, port, username, password);
}
- public TimerReuseProxy(ProxyHost proxyHost, String user, String password, int reuseTimeInterval) {
- super(proxyHost, user, password);
- this.reuseTimeInterval = reuseTimeInterval;
- }
public int getSuccessNum() {
return successNum;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxyPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxyPool.java
index 6fde6047..6dbac5d5 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxyPool.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/TimerReuseProxyPool.java
@@ -1,17 +1,6 @@
package us.codecraft.webmagic.proxy;
-import org.apache.http.HttpHost;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.utils.FilePersistentBase;
-import us.codecraft.webmagic.utils.ProxyUtils;
-
-import java.io.*;
-import java.util.*;
-import java.util.Map.Entry;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.DelayQueue;
+import us.codecraft.webmagic.Task;
/**
* Pooled Proxy Object
@@ -20,187 +9,196 @@ import java.util.concurrent.DelayQueue;
* @see Proxy
* @since 0.5.1
*/
-public class TimerReuseProxyPool implements ProxyPool {
-
- private Logger logger = LoggerFactory.getLogger(getClass());
-
- private BlockingQueue proxyQueue = new DelayQueue();
- private Map allProxy = new ConcurrentHashMap();
-
- private int reuseInterval = 1500;// ms
- private int reviveTime = 2 * 60 * 60 * 1000;// ms
- private int saveProxyInterval = 10 * 60 * 1000;// ms
-
- private boolean isEnable = false;
- private boolean validateWhenInit = false;
- // private boolean isUseLastProxy = true;
-
- public TimerReuseProxyPool(List httpProxyList) {
- this(httpProxyList, true);
- }
-
- private void addProxy(Map httpProxyMap) {
- isEnable = true;
- for (Entry entry : httpProxyMap.entrySet()) {
- try {
- if (allProxy.containsKey(entry.getKey())) {
- continue;
- }
- if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) {
- entry.getValue().setFailedNum(0);
- entry.getValue().setReuseTimeInterval(reuseInterval);
- proxyQueue.add(entry.getValue());
- allProxy.put(entry.getKey(), entry.getValue());
- }
- } catch (NumberFormatException e) {
- logger.error("HttpHost init error:", e);
- }
- }
- logger.info("proxy pool size>>>>" + allProxy.size());
- }
-
- public void addProxy(Proxy... httpProxyList) {
- isEnable = true;
- for (Proxy proxy : httpProxyList) {
- if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) {
- TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval);
- proxyQueue.add(p);
- allProxy.put(p.getProxyHost().getHost(), p);
- }
- }
- logger.info("proxy pool size>>>>" + allProxy.size());
- }
-
- public TimerReuseProxy getProxy() {
- TimerReuseProxy proxy = null;
- try {
- Long time = System.currentTimeMillis();
- proxy = proxyQueue.take();
- double costTime = (System.currentTimeMillis() - time) / 1000.0;
- if (costTime > reuseInterval) {
- logger.info("get proxy time >>>> " + costTime);
- }
- TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
- p.setLastBorrowTime(System.currentTimeMillis());
- p.borrowNumIncrement(1);
- } catch (InterruptedException e) {
- logger.error("get proxy error", e);
- }
- if (proxy == null) {
- throw new NoSuchElementException();
- }
- return proxy;
- }
-
- public void returnProxy(Proxy proxy, int statusCode) {
- TimerReuseProxy p = allProxy.get(proxy.getProxyHost());
- if (p == null) {
- return;
- }
- switch (statusCode) {
- case TimerReuseProxy.SUCCESS:
- p.setReuseTimeInterval(reuseInterval);
- p.setFailedNum(0);
- p.setFailedErrorType(new ArrayList());
- p.recordResponse();
- p.successNumIncrement(1);
- break;
- case TimerReuseProxy.ERROR_403:
- // banned,try longer interval
- p.fail(TimerReuseProxy.ERROR_403);
- p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
- logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
- break;
- case TimerReuseProxy.ERROR_BANNED:
- p.fail(TimerReuseProxy.ERROR_BANNED);
- p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
- logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
- break;
- case TimerReuseProxy.ERROR_404:
- // p.fail(Proxy.ERROR_404);
- // p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
- break;
- default:
- p.fail(statusCode);
- break;
- }
- if (p.getFailedNum() > 20) {
- p.setReuseTimeInterval(reviveTime);
- logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
- return;
- }
- if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
- if (!ProxyUtils.validateProxy(proxy)) {
- p.setReuseTimeInterval(reviveTime);
- logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
- return;
- }
- }
- try {
- proxyQueue.put(p);
- } catch (InterruptedException e) {
- logger.warn("proxyQueue return proxy error", e);
- }
- }
-
- public String allProxyStatus() {
- String re = "all proxy info >>>> \n";
- for (Entry entry : allProxy.entrySet()) {
- re += entry.getValue().toString() + "\n";
- }
- return re;
- }
-
- public int getIdleNum() {
- return proxyQueue.size();
- }
-
- public int getReuseInterval() {
- return reuseInterval;
- }
-
- public void setReuseInterval(int reuseInterval) {
- this.reuseInterval = reuseInterval;
- }
-
- public void enable(boolean isEnable) {
- this.isEnable = isEnable;
- }
-
- public boolean isEnable() {
- return isEnable;
- }
-
- public int getReviveTime() {
- return reviveTime;
- }
-
- public void setReviveTime(int reviveTime) {
- this.reviveTime = reviveTime;
- }
-
- public boolean isValidateWhenInit() {
- return validateWhenInit;
- }
-
- public void validateWhenInit(boolean validateWhenInit) {
- this.validateWhenInit = validateWhenInit;
- }
-
- public int getSaveProxyInterval() {
- return saveProxyInterval;
- }
-
- public void setSaveProxyInterval(int saveProxyInterval) {
- this.saveProxyInterval = saveProxyInterval;
- }
-
- public String getProxyFilePath() {
- return proxyFilePath;
- }
-
- public void setProxyFilePath(String proxyFilePath) {
- this.proxyFilePath = proxyFilePath;
- }
+public class TimerReuseProxyPool implements ProxyProvider {
+ @Override
+ public void returnProxy(Proxy proxy, boolean banned, Task task) {
+
+ }
+
+ @Override
+ public Proxy getProxy(Task task) {
+ return null;
+ }
+
+// private Logger logger = LoggerFactory.getLogger(getClass());
+//
+// private BlockingQueue proxyQueue = new DelayQueue();
+// private Map allProxy = new ConcurrentHashMap();
+//
+// private int reuseInterval = 1500;// ms
+// private int reviveTime = 2 * 60 * 60 * 1000;// ms
+// private int saveProxyInterval = 10 * 60 * 1000;// ms
+//
+// private boolean isEnable = false;
+// private boolean validateWhenInit = false;
+// // private boolean isUseLastProxy = true;
+//
+// public TimerReuseProxyPool(List httpProxyList) {
+// this(httpProxyList, true);
+// }
+//
+// private void addProxy(Map httpProxyMap) {
+// isEnable = true;
+// for (Entry entry : httpProxyMap.entrySet()) {
+// try {
+// if (allProxy.containsKey(entry.getKey())) {
+// continue;
+// }
+// if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) {
+// entry.getValue().setFailedNum(0);
+// entry.getValue().setReuseTimeInterval(reuseInterval);
+// proxyQueue.add(entry.getValue());
+// allProxy.put(entry.getKey(), entry.getValue());
+// }
+// } catch (NumberFormatException e) {
+// logger.error("HttpHost init error:", e);
+// }
+// }
+// logger.info("proxy pool size>>>>" + allProxy.size());
+// }
+//
+// public void addProxy(Proxy... httpProxyList) {
+// isEnable = true;
+// for (Proxy proxy : httpProxyList) {
+// if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) {
+// TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval);
+// proxyQueue.add(p);
+// allProxy.put(p.getProxyHost().getHost(), p);
+// }
+// }
+// logger.info("proxy pool size>>>>" + allProxy.size());
+// }
+//
+// public TimerReuseProxy getProxy() {
+// TimerReuseProxy proxy = null;
+// try {
+// Long time = System.currentTimeMillis();
+// proxy = proxyQueue.take();
+// double costTime = (System.currentTimeMillis() - time) / 1000.0;
+// if (costTime > reuseInterval) {
+// logger.info("get proxy time >>>> " + costTime);
+// }
+// TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
+// p.setLastBorrowTime(System.currentTimeMillis());
+// p.borrowNumIncrement(1);
+// } catch (InterruptedException e) {
+// logger.error("get proxy error", e);
+// }
+// if (proxy == null) {
+// throw new NoSuchElementException();
+// }
+// return proxy;
+// }
+//
+// public void returnProxy(Proxy proxy, int statusCode) {
+// TimerReuseProxy p = allProxy.get(proxy.getProxyHost());
+// if (p == null) {
+// return;
+// }
+// switch (statusCode) {
+// case TimerReuseProxy.SUCCESS:
+// p.setReuseTimeInterval(reuseInterval);
+// p.setFailedNum(0);
+// p.setFailedErrorType(new ArrayList());
+// p.recordResponse();
+// p.successNumIncrement(1);
+// break;
+// case TimerReuseProxy.ERROR_403:
+// // banned,try longer interval
+// p.fail(TimerReuseProxy.ERROR_403);
+// p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
+// logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
+// break;
+// case TimerReuseProxy.ERROR_BANNED:
+// p.fail(TimerReuseProxy.ERROR_BANNED);
+// p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
+// logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
+// break;
+// case TimerReuseProxy.ERROR_404:
+// // p.fail(Proxy.ERROR_404);
+// // p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
+// break;
+// default:
+// p.fail(statusCode);
+// break;
+// }
+// if (p.getFailedNum() > 20) {
+// p.setReuseTimeInterval(reviveTime);
+// logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
+// return;
+// }
+// if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
+// if (!ProxyUtils.validateProxy(proxy)) {
+// p.setReuseTimeInterval(reviveTime);
+// logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
+// return;
+// }
+// }
+// try {
+// proxyQueue.put(p);
+// } catch (InterruptedException e) {
+// logger.warn("proxyQueue return proxy error", e);
+// }
+// }
+//
+// public String allProxyStatus() {
+// String re = "all proxy info >>>> \n";
+// for (Entry entry : allProxy.entrySet()) {
+// re += entry.getValue().toString() + "\n";
+// }
+// return re;
+// }
+//
+// public int getIdleNum() {
+// return proxyQueue.size();
+// }
+//
+// public int getReuseInterval() {
+// return reuseInterval;
+// }
+//
+// public void setReuseInterval(int reuseInterval) {
+// this.reuseInterval = reuseInterval;
+// }
+//
+// public void enable(boolean isEnable) {
+// this.isEnable = isEnable;
+// }
+//
+// public boolean isEnable() {
+// return isEnable;
+// }
+//
+// public int getReviveTime() {
+// return reviveTime;
+// }
+//
+// public void setReviveTime(int reviveTime) {
+// this.reviveTime = reviveTime;
+// }
+//
+// public boolean isValidateWhenInit() {
+// return validateWhenInit;
+// }
+//
+// public void validateWhenInit(boolean validateWhenInit) {
+// this.validateWhenInit = validateWhenInit;
+// }
+//
+// public int getSaveProxyInterval() {
+// return saveProxyInterval;
+// }
+//
+// public void setSaveProxyInterval(int saveProxyInterval) {
+// this.saveProxyInterval = saveProxyInterval;
+// }
+//
+// public String getProxyFilePath() {
+// return proxyFilePath;
+// }
+//
+// public void setProxyFilePath(String proxyFilePath) {
+// this.proxyFilePath = proxyFilePath;
+// }
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ProxyUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ProxyUtils.java
index f9f9a8c0..9b734c73 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ProxyUtils.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ProxyUtils.java
@@ -1,14 +1,12 @@
package us.codecraft.webmagic.utils;
-import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.proxy.ProxyHost;
+import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException;
-import java.net.*;
-import java.util.Enumeration;
-import java.util.regex.Pattern;
+import java.net.InetSocketAddress;
+import java.net.Socket;
/**
* Pooled Proxy Object
@@ -18,72 +16,19 @@ import java.util.regex.Pattern;
*/
public class ProxyUtils {
- private static InetAddress localAddr;
- private static String networkInterface = "eth7";
private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class);
- static {
- init();
- }
-
- private static void init() {
- // first way to get local IP
- try {
- localAddr = InetAddress.getLocalHost();
- logger.info("local IP:" + localAddr.getHostAddress());
- } catch (UnknownHostException e) {
- logger.info("try again\n");
- }
- if (localAddr != null) {
- return;
- }
- // other way to get local IP
- Enumeration localAddrs;
- try {
- // modify your network interface name
- NetworkInterface ni = NetworkInterface.getByName(networkInterface);
- if (ni == null) {
- return;
- }
- localAddrs = ni.getInetAddresses();
- if (localAddrs == null || !localAddrs.hasMoreElements()) {
- logger.error("choose NetworkInterface\n" + getNetworkInterface());
- return;
- }
- while (localAddrs.hasMoreElements()) {
- InetAddress tmp = localAddrs.nextElement();
- if (!tmp.isLoopbackAddress() && !tmp.isLinkLocalAddress() && !(tmp instanceof Inet6Address)) {
- localAddr = tmp;
- logger.info("local IP:" + localAddr.getHostAddress());
- break;
- }
- }
- } catch (Exception e) {
- logger.error("Failure when init ProxyUtil", e);
- logger.error("choose NetworkInterface\n" + getNetworkInterface());
- }
- }
-
- public static HttpHost convert(ProxyHost p){
- return new HttpHost(p.getHost(),p.getPort());
- }
- public static boolean validateProxy(ProxyHost p) {
- if (localAddr == null) {
- logger.error("cannot get local IP");
- return false;
- }
- boolean isReachable = false;
+ public static boolean validateProxy(Proxy p) {
Socket socket = null;
try {
socket = new Socket();
- socket.bind(new InetSocketAddress(localAddr, 0));
InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort());
socket.connect(endpointSocketAddr, 3000);
- logger.debug("SUCCESS - connection established! Local: " + localAddr.getHostAddress() + " remote: " + p);
- isReachable = true;
+ return true;
} catch (IOException e) {
- logger.warn("FAILRE - CAN not connect! Local: " + localAddr.getHostAddress() + " remote: " + p);
+ logger.warn("FAILRE - CAN not connect! remote: " + p);
+ return false;
} finally {
if (socket != null) {
try {
@@ -93,30 +38,7 @@ public class ProxyUtils {
}
}
}
- return isReachable;
- }
-
- private static String getNetworkInterface() {
- String networkInterfaceName = ">>>> modify networkInterface in us.codecraft.webmagic.utils.ProxyUtils";
- Enumeration enumeration = null;
- try {
- enumeration = NetworkInterface.getNetworkInterfaces();
- } catch (SocketException e1) {
- e1.printStackTrace();
- }
- while (enumeration.hasMoreElements()) {
- NetworkInterface networkInterface = enumeration.nextElement();
-
- Enumeration addr = networkInterface.getInetAddresses();
- while (addr.hasMoreElements()) {
- String s = addr.nextElement().getHostAddress();
- Pattern IPV4_PATTERN = Pattern.compile("^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
- if (s != null && IPV4_PATTERN.matcher(s).matches()) {
- networkInterfaceName += networkInterface.toString() + "IP:" + s + "\n\n";
- }
- }
- }
- return networkInterfaceName;
}
+
}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
index 5440b338..fd1f4c2f 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
@@ -5,7 +5,7 @@ import com.github.dreamhead.moco.Runnable;
import com.github.dreamhead.moco.Runner;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.RequestBuilder;
+import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
@@ -87,12 +87,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me();
- CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null);
+ CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site);
// encoding in http header Content-Type
Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null;
try {
- httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null));
+ httpResponse = httpClient.execute(new HttpUriRequestConverter().convert(requestGBK, site, null));
} catch (IOException e) {
e.printStackTrace();
}
@@ -117,31 +117,32 @@ public class HttpClientDownloaderTest {
server.delete(eq(query("q"), "webmagic")).response("delete");
server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head"));
server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace");
+ final HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
+ final Site site = Site.me();
Runner.running(server, new Runnable() {
@Override
public void run() throws Exception {
- HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request();
request.setUrl("http://127.0.0.1:12306/search");
request.putParams("q", "webmagic");
request.setMethod(HttpConstant.Method.GET);
- RequestBuilder requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("get");
+ HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site,null);
+ assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("get");
request.setMethod(HttpConstant.Method.POST);
- requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("post");
+ httpUriRequest = httpUriRequestConverter.convert(request, site, null);
+ assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("post");
request.setMethod(HttpConstant.Method.PUT);
- requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("put");
+ httpUriRequest = httpUriRequestConverter.convert(request, site, null);
+ assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("put");
request.setMethod(HttpConstant.Method.DELETE);
- requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("delete");
+ httpUriRequest = httpUriRequestConverter.convert(request, site, null);
+ assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("delete");
request.setMethod(HttpConstant.Method.HEAD);
- requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(HttpClients.custom().build().execute(requestBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head");
+ httpUriRequest = httpUriRequestConverter.convert(request, site, null);
+ assertThat(HttpClients.custom().build().execute(httpUriRequest).getFirstHeader("method").getValue()).isEqualTo("head");
request.setMethod(HttpConstant.Method.TRACE);
- requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
- assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("trace");
+ httpUriRequest = httpUriRequestConverter.convert(request, site, null);
+ assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("trace");
}
});
}
@@ -156,7 +157,7 @@ public class HttpClientDownloaderTest {
final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request();
request.setUrl("http://127.0.0.1:12306/");
- Page page = httpClientDownloader.download(request, null);
+ Page page = httpClientDownloader.download(request, Site.me().toTask());
assertThat(page.getRawText()).isEqualTo("foo");
}
});
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
index 64773236..86af3672 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
@@ -2,13 +2,10 @@ package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import org.junit.BeforeClass;
-import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
-import static org.assertj.core.api.Assertions.assertThat;
-
/**
* @author yxssfxwzy@sina.com May 30, 2014
*
@@ -27,30 +24,6 @@ public class ProxyTest {
}
}
- @Test
- public void testProxy() {
- TimerReuseProxyPool proxyPool = new TimerReuseProxyPool(httpProxyList,false);
- proxyPool.setReuseInterval(500);
- assertThat(proxyPool.getIdleNum()).isEqualTo(4);
- for (int i = 0; i < 2; i++) {
- List fetchList = new ArrayList();
- while (proxyPool.getIdleNum() != 0) {
- Proxy proxy = proxyPool.getProxy();
- HttpHost httphost = proxy.getHttpHost();
- // httphostList.add(httphost);
- System.out.println(httphost.getHostName() + ":" + httphost.getPort());
- Fetch tmp = new Fetch(httphost);
- tmp.start();
- fetchList.add(tmp);
- }
- for (Fetch fetch : fetchList) {
- proxyPool.returnProxy(fetch.hp, Proxy.SUCCESS);
- }
- System.out.println(proxyPool.allProxyStatus());
-
- }
- }
-
class Fetch extends Thread {
HttpHost hp;