test pass

pull/524/head
yihua.huang 8 years ago
parent 474b7c9d57
commit 68050fc88e

@ -2,8 +2,7 @@ package us.codecraft.webmagic;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.auth.UsernamePasswordCredentials;
import us.codecraft.webmagic.proxy.ProxyPool; import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.proxy.TimerReuseProxyPool;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*; import java.util.*;
@ -52,7 +51,7 @@ public class Site {
private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置 private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置
private ProxyPool httpProxyPool; private ProxyProvider httpProxyPool;
private boolean useGzip = true; private boolean useGzip = true;
@ -399,7 +398,11 @@ public class Site {
return new Task() { return new Task() {
@Override @Override
public String getUUID() { public String getUUID() {
return Site.this.getDomain(); String uuid = Site.this.getDomain();
if (uuid == null) {
uuid = UUID.randomUUID().toString();
}
return uuid;
} }
@Override @Override
@ -467,45 +470,4 @@ public class Site {
'}'; '}';
} }
/**
* Set httpProxyPool, String[0]:ip, String[1]:port <br>
*
* @param proxyPool proxyPool
* @return this
*/
public Site setHttpProxyPool(ProxyPool proxyPool) {
this.httpProxyPool = proxyPool;
return this;
}
/**
* Set httpProxyPool, String[0]:ip, String[1]:port <br>
*
* @param httpProxyList httpProxyList
* @param isUseLastProxy isUseLastProxy
* @return this
*/
public Site setHttpProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
this.httpProxyPool=new TimerReuseProxyPool(httpProxyList, isUseLastProxy);
return this;
}
public Site enableHttpProxyPool() {
this.httpProxyPool=new TimerReuseProxyPool();
return this;
}
public UsernamePasswordCredentials getUsernamePasswordCredentials() {
return usernamePasswordCredentials;
}
public Site setUsernamePasswordCredentials(UsernamePasswordCredentials usernamePasswordCredentials) {
this.usernamePasswordCredentials = usernamePasswordCredentials;
return this;
}
public ProxyPool getHttpProxyPool() {
return httpProxyPool;
}
} }

@ -20,6 +20,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.CharsetUtils;
@ -45,11 +46,17 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpClientGenerator httpClientGenerator = new HttpClientGenerator(); private HttpClientGenerator httpClientGenerator = new HttpClientGenerator();
private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter(); private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
private ProxyProvider proxyProvider;
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) { public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
this.httpUriRequestConverter = httpUriRequestConverter; this.httpUriRequestConverter = httpUriRequestConverter;
} }
public void setProxyProvider(ProxyProvider proxyProvider) {
this.proxyProvider = proxyProvider;
}
private CloseableHttpClient getHttpClient(Site site) { private CloseableHttpClient getHttpClient(Site site) {
if (site == null) { if (site == null) {
return httpClientGenerator.getClient(null); return httpClientGenerator.getClient(null);
@ -79,8 +86,8 @@ public class HttpClientDownloader extends AbstractDownloader {
Site site = task.getSite(); Site site = task.getSite();
Proxy proxy = null; Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext(); HttpContext httpContext = new BasicHttpContext();
if (site.getHttpProxyPool() != null) { if (proxyProvider != null) {
proxy = site.getHttpProxyPool().getProxy(task); proxy = proxyProvider.getProxy(task);
request.putExtra(Request.PROXY, proxy); request.putExtra(Request.PROXY, proxy);
AuthState authState = new AuthState(); AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword())); authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
@ -111,9 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader {
//ensure the connection is released back to pool //ensure the connection is released back to pool
EntityUtils.consumeQuietly(httpResponse.getEntity()); EntityUtils.consumeQuietly(httpResponse.getEntity());
} }
if (proxy != null) {
site.getHttpProxyPool().returnProxy(proxy, statusCode, task);
}
} }
} }

@ -43,7 +43,7 @@ public class HttpUriRequestConverter {
} }
if (proxy != null) { if (proxy != null) {
requestConfigBuilder.setProxy(new HttpHost(proxy.getProxyHost().getHost(), proxy.getProxyHost().getPort())); requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
} }
requestBuilder.setConfig(requestConfigBuilder.build()); requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build(); return requestBuilder.build();

@ -6,42 +6,36 @@ package us.codecraft.webmagic.proxy;
public class Proxy { public class Proxy {
private ProxyHost proxyHost; private String host;
private int port;
private String username; private String username;
private String password; private String password;
public Proxy(ProxyHost proxyHost, String username, String password) { public Proxy(String host, int port) {
this.proxyHost = proxyHost; this.host = host;
this.username = username; this.port = port;
this.password = password;
} }
public Proxy(ProxyHost proxyHost) { public Proxy(String host, int port, String username, String password) {
this.proxyHost = proxyHost; this.host = host;
this.port = port;
this.username = username;
this.password = password;
} }
public ProxyHost getProxyHost() { public String getHost() {
return proxyHost; return host;
} }
public void setProxyHost(ProxyHost proxyHost) { public int getPort() {
this.proxyHost = proxyHost; return port;
} }
public String getUsername() { public String getUsername() {
return username; return username;
} }
public void setUsername(String username) {
this.username = username;
}
public String getPassword() { public String getPassword() {
return password; return password;
} }
public void setPassword(String password) {
this.password = password;
}
} }

@ -1,34 +0,0 @@
package us.codecraft.webmagic.proxy;
/**
* @author code4crafter@gmail.com
* Date: 17/3/18
* Time: 12:04
*/
public class ProxyHost {
private String host;
private int port;
public String getHost() {
return host;
}
public ProxyHost(String host, int port) {
this.host = host;
this.port = port;
}
public void setHost(String host) {
this.host = host;
}
public int getPort() {
return port;
}
public void setPort(int port) {
this.port = port;
}
}

@ -5,7 +5,7 @@ import us.codecraft.webmagic.Task;
/** /**
* Created by edwardsbean on 15-2-28. * Created by edwardsbean on 15-2-28.
*/ */
public interface ProxyPool { public interface ProxyProvider {
void returnProxy(Proxy proxy, boolean banned, Task task); void returnProxy(Proxy proxy, boolean banned, Task task);

@ -72,14 +72,10 @@ public class TimerReuseProxy extends Proxy implements Delayed, Serializable {
private List<Integer> failedErrorType = new ArrayList<Integer>(); private List<Integer> failedErrorType = new ArrayList<Integer>();
public TimerReuseProxy(ProxyHost proxyHost, String user, String password) { public TimerReuseProxy(String host, int port, String username, String password) {
super(proxyHost, user, password); super(host, port, username, password);
} }
public TimerReuseProxy(ProxyHost proxyHost, String user, String password, int reuseTimeInterval) {
super(proxyHost, user, password);
this.reuseTimeInterval = reuseTimeInterval;
}
public int getSuccessNum() { public int getSuccessNum() {
return successNum; return successNum;

@ -1,17 +1,6 @@
package us.codecraft.webmagic.proxy; package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost; import us.codecraft.webmagic.Task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.utils.FilePersistentBase;
import us.codecraft.webmagic.utils.ProxyUtils;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
/** /**
* Pooled Proxy Object * Pooled Proxy Object
@ -20,187 +9,196 @@ import java.util.concurrent.DelayQueue;
* @see Proxy * @see Proxy
* @since 0.5.1 * @since 0.5.1
*/ */
public class TimerReuseProxyPool implements ProxyPool { public class TimerReuseProxyPool implements ProxyProvider {
@Override
private Logger logger = LoggerFactory.getLogger(getClass()); public void returnProxy(Proxy proxy, boolean banned, Task task) {
private BlockingQueue<TimerReuseProxy> proxyQueue = new DelayQueue<TimerReuseProxy>(); }
private Map<String, TimerReuseProxy> allProxy = new ConcurrentHashMap<String, TimerReuseProxy>();
@Override
private int reuseInterval = 1500;// ms public Proxy getProxy(Task task) {
private int reviveTime = 2 * 60 * 60 * 1000;// ms return null;
private int saveProxyInterval = 10 * 60 * 1000;// ms }
private boolean isEnable = false; // private Logger logger = LoggerFactory.getLogger(getClass());
private boolean validateWhenInit = false; //
// private boolean isUseLastProxy = true; // private BlockingQueue<TimerReuseProxy> proxyQueue = new DelayQueue<TimerReuseProxy>();
// private Map<String, TimerReuseProxy> allProxy = new ConcurrentHashMap<String, TimerReuseProxy>();
public TimerReuseProxyPool(List<String[]> httpProxyList) { //
this(httpProxyList, true); // private int reuseInterval = 1500;// ms
} // private int reviveTime = 2 * 60 * 60 * 1000;// ms
// private int saveProxyInterval = 10 * 60 * 1000;// ms
private void addProxy(Map<String, Proxy> httpProxyMap) { //
isEnable = true; // private boolean isEnable = false;
for (Entry<String, Proxy> entry : httpProxyMap.entrySet()) { // private boolean validateWhenInit = false;
try { // // private boolean isUseLastProxy = true;
if (allProxy.containsKey(entry.getKey())) { //
continue; // public TimerReuseProxyPool(List<String[]> httpProxyList) {
} // this(httpProxyList, true);
if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) { // }
entry.getValue().setFailedNum(0); //
entry.getValue().setReuseTimeInterval(reuseInterval); // private void addProxy(Map<String, Proxy> httpProxyMap) {
proxyQueue.add(entry.getValue()); // isEnable = true;
allProxy.put(entry.getKey(), entry.getValue()); // for (Entry<String, Proxy> entry : httpProxyMap.entrySet()) {
} // try {
} catch (NumberFormatException e) { // if (allProxy.containsKey(entry.getKey())) {
logger.error("HttpHost init error:", e); // continue;
} // }
} // if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) {
logger.info("proxy pool size>>>>" + allProxy.size()); // entry.getValue().setFailedNum(0);
} // entry.getValue().setReuseTimeInterval(reuseInterval);
// proxyQueue.add(entry.getValue());
public void addProxy(Proxy... httpProxyList) { // allProxy.put(entry.getKey(), entry.getValue());
isEnable = true; // }
for (Proxy proxy : httpProxyList) { // } catch (NumberFormatException e) {
if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) { // logger.error("HttpHost init error:", e);
TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval); // }
proxyQueue.add(p); // }
allProxy.put(p.getProxyHost().getHost(), p); // logger.info("proxy pool size>>>>" + allProxy.size());
} // }
} //
logger.info("proxy pool size>>>>" + allProxy.size()); // public void addProxy(Proxy... httpProxyList) {
} // isEnable = true;
// for (Proxy proxy : httpProxyList) {
public TimerReuseProxy getProxy() { // if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) {
TimerReuseProxy proxy = null; // TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval);
try { // proxyQueue.add(p);
Long time = System.currentTimeMillis(); // allProxy.put(p.getProxyHost().getHost(), p);
proxy = proxyQueue.take(); // }
double costTime = (System.currentTimeMillis() - time) / 1000.0; // }
if (costTime > reuseInterval) { // logger.info("proxy pool size>>>>" + allProxy.size());
logger.info("get proxy time >>>> " + costTime); // }
} //
TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost()); // public TimerReuseProxy getProxy() {
p.setLastBorrowTime(System.currentTimeMillis()); // TimerReuseProxy proxy = null;
p.borrowNumIncrement(1); // try {
} catch (InterruptedException e) { // Long time = System.currentTimeMillis();
logger.error("get proxy error", e); // proxy = proxyQueue.take();
} // double costTime = (System.currentTimeMillis() - time) / 1000.0;
if (proxy == null) { // if (costTime > reuseInterval) {
throw new NoSuchElementException(); // logger.info("get proxy time >>>> " + costTime);
} // }
return proxy; // TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
} // p.setLastBorrowTime(System.currentTimeMillis());
// p.borrowNumIncrement(1);
public void returnProxy(Proxy proxy, int statusCode) { // } catch (InterruptedException e) {
TimerReuseProxy p = allProxy.get(proxy.getProxyHost()); // logger.error("get proxy error", e);
if (p == null) { // }
return; // if (proxy == null) {
} // throw new NoSuchElementException();
switch (statusCode) { // }
case TimerReuseProxy.SUCCESS: // return proxy;
p.setReuseTimeInterval(reuseInterval); // }
p.setFailedNum(0); //
p.setFailedErrorType(new ArrayList<Integer>()); // public void returnProxy(Proxy proxy, int statusCode) {
p.recordResponse(); // TimerReuseProxy p = allProxy.get(proxy.getProxyHost());
p.successNumIncrement(1); // if (p == null) {
break; // return;
case TimerReuseProxy.ERROR_403: // }
// banned,try longer interval // switch (statusCode) {
p.fail(TimerReuseProxy.ERROR_403); // case TimerReuseProxy.SUCCESS:
p.setReuseTimeInterval(reuseInterval * p.getFailedNum()); // p.setReuseTimeInterval(reuseInterval);
logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0); // p.setFailedNum(0);
break; // p.setFailedErrorType(new ArrayList<Integer>());
case TimerReuseProxy.ERROR_BANNED: // p.recordResponse();
p.fail(TimerReuseProxy.ERROR_BANNED); // p.successNumIncrement(1);
p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum()); // break;
logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0); // case TimerReuseProxy.ERROR_403:
break; // // banned,try longer interval
case TimerReuseProxy.ERROR_404: // p.fail(TimerReuseProxy.ERROR_403);
// p.fail(Proxy.ERROR_404); // p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
// p.setReuseTimeInterval(reuseInterval * p.getFailedNum()); // logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
break; // break;
default: // case TimerReuseProxy.ERROR_BANNED:
p.fail(statusCode); // p.fail(TimerReuseProxy.ERROR_BANNED);
break; // p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
} // logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
if (p.getFailedNum() > 20) { // break;
p.setReuseTimeInterval(reviveTime); // case TimerReuseProxy.ERROR_404:
logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size()); // // p.fail(Proxy.ERROR_404);
return; // // p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
} // break;
if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) { // default:
if (!ProxyUtils.validateProxy(proxy)) { // p.fail(statusCode);
p.setReuseTimeInterval(reviveTime); // break;
logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size()); // }
return; // if (p.getFailedNum() > 20) {
} // p.setReuseTimeInterval(reviveTime);
} // logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
try { // return;
proxyQueue.put(p); // }
} catch (InterruptedException e) { // if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
logger.warn("proxyQueue return proxy error", e); // if (!ProxyUtils.validateProxy(proxy)) {
} // p.setReuseTimeInterval(reviveTime);
} // logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
// return;
public String allProxyStatus() { // }
String re = "all proxy info >>>> \n"; // }
for (Entry<String, Proxy> entry : allProxy.entrySet()) { // try {
re += entry.getValue().toString() + "\n"; // proxyQueue.put(p);
} // } catch (InterruptedException e) {
return re; // logger.warn("proxyQueue return proxy error", e);
} // }
// }
public int getIdleNum() { //
return proxyQueue.size(); // public String allProxyStatus() {
} // String re = "all proxy info >>>> \n";
// for (Entry<String, Proxy> entry : allProxy.entrySet()) {
public int getReuseInterval() { // re += entry.getValue().toString() + "\n";
return reuseInterval; // }
} // return re;
// }
public void setReuseInterval(int reuseInterval) { //
this.reuseInterval = reuseInterval; // public int getIdleNum() {
} // return proxyQueue.size();
// }
public void enable(boolean isEnable) { //
this.isEnable = isEnable; // public int getReuseInterval() {
} // return reuseInterval;
// }
public boolean isEnable() { //
return isEnable; // public void setReuseInterval(int reuseInterval) {
} // this.reuseInterval = reuseInterval;
// }
public int getReviveTime() { //
return reviveTime; // public void enable(boolean isEnable) {
} // this.isEnable = isEnable;
// }
public void setReviveTime(int reviveTime) { //
this.reviveTime = reviveTime; // public boolean isEnable() {
} // return isEnable;
// }
public boolean isValidateWhenInit() { //
return validateWhenInit; // public int getReviveTime() {
} // return reviveTime;
// }
public void validateWhenInit(boolean validateWhenInit) { //
this.validateWhenInit = validateWhenInit; // public void setReviveTime(int reviveTime) {
} // this.reviveTime = reviveTime;
// }
public int getSaveProxyInterval() { //
return saveProxyInterval; // public boolean isValidateWhenInit() {
} // return validateWhenInit;
// }
public void setSaveProxyInterval(int saveProxyInterval) { //
this.saveProxyInterval = saveProxyInterval; // public void validateWhenInit(boolean validateWhenInit) {
} // this.validateWhenInit = validateWhenInit;
// }
public String getProxyFilePath() { //
return proxyFilePath; // public int getSaveProxyInterval() {
} // return saveProxyInterval;
// }
public void setProxyFilePath(String proxyFilePath) { //
this.proxyFilePath = proxyFilePath; // public void setSaveProxyInterval(int saveProxyInterval) {
} // this.saveProxyInterval = saveProxyInterval;
// }
//
// public String getProxyFilePath() {
// return proxyFilePath;
// }
//
// public void setProxyFilePath(String proxyFilePath) {
// this.proxyFilePath = proxyFilePath;
// }
} }

@ -1,14 +1,12 @@
package us.codecraft.webmagic.utils; package us.codecraft.webmagic.utils;
import org.apache.http.HttpHost;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.proxy.ProxyHost; import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException; import java.io.IOException;
import java.net.*; import java.net.InetSocketAddress;
import java.util.Enumeration; import java.net.Socket;
import java.util.regex.Pattern;
/** /**
* Pooled Proxy Object * Pooled Proxy Object
@ -18,72 +16,19 @@ import java.util.regex.Pattern;
*/ */
public class ProxyUtils { public class ProxyUtils {
private static InetAddress localAddr;
private static String networkInterface = "eth7";
private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class); private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class);
static {
init();
}
private static void init() {
// first way to get local IP
try {
localAddr = InetAddress.getLocalHost();
logger.info("local IP:" + localAddr.getHostAddress());
} catch (UnknownHostException e) {
logger.info("try again\n");
}
if (localAddr != null) {
return;
}
// other way to get local IP
Enumeration<InetAddress> localAddrs;
try {
// modify your network interface name
NetworkInterface ni = NetworkInterface.getByName(networkInterface);
if (ni == null) {
return;
}
localAddrs = ni.getInetAddresses();
if (localAddrs == null || !localAddrs.hasMoreElements()) {
logger.error("choose NetworkInterface\n" + getNetworkInterface());
return;
}
while (localAddrs.hasMoreElements()) {
InetAddress tmp = localAddrs.nextElement();
if (!tmp.isLoopbackAddress() && !tmp.isLinkLocalAddress() && !(tmp instanceof Inet6Address)) {
localAddr = tmp;
logger.info("local IP:" + localAddr.getHostAddress());
break;
}
}
} catch (Exception e) {
logger.error("Failure when init ProxyUtil", e);
logger.error("choose NetworkInterface\n" + getNetworkInterface());
}
}
public static HttpHost convert(ProxyHost p){
return new HttpHost(p.getHost(),p.getPort());
}
public static boolean validateProxy(ProxyHost p) { public static boolean validateProxy(Proxy p) {
if (localAddr == null) {
logger.error("cannot get local IP");
return false;
}
boolean isReachable = false;
Socket socket = null; Socket socket = null;
try { try {
socket = new Socket(); socket = new Socket();
socket.bind(new InetSocketAddress(localAddr, 0));
InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort()); InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort());
socket.connect(endpointSocketAddr, 3000); socket.connect(endpointSocketAddr, 3000);
logger.debug("SUCCESS - connection established! Local: " + localAddr.getHostAddress() + " remote: " + p); return true;
isReachable = true;
} catch (IOException e) { } catch (IOException e) {
logger.warn("FAILRE - CAN not connect! Local: " + localAddr.getHostAddress() + " remote: " + p); logger.warn("FAILRE - CAN not connect! remote: " + p);
return false;
} finally { } finally {
if (socket != null) { if (socket != null) {
try { try {
@ -93,30 +38,7 @@ public class ProxyUtils {
} }
} }
} }
return isReachable;
}
private static String getNetworkInterface() {
String networkInterfaceName = ">>>> modify networkInterface in us.codecraft.webmagic.utils.ProxyUtils";
Enumeration<NetworkInterface> enumeration = null;
try {
enumeration = NetworkInterface.getNetworkInterfaces();
} catch (SocketException e1) {
e1.printStackTrace();
}
while (enumeration.hasMoreElements()) {
NetworkInterface networkInterface = enumeration.nextElement();
Enumeration<InetAddress> addr = networkInterface.getInetAddresses();
while (addr.hasMoreElements()) {
String s = addr.nextElement().getHostAddress();
Pattern IPV4_PATTERN = Pattern.compile("^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
if (s != null && IPV4_PATTERN.matcher(s).matches()) {
networkInterfaceName += networkInterface.toString() + "IP:" + s + "\n\n";
}
}
}
return networkInterfaceName;
} }
} }

@ -5,7 +5,7 @@ import com.github.dreamhead.moco.Runnable;
import com.github.dreamhead.moco.Runner; import com.github.dreamhead.moco.Runner;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
@ -87,12 +87,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) { private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader(); HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me(); Site site = Site.me();
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null); CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site);
// encoding in http header Content-Type // encoding in http header Content-Type
Request requestGBK = new Request(url); Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
try { try {
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null)); httpResponse = httpClient.execute(new HttpUriRequestConverter().convert(requestGBK, site, null));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -117,31 +117,32 @@ public class HttpClientDownloaderTest {
server.delete(eq(query("q"), "webmagic")).response("delete"); server.delete(eq(query("q"), "webmagic")).response("delete");
server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head")); server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head"));
server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace"); server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace");
final HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
final Site site = Site.me();
Runner.running(server, new Runnable() { Runner.running(server, new Runnable() {
@Override @Override
public void run() throws Exception { public void run() throws Exception {
HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request(); Request request = new Request();
request.setUrl("http://127.0.0.1:12306/search"); request.setUrl("http://127.0.0.1:12306/search");
request.putParams("q", "webmagic"); request.putParams("q", "webmagic");
request.setMethod(HttpConstant.Method.GET); request.setMethod(HttpConstant.Method.GET);
RequestBuilder requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site,null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("get"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("get");
request.setMethod(HttpConstant.Method.POST); request.setMethod(HttpConstant.Method.POST);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("post"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("post");
request.setMethod(HttpConstant.Method.PUT); request.setMethod(HttpConstant.Method.PUT);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("put"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("put");
request.setMethod(HttpConstant.Method.DELETE); request.setMethod(HttpConstant.Method.DELETE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("delete"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("delete");
request.setMethod(HttpConstant.Method.HEAD); request.setMethod(HttpConstant.Method.HEAD);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(HttpClients.custom().build().execute(requestBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head"); assertThat(HttpClients.custom().build().execute(httpUriRequest).getFirstHeader("method").getValue()).isEqualTo("head");
request.setMethod(HttpConstant.Method.TRACE); request.setMethod(HttpConstant.Method.TRACE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("trace"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("trace");
} }
}); });
} }
@ -156,7 +157,7 @@ public class HttpClientDownloaderTest {
final HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request(); Request request = new Request();
request.setUrl("http://127.0.0.1:12306/"); request.setUrl("http://127.0.0.1:12306/");
Page page = httpClientDownloader.download(request, null); Page page = httpClientDownloader.download(request, Site.me().toTask());
assertThat(page.getRawText()).isEqualTo("foo"); assertThat(page.getRawText()).isEqualTo("foo");
} }
}); });

@ -2,13 +2,10 @@ package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/** /**
* @author yxssfxwzy@sina.com May 30, 2014 * @author yxssfxwzy@sina.com May 30, 2014
* *
@ -27,30 +24,6 @@ public class ProxyTest {
} }
} }
@Test
public void testProxy() {
TimerReuseProxyPool proxyPool = new TimerReuseProxyPool(httpProxyList,false);
proxyPool.setReuseInterval(500);
assertThat(proxyPool.getIdleNum()).isEqualTo(4);
for (int i = 0; i < 2; i++) {
List<Fetch> fetchList = new ArrayList<Fetch>();
while (proxyPool.getIdleNum() != 0) {
Proxy proxy = proxyPool.getProxy();
HttpHost httphost = proxy.getHttpHost();
// httphostList.add(httphost);
System.out.println(httphost.getHostName() + ":" + httphost.getPort());
Fetch tmp = new Fetch(httphost);
tmp.start();
fetchList.add(tmp);
}
for (Fetch fetch : fetchList) {
proxyPool.returnProxy(fetch.hp, Proxy.SUCCESS);
}
System.out.println(proxyPool.allProxyStatus());
}
}
class Fetch extends Thread { class Fetch extends Thread {
HttpHost hp; HttpHost hp;

Loading…
Cancel
Save