add SimpleProxyPool and IProxyPool

pull/194/head
edwardsbean 10 years ago
parent 05a1f39569
commit 19474e4716

@ -4,7 +4,8 @@ import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import org.apache.http.HttpHost;
import us.codecraft.webmagic.proxy.ProxyPool;
import us.codecraft.webmagic.proxy.IProxyPool;
import us.codecraft.webmagic.proxy.SimpleProxyPool;
import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*;
@ -51,7 +52,7 @@ public class Site {
private HttpHost httpProxy;
private ProxyPool httpProxyPool;
private IProxyPool httpProxyPool;
private boolean useGzip = true;
@ -464,17 +465,17 @@ public class Site {
*
* @return this
*/
public Site setHttpProxyPool(List<String[]> httpProxyList) {
this.httpProxyPool=new ProxyPool(httpProxyList);
public Site setHttpProxyPool(IProxyPool proxyPool) {
this.httpProxyPool = proxyPool;
return this;
}
public Site enableHttpProxyPool() {
this.httpProxyPool=new ProxyPool();
this.httpProxyPool=new SimpleProxyPool();
return this;
}
public ProxyPool getHttpProxyPool() {
public IProxyPool getHttpProxyPool() {
return httpProxyPool;
}
@ -486,9 +487,4 @@ public class Site {
httpProxyPool.returnProxy(proxy,statusCode);
}
public Site setProxyReuseInterval(int reuseInterval) {
this.httpProxyPool.setReuseInterval(reuseInterval);
return this;
}
}

@ -0,0 +1,12 @@
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
/**
 * Created by edwardsbean on 15-2-28.
 * <p>
 * Contract for a pool that hands out HTTP proxies and receives them back
 * together with the status code observed while using them.
 */
public interface IProxyPool {

    /**
     * Indicates whether this pool is switched on.
     *
     * @return {@code true} if the pool is enabled
     */
    boolean isEnable();

    /**
     * Obtains a proxy from the pool.
     *
     * @return the proxy host to use
     */
    HttpHost getProxy();

    /**
     * Gives a proxy back to the pool after use.
     *
     * @param host       the proxy that was used
     * @param statusCode the status code observed while using {@code host}
     */
    void returnProxy(HttpHost host, int statusCode);
}

@ -22,7 +22,7 @@ import java.util.concurrent.DelayQueue;
* @see Proxy
* @since 0.5.1
*/
public class ProxyPool {
public class ProxyPool implements IProxyPool{
private Logger logger = LoggerFactory.getLogger(getClass());

@ -0,0 +1,116 @@
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
/**
 * Created by edwardsbean on 15-2-28.
 * <p>
 * An {@link IProxyPool} that keeps its proxies in a {@link DelayQueue}.
 * {@link #getProxy()} takes a proxy out of the queue and
 * {@link #returnProxy(HttpHost, int)} records the outcome and puts it back,
 * unless {@code getFailedNum()} of that proxy has grown beyond 3.
 */
public class SimpleProxyPool implements IProxyPool {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Proxies that can currently be handed out by {@link #getProxy()}. */
    private final BlockingQueue<Proxy> proxyQueue = new DelayQueue<Proxy>();

    /**
     * Every proxy that was added, keyed by the textual IP address of its host
     * ({@link InetAddress#getHostAddress()}), which is the same key
     * {@link #returnProxy(HttpHost, int)} derives from the returned host.
     */
    private final Map<String, Proxy> allProxy = new ConcurrentHashMap<String, Proxy>();

    private boolean isEnable = false;
    private int reuseInterval = 1500;// ms
    private int reviveTime = 2 * 60 * 60 * 1000;// ms; not read anywhere in this class

    /**
     * Creates an empty pool; {@link #isEnable()} stays {@code false} until
     * {@link #addProxy(String[][])} is called.
     */
    public SimpleProxyPool() {
        this(null);
    }

    /**
     * Creates a pool and loads the given proxies into it.
     *
     * @param httpProxyList entries of the form {@code {host, port}}; may be
     *                      {@code null}, in which case nothing is loaded
     */
    public SimpleProxyPool(List<String[]> httpProxyList) {
        if (httpProxyList != null) {
            addProxy(httpProxyList.toArray(new String[httpProxyList.size()][]));
        }
    }

    /**
     * Adds proxies to the pool and marks the pool as enabled.
     * <p>
     * Entries that are malformed, whose host cannot be resolved, whose port is
     * not a number, or whose resolved address is already in the pool are
     * skipped; the remaining entries are still processed.
     *
     * @param httpProxyList entries of the form {@code {host, port}}
     */
    public void addProxy(String[]... httpProxyList) {
        isEnable = true;
        if (httpProxyList != null) {
            for (String[] s : httpProxyList) {
                // A short or null entry must not abort loading of the remaining ones.
                if (s == null || s.length < 2 || s[0] == null || s[1] == null) {
                    logger.error("HttpHost init error: expected {host, port} but got {}",
                            java.util.Arrays.toString(s));
                    continue;
                }
                try {
                    InetAddress address = InetAddress.getByName(s[0]);
                    // Key by the resolved IP string, not by the raw input: returnProxy()
                    // looks proxies up via getHostAddress(), so a hostname key would
                    // never match and the proxy would never be re-queued.
                    String key = address.getHostAddress();
                    if (allProxy.containsKey(key)) {
                        continue;
                    }
                    HttpHost item = new HttpHost(address, Integer.parseInt(s[1]));
                    Proxy p = new Proxy(item, reuseInterval);
                    proxyQueue.add(p);
                    allProxy.put(key, p);
                } catch (NumberFormatException e) {
                    logger.error("HttpHost init error:", e);
                } catch (UnknownHostException e) {
                    logger.error("HttpHost init error:", e);
                }
            }
        }
        logger.info("proxy pool size>>>>" + allProxy.size());
    }

    /**
     * Records the outcome of a request made through {@code host} and puts the
     * proxy back into the queue, unless its {@code getFailedNum()} is above 3.
     * Hosts that are {@code null}, carry no {@link InetAddress}, or are not
     * known to this pool are ignored.
     *
     * @param host       the proxy that was used
     * @param statusCode one of the {@code Proxy} status constants, or any other
     *                   value, which is recorded as a failure of that type
     */
    @Override
    public void returnProxy(HttpHost host, int statusCode) {
        // Hosts created by this pool always carry an InetAddress; anything else
        // cannot belong to it.
        if (host == null || host.getAddress() == null) {
            return;
        }
        Proxy p = allProxy.get(host.getAddress().getHostAddress());
        if (p == null) {
            return;
        }
        switch (statusCode) {
            case Proxy.SUCCESS:
                p.setFailedNum(0);
                p.setFailedErrorType(new ArrayList<Integer>());
                p.recordResponse();
                p.successNumIncrement(1);
                break;
            case Proxy.ERROR_403:
                // banned,try longer interval
                p.fail(Proxy.ERROR_403);
                break;
            case Proxy.ERROR_BANNED:
                p.fail(Proxy.ERROR_BANNED);
                logger.warn("this proxy is banned >>>> " + p.getHttpHost());
                break;
            case Proxy.ERROR_404:
                // Deliberately not counted as a proxy failure.
                break;
            default:
                p.fail(statusCode);
                break;
        }
        if (p.getFailedNum() > 3) {
            // The proxy is simply not re-queued; it stays in allProxy.
            logger.error("remove proxy >>>> " + host + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
            return;
        }
        try {
            proxyQueue.put(p);
        } catch (InterruptedException e) {
            // Restore the flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            logger.warn("proxyQueue return proxy error", e);
        }
    }

    /**
     * Takes the next proxy from the queue, waiting until one is available.
     *
     * @return the host of the proxy taken from the queue
     * @throws NoSuchElementException if the calling thread is interrupted while
     *                                waiting; the interrupt flag is left set
     */
    @Override
    public HttpHost getProxy() {
        Proxy proxy = null;
        try {
            proxy = proxyQueue.take();
        } catch (InterruptedException e) {
            // Restore the flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("get proxy error", e);
        }
        if (proxy == null) {
            throw new NoSuchElementException();
        }
        return proxy.getHttpHost();
    }

    /**
     * @return {@code true} once {@link #addProxy(String[][])} has been called
     */
    @Override
    public boolean isEnable() {
        return isEnable;
    }
}
Loading…
Cancel
Save