add SimpleProxyPool and IProxyPool
parent
05a1f39569
commit
19474e4716
@ -0,0 +1,12 @@
|
||||
package us.codecraft.webmagic.proxy;
|
||||
|
||||
import org.apache.http.HttpHost;
|
||||
|
||||
/**
|
||||
* Created by edwardsbean on 15-2-28.
|
||||
*/
|
||||
public interface IProxyPool {
|
||||
public void returnProxy(HttpHost host, int statusCode);
|
||||
public HttpHost getProxy();
|
||||
public boolean isEnable();
|
||||
}
|
@ -0,0 +1,116 @@
|
||||
package us.codecraft.webmagic.proxy;
|
||||
|
||||
import org.apache.http.HttpHost;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.DelayQueue;
|
||||
|
||||
/**
|
||||
* Created by edwardsbean on 15-2-28.
|
||||
*/
|
||||
public class SimpleProxyPool implements IProxyPool{
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private BlockingQueue<Proxy> proxyQueue = new DelayQueue<Proxy>();
|
||||
private Map<String, Proxy> allProxy = new ConcurrentHashMap<String, Proxy>();
|
||||
private boolean isEnable = false;
|
||||
private int reuseInterval = 1500;// ms
|
||||
private int reviveTime = 2 * 60 * 60 * 1000;// ms
|
||||
|
||||
public SimpleProxyPool() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
public SimpleProxyPool(List<String[]> httpProxyList) {
|
||||
if (httpProxyList != null) {
|
||||
addProxy(httpProxyList.toArray(new String[httpProxyList.size()][]));
|
||||
}
|
||||
}
|
||||
|
||||
public void addProxy(String[]... httpProxyList) {
|
||||
isEnable = true;
|
||||
for (String[] s : httpProxyList) {
|
||||
try {
|
||||
if (allProxy.containsKey(s[0])) {
|
||||
continue;
|
||||
}
|
||||
HttpHost item = new HttpHost(InetAddress.getByName(s[0]), Integer.valueOf(s[1]));
|
||||
Proxy p = new Proxy(item, reuseInterval);
|
||||
proxyQueue.add(p);
|
||||
allProxy.put(s[0], p);
|
||||
} catch (NumberFormatException e) {
|
||||
logger.error("HttpHost init error:", e);
|
||||
} catch (UnknownHostException e) {
|
||||
logger.error("HttpHost init error:", e);
|
||||
}
|
||||
}
|
||||
logger.info("proxy pool size>>>>" + allProxy.size());
|
||||
}
|
||||
|
||||
public void returnProxy(HttpHost host, int statusCode) {
|
||||
Proxy p = allProxy.get(host.getAddress().getHostAddress());
|
||||
if (p == null) {
|
||||
return;
|
||||
}
|
||||
switch (statusCode) {
|
||||
case Proxy.SUCCESS:
|
||||
p.setFailedNum(0);
|
||||
p.setFailedErrorType(new ArrayList<Integer>());
|
||||
p.recordResponse();
|
||||
p.successNumIncrement(1);
|
||||
break;
|
||||
case Proxy.ERROR_403:
|
||||
// banned,try longer interval
|
||||
p.fail(Proxy.ERROR_403);
|
||||
break;
|
||||
case Proxy.ERROR_BANNED:
|
||||
p.fail(Proxy.ERROR_BANNED);
|
||||
logger.warn("this proxy is banned >>>> " + p.getHttpHost());
|
||||
break;
|
||||
case Proxy.ERROR_404:
|
||||
// p.fail(Proxy.ERROR_404);
|
||||
// p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
|
||||
break;
|
||||
default:
|
||||
p.fail(statusCode);
|
||||
break;
|
||||
}
|
||||
if (p.getFailedNum() > 3) {
|
||||
logger.error("remove proxy >>>> " + host + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
|
||||
return;
|
||||
}
|
||||
try {
|
||||
proxyQueue.put(p);
|
||||
} catch (InterruptedException e) {
|
||||
logger.warn("proxyQueue return proxy error", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HttpHost getProxy() {
|
||||
Proxy proxy = null;
|
||||
try {
|
||||
proxy = proxyQueue.take();
|
||||
} catch (InterruptedException e) {
|
||||
logger.error("get proxy error", e);
|
||||
}
|
||||
if (proxy == null) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
return proxy.getHttpHost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEnable() {
|
||||
return isEnable;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue