test pass

pull/524/head
yihua.huang 8 years ago
parent 474b7c9d57
commit 68050fc88e

@ -2,8 +2,7 @@ package us.codecraft.webmagic;
import org.apache.http.HttpHost;
import org.apache.http.auth.UsernamePasswordCredentials;
import us.codecraft.webmagic.proxy.ProxyPool;
import us.codecraft.webmagic.proxy.TimerReuseProxyPool;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*;
@ -52,7 +51,7 @@ public class Site {
private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置
private ProxyPool httpProxyPool;
private ProxyProvider httpProxyPool;
private boolean useGzip = true;
@ -399,7 +398,11 @@ public class Site {
return new Task() {
@Override
public String getUUID() {
return Site.this.getDomain();
String uuid = Site.this.getDomain();
if (uuid == null) {
uuid = UUID.randomUUID().toString();
}
return uuid;
}
@Override
@ -467,45 +470,4 @@ public class Site {
'}';
}
/**
* Stores the given proxy pool on this site. <br>
* The pool object is kept as passed in; unlike the {@code List<String[]>} overload of this
* method, no ip/port string arrays are involved here.
*
* @param proxyPool the pool to assign to {@code httpProxyPool}; no null check is performed
* @return this
*/
public Site setHttpProxyPool(ProxyPool proxyPool) {
this.httpProxyPool = proxyPool;
return this;
}
/**
* Set httpProxyPool, String[0]:ip, String[1]:port <br>
*
* @param httpProxyList httpProxyList
* @param isUseLastProxy isUseLastProxy
* @return this
*/
public Site setHttpProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
this.httpProxyPool=new TimerReuseProxyPool(httpProxyList, isUseLastProxy);
return this;
}
public Site enableHttpProxyPool() {
this.httpProxyPool=new TimerReuseProxyPool();
return this;
}
public UsernamePasswordCredentials getUsernamePasswordCredentials() {
return usernamePasswordCredentials;
}
public Site setUsernamePasswordCredentials(UsernamePasswordCredentials usernamePasswordCredentials) {
this.usernamePasswordCredentials = usernamePasswordCredentials;
return this;
}
public ProxyPool getHttpProxyPool() {
return httpProxyPool;
}
}

@ -20,6 +20,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils;
@ -46,10 +47,16 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
private ProxyProvider proxyProvider;
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
this.httpUriRequestConverter = httpUriRequestConverter;
}
public void setProxyProvider(ProxyProvider proxyProvider) {
this.proxyProvider = proxyProvider;
}
private CloseableHttpClient getHttpClient(Site site) {
if (site == null) {
return httpClientGenerator.getClient(null);
@ -79,8 +86,8 @@ public class HttpClientDownloader extends AbstractDownloader {
Site site = task.getSite();
Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext();
if (site.getHttpProxyPool() != null) {
proxy = site.getHttpProxyPool().getProxy(task);
if (proxyProvider != null) {
proxy = proxyProvider.getProxy(task);
request.putExtra(Request.PROXY, proxy);
AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
@ -111,9 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader {
//ensure the connection is released back to pool
EntityUtils.consumeQuietly(httpResponse.getEntity());
}
if (proxy != null) {
site.getHttpProxyPool().returnProxy(proxy, statusCode, task);
}
}
}

@ -43,7 +43,7 @@ public class HttpUriRequestConverter {
}
if (proxy != null) {
requestConfigBuilder.setProxy(new HttpHost(proxy.getProxyHost().getHost(), proxy.getProxyHost().getPort()));
requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
}
requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build();

@ -6,42 +6,36 @@ package us.codecraft.webmagic.proxy;
public class Proxy {
private ProxyHost proxyHost;
private String host;
private int port;
private String username;
private String password;
public Proxy(ProxyHost proxyHost, String username, String password) {
this.proxyHost = proxyHost;
this.username = username;
this.password = password;
public Proxy(String host, int port) {
this.host = host;
this.port = port;
}
public Proxy(ProxyHost proxyHost) {
this.proxyHost = proxyHost;
public Proxy(String host, int port, String username, String password) {
this.host = host;
this.port = port;
this.username = username;
this.password = password;
}
public ProxyHost getProxyHost() {
return proxyHost;
public String getHost() {
return host;
}
public void setProxyHost(ProxyHost proxyHost) {
this.proxyHost = proxyHost;
public int getPort() {
return port;
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public String getPassword() {
return password;
}
public void setPassword(String password) {
this.password = password;
}
}

@ -1,34 +0,0 @@
package us.codecraft.webmagic.proxy;
/**
 * Mutable holder for the address of a proxy server: a host plus a port.
 *
 * @author code4crafter@gmail.com
 *         Date: 17/3/18
 *         Time: 12:04
 */
public class ProxyHost {

    /** Host part of the proxy address, stored exactly as supplied. */
    private String host;

    /** Port part of the proxy address, stored exactly as supplied. */
    private int port;

    /**
     * Creates a proxy address from its two parts. No validation is applied.
     *
     * @param host host of the proxy
     * @param port port of the proxy
     */
    public ProxyHost(String host, int port) {
        this.host = host;
        this.port = port;
    }

    /**
     * @return the host currently stored in this object
     */
    public String getHost() {
        return this.host;
    }

    /**
     * @return the port currently stored in this object
     */
    public int getPort() {
        return this.port;
    }

    /**
     * Replaces the stored host.
     *
     * @param host new host value
     */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * Replaces the stored port.
     *
     * @param port new port value
     */
    public void setPort(int port) {
        this.port = port;
    }
}

@ -5,7 +5,7 @@ import us.codecraft.webmagic.Task;
/**
* Created by edwardsbean on 15-2-28.
*/
public interface ProxyPool {
public interface ProxyProvider {
void returnProxy(Proxy proxy, boolean banned, Task task);

@ -72,14 +72,10 @@ public class TimerReuseProxy extends Proxy implements Delayed, Serializable {
private List<Integer> failedErrorType = new ArrayList<Integer>();
public TimerReuseProxy(ProxyHost proxyHost, String user, String password) {
super(proxyHost, user, password);
public TimerReuseProxy(String host, int port, String username, String password) {
super(host, port, username, password);
}
public TimerReuseProxy(ProxyHost proxyHost, String user, String password, int reuseTimeInterval) {
super(proxyHost, user, password);
this.reuseTimeInterval = reuseTimeInterval;
}
public int getSuccessNum() {
return successNum;

@ -1,17 +1,6 @@
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.utils.FilePersistentBase;
import us.codecraft.webmagic.utils.ProxyUtils;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import us.codecraft.webmagic.Task;
/**
* Pooled Proxy Object
@ -20,187 +9,196 @@ import java.util.concurrent.DelayQueue;
* @see Proxy
* @since 0.5.1
*/
public class TimerReuseProxyPool implements ProxyPool {
private Logger logger = LoggerFactory.getLogger(getClass());
private BlockingQueue<TimerReuseProxy> proxyQueue = new DelayQueue<TimerReuseProxy>();
private Map<String, TimerReuseProxy> allProxy = new ConcurrentHashMap<String, TimerReuseProxy>();
private int reuseInterval = 1500;// ms
private int reviveTime = 2 * 60 * 60 * 1000;// ms
private int saveProxyInterval = 10 * 60 * 1000;// ms
private boolean isEnable = false;
private boolean validateWhenInit = false;
// private boolean isUseLastProxy = true;
public TimerReuseProxyPool(List<String[]> httpProxyList) {
this(httpProxyList, true);
}
private void addProxy(Map<String, Proxy> httpProxyMap) {
isEnable = true;
for (Entry<String, Proxy> entry : httpProxyMap.entrySet()) {
try {
if (allProxy.containsKey(entry.getKey())) {
continue;
}
if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) {
entry.getValue().setFailedNum(0);
entry.getValue().setReuseTimeInterval(reuseInterval);
proxyQueue.add(entry.getValue());
allProxy.put(entry.getKey(), entry.getValue());
}
} catch (NumberFormatException e) {
logger.error("HttpHost init error:", e);
}
}
logger.info("proxy pool size>>>>" + allProxy.size());
}
/**
 * Wraps each given proxy in a {@link TimerReuseProxy} and registers it with the pool.
 * <p>
 * The pool is flagged as enabled first. When {@code validateWhenInit} is set, a proxy is
 * only registered if {@code ProxyUtils.validateProxy} accepts its host; otherwise every
 * proxy is registered. Registered proxies are keyed in {@code allProxy} by host string.
 *
 * @param httpProxyList proxies to add
 */
public void addProxy(Proxy... httpProxyList) {
    isEnable = true;
    for (Proxy candidate : httpProxyList) {
        // Skip only when validation is requested and the candidate fails it.
        if (validateWhenInit && !ProxyUtils.validateProxy(candidate.getProxyHost())) {
            continue;
        }
        TimerReuseProxy pooled = new TimerReuseProxy(
                candidate.getProxyHost(), candidate.getUsername(), candidate.getPassword(), reuseInterval);
        proxyQueue.add(pooled);
        allProxy.put(pooled.getProxyHost().getHost(), pooled);
    }
    logger.info("proxy pool size>>>>" + allProxy.size());
}
/**
 * Borrows the next available proxy from the pool, blocking on {@code proxyQueue.take()}
 * until one can be taken.
 * <p>
 * The borrow time and borrow counter of the pooled entry are updated. If the wait took
 * longer than {@code reuseInterval} (milliseconds), the wait time is logged in seconds.
 *
 * @return the borrowed proxy, never {@code null}
 * @throws NoSuchElementException if the calling thread was interrupted while waiting; the
 *                                thread's interrupt flag is restored before throwing
 */
public TimerReuseProxy getProxy() {
    TimerReuseProxy proxy = null;
    try {
        long startMillis = System.currentTimeMillis();
        proxy = proxyQueue.take();
        // Compare in milliseconds: reuseInterval is in ms, so comparing a value already
        // converted to seconds (as before) made this log effectively unreachable.
        long waitedMillis = System.currentTimeMillis() - startMillis;
        if (waitedMillis > reuseInterval) {
            logger.info("get proxy time >>>> " + waitedMillis / 1000.0);
        }
        TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
        p.setLastBorrowTime(System.currentTimeMillis());
        p.borrowNumIncrement(1);
    } catch (InterruptedException e) {
        // Keep the interruption visible to callers further up the stack.
        Thread.currentThread().interrupt();
        logger.error("get proxy error", e);
    }
    if (proxy == null) {
        throw new NoSuchElementException();
    }
    return proxy;
}
/**
 * Hands a borrowed proxy back to the pool and adjusts its reuse delay from the outcome.
 * <ul>
 * <li>{@code SUCCESS}: delay reset to {@code reuseInterval}, failure state cleared,
 * response recorded, success counter incremented.</li>
 * <li>{@code ERROR_403}: failure recorded, delay set to {@code reuseInterval * failedNum}.</li>
 * <li>{@code ERROR_BANNED}: failure recorded, delay set to ten minutes times {@code failedNum}.</li>
 * <li>{@code ERROR_404}: statistics left untouched.</li>
 * <li>anything else: recorded as a failure with the given status code.</li>
 * </ul>
 * A proxy with more than 20 failures, or one that fails {@code ProxyUtils.validateProxy}
 * on every fifth failure, gets its delay set to {@code reviveTime} and is not re-queued.
 * Proxies unknown to the pool are ignored.
 *
 * @param proxy      the proxy being returned
 * @param statusCode one of the {@code TimerReuseProxy} status constants, or another code
 */
public void returnProxy(Proxy proxy, int statusCode) {
    // allProxy is keyed by the host string (that is how addProxy and getProxy use it), so the
    // lookup must use the host string too; passing the ProxyHost object never matched.
    TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
    if (p == null) {
        return;
    }
    switch (statusCode) {
        case TimerReuseProxy.SUCCESS:
            p.setReuseTimeInterval(reuseInterval);
            p.setFailedNum(0);
            p.setFailedErrorType(new ArrayList<Integer>());
            p.recordResponse();
            p.successNumIncrement(1);
            break;
        case TimerReuseProxy.ERROR_403:
            // banned,try longer interval
            p.fail(TimerReuseProxy.ERROR_403);
            p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
            logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
            break;
        case TimerReuseProxy.ERROR_BANNED:
            p.fail(TimerReuseProxy.ERROR_BANNED);
            p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
            logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
            break;
        case TimerReuseProxy.ERROR_404:
            // A 404 is not counted against the proxy; its statistics stay as they are.
            break;
        default:
            p.fail(statusCode);
            break;
    }
    if (p.getFailedNum() > 20) {
        p.setReuseTimeInterval(reviveTime);
        logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
        return;
    }
    // Re-validate on every fifth failure before letting the proxy back in.
    if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
        if (!ProxyUtils.validateProxy(proxy)) {
            p.setReuseTimeInterval(reviveTime);
            logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
            return;
        }
    }
    try {
        proxyQueue.put(p);
    } catch (InterruptedException e) {
        // Keep the interruption visible to callers further up the stack.
        Thread.currentThread().interrupt();
        logger.warn("proxyQueue return proxy error", e);
    }
}
/**
 * Builds a multi-line report with one line per proxy registered in {@code allProxy}.
 *
 * @return a header line followed by each proxy's {@code toString()} on its own line
 */
public String allProxyStatus() {
    StringBuilder status = new StringBuilder("all proxy info >>>> \n");
    // Iterate the values directly: the map holds TimerReuseProxy values, so an
    // Entry<String, Proxy> loop variable does not match its entry set.
    for (TimerReuseProxy pooled : allProxy.values()) {
        status.append(pooled.toString()).append("\n");
    }
    return status.toString();
}
public int getIdleNum() {
return proxyQueue.size();
}
public int getReuseInterval() {
return reuseInterval;
}
public void setReuseInterval(int reuseInterval) {
this.reuseInterval = reuseInterval;
}
public void enable(boolean isEnable) {
this.isEnable = isEnable;
}
public boolean isEnable() {
return isEnable;
}
public int getReviveTime() {
return reviveTime;
}
public void setReviveTime(int reviveTime) {
this.reviveTime = reviveTime;
}
public boolean isValidateWhenInit() {
return validateWhenInit;
}
public void validateWhenInit(boolean validateWhenInit) {
this.validateWhenInit = validateWhenInit;
}
public int getSaveProxyInterval() {
return saveProxyInterval;
}
public void setSaveProxyInterval(int saveProxyInterval) {
this.saveProxyInterval = saveProxyInterval;
}
public String getProxyFilePath() {
return proxyFilePath;
}
public void setProxyFilePath(String proxyFilePath) {
this.proxyFilePath = proxyFilePath;
}
public class TimerReuseProxyPool implements ProxyProvider {
/**
* Currently a no-op: the body is empty, so the returned proxy, the {@code banned} flag
* and the task are all ignored.
*/
@Override
public void returnProxy(Proxy proxy, boolean banned, Task task) {
}
/**
* Currently a stub that always returns {@code null}, whatever {@code task} is.
* NOTE(review): the HttpClientDownloader code in this change calls getUsername() and
* getPassword() on the provider's result without a visible null check, so using this class
* as the provider looks like it would end in a NullPointerException - confirm before use.
*/
@Override
public Proxy getProxy(Task task) {
return null;
}
// private Logger logger = LoggerFactory.getLogger(getClass());
//
// private BlockingQueue<TimerReuseProxy> proxyQueue = new DelayQueue<TimerReuseProxy>();
// private Map<String, TimerReuseProxy> allProxy = new ConcurrentHashMap<String, TimerReuseProxy>();
//
// private int reuseInterval = 1500;// ms
// private int reviveTime = 2 * 60 * 60 * 1000;// ms
// private int saveProxyInterval = 10 * 60 * 1000;// ms
//
// private boolean isEnable = false;
// private boolean validateWhenInit = false;
// // private boolean isUseLastProxy = true;
//
// public TimerReuseProxyPool(List<String[]> httpProxyList) {
// this(httpProxyList, true);
// }
//
// private void addProxy(Map<String, Proxy> httpProxyMap) {
// isEnable = true;
// for (Entry<String, Proxy> entry : httpProxyMap.entrySet()) {
// try {
// if (allProxy.containsKey(entry.getKey())) {
// continue;
// }
// if (!validateWhenInit || ProxyUtils.validateProxy(entry.getValue().getHttpHost())) {
// entry.getValue().setFailedNum(0);
// entry.getValue().setReuseTimeInterval(reuseInterval);
// proxyQueue.add(entry.getValue());
// allProxy.put(entry.getKey(), entry.getValue());
// }
// } catch (NumberFormatException e) {
// logger.error("HttpHost init error:", e);
// }
// }
// logger.info("proxy pool size>>>>" + allProxy.size());
// }
//
// public void addProxy(Proxy... httpProxyList) {
// isEnable = true;
// for (Proxy proxy : httpProxyList) {
// if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) {
// TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval);
// proxyQueue.add(p);
// allProxy.put(p.getProxyHost().getHost(), p);
// }
// }
// logger.info("proxy pool size>>>>" + allProxy.size());
// }
//
// public TimerReuseProxy getProxy() {
// TimerReuseProxy proxy = null;
// try {
// Long time = System.currentTimeMillis();
// proxy = proxyQueue.take();
// double costTime = (System.currentTimeMillis() - time) / 1000.0;
// if (costTime > reuseInterval) {
// logger.info("get proxy time >>>> " + costTime);
// }
// TimerReuseProxy p = allProxy.get(proxy.getProxyHost().getHost());
// p.setLastBorrowTime(System.currentTimeMillis());
// p.borrowNumIncrement(1);
// } catch (InterruptedException e) {
// logger.error("get proxy error", e);
// }
// if (proxy == null) {
// throw new NoSuchElementException();
// }
// return proxy;
// }
//
// public void returnProxy(Proxy proxy, int statusCode) {
// TimerReuseProxy p = allProxy.get(proxy.getProxyHost());
// if (p == null) {
// return;
// }
// switch (statusCode) {
// case TimerReuseProxy.SUCCESS:
// p.setReuseTimeInterval(reuseInterval);
// p.setFailedNum(0);
// p.setFailedErrorType(new ArrayList<Integer>());
// p.recordResponse();
// p.successNumIncrement(1);
// break;
// case TimerReuseProxy.ERROR_403:
// // banned,try longer interval
// p.fail(TimerReuseProxy.ERROR_403);
// p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
// logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
// break;
// case TimerReuseProxy.ERROR_BANNED:
// p.fail(TimerReuseProxy.ERROR_BANNED);
// p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
// logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
// break;
// case TimerReuseProxy.ERROR_404:
// // p.fail(Proxy.ERROR_404);
// // p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
// break;
// default:
// p.fail(statusCode);
// break;
// }
// if (p.getFailedNum() > 20) {
// p.setReuseTimeInterval(reviveTime);
// logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
// return;
// }
// if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
// if (!ProxyUtils.validateProxy(proxy)) {
// p.setReuseTimeInterval(reviveTime);
// logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
// return;
// }
// }
// try {
// proxyQueue.put(p);
// } catch (InterruptedException e) {
// logger.warn("proxyQueue return proxy error", e);
// }
// }
//
// public String allProxyStatus() {
// String re = "all proxy info >>>> \n";
// for (Entry<String, Proxy> entry : allProxy.entrySet()) {
// re += entry.getValue().toString() + "\n";
// }
// return re;
// }
//
// public int getIdleNum() {
// return proxyQueue.size();
// }
//
// public int getReuseInterval() {
// return reuseInterval;
// }
//
// public void setReuseInterval(int reuseInterval) {
// this.reuseInterval = reuseInterval;
// }
//
// public void enable(boolean isEnable) {
// this.isEnable = isEnable;
// }
//
// public boolean isEnable() {
// return isEnable;
// }
//
// public int getReviveTime() {
// return reviveTime;
// }
//
// public void setReviveTime(int reviveTime) {
// this.reviveTime = reviveTime;
// }
//
// public boolean isValidateWhenInit() {
// return validateWhenInit;
// }
//
// public void validateWhenInit(boolean validateWhenInit) {
// this.validateWhenInit = validateWhenInit;
// }
//
// public int getSaveProxyInterval() {
// return saveProxyInterval;
// }
//
// public void setSaveProxyInterval(int saveProxyInterval) {
// this.saveProxyInterval = saveProxyInterval;
// }
//
// public String getProxyFilePath() {
// return proxyFilePath;
// }
//
// public void setProxyFilePath(String proxyFilePath) {
// this.proxyFilePath = proxyFilePath;
// }
}

@ -1,14 +1,12 @@
package us.codecraft.webmagic.utils;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.proxy.ProxyHost;
import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException;
import java.net.*;
import java.util.Enumeration;
import java.util.regex.Pattern;
import java.net.InetSocketAddress;
import java.net.Socket;
/**
* Pooled Proxy Object
@ -18,72 +16,19 @@ import java.util.regex.Pattern;
*/
public class ProxyUtils {
private static InetAddress localAddr;
private static String networkInterface = "eth7";
private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class);
static {
init();
}
/**
 * Determines the local address stored in {@code localAddr}.
 * <p>
 * First {@code InetAddress.getLocalHost()} is tried. If that leaves {@code localAddr}
 * unset, the interface named by the {@code networkInterface} field is scanned for the
 * first address that is neither loopback, link-local nor IPv6. Failures are logged
 * together with the list of interfaces, and {@code localAddr} may remain {@code null}.
 */
private static void init() {
    // Attempt 1: let the JVM resolve the local host.
    try {
        localAddr = InetAddress.getLocalHost();
        logger.info("local IP:" + localAddr.getHostAddress());
    } catch (UnknownHostException e) {
        logger.info("try again\n");
    }
    if (localAddr != null) {
        return;
    }

    // Attempt 2: inspect the interface configured in the networkInterface field
    // (change that field to match the machine's interface name).
    try {
        NetworkInterface configured = NetworkInterface.getByName(networkInterface);
        if (configured == null) {
            return;
        }
        Enumeration<InetAddress> candidates = configured.getInetAddresses();
        if (candidates == null || !candidates.hasMoreElements()) {
            logger.error("choose NetworkInterface\n" + getNetworkInterface());
            return;
        }
        while (candidates.hasMoreElements()) {
            InetAddress candidate = candidates.nextElement();
            boolean unusable = candidate.isLoopbackAddress()
                    || candidate.isLinkLocalAddress()
                    || candidate instanceof Inet6Address;
            if (unusable) {
                continue;
            }
            // First usable address wins; nothing else remains to be done.
            localAddr = candidate;
            logger.info("local IP:" + localAddr.getHostAddress());
            return;
        }
    } catch (Exception e) {
        logger.error("Failure when init ProxyUtil", e);
        logger.error("choose NetworkInterface\n" + getNetworkInterface());
    }
}
public static HttpHost convert(ProxyHost p){
return new HttpHost(p.getHost(),p.getPort());
}
public static boolean validateProxy(ProxyHost p) {
if (localAddr == null) {
logger.error("cannot get local IP");
return false;
}
boolean isReachable = false;
public static boolean validateProxy(Proxy p) {
Socket socket = null;
try {
socket = new Socket();
socket.bind(new InetSocketAddress(localAddr, 0));
InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort());
socket.connect(endpointSocketAddr, 3000);
logger.debug("SUCCESS - connection established! Local: " + localAddr.getHostAddress() + " remote: " + p);
isReachable = true;
return true;
} catch (IOException e) {
logger.warn("FAILRE - CAN not connect! Local: " + localAddr.getHostAddress() + " remote: " + p);
logger.warn("FAILRE - CAN not connect! remote: " + p);
return false;
} finally {
if (socket != null) {
try {
@ -93,30 +38,7 @@ public class ProxyUtils {
}
}
}
return isReachable;
}
/**
 * Builds a human-readable hint listing every network interface that has an IPv4 address.
 * <p>
 * The result always starts with a fixed header telling the reader which setting to change.
 * For each interface address whose textual form matches a dotted-quad IPv4 pattern, the
 * interface's {@code toString()} and that address are appended. If the interfaces cannot
 * be enumerated, only the header is returned.
 *
 * @return the header, followed by zero or more interface/IP entries
 */
private static String getNetworkInterface() {
    StringBuilder report =
            new StringBuilder(">>>> modify networkInterface in us.codecraft.webmagic.utils.ProxyUtils");
    Enumeration<NetworkInterface> interfaces;
    try {
        interfaces = NetworkInterface.getNetworkInterfaces();
    } catch (SocketException e1) {
        e1.printStackTrace();
        // Nothing to list; previously execution continued and dereferenced a null enumeration.
        return report.toString();
    }
    if (interfaces == null) {
        // Some JDK versions document a null result when no interface is found.
        return report.toString();
    }
    // Compiled once instead of once per address.
    Pattern ipv4Pattern = Pattern.compile("^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
    while (interfaces.hasMoreElements()) {
        NetworkInterface candidate = interfaces.nextElement();
        Enumeration<InetAddress> addresses = candidate.getInetAddresses();
        while (addresses.hasMoreElements()) {
            String address = addresses.nextElement().getHostAddress();
            if (address != null && ipv4Pattern.matcher(address).matches()) {
                report.append(candidate.toString()).append("IP:").append(address).append("\n\n");
            }
        }
    }
    return report.toString();
}
}

@ -5,7 +5,7 @@ import com.github.dreamhead.moco.Runnable;
import com.github.dreamhead.moco.Runner;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
@ -87,12 +87,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me();
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null);
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site);
// encoding in http header Content-Type
Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null;
try {
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null));
httpResponse = httpClient.execute(new HttpUriRequestConverter().convert(requestGBK, site, null));
} catch (IOException e) {
e.printStackTrace();
}
@ -117,31 +117,32 @@ public class HttpClientDownloaderTest {
server.delete(eq(query("q"), "webmagic")).response("delete");
server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head"));
server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace");
final HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
final Site site = Site.me();
Runner.running(server, new Runnable() {
@Override
public void run() throws Exception {
HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request();
request.setUrl("http://127.0.0.1:12306/search");
request.putParams("q", "webmagic");
request.setMethod(HttpConstant.Method.GET);
RequestBuilder requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("get");
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site,null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("get");
request.setMethod(HttpConstant.Method.POST);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("post");
httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("post");
request.setMethod(HttpConstant.Method.PUT);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("put");
httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("put");
request.setMethod(HttpConstant.Method.DELETE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("delete");
httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("delete");
request.setMethod(HttpConstant.Method.HEAD);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(HttpClients.custom().build().execute(requestBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head");
httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(HttpClients.custom().build().execute(httpUriRequest).getFirstHeader("method").getValue()).isEqualTo("head");
request.setMethod(HttpConstant.Method.TRACE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("trace");
httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("trace");
}
});
}
@ -156,7 +157,7 @@ public class HttpClientDownloaderTest {
final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request();
request.setUrl("http://127.0.0.1:12306/");
Page page = httpClientDownloader.download(request, null);
Page page = httpClientDownloader.download(request, Site.me().toTask());
assertThat(page.getRawText()).isEqualTo("foo");
}
});

@ -2,13 +2,10 @@ package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/**
* @author yxssfxwzy@sina.com May 30, 2014
*
@ -27,30 +24,6 @@ public class ProxyTest {
}
}
@Test
public void testProxy() {
TimerReuseProxyPool proxyPool = new TimerReuseProxyPool(httpProxyList,false);
proxyPool.setReuseInterval(500);
assertThat(proxyPool.getIdleNum()).isEqualTo(4);
for (int i = 0; i < 2; i++) {
List<Fetch> fetchList = new ArrayList<Fetch>();
while (proxyPool.getIdleNum() != 0) {
Proxy proxy = proxyPool.getProxy();
HttpHost httphost = proxy.getHttpHost();
// httphostList.add(httphost);
System.out.println(httphost.getHostName() + ":" + httphost.getPort());
Fetch tmp = new Fetch(httphost);
tmp.start();
fetchList.add(tmp);
}
for (Fetch fetch : fetchList) {
proxyPool.returnProxy(fetch.hp, Proxy.SUCCESS);
}
System.out.println(proxyPool.allProxyStatus());
}
}
class Fetch extends Thread {
HttpHost hp;

Loading…
Cancel
Save