#629 correct illegal url in HttpUriRequestConverter

pull/638/head
yihua.huang 8 years ago
parent 5daf92e8b2
commit 3266ea15ca

@ -58,7 +58,7 @@ public class HttpUriRequestConverter {
}
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
if (site.getHeaders() != null) {
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());

@ -43,7 +43,7 @@ public class UrlUtils {
if (url.startsWith("?"))
url = base.getPath() + url;
URL abs = new URL(base, url);
return encodeIllegalCharacterInUrl(abs.toExternalForm());
return abs.toExternalForm();
} catch (MalformedURLException e) {
return "";
}
@ -53,12 +53,17 @@ public class UrlUtils {
*
* @param url url
* @return new url
* @deprecated use {@link #fixIllegalCharacterInUrl(String)} instead
*/
@Deprecated
public static String encodeIllegalCharacterInUrl(String url) {
    // Marked with the annotation as well as the Javadoc tag so that the compiler
    // warns callers; fixIllegalCharacterInUrl(String) is the replacement.
    // Only the space character is escaped here; everything else is returned as-is.
    //TODO more character support
    return url.replace(" ", "%20");
}
/**
 * Repairs the illegal character sequences this helper currently knows about:
 * every space becomes {@code %20} and every run of consecutive {@code #}
 * characters is collapsed into a single {@code #}.
 *
 * @param url url to repair, may be {@code null}
 * @return the repaired url, or {@code null} when {@code url} is {@code null}
 */
public static String fixIllegalCharacterInUrl(String url) {
    // Pass null through untouched instead of failing with a NullPointerException,
    // so callers that wrap an optional url do not have to guard the call themselves.
    if (url == null) {
        return null;
    }
    //TODO more character support
    return url.replace(" ", "%20").replaceAll("#+", "#");
}
public static String getHost(String url) {
String host = url;
int i = StringUtils.ordinalIndexOf(url, "/", 3);

@ -0,0 +1,31 @@
package us.codecraft.webmagic.downloader;
import org.junit.Test;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.utils.UrlUtils;
import java.net.URI;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks how {@link HttpUriRequestConverter} handles a url that ends with a
 * doubled fragment marker ({@code ##}).
 *
 * @author code4crafter@gmail.com
 * Date: 2017/7/22
 * Time: 5:29
 */
public class HttpUriRequestConverterTest {

    /** Url with a doubled '#', which java.net.URI refuses to parse as-is. */
    private static final String ILLEGAL_URL = "http://bj.zhongkao.com/beikao/yimo/##";

    /** The same url with the run of '#' collapsed to a single one. */
    private static final String FIXED_URL = "http://bj.zhongkao.com/beikao/yimo/#";

    /**
     * The raw illegal url is handed straight to the converter.
     * NOTE(review): this used to expect an IllegalArgumentException; the converter
     * now appears to run the url through UrlUtils.fixIllegalCharacterInUrl before
     * building the request, so the corrected URI is asserted instead - confirm
     * against HttpUriRequestConverter.
     */
    @Test
    public void test_illegal_uri() throws Exception {
        HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
        HttpClientRequestContext requestContext =
                httpUriRequestConverter.convert(new Request(ILLEGAL_URL), Site.me(), null);
        assertThat(requestContext.getHttpUriRequest().getURI()).isEqualTo(new URI(FIXED_URL));
    }

    /** A url repaired by the caller beforehand converts to the same corrected URI. */
    @Test
    public void test_illegal_uri_correct() throws Exception {
        HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
        Request request = new Request(UrlUtils.fixIllegalCharacterInUrl(ILLEGAL_URL));
        HttpClientRequestContext requestContext =
                httpUriRequestConverter.convert(request, Site.me(), null);
        assertThat(requestContext.getHttpUriRequest().getURI()).isEqualTo(new URI(FIXED_URL));
    }
}
Loading…
Cancel
Save