diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
index 72a9d3f8..68646066 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
@@ -92,41 +92,6 @@ public class UrlUtils {
}
}
- /**
- * allow blank space in quote
- */
- private static Pattern patternForHrefWithQuote = Pattern.compile("(<a[^<>]*href=)[\"']([^\"'<>]*)[\"']", Pattern.CASE_INSENSITIVE);
-
- /**
- * disallow blank space without quote
- */
- private static Pattern patternForHrefWithoutQuote = Pattern.compile("(<a[^<>]*href=)([^\"'<>\\s]+)", Pattern.CASE_INSENSITIVE);
-
- public static String fixAllRelativeHrefs(String html, String url) {
- html = replaceByPattern(html, url, patternForHrefWithQuote);
- html = replaceByPattern(html, url, patternForHrefWithoutQuote);
- return html;
- }
-
- public static String replaceByPattern(String html, String url, Pattern pattern) {
- StringBuilder stringBuilder = new StringBuilder();
- Matcher matcher = pattern.matcher(html);
- int lastEnd = 0;
- boolean modified = false;
- while (matcher.find()) {
- modified = true;
- stringBuilder.append(StringUtils.substring(html, lastEnd, matcher.start()));
- stringBuilder.append(matcher.group(1));
- stringBuilder.append("\"").append(canonicalizeUrl(matcher.group(2), url)).append("\"");
- lastEnd = matcher.end();
- }
- if (!modified) {
- return html;
- }
- stringBuilder.append(StringUtils.substring(html, lastEnd));
- return stringBuilder.toString();
- }
-
public static List<Request> convertToRequests(Collection<String> urls) {
List<Request> requestList = new ArrayList<Request>(urls.size());
for (String url : urls) {
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java
index a90304dc..6afdeefe 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java
@@ -33,25 +33,6 @@ public class UrlUtilsTest {
assertThat(absoluteUrl).isEqualTo("http://www.dianping.com/aa");
}
- @Test
- public void testFixAllRelativeHrefs() {
- String originHtml = "";
- String replacedHtml = UrlUtils.fixAllRelativeHrefs(originHtml, "http://www.dianping.com/");
- assertThat(replacedHtml).isEqualTo("");
-
- originHtml = "";
- replacedHtml = UrlUtils.fixAllRelativeHrefs(originHtml, "http://www.dianping.com/");
- assertThat(replacedHtml).isEqualTo("");
-
- originHtml = "";
- replacedHtml = UrlUtils.fixAllRelativeHrefs(originHtml, "http://www.dianping.com/");
- assertThat(replacedHtml).isEqualTo("");
-
- originHtml = "";
- replacedHtml = UrlUtils.fixAllRelativeHrefs(originHtml, "http://www.dianping.com/");
- assertThat(replacedHtml).isEqualTo("");
- }
-
@Test
public void testGetDomain(){
String url = "http://www.dianping.com/aa/";
diff --git a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
index 6e350aad..f45f7e2a 100644
--- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
+++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
@@ -5,7 +5,6 @@ import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
-
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
@@ -13,7 +12,6 @@ import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.PlainText;
-import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable;
import java.io.IOException;
@@ -108,8 +106,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
String content = webElement.getAttribute("outerHTML");
Page page = new Page();
page.setRawText(content);
- page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content,
- request.getUrl())));
+ page.setHtml(new Html(content, request.getUrl()));
page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request);
webDriverPool.returnToPool(webDriver);