diff --git a/pom.xml b/pom.xml index 1a0139f9..e4b2c841 100644 --- a/pom.xml +++ b/pom.xml @@ -146,7 +146,7 @@ org.jsoup jsoup - 1.8.3 + 1.10.3 org.mockito diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java index 7b22639a..f2218f12 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java @@ -3,7 +3,6 @@ package us.codecraft.webmagic.selector; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.nodes.Entities; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,25 +19,12 @@ public class Html extends HtmlNode { private Logger logger = LoggerFactory.getLogger(getClass()); - private static volatile boolean INITED = false; - /** * Disable jsoup html entity escape. It can be set just before any Html instance is created. + * @deprecated */ public static boolean DISABLE_HTML_ENTITY_ESCAPE = false; - /** - * Disable jsoup html entity escape. It is a hack way only for jsoup 1.7.2. - */ - private void disableJsoupHtmlEntityEscape() { - if (DISABLE_HTML_ENTITY_ESCAPE && !INITED) { - Entities.EscapeMode.base.getMap().clear(); - Entities.EscapeMode.extended.getMap().clear(); - Entities.EscapeMode.xhtml.getMap().clear(); - INITED = true; - } - } - /** * Store parsed document for better performance when only one text exist. */ @@ -46,7 +32,6 @@ public class Html extends HtmlNode { public Html(String text, String url) { try { - disableJsoupHtmlEntityEscape(); this.document = Jsoup.parse(text, url); } catch (Exception e) { this.document = null; @@ -56,7 +41,6 @@ public class Html extends HtmlNode { public Html(String text) { try { - disableJsoupHtmlEntityEscape(); this.document = Jsoup.parse(text); } catch (Exception e) { this.document = null; diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/HtmlTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/HtmlTest.java index faf249fa..f42f68d7 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/HtmlTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/HtmlTest.java @@ -30,7 +30,6 @@ public class HtmlTest { @Test public void testEnableJsoupHtmlEntityEscape() throws Exception { - Html.DISABLE_HTML_ENTITY_ESCAPE = false; Html html = new Html("aaaaaaa&b"); assertThat(html.regex("(aaaaaaa&b)").toString()).isEqualTo("aaaaaaa&b"); }