diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index d2c48b24..cf42d2a9 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -27,11 +27,6 @@
commons-lang3
-
- net.sf.saxon
- Saxon-HE
-
-
log4j
log4j
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
index 79d62a01..114eef99 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
@@ -63,12 +63,6 @@ public class Html extends PlainText {
return selectList(xpathSelector, strings);
}
- @Override
- public Selectable xpath2(String xpath) {
- Xpath2Selector xpathSelector = SelectorFactory.getInstatnce().newXpath2Selector(xpath);
- return selectList(xpathSelector, strings);
- }
-
@Override
public Selectable $(String selector) {
CssSelector cssSelector = new CssSelector(selector);
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
index 4fff6da8..d06a5310 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
@@ -34,11 +34,6 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
- @Override
- public Selectable xpath2(String xpath) {
- throw new UnsupportedOperationException();
- }
-
@Override
public Selectable $(String selector) {
throw new UnsupportedOperationException();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
index cea501dd..42f3d108 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
@@ -18,14 +18,6 @@ public interface Selectable {
*/
public Selectable xpath(String xpath);
- /**
- * select list with xpath 2.0 syntax
- *
- * @param xpath
- * @return new Selectable after extract
- */
- public Selectable xpath2(String xpath);
-
/**
* select list with css selector
*
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
index 9abb1ce3..1dd56e01 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
@@ -34,10 +34,6 @@ public class SelectorFactory {
return newSelector(XpathSelector.class, xpath);
}
- public Xpath2Selector newXpath2Selector(String xpath) {
- return newSelector(Xpath2Selector.class, xpath);
- }
-
public SmartContentSelector newSmartContentSelector(){
return newSelector(SmartContentSelector.class);
}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SaxonTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SaxonTest.java
deleted file mode 100644
index 05a89063..00000000
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SaxonTest.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package us.codecraft.webmagic.selector;
-
-import net.sf.saxon.xpath.XPathFactoryImpl;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.DomSerializer;
-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
-import org.junit.Test;
-import org.w3c.dom.Document;
-import org.w3c.dom.NodeList;
-
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathFactoryConfigurationException;
-
-/**
- * @author code4crafter@gmail.com
- * @date: 13-8-2
- * Time: 下午5:48
- */
-public class SaxonTest {
-
- @Test
- public void test() throws XPathFactoryConfigurationException {
-// System.setProperty("javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON, "net.sf.saxon.xpath.XPathFactoryImpl");
-// XPathFactory xpf = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
- String xml = "#BBB##CCC##DDD#";
- try {
- HtmlCleaner htmlCleaner = new HtmlCleaner();
- TagNode tagNode = htmlCleaner.clean("");
- Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
-
- javax.xml.xpath.XPathFactory factory = XPathFactoryImpl.newInstance();
- XPath xpath = factory.newXPath();
- XPathExpression expr = xpath.compile("//a[matches(.,'#...#')]");
-
- Object result = expr.evaluate(document, XPathConstants.NODESET);
- NodeList nodes = (NodeList) result;
- System.out.println(nodes);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-}
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 63034f23..843c2c3c 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -27,6 +27,10 @@
webmagic-core
${project.version}
+
+ net.sf.saxon
+ Saxon-HE
+
junit
junit
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
similarity index 98%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
index 2f663c99..9f32a8f1 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
@@ -1380,42 +1380,6 @@ public class XpathSelectorTest {
System.out.println(xpathSelector.select(text));
}
- //http://sourceforge.net/mailarchive/forum.php?thread_name=4EA92A8A.6080202%40saxonica.com&forum_name=saxon-help
- @Test
- public void testSaxon() {
- String text = "眉山:扎实推进农业农村工作 促农持续增收
\n" +
- "2013-07-31 23:29:45 来源:眉山网 责任编辑:张斯炜
";
- try {
- HtmlCleaner htmlCleaner = new HtmlCleaner();
- TagNode tagNode = htmlCleaner.clean(text);
- Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
- XPathEvaluator xPathEvaluator = new XPathEvaluator();
- xPathEvaluator.setNamespaceContext(new NamespaceContextImpl(new NamespaceResolver() {
-
-
- @Override
- public String getURIForPrefix(String s, boolean b) {
- return NamespaceConstant.FN;
- }
-
- @Override
- public Iterator iteratePrefixes() {
- return Collections.singletonList("fn").iterator();
- }
- }));
- XPathExpression expr = xPathEvaluator.compile("fn:substring-before(//h1,'\n')");
- Object result = expr.evaluate(document, XPathConstants.STRING);
- Assert.assertNotNull(result);
- } catch (Exception e) {
- e.printStackTrace();
- }
- Xpath2Selector xpath2Selector = new Xpath2Selector("fn:substring-before(//h1,'\n')");
- String select = xpath2Selector.select(text);
- Assert.assertNotNull(select);
- Assert.assertNotNull(xpath2Selector.selectList(text));
-
- }
-
@Test
public void testXpath2Selector() {
Xpath2Selector xpath2Selector = new Xpath2Selector("//a/@href");