move saxon to extension

pull/17/head
yihua.huang 12 years ago
parent c6132e0746
commit 268bd8d0c4

@ -27,11 +27,6 @@
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>

@ -63,12 +63,6 @@ public class Html extends PlainText {
return selectList(xpathSelector, strings);
}
/**
 * Select with an XPath 2.0 expression.
 * The selector is obtained from {@link SelectorFactory} and handed to
 * {@code selectList} together with this object's {@code strings}.
 *
 * @param xpath the XPath 2.0 expression
 * @return the result of {@code selectList}
 */
@Override
public Selectable xpath2(String xpath) {
    return selectList(SelectorFactory.getInstatnce().newXpath2Selector(xpath), strings);
}
@Override
public Selectable $(String selector) {
CssSelector cssSelector = new CssSelector(selector);

@ -34,11 +34,6 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
/**
 * XPath 2.0 selection is not implemented by this class.
 *
 * @param xpath ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Selectable xpath2(String xpath) {
    // Unconditionally rejected here.
    throw new UnsupportedOperationException();
}
@Override
public Selectable $(String selector) {
throw new UnsupportedOperationException();

@ -18,14 +18,6 @@ public interface Selectable {
*/
public Selectable xpath(String xpath);
/**
 * Select a list of results using XPath 2.0 syntax.
 *
 * @param xpath the XPath 2.0 expression to apply
 * @return new Selectable after extract
 */
public Selectable xpath2(String xpath);
/**
* select list with css selector
*

@ -34,10 +34,6 @@ public class SelectorFactory {
return newSelector(XpathSelector.class, xpath);
}
/**
 * Obtain an {@link Xpath2Selector} for the given expression via {@code newSelector}.
 *
 * @param xpath the expression passed through to {@code newSelector}
 * @return the selector returned by {@code newSelector}
 */
public Xpath2Selector newXpath2Selector(String xpath) {
    final Xpath2Selector selector = newSelector(Xpath2Selector.class, xpath);
    return selector;
}
/**
 * Obtain a {@link SmartContentSelector} via {@code newSelector}; no extra arguments are passed.
 *
 * @return the selector returned by {@code newSelector}
 */
public SmartContentSelector newSmartContentSelector() {
    final SmartContentSelector selector = newSelector(SmartContentSelector.class);
    return selector;
}

@ -1,45 +0,0 @@
package us.codecraft.webmagic.selector;
import net.sf.saxon.xpath.XPathFactoryImpl;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactoryConfigurationException;
/**
* @author code4crafter@gmail.com <br>
* @date: 13-8-2 <br>
* Time: 5:48 <br>
*/
public class SaxonTest {
@Test
public void test() throws XPathFactoryConfigurationException {
// System.setProperty("javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON, "net.sf.saxon.xpath.XPathFactoryImpl");
// XPathFactory xpf = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
String xml = "<root><a>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>";
try {
HtmlCleaner htmlCleaner = new HtmlCleaner();
TagNode tagNode = htmlCleaner.clean("");
Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
javax.xml.xpath.XPathFactory factory = XPathFactoryImpl.newInstance();
XPath xpath = factory.newXPath();
XPathExpression expr = xpath.compile("//a[matches(.,'#...#')]");
Object result = expr.evaluate(document, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
System.out.println(nodes);
} catch (Exception e) {
e.printStackTrace();
}
}
}

@ -27,6 +27,10 @@
<artifactId>webmagic-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

@ -1380,42 +1380,6 @@ public class XpathSelectorTest {
System.out.println(xpathSelector.select(text));
}
//http://sourceforge.net/mailarchive/forum.php?thread_name=4EA92A8A.6080202%40saxonica.com&forum_name=saxon-help
/**
 * Evaluates {@code fn:substring-before} over a cleaned HTML fragment twice: once directly
 * through Saxon's {@code XPathEvaluator} (failures there are only printed), and once through
 * {@code Xpath2Selector}, whose results are asserted to be non-null.
 */
@Test
public void testSaxon() {
    final String html = "<h1>眉山:扎实推进农业农村工作 促农持续增收<br>\n" +
            "<span>2013-07-31 23:29:45&nbsp;&nbsp;&nbsp;来源:<a href=\"http://www.mshw.net\" target=\"_blank\" style=\"color:#AAA\">眉山网</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;责任编辑:张斯炜</span></h1>";
    final String expression = "fn:substring-before(//h1,'\n')";

    try {
        TagNode cleaned = new HtmlCleaner().clean(html);
        Document dom = new DomSerializer(new CleanerProperties()).createDOM(cleaned);

        // Resolver that answers with the functions namespace and advertises the "fn" prefix.
        NamespaceResolver fnResolver = new NamespaceResolver() {
            @Override
            public String getURIForPrefix(String prefix, boolean useDefault) {
                return NamespaceConstant.FN;
            }

            @Override
            public Iterator<String> iteratePrefixes() {
                return Collections.singletonList("fn").iterator();
            }
        };

        XPathEvaluator evaluator = new XPathEvaluator();
        evaluator.setNamespaceContext(new NamespaceContextImpl(fnResolver));
        Object evaluated = evaluator.compile(expression).evaluate(dom, XPathConstants.STRING);
        Assert.assertNotNull(evaluated);
    } catch (Exception e) {
        e.printStackTrace();
    }

    Xpath2Selector selector = new Xpath2Selector(expression);
    String single = selector.select(html);
    Assert.assertNotNull(single);
    Assert.assertNotNull(selector.selectList(html));
}
@Test
public void testXpath2Selector() {
Xpath2Selector xpath2Selector = new Xpath2Selector("//a/@href");
Loading…
Cancel
Save