extractors
parent
20705b34ac
commit
c1471718df
@ -0,0 +1,44 @@
|
||||
package us.codecraft.webmagic.selector;
|
||||
|
||||
/**
|
||||
* Convenient methods for selectors.<br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* @since 0.2.1
|
||||
*/
|
||||
public abstract class Selectors {
|
||||
|
||||
public static RegexSelector regex(String expr) {
|
||||
return SelectorFactory.getInstatnce().newRegexSelector(expr);
|
||||
}
|
||||
|
||||
public static RegexSelector regex(String expr, int group) {
|
||||
return SelectorFactory.getInstatnce().newRegexSelector(expr, group);
|
||||
}
|
||||
|
||||
public static CssSelector $(String expr) {
|
||||
return new CssSelector(expr);
|
||||
}
|
||||
|
||||
public static CssSelector $(String expr, String attrName) {
|
||||
return new CssSelector(expr, attrName);
|
||||
}
|
||||
|
||||
public static XpathSelector xpath(String expr) {
|
||||
return SelectorFactory.getInstatnce().newXpathSelector(expr);
|
||||
}
|
||||
|
||||
public static AndSelector and(Selector... selectors) {
|
||||
return new AndSelector(selectors);
|
||||
}
|
||||
|
||||
public static OrSelector or(Selector... selectors) {
|
||||
return new OrSelector(selectors);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
String s = "a";
|
||||
or(regex("<title>(.*)</title>"), xpath("//title"), $("title")).select(s);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package us.codecraft.webmagic.selector;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import static us.codecraft.webmagic.selector.Selectors.*;
|
||||
|
||||
/**
|
||||
* @author code4crafter@gmail.com <br>
|
||||
*/
|
||||
public class ExtractorsTest {
|
||||
|
||||
String html = "<div><h1>test<a href=\"xxx\">aabbcc</a></h1></div>";
|
||||
|
||||
String html2 = "<title>aabbcc</title>";
|
||||
|
||||
@Test
|
||||
public void testEach() {
|
||||
Assert.assertEquals("<a href=\"xxx\">aabbcc</a>", $("div h1 a").select(html));
|
||||
Assert.assertEquals("xxx", $("div h1 a", "href").select(html));
|
||||
Assert.assertEquals("aabbcc", $("div h1 a", "innerHtml").select(html));
|
||||
Assert.assertEquals("xxx", xpath("//a/@href").select(html));
|
||||
Assert.assertEquals("xxx", regex("a href=\"(.*)\"").select(html));
|
||||
Assert.assertEquals("xxx", regex("(a href)=\"(.*)\"", 2).select(html));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombo() {
|
||||
Assert.assertEquals("bb", and($("title"), regex("aa(bb)cc")).select(html2));
|
||||
OrSelector or = or($("div h1 a", "innerHtml"), xpath("//title"));
|
||||
Assert.assertEquals("aabbcc", or.select(html));
|
||||
Assert.assertEquals("aabbcc", or.select(html2));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue