diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java
index 997b6cf1..f13c6ed7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AndSelector.java
@@ -5,8 +5,7 @@ import java.util.List;
/**
* @author code4crafter@gmail.com
- * Date: 13-8-3
- * Time: 下午5:29
+ * @since 0.2.0
*/
public class AndSelector implements Selector {
@@ -18,6 +17,10 @@ public class AndSelector implements Selector {
}
}
+    /**
+     * Creates an AndSelector over an already-built list of selectors.
+     * The list is stored as-is (no copy is made), so later changes to it
+     * are visible to this selector.
+     *
+     * @param selectors the selectors this instance iterates over in {@code select}
+     */
+    public AndSelector(List<Selector> selectors) {
+        this.selectors = selectors;
+    }
+
@Override
public String select(String text) {
for (Selector selector : selectors) {
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
index 48f9fb93..4ece3222 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/OrSelector.java
@@ -5,8 +5,7 @@ import java.util.List;
/**
* @author code4crafter@gmail.com
- * Date: 13-8-3
- * Time: 下午5:29
+ * @since 0.2.0
*/
public class OrSelector implements Selector {
@@ -18,11 +17,15 @@ public class OrSelector implements Selector {
}
}
+    /**
+     * Creates an OrSelector over an already-built list of selectors.
+     * The list is stored as-is (no copy is made), so later changes to it
+     * are visible to this selector.
+     *
+     * @param selectors the selectors this instance tries in order in {@code select}
+     */
+    public OrSelector(List<Selector> selectors) {
+        this.selectors = selectors;
+    }
+
@Override
public String select(String text) {
for (Selector selector : selectors) {
text = selector.select(text);
- if (text!=null){
+ if (text != null) {
return text;
}
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
index 2f9004b5..043af109 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
@@ -4,6 +4,7 @@ import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.annotation.*;
import us.codecraft.webmagic.selector.*;
+import us.codecraft.webmagic.utils.ExtractorUtils;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
@@ -49,20 +50,15 @@ class PageModelExtractor {
for (Field field : clazz.getDeclaredFields()) {
field.setAccessible(true);
FieldExtractor fieldExtractor = getAnnotationExtractBy(clazz, field);
- FieldExtractor fieldExtractorTmp = getAnnotationExtractByRaw(clazz, field);
+ FieldExtractor fieldExtractorTmp = getAnnotationExtractCombo(clazz, field);
if (fieldExtractor != null && fieldExtractorTmp != null) {
- throw new IllegalStateException("Only one of 'ExtractBy ExtractByRaw ExtractByUrl' can be added to a field!");
+ throw new IllegalStateException("Only one of 'ExtractBy ComboExtract ExtractByUrl' can be added to a field!");
} else if (fieldExtractor == null && fieldExtractorTmp != null) {
fieldExtractor = fieldExtractorTmp;
}
- // ExtractBy2 & ExtractBy3
- if (fieldExtractor!=null){
- addAnnotationExtractBy2(fieldExtractor);
- addAnnotationExtractBy3(fieldExtractor);
- }
fieldExtractorTmp = getAnnotationExtractByUrl(clazz, field);
if (fieldExtractor != null && fieldExtractorTmp != null) {
- throw new IllegalStateException("Only one of 'ExtractBy ExtractByRaw ExtractByUrl' can be added to a field!");
+ throw new IllegalStateException("Only one of 'ExtractBy ComboExtract ExtractByUrl' can be added to a field!");
} else if (fieldExtractor == null && fieldExtractorTmp != null) {
fieldExtractor = fieldExtractorTmp;
}
@@ -94,26 +90,23 @@ class PageModelExtractor {
return fieldExtractor;
}
- private FieldExtractor getAnnotationExtractBy(Class clazz, Field field) {
+ private FieldExtractor getAnnotationExtractCombo(Class clazz, Field field) {
FieldExtractor fieldExtractor = null;
- ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
- if (extractBy != null) {
- String value = extractBy.value();
+ ComboExtract comboExtract = field.getAnnotation(ComboExtract.class);
+ if (comboExtract != null) {
+ ExtractBy[] extractBies = comboExtract.value();
Selector selector;
- switch (extractBy.type()) {
- case Css:
- selector = new CssSelector(value);
- break;
- case Regex:
- selector = new RegexSelector(value);
+ switch (comboExtract.op()) {
+ case And:
+ selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
break;
- case XPath:
- selector = new XpathSelector(value);
+ case Or:
+ selector = new OrSelector(ExtractorUtils.getSelectors(extractBies));
break;
default:
- selector = new XpathSelector(value);
+ selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
}
- fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi());
+ fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, comboExtract.notNull(), comboExtract.multi());
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
@@ -122,70 +115,12 @@ class PageModelExtractor {
return fieldExtractor;
}
- private void addAnnotationExtractBy2(FieldExtractor fieldExtractor) {
- ExtractBy2 extractBy = fieldExtractor.getField().getAnnotation(ExtractBy2.class);
- if (extractBy != null) {
- String value = extractBy.value();
- Selector selector;
- switch (extractBy.type()) {
- case Css:
- selector = new CssSelector(value);
- break;
- case Regex:
- selector = new RegexSelector(value);
- break;
- case XPath:
- selector = new XpathSelector(value);
- break;
- default:
- selector = new XpathSelector(value);
- }
- fieldExtractor.setSelector(new AndSelector(fieldExtractor.getSelector(), selector));
- }
- }
-
- private void addAnnotationExtractBy3(FieldExtractor fieldExtractor) {
- ExtractBy3 extractBy = fieldExtractor.getField().getAnnotation(ExtractBy3.class);
- if (extractBy != null) {
- String value = extractBy.value();
- Selector selector;
- switch (extractBy.type()) {
- case Css:
- selector = new CssSelector(value);
- break;
- case Regex:
- selector = new RegexSelector(value);
- break;
- case XPath:
- selector = new XpathSelector(value);
- break;
- default:
- selector = new XpathSelector(value);
- }
- fieldExtractor.setSelector(new AndSelector(fieldExtractor.getSelector(), selector));
- }
- }
-
- private FieldExtractor getAnnotationExtractByRaw(Class clazz, Field field) {
+ private FieldExtractor getAnnotationExtractBy(Class clazz, Field field) {
FieldExtractor fieldExtractor = null;
- ExtractByRaw extractByRaw = field.getAnnotation(ExtractByRaw.class);
- if (extractByRaw != null) {
- String value = extractByRaw.value();
- Selector selector;
- switch (extractByRaw.type()) {
- case Css:
- selector = new CssSelector(value);
- break;
- case Regex:
- selector = new RegexSelector(value);
- break;
- case XPath:
- selector = new XpathSelector(value);
- break;
- default:
- selector = new XpathSelector(value);
- }
- fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.RawHtml, extractByRaw.notNull(), extractByRaw.multi());
+ ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
+ if (extractBy != null) {
+ Selector selector = ExtractorUtils.getSelector(extractBy);
+ fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi());
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
index 1f5f008c..02fa25b4 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
@@ -5,14 +5,75 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
+ * Combo 'ExtractBy' extractor with and/or operator.
+ *
* @author code4crafter@gmail.com
- * Date: 13-8-16
- * Time: 下午11:09
+ * @since 0.2.1
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ComboExtract {
+ /**
+ * The extractors to be combined.
+ *
+ * @return the extractors to be combined
+ */
+ ExtractBy[] value();
+ enum Op {
+ /**
+ * All extractors will be arranged as a pipeline.
+ * The next extractor uses the result of the previous as source.
+ */
+ And,
+ /**
+ * All extractors will do extracting separately,
+ * and the results of extractors will be combined as the final result.
+ */
+ Or;
+ }
+
+ /**
+ * Combining operation of extractors.
+ *
+ * @return combining operation of extractors
+ */
+ Op op() default Op.And;
+
+ /**
+ * Define whether the field must not be null.
+ * If set to 'true' and the extractor gets no result, the entire class will be discarded.
+ *
+ * @return whether the field must not be null
+ */
+ boolean notNull() default false;
+
+ public enum Source {
+ /**
+ * extract from the content extracted by class extractor
+ */
+ SelectedHtml,
+ /**
+ * extract from the raw html
+ */
+ RawHtml
+ }
+
+ /**
+ * The source for extracting.
+ * It works only if you already added 'ExtractBy' to Class.
+ *
+ * @return the source for extracting
+ */
+ Source source() default Source.SelectedHtml;
+
+ /**
+ * Define whether the extractor return more than one result.
+ * When set to 'true', the extractor return a list of string (so you should define the field as List).
+ *
+ * @return whether the extractor return more than one result
+ */
+ boolean multi() default false;
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
index 8c12ce1f..9e0ea18e 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
@@ -5,45 +5,63 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
- * 定义类或者字段的抽取规则。
+ * Define the extractor for a field or class.
*
* @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
+ * @since 0.2.0
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ExtractBy {
/**
- * 抽取规则
+ * Extractor expression, support XPath, CSS Selector and regex.
*
- * @return 抽取规则
+ * @return extractor expression
*/
String value();
public enum Type {XPath, Regex, Css}
/**
- * 抽取规则类型,支持XPath、Css selector、正则表达式,默认是XPath
+ * Extractor type, support XPath, CSS Selector and regex.
*
- * @return 抽取规则类型
+ * @return extractor type
*/
Type type() default Type.XPath;
/**
- * 是否是不能为空的关键字段,若notNull为true,则对应字段抽取不到时,丢弃整个类,默认为false
+ * Define whether the field must not be null.
+ * If set to 'true' and the extractor gets no result, the entire class will be discarded.
*
- * @return 是否是不能为空的关键字段
+ * @return whether the field must not be null
*/
boolean notNull() default false;
+ public enum Source {
+ /**
+ * extract from the content extracted by class extractor
+ */
+ SelectedHtml,
+ /**
+ * extract from the raw html
+ */
+ RawHtml
+ }
+
+ /**
+ * The source for extracting.
+ * It works only if you already added 'ExtractBy' to Class.
+ *
+ * @return the source for extracting
+ */
+ Source source() default Source.SelectedHtml;
+
/**
- * 是否抽取多个结果
- * 用于字段时,需要List来盛放结果
- * 用于类时,表示单页抽取多个对象
+ * Define whether the extractor return more than one result.
+ * When set to 'true', the extractor return a list of string (so you should define the field as List).
*
- * @return 是否抽取多个结果
+ * @return whether the extractor return more than one result
*/
boolean multi() default false;
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy2.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy2.java
deleted file mode 100644
index 2a4f0802..00000000
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy2.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package us.codecraft.webmagic.model.annotation;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.Target;
-
-/**
- * 定义类或者字段的抽取规则,只能在Extract、ExtractByRaw之后使用。
- *
- * @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
- */
-@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
-@Target({ElementType.FIELD})
-public @interface ExtractBy2 {
-
- String value();
-
- public enum Type {XPath, Regex, Css}
-
- Type type() default Type.XPath;
-
-}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy3.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy3.java
deleted file mode 100644
index 741682d4..00000000
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy3.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package us.codecraft.webmagic.model.annotation;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.Target;
-
-/**
- * 定义类或者字段的抽取规则,只能在Extract、ExtractByRaw之后使用。
- * @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
- */
-@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
-@Target({ElementType.FIELD})
-public @interface ExtractBy3 {
-
- String value();
-
- public enum Type { XPath, Regex, Css}
-
- Type type() default Type.XPath;
-
-}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByRaw.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByRaw.java
deleted file mode 100644
index a3ae3e5c..00000000
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByRaw.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package us.codecraft.webmagic.model.annotation;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.Target;
-
-/**
- * 对于在Class级别就使用过ExtractBy的类,在字段中想抽取全部内容可使用此方法。
- *
- * @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
- */
-@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
-@Target({ElementType.FIELD, ElementType.TYPE})
-public @interface ExtractByRaw {
-
- /**
- * 抽取规则
- *
- * @return 抽取规则
- */
- String value();
-
- public enum Type {XPath, Regex, Css}
-
- /**
- * 抽取规则类型,支持XPath、Css selector、正则表达式,默认是XPath
- *
- * @return 抽取规则类型
- */
- Type type() default Type.XPath;
-
- /**
- * 是否是不能为空的关键字段,若notNull为true,则对应字段抽取不到时,丢弃整个类,默认为false
- *
- * @return 是否是不能为空的关键字段
- */
- boolean notNull() default false;
-
- /**
- * 是否抽取多个结果
- * 需要List来盛放结果
- *
- * @return 是否抽取多个结果
- */
- boolean multi() default false;
-
-}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
index 51b5f0df..416bd89f 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
@@ -5,35 +5,35 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
- * 定义类或者字段的抽取规则(从url中抽取,只支持正则表达式)。
+ * Define an extractor for the url. Only regex can be used.
+ *
* @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
+ * @since 0.2.0
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD})
-public @interface ExtractByUrl{
+public @interface ExtractByUrl {
/**
- * 抽取规则,支持正则表达式
+ * Extractor expression, only regex can be used
*
- * @return 抽取规则
+ * @return extractor expression
*/
String value() default "";
/**
- * 是否是不能为空的关键字段,若notNull为true,则对应字段抽取不到时,丢弃整个类,默认为false
+ * Define whether the field must not be null.
+ * If set to 'true' and the extractor gets no result, the entire class will be discarded.
*
- * @return 是否是不能为空的关键字段
+ * @return whether the field must not be null
*/
boolean notNull() default false;
/**
- * 是否抽取多个结果
- * 用于字段时,需要List来盛放结果
- * 用于类时,表示单页抽取多个对象
+ * Define whether the extractor return more than one result.
+ * When set to 'true', the extractor return a list of string (so you should define the field as List).
*
- * @return 是否抽取多个结果
+ * @return whether the extractor return more than one result
*/
boolean multi() default false;
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/HelpUrl.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/HelpUrl.java
index 9a0cce4f..d986c1e1 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/HelpUrl.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/HelpUrl.java
@@ -5,26 +5,32 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
- * 定义辅助爬取的url。
+ * Define the 'help' url patterns for class.
+ * All urls matching the pattern will be crawled but not extracted for new objects.
+ *
* @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
+ * @since 0.2.0
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE})
public @interface HelpUrl {
/**
- * 某个类对应的URL规则列表
- * webmagic对正则表达式进行了修改,"."仅表示字符"."而不代表任意字符,而"\*"则代表了".\*",例如"http://\*.oschina.net/\*"代表了oschina所有的二级域名下的URL。
+ * The url patterns to crawl.
+ * Use regex expression with some changes:
+ * "." stand for literal character "." instead of "any character".
+ * "*" stand for any legal character for url in 0-n length ([^"'#]*) instead of "any length".
*
- * @return 抽取规则
+ * @return the url patterns for class
*/
String[] value();
/**
- * 指定提取URL的区域(仅支持XPath)
- * @return 指定提取URL的区域
+ * Define the region for url extracting.
+ * Only support XPath.
+ * When sourceRegion is set, the urls will be extracted only from the region instead of entire content.
+ *
+ * @return the region for url extracting
*/
String sourceRegion() default "";
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/TargetUrl.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/TargetUrl.java
index e12fca39..3a8ab559 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/TargetUrl.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/TargetUrl.java
@@ -5,27 +5,32 @@ import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
- * 定义某个类抽取的范围和来源,sourceRegion可以用xpath语法限定抽取区域。
+ * Define the url patterns for class.
+ * All urls matching the pattern will be crawled and extracted for new objects.
*
* @author code4crafter@gmail.com
- * Date: 13-8-1
- * Time: 下午8:40
+ * @since 0.2.0
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE})
public @interface TargetUrl {
/**
- * 某个类对应的URL规则列表
- * webmagic对正则表达式进行了修改,"."仅表示字符"."而不代表任意字符,而"\*"则代表了".\*",例如"http://\*.oschina.net/\*"代表了oschina所有的二级域名下的URL。
+ * The url patterns for class.
+ * Use regex expression with some changes:
+ * "." stand for literal character "." instead of "any character".
+ * "*" stand for any legal character for url in 0-n length ([^"'#]*) instead of "any length".
*
- * @return 抽取规则
+ * @return the url patterns for class
*/
String[] value();
/**
- * 指定提取URL的区域(仅支持XPath)
- * @return 指定提取URL的区域
+ * Define the region for url extracting.
+ * Only support XPath.
+ * When sourceRegion is set, the urls will be extracted only from the region instead of entire content.
+ *
+ * @return the region for url extracting
*/
String sourceRegion() default "";
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/package.html b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/package.html
index 1e3004fb..4e213f7d 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/package.html
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/package.html
@@ -1,5 +1,5 @@
-webmagic注解抓取方式所定义的注解。
+Annotations for defining how a class is crawled and extracted.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java
new file mode 100644
index 00000000..5c6ebbf8
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java
@@ -0,0 +1,56 @@
+package us.codecraft.webmagic.utils;
+
+import us.codecraft.webmagic.model.annotation.ExtractBy;
+import us.codecraft.webmagic.selector.CssSelector;
+import us.codecraft.webmagic.selector.RegexSelector;
+import us.codecraft.webmagic.selector.Selector;
+import us.codecraft.webmagic.selector.XpathSelector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tools for converting extraction annotations into selectors.
+ *
+ * @author code4crafter@gmail.com
+ * @since 0.2.1
+ */
+public class ExtractorUtils {
+
+    /**
+     * Builds the selector described by an {@link ExtractBy} annotation.
+     *
+     * @param extractBy annotation supplying the expression ({@code value()}) and its {@code type()}
+     * @return a {@link CssSelector}, {@link RegexSelector} or {@link XpathSelector} built from the expression
+     */
+    public static Selector getSelector(ExtractBy extractBy) {
+        String value = extractBy.value();
+        switch (extractBy.type()) {
+            case Css:
+                return new CssSelector(value);
+            case Regex:
+                return new RegexSelector(value);
+            case XPath:
+            default:
+                // XPath doubles as the fallback for any type not handled above.
+                return new XpathSelector(value);
+        }
+    }
+
+    /**
+     * Builds one selector per annotation, preserving array order.
+     *
+     * @param extractBies annotations to convert; may be {@code null}
+     * @return a new mutable list of selectors, empty when {@code extractBies} is {@code null}
+     */
+    public static List<Selector> getSelectors(ExtractBy[] extractBies) {
+        List<Selector> selectors = new ArrayList<Selector>();
+        if (extractBies == null) {
+            return selectors;
+        }
+        for (ExtractBy extractBy : extractBies) {
+            selectors.add(getSelector(extractBy));
+        }
+        return selectors;
+    }
+}