bugfix: rawhtml does not work

pull/17/head
yihua.huang 12 years ago
parent a994b1c9fd
commit 6cc1d62a08

@ -4,6 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.*;
import us.codecraft.webmagic.model.annotation.Experimental;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.processor.SimplePageProcessor;
@ -20,6 +21,7 @@ import java.io.*;
* @author code4crafter@gmail.com
* @since 0.2.1
*/
@Experimental
public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor {
private Downloader downloaderWhenFileMiss;

@ -23,6 +23,12 @@ import us.codecraft.webmagic.processor.PageProcessor;
* private List<String> tags;
* }
</pre>
* And start the spider by:
* <pre>
* OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
* ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
* }
</pre>
* @author code4crafter@gmail.com <br>
* @since 0.2.0
*/

@ -105,7 +105,8 @@ class PageModelExtractor {
default:
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
}
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, comboExtract.notNull(), comboExtract.multi());
fieldExtractor = new FieldExtractor(field, selector, comboExtract.source() == ComboExtract.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
comboExtract.notNull(), comboExtract.multi());
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
@ -119,7 +120,8 @@ class PageModelExtractor {
ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
if (extractBy != null) {
Selector selector = ExtractorUtils.getSelector(extractBy);
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi());
fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
extractBy.notNull(), extractBy.multi());
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);

Loading…
Cancel
Save