Make PageProcessor#getSite a default method. Closes #1040.

pull/1006/merge
Sutra Zhou 3 years ago
parent c5a037a807
commit 34da2fb3a0

@ -4,13 +4,16 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
/**
* Interface to be implemented to customize a crawler.<br>
* <br>
* Interface to be implemented to customize a crawler.
*
* <p>
* In PageProcessor, you can customize:
* <br>
* start urls and other settings in {@link Site}<br>
* how the urls to fetch are detected <br>
* how the data are extracted and stored <br>
* </p>
* <ul>
* <li>start URLs and other settings in {@link Site}</li>
* <li>how the URLs to fetch are detected</li>
* <li>how the data are extracted and stored</li>
* </ul>
*
* @author code4crafter@gmail.com <br>
* @see Site
@ -20,17 +23,20 @@ import us.codecraft.webmagic.Site;
public interface PageProcessor {

    /**
     * Processes the page: extracts the URLs to fetch, extracts the data and stores it.
     *
     * @param page the page to process
     */
    void process(Page page);

    /**
     * Returns the site settings.
     *
     * <p>
     * Unless an implementation overrides this method, the value returned is
     * the result of calling {@link Site#me()}.
     * </p>
     *
     * @return site
     * @see Site
     */
    default Site getSite() {
        return Site.me();
    }
}

@ -0,0 +1,40 @@
package us.codecraft.webmagic.processor;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
/**
 * Tests for {@link PageProcessor#getSite()}: the inherited default and an
 * explicit override.
 */
public class PageProcessorTest {

    /**
     * Checks that a processor which does not override {@code getSite()}
     * yields a site equal to {@code Site.me()}, and that a processor which
     * overrides it yields the site built by the override.
     */
    @Test
    public void testGetSite() {
        // Processor that relies on the interface's default getSite().
        PageProcessor defaultProcessor = new PageProcessor() {
            @Override
            public void process(Page page) {
                // Nothing to do: only getSite() is under test.
            }
        };
        Site defaultSite = defaultProcessor.getSite();
        assertEquals(Site.me(), defaultSite);

        // Processor that supplies its own site settings.
        PageProcessor customProcessor = new PageProcessor() {
            @Override
            public void process(Page page) {
                // Nothing to do: only getSite() is under test.
            }

            @Override
            public Site getSite() {
                return Site.me().setTimeOut(123);
            }
        };
        Site customSite = customProcessor.getSite();
        assertEquals(Site.me().setTimeOut(123), customSite);
    }
}
Loading…
Cancel
Save