invite kotlin experimental

pull/584/head
yihua.huang 8 years ago
parent 3c653d941a
commit 818a2b2408

@ -9,6 +9,9 @@
<groupId>us.codecraft</groupId>
<artifactId>webmagic-scripts</artifactId>
<properties>
<kotlin.version>1.1.2-2</kotlin.version>
</properties>
<dependencies>
<dependency>
@ -16,6 +19,12 @@
<artifactId>jruby</artifactId>
<version>1.7.6</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib</artifactId>
<version>${kotlin.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
@ -48,6 +57,7 @@
</dependencies>
<build>
<sourceDirectory>${project.basedir}/src/main/kotlin,${project.basedir}/src/main/groovy,${project.basedir}/src/main/java</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>

@ -0,0 +1,40 @@
import us.codecraft.webmagic.Page
import us.codecraft.webmagic.Site
import us.codecraft.webmagic.Spider
import us.codecraft.webmagic.processor.PageProcessor
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
/**
 * Example [PageProcessor] that crawls GitHub pages: it queues the repository and user
 * links found on each page and stores the `author`, `name` and `readme` fields
 * extracted from the page.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/5/31
 * Time: 11:33 PM
 */
class GithubRepoPageProcessor : PageProcessor {

    // Crawl settings shared by every request issued for this processor.
    private val site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000)

    /**
     * Queues follow-up links found on [page] and records the extracted fields.
     * Pages for which no `name` value could be extracted are marked as skipped.
     *
     * @param page the downloaded page to process
     */
    override fun process(page: Page) {
        val links = page.html.links()
        // Repository pages: https://github.com/<user>/<repo>
        page.addTargetRequests(links.regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all())
        // User pages: https://github.com/<user>. The trailing `+` is required; without the
        // quantifier the capture group covers only a single character after the host.
        page.addTargetRequests(links.regex("(https://github\\.com/[\\w\\-]+)").all())

        page.putField("author", page.url.regex("https://github\\.com/(\\w+)/.*").toString())
        page.putField("name", page.html.xpath("//h1[@class='public']/strong/a/text()").toString())
        if (page.resultItems.get<Any>("name") == null) {
            // No repository name was extracted from this page, so skip it.
            page.setSkip(true)
        }
        page.putField("readme", page.html.xpath("//div[@id='readme']/tidyText()"))
    }

    /** Returns the crawl settings used by this processor. */
    override fun getSite(): Site = site

    companion object {
        /**
         * Starts a five-thread spider seeded with `https://github.com/code4craft`.
         *
         * NOTE(review): this file also imports
         * `us.codecraft.webmagic.processor.example.GithubRepoPageProcessor`, which has the same
         * simple name as this class — confirm which class the constructor call below resolves to.
         *
         * @param args command-line arguments (unused)
         */
        @JvmStatic
        fun main(args: Array<String>) {
            Spider.create(GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft")
                .thread(5)
                .run()
        }
    }
}
Loading…
Cancel
Save