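Fix compile error: pass Task to the scheduler (via push/poll) and to FreemarkerPipeline.process() instead of passing a Site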

pull/17/head
yihua.huang 12 years ago
parent 019353b41e
commit 312e1bce87

@ -44,13 +44,8 @@ public class FileCacheQueueSchedular implements Schedular {
private Set<String> urls;
public FileCacheQueueSchedular(Task task) {
this.task = task;
}
public FileCacheQueueSchedular(Task task, String filePath) {
public FileCacheQueueSchedular(String filePath) {
this.filePath = filePath;
this.task = task;
}
private void flush() {
@ -58,7 +53,8 @@ public class FileCacheQueueSchedular implements Schedular {
fileCursorWriter.flush();
}
private void init() {
private void init(Task task) {
this.task = task;
File file = new File(filePath);
if (!file.exists()) {
file.mkdirs();
@ -127,7 +123,7 @@ public class FileCacheQueueSchedular implements Schedular {
@Override
public synchronized void push(Request request, Task task) {
if (!inited.get()) {
init();
init(task);
}
if (logger.isDebugEnabled()) {
logger.debug("push to queue " + request.getUrl());
@ -142,7 +138,7 @@ public class FileCacheQueueSchedular implements Schedular {
@Override
public synchronized Request poll(Task task) {
if (!inited.get()) {
init();
init(task);
}
fileCursorWriter.println(cursor.incrementAndGet());
return queue.poll();

@ -5,10 +5,12 @@ import freemarker.template.Template;
import freemarker.template.TemplateException;
import org.apache.commons.codec.digest.DigestUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.utils.UrlUtils;
import us.codecraft.webmagic.Task;
import java.io.*;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
/**
* Author: code4crafter@gmail.com
@ -37,10 +39,8 @@ public class FreemarkerPipeline implements Pipeline {
@Override
public void process(Page page, Site site) {
String domain = site.getDomain();
domain = UrlUtils.getDomain(domain);
String path = this.path + "" + domain + "/";
public void process(Page page, Task task) {
String path = this.path + "" + task.getUUID() + "/";
File file = new File(path);
if (!file.exists()) {
file.mkdirs();

@ -31,7 +31,7 @@ public class SpiderTest {
SimplePageProcessor pageProcessor2 = new SimplePageProcessor("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space", "http://www.diaoyuweng.com/thread-*-1-1.html");
System.out.println(pageProcessor2.getSite().getEncoding());
pageProcessor2.getSite().setSleepTime(500);
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular(pageProcessor2.getSite(),"/data/temp/webmagic/cache/")).
Spider.me().pipeline(new FilePipeline()).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(pageProcessor2).run();

@ -30,7 +30,7 @@ public class DiandianProcessorTest {
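// ConsolePipeline prints the results to the console
// FileCacheQueueSchedular persists URLs so a crawl can be resumed; its temp files are written to the /data/temp/webmagic/cache directory
// Spider.run() starts the crawl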
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
Spider.me().pipeline(new ConsolePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(diaoyuwengProcessor).run();
}
}

@ -1,5 +1,6 @@
package us.codecraft.webmagic.processor;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
@ -16,11 +17,12 @@ import java.io.IOException;
*/
public class DiaoyuwengProcessorTest {
@Ignore
@Test
public void test() throws IOException {
DiaoyuwengProcessor diaoyuwengProcessor = new DiaoyuwengProcessor();
FreemarkerPipeline pipeline = new FreemarkerPipeline("wordpress.ftl");
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(diaoyuwengProcessor.getSite(), "/data/temp/webmagic/cache/")).
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(diaoyuwengProcessor).run();
}
}

@ -30,7 +30,7 @@ public class SinablogProcessorTest {
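// ConsolePipeline prints the results to the console
// FileCacheQueueSchedular persists URLs so a crawl can be resumed; its temp files are written to the /data/temp/webmagic/cache directory
// Spider.run() starts the crawl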
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular(sinaBlogProcesser.getSite(), "/data/temp/webmagic/cache/")).
Spider.me().pipeline(new FilePipeline()).pipeline(pipeline).schedular(new FileCacheQueueSchedular("/data/temp/webmagic/cache/")).
processor(sinaBlogProcesser).run();
}
}

Loading…
Cancel
Save