update scripts

pull/88/head^2
yihua.huang 11 years ago
parent 7f26b84439
commit 81bb809dba

@ -18,6 +18,11 @@
<artifactId>jruby</artifactId>
<version>1.7.6</version>
</dependency>
<!-- Apache Commons CLI: ScriptConsole imports org.apache.commons.cli.* to parse its command-line options -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
@ -30,5 +35,57 @@
</dependency>
</dependencies>
<!--
Build setup for a runnable console jar: dependencies are copied next to the
jar under lib/, and the jar manifest points at them and at the main class.
-->
<build>
<plugins>
<!-- During the package phase, copy all dependencies into ${project.build.directory}/lib -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<!-- Compile with Java 1.6 source/target level, reading sources as UTF-8 -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<!-- Process resources as UTF-8 -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<configuration>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<!--
Write a manifest with a Class-Path (entries prefixed with ./lib/, matching the
lib directory filled by copy-dependencies above) and ScriptConsole as Main-Class.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>./lib/</classpathPrefix>
<mainClass>us.codecraft.webmagic.scripts.ScriptConsole</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -0,0 +1,147 @@
package us.codecraft.webmagic.scripts;
import com.google.common.collect.Sets;
import org.apache.commons.cli.*;
import us.codecraft.webmagic.Spider;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Command-line entry point that loads a page-processing script from a file and
 * runs a {@link Spider} with it over the URLs given as arguments.
 *
 * <p>Options: {@code -l} script language (defaults to JavaScript), {@code -f}
 * script file (required), {@code -t} thread count (defaults to 1), {@code -s}
 * sleep time handed to the site configuration (defaults to 1000). All remaining
 * arguments are treated as start URLs; at least one is required.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class ScriptConsole {

    /** Usage line printed to stdout whenever the command line is rejected. */
    private static final String USAGE =
            "Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]";

    /** Parsed command-line settings, pre-populated with the defaults. */
    private static class Params {

        /** Accepted {@code -l} spellings per language; matched case-insensitively. */
        private static final Map<Language, Set<String>> ALIASES = new HashMap<Language, Set<String>>();

        static {
            ALIASES.put(Language.JavaScript, Sets.<String>newHashSet("js", "javascript"));
            ALIASES.put(Language.JRuby, Sets.<String>newHashSet("ruby", "jruby"));
        }

        Language language = Language.JavaScript;

        String scriptFileName;

        List<String> urls;

        int thread = 1;

        int sleepTime = 1000;

        /**
         * Selects the language whose alias equals {@code arg}, ignoring case.
         * If nothing matches, the current language is kept and a warning is
         * written to stderr so the fallback is not silent.
         *
         * @param arg value of the {@code -l} option
         */
        public void setLanguagefromArg(String arg) {
            for (Map.Entry<Language, Set<String>> entry : ALIASES.entrySet()) {
                for (String alias : entry.getValue()) {
                    // equalsIgnoreCase is locale-independent, unlike toLowerCase()
                    if (alias.equalsIgnoreCase(arg)) {
                        this.language = entry.getKey();
                        return;
                    }
                }
            }
            System.err.println("Unknown language '" + arg + "', using " + this.language);
        }

        private Language getLanguage() {
            return language;
        }

        private void setLanguage(Language language) {
            this.language = language;
        }

        private String getScriptFileName() {
            return scriptFileName;
        }

        private void setScriptFileName(String scriptFileName) {
            this.scriptFileName = scriptFileName;
        }

        private List<String> getUrls() {
            return urls;
        }

        private void setUrls(List<String> urls) {
            this.urls = urls;
        }

        private int getThread() {
            return thread;
        }

        private void setThread(int thread) {
            this.thread = thread;
        }

        private int getSleepTime() {
            return sleepTime;
        }

        private void setSleepTime(int sleepTime) {
            this.sleepTime = sleepTime;
        }
    }

    /**
     * Parses the command line and runs the spider; exits with status -1 when
     * the arguments are invalid.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        Params params = parseCommand(args);
        startSpider(params);
    }

    /**
     * Builds the script processor described by {@code params} and runs a spider
     * over the configured URLs. Terminates the JVM with status -1 if no URL was
     * supplied.
     */
    private static void startSpider(Params params) {
        // Validate before touching the script file so a missing URL is reported first.
        List<String> urls = params.getUrls();
        if (urls == null || urls.isEmpty()) {
            System.err.println("Need at least one argument");
            System.out.println(USAGE);
            System.exit(-1);
        }
        ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
                .language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).build();
        pageProcessor.getSite().setSleepTime(params.getSleepTime());
        Spider spider = Spider.create(pageProcessor).thread(params.getThread());
        for (String url : urls) {
            spider.addUrl(url);
        }
        spider.run();
    }

    /**
     * Declares the supported options and parses {@code args} into a
     * {@link Params}. Any parsing failure (unknown option, missing value,
     * non-numeric number) is reported and ends the process via {@link #exit()}.
     */
    private static Params parseCommand(String[] args) {
        try {
            Options options = new Options();
            options.addOption(new Option("l", true, "language"));
            options.addOption(new Option("t", true, "thread"));
            options.addOption(new Option("f", true, "script file"));
            // -s is read by readOptions and advertised in the usage text, so it
            // must be declared here or the parser rejects it as unrecognized.
            options.addOption(new Option("s", true, "sleep time"));
            CommandLineParser commandLineParser = new PosixParser();
            CommandLine commandLine = commandLineParser.parse(options, args);
            return readOptions(commandLine);
        } catch (Exception e) {
            // CLI boundary: report the cause briefly instead of dumping a stack trace.
            System.err.println(e);
            exit();
            return null; // unreachable; exit() terminates the JVM
        }
    }

    /** Prints the format error and usage, then terminates the JVM with status -1. */
    private static void exit() {
        System.err.println("Format error");
        System.out.println(USAGE);
        System.exit(-1);
    }

    /**
     * Copies the parsed option values into a new {@link Params}. The script
     * file option is mandatory; the process exits if it is absent.
     *
     * @throws NumberFormatException if {@code -s} or {@code -t} is not an integer
     */
    private static Params readOptions(CommandLine commandLine) {
        Params params = new Params();
        if (commandLine.hasOption("l")) {
            params.setLanguagefromArg(commandLine.getOptionValue("l"));
        }
        if (commandLine.hasOption("f")) {
            params.setScriptFileName(commandLine.getOptionValue("f"));
        } else {
            exit();
        }
        if (commandLine.hasOption("s")) {
            params.setSleepTime(Integer.parseInt(commandLine.getOptionValue("s")));
        }
        if (commandLine.hasOption("t")) {
            params.setThread(Integer.parseInt(commandLine.getOptionValue("t")));
        }
        // The pom pins commons-cli 1.2, whose getArgList() returns a raw List
        // holding the leftover (non-option) String arguments.
        @SuppressWarnings("unchecked")
        List<String> urls = commandLine.getArgList();
        params.setUrls(urls);
        return params;
    }
}

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j 1.x configuration: one console appender shared by the root logger and the org.apache logger -->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<!-- Console appender; each line is: timestamp, level (left-justified, padded to 5), logger name(file:line), "##", message -->
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Loggers under org.apache only emit warn and above; additivity is off so their events are not passed on to the root logger's appender as well -->
<logger name="org.apache" additivity="false">
<level value="warn" />
<appender-ref ref="stdout" />
</logger>
<!-- Everything else logs at info and above -->
<root>
<level value="info" />
<appender-ref ref="stdout" />
</root>
</log4j:configuration>
Loading…
Cancel
Save