RequestUtils for range #222

pull/202/merge
yihua.huang 8 years ago
parent 13cdf82695
commit 76766a7c77

@ -0,0 +1,37 @@
package us.codecraft.webmagic.utils;
import us.codecraft.webmagic.Request;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author code4crafter@gmail.com
* Date: 2017/6/5
* Time: 4:58
*/
public abstract class RequestUtils {

    /**
     * Matches a numeric range placeholder such as {@code [1-3]}.
     * Group 1 captures the lower bound, group 2 the upper bound (digits only, no sign).
     */
    private static final Pattern RANGE_PATTERN = Pattern.compile("\\[(\\d+)\\-(\\d+)\\]");

    /**
     * Expands an expression containing a {@code [from-to]} placeholder into one
     * {@link Request} per integer of the range, both bounds inclusive, in ascending order.
     *
     * <p>The bounds are taken from the first placeholder found in {@code exp}. Every
     * placeholder occurring in the expression is then replaced by the current number,
     * so an expression with several placeholders gets the same number in each position.
     *
     * @param exp the expression to expand, e.g. {@code http://host/list?p=[1-3]&s=20}
     * @return a single-element list holding a request for {@code exp} itself when it
     *         contains no placeholder; an empty list when the lower bound is greater
     *         than the upper bound; otherwise one request per number in the range
     * @throws NumberFormatException    if a bound has too many digits to fit in an {@code int}
     * @throws IllegalArgumentException if the range spans more values than a list can hold
     */
    public static List<Request> from(String exp) {
        Matcher matcher = RANGE_PATTERN.matcher(exp);
        if (!matcher.find()) {
            return Collections.singletonList(new Request(exp));
        }
        int rangeFrom = Integer.parseInt(matcher.group(1));
        int rangeTo = Integer.parseInt(matcher.group(2));
        if (rangeFrom > rangeTo) {
            return Collections.emptyList();
        }
        // Compute the size in long arithmetic: for [0-2147483647] the int expression
        // "rangeTo - rangeFrom + 1" wraps around to a negative capacity.
        long count = (long) rangeTo - rangeFrom + 1;
        if (count > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "Range [" + rangeFrom + "-" + rangeTo + "] is too large to expand");
        }
        List<Request> requests = new ArrayList<Request>((int) count);
        // A long index cannot wrap past rangeTo, so the loop also terminates when
        // rangeTo == Integer.MAX_VALUE (an int index would overflow and never exit).
        for (long i = rangeFrom; i <= rangeTo; i++) {
            // replaceAll resets the matcher and substitutes every placeholder in exp.
            requests.add(new Request(matcher.replaceAll(String.valueOf(i))));
        }
        return requests;
    }
}

@ -0,0 +1,28 @@
package us.codecraft.webmagic.utils;
import org.junit.Test;
import us.codecraft.webmagic.Request;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/**
* @author code4crafter@gmail.com
* Date: 2017/6/5
* Time: 5:08
*/
public class RequestUtilsTest {

    /**
     * An ascending {@code [1-3]} placeholder must yield one request per page number,
     * in order, with the rest of the URL left untouched.
     */
    @Test
    public void test_generate_range() throws Exception {
        String expression = "http://angularjs.cn/api/article/latest?p=[1-3]&s=20";

        List<Request> generated = RequestUtils.from(expression);

        Request pageOne = new Request("http://angularjs.cn/api/article/latest?p=1&s=20");
        Request pageTwo = new Request("http://angularjs.cn/api/article/latest?p=2&s=20");
        Request pageThree = new Request("http://angularjs.cn/api/article/latest?p=3&s=20");
        assertThat(generated).containsExactly(pageOne, pageTwo, pageThree);
    }

    /**
     * A placeholder whose lower bound exceeds its upper bound must produce no requests.
     */
    @Test
    public void test_generate_range_when_invalid_number() throws Exception {
        String expression = "http://angularjs.cn/api/article/latest?p=[10-3]&s=20";

        List<Request> generated = RequestUtils.from(expression);

        assertThat(generated).isEmpty();
    }
}
Loading…
Cancel
Save