/*
 * Source path: webmagic-extension/src/main/java/us/codecraft/webmagic/utils/RequestUtils.java
 * Introduced by commit 76766a7c770798fcefad053db9c0e47cf6c9024d
 * ("RequestUtils for range #222", yihua.huang, Mon, 5 Jun 2017 17:13:33 +0800).
 */
package us.codecraft.webmagic.utils;

import us.codecraft.webmagic.Request;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that turn a URL expression into {@link Request} objects.
 * The class is declared {@code abstract} and only exposes static methods.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/6/5
 * Time: 4:58 PM
 */
public abstract class RequestUtils {

    /**
     * Matches a numeric range placeholder such as {@code [1-3]}.
     * Group 1 captures the lower bound, group 2 the upper bound.
     */
    private static final Pattern RANGE_PATTERN = Pattern.compile("\\[(\\d+)\\-(\\d+)\\]");

    /**
     * Expands a URL expression containing a {@code [from-to]} placeholder into one
     * {@link Request} per integer in the inclusive range.
     *
     * <p>For example {@code "http://host/list?p=[1-3]"} yields requests for
     * {@code p=1}, {@code p=2} and {@code p=3}, in ascending order.
     *
     * <p>The bounds are taken from the <em>first</em> placeholder found, but every
     * placeholder occurrence in {@code exp} is replaced with the current value, so
     * {@code "a[1-2]b[5-9]"} produces {@code "a1b1"} and {@code "a2b2"}.
     *
     * <p>The single-request and empty results are immutable lists; the expanded
     * result is a freshly allocated {@link ArrayList}.
     *
     * @param exp the URL, optionally containing a {@code [from-to]} placeholder
     * @return a one-element list wrapping {@code exp} unchanged when it contains no
     *         placeholder; an empty list when the lower bound is greater than the
     *         upper bound; otherwise one request per value in the range
     * @throws NumberFormatException if a bound does not fit into an {@code int}
     * @throws NullPointerException  if {@code exp} is {@code null}
     */
    public static List<Request> from(String exp) {
        Matcher matcher = RANGE_PATTERN.matcher(exp);
        if (!matcher.find()) {
            // No placeholder: the expression is already a plain URL.
            return Collections.singletonList(new Request(exp));
        }
        int rangeFrom = Integer.parseInt(matcher.group(1));
        int rangeTo = Integer.parseInt(matcher.group(2));
        if (rangeFrom > rangeTo) {
            // A descending range is treated as "nothing to fetch" rather than an error.
            return Collections.emptyList();
        }
        List<Request> requests = new ArrayList<Request>(rangeTo - rangeFrom + 1);
        for (int i = rangeFrom; i <= rangeTo; i++) {
            // The replacement is digits only, so it needs no quoting for replaceAll.
            // replaceAll resets the matcher, which is fine because the bounds were read above.
            requests.add(new Request(matcher.replaceAll(String.valueOf(i))));
        }
        return requests;
    }

}
/*
 * Source path: webmagic-extension/src/test/java/us/codecraft/webmagic/utils/RequestUtilsTest.java
 */
package us.codecraft.webmagic.utils;

import org.junit.Test;
import us.codecraft.webmagic.Request;

import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Unit tests for {@link RequestUtils#from(String)}.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/6/5
 * Time: 5:08 PM
 */
public class RequestUtilsTest {

    /** An ascending {@code [1-3]} placeholder expands to one request per page, in order. */
    @Test
    public void test_generate_range() {
        List<Request> requests = RequestUtils.from("http://angularjs.cn/api/article/latest?p=[1-3]&s=20");
        assertThat(requests).containsExactly(
                new Request("http://angularjs.cn/api/article/latest?p=1&s=20"),
                new Request("http://angularjs.cn/api/article/latest?p=2&s=20"),
                new Request("http://angularjs.cn/api/article/latest?p=3&s=20"));
    }

    /** A descending range such as {@code [10-3]} produces no requests. */
    @Test
    public void test_generate_range_when_invalid_number() {
        List<Request> requests = RequestUtils.from("http://angularjs.cn/api/article/latest?p=[10-3]&s=20");
        assertThat(requests).isEmpty();
    }

    /** A URL without any range placeholder is wrapped unchanged in a single request. */
    @Test
    public void test_generate_single_request_when_no_range() {
        List<Request> requests = RequestUtils.from("http://angularjs.cn/api/article/latest?p=1&s=20");
        assertThat(requests).containsExactly(new Request("http://angularjs.cn/api/article/latest?p=1&s=20"));
    }
}