spider config draft
parent
27b37e8164
commit
023c2ac84e
@ -0,0 +1,29 @@
|
||||
<!-- This is a draft of the config file.
     If you have any advice, please comment at https://github.com/code4craft/webmagic/issues/106 -->
|
||||
<spider>
  <!-- Site-level settings: response charset, user agent, cookies and extra heads. -->
  <site>
    <charset>utf-8</charset>
    <user-agent></user-agent>
    <!-- One <cookie> per cookie; all attributes are placeholders in this draft. -->
    <cookies>
      <cookie domain="" path="" name="" value=""/>
    </cookies>
    <!-- NOTE(review): presumably HTTP request headers as name/value pairs; confirm. -->
    <heads>
      <head name="" value=""/>
    </heads>
  </site>

  <!-- URLs the spider starts from; add one <url> per entry. -->
  <startUrls>
    <url></url>
  </startUrls>

  <!-- Extraction rules: each <field> names an output value and the extractor that produces it.
       NOTE(review): targetUrl/helpUrl look like URL patterns for pages to extract from
       and pages to follow; confirm against the linked issue. -->
  <extraction targetUrl="" helpUrl="">
    <field name="title">
      <extractor type="xpath" value="//div[@class='title']"/>
    </field>
    <field name="content">
      <extractor type="xpath" value="//div[@class='content']"/>
    </field>
  </extraction>

</spider>
|
Loading…
Reference in New Issue