谷歌浏览器爬虫插件 Web Scraper 抓取 CSDN 作者文章列表
1、数据字段
文章类型
文章标题
文章详情链接
文章简介
发布日期
阅读数
评论数
2、结果示例截图
3、Sitemap JSON
{"_id":"csdn","startUrl":["https://blog.csdn.net/zll_0405/article/list/?"],"selectors":[{"id":"element","type":"SelectorElement","parentSelectors":["_root"],"selector":"div.article-item-box","multiple":true,"delay":0},{"id":"tag","type":"SelectorText","parentSelectors":["element"],"selector":"span.article-type","multiple":false,"regex":"","delay":0},{"id":"title","type":"SelectorLink","parentSelectors":["element"],"selector":"h4 a","multiple":false,"delay":0},{"id":"intro","type":"SelectorText","parentSelectors":["element"],"selector":".content a","multiple":false,"regex":"","delay":0},{"id":"date","type":"SelectorText","parentSelectors":["element"],"selector":"span.date","multiple":false,"regex":"","delay":0},{"id":"read","type":"SelectorText","parentSelectors":["element"],"selector":"p:nth-of-type(3) span.num","multiple":false,"regex":"","delay":0},{"id":"comment","type":"SelectorText","parentSelectors":["element"],"selector":"p:nth-of-type(5) span.num","multiple":false,"regex":"","delay":0}]}
作者:iWebscraper
页:
[1]