4 Star 2 Fork 0

durban.zhang / node-rss-spider

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.js 3.11 KB
一键复制 编辑 原始数据 按行查看 历史
durban.zhang 提交于 2015-06-10 12:13 . 添加pubDate字段
var request = require('request');
var Promise = require('bluebird');
var FeedParser = require('feedparser');
var Iconv = require('iconv').Iconv;
var read = require('node-readability');
var _ = require('lodash');
// Feed item properties copied onto each returned post when the caller
// does not pass its own list of property names.
var GlobalOptions = [
  'title',
  'description',
  'summary',
  'pubDate',
  'date',
  'link',
  'guid',
  'author',
  'comments',
  'origlink',
  'image',
  'source',
  'categories',
  'enclosures'
];
/**
 * Download the feed at `url` and collect its items.
 *
 * This function used to carry a line-for-line copy of FetchRss; it now
 * delegates to FetchRss so the two entry points cannot drift apart.
 *
 * @param {string} url - Address of the feed to request.
 * @param {string[]} [options] - Item property names to keep on each post;
 *   FetchRss falls back to GlobalOptions when this is omitted.
 * @returns {Promise} Promise resolved with the array of picked posts, or
 *   rejected when the request, the status check or the parser fails.
 */
function FetchSite(url, options) {
  return FetchRss(url, options);
}
/**
 * Download the feed at `url`, stream it through FeedParser and collect
 * the parsed items.
 *
 * @param {string} url - Address of the feed to request.
 * @param {string[]} [options] - Item property names to keep on each post;
 *   defaults to GlobalOptions.
 * @returns {Promise} Promise resolved with the array of picked posts once
 *   the parser ends; rejected on a request error, a non-200 response or a
 *   parser error.
 */
function FetchRss(url, options) {
  options = options || GlobalOptions;
  return new Promise(function (resolve, reject) {
    var posts = [];
    var req = request(url, {timeout: 10000, pool: false});
    req.setMaxListeners(50);
    req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.76 Safari/537.36');
    req.setHeader('accept', 'text/html,application/xhtml+xml,application/xml');
    var feedparser = new FeedParser();

    req.on('error', reject);
    req.on('response', function (res) {
      if (res.statusCode !== 200) {
        // Route the failure through the request's 'error' event so the
        // reject listener registered above receives it.
        return this.emit('error', new Error('Bad Status Code:' + res.statusCode));
      }
      this.pipe(feedparser);
    });

    feedparser.on('error', reject);
    // Stream 'end' carries no arguments; failures arrive via 'error' above.
    feedparser.on('end', function () {
      resolve(posts);
    });
    feedparser.on('readable', function () {
      var item;
      // read() returns null once the parser's buffer is drained.
      while ((item = this.read())) {
        posts.push(_.pick(item, options));
      }
    });
  });
}
/**
 * Fetch the feed at `url` with the default item fields, then load the
 * article content for every post it contains.
 *
 * @param {string} url - Address of the feed to request.
 * @returns {Promise} Promise resolved with whatever FetchAllContent
 *   resolves for the fetched posts.
 */
function GetAllByUrl(url) {
  var feedPosts = FetchRss(url);
  return feedPosts.then(FetchAllContent);
}
/**
 * Promise wrapper around the node-readability `read` callback API.
 *
 * @param {string} link - URL of the page to load.
 * @returns {Promise} Promise resolved with the article object passed to
 *   the callback, or rejected with the callback's error.
 */
function GetCleanBody(link) {
  return new Promise(function (resolve, reject) {
    read(link, function (err, article) {
      if (err) {
        // Stop here so the failure path never also reaches resolve().
        return reject(err);
      }
      resolve(article);
    });
  });
}
/**
 * Load the article body for every post and store it on `post.content`.
 *
 * Each post is mutated in place: `content` is set to the extracted article
 * content, falling back to the post's `description` and then `summary`
 * when nothing was extracted.
 *
 * @param {Object[]} posts - Posts carrying at least a `link` property.
 * @returns {Promise} Promise resolved with the same `posts` array after
 *   every post has been processed; rejected if any article fetch fails.
 */
function FetchAllContent(posts) {
  // Promise.reduce is used only to walk the posts through the reducer;
  // the accumulated value itself is never used.
  return Promise.reduce(posts, function (unused, post) {
    return GetCleanBody(post.link).then(function (article) {
      post.content = article.content ? article.content : post.description || post.summary;
      // node-readability documents close() as the way to release the
      // article's DOM; guarded in case the installed version lacks it.
      if (typeof article.close === 'function') {
        article.close();
      }
      return post;
    });
  }, []).then(function () {
    return posts;
  });
}
module.exports = {
FetchRss: FetchRss,
FetchSite: FetchSite,
FetchAllContent: FetchAllContent,
GetCleanBody: GetCleanBody,
GetAllByUrl: GetAllByUrl
};
JavaScript
1
https://gitee.com/durban/node-rss-spider.git
git@gitee.com:durban/node-rss-spider.git
durban
node-rss-spider
node-rss-spider
master

搜索帮助