├── .gitignore ├── LICENSE ├── README.md ├── index.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | perf/versions 3 | nyc_output 4 | .nyc_output 5 | coverage 6 | *.log 7 | .DS_Store 8 | npm-debug.log 9 | tmp 10 | build 11 | build-es 12 | .idea 13 | docs -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Bhavneet Singh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GoogleNewsApi 2 | A REST API that returns news for a given country, grouped by category, together with related stories for each article. 3 | 4 | ### Firebase API Usage 5 | Currently only the following countries are supported: 6 | ['IN','US','AU','CA','CN','FR','JP','PK','RU','UK'] 7 | 8 | Categories are Business, Entertainment, Health, Science, Sports, World and Technology. 9 | 10 | Firebase API - https://news-20e36.firebaseio.com/.json 11 | 12 | Use as https://news-20e36.firebaseio.com/:countryCode/:category.json 13 | 14 | Sample 15 | https://news-20e36.firebaseio.com/US.json 16 | 17 | https://news-20e36.firebaseio.com/US/Business.json 18 | 19 | ### Installation 20 | ``` 21 | npm i googlenewsapi 22 | 23 | ``` 24 | ### Sample 25 | 26 | ``` 27 | News { 28 | title: 'Watch SpaceX get very close to catching its rocket’s nose cone during a test', 29 | description: 'SpaceX is getting excruciatingly close to catching a part of its rocket that falls out of the sky after each launch. A new video shows a recent drop test of the ...', 30 | thumbnail: 'https://lh3.googleusercontent.com/NKOw3Vvids-nNCfx1vmll-wR88AvTqnRK3vakt4XhxhtYzgWlurhPEOWY-lq413YTava0bMKdwIKrxQvRjQ=pf-w200-h200', 31 | publisher: 'The Verge', 32 | publishedAt: 1548865689, 33 | category: 'Science', 34 | url: 'https://news.google.com/articles/CAIiEGxY4QfBKcLIDNpvJ1mDS2AqFggEKg4IACoGCAow3O8nMMqOBjD38Ak?hl=en-US&gl=US&ceid=US%3Aen', 35 | related: 36 | [ News { 37 | title: 'Watch SpaceX rocket\'s nose cone miss the boat, again', 38 | description: 'A ship named Mr. 
Steven keeps trying to catch the thing.', 39 | thumbnail: '', 40 | publisher: 'CNET', 41 | publishedAt: 1548870300, 42 | category: 'Science', 43 | url: 'https://news.google.com/articles/CAIiECurGhHT3ziGtrQLkWIu6OIqEwgEKgwIACoFCAow4GowoAgwkRo?hl=en-US&gl=US&ceid=US%3Aen', 44 | related: [] 45 | } 46 | ] 47 | } 48 | 49 | ``` 50 | ### Usage 51 | ``` 52 | var googleNewsApi=require('googlenewsapi'); 53 | // Initialize with a store function, then register each country code to crawl. 54 | googleNewsApi.initialize(storeFunction); 55 | googleNewsApi.addCountryCode(code); 56 | setInterval(googleNewsApi.refresh,1000*60*60); 57 | 58 | //Search 59 | googleNewsApi.search('Modi','IN').then((results)=>{ 60 | 61 | }) 62 | ``` 63 | 64 | ### Built With 65 | 66 | The following libraries are used in this package: 67 | 1) async 68 | 2) cheerio 69 | 3) request 70 | 71 | 72 | ## Authors 73 | 74 | * **Bhavneet Singh** - [singhbhavneet](https://github.com/singhbhavneet) 75 | 76 | ## License 77 | 78 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 79 | 80 | ## Acknowledgments 81 | 82 | It's because of my mummy's motivation, God's blessing and my family's support that I was able to complete this project. 
// -----------------------------------------------------------------------------
// /index.js
// -----------------------------------------------------------------------------
'use strict';

const cheerio = require("cheerio");
const async = require('async');
const request = require('request');

/** Origin prepended to the relative links found in the scraped pages. */
const GOOGLE_NEWS_ORIGIN = 'https://news.google.com';

/**
 * Registry of country codes to crawl. Keys are the codes passed to
 * addCountryCode(); every value is `true`. A prototype-less object is used
 * because only the keys matter and they come from the caller.
 */
const data = Object.create(null);

/** Callback given to initialize(); refresh() hands it the merged crawl results. */
let storeData;

/**
 * Builds the Google News landing-page URL for a country.
 *
 * @param {string} code - Country code placed in the `gl` query parameter.
 * @returns {string} The URL to crawl for that country.
 */
function base(code) {
  return `${GOOGLE_NEWS_ORIGIN}/?gl=${encodeURIComponent(code)}`;
}

/**
 * Turns an href scraped from a Google News page into an absolute URL.
 * `./path` and `/path` are resolved against GOOGLE_NEWS_ORIGIN; anything else
 * (already absolute, protocol-relative, empty) is returned unchanged.
 *
 * @param {string} href - Raw value of an `href` attribute.
 * @returns {string} The absolute URL, or `href` itself when it is not site-relative.
 */
function toAbsoluteUrl(href) {
  if (href.startsWith('./')) {
    return GOOGLE_NEWS_ORIGIN + href.substring(1);
  }
  if (href.startsWith('/') && !href.startsWith('//')) {
    return GOOGLE_NEWS_ORIGIN + href;
  }
  return href;
}

/**
 * Registers the store callback and immediately triggers a refresh.
 *
 * Note: the refresh runs with whatever country codes are registered at this
 * moment, so codes added afterwards are only picked up by the next refresh().
 *
 * @param {function(Object): void} storeNews - Receives an object keyed by
 *   country code, each value being an object keyed by topic name.
 * @throws {TypeError} If `storeNews` is not a function.
 */
function initialize(storeNews) {
  if (typeof storeNews !== 'function') {
    throw new TypeError('StoreFunction can not be undefined');
  }
  storeData = storeNews;
  refresh();
}
module.exports.initialize = initialize;

/**
 * Adds a country to the set crawled by refresh().
 *
 * @param {string} [countryCode='IN'] - Country code to register.
 */
function addCountryCode(countryCode = 'IN') {
  data[countryCode] = true;
}
module.exports.addCountryCode = addCountryCode;

/**
 * Crawls every registered country and passes the merged result to the store
 * callback. If any crawl reports an error, the error is logged and the store
 * callback is not invoked for this round.
 *
 * @throws {Error} If called before initialize() registered a store callback.
 */
function refresh() {
  if (typeof storeData !== 'function') {
    throw new Error('initialize must be called with a store function before refresh');
  }
  console.log("Refreshing.....");
  async.map(Object.keys(data), crawlGoogle, (err, results) => {
    if (err) {
      console.error('Refresh failed:', err);
      return;
    }
    // Each result is `{ [countryCode]: topics }`; merge them into one object.
    storeData(Object.assign({}, ...results));
  });
}
module.exports.refresh = refresh;

/**
 * A single news article together with the related articles listed under it.
 */
class News {
  /**
   * @param {string} [title=""]
   * @param {string} [description=""]
   * @param {string} [thumbnail=""] - Image URL.
   * @param {string} [publisher=""]
   * @param {number} [publishedAt=0] - Integer parsed from the article's
   *   `<time datetime>` attribute (presumably epoch seconds - confirm).
   * @param {string} [url=""] - Article link; site-relative links are made absolute.
   * @param {string} [category=""] - Name of the topic the article was found under.
   */
  constructor(title = "", description = "", thumbnail = "", publisher = "", publishedAt = 0, url = "", category = "") {
    this.title = title;
    this.description = description;
    this.thumbnail = thumbnail;
    this.publisher = publisher;
    this.publishedAt = publishedAt;
    this.category = category;
    // Previously `url` was only assigned for './'-relative links, leaving it
    // undefined for every other href; always assign it now.
    this.url = toAbsoluteUrl(url);
    this.related = [];
  }
}

/** Request headers sent with every crawl (a desktop Chrome user agent). */
const headers = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
};

/**
 * Crawls the landing page of one country, follows every topic link found on
 * it and collects the articles of each topic.
 *
 * @param {string} code - Country code to crawl.
 * @param {function(?Error, Object=): void} done - Node-style callback; on
 *   success receives `{ [code]: { [topicName]: News[] } }`.
 */
function crawlGoogle(code, done) {
  request.get({ uri: base(code), headers: headers }, (err, resp, body) => {
    if (err) {
      done(err);
      return;
    }
    const $ = cheerio.load(body);
    // Scraping topics. NOTE(review): the '.SFllF' class is tied to Google's
    // generated markup and may change without notice.
    const topics = [];
    $('.SFllF').each((index, elm) => {
      const link = $(elm).attr('href');
      if (link && link.includes('/topics')) {
        topics.push({ link: toAbsoluteUrl(link), name: $(elm).attr('aria-label') });
      }
    });
    async.map(topics, crawlGoogleLink, (crawlErr, jsonData) => {
      if (crawlErr) {
        done(crawlErr);
        return;
      }
      // Each entry is `{ [topicName]: News[] }`; flatten them into one object.
      done(null, { [code]: Object.assign({}, ...jsonData) });
    });
  });
}

/**
 * Searches Google News for a query.
 *
 * @param {string} query - Raw (unencoded) search text; it is URL-encoded here.
 * @returns {Promise<{Results: News[]}>} Resolves with the articles found under
 *   the key `Results`; rejects if the request fails.
 * @throws {Error} Synchronously, if `query` is empty.
 */
function search(query) {
  if (!query) {
    throw new Error('Query can not be empty');
  }
  const searchLink = `${GOOGLE_NEWS_ORIGIN}/search?q=${encodeURIComponent(query)}`;
  return new Promise((resolve, reject) => {
    crawlGoogleLink({ link: searchLink, name: 'Results' }, (err, results) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(results);
    });
  });
}

module.exports.search = search;

/**
 * Downloads one Google News page (a topic or a search result page) and
 * extracts its articles.
 *
 * @param {{link: string, name: string}} topic - Page URL and the name used
 *   both as the result key and as each article's `category`.
 * @param {function(?Error, Object=): void} done - Node-style callback; on
 *   success receives `{ [topic.name]: News[] }`.
 */
function crawlGoogleLink(topic, done) {
  request.get({ uri: topic.link, headers: headers }, (err, resp, body) => {
    if (err) {
      done(err);
      return;
    }
    const $ = cheerio.load(body);
    const articles = [];
    // NOTE(review): 'esK7Lc' / '.SbNwzf' are Google-generated identifiers.
    $('[jsname=esK7Lc]').children().each((index, article) => {
      try {
        const mainArticle = $(article).find('figure').parent();
        const mainNews = getDetails(mainArticle, 'h3');
        const thumbnail = mainArticle.find('img').first().attr('src');
        if (thumbnail) {
          mainNews.thumbnail = thumbnail;
        }
        mainNews.category = topic.name;
        mainArticle.find('.SbNwzf').each((relatedIndex, elm) => {
          const relatedNews = getDetails($(elm), 'h4');
          if (relatedNews.title) {
            relatedNews.category = topic.name;
            mainNews.related.push(relatedNews);
          }
        });
        if (mainNews.title) {
          articles.push(mainNews);
        }
      } catch (error) {
        // Best effort: one malformed article must not abort the whole page.
        console.warn(`Skipping unparsable article in "${topic.name}":`, error);
      }
    });
    done(null, { [topic.name]: articles });
  });
}

/**
 * Converts the `datetime` attribute of an article's `<time>` element to an
 * integer. The attribute is expected to look like `seconds: <digits>`,
 * possibly followed by a newline.
 *
 * @param {string|undefined} datetime - Raw attribute value.
 * @returns {number} The parsed integer, or 0 when missing or not numeric.
 */
function parsePublishedAt(datetime) {
  if (!datetime) {
    return 0;
  }
  const seconds = Number.parseInt(datetime.replace('seconds: ', '').trim(), 10);
  return Number.isNaN(seconds) ? 0 : seconds;
}

/**
 * Builds a News object from the cheerio selection wrapping one article.
 *
 * @param {Object} element - Cheerio selection containing the article markup.
 * @param {string} titleTag - Selector of the element holding the title
 *   (callers pass 'h3' for main articles and 'h4' for related ones).
 * @returns {News} The extracted article, or a blank News (empty title) when
 *   the element contains no link.
 */
function getDetails(element, titleTag) {
  const url = element.find('a').first().attr('href');
  if (!url) {
    return new News();
  }
  const title = element.find(titleTag).first().text();
  const description = element.find('p').first().text();
  const publisher = element.find('.KbnJ8').first().text();
  const publishedAt = parsePublishedAt(element.find('time').first().attr('datetime'));
  return new News(title, description, "", publisher, publishedAt, url);
}

/*
 * -----------------------------------------------------------------------------
 * /package.json  (remainder of the repository dump, contents preserved)
 * -----------------------------------------------------------------------------
 * {
 *   "name": "googlenewsapi",
 *   "version": "2.0.3",
 *   "description": "A npm package to scrap news of any language from any country from GoogleNews.",
 *   "main": "index.js",
 *   "scripts": {
 *     "test": "echo \"Error: no test specified\" && exit 1"
 *   },
 *   "repository": {
 *     "type": "git",
 *     "url": "git+https://github.com/singhbhavneet/GoogleNewsApi.git"
 *   },
 *   "keywords": [
 *     "googlenews",
 *     "news",
 *     "newsapi",
 *     "REST",
 *     "async",
 *     "promise",
 *     "googlenewsapi",
 *     "cheerio"
 *   ],
 *   "author": "Bhavneet Singh (https://github.com/singhbhavneet/)",
 *   "license": "MIT",
 *   "bugs": {
 *     "url": "https://github.com/singhbhavneet/GoogleNewsApi/issues"
 *   },
 *   "homepage": "https://github.com/singhbhavneet/GoogleNewsApi#readme",
 *   "dependencies": {
 *     "async": "^2.6.1",
 *     "cheerio": "^1.0.0-rc.2",
 *     "request": "^2.88.0"
 *   },
 *   "devDependencies": {}
 * }
 */