# Repository layout:
#   ├── README.md
#   ├── main.py
#   └── news.sqlite
#
# /README.md
# ----------
# # News_parser SQLite
# ### parsing economy news from RBC (https://www.rbc.ru/economics)
#
# ![331](https://user-images.githubusercontent.com/54048747/224293143-24b3d2b4-8124-43a1-98b9-de3e2a62d9ac.JPG)
#
# ## SQL:
# ```SQL
# CREATE TABLE IF NOT EXISTS news(
#    id INT PRIMARY KEY,
#    time_ TEXT,
#    name_ TEXT);
# ```
#
# /main.py
# --------
"""Scrape economy headlines from RBC and store them in a SQLite table."""

import re
import sqlite3
from datetime import date, datetime

# Schema taken from the project's README.
_CREATE_TABLE_SQL = (
    "CREATE TABLE IF NOT EXISTS news("
    "id INT PRIMARY KEY, time_ TEXT, name_ TEXT);"
)

# First three letters of Russian month names -> month number.  May appears
# both in the nominative ("май") and the genitive ("мая") form.
_MONTHS = {
    "янв": 1,
    "фев": 2,
    "мар": 3,
    "апр": 4,
    "май": 5,
    "мая": 5,
    "июн": 6,
    "июл": 7,
    "авг": 8,
    "сен": 9,
    "окт": 10,
    "ноя": 11,
    "дек": 12,
}

# "<day> <month>[ <year>], <time>", e.g. "10 мар, 18:45".
_DATED_RE = re.compile(
    r"^\s*(\d{1,2})\s+([^\W\d_]+)\.?(?:\s+(\d{4}))?\s*,\s*(.*\S)\s*$"
)


def bd(a, db_path='news.sqlite'):
    """Append scraped news rows to the ``news`` table.

    Args:
        a: Iterable of ``[time, title]`` pairs (only the first two elements
            of each item are used).
        db_path: SQLite database file to write to.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
    """
    conn = sqlite3.connect(db_path)
    try:
        # One transaction for the whole batch: committed on success,
        # rolled back if any statement fails.
        with conn:
            conn.execute(_CREATE_TABLE_SQL)
            # Continue numbering after the rows already stored; restarting
            # at 0 on every run would violate the primary key.
            (next_id,) = conn.execute(
                "SELECT COALESCE(MAX(id), -1) + 1 FROM news;"
            ).fetchone()
            conn.executemany(
                "INSERT INTO news VALUES(?, ?, ?);",
                [
                    (next_id + offset, row[0], row[1])
                    for offset, row in enumerate(a)
                ],
            )
    finally:
        conn.close()


def dt(date_, today=None):
    """Convert a scraped timestamp label to ``YYYY-MM-DD HH:MM`` text.

    Two label shapes are understood:

    * ``"18:45"`` (no comma) - a time today; today's date is prefixed.
    * ``"10 мар, 18:45"`` - day and Russian month name, optionally followed
      by a four-digit year, then a comma and the time.

    Args:
        date_: The label text.
        today: ``datetime.date`` used as "today"; defaults to
            ``date.today()``.

    Returns:
        The normalized string, or ``date_`` unchanged when it contains a
        comma but is not in a recognised form.
    """
    if today is None:
        today = date.today()

    if ',' not in date_:
        return str(today) + ' ' + date_

    match = _DATED_RE.match(date_)
    if match is None:
        return date_
    day_text, month_text, year_text, time_text = match.groups()

    month = _MONTHS.get(month_text.lower()[:3])
    if month is None:
        return date_
    day = int(day_text)

    if year_text is not None:
        year = int(year_text)
    elif (month, day) > (today.month, today.day):
        # A label without a year that would lie in the future must refer to
        # the previous year (e.g. "30 дек" read on 2 January).
        year = today.year - 1
    else:
        year = today.year

    return f"{year:04d}-{month:02d}-{day:02d} {time_text}"


def main():
    """Download the RBC economics page and store its headlines via ``bd``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    # Third-party scraping dependencies are imported here so that bd() and
    # dt() stay importable without them.
    import lxml  # noqa: F401  (backs the "lxml" parser requested below)
    import requests
    from bs4 import BeautifulSoup as bs

    url = "https://www.rbc.ru/economics/"

    headers = {
        "user-agent": (
            "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/109.0.0.0 Mobile Safari/537.36"
        ),
        # NOTE(review): probably meant "X-Requested-With"; left unchanged
        # because correcting it could change what the server returns.
        "X-Requested-Width": "XMLHttpRequest",
    }

    # Without a timeout a stalled connection would block forever.
    resp = requests.get(url, headers=headers, timeout=30)
    resp.raise_for_status()

    soup = bs(resp.text, "lxml")

    a = []
    for item in soup.find_all(class_="item__wrap"):
        category = item.find(class_='item__category')
        title = item.find(class_='item__title')
        # Skip wrappers that lack either element instead of crashing.
        if category is None or title is None:
            continue
        a.append([dt(category.text.strip()), title.text.strip()])

    bd(a)


if __name__ == "__main__":
    main()

# /news.sqlite
# ------------
# https://raw.githubusercontent.com/Good4lien/news_parser/5f6c2d937991b5609e8defc2126714a84d27c39c/news.sqlite