"""Small GUI scraper for rostender.info.

The user types a start date into a tkinter window and presses a button;
the script then runs an extended search for a fixed set of keywords,
filters out irrelevant tenders, and writes one CSV file per keyword.
"""

import requests
import csv
import re
import tkinter as tk

from bs4 import BeautifulSoup as bs
from tkinter import *


# Root window for the "enter a date, press a button" GUI.
root = tk.Tk()
root['bg'] = "#fafafa"
root.title("Парсинг rostender")
root.geometry("300x100")

# Browser-like headers so the site serves the scraper like a normal client.
headers = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
}

# Site root, prepended to the relative hrefs found in search results.
url_for_href = 'http://rostender.info/'

# Search keywords, percent-encoded in cp1251 (the site's legacy encoding).
kwd_reagent = '%F0%E5%E0%E3%E5%ED%F2'      # "реагент"
kwd_nalco = 'nalco'
kwd_nalco_ru = '%ED%E0%EB%EA%EE'           # "налко"
kwd_purotech = 'purotech'
kwd_puro_tech = 'puro tech'
kwd_inhibitor = '%E8%ED%E3%E8%E1%E8%F2%EE%F0'  # "ингибитор"
kwd_biocide = '%E1%E8%EE%F6%E8%E4'         # "биоцид"
kwd_option = '%EE%EF%F2%E8%EE%ED'          # "оптион"
kwd_ektoskeil = '%FD%EA%F2%EE%F1%EA%E5%E9%EB'  # "эктоскейл"
kwd_aminat = '%E0%EC%E8%ED%E0%F2'          # "аминат"


def url_constructor(kwd, actual_date):
    """Return the list of search-result page URLs for *kwd* from *actual_date* on.

    Fetches the first results page and, when a pagination widget
    (``div.b-paging``) is present, appends one URL per page using the page
    count from its ``<strong>`` element. Always returns at least the base URL.
    """
    base_url = (f'http://rostender.info/extsearch.php?pgsearch=0&extsearch=2'
                f'&branch134=on&branch234=on&branch239=on'
                f'&kwd={kwd}&from={actual_date}&to=&pfrom=&pto=')
    urls = [base_url]
    session = requests.Session()
    request = session.get(base_url, headers=headers)

    if request.status_code == 200:
        soup = bs(request.content, 'lxml')
        try:
            pagination = soup.find_all('div', attrs={'class': 'b-paging'})
            for pager in pagination:
                # The <strong> element holds the total page count.
                count = int(pager.find('strong').text)
                for i in range(count):
                    url = (f'http://rostender.info/extsearch.php?pgsearch={i+1}'
                           f'&extsearch=2&branch134=on&branch234=on&branch239=on'
                           f'&kwd={kwd}&from={actual_date}&to=&pfrom=&pto=')
                    if url not in urls:
                        urls.append(url)
            print(f'Количество найденных страниц {len(urls)}')
        except Exception as e:
            # Best-effort: a malformed pager should not abort the whole search.
            print(e)
    else:
        print(f'ERROR {request.status_code}')
    return urls


def rostender_parse(urls, headers):
    """Fetch every results page in *urls* and extract tender records.

    Returns a list of dicts with keys ``description`` (lower-cased title),
    ``href`` (relative link) and ``end_date``. Tenders whose title matches
    the noise filter (ice/medical/lab/etc. keywords) are skipped.
    """
    tenders_info = []
    for url in urls:
        session = requests.Session()
        request = session.get(url, headers=headers)
        if request.status_code == 200:
            soup = bs(request.content, 'lxml')
            try:
                divs = soup.find_all('div', attrs={'class': 'tender-info'})
                for div in divs:
                    # Single lookup for the anchor that carries title + href.
                    link = div.find('a', attrs={'target': '_blank'})
                    title = ' '.join(link.text.split()).lower()
                    href_title = ' '.join(link['href'].split())
                    date_information = ' '.join(
                        div.find('div', attrs={'class': 'col-lg-6 text-right'}).text.split())
                    if re.search(r'лед|мед|лёд|бассейн|лаб|анализ|реактив|хозяйственные', title):
                        print(f'Отфильтровано {title}')
                    else:
                        tenders_info.append({
                            'description': title,
                            'href': href_title,
                            'end_date': date_information,
                        })
            except Exception as e:
                # Best-effort: one broken card should not abort the page.
                print(e)
        else:
            print(f'ERROR {request.status_code}')
        print(f'Статус {request.status_code}')
    print(f'Количество найденных тендеров {len(tenders_info)}')
    return tenders_info


def files_writer(tenders_info, file_name):
    """Overwrite ``<file_name>.csv`` with one row per tender.

    Each row is (description, absolute url, end date). ``newline=''`` is
    the csv-module requirement; without it Windows inserts blank rows.
    """
    with open(f'{file_name}.csv', 'w', newline='') as file:
        a_pen = csv.writer(file)
        for info in tenders_info:
            a_pen.writerow((info['description'],
                            url_for_href + info['href'],
                            info['end_date']))


# (keyword constant, human-readable label for the log, output file stem) —
# same order, labels and file names as the original one-call-per-keyword code.
SEARCH_TASKS = [
    (kwd_reagent, 'реагент', 'reagent'),
    (kwd_nalco, 'Nalco', 'nalco'),
    (kwd_nalco_ru, 'Налко', 'nalco_ru'),
    (kwd_purotech, 'purotech', 'purotech'),
    (kwd_puro_tech, 'puro tech', 'puro_tech'),
    (kwd_inhibitor, 'ингибитор', 'inhibitor'),
    (kwd_biocide, 'биоцид', 'biocide'),
    (kwd_option, 'оптион', 'option'),
    (kwd_ektoskeil, 'эктоскейл', 'ektoskeil'),
    (kwd_aminat, 'аминат', 'aminat'),
]


def get_date():
    """Button callback: read the date from the entry and run every keyword search."""
    actual_date = actual_date_str.get()
    for kwd, label, file_name in SEARCH_TASKS:
        print(f'---------------------------------------\nЗанимаюсь ключевым словом {label}')
        urls = url_constructor(kwd, actual_date)
        tenders_info = rostender_parse(urls, headers)
        files_writer(tenders_info, file_name)


# GUI wiring: a label, a date entry bound to actual_date_str, and a submit
# button. pack() returns None, so the widgets are not kept in variables.
Label(root, text='Введи дату в формате 06.03.2021', bg='#fafafa', font=("Arial", 10)).pack()
actual_date_str = StringVar()
Entry(root, textvariable=actual_date_str).pack()
Button(root, text='Отправить', command=get_date).pack()


if __name__ == '__main__':
    root.mainloop()