"""Small GUI scraper for rostender.info.

The user types a start date into a tkinter window and presses a button;
the script then runs an extended search for a fixed set of keywords,
filters out irrelevant tenders, and writes one CSV file per keyword.
"""

import requests
import csv
import re
import tkinter as tk

from bs4 import BeautifulSoup as bs
from tkinter import *


# Root window for the "enter a date, press a button" GUI.
root = tk.Tk()
root['bg'] = "#fafafa"
root.title("Парсинг rostender")
root.geometry("300x100")

# Browser-like headers so the site serves the scraper like a normal client.
headers = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
}

# Site root, prepended to the relative hrefs found in search results.
url_for_href = 'http://rostender.info/'

# Search keywords, percent-encoded in cp1251 (the site's legacy encoding).
kwd_reagent = '%F0%E5%E0%E3%E5%ED%F2'      # "реагент"
kwd_nalco = 'nalco'
kwd_nalco_ru = '%ED%E0%EB%EA%EE'           # "налко"
kwd_purotech = 'purotech'
kwd_puro_tech = 'puro tech'
kwd_inhibitor = '%E8%ED%E3%E8%E1%E8%F2%EE%F0'  # "ингибитор"
kwd_biocide = '%E1%E8%EE%F6%E8%E4'         # "биоцид"
kwd_option = '%EE%EF%F2%E8%EE%ED'          # "оптион"
kwd_ektoskeil = '%FD%EA%F2%EE%F1%EA%E5%E9%EB'  # "эктоскейл"
kwd_aminat = '%E0%EC%E8%ED%E0%F2'          # "аминат"


def url_constructor(kwd, actual_date):
    """Return the list of search-result page URLs for *kwd* from *actual_date* on.

    Fetches the first results page and, when a pagination widget
    (``div.b-paging``) is present, appends one URL per page using the page
    count from its ``<strong>`` element. Always returns at least the base URL.
    """
    base_url = (f'http://rostender.info/extsearch.php?pgsearch=0&extsearch=2'
                f'&branch134=on&branch234=on&branch239=on'
                f'&kwd={kwd}&from={actual_date}&to=&pfrom=&pto=')
    urls = [base_url]
    session = requests.Session()
    request = session.get(base_url, headers=headers)

    if request.status_code == 200:
        soup = bs(request.content, 'lxml')
        try:
            pagination = soup.find_all('div', attrs={'class': 'b-paging'})
            for pager in pagination:
                # The <strong> element holds the total page count.
                count = int(pager.find('strong').text)
                for i in range(count):
                    url = (f'http://rostender.info/extsearch.php?pgsearch={i+1}'
                           f'&extsearch=2&branch134=on&branch234=on&branch239=on'
                           f'&kwd={kwd}&from={actual_date}&to=&pfrom=&pto=')
                    if url not in urls:
                        urls.append(url)
            print(f'Количество найденных страниц {len(urls)}')
        except Exception as e:
            # Best-effort: a malformed pager should not abort the whole search.
            print(e)
    else:
        print(f'ERROR {request.status_code}')
    return urls


def rostender_parse(urls, headers):
    """Fetch every results page in *urls* and extract tender records.

    Returns a list of dicts with keys ``description`` (lower-cased title),
    ``href`` (relative link) and ``end_date``. Tenders whose title matches
    the noise filter (ice/medical/lab/etc. keywords) are skipped.
    """
    tenders_info = []
    for url in urls:
        session = requests.Session()
        request = session.get(url, headers=headers)
        if request.status_code == 200:
            soup = bs(request.content, 'lxml')
            try:
                divs = soup.find_all('div', attrs={'class': 'tender-info'})
                for div in divs:
                    # Single lookup for the anchor that carries title + href.
                    link = div.find('a', attrs={'target': '_blank'})
                    title = ' '.join(link.text.split()).lower()
                    href_title = ' '.join(link['href'].split())
                    date_information = ' '.join(
                        div.find('div', attrs={'class': 'col-lg-6 text-right'}).text.split())
                    if re.search(r'лед|мед|лёд|бассейн|лаб|анализ|реактив|хозяйственные', title):
                        print(f'Отфильтровано {title}')
                    else:
                        tenders_info.append({
                            'description': title,
                            'href': href_title,
                            'end_date': date_information,
                        })
            except Exception as e:
                # Best-effort: one broken card should not abort the page.
                print(e)
        else:
            print(f'ERROR {request.status_code}')
        print(f'Статус {request.status_code}')
    print(f'Количество найденных тендеров {len(tenders_info)}')
    return tenders_info


def files_writer(tenders_info, file_name):
    """Overwrite ``<file_name>.csv`` with one row per tender.

    Each row is (description, absolute url, end date). ``newline=''`` is
    the csv-module requirement; without it Windows inserts blank rows.
    """
    with open(f'{file_name}.csv', 'w', newline='') as file:
        a_pen = csv.writer(file)
        for info in tenders_info:
            a_pen.writerow((info['description'],
                            url_for_href + info['href'],
                            info['end_date']))


# (keyword constant, human-readable label for the log, output file stem) —
# same order, labels and file names as the original one-call-per-keyword code.
SEARCH_TASKS = [
    (kwd_reagent, 'реагент', 'reagent'),
    (kwd_nalco, 'Nalco', 'nalco'),
    (kwd_nalco_ru, 'Налко', 'nalco_ru'),
    (kwd_purotech, 'purotech', 'purotech'),
    (kwd_puro_tech, 'puro tech', 'puro_tech'),
    (kwd_inhibitor, 'ингибитор', 'inhibitor'),
    (kwd_biocide, 'биоцид', 'biocide'),
    (kwd_option, 'оптион', 'option'),
    (kwd_ektoskeil, 'эктоскейл', 'ektoskeil'),
    (kwd_aminat, 'аминат', 'aminat'),
]


def get_date():
    """Button callback: read the date from the entry and run every keyword search."""
    actual_date = actual_date_str.get()
    for kwd, label, file_name in SEARCH_TASKS:
        print(f'---------------------------------------\nЗанимаюсь ключевым словом {label}')
        urls = url_constructor(kwd, actual_date)
        tenders_info = rostender_parse(urls, headers)
        files_writer(tenders_info, file_name)


# GUI wiring: a label, a date entry bound to actual_date_str, and a submit
# button. pack() returns None, so the widgets are not kept in variables.
Label(root, text='Введи дату в формате 06.03.2021', bg='#fafafa', font=("Arial", 10)).pack()
actual_date_str = StringVar()
Entry(root, textvariable=actual_date_str).pack()
Button(root, text='Отправить', command=get_date).pack()


if __name__ == '__main__':
    root.mainloop()