├── LICENSE
├── README.md
├── ornek-verisetleri
│   ├── amazon-ornek-veriseti.xlsx
│   ├── beyazperde-ornek-veriseti.xlsx
│   ├── ciceksepeti-ornek-veriseti.xlsx
│   ├── eksi-sozluk-ornek-veriseti.xlsx
│   ├── gittigidiyor-ornek-veriseti.xlsx
│   ├── hepsiburada-ornek-veriseti.xlsx
│   ├── incehesap-ornek-veriseti.xlsx
│   ├── kitapyurdu-ornek-veriseti.xlsx
│   ├── mediamarkt-ornek-veriseti.xlsx
│   ├── n11-ornek-veriseti.xlsx
│   ├── trendyol-ornek-veriseti.xlsx
│   ├── yemeksepeti-ornek-veriseti.xlsx
│   └── youtube-ornek-veriseti.xlsx
├── requirements.txt
└── scrapers
    ├── amazon_scraper
    │   ├── amazon-ornek-veriseti.xlsx
    │   ├── amazon_scraper.py
    │   └── requirements.txt
    ├── beyazperde_scraper
    │   ├── beyazperde-ornek-veriseti.xlsx
    │   ├── beyazperde_scraper.py
    │   └── requirements.txt
    ├── ciceksepeti_scraper
    │   ├── ciceksepeti-ornek-veriseti.xlsx
    │   ├── ciceksepeti_scraper.py
    │   └── requirements.txt
    ├── eksi_scraper
    │   ├── eksi-sozluk-ornek-veriseti.xlsx
    │   ├── eksi_scraper.py
    │   └── requirements.txt
    ├── gittigidiyor_scraper
    │   ├── gittigidiyor-ornek-veriseti.xlsx
    │   ├── gittigidiyor_scraper.py
    │   └── requirements.txt
    ├── hepsiburada_scraper
    │   ├── hepsiburada-ornek-veriseti.xlsx
    │   ├── hepsiburada_scraper.py
    │   └── requirements.txt
    ├── incehesap_scraper
    │   ├── incehesap-ornek-veriseti.xlsx
    │   ├── incehesap_scraper.py
    │   └── requirements.txt
    ├── kitapyurdu_scraper
    │   ├── kitapyurdu-ornek-veriseti.xlsx
    │   ├── kitapyurdu_scraper.py
    │   └── requirements.txt
    ├── main-scraper.py
    ├── mediamarkt_scraper
    │   ├── mediamarkt-ornek-veriseti.xlsx
    │   ├── mediamarkt_scraper.py
    │   └── requirements.txt
    ├── n11_scraper
    │   ├── n11-ornek-veriseti.xlsx
    │   ├── n11_scraper.py
    │   └── requirements.txt
    ├── trendyol_scraper
    │   ├── requirements.txt
    │   ├── trendyol-ornek-veriseti.xlsx
    │   └── trendyol_scraper.py
    ├── yemeksepeti_scraper
    │   ├── requirements.txt
    │   ├── yemeksepeti-ornek-veriseti.xlsx
    │   └── yemeksepeti_scraper.py
    └── youtube_scraper
        ├── requirements.txt
        ├── youtube-ornek-veriseti.xlsx
        └── youtube_scraper.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Arda Uzunoğlu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TRScraper
2 |
3 | TRScraper is a tool developed for use in **natural language processing** applications; it performs **text mining** on major platforms that host Turkish-language content.
4 |
5 | ### Method and Libraries Used
6 |
7 | TRScraper scrapes various major platforms with the **selenium** library and exports the collected data to an .xlsx Excel file via the **pandas** library.
8 |
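The selenium-to-pandas pipeline described above can be sketched in a few lines. This is a minimal illustration with hypothetical review texts; in the real scrapers the list is filled by selenium while walking the review pages:

```python
import pandas as pd

# Hypothetical review texts; the real scrapers collect these with selenium.
reviews = ["Harika bir ürün, tavsiye ederim.", "Kargo biraz geç geldi."]

# Each scraper builds one DataFrame column per scraped field...
df = pd.DataFrame({"Yorum": reviews})

# ...and exports it like this (requires an Excel engine such as openpyxl):
# df.to_excel("ornek.xlsx", header=True, index=False)
```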
9 | ### Requirements
10 | > You can install the required libraries with the `pip install -r requirements.txt` command.
11 |
12 | > Chromedriver must be downloaded. Once the download is complete, assign Chromedriver's location to the `path` variable inside the '.py' files.
13 |
14 | [Download ChromeDriver](https://chromedriver.storage.googleapis.com/index.html?path=84.0.4147.30/)
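Concretely, the `path` variable in each scraper is just the chromedriver location. The path below is a placeholder; point it at wherever you unpacked the download:

```python
# Placeholder location; replace with your own chromedriver binary's path.
path = "/usr/local/bin/chromedriver"  # e.g. r"C:\tools\chromedriver.exe" on Windows

# Selenium 3.x (the version pinned in requirements.txt) takes the path positionally:
# from selenium import webdriver
# driver = webdriver.Chrome(path)
```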
15 |
16 |
17 | ### Features in Development
18 |
19 | - Additional platforms
20 | - A user interface
21 | - Increasing the number of usable drivers
22 |
23 | ### Gallery - Ekşi Sözlük Example
24 |
25 | 
26 | 
27 | 
28 | 
29 |
30 | ### Developer
31 |
32 | My GitHub account: [ardauzunoglu](https://github.com/ardauzunoglu)
33 | My LinkedIn account: [Arda Uzunoğlu](https://www.linkedin.com/in/arda-uzunoğlu/)
34 |
--------------------------------------------------------------------------------
/ornek-verisetleri/amazon-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/amazon-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/beyazperde-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/beyazperde-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/ciceksepeti-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/ciceksepeti-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/eksi-sozluk-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/eksi-sozluk-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/gittigidiyor-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/gittigidiyor-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/hepsiburada-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/hepsiburada-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/incehesap-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/incehesap-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/kitapyurdu-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/kitapyurdu-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/mediamarkt-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/mediamarkt-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/n11-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/n11-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/trendyol-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/trendyol-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/yemeksepeti-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/yemeksepeti-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/ornek-verisetleri/youtube-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/ornek-verisetleri/youtube-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.9.1
2 | numpy==1.18.5
3 | pandas==1.1.4
4 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/amazon_scraper/amazon-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/amazon_scraper/amazon-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/amazon_scraper/amazon_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import requests
4 | import pandas as pd
5 | from bs4 import BeautifulSoup
6 | from selenium import webdriver
7 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
8 | from selenium.webdriver.common.keys import Keys
9 |
10 | def amazon_scraper():
11 | def selenium():
12 | def selenium_initialize():
13 | def preference(scrape_input, question):
14 | while True:  # loop until the user gives a valid y/n answer
15 | if scrape_input.lower() == "y":
16 | output = True
17 | break
18 |
19 | elif scrape_input.lower() == "n":
20 | output = False
21 | break
22 |
23 | else:
24 | print("Geçersiz yanıt.")
25 | scrape_input = input(question)
26 |
27 | return output
28 |
29 | def delay_check(delay):
30 | while type(delay) != int:
31 | try:
32 | delay = int(delay)
33 | except ValueError:
34 | print("Lütfen bir sayı değeri giriniz.")
35 | delay = input("Bekleme süresi: ")
36 |
37 | return delay
38 |
39 | global product_name, file, delay, review_texts, review_headlines, review_useful, customer_name_texts, date_texts, scrape_headlines, scrape_useful, scrape_customer_names, scrape_dates, path
40 |
41 | product_name = input("İncelemelerin çekileceği ürün adı: ")
42 | file = input("Oluşturulacak Excel dosyasının adı: ")
43 | file = file + ".xlsx"
44 | delay = delay_check(input("Bekleme süresi(sn): "))
45 |
46 | review_texts = []
47 | review_useful = []
48 | review_headlines = []
49 | customer_name_texts = []
50 | date_texts = []
51 |
52 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
53 | scrape_useful_input = input(scrape_useful_question)
54 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
55 |
56 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
57 | scrape_headlines_input = input(scrape_headlines_question)
58 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
59 |
60 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
61 | scrape_customer_name_input = input(scrape_customer_name_question)
62 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
63 |
64 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
65 | scrape_date_input = input(scrape_date_question)
66 | scrape_dates = preference(scrape_date_input, scrape_date_question)
67 |
68 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
69 |
70 | def selenium_scrape():
71 | try:
72 | print("Chromedriver'a erişiliyor...")
73 | driver = webdriver.Chrome(path)
74 | time.sleep(delay)
75 | print("Chromedriver'a erişildi.")
76 |
77 | except WebDriverException:
78 | print("Chromedriver kullanılamıyor.")
79 | sys.exit()
80 |
81 | try:
82 | print("Amazon adresine gidiliyor...")
83 | driver.get("https://www.amazon.com.tr")
84 | time.sleep(delay)
85 | driver.maximize_window()
86 | time.sleep(delay)
87 | print("Amazon adresine gidildi.")
88 |
89 | except:
90 | print("Amazon'a erişilemiyor.")
91 | sys.exit()
92 |
93 | try:
94 | print("Ürün aranıyor...")
95 | search_bar = driver.find_element_by_id("twotabsearchtextbox")
96 | search_bar.send_keys(product_name)
97 | search_bar.send_keys(Keys.ENTER)
98 | time.sleep(delay)
99 |
100 | product = driver.find_element_by_class_name("s-result-item")
101 | product.click()
102 | time.sleep(delay)
103 | print("Ürün bulundu.")
104 |
105 | except NoSuchElementException:
106 | print("Ürün bulunamadı.")
107 | sys.exit()
108 |
109 | try:
110 | time.sleep(delay)
111 | review_count = driver.find_element_by_id("acrCustomerReviewText")
112 | review_count = int(review_count.text.replace("değerlendirme", "").replace(".", "").strip())  # strip the label and the thousands separator
113 |
114 | except NoSuchElementException:
115 | print("İnceleme bulunamadı.")
116 | sys.exit()
117 |
118 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
119 | match = False
120 |
121 | while not match:
122 | lastCount = lenOfPage
123 | time.sleep(delay)
124 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
125 | if lastCount == lenOfPage:
126 | match = True
127 |
128 | see_all_reviews = driver.find_element_by_xpath("//*[@id='reviews-medley-footer']/div[2]/a")
129 | see_all_reviews.click()
130 |
131 | if review_count % 10 == 0:
132 | length_of_page = review_count // 10
133 |
134 | else:
135 | length_of_page = (review_count // 10) + 1
136 |
137 | l = 1
138 |
139 | while l <= length_of_page:
140 |
141 | print("İncelemeler çekiliyor...")
142 | print("Sayfa: " + str(l))
143 |
144 | time.sleep(delay)
145 |
146 | reviews = driver.find_elements_by_class_name("review")
147 | for review in reviews:
148 | review_text = review.find_element_by_class_name("review-text-content").text
149 | review_texts.append(review_text)
150 |
151 | try:
152 | headline = review.find_element_by_class_name("review-title").text
153 | review_headlines.append(headline)
154 |
155 | except:
156 | review_headlines.append("BOŞ")
157 |
158 | try:
159 | useful = review.find_element_by_class_name("cr-vote-text")
160 | useful = useful.text.split()[0]
161 | if useful.lower() == "bir":
162 | useful = "1"
163 | review_useful.append(useful)
164 |
165 | except:
166 | review_useful.append("0")
167 |
168 | customer = review.find_element_by_class_name("a-profile-name").text
169 | customer_name_texts.append(customer)
170 |
171 | date = review.find_element_by_class_name("review-date")
172 | date = date.text.split()
173 | date = date[1:4]
174 | date = " ".join(date)
175 | date_texts.append(date)
176 |
177 | driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
178 | try:
179 | next_page = driver.find_element_by_class_name("a-last")
180 | next_page.click()
181 |
182 | except:
183 | pass
184 |
185 | l += 1
186 |
187 | driver.close()
188 |
189 | length_list = [review_texts, review_useful, review_headlines, customer_name_texts, date_texts]
190 | limit = map(len, length_list)
191 | limit = min(list(limit))
192 | limit -= 1
193 |
194 | review_texts_fin = review_texts[:limit]
195 | df = pd.DataFrame({"Yorum": review_texts_fin})
196 |
197 | if scrape_useful:
198 | review_useful_fin = review_useful[:limit]
199 | df["Yorum Beğeni Sayısı"] = review_useful_fin
201 |
202 | if scrape_headlines:
203 | review_headlines_fin = review_headlines[:limit]
204 | df["Yorumun Başlığı"] = review_headlines_fin
205 |
206 | if scrape_customer_names:
207 | customer_name_texts_fin = customer_name_texts[:limit]
208 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
209 |
210 | if scrape_dates:
211 | date_texts_fin = date_texts[:limit]
212 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
213 |
214 | df.to_excel(file, header = True, index = False)
215 |
216 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
217 | print(x)
218 | print("""
219 | --------------------------------------------------------------------------
220 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
221 | - Github Hesabım: ardauzunoglu -
222 | --------------------------------------------------------------------------
223 | """)
224 | selenium_initialize()
225 | selenium_scrape()
226 |
227 | def beautifulsoup():
228 | def preference(scrape_input, question):
229 | while True:  # loop until the user gives a valid y/n answer
230 | if scrape_input.lower() == "y":
231 | output = True
232 | break
233 |
234 | elif scrape_input.lower() == "n":
235 | output = False
236 | break
237 |
238 | else:
239 | print("Geçersiz yanıt.")
240 | scrape_input = input(question)
241 |
242 | return output
243 |
244 | def get_soup(url):
245 | r = requests.get(url)
246 | soup = BeautifulSoup(r.text, "html.parser")
247 | return soup
248 |
249 | def get_length_of_pages():
250 | soup = get_soup(page_url)
251 | review_count = int(soup.find("div", {"data-hook":"cr-filter-info-review-rating-count"}).text.split()[4])
252 | if review_count % 10 == 0:
253 | length_of_pages = review_count // 10
254 | else:
255 | length_of_pages = review_count // 10 + 1
256 | return length_of_pages
257 |
258 | def get_reviews(soup):
259 | reviews = soup.find_all("div", {"data-hook":"review"})
260 | for review in reviews:
261 | try:
262 | like_count = review.find("span", {"data-hook":"helpful-vote-statement"}).text.split()[0] if review.find("span", {"data-hook":"helpful-vote-statement"}).text.split()[0] != "Bir" else "1"
263 | except:
264 | like_count = 0
265 | review = {
266 | "Yorum":review.find("span", {"data-hook":"review-body"}).text.replace("\n", ""),
267 | "Yorumun Beğeni Sayısı":like_count,
268 | "Yorumun Başlığı":review.find("a", {"data-hook":"review-title"}).text,
269 | "Yorum Yazan Müşteri":review.find("span", {"class":"a-profile-name"}).text.replace("\n", ""),
270 | "Yorumun Yazıldığı Tarih":" ".join(review.find("span", {"data-hook":"review-date"}).text.split()[1:4])
271 | }
272 | review_list.append(review)
273 |
274 | def list_to_excel(reviews):
275 | df = pd.DataFrame(reviews)
276 | if not scrape_useful:
277 | df = df.drop(columns=["Yorumun Beğeni Sayısı"])
278 |
279 | if not scrape_headlines:
280 | df = df.drop(columns=["Yorumun Başlığı"])
281 |
282 | if not scrape_customer_names:
283 | df = df.drop(columns=["Yorum Yazan Müşteri"])
284 |
285 | if not scrape_dates:
286 | df = df.drop(columns=["Yorumun Yazıldığı Tarih"])
287 |
288 | df.to_excel(file, header = True, index = False)
289 | print("Excele kaydedildi.")
290 |
291 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
292 | scrape_useful_input = input(scrape_useful_question)
293 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
294 |
295 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
296 | scrape_headlines_input = input(scrape_headlines_question)
297 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
298 |
299 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
300 | scrape_customer_name_input = input(scrape_customer_name_question)
301 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
302 |
303 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
304 | scrape_date_input = input(scrape_date_question)
305 | scrape_dates = preference(scrape_date_input, scrape_date_question)
306 |
307 | review_list = []
308 | page_url = input("Ürün linki: ")
309 | file = input("Oluşturulacak Excel dosyasının adı: ") + ".xlsx"
310 |
311 | current_page = 1
312 |
313 | while True:
314 | if current_page == 1:
315 | page_url = page_url + "&reviewerType=all_reviews&pageNumber=" + str(current_page)
316 | else:
317 | page_url = page_url[:page_url.rfind("=") + 1] + str(current_page)  # swap in the new page number (handles multi-digit pages)
318 | soup = get_soup(page_url)
319 | print(page_url)
320 | print("Veriler çekiliyor...")
321 | print("Sayfa: " + str(current_page))
322 | get_reviews(soup)
323 | current_page += 1
324 | length_of_pages = get_length_of_pages()
325 | if current_page > length_of_pages:
326 | break
327 |
328 | list_to_excel(review_list)
329 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
330 | print(x)
331 | print("""
332 | --------------------------------------------------------------------------
333 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
334 | - Github Hesabım: ardauzunoglu -
335 | --------------------------------------------------------------------------
336 | """)
337 |
338 | print("""
339 | ---------------------------------------------------------
340 | - Amazon Scraper'a hoş geldiniz! -
341 | - Geliştirici: Arda Uzunoğlu -
342 | ---------------------------------------------------------
343 | """)
344 |
345 | s_or_bs = input("Kullanılacak kütüphane(s/bs): ")
346 | if s_or_bs.lower() == "bs":
347 | beautifulsoup()
348 |
349 | elif s_or_bs.lower() == "s":
350 | selenium()
351 |
352 | else:
353 | print("Geçersiz yanıt.")
354 |
355 | if __name__ == "__main__":
356 | amazon_scraper()
--------------------------------------------------------------------------------
/scrapers/amazon_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.9.1
2 | numpy==1.18.5
3 | pandas==1.1.4
4 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/beyazperde_scraper/beyazperde-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/beyazperde_scraper/beyazperde-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/beyazperde_scraper/beyazperde_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 |
7 | def beyazperde_scrape():
8 | def initialize():
9 | def preference(scrape_input, question):
10 | while True:  # loop until the user gives a valid y/n answer
11 | if scrape_input.lower() == "y":
12 | output = True
13 | break
14 |
15 | elif scrape_input.lower() == "n":
16 | output = False
17 | break
18 |
19 | else:
20 | print("Geçersiz yanıt.")
21 | scrape_input = input(question)
22 |
23 | return output
24 |
25 | def delay_check(delay):
26 | while type(delay) != int:
27 | try:
28 | delay = int(delay)
29 | except ValueError:
30 | print("Lütfen bir sayı değeri giriniz.")
31 | delay = input("Bekleme süresi: ")
32 |
33 | return delay
34 |
35 | print("""
36 | ---------------------------------------------------------
37 | - Beyazperde Scraper'a hoş geldiniz! -
38 | - Geliştirici: Arda Uzunoğlu -
39 | ---------------------------------------------------------
40 | """)
41 |
42 | global film, file, delay, review_texts, review_useful, review_not_useful, review_scores, member_name_texts, date_texts, scrape_useful, scrape_scores, scrape_member_name, scrape_date, path
43 |
44 | film = input("İncelemelerin Çekileceği Film: ")
45 | file = input("Oluşturulacak Excel dosyasının adı: ") + ".xlsx"
46 | delay = delay_check(input("Bekleme süresi(sn): "))
47 |
48 | review_texts = []
49 | review_useful = []
50 | review_not_useful = []
51 | review_scores = []
52 | member_name_texts = []
53 | date_texts = []
54 |
55 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
56 | scrape_useful_input = input(scrape_useful_question)
57 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
58 |
59 | scrape_scores_question = "Filme verilen puan çekilsin mi(y/n): "
60 | scrape_scores_input = input(scrape_scores_question)
61 | scrape_scores = preference(scrape_scores_input, scrape_scores_question)
62 |
63 | scrape_member_name_question = "Kullanıcı isimleri çekilsin mi(y/n): "
64 | scrape_member_name_input = input(scrape_member_name_question)
65 | scrape_member_name = preference(scrape_member_name_input, scrape_member_name_question)
66 |
67 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
68 | scrape_date_input = input(scrape_date_question)
69 | scrape_date = preference(scrape_date_input, scrape_date_question)
70 |
71 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
72 |
73 | def scrape():
74 | try:
75 | print("Chromedriver'a erişiliyor...")
76 | driver = webdriver.Chrome(path)
77 | time.sleep(delay)
78 | print("Chromedriver'a erişildi.")
79 |
80 | except WebDriverException:
81 | print("Chromedriver kullanılamıyor.")
82 | sys.exit()
83 |
84 | try:
85 | print("Beyazperde adresine gidiliyor...")
86 | driver.get("http://www.beyazperde.com")
87 | time.sleep(delay)
88 | driver.maximize_window()
89 | time.sleep(delay)
90 | print("Beyazperde adresine gidildi.")
91 |
92 | except:
93 | print("Beyazperde'ye erişilemiyor.")
94 | sys.exit()
95 |
96 | try:
97 | print("Film aranıyor...")
98 | search = driver.find_element_by_class_name("header-search-input")
99 | search.send_keys(film)
100 | time.sleep(delay+3)
101 |
102 | auto_complete = driver.find_element_by_class_name("autocomplete-result-title")
103 | auto_complete.click()
104 | time.sleep(delay)
105 | print("Film bulundu.")
106 |
107 | except NoSuchElementException:
108 | print("Film bulunamadı.")
109 | sys.exit()
110 |
111 | try:
112 | member_reviews = driver.find_element_by_link_text("Üye Eleştirileri")
113 | member_reviews.click()
114 | time.sleep(delay)
115 |
116 | review_count = driver.find_element_by_class_name("titlebar-title.titlebar-title-md").text
117 | review_count = int(review_count.replace(" kullanıcı eleştirisi",""))
118 | time.sleep(delay)
119 |
120 | except NoSuchElementException:
121 | print("Film incelemeleri bulunamadı.")
122 | sys.exit()
123 |
124 | try:
125 | close_banner = driver.find_element_by_id("creativeClose")
126 | close_banner.click()
127 | time.sleep(delay)
128 |
129 | except NoSuchElementException:
130 | pass
131 |
132 | if (review_count % 20) == 0:
133 | review_page_count = review_count // 20
134 |
135 | else:
136 | review_page_count = (review_count // 20) + 1
137 |
138 | constant_url = driver.current_url
139 |
140 | l = 1
141 | while l <= review_page_count:
142 |
143 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
144 | match = False
145 |
146 | while not match:
147 | lastCount = lenOfPage
148 | time.sleep(delay)
149 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
150 | if lastCount == lenOfPage:
151 | match = True
152 |
153 | print("Veriler çekiliyor...")
154 | print("Sayfa: " + str(l))
155 |
156 | reviews = driver.find_elements_by_class_name("review-card-content")
157 | for review in reviews:
158 | review = review.text
159 | review_texts.append(review)
160 |
161 | usefuls = driver.find_elements_by_class_name("reviews-users-comment-useful")
162 | for useful_unp in usefuls:
163 | useful_unp = useful_unp.text
164 | useful_unp = useful_unp.split()
165 |
166 | useful = useful_unp[0][0]
167 | not_useful = useful_unp[0][1]
168 |
169 | review_useful.append(useful)
170 | review_not_useful.append(not_useful)
171 |
172 | scores = driver.find_elements_by_class_name("stareval-note")
173 | for score in scores:
174 | score = score.text.replace(",0","")
175 | review_scores.append(score)
176 |
177 | member_names = driver.find_elements_by_class_name("review-card-user-infos.cf")
178 | for member_name in member_names:
179 | separation = member_name.text.index("\n")
180 | member_name = member_name.text[:separation]
181 | member_name_texts.append(member_name)
182 |
183 | dates = driver.find_elements_by_class_name("review-card-meta-date")
184 | for date in dates:
185 | date = date.text.split()[:3]
186 | date = " ".join(date)
187 | date_texts.append(date)
188 |
189 | l += 1
190 |
191 | url = constant_url + "?page=" + str(l)
192 | driver.get(url)
193 |
194 | driver.close()
195 |
196 | length_list = [review_texts, review_useful, review_not_useful, review_scores, member_name_texts, date_texts]
197 | limit = map(len, length_list)
198 | limit = min(list(limit))
199 | limit -= 1
200 |
201 | review_texts_fin = review_texts[:limit]
202 | df = pd.DataFrame({"İncelemeler":review_texts_fin})
203 |
204 | if scrape_useful:
205 | review_useful_fin = review_useful[:limit]
206 | review_not_useful_fin = review_not_useful[:limit]
207 | df["İncelemeyi Yararlı Bulanlar"] = review_useful_fin
208 | df["İncelemeyi Yararlı Bulmayanlar"] = review_not_useful_fin
209 |
210 | if scrape_scores:
211 | review_scores_fin = review_scores[:limit]
212 | df["İnceleme Puanları"] = review_scores_fin
213 |
214 | if scrape_member_name:
215 | member_name_texts_fin = member_name_texts[:limit]
216 | df["İncelemeyi Yayınlayan Kişi"] = member_name_texts_fin
217 |
218 | if scrape_date:
219 | date_texts_fin = date_texts[:limit]
220 | df["İncelemenin Yayınlanma Tarihi"] = date_texts_fin
221 |
222 | df.to_excel(file, header = True, index = False)
223 | x = "Çektiğiniz veriler "+ file + " adlı excel dosyasına kaydedildi."
224 | print(x)
225 | print("""
226 | --------------------------------------------------------------------------
227 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
228 | - Github Hesabım: ardauzunoglu -
229 | --------------------------------------------------------------------------
230 | """)
231 | time.sleep(3)
232 |
233 | initialize()
234 | scrape()
235 |
236 | if __name__ == "__main__":
237 | beyazperde_scrape()
--------------------------------------------------------------------------------
/scrapers/beyazperde_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/ciceksepeti_scraper/ciceksepeti-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/ciceksepeti_scraper/ciceksepeti-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/ciceksepeti_scraper/ciceksepeti_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def ciceksepeti_scraper():
9 | def initialize():
10 | def preference(scrape_input, question):
11 |             while True:  # repeat until a valid y/n answer breaks the loop
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Çiçeksepeti Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, customer_province_texts, customer_name_texts, date_texts, scrape_province, scrape_customer_names, scrape_dates, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | customer_province_texts = []
52 | customer_name_texts = []
53 | date_texts = []
54 |
55 | scrape_province_question = "Müşterinin konumu çekilsin mi(y/n): "
56 | scrape_province_input = input(scrape_province_question)
57 | scrape_province = preference(scrape_province_input, scrape_province_question)
58 |
59 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
60 | scrape_customer_name_input = input(scrape_customer_name_question)
61 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
62 |
63 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
64 | scrape_date_input = input(scrape_date_question)
65 | scrape_dates = preference(scrape_date_input, scrape_date_question)
66 |
67 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
68 |
69 | def scrape():
70 | try:
71 | print("Chromedriver'a erişiliyor...")
72 | driver = webdriver.Chrome(path)
73 | time.sleep(delay)
74 | print("Chromedriver'a erişildi.")
75 |
76 | except WebDriverException:
77 | print("Chromedriver kullanılamıyor.")
78 | sys.exit()
79 |
80 | try:
81 | print("Çiçeksepeti adresine gidiliyor...")
82 | driver.get("https://www.ciceksepeti.com")
83 | time.sleep(delay)
84 | driver.maximize_window()
85 | time.sleep(delay)
86 | print("Çiçeksepeti adresine gidildi.")
87 |
88 |         except WebDriverException:
89 | print("Çiçeksepeti'ne erişilemiyor.")
90 | sys.exit()
91 |
92 | try:
93 | print("Ürün aranıyor...")
94 | search_bar = driver.find_element_by_class_name("product-search__input")
95 | search_bar.send_keys(product_name)
96 | search_bar.send_keys(Keys.ENTER)
97 | time.sleep(delay)
98 |
99 | product = driver.find_element_by_class_name("products__item-inner")
100 | product.click()
101 | time.sleep(delay)
102 | print("Ürün bulundu.")
103 |
104 | except NoSuchElementException:
105 | print("Ürün bulunamadı.")
106 | sys.exit()
107 |
108 | see_all_reviews = driver.find_element_by_class_name("comments__all-comments")
109 | see_all_reviews.click()
110 |
111 | review_count = driver.find_element_by_class_name("page-comments__product-evaluation__comment-count").text.replace("Yorum", "")
112 | review_count = int(review_count.strip("()"))
113 |
114 | if review_count % 20 == 0:
115 | length_of_page = review_count // 20
116 |
117 | else:
118 | length_of_page = (review_count // 20) + 1
119 |
120 | l = 1
121 |
122 | while l <= length_of_page:
123 | print("İncelemeler çekiliyor...")
124 | print("Sayfa: " + str(l))
125 |
126 | time.sleep(delay)
127 |
128 | reviews = driver.find_elements_by_class_name("page-comments__list__item")
129 | for review in reviews:
130 | review_text = review.find_element_by_class_name("page-comments__list__item__text").text
131 | if review_text == "":
132 | review_text = "BOŞ"
133 | review_texts.append(review_text)
134 |
135 | customer_name = review.find_element_by_class_name("page-comments__list__item__name").text
136 | customer_name_texts.append(customer_name)
137 |
138 | try:
139 | review = review.text.replace(review_text, "")
140 |
141 | except:
142 | pass
143 |
144 | review = review.replace(customer_name, "")
145 | review = review.replace(" | ", "").split()
146 | customer_province = review[0]
147 | date = review[1]
148 |
149 | customer_province_texts.append(customer_province)
150 | date_texts.append(date)
151 |
152 | try:
153 | driver.execute_script("window.scrollTo(0, 2160)")
154 | next_page = driver.find_element_by_class_name("cs-next")
155 | next_page.click()
156 |
157 | except:
158 | pass
159 |
160 | l += 1
161 |
162 | driver.close()
163 |
164 | length_list = [review_texts, customer_province_texts, customer_name_texts, date_texts]
165 | limit = map(len, length_list)
166 | limit = min(list(limit))
167 |             limit -= 1  # leave out the last record, which may have been scraped incompletely
168 |
169 | review_texts_fin = review_texts[:limit]
170 | df = pd.DataFrame({"Yorum": review_texts_fin})
171 |
172 | if scrape_province:
173 | customer_province_texts_fin = customer_province_texts[:limit]
174 |                 df["Müşterinin Konumu"] = customer_province_texts_fin
176 |
177 | if scrape_customer_names:
178 | customer_name_texts_fin = customer_name_texts[:limit]
179 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
180 |
181 | if scrape_dates:
182 | date_texts_fin = date_texts[:limit]
183 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
184 |
185 | df.to_excel(file, header = True, index = False)
186 |
187 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
188 | print(x)
189 |
190 | print("""
191 | --------------------------------------------------------------------------
192 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
193 | - Github Hesabım: ardauzunoglu -
194 | --------------------------------------------------------------------------
195 | """)
196 |
197 | initialize()
198 | scrape()
199 |
200 | if __name__ == "__main__":
201 | ciceksepeti_scraper()
--------------------------------------------------------------------------------
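The paging logic in `ciceksepeti_scraper.py` (20 reviews per page, with a remainder adding one extra page) is ceiling division. A minimal standalone sketch of that arithmetic:

```python
import math

def page_count(review_count: int, per_page: int = 20) -> int:
    # Ceiling division: a partially filled last page still counts as a page.
    return math.ceil(review_count / per_page)
```

For example, `page_count(40)` is 2 while `page_count(41)` is 3, matching the `//`-plus-remainder branches in the scraper.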
/scrapers/ciceksepeti_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/eksi_scraper/eksi-sozluk-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/eksi_scraper/eksi-sozluk-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/eksi_scraper/eksi_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import NoSuchElementException, WebDriverException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def eksisozluk_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 |             while True:  # repeat until a valid y/n answer breaks the loop
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Ekşi Sözlük Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global title, file, delay, entry_texts, author_texts, date_texts, scrape_author_input, scrape_date_input, scrape_author, scrape_date, path
44 |
45 | title = input("Entrylerin çekileceği başlık: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | entry_texts = []
51 | author_texts = []
52 | date_texts = []
53 |
54 | scrape_author_question = "Yazar isimleri çekilsin mi(y/n): "
55 | scrape_author_input = input(scrape_author_question)
56 | scrape_author = preference(scrape_author_input, scrape_author_question)
57 |
58 | scrape_date_question = "Entry tarihleri çekilsin mi(y/n): "
59 | scrape_date_input = input(scrape_date_question)
60 | scrape_date = preference(scrape_date_input, scrape_date_question)
61 |
62 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
63 |
64 | def scrape():
65 | try:
66 | print("Chromedriver'a erişiliyor...")
67 | driver = webdriver.Chrome(path)
68 | time.sleep(delay)
69 | print("Chromedriver'a erişildi.")
70 |
71 | except WebDriverException:
72 | print("Chromedriver kullanılamıyor.")
73 | sys.exit()
74 |
75 | try:
76 | print("Ekşi Sözlük adresine gidiliyor...")
77 | driver.get("https://eksisozluk.com")
78 | time.sleep(delay)
79 | driver.maximize_window()
80 | time.sleep(delay)
81 | print("Ekşi Sözlük adresine gidildi.")
82 |
83 |         except WebDriverException:
84 | print("Ekşi Sözlük'e erişilemiyor.")
85 | sys.exit()
86 |
87 | try:
88 | print("Başlık aranıyor...")
89 | search_bar = driver.find_element_by_id("search-textbox")
90 | search_bar.send_keys(title)
91 | search_bar.send_keys(Keys.ENTER)
92 | time.sleep(delay)
93 | print("Başlık bulundu.")
94 |
95 | except NoSuchElementException:
96 | print("Başlık bulunamadı.")
97 | sys.exit()
98 |
99 | try:
100 | length_of_title = driver.find_element_by_class_name("last")
101 | length_of_title = int(length_of_title.text)
102 |
103 | except NoSuchElementException:
104 | length_of_title = 1
105 |
106 | l = 1
107 |
108 | while l <= length_of_title:
109 |
110 | print("Veriler çekiliyor...")
111 | print("Sayfa: " + str(l))
112 |
113 | time.sleep(delay)
114 |
115 | entries = driver.find_elements_by_css_selector(".content")
116 | for entry in entries:
117 | entry = entry.text
118 | entry_texts.append(entry)
119 |
120 | time.sleep(delay)
121 |
122 | dates = driver.find_elements_by_class_name("entry-date")
123 | for date in dates:
124 | date = date.text
125 | date_texts.append(date)
126 |
127 | time.sleep(delay)
128 |
129 | authors = driver.find_elements_by_class_name("entry-author")
130 | for author in authors:
131 | author = author.text
132 | author_texts.append(author)
133 |
134 | l += 1
135 |
136 |             try:
137 |                 close_ad = driver.find_element_by_id("interstitial-close-link-tag")
138 |                 close_ad.click()
139 |                 time.sleep(delay)
140 |             except NoSuchElementException:
141 |                 pass
142 |
143 |             try:
144 |                 next_page = driver.find_element_by_class_name("next")
145 |                 next_page.click()
146 |             except NoSuchElementException:
147 |                 pass
148 |
149 | driver.close()
150 |
151 | length_list = [entry_texts, author_texts, date_texts]
152 | limit = map(len, length_list)
153 | limit = min(list(limit))
154 | limit -= 1
155 |
156 | entry_texts_fin = entry_texts[:limit]
157 | df = pd.DataFrame({"Entryler": entry_texts_fin})
158 |
159 | if scrape_date:
160 | date_texts_fin = date_texts[:limit]
161 | df["Tarihler"] = date_texts_fin
162 |
163 | if scrape_author:
164 | author_texts_fin = author_texts[:limit]
165 | df["Yazarlar"] = author_texts_fin
166 |
167 | df.to_excel(file, header = True, index = False)
168 |
169 | print("Başlık kazıması tamamlandı.")
170 | print("Çektiğiniz veriler "+ file + " adlı excel dosyasına kaydedildi.")
171 | print("""
172 | --------------------------------------------------------------------------
173 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
174 | - Github Hesabım: ardauzunoglu -
175 | --------------------------------------------------------------------------
176 | """)
177 |
178 | time.sleep(3)
179 |
180 | initialize()
181 | scrape()
182 |
183 | if __name__ == "__main__":
184 | eksisozluk_scrape()
--------------------------------------------------------------------------------
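Every scraper trims its collected lists to a common length before building the DataFrame, since the columns must align row-for-row. A standalone sketch of that step (note: unlike the scrapers, which also subtract one, this keeps all complete rows):

```python
def truncate_to_common_length(*columns):
    # Trim every column to the shortest length so they align row-for-row.
    limit = min(len(col) for col in columns)
    return [col[:limit] for col in columns]
```

This generalizes the `map(len, ...)` / `min(...)` / slicing sequence repeated in each `scrape()` function.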
/scrapers/eksi_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/gittigidiyor_scraper/gittigidiyor-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/gittigidiyor_scraper/gittigidiyor-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/gittigidiyor_scraper/gittigidiyor_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def gittigidiyor_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 |             while True:  # repeat until a valid y/n answer breaks the loop
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Gittigidiyor Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_headlines, review_useful, customer_name_texts, date_texts, scrape_headlines, scrape_useful, scrape_customer_names, scrape_dates, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_headlines = []
53 | customer_name_texts = []
54 | date_texts = []
55 |
56 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
57 | scrape_useful_input = input(scrape_useful_question)
58 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
59 |
60 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
61 | scrape_headlines_input = input(scrape_headlines_question)
62 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
63 |
64 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
65 | scrape_customer_name_input = input(scrape_customer_name_question)
66 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
67 |
68 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
69 | scrape_date_input = input(scrape_date_question)
70 | scrape_dates = preference(scrape_date_input, scrape_date_question)
71 |
72 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
73 |
74 | def scrape():
75 | try:
76 | print("Chromedriver'a erişiliyor...")
77 | driver = webdriver.Chrome(path)
78 | time.sleep(delay)
79 | print("Chromedriver'a erişildi.")
80 |
81 | except WebDriverException:
82 | print("Chromedriver kullanılamıyor.")
83 | sys.exit()
84 |
85 | try:
86 | print("Gittigidiyor adresine gidiliyor...")
87 | driver.get("https://www.gittigidiyor.com")
88 | time.sleep(delay)
89 | driver.maximize_window()
90 | time.sleep(delay)
91 | print("Gittigidiyor adresine gidildi.")
92 |
93 |         except WebDriverException:
94 | print("Gittigidiyor'a erişilemiyor.")
95 | sys.exit()
96 |
97 | try:
98 | print("Ürün aranıyor...")
99 | search_bar = driver.find_element_by_xpath("//*[@id='__next']/header/div[3]/div/div/div/div[2]/form/div/div[1]/div[2]/input")
100 | search_bar.send_keys(product_name)
101 | search_bar.send_keys(Keys.ENTER)
102 | time.sleep(delay)
103 |
104 | product = driver.find_element_by_class_name("srp-item-list")
105 | product.click()
106 | time.sleep(delay)
107 | print("Ürün bulundu.")
108 |
109 | except NoSuchElementException:
110 | print("Ürün bulunamadı.")
111 | sys.exit()
112 |
113 | url = driver.current_url
114 | root = url.index("_")
115 | url = url[:root]
116 | url = url + "/yorumlari"
117 | driver.get(url)
118 |
119 | review_counts = driver.find_element_by_class_name("catalog-point-content").text
120 | review_counts = int(review_counts.replace("Kullanıcı Değerlendirmesi", ""))
121 |
122 | if review_counts % 10 == 0:
123 | length_of_page = review_counts // 10
124 | else:
125 | length_of_page = (review_counts // 10) + 1
126 |
127 | l = 1
128 |
129 | while l <= length_of_page:
130 |
131 | print("İncelemeler çekiliyor...")
132 | print("Sayfa: " + str(l))
133 |
134 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
135 | match = False
136 |
137 | while match == False:
138 | lastCount = lenOfPage
139 | time.sleep(delay)
140 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
141 | if lastCount == lenOfPage:
142 | match = True
143 |
144 | time.sleep(delay)
145 |
146 | reviews = driver.find_elements_by_class_name("user-catalog-review-comment-detail")
147 | for review in reviews:
148 | review = review.text
149 | if review == "":
150 | review = "BOŞ"
151 | review_texts.append(review)
152 |
153 | print("Veriler çekiliyor...")
154 | print("İnceleme: " + str(len(review_texts)))
155 |
156 | time.sleep(delay)
157 |
158 | usefuls = driver.find_elements_by_class_name("point-count")
159 | for useful in usefuls:
160 | useful = useful.text
161 | if useful == "":
162 | useful = "0"
163 | review_useful.append(useful)
164 |
165 | headlines = driver.find_elements_by_class_name("user-catalog-review-header")
166 | for headline in headlines:
167 | headline = headline.text
168 | if headline == "":
169 | headline = "BOŞ"
170 | review_headlines.append(headline)
171 |
172 | customers = driver.find_elements_by_class_name("user-detail-container")
173 | for customer in customers:
174 | customer = customer.text
175 | customer = customer.split()
176 |
177 | customer_name = customer[0]
178 | customer_name_texts.append(customer_name)
179 |
180 | date = customer[1]
181 | date_texts.append(date)
182 |
183 | try:
184 | next_button = driver.find_element_by_class_name("next-link")
185 | next_button.click()
186 |
187 | except:
188 | pass
189 |
190 | l += 1
191 |
192 | driver.close()
193 |
194 | length_list = [review_texts, review_useful, review_headlines, customer_name_texts, date_texts]
195 | limit = map(len, length_list)
196 | limit = min(list(limit))
197 | limit -= 1
198 |
199 | review_texts_fin = review_texts[:limit]
200 | df = pd.DataFrame({"Yorum": review_texts_fin})
201 |
202 | if scrape_useful:
203 | review_useful_fin = review_useful[:limit]
204 | df["Yorum Beğeni Sayısı"] = review_useful_fin
206 |
207 | if scrape_headlines:
208 | review_headlines_fin = review_headlines[:limit]
209 | df["Yorumun Başlığı"] = review_headlines_fin
210 |
211 | if scrape_customer_names:
212 | customer_name_texts_fin = customer_name_texts[:limit]
213 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
214 |
215 | if scrape_dates:
216 | date_texts_fin = date_texts[:limit]
217 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
218 |
219 | df.to_excel(file, header = True, index = False)
220 |
221 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
222 | print(x)
223 |
224 | print("""
225 | --------------------------------------------------------------------------
226 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
227 | - Github Hesabım: ardauzunoglu -
228 | --------------------------------------------------------------------------
229 | """)
230 |
231 | time.sleep(3)
232 | initialize()
233 | scrape()
234 |
235 | if __name__ == "__main__":
236 | gittigidiyor_scrape()
--------------------------------------------------------------------------------
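The `preference` helper duplicated in every scraper re-prompts until a valid `y`/`n` answer arrives. A compact, testable refactor of the same behavior; the `ask` parameter is an assumption added here for testability and is not part of the original code:

```python
def preference(question: str, ask=input) -> bool:
    # Re-prompt until the answer is y or n; return True for y.
    answer = ask(question)
    while answer.lower() not in ("y", "n"):
        print("Geçersiz yanıt.")
        answer = ask(question)
    return answer.lower() == "y"
```

Injecting `ask` lets the loop be driven by a scripted sequence of answers instead of stdin.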
/scrapers/gittigidiyor_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/hepsiburada_scraper/hepsiburada-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/hepsiburada_scraper/hepsiburada-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/hepsiburada_scraper/hepsiburada_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import NoSuchElementException, WebDriverException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def hepsiburada_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 |             while True:  # repeat until a valid y/n answer breaks the loop
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Hepsiburada Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_useful, review_not_useful, customer_name_texts, customer_province_texts, customer_age_texts, date_texts, scrape_useful, scrape_customer_name, scrape_customer_province, scrape_customer_age, scrape_date, path
44 |
45 | product_name = input("Değerlendirmelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_not_useful = []
53 | customer_name_texts = []
54 | customer_province_texts = []
55 | customer_age_texts = []
56 | date_texts = []
57 |
58 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
59 | scrape_useful_input = input(scrape_useful_question)
60 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
61 |
62 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
63 | scrape_customer_name_input = input(scrape_customer_name_question)
64 | scrape_customer_name = preference(scrape_customer_name_input, scrape_customer_name_question)
65 |
66 | scrape_customer_province_question = "Müşteri konumları çekilsin mi(y/n): "
67 | scrape_customer_province_input = input(scrape_customer_province_question)
68 | scrape_customer_province = preference(scrape_customer_province_input, scrape_customer_province_question)
69 |
70 | scrape_customer_age_question = "Müşteri yaşları çekilsin mi(y/n): "
71 | scrape_customer_age_input = input(scrape_customer_age_question)
72 | scrape_customer_age = preference(scrape_customer_age_input, scrape_customer_age_question)
73 |
74 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
75 | scrape_date_input = input(scrape_date_question)
76 | scrape_date = preference(scrape_date_input, scrape_date_question)
77 |
78 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
79 |
80 | def scrape():
81 | try:
82 | print("Chromedriver'a erişiliyor...")
83 | driver = webdriver.Chrome(path)
84 | time.sleep(delay)
85 | print("Chromedriver'a erişildi.")
86 |
87 | except WebDriverException:
88 | print("Chromedriver kullanılamıyor.")
89 | sys.exit()
90 |
91 | try:
92 | print("Hepsiburada adresine gidiliyor...")
93 | driver.get("https://www.hepsiburada.com")
94 | time.sleep(delay)
95 | driver.maximize_window()
96 | time.sleep(delay)
97 | print("Hepsiburada adresine gidildi.")
98 |
99 |         except WebDriverException:
100 | print("Hepsiburada'ya erişilemiyor.")
101 | sys.exit()
102 |
103 | try:
104 | print("Ürün aranıyor...")
105 | search_bar = driver.find_element_by_class_name("desktopOldAutosuggestTheme-input")
106 | search_bar.send_keys(product_name)
107 | search_bar.send_keys(Keys.ENTER)
108 | time.sleep(delay)
109 |
110 | product = driver.find_element_by_class_name("search-item")
111 | product.click()
112 | time.sleep(delay)
113 | print("Ürün bulundu.")
114 |
115 | except NoSuchElementException:
116 | print("Ürün bulunamadı.")
117 | sys.exit()
118 |
119 | try:
120 | review_count = driver.find_element_by_id("productReviewsTab").text
121 | review_count = review_count.replace("Değerlendirmeler ", "")
122 | review_count = review_count.replace("(","")
123 | review_count = review_count.replace(")","")
124 | review_count = int(review_count)
125 |                 if review_count % 10 == 0:
126 | review_page_count = review_count // 10
127 |
128 | else:
129 | review_page_count = (review_count // 10) + 1
130 |
131 | constant_url = driver.current_url
132 |
133 | except NoSuchElementException:
134 | print("İnceleme bulunamadı.")
135 | sys.exit()
136 |
137 | try:
138 | index_of_question_mark = constant_url.index("?")
139 | constant_url = constant_url[:index_of_question_mark]
140 |
141 |             except ValueError:  # str.index raises ValueError when "?" is absent
142 | pass
143 |
144 | i = 1
145 | while i <= review_page_count:
146 |
147 | url = constant_url + "-yorumlari?sayfa=" + str(i)
148 | driver.get(url)
149 |
150 | print("Veriler çekiliyor...")
151 | print("Sayfa: " + str(i))
152 |
153 | reviews = driver.find_elements_by_xpath("//*[@id='hermes-voltran-comments']//span[@itemprop='description']")
154 | for review in reviews:
155 | review = review.text
156 | review_texts.append(review)
157 |
158 | customer_names = driver.find_elements_by_xpath("//*[@id='hermes-voltran-comments']//span[@itemprop='author']")
159 | for customer_name in customer_names:
160 | customer_name = customer_name.text
161 | customer_name_texts.append(customer_name)
162 |
163 | customer_ages = driver.find_elements_by_xpath("//*[@class='hermes-ReviewCard-module-1-Wp3']//span[2]")
164 | for customer_age in customer_ages:
165 | customer_age = customer_age.text
166 | customer_age = customer_age.replace("(", "")
167 | customer_age = customer_age.replace(")", "")
168 |
169 | if customer_age == "":
170 | customer_age = "Boş"
171 |
172 | customer_age_texts.append(customer_age)
173 |
174 | customer_provinces = driver.find_elements_by_xpath("//*[@class='hermes-ReviewCard-module-1-Wp3']//span[3]")
175 | for customer_province in customer_provinces:
176 | customer_province = customer_province.text
177 | customer_province = customer_province.replace("-", "")
178 | customer_province = customer_province.replace(" ", "")
179 | customer_province_texts.append(customer_province)
180 |
181 | dates = driver.find_elements_by_xpath("//*[@id='hermes-voltran-comments']//span[@itemprop='datePublished']")
182 | for date in dates:
183 | date = date.text
184 | date = date.replace(",", "")
185 | date = date.split()
186 |
187 | day_conv = {
188 | "Pts":"Pazartesi",
189 | "Sal":"Salı",
190 | "Çar":"Çarşamba",
191 | "Per":"Perşembe",
192 | "Cum":"Cuma",
193 | "Cts":"Cumartesi",
194 | "Paz":"Pazar",
195 | "Pazartesi":"Pazartesi",
196 | "Salı":"Salı",
197 | "Çarşamba":"Çarşamba",
198 | "Perşembe":"Perşembe",
199 | "Cuma":"Cuma",
200 | "Cumartesi":"Cumartesi",
201 | "Pazar":"Pazar"
202 | }
203 |
204 | years = ["2020", "2019", "2018", "2017", "2016", "2015", "2014", "2013", "2012", "2011", "2010", "2009", "2008", "2007", "2006", "2005", "2004", "2003", "2002", "2001", "2000"]
205 |
206 | if date[2] not in years:
207 | date.insert(2, "2021")
208 |
209 | date[-1] = day_conv[date[-1]]
210 | date = " ".join(date)
211 | date_texts.append(date)
212 |
213 | usefuls = driver.find_elements_by_xpath("//*[@id='hermes-voltran-comments']//button[@class='hermes-ReviewCard-module-1MoiF']")
214 | not_usefuls = driver.find_elements_by_xpath("//*[@id='hermes-voltran-comments']//button[@class='hermes-ReviewCard-module-39K0Y']")
215 |
216 | for useful in usefuls:
217 | useful = useful.text
218 | useful = useful.replace("Evet", "")
219 | useful = useful.replace("(", "")
220 | useful = useful.replace(")", "")
221 | review_useful.append(useful)
222 |
223 | for not_useful in not_usefuls:
224 | not_useful = not_useful.text
225 | not_useful = not_useful.replace("Hayır", "")
226 | not_useful = not_useful.replace("(", "")
227 | not_useful = not_useful.replace(")", "")
228 | review_not_useful.append(not_useful)
229 |
230 | while len(review_useful) < len(date_texts):
231 | review_useful.append("0")
232 | review_not_useful.append("0")
233 |
234 | while len(review_texts) < len(date_texts):
235 | review_texts.append("Boş")
236 |
237 | i += 1
238 |
239 | driver.close()
240 |
241 | length_list = [review_texts, review_useful, review_not_useful, date_texts, customer_name_texts, customer_age_texts, customer_province_texts]
242 | limit = map(len, length_list)
243 | limit = min(list(limit))
244 | limit -= 1
245 |
246 | review_texts_fin = review_texts[:limit]
247 | df = pd.DataFrame({"Değerlendirme: ":review_texts_fin})
248 |
249 | if scrape_useful:
250 | review_useful_fin = review_useful[:limit]
251 | review_not_useful_fin = review_not_useful[:limit]
252 | df["Değerlendirmeyi Yararlı Bulanlar"] = review_useful_fin
253 | df["Değerlendirmeyi Yararlı Bulmayanlar"] = review_not_useful_fin
254 |
255 | if scrape_date:
256 | date_texts_fin = date_texts[:limit]
257 | df["Değerlendirme Tarihi:"] = date_texts_fin
258 |
259 | if scrape_customer_name:
260 | customer_name_texts_fin = customer_name_texts[:limit]
261 | df["Müşterinin Adı Soyadı"] = customer_name_texts_fin
262 |
263 | if scrape_customer_age:
264 | customer_age_texts_fin = customer_age_texts[:limit]
265 | df["Müşterinin Yaşı"] = customer_age_texts_fin
266 |
267 | if scrape_customer_province:
268 | customer_province_texts_fin = customer_province_texts[:limit]
269 | df["Müşterinin Konumu"] = customer_province_texts_fin
270 |
271 | df.to_excel(file, header = True, index = False)
272 |
273 | x = "Çektiğiniz veriler "+ file + " adlı excel dosyasına kaydedildi."
274 | print(x)
275 |
276 | print("""
277 | --------------------------------------------------------------------------
278 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
279 | - Github Hesabım: ardauzunoglu -
280 | --------------------------------------------------------------------------
281 | """)
282 |
283 | time.sleep(3)
284 |
285 | initialize()
286 | scrape()
287 |
288 | if __name__ == "__main__":
289 | hepsiburada_scrape()
--------------------------------------------------------------------------------
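The date handling in `hepsiburada_scraper.py` inserts a default year when one is missing and expands abbreviated Turkish day names via a lookup table. A condensed sketch of that normalization; the `default_year` parameter generalizes the hard-coded `"2021"`:

```python
DAY_CONV = {"Pts": "Pazartesi", "Sal": "Salı", "Çar": "Çarşamba",
            "Per": "Perşembe", "Cum": "Cuma", "Cts": "Cumartesi", "Paz": "Pazar"}

def normalize_date(raw: str, default_year: str = "2021") -> str:
    # "12 Mayıs Sal" -> "12 Mayıs 2021 Salı"; an explicit year is kept as-is.
    parts = raw.replace(",", "").split()
    if len(parts) < 3 or not parts[2].isdigit():
        parts.insert(2, default_year)
    parts[-1] = DAY_CONV.get(parts[-1], parts[-1])  # expand abbreviations only
    return " ".join(parts)
```

Checking `parts[2].isdigit()` replaces the scraper's fixed `years` list, so the sketch is not limited to years 2000-2020.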
/scrapers/hepsiburada_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
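Every scraper in this repo finishes the same way: it computes the length of the shortest collected list and slices every list to that length before building the DataFrame (the `length_list` / `limit` block above). A minimal standalone sketch of that alignment step — the names here are illustrative, not taken from the repo, and unlike the scrapers it keeps all aligned rows instead of also dropping the last one:

```python
import pandas as pd

def align_columns(columns):
    # Trim every list to the shortest one so all DataFrame columns have equal length.
    limit = min(map(len, columns.values()))
    return pd.DataFrame({name: values[:limit] for name, values in columns.items()})

df = align_columns({
    "Değerlendirme": ["çok iyi", "idare eder", "kötü"],
    "Tarih": ["01.01.2021", "02.01.2021"],  # a partially scraped column
})
```

With one column scraped short, the DataFrame ends up with two rows and no ragged columns, which is exactly why the scrapers slice before calling `to_excel`.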
/scrapers/incehesap_scraper/incehesap-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/incehesap_scraper/incehesap-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/incehesap_scraper/incehesap_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def incehesap_scraper():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - İncehesap Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_headlines, review_useful, customer_name_texts, date_texts, scrape_headlines, scrape_useful, scrape_customer_names, scrape_dates, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_headlines = []
53 | customer_name_texts = []
54 | date_texts = []
55 |
56 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
57 | scrape_useful_input = input(scrape_useful_question)
58 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
59 |
60 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
61 | scrape_headlines_input = input(scrape_headlines_question)
62 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
63 |
64 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
65 | scrape_customer_name_input = input(scrape_customer_name_question)
66 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
67 |
68 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
69 | scrape_date_input = input(scrape_date_question)
70 | scrape_dates = preference(scrape_date_input, scrape_date_question)
71 |
72 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
73 |
74 | def scrape():
75 | try:
76 | print("Chromedriver'a erişiliyor...")
77 | driver = webdriver.Chrome(path)
78 | time.sleep(delay)
79 | print("Chromedriver'a erişildi.")
80 |
81 | except WebDriverException:
82 | print("Chromedriver kullanılamıyor.")
83 | sys.exit()
84 |
85 | try:
86 | print("İncehesap adresine gidiliyor...")
87 | driver.get("https://www.incehesap.com")
88 | time.sleep(delay)
89 | driver.maximize_window()
90 | time.sleep(delay)
91 | print("İncehesap adresine gidildi.")
92 |
93 | except:
94 | print("İncehesap'a erişilemiyor.")
95 | sys.exit()
96 |
97 | try:
98 | print("Ürün aranıyor...")
99 | search_bar = driver.find_element_by_id("query")
100 | search_bar.send_keys(product_name)
101 | search_bar.send_keys(Keys.ENTER)
102 | time.sleep(delay)
103 |
104 | product = driver.find_element_by_class_name("product-link")
105 | product.click()
106 | time.sleep(delay)
107 | print("Ürün bulundu.")
108 |
109 | except NoSuchElementException:
110 | print("Ürün bulunamadı.")
111 | sys.exit()
112 |
113 | try:
114 | cancel_cookie_pop = driver.find_element_by_xpath("/html/body/div[4]/div/button")
115 | cancel_cookie_pop.click()
116 | except:
117 | pass
118 |
119 | try:
120 | time.sleep(delay)
121 | review_count = driver.find_element_by_xpath("/html/body/div[2]/div[1]/main/section[1]/div[2]/div[3]/div[4]/div[2]/a[2]")
122 | review_count.click()
123 | review_count = review_count.text.replace("(", "")
124 | review_count = review_count.replace(")", "")
125 | review_count = int(review_count.replace("Yorumlar", ""))
126 |
127 | except NoSuchElementException:
128 | print("İnceleme bulunamadı.")
129 | sys.exit()
130 |
131 | try:
132 | time.sleep(delay)
133 | load_all_comments = driver.find_element_by_class_name("all-comments")
134 | load_all_comments.click()
135 |
136 | except:
137 | pass
138 |
139 | while len(review_texts) <= review_count:
140 |
141 | time.sleep(delay)
142 | driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
143 |
144 | comments = driver.find_elements_by_class_name("item")
145 | for comment in comments:
146 | try:
147 | customer = comment.find_element_by_xpath("//*[@id='comment-list']/div["+str(len(review_texts)+2)+"]/div[1]/strong").text
148 | customer_name_texts.append(customer)
149 |
150 | date = comment.find_element_by_xpath("//*[@id='comment-list']/div["+str(len(review_texts)+2)+"]/div[1]/span").text.split()
151 | date = " ".join(date[:3])
152 | date_texts.append(date)
153 |
154 | headline = comment.find_element_by_xpath("//*[@id='comment-list']/div["+str(len(review_texts)+2)+"]/div[3]/b").text
155 | if headline == "":
156 | review_headlines.append("BOŞ")
157 | else:
158 | review_headlines.append(headline)
159 |
160 | useful = comment.find_element_by_xpath("//*[@id='comment-list']/div["+str(len(review_texts)+2)+"]/div[4]/a[1]").text
161 | useful = useful.replace("Evet", "").replace("(", "").replace(")", "")
162 | review_useful.append(useful)
163 |
164 | review = comment.find_element_by_xpath("//*[@id='comment-list']/div["+str(len(review_texts)+2)+"]/div[3]/span").text
165 | review_texts.append(review)
166 |
167 | print("İncelemeler çekiliyor...")
168 | print("İnceleme: " + str(len(review_texts)))
169 |
170 | except:
171 | break
172 |
173 | break
174 |
175 | driver.close()
176 |
177 | length_list = [review_texts, review_useful, review_headlines, customer_name_texts, date_texts]
178 | limit = map(len, length_list)
179 | limit = min(list(limit))
180 | limit -= 1
181 |
182 | review_texts_fin = review_texts[:limit]
183 | df = pd.DataFrame({"Yorum": review_texts_fin})
184 |
185 | if scrape_useful:
186 | review_useful_fin = review_useful[:limit]
187 | df["Yorum Beğeni Sayısı"] = review_useful_fin
188 |
189 | if scrape_headlines:
190 | review_headlines_fin = review_headlines[:limit]
191 | df["Yorumun Başlığı"] = review_headlines_fin
192 |
193 | if scrape_customer_names:
194 | customer_name_texts_fin = customer_name_texts[:limit]
195 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
196 |
197 | if scrape_dates:
198 | date_texts_fin = date_texts[:limit]
199 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
200 |
201 | df.to_excel(file, header = True, index = False)
202 |
203 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
204 | print(x)
205 |
206 | print("""
207 | --------------------------------------------------------------------------
208 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
209 | - Github Hesabım: ardauzunoglu -
210 | --------------------------------------------------------------------------
211 | """)
212 |
213 | initialize()
214 | scrape()
215 |
216 | if __name__ == "__main__":
217 | incehesap_scraper()
--------------------------------------------------------------------------------
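Several scrapers in this repo repeat an inline scroll loop: jump to the bottom, wait, read `document.body.scrollHeight` again, and stop once the height no longer grows. A sketch of that pattern as a reusable helper — `driver` is assumed to be any object exposing Selenium's `execute_script`:

```python
import time

def scroll_to_end(driver, delay):
    # Scroll to the bottom repeatedly; stop when the page height stops growing.
    last = driver.execute_script(
        "window.scrollTo(0, document.body.scrollHeight); return document.body.scrollHeight;")
    while True:
        time.sleep(delay)
        height = driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight); return document.body.scrollHeight;")
        if height == last:
            return height
        last = height
```

The termination condition is the same one the scrapers spell out with `lenOfPage` / `lastCount` / `match`; extracting it avoids copying that block into every site module.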
/scrapers/incehesap_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/kitapyurdu_scraper/kitapyurdu-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/kitapyurdu_scraper/kitapyurdu-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/kitapyurdu_scraper/kitapyurdu_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import NoSuchElementException, WebDriverException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def kitapyurdu_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Kitapyurdu Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global book, file, delay, review_texts, review_useful, review_not_useful, author_texts, date_texts, scrape_useful, scrape_author, scrape_date, path
44 |
45 | book = input("İncelemelerin Çekileceği Kitap Adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_not_useful = []
53 | author_texts = []
54 | date_texts = []
55 |
56 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
57 | scrape_useful_input = input(scrape_useful_question)
58 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
59 |
60 | scrape_author_question = "Kullanıcı isimleri çekilsin mi(y/n): "
61 | scrape_author_input = input(scrape_author_question)
62 | scrape_author = preference(scrape_author_input, scrape_author_question)
63 |
64 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
65 | scrape_date_input = input(scrape_date_question)
66 | scrape_date = preference(scrape_date_input, scrape_date_question)
67 |
68 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
69 |
70 | def scrape():
71 | try:
72 | print("Chromedriver'a erişiliyor...")
73 | driver = webdriver.Chrome(path)
74 | time.sleep(delay)
75 | print("Chromedriver'a erişildi.")
76 |
77 | except WebDriverException:
78 | print("Chromedriver kullanılamıyor.")
79 | sys.exit()
80 |
81 | try:
82 | print("Kitapyurdu adresine gidiliyor...")
83 | driver.get("https://www.kitapyurdu.com")
84 | time.sleep(delay)
85 | driver.maximize_window()
86 | time.sleep(delay)
87 | print("Kitapyurdu adresine gidildi.")
88 |
89 | except:
90 | print("Kitapyurdu'na erişilemiyor.")
91 | sys.exit()
92 |
93 | try:
94 | print("Kitap aranıyor...")
95 | search = driver.find_element_by_id("search-input")
96 | search.send_keys(book)
97 | search.send_keys(Keys.ENTER)
98 |
99 | time.sleep(delay)
100 |
101 | try:
102 | close_notification = driver.find_element_by_class_name("opt-in-disallow-button")
103 | close_notification.click()
104 |
105 | except NoSuchElementException:
106 | pass
107 |
108 | time.sleep(delay)
109 |
110 | product = driver.find_element_by_class_name("name.ellipsis")
111 | product.click()
112 | time.sleep(delay)
113 | print("Kitap bulundu.")
114 |
115 | except NoSuchElementException:
116 | print("Kitap bulunamadı.")
117 | sys.exit()
118 |
119 | try:
120 | reviewsTab = driver.find_element_by_class_name("pr__htabs-review-text")
121 | reviewsTab.click()
122 | time.sleep(delay)
123 |
124 | except NoSuchElementException:
125 | print("Kitap incelemeleri bulunamadı.")
126 | sys.exit()
127 |
128 | l = 1
129 | review_length = reviewsTab.text.replace("Yorumlar","")
130 |
131 | try:
132 | review_length = int(review_length.replace(".", ""))
133 |
134 | except ValueError:
135 | print("İnceleme sayısı okunamadı.")
136 | sys.exit()
137 |
138 | if review_length % 5 == 0:
139 | review_length = review_length // 5
140 | else:
141 | review_length = (review_length // 5) + 1
142 |
143 | while l <= review_length:
144 |
145 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
146 | match = False
147 |
148 | while match == False:
149 | lastCount = lenOfPage
150 | time.sleep(delay)
151 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
152 | if lastCount == lenOfPage:
153 | match = True
154 |
155 | print("Veriler çekiliyor...")
156 | print("Sayfa: " + str(l))
157 |
158 | try:
159 | reviews = driver.find_elements_by_class_name("review-text")
160 | for review in reviews:
161 | review = review.text
162 | review_texts.append(review)
163 |
164 | authors = driver.find_elements_by_xpath("//a[@class ='alt']//span[@itemprop='name']")
165 | for author in authors:
166 | author = author.text
167 | author_texts.append(author)
168 |
169 | dates = driver.find_elements_by_class_name("review-date")
170 | for date in dates:
171 | date = date.text
172 | date_texts.append(date)
173 |
174 | usefuls = driver.find_elements_by_xpath("//div[@class ='agree']//span[@class='count']")
175 | for useful in usefuls:
176 | useful = useful.text
177 | review_useful.append(useful)
178 |
179 | not_usefuls = driver.find_elements_by_xpath("//div[@class ='disagree']//span[@class='count']")
180 | for not_useful in not_usefuls:
181 | not_useful = not_useful.text
182 | review_not_useful.append(not_useful)
183 |
184 | except NoSuchElementException:
185 | time.sleep(delay)
186 |
187 | l += 1
188 |
189 | try:
190 | next_page = driver.find_element_by_link_text(str(l))
191 | next_page.click()
192 |
193 | except NoSuchElementException:
194 | time.sleep(delay)
195 |
196 | driver.close()
197 |
198 | length_list = [review_texts, review_useful, review_not_useful, author_texts, date_texts]
199 | limit = map(len, length_list)
200 | limit = min(list(limit))
201 | limit -= 1
202 |
203 | review_texts_fin = review_texts[:limit]
204 |
205 | df = pd.DataFrame({"Yorumlar": review_texts_fin})
206 |
207 | if scrape_author:
208 | author_texts_fin = author_texts[:limit]
209 | df["Müşteriler"] = author_texts_fin
210 |
211 | if scrape_date:
212 | date_texts_fin = date_texts[:limit]
213 | df["İnceleme Tarihi"] = date_texts_fin
214 |
215 | if scrape_useful:
216 | review_useful_fin = review_useful[:limit]
217 | review_not_useful_fin = review_not_useful[:limit]
218 | df["İncelemeyi Yararlı Bulan Kişi Sayısı"] = review_useful_fin
219 | df["İncelemeyi Yararlı Bulmayan Kişi Sayısı"] = review_not_useful_fin
220 |
221 | df.to_excel(file, header = True, index = False)
222 |
223 | x = "Çektiğiniz veriler "+ file + " adlı excel dosyasına kaydedildi."
224 | print(x)
225 |
226 | print("""
227 | --------------------------------------------------------------------------
228 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
229 | - Github Hesabım: ardauzunoglu -
230 | --------------------------------------------------------------------------
231 | """)
232 |
233 | time.sleep(3)
234 | initialize()
235 | scrape()
236 |
237 | if __name__ == "__main__":
238 | kitapyurdu_scrape()
--------------------------------------------------------------------------------
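The kitapyurdu scraper derives the number of review pages from the tab label (e.g. "Yorumlar 1.234", with a Turkish thousands separator) and then does a manual ceiling division by five reviews per page. The same two steps, sketched as pure functions — the names are illustrative, not the repo's:

```python
def parse_review_count(label):
    # "Yorumlar 1.234" -> 1234: drop the label text and the thousands separator.
    return int(label.replace("Yorumlar", "").replace(".", "").strip())

def page_count(review_count, per_page=5):
    # Ceiling division without math.ceil: -(-a // b).
    return -(-review_count // per_page)
```

Keeping the parsing pure makes the off-by-one cases (`review_count % 5 == 0` vs. not) trivial to check without driving a browser.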
/scrapers/kitapyurdu_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/main-scraper.py:
--------------------------------------------------------------------------------
1 | from amazon_scraper import amazon_scraper
2 | from beyazperde_scraper import beyazperde_scrape
3 | from ciceksepeti_scraper import ciceksepeti_scraper
4 | from eksi_scraper import eksisozluk_scrape
5 | from gittigidiyor_scraper import gittigidiyor_scrape
6 | from hepsiburada_scraper import hepsiburada_scrape
7 | from incehesap_scraper import incehesap_scraper
8 | from kitapyurdu_scraper import kitapyurdu_scrape
9 | from mediamarkt_scraper import mediamarkt_scraper
10 | from n11_scraper import n11_scraper
11 | from trendyol_scraper import trendyol_scrape
12 | from yemeksepeti_scraper import yemeksepeti_scrape
13 | from youtube_scraper import youtube_scrape
14 |
15 | libs = {"amazon": amazon_scraper, "beyazperde": beyazperde_scrape, "çiçeksepeti": ciceksepeti_scraper, "ekşi sözlük": eksisozluk_scrape, "gittigidiyor": gittigidiyor_scrape,
16 |         "hepsiburada": hepsiburada_scrape, "incehesap": incehesap_scraper, "kitapyurdu": kitapyurdu_scrape, "mediamarkt": mediamarkt_scraper, "n11": n11_scraper,
17 |         "trendyol": trendyol_scrape, "yemeksepeti": yemeksepeti_scrape, "youtube": youtube_scrape}
18 |
19 | choice = input("Kullanacağınız scraper: ").lower()
20 |
21 | while choice not in libs:
22 |     print("Geçersiz yanıt.")
23 |     choice = input("Kullanacağınız scraper: ").lower()
24 |
25 | scraper = libs[choice]
26 | scraper()
--------------------------------------------------------------------------------
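main-scraper.py is a dictionary dispatch: the lowercased answer is used as a key into a name-to-function table. The lookup itself can be sketched (and tested) independently of the scraper imports — the registry below is a stand-in, not the real one:

```python
def pick_scraper(choice, registry):
    # Normalize the answer and look it up; None signals an invalid choice.
    return registry.get(choice.strip().lower())

calls = []
registry = {"n11": lambda: calls.append("n11"),
            "amazon": lambda: calls.append("amazon")}

scraper = pick_scraper("  N11 ", registry)
if scraper is not None:
    scraper()
```

Using `dict.get` instead of `if choice in choices` plus a separate lookup keeps the valid-name list and the dispatch table from drifting apart.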
/scrapers/mediamarkt_scraper/mediamarkt-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/mediamarkt_scraper/mediamarkt-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/mediamarkt_scraper/mediamarkt_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def mediamarkt_scraper():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - MediaMarkt Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_headlines, review_useful, customer_name_texts, date_texts, scrape_headlines, scrape_useful, scrape_customer_names, scrape_dates, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_headlines = []
53 | customer_name_texts = []
54 | date_texts = []
55 |
56 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
57 | scrape_useful_input = input(scrape_useful_question)
58 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
59 |
60 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
61 | scrape_headlines_input = input(scrape_headlines_question)
62 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
63 |
64 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
65 | scrape_customer_name_input = input(scrape_customer_name_question)
66 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
67 |
68 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
69 | scrape_date_input = input(scrape_date_question)
70 | scrape_dates = preference(scrape_date_input, scrape_date_question)
71 |
72 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
73 |
74 | def scrape():
75 | try:
76 | print("Chromedriver'a erişiliyor...")
77 | driver = webdriver.Chrome(path)
78 | time.sleep(delay)
79 | print("Chromedriver'a erişildi.")
80 |
81 | except WebDriverException:
82 | print("Chromedriver kullanılamıyor.")
83 | sys.exit()
84 |
85 | try:
86 | print("MediaMarkt adresine gidiliyor...")
87 | driver.get("https://www.mediamarkt.com.tr")
88 | time.sleep(delay)
89 | driver.maximize_window()
90 | time.sleep(delay)
91 | print("MediaMarkt adresine gidildi.")
92 |
93 | except:
94 | print("MediaMarkt'a erişilemiyor.")
95 | sys.exit()
96 |
97 | try:
98 | print("Ürün aranıyor...")
99 | search_bar = driver.find_element_by_xpath("//*[@id='search-autocomplete']/form/input[1]")
100 | search_bar.send_keys(product_name)
101 | search_bar.send_keys(Keys.ENTER)
102 | time.sleep(delay)
103 |
104 | product = driver.find_element_by_class_name("clickable")
105 | product.click()
106 | time.sleep(delay)
107 | print("Ürün bulundu.")
108 |
109 | except NoSuchElementException:
110 | print("Ürün bulunamadı.")
111 | sys.exit()
112 |
113 | time.sleep(delay)
114 | review_count = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/div[1]/h2").text.replace("Yorumlar ", "")
115 | review_count = review_count.replace("(", "")
116 | review_count = review_count.replace(")", "")
117 | review_count = int(review_count)
118 | driver.execute_script("window.scrollTo(0, 1080)")
119 |
120 | while len(review_texts) < review_count:
121 | if len(review_texts) <= 2:
122 | try:
123 | useful = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[1]/li["+str(len(review_texts) + 1)+"]/article/div[1]").text
124 | useful_prep = useful.split()[0]
125 | review_useful.append(useful_prep)
126 |
127 | headline = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[1]/li["+str(len(review_texts) + 1)+"]/article/h3").text
128 | review_headlines.append(headline)
129 |
130 | customer_name = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[1]/li["+str(len(review_texts) + 1)+"]/aside/strong").text
131 | customer_name_texts.append(customer_name)
132 |
133 | date = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[1]/li["+str(len(review_texts) + 1)+"]/aside/small").text
134 | date_texts.append(date)
135 |
136 | review = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[1]/li["+str(len(review_texts) + 1)+"]/article").text
137 | question = driver.find_element_by_class_name("review-rate").text
138 |
139 | review = review.replace(useful, "").replace(headline, "").replace(question, "")
140 | review_texts.append(review)
141 | print("Veriler çekiliyor...")
142 | print("İnceleme: " + str(len(review_texts)))
143 |
144 | except:
145 | break
146 |
147 | try:
148 | cookie_accept_button = driver.find_element_by_xpath("//*[@id='cookie-info-layer']/div[1]/div/div[2]/a")
149 | cookie_accept_button.click()
150 |
151 | except:
152 | pass
153 |
154 | try:
155 | load_all_reviews = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/div[3]/a[1]")
156 | load_all_reviews.click()
157 |
158 | except:
159 | pass
160 |
161 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
162 | match = False
163 |
164 | while match == False:
165 | lastCount = lenOfPage
166 | time.sleep(delay)
167 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
168 | if lastCount == lenOfPage:
169 | match = True
170 |
171 | time.sleep(delay)
172 |
173 | else:
174 | try:
175 | useful = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[2]/li["+str(len(review_texts) + 1)+"]/article/div[1]").text
176 | useful_prep = useful.split()[0]
177 | review_useful.append(useful_prep)
178 |
179 | headline = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[2]/li["+str(len(review_texts) + 1)+"]/article/h3").text
180 | review_headlines.append(headline)
181 |
182 | customer_name = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[2]/li["+str(len(review_texts) + 1)+"]/aside/strong").text
183 | customer_name_texts.append(customer_name)
184 |
185 | date = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[2]/li["+str(len(review_texts) + 1)+"]/aside/small").text
186 | date_texts.append(date)
187 |
188 | review = driver.find_element_by_xpath("//*[@id='yorumlar-']/div/ul[2]/li["+str(len(review_texts) + 1)+"]/article").text
189 | question = driver.find_element_by_class_name("review-rate").text
190 |
191 | review = review.replace(useful, "").replace(headline, "").replace(question, "")
192 | review_texts.append(review)
193 | print("Veriler çekiliyor...")
194 | print("İnceleme: " + str(len(review_texts)))
195 |
196 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
197 | match = False
198 |
199 | while match == False:
200 | lastCount = lenOfPage
201 | time.sleep(delay)
202 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
203 | if lastCount == lenOfPage:
204 | match = True
205 |
206 | except:
207 | break
208 |
209 | time.sleep(delay)
210 |
211 | driver.close()
212 |
213 | length_list = [review_texts, review_useful, review_headlines, customer_name_texts, date_texts]
214 | limit = map(len, length_list)
215 | limit = min(list(limit))
216 | limit -= 1
217 |
218 | review_texts_fin = review_texts[:limit]
219 | df = pd.DataFrame({"Yorum": review_texts_fin})
220 |
221 | if scrape_useful:
222 | review_useful_fin = review_useful[:limit]
223 | df["Yorum Beğeni Sayısı"] = review_useful_fin
224 |
225 | if scrape_headlines:
226 | review_headlines_fin = review_headlines[:limit]
227 | df["Yorumun Başlığı"] = review_headlines_fin
228 |
229 | if scrape_customer_names:
230 | customer_name_texts_fin = customer_name_texts[:limit]
231 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
232 |
233 | if scrape_dates:
234 | date_texts_fin = date_texts[:limit]
235 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
236 |
237 | df.to_excel(file, header = True, index = False)
238 |
239 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
240 | print(x)
241 |
242 | print("""
243 | --------------------------------------------------------------------------
244 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
245 | - Github Hesabım: ardauzunoglu -
246 | --------------------------------------------------------------------------
247 | """)
248 | initialize()
249 | scrape()
250 |
251 | if __name__ == "__main__":
252 | mediamarkt_scraper()
--------------------------------------------------------------------------------
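Every scraper defines the same `preference` y/n prompt loop inline. Split into a pure mapping function, the decision becomes testable without stubbing `input()` — this is a sketch of that refactoring, not the repo's exact helper:

```python
def parse_yes_no(answer):
    # "y"/"n" (any case, surrounding spaces ignored) -> bool; anything else -> None.
    answer = answer.strip().lower()
    if answer == "y":
        return True
    if answer == "n":
        return False
    return None
```

The re-prompting loop then only has to keep asking while `parse_yes_no` returns `None`, so validation logic lives in one tested place instead of twelve copies.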
/scrapers/mediamarkt_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/n11_scraper/n11-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/n11_scraper/n11-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/n11_scraper/n11_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def n11_scraper():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while type(delay) != int:
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - N11 Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_headlines, review_useful, customer_name_texts, date_texts, scrape_headlines, scrape_useful, scrape_customer_names, scrape_dates, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | review_headlines = []
53 | customer_name_texts = []
54 | date_texts = []
55 |
56 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
57 | scrape_useful_input = input(scrape_useful_question)
58 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
59 |
60 | scrape_headlines_question = "İncelemenin başlığı çekilsin mi(y/n): "
61 | scrape_headlines_input = input(scrape_headlines_question)
62 | scrape_headlines = preference(scrape_headlines_input, scrape_headlines_question)
63 |
64 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
65 | scrape_customer_name_input = input(scrape_customer_name_question)
66 | scrape_customer_names = preference(scrape_customer_name_input, scrape_customer_name_question)
67 |
68 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
69 | scrape_date_input = input(scrape_date_question)
70 | scrape_dates = preference(scrape_date_input, scrape_date_question)
71 |
72 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
73 |
74 | def scrape():
75 | try:
76 | print("Chromedriver'a erişiliyor...")
77 | driver = webdriver.Chrome(path)
78 | time.sleep(delay)
79 | print("Chromedriver'a erişildi.")
80 |
81 | except WebDriverException:
82 | print("Chromedriver kullanılamıyor.")
83 | sys.exit()
84 |
85 | try:
86 | print("N11 adresine gidiliyor...")
87 | driver.get("https://www.n11.com")
88 | time.sleep(delay)
89 | driver.maximize_window()
90 | time.sleep(delay)
91 | print("N11 adresine gidildi.")
92 |
93 | except WebDriverException:
94 | print("N11'e erişilemiyor.")
95 | sys.exit()
96 |
97 | try:
98 | print("Ürün aranıyor...")
99 | search_bar = driver.find_element_by_id("searchData")
100 | search_bar.send_keys(product_name)
101 | search_bar.send_keys(Keys.ENTER)
102 | time.sleep(delay)
103 |
104 | product = driver.find_element_by_class_name("productName")
105 | product.click()
106 | time.sleep(delay)
107 | print("Ürün bulundu.")
108 |
109 | except NoSuchElementException:
110 | print("Ürün bulunamadı.")
111 | sys.exit()
112 |
113 | review_count = driver.find_element_by_class_name("reviewNum").text
114 | review_count = int(review_count)
115 |
116 | go_to_reviews = driver.find_element_by_id("readReviews")
117 | go_to_reviews.click()
118 |
119 | if review_count % 10 == 0:
120 | length_of_page = review_count // 10
121 | else:
122 | length_of_page = (review_count // 10) + 1
123 |
124 | l = 1
125 |
126 | while l <= length_of_page:
127 |
128 | print("İncelemeler çekiliyor...")
129 | print("Sayfa: " + str(l))
130 |
131 | time.sleep(delay)
132 | driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
133 |
134 | comments = driver.find_elements_by_class_name("comment")
135 | for comment in comments:
136 |
137 | customer = comment.find_element_by_class_name("userName").text
138 | customer_name_texts.append(customer)
139 |
140 | date = comment.find_element_by_class_name("commentDate").text
141 | date_texts.append(date)
142 |
143 | try:
144 | headline = comment.find_element_by_class_name("commentTitle").text
145 | review_headlines.append(headline)
146 |
147 | except NoSuchElementException:
148 | review_headlines.append("BOŞ")
149 |
150 | useful = comment.find_element_by_class_name("btnComment.yesBtn").text
151 | useful = useful.replace("Evet", "").replace("(", "").replace(")", "")
152 | review_useful.append(useful)
153 |
154 | replaced_useful = comment.find_element_by_class_name("btnComment.yesBtn").text
155 | review = comment.text
156 | review = review.replace(customer, "").replace(date, "").replace(replaced_useful, "").replace("Bu yorumu faydalı buldunuz mu?", "")
157 | review_texts.append(review)
158 |
159 | try:
160 | next_button = driver.find_element_by_xpath("//*[@id='tabPanelProComments']/div/div[2]/div[2]/a[11]")
161 | next_button.click()
162 |
163 | except WebDriverException:
164 | pass
165 |
166 | l += 1
167 |
168 | driver.close()
169 |
170 | length_list = [review_texts, review_useful, review_headlines, customer_name_texts, date_texts]
171 | limit = map(len, length_list)
172 | limit = min(list(limit))
173 |
174 |
175 | review_texts_fin = review_texts[:limit]
176 | df = pd.DataFrame({"Yorum": review_texts_fin})
177 |
178 | if scrape_useful:
179 | review_useful_fin = review_useful[:limit]
180 | df["Yorum Beğeni Sayısı"] = review_useful_fin
181 |
182 | if scrape_headlines:
183 | review_headlines_fin = review_headlines[:limit]
184 | df["Yorumun Başlığı"] = review_headlines_fin
185 |
186 | if scrape_customer_names:
187 | customer_name_texts_fin = customer_name_texts[:limit]
188 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
189 |
190 | if scrape_dates:
191 | date_texts_fin = date_texts[:limit]
192 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
193 |
194 | df.to_excel(file, header = True, index = False)
195 |
196 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
197 | print(x)
198 |
199 | print("""
200 | --------------------------------------------------------------------------
201 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
202 | - Github Hesabım: ardauzunoglu -
203 | --------------------------------------------------------------------------
204 | """)
205 | initialize()
206 | scrape()
207 |
208 | if __name__ == "__main__":
209 | n11_scraper()
--------------------------------------------------------------------------------
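Every scraper in the repo duplicates the same y/n prompt loop (`preference`), whose `while (x != "y") or (x != "n")` condition is always true and only works because of the `break` statements. A standalone sketch of that helper with the condition replaced by a plain `while True:`; the `ask` parameter is an addition made here so the loop can be driven without stdin:

```python
def preference(scrape_input, question, ask=input):
    """Return True for 'y', False for 'n'; re-prompt on anything else."""
    while True:
        answer = scrape_input.lower()
        if answer == "y":
            return True
        if answer == "n":
            return False
        print("Geçersiz yanıt.")
        scrape_input = ask(question)
```

In the scrapers it would be called exactly as before, `preference(scrape_useful_input, scrape_useful_question)`, since `ask` defaults to `input`.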
/scrapers/n11_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/trendyol_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/trendyol_scraper/trendyol-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/trendyol_scraper/trendyol-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/trendyol_scraper/trendyol_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def trendyol_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while not isinstance(delay, int):
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Trendyol Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global product_name, file, delay, review_texts, review_useful, customer_name_texts, date_texts, scrape_useful, scrape_customer_name, scrape_date, path
44 |
45 | product_name = input("İncelemelerin çekileceği ürün adı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | review_texts = []
51 | review_useful = []
52 | customer_name_texts = []
53 | date_texts = []
54 |
55 | scrape_useful_question = "İncelemenin aldığı beğeni sayısı çekilsin mi(y/n): "
56 | scrape_useful_input = input(scrape_useful_question)
57 | scrape_useful = preference(scrape_useful_input, scrape_useful_question)
58 |
59 | scrape_customer_name_question = "Müşteri isimleri çekilsin mi(y/n): "
60 | scrape_customer_name_input = input(scrape_customer_name_question)
61 | scrape_customer_name = preference(scrape_customer_name_input, scrape_customer_name_question)
62 |
63 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
64 | scrape_date_input = input(scrape_date_question)
65 | scrape_date = preference(scrape_date_input, scrape_date_question)
66 |
67 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
68 |
69 | def scrape():
70 | try:
71 | print("Chromedriver'a erişiliyor...")
72 | driver = webdriver.Chrome(path)
73 | time.sleep(delay)
74 | print("Chromedriver'a erişildi.")
75 |
76 | except WebDriverException:
77 | print("Chromedriver kullanılamıyor.")
78 | sys.exit()
79 |
80 | try:
81 | print("Trendyol adresine gidiliyor...")
82 | driver.get("https://www.trendyol.com")
83 | time.sleep(delay)
84 | driver.maximize_window()
85 | time.sleep(delay)
86 | print("Trendyol adresine gidildi.")
87 |
88 | except WebDriverException:
89 | print("Trendyol'a erişilemiyor.")
90 | sys.exit()
91 |
92 | try:
93 | print("Ürün aranıyor...")
94 | search_bar = driver.find_element_by_class_name("search-box")
95 | search_bar.send_keys(product_name)
96 | search_bar.send_keys(Keys.ENTER)
97 | time.sleep(delay)
98 |
99 | product = driver.find_element_by_class_name("prdct-desc-cntnr")
100 | product.click()
101 | time.sleep(delay)
102 | print("Ürün bulundu.")
103 |
104 | except NoSuchElementException:
105 | print("Ürün bulunamadı.")
106 | sys.exit()
107 |
108 | url = driver.current_url
109 | index_of_question_mark = url.index("?")
110 | url = url[:index_of_question_mark]
111 | url = url + "/yorumlar"
112 | driver.get(url)
113 |
114 | review_count = driver.find_element_by_class_name("pr-rnr-sm-p-s").text
115 | review_count = review_count.replace("Değerlendirme", "")
116 | review_count = review_count.replace("Yorum", "")
117 | review_count = review_count.split()
118 | review_count = int(review_count[1])
119 |
120 | while len(review_texts) < review_count:
121 |
122 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
123 | match = False
124 |
125 | while match == False:
126 | lastCount = lenOfPage
127 | time.sleep(delay)
128 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
129 | if lastCount == lenOfPage:
130 | match = True
131 |
132 | time.sleep(delay)
133 |
134 | reviews = driver.find_elements_by_class_name("rnr-com-tx")
135 | for review in reviews:
136 | review = review.text
137 | review_texts.append(review)
138 |
139 | print("Veriler çekiliyor...")
140 | print("İnceleme: " + str(len(review_texts)))
141 |
142 | usefuls = driver.find_elements_by_xpath("//*[@class='tooltip-wrp']//span[2]")
143 | for useful in usefuls:
144 | useful = useful.text
145 | useful = useful.strip("()")
146 | review_useful.append(useful)
147 |
148 | customers = driver.find_elements_by_xpath("//*[@class='rnr-com-bt']//span[@class = 'rnr-com-usr']")
149 | for customer in customers:
150 | customer = customer.text
151 | customer = customer.replace("|","")
152 | customer = customer.split()
153 |
154 | customer_name = customer[-3:]
155 | customer_name = " ".join(customer_name)
156 | customer_name_texts.append(customer_name)
157 |
158 | date = customer[:-3]
159 | date = " ".join(date)
160 | date_texts.append(date)
161 |
162 | driver.close()
163 |
164 | length_list = [review_texts, review_useful, customer_name_texts, date_texts]
165 | limit = map(len, length_list)
166 | limit = min(list(limit))
167 |
168 |
169 | review_texts_fin = review_texts[:limit]
170 | df = pd.DataFrame({"Yorum": review_texts_fin})
171 |
172 | if scrape_useful:
173 | review_useful_fin = review_useful[:limit]
174 | df["Yorum Beğeni Sayısı"] = review_useful_fin
175 |
176 | if scrape_customer_name:
177 | customer_name_texts_fin = customer_name_texts[:limit]
178 | df["Yorum Yazan Müşteri"] = customer_name_texts_fin
179 |
180 | if scrape_date:
181 | date_texts_fin = date_texts[:limit]
182 | df["Yorumun Yazıldığı Tarih"] = date_texts_fin
183 |
184 | df.to_excel(file, header = True, index = False)
185 |
186 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
187 | print(x)
188 | print("""
189 | --------------------------------------------------------------------------
190 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
191 | - Github Hesabım: ardauzunoglu -
192 | --------------------------------------------------------------------------
193 | """)
194 |
195 | time.sleep(3)
196 | initialize()
197 | scrape()
198 |
199 | if __name__ == "__main__":
200 | trendyol_scrape()
--------------------------------------------------------------------------------
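The Trendyol and Yemeksepeti scrapers both poll `document.body.scrollHeight` in a `lastCount`/`lenOfPage` loop until the height stops changing. That pattern can be factored into one helper; passing `execute_script` in as a parameter is an assumption made here so the loop can be exercised without a live `webdriver` instance:

```python
import time

# Same JS snippet the scrapers already use: scroll down, report new height.
SCROLL_JS = ("window.scrollTo(0, document.body.scrollHeight);"
             " var lenOfPage=document.body.scrollHeight; return lenOfPage;")

def scroll_until_stable(execute_script, delay=0):
    """Keep scrolling to the bottom until the reported page height
    stops growing, then return the final height."""
    last_height = execute_script(SCROLL_JS)
    while True:
        time.sleep(delay)
        height = execute_script(SCROLL_JS)
        if height == last_height:
            return height
        last_height = height
```

Inside `scrape()` this would be invoked as `scroll_until_stable(driver.execute_script, delay)`.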
/scrapers/yemeksepeti_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/yemeksepeti_scraper/yemeksepeti-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/yemeksepeti_scraper/yemeksepeti-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/yemeksepeti_scraper/yemeksepeti_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def yemeksepeti_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while not isinstance(delay, int):
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Yemeksepeti Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global restaurant_info, username_info, password_info, city_info, file, delay, review_texts, author_texts, date_texts, speed_ratings, service_ratings, flavour_ratings, scrape_author, scrape_date, scrape_speed, scrape_service, scrape_flavour, path
44 |
45 | restaurant_info = input("Yorumların Çekileceği Restoran: ")
46 | username_info = input("Yemeksepeti kullanıcı adı: ")
47 | password_info = input("Yemeksepeti parola: ")
48 | city_info = input("Yemeksepeti Şehir: ")
49 | file = input("Oluşturulacak Excel dosyasının adı: ")
50 | file = file + ".xlsx"
51 | delay = delay_check(input("Bekleme süresi(sn): "))
52 |
53 | review_texts = []
54 | author_texts = []
55 | date_texts = []
56 | speed_ratings = []
57 | service_ratings = []
58 | flavour_ratings = []
59 |
60 | scrape_author_question = "Müşteri isimleri çekilsin mi(y/n): "
61 | scrape_author_input = input(scrape_author_question)
62 | scrape_author = preference(scrape_author_input, scrape_author_question)
63 |
64 | scrape_date_question = "İnceleme tarihleri çekilsin mi(y/n): "
65 | scrape_date_input = input(scrape_date_question)
66 | scrape_date = preference(scrape_date_input, scrape_date_question)
67 |
68 | scrape_speed_question = "İncelemedeki hız puanı çekilsin mi(y/n): "
69 | scrape_speed_input = input(scrape_speed_question)
70 | scrape_speed = preference(scrape_speed_input, scrape_speed_question)
71 |
72 | scrape_service_question = "İncelemedeki servis puanı çekilsin mi(y/n): "
73 | scrape_service_input = input(scrape_service_question)
74 | scrape_service = preference(scrape_service_input, scrape_service_question)
75 |
76 | scrape_flavour_question = "İncelemedeki lezzet puanı çekilsin mi(y/n): "
77 | scrape_flavour_input = input(scrape_flavour_question)
78 | scrape_flavour = preference(scrape_flavour_input, scrape_flavour_question)
79 |
80 | path = "BURAYA CHROMEDRIVER KONUMUNU GİRİNİZ"
81 |
82 | tr_chars = ["ğ", "ş", "ı", "ü", "ö", "ç"]
83 | tr2eng = {
84 | "ğ":"g",
85 | "ş":"s",
86 | "ı":"i",
87 | "ü":"u",
88 | "ö":"o",
89 | "ç":"c"
90 | }
91 |
92 | city_info = city_info.lower()
93 | for harf in city_info:
94 | if harf in tr_chars:
95 | city_info = city_info.replace(harf, tr2eng[harf])
96 |
97 | else:
98 | pass
99 |
100 | def scrape():
101 | try:
102 | print("Chromedriver'a erişiliyor...")
103 | driver = webdriver.Chrome(path)
104 | time.sleep(delay)
105 | print("Chromedriver'a erişildi.")
106 |
107 | except WebDriverException:
108 | print("Chromedriver kullanılamıyor.")
109 | sys.exit()
110 |
111 | try:
112 | print("Yemeksepeti adresine gidiliyor...")
113 | url = "https://www.yemeksepeti.com/" + city_info
114 | driver.get(url)
115 | time.sleep(delay)
116 | driver.maximize_window()
117 | time.sleep(delay)
118 | print("Yemeksepeti adresine gidildi.")
119 |
120 | except WebDriverException:
121 | print("Yemeksepeti'ne ulaşılamıyor.")
122 | sys.exit()
123 |
124 | try:
125 | print("Yemeksepeti hesabına giriş yapılıyor...")
126 | username = driver.find_element_by_id("UserName")
127 | username.send_keys(username_info)
128 | time.sleep(delay)
129 |
130 | password = driver.find_element_by_id("password")
131 | password.send_keys(password_info)
132 | password.send_keys(Keys.ENTER)
133 | time.sleep(delay)
134 | print("Yemeksepeti hesabına giriş yapıldı.")
135 |
136 | except NoSuchElementException:
137 | print("Kullanıcı adı ve/veya parola hatalı.")
138 | sys.exit()
139 |
140 | try:
141 | address_area = driver.find_element_by_class_name("address-area")
142 | address_area.click()
143 | time.sleep(delay)
144 |
145 | except NoSuchElementException:
146 | print("Kayıtlı adres bulunamadı.")
147 | sys.exit()
148 |
149 | try:
150 | print("Restoran aranıyor...")
151 | search_box = driver.find_element_by_class_name("search-box")
152 | search_box.send_keys(restaurant_info)
153 | search_box.send_keys(Keys.ENTER)
154 | time.sleep(delay+3)
155 |
156 | restaurant = driver.find_element_by_class_name("restaurantName")
157 | restaurant.click()
158 | time.sleep(delay)
159 | print("Restoran bulundu.")
160 |
161 | except NoSuchElementException:
162 | print("Restoran bulunamadı.")
163 | sys.exit()
164 |
165 | try:
166 | yorumlar_section = driver.find_element_by_xpath("//*[@id='restaurantDetail']/div[2]/div[1]/ul/li[4]/a")
167 | yorumlar_section.click()
168 | time.sleep(delay)
169 |
170 | except NoSuchElementException:
171 | print("Yorum bulunamadı.")
172 | sys.exit()
173 |
174 | l = 1
175 | review_count = yorumlar_section.text
176 | review_count = review_count.replace("Yorumlar", "")
177 | review_count = review_count.replace("(","")
178 | review_count = review_count.replace(")","")
179 | review_count = int(review_count)
180 |
181 | if review_count % 30 == 0:
182 | review_count = review_count // 30
183 |
184 | else:
185 | review_count = (review_count // 30) + 1
186 |
187 | while l < review_count:
188 |
189 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
190 | match = False
191 |
192 | while match == False:
193 | lastCount = lenOfPage
194 | time.sleep(delay)
195 | lenOfPage = driver.execute_script("window.scrollTo(0, document.body.scrollHeight); var lenOfPage=document.body.scrollHeight; return lenOfPage;")
196 | if lastCount == lenOfPage:
197 | match = True
198 |
199 | print("Veriler çekiliyor...")
200 | print("Sayfa: " + str(l))
201 |
202 | yorumlar = driver.find_elements_by_class_name("comment.row")
203 | for yorum in yorumlar:
204 | yorum = yorum.text
205 | yorum = yorum.replace("\n", " ")
206 | yorum = yorum.split()
207 |
208 | if "." in yorum[0]:
209 | yorum = yorum[1:]
210 |
211 | else:
212 | pass
213 |
214 | yorum = " ".join(yorum)
215 | review_texts.append(yorum)
216 |
217 | authors = driver.find_elements_by_class_name("userName")
218 | for author in authors:
219 | author = author.text
220 | author_texts.append(author)
221 |
222 | dates = driver.find_elements_by_class_name("commentDate")
223 | for date in dates:
224 | date = date.text
225 | date_texts.append(date)
226 |
227 | speeds = driver.find_elements_by_class_name("speed")
228 | for speed in speeds:
229 | speed = speed.text
230 | speed = speed.replace("Hız: ", "")
231 | speed_ratings.append(speed)
232 |
233 | services = driver.find_elements_by_class_name("serving")
234 | for service in services:
235 | service = service.text
236 | service = service.replace("Servis: ", "")
237 | service_ratings.append(service)
238 |
239 | flavours = driver.find_elements_by_class_name("flavour")
240 | for flavour in flavours:
241 | flavour = flavour.text
242 | flavour = flavour.replace("Lezzet: ", "")
243 | flavour_ratings.append(flavour)
244 |
245 | l += 1
246 | next_page = driver.find_elements_by_link_text(str(l))
247 | if next_page:
248 | next_page[0].click()
249 | driver.close()
250 |
251 | def duplicates(lst, item):
252 | return [i for i, x in enumerate(lst) if x == item]
253 |
254 | if "Restoran Cevabı" in author_texts:
255 | girilecek_rating_indexleri = duplicates(author_texts,"Restoran Cevabı")
256 | for i in girilecek_rating_indexleri:
257 | date_texts.insert(i, "Restoran Cevabı")
258 | speed_ratings.insert(i, "Restoran Cevabı")
259 | service_ratings.insert(i, "Restoran Cevabı")
260 | flavour_ratings.insert(i, "Restoran Cevabı")
261 |
262 | elif "Yemeksepeti" in author_texts:
263 | girilecek_rating_indexleri = duplicates(author_texts,"Yemeksepeti")
264 | for i in girilecek_rating_indexleri:
265 | date_texts.insert(i, "Yemeksepeti")
266 | speed_ratings.insert(i, "Yemeksepeti")
267 | service_ratings.insert(i, "Yemeksepeti")
268 | flavour_ratings.insert(i, "Yemeksepeti")
269 |
270 | else:
271 | pass
272 |
273 | length_list = [review_texts, author_texts, date_texts, speed_ratings, service_ratings, flavour_ratings]
274 | limit = map(len, length_list)
275 | limit = min(list(limit))
276 |
277 |
278 | review_texts = review_texts[:limit]
279 | df = pd.DataFrame({"Yorumlar": review_texts})
280 |
281 | if scrape_author:
282 | author_texts_fin = author_texts[:limit]
283 | df["Müşteriler"] = author_texts_fin
284 |
285 | if scrape_date:
286 | date_texts_fin = date_texts[:limit]
287 | df["Yorum Tarihi"] = date_texts_fin
288 |
289 | if scrape_speed:
290 | speed_ratings_fin = speed_ratings[:limit]
291 | df["Hız Değerlendirmesi"] = speed_ratings_fin
292 |
293 | if scrape_service:
294 | service_ratings_fin = service_ratings[:limit]
295 | df["Servis Değerlendirmesi"] = service_ratings_fin
296 |
297 | if scrape_flavour:
298 | flavour_ratings_fin = flavour_ratings[:limit]
299 | df["Lezzet Değerlendirmesi"] = flavour_ratings_fin
300 |
301 | df.to_excel(file, header = True, index = False)
302 |
303 | x = "Çektiğiniz veriler "+ file + " adlı excel dosyasına kaydedildi."
304 | print(x)
305 | print("""
306 | --------------------------------------------------------------------------
307 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
308 | - Github Hesabım: ardauzunoglu -
309 | --------------------------------------------------------------------------
310 | """)
311 |
312 | time.sleep(3)
313 | initialize()
314 | scrape()
315 |
316 | if __name__ == "__main__":
317 | yemeksepeti_scrape()
--------------------------------------------------------------------------------
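The character-by-character Turkish-to-ASCII replacement in `initialize` can be expressed with `str.maketrans`/`str.translate` in one pass. A sketch (the function name `city_slug` is an invention here, not part of the repo):

```python
# Map Turkish-specific lowercase letters to ASCII for the URL slug.
TR2ENG = str.maketrans("ğşıüöç", "gsiuoc")

def city_slug(city):
    """Lower-case a city name and strip Turkish diacritics,
    matching the yemeksepeti.com/<city> URL scheme."""
    return city.lower().translate(TR2ENG)
```

Note one caveat: in Python, a dotted capital "İ" lower-cases to "i" plus a combining dot, so names like "İstanbul" may need an extra replacement before translation.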
/scrapers/youtube_scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.5
2 | pandas==1.1.4
3 | selenium==3.141.0
--------------------------------------------------------------------------------
/scrapers/youtube_scraper/youtube-ornek-veriseti.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ardauzunoglu/TRScraper/85ef82834d225756b2727ad2e0aa3683d0dd9333/scrapers/youtube_scraper/youtube-ornek-veriseti.xlsx
--------------------------------------------------------------------------------
/scrapers/youtube_scraper/youtube_scraper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import pandas as pd
4 | from selenium import webdriver
5 | from selenium.common.exceptions import WebDriverException, NoSuchElementException
6 | from selenium.webdriver.common.keys import Keys
7 |
8 | def youtube_scrape():
9 | def initialize():
10 | def preference(scrape_input, question):
11 | while True:
12 | if scrape_input.lower() == "y":
13 | output = True
14 | break
15 |
16 | elif scrape_input.lower() == "n":
17 | output = False
18 | break
19 |
20 | else:
21 | print("Geçersiz yanıt.")
22 | scrape_input = input(question)
23 |
24 | return output
25 |
26 | def delay_check(delay):
27 | while not isinstance(delay, int):
28 | try:
29 | delay = int(delay)
30 | except ValueError:
31 | print("Lütfen bir sayı değeri giriniz.")
32 | delay = input("Bekleme süresi: ")
33 |
34 | return delay
35 |
36 | print("""
37 | ---------------------------------------------------------
38 | - Youtube Scraper'a hoş geldiniz! -
39 | - Geliştirici: Arda Uzunoğlu -
40 | ---------------------------------------------------------
41 | """)
42 |
43 | global url, file, delay, comment_texts, author_texts, date_texts, title_text, like_texts, scrape_author, scrape_date, scrape_title, scrape_like, path
44 |
45 | url = input("Yorumların çekileceği Youtube videosunun bağlantısı: ")
46 | file = input("Oluşturulacak Excel dosyasının adı: ")
47 | file = file + ".xlsx"
48 | delay = delay_check(input("Bekleme süresi(sn): "))
49 |
50 | comment_texts = []
51 | author_texts = []
52 | date_texts = []
53 | title_text = []
54 | like_texts = []
55 |
56 | scrape_author_question = "Kullanıcı isimleri çekilsin mi(y/n): "
57 | scrape_author_input = input(scrape_author_question)
58 | scrape_author = preference(scrape_author_input, scrape_author_question)
59 |
60 | scrape_date_question = "Yorum tarihleri çekilsin mi(y/n): "
61 | scrape_date_input = input(scrape_date_question)
62 | scrape_date = preference(scrape_date_input, scrape_date_question)
63 |
64 | scrape_title_question = "Video başlığı çekilsin mi(y/n): "
65 | scrape_title_input = input(scrape_title_question)
66 | scrape_title = preference(scrape_title_input, scrape_title_question)
67 |
68 | scrape_like_question = "Yorumun aldığı beğeni sayısı çekilsin mi(y/n): "
69 | scrape_like_input = input(scrape_like_question)
70 | scrape_like = preference(scrape_like_input, scrape_like_question)
71 |
72 | path = r"C:\chromedriver.exe"
73 |
74 | def scrape():
75 | try:
76 | print("Chromedriver'a erişiliyor...")
77 | driver = webdriver.Chrome(path)
78 | time.sleep(delay)
79 | print("Chromedriver'a erişildi.")
80 |
81 | except WebDriverException:
82 | print("Chromedriver kullanılamıyor.")
83 | sys.exit()
84 |
85 | try:
86 | print("Video bağlantısına gidiliyor...")
87 | driver.get(url)
88 | time.sleep(delay)
89 | driver.maximize_window()
90 | time.sleep(delay)
91 | print("Video bağlantısına gidildi.")
92 |
93 | except WebDriverException:
94 | print("Youtube'a erişilemiyor.")
95 | sys.exit()
96 |
97 | time.sleep(delay+2)
98 | comment_section = driver.find_element_by_xpath('//*[@id="comments"]')
99 | title = driver.find_element_by_class_name("title").text
100 | time.sleep(delay)
101 |
102 | driver.execute_script("arguments[0].scrollIntoView();", comment_section)
103 | time.sleep(delay+2)
104 |
105 | comment_count = driver.find_element_by_class_name("count-text.ytd-comments-header-renderer").text
106 | comment_count = comment_count.replace(" Yorum","")
107 | comment_count = comment_count.replace(".","")
108 | comment_count = int(comment_count)
109 |
110 | last_height = driver.execute_script("return document.documentElement.scrollHeight")
111 |
112 | while True:
113 | driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
114 | time.sleep(delay)
115 | new_height = driver.execute_script("return document.documentElement.scrollHeight")
116 | if new_height == last_height:
117 | break
118 | last_height = new_height
119 |
120 | comments = driver.find_elements_by_xpath("//*[@id='contents']/ytd-comment-thread-renderer")
121 | l = 1
122 | for comment in comments:
123 | try:
124 | print("Veri çekiliyor...")
125 | print("Yorum: " + str(l))
126 | author = comment.find_element_by_id("author-text").text
127 | date = comment.find_element_by_class_name("published-time-text").text
128 | comment_text = comment.find_element_by_id("content-text").text
129 | likes = comment.find_element_by_id("vote-count-middle").text
130 |
131 | author_texts.append(author)
132 | date_texts.append(date)
133 | comment_texts.append(comment_text)
134 | like_texts.append(likes)
135 | title_text.append(title)
136 |
137 | l += 1
138 |
139 | except NoSuchElementException:
140 | continue
141 |
142 | driver.close()
143 |
144 | length_list = [comment_texts, author_texts, date_texts, like_texts, title_text]
145 | limit = map(len, length_list)
146 | limit = min(list(limit))
147 |
148 |
149 | comment_texts_fin = comment_texts[:limit]
150 | df = pd.DataFrame({"Yorumlar":comment_texts_fin})
151 | if scrape_author:
152 | author_texts_fin = author_texts[:limit]
153 | df["Kullanıcı"] = author_texts_fin
154 |
155 | if scrape_date:
156 | date_texts_fin = date_texts[:limit]
157 | df["Yorum Tarihi"] = date_texts_fin
158 |
159 | if scrape_like:
160 | like_texts_fin = like_texts[:limit]
161 | df["Yorumun Aldığı Beğeni Sayısı"] = like_texts_fin
162 |
163 | if scrape_title:
164 | title_text_fin = title_text[:limit]
165 | df["Video Başlığı"] = title_text_fin
166 |
167 | df.to_excel(file, header = True, index = False)
168 | x = "Çektiğiniz veriler " + file + " adlı excel dosyasına kaydedildi."
169 | print(x)
170 |
171 | print("""
172 | --------------------------------------------------------------------------
173 | - Projeden memnun kaldıysanız Github üzerinden yıldızlamayı unutmayın. -
174 | - Github Hesabım: ardauzunoglu -
175 | --------------------------------------------------------------------------
176 | """)
177 |
178 | time.sleep(3)
179 | initialize()
180 | scrape()
181 |
182 | if __name__ == "__main__":
183 | youtube_scrape()
--------------------------------------------------------------------------------
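All four scrapers end the same way: truncate every result list to the shortest one, then build a `pd.DataFrame` column by column. That step can be isolated into a small pure function (the name `align_columns` is made up here for illustration):

```python
def align_columns(columns):
    """Truncate every column list to the shortest length so that
    pd.DataFrame accepts them as equal-length columns."""
    limit = min(map(len, columns.values()))
    return {name: values[:limit] for name, values in columns.items()}
```

In a scraper this would replace the `length_list`/`limit` boilerplate, e.g. `df = pd.DataFrame(align_columns({"Yorum": review_texts, "Yorumun Yazıldığı Tarih": date_texts}))`.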