├── .git-blame-ignore-revs ├── .github └── workflows │ ├── publish.yml │ ├── tests-macos.yml │ ├── tests-ubuntu.yml │ └── tests-windows.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── codecov.yml ├── pyproject.toml ├── setup.py ├── src └── protego.py ├── tests ├── __init__.py ├── fetch_robotstxt.py ├── test_data │ ├── 103.88.218.109 │ ├── 10minutemail.com │ ├── 10times.com │ ├── 4chan.org │ ├── 6abc.com │ ├── 99designs.com │ ├── 9gag.com │ ├── aawsat.com │ ├── abc13.com │ ├── abc7.com │ ├── abc7chicago.com │ ├── abc7news.com │ ├── abc7ny.com │ ├── abcnews.go.com │ ├── about.gitlab.com │ ├── adage.com │ ├── adblockplus.org │ ├── addons.mozilla.org │ ├── adguard.com │ ├── adsabs.harvard.edu │ ├── adultfriendfinder.com │ ├── adventure.howstuffworks.com │ ├── advrider.com │ ├── aeromexico.com │ ├── agendaweb.org │ ├── ahrefs.com │ ├── aib.ie │ ├── akc.org │ ├── akismet.com │ ├── alaska.gov │ ├── alight.com │ ├── alison.com │ ├── allafrica.com │ ├── alternativeto.net │ ├── android.stackexchange.com │ ├── androidforums.com │ ├── animoto.com │ ├── answers.yahoo.com │ ├── ao.com │ ├── appinventor.mit.edu │ ├── appleinsider.com │ ├── archinect.com │ ├── archive.codeplex.com │ ├── archive.org │ ├── archiveofourown.org │ ├── arstechnica.com │ ├── artofmanliness.com │ ├── arxiv.org │ ├── asianwiki.com │ ├── asic.gov.au │ ├── assam.gov.in │ ├── atlanta.craigslist.org │ ├── au.indeed.com │ ├── au.yahoo.com │ ├── auspost.com.au │ ├── austin.craigslist.org │ ├── aut.ac.ir │ ├── auto-data.net │ ├── automobiles.honda.com │ ├── avaaz.org │ ├── avm.de │ ├── aws.amazon.com │ ├── azertag.az │ ├── badoo.com │ ├── ballotpedia.org │ ├── baseball.fantasysports.yahoo.com │ ├── basecamp.com │ ├── basketball.realgm.com │ ├── bdnews24.com │ ├── bdsmstreak.com │ ├── beforeitsnews.com │ ├── berniesanders.com │ ├── betanews.com │ ├── bethesda.net │ ├── bgp.he.net │ ├── bgr.com │ ├── bigcharts.marketwatch.com │ ├── bitbucket.org │ ├── bitcoin.org │ ├── bitcointalk.org │ ├── bitly.com │ ├── blenderartists.org │ ├── blog.mozilla.org │ ├── blogs.oracle.com │ ├── boardgamegeek.com │ ├── boingboing.net │ ├── bongacams.com │ ├── book.goindigo.in │ ├── bookboon.com │ ├── bossip.com │ ├── boston.craigslist.org │ ├── boxrec.com │ ├── breakingnewsenglish.com │ ├── brew.sh │ ├── brickset.com │ ├── brighthouse.com │ ├── buffalonews.com │ ├── bugmenot.com │ ├── buttsmithy.com │ ├── ca.yahoo.com │ ├── cad-comic.com │ ├── cafeastrology.com │ ├── calendar.yahoo.com │ ├── cams.com │ ├── caniuse.com │ ├── car-part.com │ ├── carleton.ca │ ├── carousell.com │ ├── case.edu │ ├── casper.com │ ├── catholic.net │ ├── ccbill.com │ ├── ccli.com │ ├── cdbaby.com │ ├── cdn.englishforums.com │ ├── cdnjs.com │ ├── celebitchy.com │ ├── chaturbate.com │ ├── chicago.craigslist.org │ ├── chicago.suntimes.com │ ├── citeseerx.ist.psu.edu │ ├── ckeditor.com │ ├── clark.com │ ├── clicky.com │ ├── clinicaltrials.gov │ ├── cmb.ac.lk │ ├── codeigniter.com │ ├── coinmarketcap.com │ ├── cointelegraph.com │ ├── collider.com │ ├── colorlib.com │ ├── consent.yahoo.com │ ├── convertcase.net │ ├── convertio.co │ ├── coolmaterial.com │ ├── cooltext.com │ ├── corporate.walmart.com │ ├── cps.edu │ ├── creately.com │ ├── creativecommons.org │ ├── creditcards.chase.com │ ├── ctext.org │ ├── cvmkr.com │ ├── cx.rightnow.com │ ├── dailycaller.com │ ├── daz3d.com │ ├── dblp.uni-trier.de │ ├── dd-wrt.com │ ├── deadspin.com │ ├── defence.pk │ ├── delhi.gov.in │ ├── denver.craigslist.org │ ├── designshack.net │ ├── developer.android.com │ ├── developer.apple.com │ ├── developer.mozilla.org │ ├── developers.facebook.com │ ├── dfw.cbslocal.com │ ├── dict.leo.org │ ├── dict.longdo.com │ ├── digg.com │ ├── digital-photography-school.com │ ├── dilbert.com │ ├── discovermagazine.com │ ├── disneyland.disney.go.com │ ├── disneyworld.disney.go.com │ ├── dmv.ny.gov │ ├── dnd.wizards.com │ ├── docs.google.com │ ├── docs.oracle.com │ ├── doctor.webmd.com │ ├── dogtime.com │ ├── dos.myflorida.com │ ├── dotnet.microsoft.com │ ├── download.cnet.com │ ├── downloadcenter.intel.com │ ├── drafthouse.com │ ├── drdo.gov.in │ ├── drivers.softpedia.com │ ├── duckduckgo.com │ ├── dulfy.net │ ├── dyn.com │ ├── ec.europa.eu │ ├── economictimes.indiatimes.com │ ├── edition.cnn.com │ ├── elementor.com │ ├── emedicine.medscape.com │ ├── en-in.sennheiser.com │ ├── en.chessbase.com │ ├── en.mappy.com │ ├── en.ucoin.net │ ├── en.wikipedia.org │ ├── en.wikiquote.org │ ├── en.wikivoyage.org │ ├── en.wiktionary.org │ ├── epguides.com │ ├── epson.com │ ├── eric.ed.gov │ ├── eu.wholefoodsmarket.com │ ├── eur-lex.europa.eu │ ├── europa.eu │ ├── euw.leagueoflegends.com │ ├── evermotion.org_443 │ ├── everymac.com │ ├── ew.com │ ├── explorable.com │ ├── extremereach.com │ ├── ezinearticles.com │ ├── faapy.com │ ├── familydoctor.org │ ├── fetlife.com │ ├── ffmpeg.org │ ├── file.org │ ├── filehippo.com │ ├── fileinfo.com │ ├── filerio.in │ ├── filezilla-project.org │ ├── filmschoolrejects.com │ ├── finance.yahoo.com │ ├── fineartamerica.com │ ├── flatmates.com.au │ ├── flightaware.com │ ├── flightview.com │ ├── flyasiana.com │ ├── food.ndtv.com │ ├── football.fantasysports.yahoo.com │ ├── foreverliving.com │ ├── fortune.com │ ├── forums.cgsociety.org │ ├── forums.macrumors.com │ ├── forzamotorsport.net │ ├── foursquare.com │ ├── freemusicarchive.org │ ├── freshdesk.com │ ├── frontier.com │ ├── ftbwiki.org │ ├── gaijinpot.com │ ├── gallica.bnf.fr │ ├── gamejolt.com │ ├── gameranx.com │ ├── gamesdonequick.com │ ├── gawker.com │ ├── gayboystube.com │ ├── gearpatrol.com │ ├── geekologie.com │ ├── geoguessr.com │ ├── geology.com │ ├── georgia.gov │ ├── getbootstrap.com │ ├── getfireshot.com │ ├── git-scm.com │ ├── github.com │ ├── global.oup.com │ ├── globalresearch.ca │ ├── gmatclub.com │ ├── go.com │ ├── golang.org │ ├── goldprice.org │ ├── gothamist.com │ ├── greyscalegorilla.com │ ├── groceries.morrisons.com │ ├── group.renault.com │ ├── groupme.com │ ├── groups.google.com │ ├── gtmetrix.com │ ├── guardian.ng │ ├── gulfnews.com │ ├── hackforums.net │ ├── hbr.org │ ├── he.wikipedia.org │ ├── healthy.kaiserpermanente.org │ ├── heavy.com │ ├── help.ubuntu.com │ ├── hentai2read.com │ ├── heroku.com │ ├── hetmanrecovery.com │ ├── hiphopdx.com │ ├── hollywoodlife.com │ ├── home.barclays │ ├── home.bt.com │ ├── hoopshype.com │ ├── hootsuite.com │ ├── hotcopper.com.au │ ├── hotpads.com │ ├── hp.myway.com │ ├── httpd.apache.org │ ├── hubpages.com │ ├── hymnary.org │ ├── hypebeast.com │ ├── icai.org │ ├── iccup.com │ ├── icon-icons.com │ ├── ideas.repec.org │ ├── ieeexplore.ieee.org │ ├── illinois.edu │ ├── illinoisstate.edu │ ├── img.konami.com │ ├── imgur.com │ ├── imslp.org │ ├── in.bookmyshow.com │ ├── in.godaddy.com │ ├── in.hotels.com │ ├── in.news.yahoo.com │ ├── in.yahoo.com │ ├── in.yougov.com │ ├── incompetech.com │ ├── indianexpress.com │ ├── inform.kz │ ├── inkbunny.net │ ├── insider.espn.com │ ├── internetbanking.suncorpbank.com.au │ ├── investorshub.advfn.com │ ├── invoicehome.com_443 │ ├── islamqa.info │ ├── it.toolbox.com │ ├── itch.io │ ├── iteslj.org │ ├── jalopnik.com │ ├── jang.com.pk │ ├── javascript.info │ ├── jetpack.com │ ├── jobs.theguardian.com │ ├── jobsalert.pk │ ├── jquery.com │ ├── jqueryui.com │ ├── jsfiddle.net │ ├── jsoneditoronline.org │ ├── jsonformatter.curiousconcept.com │ ├── justgetflux.com │ ├── kapitalbank.az │ ├── kb.iu.edu │ ├── kde.org │ ├── kidshealth.org │ ├── king.com │ ├── kinsta.com │ ├── koa.com │ ├── komonews.com │ ├── krakow.pl │ ├── kriesi.at │ ├── krita.org │ ├── ktla.com │ ├── labtestsonline.org │ ├── laughingsquid.com │ ├── launchpad.net │ ├── law.justia.com │ ├── learningenglish.voanews.com │ ├── lerablog.org │ ├── livejasmin.com │ ├── localbitcoins.com │ ├── login.aol.com │ ├── losangeles.cbslocal.com │ ├── losangeles.craigslist.org │ ├── luscious.net │ ├── madison.craigslist.org │ ├── magicseaweed.com │ ├── mail.google.com │ ├── mail.yahoo.com │ ├── mailchimp.com │ ├── mailtester.com │ ├── maps.google.com │ ├── marketplace.secondlife.com │ ├── mashable.com │ ├── match.com │ ├── mathworld.wolfram.com │ ├── matplotlib.org │ ├── mediaget.com │ ├── medlineplus.gov │ ├── mega.nz │ ├── megagames.com │ ├── meta.ua │ ├── mfa.gov.ua │ ├── miamioh.edu │ ├── mikrotik.com │ ├── missouri.edu │ ├── misuse.ncbi.nlm.nih.gov │ ├── mlb.mlb.com │ ├── mmajunkie.usatoday.com │ ├── mn.gov │ ├── models.com │ ├── mom.me │ ├── money.cnn.com │ ├── moodle.org_443 │ ├── moversguide.usps.com │ ├── movieweb.com │ ├── moz.com │ ├── msdn.microsoft.com │ ├── msu.edu │ ├── muaban.net │ ├── muse.jhu.edu │ ├── musescore.org │ ├── musicbrainz.org │ ├── mxtoolbox.com │ ├── my.yahoo.com │ ├── mygreatlakes.org │ ├── myspace.com │ ├── n4g.com │ ├── nashville.craigslist.org │ ├── nationaldaycalendar.com │ ├── nationalinterest.org │ ├── nces.ed.gov │ ├── nelly.com │ ├── nest.com │ ├── netbeans.org │ ├── neurips.cc │ ├── newatlas.com │ ├── newindianexpress.com │ ├── newrepublic.com │ ├── news.abs-cbn.com │ ├── news.am │ ├── news.google.com │ ├── news.nationalgeographic.com │ ├── news.sky.com │ ├── news.yahoo.com │ ├── newsbusters.org │ ├── newyork.cbslocal.com │ ├── newyork.craigslist.org │ ├── nginx.org │ ├── nhentai.net │ ├── nodejs.org │ ├── notepad-plus-plus.org │ ├── nypost.com │ ├── observer.com │ ├── ocw.mit.edu │ ├── ohnotheydidnt.livejournal.com │ ├── oilprice.com │ ├── onlinelibrary.wiley.com │ ├── onthehub.com │ ├── openvpn.net │ ├── openwrt.org │ ├── order-order.com │ ├── oregonstate.edu │ ├── paper.li │ ├── paperpk.com │ ├── parade.com │ ├── pasadena.edu │ ├── patient.info │ ├── paultan.org │ ├── pbskids.org │ ├── pcsx2.net │ ├── penzu.com │ ├── petapixel.com │ ├── phet.colorado.edu │ ├── philpapers.org │ ├── phoenix.craigslist.org │ ├── photobucket.com │ ├── phys.org │ ├── pitchfork.com │ ├── pixabay.com │ ├── pixologic.com │ ├── plagiarisma.net │ ├── plato.stanford.edu │ ├── play.google.com │ ├── plus.google.com │ ├── podio.com │ ├── poets.org │ ├── polycount.com │ ├── portableapps.com │ ├── portland.craigslist.org │ ├── president.az │ ├── prezi.com │ ├── primagames.com_443 │ ├── processing.org │ ├── profootballtalk.nbcsports.com │ ├── projects.zoho.com │ ├── pubs.acs.org │ ├── puchd.ac.in │ ├── punchng.com │ ├── puzzles.usatoday.com │ ├── pydata.org │ ├── quickbooks.intuit.com │ ├── quicksurveys.com │ ├── rateyourmusic.com │ ├── reason.com │ ├── reference.wolfram.com │ ├── regex101.com │ ├── rekvizitai.vz.lt │ ├── reliefweb.int │ ├── retirementplans.vanguard.com │ ├── roosterteeth.com │ ├── rpg.stackexchange.com │ ├── rubyonrails.org │ ├── runrepeat.com │ ├── sacramento.craigslist.org │ ├── sandiego.craigslist.org │ ├── sanfrancisco.cbslocal.com │ ├── scratch.mit.edu │ ├── screenrant.com │ ├── scroll.in │ ├── scssoft.com │ ├── search.aol.com │ ├── search.vodacom.co.za │ ├── search.yahoo.com │ ├── searchengineland.com │ ├── searchnetworking.techtarget.com │ ├── searchsecurity.techtarget.com │ ├── seattle.craigslist.org │ ├── secondlife.com │ ├── secure.actblue.com │ ├── secure.logmein.com │ ├── sedo.com │ ├── sendgrid.com │ ├── sendpulse.com │ ├── sg.finance.yahoo.com │ ├── shabdkosh.com │ ├── shirazu.ac.ir │ ├── shop.advanceautoparts.com │ ├── shop.lego.com │ ├── shop.lululemon.com │ ├── shop.nordstrom.com │ ├── simple.wikipedia.org │ ├── simplyrecipes.com │ ├── skyvector.com │ ├── slashdot.org │ ├── slidemodel.com │ ├── smallbiztrends.com │ ├── smallpdf.com │ ├── sodapdf.com │ ├── songmeanings.com │ ├── soundcloud.com │ ├── sourceforge.net │ ├── sports.sportingbet.com │ ├── sports.yahoo.com │ ├── sre.gob.mx │ ├── ss64.com │ ├── st-takla.org │ ├── stackoverflow.com │ ├── statcounter.com │ ├── steelseries.com │ ├── stockcharts.com │ ├── store.steampowered.com │ ├── store.usps.com │ ├── strategywiki.org │ ├── studentaid.ed.gov │ ├── students.ubc.ca │ ├── stylecaster.com │ ├── sudaneseonline.com │ ├── support.logi.com │ ├── support.microsoft.com │ ├── support.mozilla.org │ ├── swappa.com │ ├── sydney.edu.au │ ├── talkingpointsmemo.com │ ├── tallysolutions.com │ ├── targetjobs.co.uk │ ├── tatarstan.ru │ ├── te.eg │ ├── tea.texas.gov │ ├── techresources.oecd.org │ ├── testmy.net │ ├── tfl.gov.uk │ ├── theadvocate.com │ ├── thebiglead.com │ ├── theconversation.com │ ├── thehill.com │ ├── thehun.net │ ├── themeisle.com │ ├── thenextweb.com │ ├── theoldreader.com │ ├── theonion.com │ ├── theweek.com │ ├── thewest.com.au │ ├── time.com │ ├── time.is │ ├── timesofindia.indiatimes.com │ ├── tinyurl.com │ ├── tomcat.apache.org │ ├── topgolf.com │ ├── toronto.citynews.ca │ ├── torontosun.com │ ├── townhall.com │ ├── tradingeconomics.com │ ├── translate.google.com │ ├── trello.com │ ├── triblive.com │ ├── trophymanager.com │ ├── tureng.com │ ├── tutorial.math.lamar.edu │ ├── tvtropes.org │ ├── tweetdeck.twitter.com │ ├── twitter.com │ ├── ub.ac.id │ ├── ubuntu.com │ ├── ubuntuforums.org │ ├── uci.edu │ ├── uclouvain.be │ ├── uconn.edu │ ├── ugm.ac.id │ ├── uiowa.edu │ ├── uk.ask.com │ ├── uk.news.yahoo.com │ ├── uk.reuters.com │ ├── uk.rs-online.com │ ├── uk.yahoo.com │ ├── uncrate.com │ ├── undip.ac.id │ ├── unity.com │ ├── unity3d.com │ ├── untappd.com │ ├── up.pt │ ├── uploaded.net │ ├── us.playstation.com │ ├── uwaterloo.ca │ ├── uwm.edu │ ├── vacationstogo.com │ ├── valenciacollege.edu │ ├── validate.perfdrive.com │ ├── validator.w3.org │ ├── variety.com │ ├── venngage.com │ ├── verify.naukri.com │ ├── vimeo.com │ ├── virustotal.com │ ├── visual.ly │ ├── vnsgu.ac.in │ ├── vt.edu │ ├── w3layouts.com │ ├── wall.alphacoders.com │ ├── warwick.ac.uk │ ├── washingtondc.craigslist.org │ ├── wayne.edu │ ├── weather.com │ ├── weather.gc.ca │ ├── welcome.miami.edu │ ├── wgntv.com │ ├── whatculture.com │ ├── whatis.techtarget.com │ ├── whentowork.com │ ├── whirlpool.net.au │ ├── whoer.net │ ├── wiki.mabinogiworld.com │ ├── wikitravel.org │ ├── winscp.net │ ├── wlos.com │ ├── wmich.edu │ ├── wordpress.com │ ├── wordpress.org │ ├── world.coach.com │ ├── worldofsolitaire.com │ ├── worldoftanks.com │ ├── wow.gamepedia.com │ ├── wowprogress.com │ ├── writingexplained.org │ ├── wsu.edu │ ├── wtop.com │ ├── wustl.edu │ ├── wwd.com │ ├── wwnorton.com │ ├── www.1001fonts.com │ ├── www.1001freefonts.com │ ├── www.101greatgoals.com │ ├── www.123-reg.co.uk │ ├── www.123greetings.com │ ├── www.123rf.com │ ├── www.1fichier.com │ ├── www.24hourfitness.com │ ├── www.2checkout.com │ ├── www.365online.com │ ├── www.3m.com │ ├── www.4icu.org │ ├── www.4shared.com │ ├── www.53.com │ ├── www.6pm.com │ ├── www.8notes.com │ ├── www.8x8.com_443 │ ├── www.9news.com │ ├── www.9now.com.au │ ├── www.a2hosting.com │ ├── www.aa.com │ ├── www.aaa.com │ ├── www.aad.org │ ├── www.aafp.org │ ├── www.aamc.org │ ├── www.aao.org │ ├── www.aarp.org │ ├── www.aastocks.com │ ├── www.aau.dk │ ├── www.aau.edu.et │ ├── www.abb.com │ ├── www.abbreviations.com │ ├── www.abc.net.au │ ├── www.abcam.com │ ├── www.abcya.com │ ├── www.abebooks.com │ ├── www.ablebits.com │ ├── www.ableton.com │ ├── www.abr.business.gov.au │ ├── www.abs-cbn.com │ ├── www.absa.co.za │ ├── www.academy.com │ ├── www.accaglobal.com │ ├── www.accenture.com │ ├── www.accorhotels.com │ ├── www.accountingcoach.com │ ├── www.acehardware.com │ ├── www.acer.com │ ├── www.acm.org │ ├── www.aconvert.com │ ├── www.acronis.com │ ├── www.acronymfinder.com │ ├── www.act.org │ ├── www.active.com │ ├── www.activecampaign.com_443 │ ├── www.adaderana.lk │ ├── www.adam4adam.com │ ├── www.adameve.com │ ├── www.adb.org │ ├── www.addgene.org │ ├── www.addictinggames.com │ ├── www.additudemag.com │ ├── www.adelaide.edu.au │ ├── www.adelaidenow.com.au │ ├── www.admiral.com │ ├── www.adobe.com │ ├── www.adoptapet.com │ ├── www.adorama.com │ ├── www.adp.com │ ├── www.adultdvdempire.com │ ├── www.adultdvdtalk.com │ ├── www.adultswim.com │ ├── www.adultwork.com │ ├── www.advertserve.com │ ├── www.advfn.com │ ├── www.advocate.com │ ├── www.adweek.com │ ├── www.aegeanair.com │ ├── www.aeroflot.ru │ ├── www.aessuccess.org │ ├── www.aetna.com │ ├── www.af.mil │ ├── www.afdb.org │ ├── www.afl.com.au │ ├── www.afp.com │ ├── www.afr.com │ ├── www.afrihost.com │ ├── www.agilent.com │ ├── www.agoda.com │ ├── www.aiaa.org │ ├── www.airarabia.com_443 │ ├── www.airasia.com │ ├── www.airbaltic.com │ ├── www.airblue.com │ ├── www.airbus.com │ ├── www.aircanada.com │ ├── www.airdroid.com │ ├── www.aireuropa.com │ ├── www.airindia.in │ ├── www.airliners.net │ ├── www.ajc.com │ ├── www.ajol.info │ ├── www.akamai.com │ ├── www.akbank.com │ ├── www.al-sharq.com │ ├── www.al.com │ ├── www.alamo.com │ ├── www.alamy.com │ ├── www.alarabiya.net │ ├── www.alaska.edu │ ├── www.alaskaair.com │ ├── www.albany.edu │ ├── www.albumoftheyear.org │ ├── www.aldi.co.uk │ ├── www.aldi.com.au │ ├── www.aleks.com │ ├── www.alexa.com │ ├── www.alhayat.com │ ├── www.alibaba.com │ ├── www.aljazeera.com │ ├── www.allaboutcircuits.com │ ├── www.alldatasheet.com │ ├── www.allegiantair.com │ ├── www.allfreecrochet.com │ ├── www.alliedelec.com │ ├── www.allkpop.com │ ├── www.allure.com │ ├── www.ally.com │ ├── www.almanac.com │ ├── www.alot.com │ ├── www.alrajhibank.com.sa │ ├── www.alternet.org │ ├── www.ama-assn.org │ ├── www.amadeus.com │ ├── www.amadeus.net │ ├── www.amazon.ca │ ├── www.amazon.co.jp │ ├── www.amazon.co.uk │ ├── www.amazon.com │ ├── www.amc.com │ ├── www.amctheatres.com │ ├── www.americafirst.com │ ├── www.american.edu │ ├── www.americanexpress.com │ ├── www.americanfunds.com │ ├── www.americanthinker.com │ ├── www.ameriprise.com │ ├── www.ametsoc.org │ ├── www.amity.edu │ ├── www.amnesty.org │ ├── www.amrita.edu │ ├── www.ams.org │ ├── www.amtrak.com │ ├── www.amway.com │ ├── www.ana.co.jp │ ├── www.analog.com │ ├── www.anandtech.com │ ├── www.ancestry.co.uk │ ├── www.ancestry.com │ ├── www.andertons.co.uk │ ├── www.andhrabank.in │ ├── www.android.com │ ├── www.androidauthority.com │ ├── www.androidcentral.com │ ├── www.androidpit.com │ ├── www.andyroid.net │ ├── www.anime-expo.org │ ├── www.anime-planet.com │ ├── www.animenewsnetwork.com │ ├── www.ankara.edu.tr │ ├── www.anker.com │ ├── www.anntaylor.com │ ├── www.annualcreditreport.com │ ├── www.annualreviews.org │ ├── www.anonym.to │ ├── www.ansible.com │ ├── www.answers.com │ ├── www.ansys.com │ ├── www.anthropologie.com │ ├── www.anu.edu.au │ ├── www.anz.com │ ├── www.anz.com.au │ ├── www.aol.co.uk │ ├── www.aol.com │ ├── www.ap.org │ ├── www.apache.org │ ├── www.apartmentguide.com │ ├── www.apartmenttherapy.com │ ├── www.apc.com │ ├── www.apmex.com │ ├── www.apple.com │ ├── www.appnexus.com │ ├── www.aps.org │ ├── www.aptoide.com │ ├── www.aq.com │ ├── www.aqa.org.uk │ ├── www.ar15.com │ ├── www.arabic-keyboard.org │ ├── www.arabnews.com │ ├── www.aramex.com │ ├── www.arbetsformedlingen.se │ ├── www.arcamax.com │ ├── www.archdaily.com │ ├── www.architecturaldigest.com │ ├── www.architonic.com │ ├── www.archives.gov │ ├── www.archlinux.org │ ├── www.arduino.cc │ ├── www.argos.co.uk │ ├── www.ariba.com │ ├── www.arizona.edu │ ├── www.arkadium.com │ ├── www.arm.com │ ├── www.armslist.com │ ├── www.army.mil │ ├── www.arrl.org │ ├── www.arsenal.com │ ├── www.art.com │ ├── www.artnet.com │ ├── www.aruba.it │ ├── www.arubanetworks.com │ ├── www.arvest.com │ ├── www.asb.co.nz │ ├── www.ascd.org │ ├── www.asda.com │ ├── www.asexstories.com │ ├── www.ashford.edu │ ├── www.ashleyfurniture.com │ ├── www.asiaone.com │ ├── www.asics.com │ ├── www.ask.com │ ├── www.askmen.com │ ├── www.asm.org │ ├── www.asrock.com │ ├── www.associatedbank.com │ ├── www.asstr.org │ ├── www.astm.org │ ├── www.astro.com │ ├── www.astrology-zodiac-signs.com │ ├── www.astrology.com │ ├── www.astrosage.com │ ├── www.asu.edu │ ├── www.asus.com │ ├── www.asx.com.au │ ├── www.atlasobscura.com │ ├── www.ato.gov.au │ ├── www.att.com │ ├── www.au.dk │ ├── www.auburn.edu │ ├── www.auckland.ac.nz │ ├── www.audi.com │ ├── www.audible.com │ ├── www.audiobooks.com │ ├── www.audionetwork.com_443 │ ├── www.audiusa.com │ ├── www.austincc.edu │ ├── www.austrian.com │ ├── www.auth.gr │ ├── www.authorize.net │ ├── www.authorstream.com │ ├── www.autoblog.com_443 │ ├── www.autocarindia.com │ ├── www.autodesk.com │ ├── www.autoevolution.com │ ├── www.autoexpress.co.uk │ ├── www.autohotkey.com │ ├── www.automation.siemens.com │ ├── www.autonews.com │ ├── www.autosport.com │ ├── www.autotrader.co.uk │ ├── www.autotrader.com │ ├── www.availity.com │ ├── www.avalara.com │ ├── www.avast.com │ ├── www.avature.net │ ├── www.avaya.com │ ├── www.avclub.com │ ├── www.aventertainments.com │ ├── www.avery.com │ ├── www.avforums.com │ ├── www.avg.com │ ├── www.avianca.com │ ├── www.avira.com │ ├── www.avis.com │ ├── www.avsforum.com │ ├── www.avvo.com │ ├── www.aweber.com │ ├── www.awwwards.com │ ├── www.axisbank.com │ ├── www.azal.az │ ├── www.azcentral.com │ ├── www.b92.net │ ├── www.babylon.com │ ├── www.babynamewizard.com │ ├── www.babypips.com │ ├── www.backblaze.com │ ├── www.backcountry.com │ ├── www.backstage.com │ ├── www.bakuelectronics.az │ ├── www.balenciaga.com │ ├── www.baltimoresun.com │ ├── www.bankaustria.at │ ├── www.bankofamerica.com │ ├── www.bankofindia.co.in │ ├── www.bankofscotland.co.uk │ ├── www.bankofthewest.com │ ├── www.bankrate.com │ ├── www.bankwest.com.au │ ├── www.barbie.com │ ├── www.barchart.com │ ├── www.barclays.co.uk │ ├── www.barnesandnoble.com │ ├── www.barracuda.com │ ├── www.barrons.com │ ├── www.bartleby.com │ ├── www.base64decode.org │ ├── www.baseball-reference.com │ ├── www.basketball-reference.com │ ├── www.basspro.com │ ├── www.bathandbodyworks.com │ ├── www.bayareafastrak.org │ ├── www.baylor.edu │ ├── www.bbc.co.uk │ ├── www.bbt.com │ ├── www.bcb.gov.br │ ├── www.bcci.tv │ ├── www.bcg.com │ ├── www.bcit.ca │ ├── www.bcm.edu │ ├── www.bdjobs.com │ ├── www.beachbody.com │ ├── www.beautybay.com │ ├── www.becker.com │ ├── www.becu.org │ ├── www.bedbathandbeyond.com │ ├── www.beeradvocate.com │ ├── www.behance.net │ ├── www.behindthename.com │ ├── www.behindthevoiceactors.com │ ├── www.behindwoods.com │ ├── www.belgium.be │ ├── www.belk.com │ ├── www.belloflostsouls.net │ ├── www.benaughty.com │ ├── www.benchmarkemail.com │ ├── www.bendigobank.com.au │ ├── www.benq.com │ ├── www.bentley.com │ ├── www.bergdorfgoodman.com │ ├── www.berkeley.edu │ ├── www.berklee.edu │ ├── www.berlin.de │ ├── www.bestbuy.ca │ ├── www.bestbuy.com │ ├── www.bet.com │ ├── www.bet365.com │ ├── www.betfair.com │ ├── www.betterhealth.vic.gov.au │ ├── www.bettycrocker.com │ ├── www.bfi.org.uk │ ├── www.bg.ac.rs │ ├── www.bgsu.edu │ ├── www.bharatmatrimony.com │ ├── www.bhg.com │ ├── www.bhphotovideo.com │ ├── www.biblegateway.com │ ├── www.bibleserver.com │ ├── www.biblestudytools.com │ ├── www.bicycling.com │ ├── www.bidorbuy.co.za │ ├── www.bigcommerce.com │ ├── www.biggerpockets.com │ ├── www.biglots.com │ ├── www.bigstockphoto.com │ ├── www.bikeforums.net │ ├── www.bikeradar.com │ ├── www.bill.com │ ├── www.billboard.com │ ├── www.bim.com.tr │ ├── www.bing.com │ ├── www.binsearch.info │ ├── www.binus.ac.id │ ├── www.biography.com │ ├── www.biomedcentral.com │ ├── www.birmingham.ac.uk │ ├── www.birminghammail.co.uk │ ├── www.bitcoin.com │ ├── www.bitcomet.com │ ├── www.bitdefender.com │ ├── www.bitrix24.com │ ├── www.bits-pilani.ac.in │ ├── www.bitstamp.net │ ├── www.bittorrent.com │ ├── www.bizcommunity.com │ ├── www.bjs.com │ ├── www.bjtu.edu.cn │ ├── www.bk.com │ ├── www.bkstr.com │ ├── www.bl.uk │ ├── www.blackbaud.com │ ├── www.blackberry.com │ ├── www.blackboard.com │ ├── www.blackrock.com │ ├── www.blastingnews.com │ ├── www.bleacherreport.com │ ├── www.blender.org │ ├── www.blizzard.com │ ├── www.blogger.com │ ├── www.blogto.com │ ├── www.bloodyelbow.com │ ├── www.bloomberg.com │ ├── www.bloombergquint.com │ ├── www.bls.gov │ ├── www.blu-ray.com │ ├── www.bluedart.com │ ├── www.blueletterbible.org │ ├── www.bluenile.com │ ├── www.bluestacks.com │ ├── www.blurb.com │ ├── www.bmc.com │ ├── www.bme.hu │ ├── www.bmf.gv.at_443 │ ├── www.bmfbovespa.com.br │ ├── www.bmj.com │ ├── www.bmo.com │ ├── www.bmoharris.com │ ├── www.bmstu.ru │ ├── www.bmwusa.com │ ├── www.bnf.fr │ ├── www.bni.co.id │ ├── www.bobvila.com │ ├── www.bodybuilding.com │ ├── www.boeing.com │ ├── www.bollywoodhungama.com │ ├── www.bom.gov.au │ ├── www.bonappetit.com │ ├── www.bookbub.com │ ├── www.booking.com │ ├── www.booktopia.com.au │ ├── www.boostmobile.com │ ├── www.boots.com │ ├── www.bose.com │ ├── www.boston.com │ ├── www.bostonglobe.com │ ├── www.bostonherald.com │ ├── www.bou.edu.bd │ ├── www.box.com │ ├── www.boxofficemojo.com │ ├── www.bp.com │ ├── www.bplans.com │ ├── www.bpost.be │ ├── www.brainpop.com │ ├── www.brainyquote.com │ ├── www.bravotv.com │ ├── www.breakingnews.ie │ ├── www.bricklink.com │ ├── www.brides.com │ ├── www.bridgebase.com │ ├── www.brightcove.com │ ├── www.bris.ac.uk │ ├── www.britannica.com │ ├── www.britishairways.com │ ├── www.broadcom.com_443 │ ├── www.broadway.com │ ├── www.broadwayworld.com │ ├── www.brookings.edu │ ├── www.brooksbrothers.com │ ├── www.brothersoft.com │ ├── www.brown.edu │ ├── www.brownells.com │ ├── www.brownsfashion.com │ ├── www.browserling.com │ ├── www.brusselsairlines.com │ ├── www.bseindia.com │ ├── www.bsi.ir │ ├── www.bsp.com.pg │ ├── www.bsu.by │ ├── www.bt.com │ ├── www.bu.edu │ ├── www.buffalo.edu │ ├── www.bukkit.org │ ├── www.bund.de │ ├── www.bungie.net │ ├── www.bunnings.com.au │ ├── www.business-standard.com │ ├── www.business.com │ ├── www.business.gov.au │ ├── www.businessinsider.com │ ├── www.businesswire.com │ ├── www.buybuybaby.com │ ├── www.bvg.de │ ├── www.byu.edu │ ├── www.c-sharpcorner.com │ ├── www.c-span.org │ ├── www.ca.com │ ├── www.ca.gov │ ├── www.cableone.net │ ├── www.cac.gov.ng │ ├── www.caclubindia.com │ ├── www.caesars.com │ ├── www.cafemom.com │ ├── www.cafepress.com │ ├── www.calculatorsoup.com │ ├── www.calendarlabs.com │ ├── www.callofduty.com │ ├── www.calottery.com │ ├── www.calpoly.edu │ ├── www.caltech.edu_443 │ ├── www.cam.ac.uk │ ├── www.cam4.com │ ├── www.cambridge.org │ ├── www.cambridgeenglish.org │ ├── www.campaignmonitor.com │ ├── www.campingworld.com │ ├── www.canadacomputers.com │ ├── www.canadavisa.com │ ├── www.canadiantire.ca │ ├── www.canarabank.in │ ├── www.cancer.gov │ ├── www.cancerresearchuk.org │ ├── www.canon.com │ ├── www.canstockphoto.com │ ├── www.capcom.com │ ├── www.capgemini.com │ ├── www.capitalone.com │ ├── www.caracol.com.co │ ├── www.caradvice.com.au │ ├── www.caranddriver.com │ ├── www.carcomplaints.com │ ├── www.cardkingdom.com │ ├── www.care.com │ ├── www.care2.com │ ├── www.carecredit.com │ ├── www.careerbuilder.com │ ├── www.careerjunction.co.za │ ├── www.careerlauncher.com │ ├── www.cargurus.com │ ├── www.carid.com │ ├── www.caringbridge.org │ ├── www.carleton.edu │ ├── www.carnival.com │ ├── www.carnoc.com │ ├── www.carphonewarehouse.com │ ├── www.carsales.com.au │ ├── www.carscoops.com │ ├── www.carsguide.com.au │ ├── www.carters.com │ ├── www.cartier.com │ ├── www.carwale.com │ ├── www.cas.org │ ├── www.cat.com │ ├── www.cathaypacific.com │ ├── www.caughtoffside.com │ ├── www.cbc.ca │ ├── www.cbn.com │ ├── www.cbp.gov │ ├── www.cbr.com │ ├── www.cbs.com │ ├── www.cbsnews.com │ ├── www.cbssports.com │ ├── www.cbtnuggets.com │ ├── www.cc.com │ ├── www.cccs.edu │ ├── www.cdc.gov │ ├── www.cdiscount.com │ ├── www.cdjapan.co.jp │ ├── www.cdw.com │ ├── www.cedefop.europa.eu │ ├── www.celcom.com.my │ ├── www.celine.com │ ├── www.cell.com │ ├── www.celtx.com │ ├── www.cengage.com │ ├── www.census.gov │ ├── www.centerpointenergy.com │ ├── www.centos.org │ ├── www.centurylink.com │ ├── www.cerner.com │ ├── www.ceskatelevize.cz │ ├── www.cfainstitute.org │ ├── www.cfr.org │ ├── www.cgg.gov.in │ ├── www.chainreactioncycles.com │ ├── www.chanel.com │ ├── www.change.org │ ├── www.channel4.com │ ├── www.channelfireball.com │ ├── www.channelstv.com │ ├── www.chapters.indigo.ca │ ├── www.charlotteobserver.com │ ├── www.chase.com │ ├── www.chat-avenue.com │ ├── www.cheaperthandirt.com │ ├── www.cheaptickets.com │ ├── www.cheathappens.com │ ├── www.checkpoint.com │ ├── www.chelseafc.com │ ├── www.chess.com │ ├── www.chessbomb.com │ ├── www.chessgames.com │ ├── www.chevrolet.com │ ├── www.chevron.com │ ├── www.chicagotribune.com │ ├── www.chick-fil-a.com_443 │ ├── www.childrensalon.com │ ├── www.childrensplace.com │ ├── www.chilis.com │ ├── www.china-airlines.com │ ├── www.chinadaily.com.cn │ ├── www.chinahighlights.com │ ├── www.chipotle.com │ ├── www.choicehotels.com │ ├── www.chowhound.com │ ├── www.christianitytoday.com │ ├── www.christianlouboutin.com │ ├── www.christianpost.com │ ├── www.christies.com │ ├── www.chromium.org │ ├── www.chron.com │ ├── www.chroniclelive.co.uk │ ├── www.chrono24.com │ ├── www.chrysler.com │ ├── www.chula.ac.th │ ├── www.cia.gov │ ├── www.cibc.com │ ├── www.cibeg.com │ ├── www.cic.gc.ca │ ├── www.cigna.com │ ├── www.cinemablend.com │ ├── www.cineworld.co.uk │ ├── www.cio.com │ ├── www.cisco.com │ ├── www.cision.com │ ├── www.citationmachine.net │ ├── www.citehr.com │ ├── www.citibank.com.hk │ ├── www.citibank.com.sg │ ├── www.citizensbank.com │ ├── www.citrix.com │ ├── www.cityu.edu.hk │ ├── www.civfanatics.com │ ├── www.civilserviceindia.com │ ├── www.cj.com │ ├── www.clalit.co.il │ ├── www.classicfm.com │ ├── www.classifiedads.com │ ├── www.classlink.com_443 │ ├── www.classmates.com │ ├── www.cleancss.com │ ├── www.clemson.edu │ ├── www.cleveland.com │ ├── www.clickbank.com │ ├── www.clickindia.com │ ├── www.clickondetroit.com │ ├── www.cliphunter.com │ ├── www.clips4sale.com │ ├── www.cloudways.com │ ├── www.clublexus.com │ ├── www.cma-cgm.com │ ├── www.cmegroup.com │ ├── www.cms.gov │ ├── www.cmu.edu │ ├── www.cna.com.tw │ ├── www.cnbc.com │ ├── www.cnet.com │ ├── www.cnrs.fr │ ├── www.cntraveler.com │ ├── www.cnu.edu.cn │ ├── www.co-operativebank.co.uk │ ├── www.cochrane.org │ ├── www.codeblocks.org │ ├── www.codecademy.com │ ├── www.codeguru.com │ ├── www.codeproject.com │ ├── www.coe.int │ ├── www.cognizant.com │ ├── www.coldwellbanker.com │ ├── www.coldwellbankerhomes.com │ ├── www.colehaan.com │ ├── www.coles.com.au │ ├── www.colgate.com │ ├── www.collarspace.com │ ├── www.collegeboard.org │ ├── www.collegeconfidential.com │ ├── www.collegenet.com │ ├── www.colorado.edu │ ├── www.colostate.edu │ ├── www.columbia.com │ ├── www.columbia.edu │ ├── www.comicbookmovie.com │ ├── www.comingsoon.net │ ├── www.commbank.com.au │ ├── www.commercebank.com │ ├── www.commondreams.org │ ├── www.commonsensemedia.org │ ├── www.companieshouse.gov.uk │ ├── www.complex.com │ ├── www.comptia.org │ ├── www.computerhope.com │ ├── www.computerworld.com │ ├── www.comsol.com │ ├── www.concordia.ca │ ├── www.concur.com │ ├── www.condor.com │ ├── www.conduit.com │ ├── www.confirmit.com │ ├── www.confused.com │ ├── www.congress.gov │ ├── www.constantcontact.com │ ├── www.consumeraffairs.com │ ├── www.consumercomplaints.in │ ├── www.consumerreports.org │ ├── www.consumersearch.com │ ├── www.containerstore.com │ ├── www.converse.com │ ├── www.convertfiles.com │ ├── www.cookinglight.com │ ├── www.coolermaster.com_443 │ ├── www.copaair.com │ ├── www.copart.com │ ├── www.copyright.com │ ├── www.corel.com │ ├── www.cornell.edu │ ├── www.correios.com.br │ ├── www.cosmopolitan.com │ ├── www.couchsurfing.com │ ├── www.countryliving.com │ ├── www.courant.com │ ├── www.couriermail.com.au │ ├── www.coursera.org │ ├── www.courts.ca.gov │ ├── www.courts.state.ny.us │ ├── www.covers.com │ ├── www.cox.com │ ├── www.cp.pt │ ├── www.cp24.com │ ├── www.cpanel.net │ ├── www.cplusplus.com │ ├── www.cra-arc.gc.ca │ ├── www.cracked.com │ ├── www.cratejoy.com │ ├── www.creativecow.net │ ├── www.credit-suisse.com │ ├── www.creditonebank.com │ ├── www.crestron.com │ ├── www.cricbuzz.com │ ├── www.cricket.com.au │ ├── www.cricketwireless.com │ ├── www.crooksandliars.com │ ├── www.crosswalk.com │ ├── www.crucial.com │ ├── www.cruisecritic.com_443 │ ├── www.crutchfield.com │ ├── www.cs.cmu.edu │ ├── www.cs.princeton.edu │ ├── www.cs.washington.edu │ ├── www.csail.mit.edu │ ├── www.csu.edu.au │ ├── www.csulb.edu │ ├── www.csun.edu │ ├── www.csus.edu │ ├── www.ct.gov │ ├── www.ctvnews.ca │ ├── www.cuni.cz │ ├── www.cuny.edu │ ├── www.currys.co.uk │ ├── www.curtin.edu.au │ ├── www.customink.com │ ├── www.customs.go.kr │ ├── www.cv-library.co.uk │ ├── www.cvent.com │ ├── www.cvs.com │ ├── www.cvut.cz │ ├── www.cyberciti.biz │ ├── www.cyberlink.com │ ├── www.cybersource.com │ ├── www.cyclingnews.com │ ├── www.dafont.com │ ├── www.dailyfx.com │ ├── www.dailykos.com │ ├── www.dailynayadiganta.com │ ├── www.dailystar.co.uk │ ├── www.dailytelegraph.com.au │ ├── www.daimler.com │ ├── www.dal.ca │ ├── www.dallasnews.com │ ├── www.daniweb.com │ ├── www.dartmouth.edu │ ├── www.datasciencecentral.com │ ├── www.daveramsey.com │ ├── www.davidsbridal.com │ ├── www.dazeddigital.com │ ├── www.db.com │ ├── www.dbs.com │ ├── www.dc.gov │ ├── www.dcccd.edu │ ├── www.deakin.edu.au │ ├── www.debenhams.com │ ├── www.decathlon.co.uk │ ├── www.deccanchronicle.com │ ├── www.deccanherald.com │ ├── www.deere.com │ ├── www.deezer.com │ ├── www.defense.gov │ ├── www.definitions.net │ ├── www.dell.com │ ├── www.delta.com │ ├── www.deluxe.com │ ├── www.democraticunderground.com │ ├── www.denverpost.com │ ├── www.depaul.edu │ ├── www.deped.gov.ph │ ├── www.deseretnews.com │ ├── www.designboom.com │ ├── www.designcrowd.com │ ├── www.desiringgod.org │ ├── www.desjardins.com │ ├── www.desmos.com │ ├── www.detroitnews.com │ ├── www.deu.edu.tr │ ├── www.devexpress.com │ ├── www.deviantart.com │ ├── www.dezeen.com │ ├── www.dhl.com │ ├── www.dhs.gov │ ├── www.di.fm │ ├── www.dialog.lk │ ├── www.dice.com │ ├── www.dickblick.com │ ├── www.dickssportinggoods.com │ ├── www.dict.cc │ ├── www.digicert.com │ ├── www.digikey.com │ ├── www.digit.in │ ├── www.digitalocean.com │ ├── www.digitalriver.com │ ├── www.digitaltrends.com │ ├── www.dignitymemorial.com │ ├── www.dillards.com │ ├── www.dior.com │ ├── www.diplomatie.gouv.fr │ ├── www.directhit.com │ ├── www.directindustry.com │ ├── www.directv.com │ ├── www.discogs.com │ ├── www.discover.com │ ├── www.discovery.com │ ├── www.dish.com │ ├── www.dishtv.in │ ├── www.disney.com │ ├── www.disneylandparis.com │ ├── www.diy.com │ ├── www.diynetwork.com │ ├── www.diyphotography.net │ ├── www.djangoproject.com │ ├── www.dlink.com │ ├── www.dll-files.com │ ├── www.dmdc.osd.mil │ ├── www.dmv.ca.gov │ ├── www.doaj.org │ ├── www.docusign.com │ ├── www.dodge.com │ ├── www.dofus.com │ ├── www.dogpile.com │ ├── www.dol.gov │ ├── www.dol.wa.gov │ ├── www.dollarshaveclub.com │ ├── www.dollartree.com │ ├── www.dollskill.com │ ├── www.domain.com │ ├── www.domaintools.com │ ├── www.dominos.com │ ├── www.dominos.com.au │ ├── www.domo.com │ ├── www.dowjones.com │ ├── www.downloadhelper.net │ ├── www.doximity.com │ ├── www.dpd.com │ ├── www.dpe.gov.bd │ ├── www.dpm.org.cn │ ├── www.dpreview.com_443 │ ├── www.dreamhost.com │ ├── www.dreamstime.com │ ├── www.drexel.edu │ ├── www.drivereasy.com │ ├── www.driverguide.com │ ├── www.driveridentifier.com │ ├── www.drivespark.com │ ├── www.dropbox.com │ ├── www.drugabuse.gov │ ├── www.drugs.com │ ├── www.drupal.org │ ├── www.dslreports.com │ ├── www.dstv.com │ ├── www.dteenergy.com │ ├── www.dtu.dk │ ├── www.du.edu │ ├── www.duke-energy.com │ ├── www.duluthtrading.com │ ├── www.dunelm.com │ ├── www.duolingo.com │ ├── www.duplichecker.com │ ├── www.dur.ac.uk │ ├── www.dvdvideosoft.com │ ├── www.dw.com │ ├── www.dwd.de │ ├── www.dwell.com │ ├── www.dx.com │ ├── www.dxc.technology │ ├── www.e3expo.com │ ├── www.ea.com │ ├── www.earthcam.com │ ├── www.earthlink.net │ ├── www.easeus.com │ ├── www.easports.com │ ├── www.easybib.com │ ├── www.easyjet.com │ ├── www.easynews.com │ ├── www.eater.com │ ├── www.eaton.com │ ├── www.ebags.com │ ├── www.ebaumsworld.com │ ├── www.ebay.ca │ ├── www.ebay.co.uk │ ├── www.ebay.com │ ├── www.ebay.com.au │ ├── www.ebay.ie │ ├── www.ebi.ac.uk │ ├── www.ebs.in │ ├── www.ebscohost.com │ ├── www.ebuyer.com │ ├── www.ecenglish.com │ ├── www.echinacities.com │ ├── www.eclipse.org │ ├── www.economie.gouv.fr │ ├── www.economist.com │ ├── www.ecu.edu.au │ ├── www.ecwid.com │ ├── www.ed.ac.uk │ ├── www.ed.gov │ ├── www.edaboard.com │ ├── www.edb.gov.hk │ ├── www.edd.ca.gov │ ├── www.eddiebauer.com │ ├── www.edrawsoft.com │ ├── www.edu.gov.az │ ├── www.edutopia.org │ ├── www.edweek.org │ ├── www.edx.org │ ├── www.eecs.berkeley.edu │ ├── www.eff.org │ ├── www.efirstbank.com │ ├── www.ego4u.com │ ├── www.egotastic.com │ ├── www.ehu.eus │ ├── www.eia.gov │ ├── www.elderscrollsonline.com │ ├── www.elegantthemes.com │ ├── www.eliteprospects.com │ ├── www.elitepvpers.com │ ├── www.elkhabar.com │ ├── www.elle.com │ ├── www.elsevier.com │ ├── www.elte.hu │ ├── www.embarcadero.com │ ├── www.emc.com │ ├── www.emerald.com_443 │ ├── www.emirates.com │ ├── www.emory.edu │ ├── www.emulator-zone.com │ ├── www.emuparadise.me │ ├── www.encyclopedia.com │ ├── www.endnote.com │ ├── www.energy.gov │ ├── www.eng-tips.com │ ├── www.engadget.com │ ├── www.englishgrammar.org │ ├── www.enterprise.com │ ├── www.entrepreneur.com │ ├── www.enworld.org │ ├── www.eonline.com │ ├── www.epa.gov │ ├── www.epfl.ch │ ├── www.epicgames.com │ ├── www.epicurious.com │ ├── www.epochconverter.com │ ├── www.epsilon.com │ ├── www.equifax.com │ ├── www.equifaxworkforce.com │ ├── www.equityapartments.com │ ├── www.ericsson.com │ ├── www.err.ee │ ├── www.esa.int │ ├── www.eset.com │ ├── www.esic.nic.in │ ├── www.eslkidstuff.com │ ├── www.espn.com │ ├── www.espncricinfo.com │ ├── www.esquire.com │ ├── www.esri.com │ ├── www.essence.com │ ├── www.estatesales.net │ ├── www.esteri.it │ ├── www.esurance.com │ ├── www.ethiopianairlines.com │ ├── www.etonline.com │ ├── www.etrailer.com │ ├── www.ets.org │ ├── www.etsy.com │ ├── www.etymonline.com │ ├── www.eurail.com │ ├── www.eurekalert.org │ ├── www.eurobank.gr │ ├── www.eurogamer.net │ ├── www.euronews.com │ ├── www.europarl.europa.eu │ ├── www.eurosport.com │ ├── www.eurostar.com │ ├── www.evaair.com │ ├── www.evanscycles.com │ ├── www.eventbrite.com │ ├── www.eventhubs.com │ ├── www.eveonline.com │ ├── www.evite.com │ ├── www.evo.com │ ├── www.ewg.org │ ├── www.examenglish.com │ ├── www.excel-easy.com │ ├── www.exeter.ac.uk │ ├── www.exlibrisgroup.com │ ├── www.expat.com │ ├── www.expatica.com │ ├── www.expatriates.com │ ├── www.expedia.ca │ ├── www.expedia.co.uk │ ├── www.expedia.com │ ├── www.experian.com │ ├── www.experts-exchange.com │ ├── www.exportersindia.com │ ├── www.express-scripts.com │ ├── www.express.co.uk │ ├── www.extremetech.com_443 │ ├── www.eyebuydirect.com │ ├── www.ezvid.com │ ├── www.f-list.net │ ├── www.f4wonline.com │ ├── www.fa.ru │ ├── www.faa.gov │ ├── www.fabswingers.com │ ├── www.facebook.com │ ├── www.fairmont.com │ ├── www.fakku.net │ ├── www.familyfriendpoems.com │ ├── www.familysearch.org │ ├── www.familytreedna.com_443 │ ├── www.famousbirthdays.com │ ├── www.fanatics.com │ ├── www.fandango.com │ ├── www.fandom.com │ ├── www.fanfiction.net │ ├── www.fangraphs.com │ ├── www.fao.org │ ├── www.faradars.org │ ├── www.fark.com │ ├── www.farmers.com │ ├── www.farnell.com │ ├── www.fas.harvard.edu │ ├── www.fastcompany.com │ ├── www.fastweb.com │ ├── www.fau.edu │ ├── www.fbi.gov │ ├── www.fcbarcelona.com │ ├── www.fcc.gov │ ├── www.fda.gov │ ├── www.federalbank.co.in │ ├── www.fedex.com │ ├── www.feedly.com │ ├── www.fema.gov │ ├── www.femjoy.com │ ├── www.fender.com │ ├── www.ffonts.net │ ├── www.fidelity.com │ ├── www.fido.ca │ ├── www.fifa.com │ ├── www.filefactory.com │ ├── www.filemaker.com │ ├── www.fileplanet.com │ ├── www.filgoal.com │ ├── www.filmaffinity.com │ ├── www.financialexpress.com │ ├── www.findagrave.com │ ├── www.findaphd.com │ ├── www.findlaw.com │ ├── www.fingerhut.com │ ├── www.finnair.com │ ├── www.firstbanknigeria.com_443 │ ├── www.firstcitizens.com │ ├── www.firstdata.com │ ├── www.firstenergycorp.com │ ├── www.firsttennessee.com │ ├── www.fisglobal.com │ ├── www.fiu.edu │ ├── www.fiverr.com │ ├── www.fixya.com │ ├── www.flagstar.com │ ├── www.flamingtext.com │ ├── www.flashscore.com │ ├── www.flaticon.com │ ├── www.fleshlight.com │ ├── www.flexjobs.com │ ├── www.flhsmv.gov │ ├── www.flickr.com │ ├── www.flighthub.com │ ├── www.flightnetwork.com │ ├── www.flightradar24.com │ ├── www.flightstats.com │ ├── www.flinders.edu.au │ ├── www.fling.com │ ├── www.flipkart.com │ ├── www.flirt4free.com │ ├── www.flyfrontier.com │ ├── www.flynas.com │ ├── www.flyordie.com │ ├── www.flypgs.com │ ├── www.flysat.com │ ├── www.flytap.com │ ├── www.flyuia.com │ ├── www.fmcsa.dot.gov │ ├── www.fmprc.gov.cn │ ├── www.fnb.co.za │ ├── www.fnp.com │ ├── www.fodors.com │ ├── www.fontspring.com │ ├── www.fontsquirrel.com │ ├── www.foodandwine.com │ ├── www.foodnetwork.com │ ├── www.fool.com │ ├── www.football-italia.net │ ├── www.football365.com │ ├── www.forbes.com │ ├── www.ford.com │ ├── www.fordham.edu │ ├── www.foreca.com │ ├── www.foreignaffairs.com │ ├── www.foreignpolicy.com │ ├── www.forever21.com │ ├── www.forexfactory.com │ ├── www.formsite.com │ ├── www.formula1.com │ ├── www.fortinet.com │ ├── www.forzieri.com │ ├── www.fossil.com │ ├── www.fotolia.com │ ├── www.fourseasons.com │ ├── www.fox.com │ ├── www.foxbusiness.com │ ├── www.foxitsoftware.com │ ├── www.foxnews.com │ ├── www.foxsports.com │ ├── www.foxsports.com.au │ ├── www.foxsportsasia.com │ ├── www.foxtel.com.au │ ├── www.fpl.com │ ├── www.fragrancenet.com │ ├── www.fragrantica.com │ ├── www.france24.com │ ├── www.fraunhofer.de │ ├── www.free-fonts.com │ ├── www.free-power-point-templates.com │ ├── www.free-scores.com │ ├── www.freeconferencecall.com │ ├── www.freecycle.org │ ├── www.freedownloadmanager.org │ ├── www.freeimages.com │ ├── www.freelancer.com │ ├── www.freeones.com │ ├── www.freep.com │ ├── www.freepatentsonline.com │ ├── www.freepdfconvert.com │ ├── www.freepeople.com │ ├── www.freerepublic.com │ ├── www.freshbooks.com │ ├── www.frontier.co.uk │ ├── www.frontiersin.org │ ├── www.frys.com │ ├── www.fsu.edu │ ├── www.ft.com │ ├── www.ftb.ca.gov │ ├── www.ftc.gov │ ├── www.fullsail.edu │ ├── www.funtrivia.com │ ├── www.furaffinity.net │ ├── www.fux.com │ ├── www.fxnetworks.com │ ├── www.fxstreet.com │ ├── www.gaiaonline.com │ ├── www.gallup.com │ ├── www.gamasutra.com │ ├── www.game-debate.com │ ├── www.game.co.uk │ ├── www.game.co.za │ ├── www.gamedesign.jp │ ├── www.gameinformer.com │ ├── www.gamerevolution.com │ ├── www.gameskinny.com │ ├── www.gamespot.com │ ├── www.gamesradar.com │ ├── www.ganeshaspeaks.com │ ├── www.gap.com │ ├── www.gardeningknowhow.com │ ├── www.garmin.com │ ├── www.gartner.com │ ├── www.garuda-indonesia.com │ ├── www.gatech.edu │ ├── www.gcu.edu │ ├── www.ge.com │ ├── www.geappliances.com │ ├── www.gear4music.com │ ├── www.gearboxsoftware.com │ ├── www.gearslutz.com │ ├── www.geek.com │ ├── www.geico.com │ ├── www.geneanet.org │ ├── www.geni.com │ ├── www.geocaching.com │ ├── www.geogebra.org │ ├── www.georgetown.edu │ ├── www.getharvest.com │ ├── www.getresponse.com │ ├── www.gettyimages.in │ ├── www.gfk.com │ ├── www.ghanamotion.com │ ├── www.ghanaweb.com │ ├── www.giant-bicycles.com │ ├── www.giantbomb.com │ ├── www.giantitp.com │ ├── www.gingersoftware.com │ ├── www.girlsaskguys.com │ ├── www.girlsgogames.com │ ├── www.givemesport.com │ ├── www.givenchy.com │ ├── www.gizbot.com │ ├── www.gizmochina.com │ ├── www.gizmodo.com │ ├── www.gks.ru │ ├── www.gla.ac.uk │ ├── www.glamour.com │ ├── www.glassesusa.com │ ├── www.glastonburyfestivals.co.uk │ ├── www.globalindustrial.com │ ├── www.globalspec.com │ ├── www.globaltestmarket.com │ ├── www.globaltimes.cn │ ├── www.globe.com.ph │ ├── www.globes.co.il │ ├── www.gm.com │ ├── www.gmu.edu │ ├── www.gmx.com │ ├── www.gnc.com │ ├── www.gnome.org │ ├── www.gnu.org │ ├── www.goabroad.com │ ├── www.goair.in │ ├── www.goal.com │ ├── www.gobizkorea.com │ ├── www.gocomics.com │ ├── www.goethe.de │ ├── www.golden1.com │ ├── www.goldmansachs.com │ ├── www.golf.com │ ├── www.golfchannel.com │ ├── www.golfdigest.com │ ├── www.golfgalaxy.com │ ├── www.golfnow.com │ ├── www.goodhousekeeping.com │ ├── www.goodreads.com │ ├── www.goodrx.com │ ├── www.google.co.in │ ├── www.google.co.nz │ ├── www.google.co.uk │ ├── www.google.com │ ├── www.google.com.au │ ├── www.google.ie │ ├── www.google.lu │ ├── www.google.mu │ ├── www.google.org │ ├── www.google.rs │ ├── www.gosugamers.net │ ├── www.gotomypc.com │ ├── www.gotquestions.org │ ├── www.gottabemobile.com │ ├── www.gov.hk │ ├── www.gov.il │ ├── www.gov.uk │ ├── www.gov.uz │ ├── www.governmentjobs.com │ ├── www.gq-magazine.co.uk │ ├── www.gq.com │ ├── www.grab.com │ ├── www.grainger.com │ ├── www.grameenphone.com │ ├── www.grammarcheck.net │ ├── www.graphpad.com │ ├── www.grasscity.com │ ├── www.gravatar.com │ ├── www.graysonline.com │ ├── www.greatandhra.com │ ├── www.greatschools.org │ ├── www.greenmangaming.com │ ├── www.greenpeace.org │ ├── www.greetingsisland.com │ ├── www.greyhound.com │ ├── www.griffith.edu.au │ ├── www.groupon.com │ ├── www.grubhub.com │ ├── www.gsa.gov │ ├── www.gsis.gr │ ├── www.gsmarena.com │ ├── www.gsu.edu │ ├── www.gtaforums.com │ ├── www.gtbank.com │ ├── www.guanajuato.gob.mx │ ├── www.guinnessworldrecords.com │ ├── www.guitarworld.com │ ├── www.gulf-times.com │ ├── www.gulfair.com │ ├── www.gumtree.com │ ├── www.gunbroker.com │ ├── www.guru.com │ ├── www.guru3d.com │ ├── www.gutenberg.org │ ├── www.gva.es │ ├── www.gvsu.edu │ ├── www.gwu.edu │ ├── www.h3c.com │ ├── www.haaretz.com │ ├── www.hacettepe.edu.tr │ ├── www.hackaday.com │ ├── www.hagerty.com │ ├── www.haier.com │ ├── www.halfords.com │ ├── www.halifax.co.uk │ ├── www.halooglasi.com │ ├── www.hamburg.de │ ├── www.handycafe.com │ ├── www.hangseng.com │ ├── www.har.com │ ├── www.harborfreight.com │ ├── www.harley-davidson.com │ ├── www.harpersbazaar.com │ ├── www.harvard.edu │ ├── www.harveynorman.com.au │ ├── www.hasbro.com │ ├── www.hattrick.org │ ├── www.hawaii.edu │ ├── www.hawaiianairlines.com │ ├── www.hbo.com │ ├── www.hbs.edu │ ├── www.hdfc.com │ ├── www.hdfcbank.com │ ├── www.hdfclife.com │ ├── www.hdfcsec.com │ ├── www.head-fi.org │ ├── www.headspace.com │ ├── www.health.com │ ├── www.health.nsw.gov.au │ ├── www.healthcare.gov │ ├── www.healthcaresource.com │ ├── www.healthgrades.com │ ├── www.healthstream.com │ ├── www.heart.org │ ├── www.heb.com │ ├── www.hellomagazine.com │ ├── www.helsinki.fi │ ├── www.hemmings.com │ ├── www.hentai-foundry.com │ ├── www.heraldsun.com.au │ ├── www.here.com │ ├── www.heritage.org │ ├── www.hermes.com │ ├── www.herold.at │ ├── www.hertz.com │ ├── www.hgtv.com │ ├── www.hhs.gov │ ├── www.hi5.com │ ├── www.hidemyass.com │ ├── www.highend3d.com │ ├── www.hightail.com │ ├── www.hilton.com │ ├── www.hindawi.com │ ├── www.hindustantimes.com │ ├── www.hireright.com │ ├── www.hiroshima-u.ac.jp │ ├── www.history.com │ ├── www.hitachi.co.jp │ ├── www.hitbullseye.com │ ├── www.hkbu.edu.hk │ ├── www.hkgolden.com │ ├── www.hktdc.com │ ├── www.hku.hk │ ├── www.hlb.com.my │ ├── www.hlj.com │ ├── www.hm.com │ ├── www.hobbylobby.com │ ├── www.hockey-reference.com │ ├── www.hokudai.ac.jp │ ├── www.hollandandbarrett.com │ ├── www.hollywoodreporter.com │ ├── www.homeaway.com │ ├── www.homebase.co.uk │ ├── www.homedepot.com │ ├── www.homes.com │ ├── www.homestead.com │ ├── www.honda.com │ ├── www.hondafinancialservices.com │ ├── www.honeywell.com │ ├── www.hopkinsmedicine.org │ ├── www.horoscope.com │ ├── www.hostgator.com │ ├── www.hotcelebshome.com │ ├── www.hotnigerianjobs.com │ ├── www.hotspotshield.com │ ├── www.hottopic.com │ ├── www.hotwire.com │ ├── www.houseoffraser.co.uk │ ├── www.howtoforge.com │ ├── www.howtogeek.com │ ├── www.hp.com │ ├── www.hrblock.com │ ├── www.hrs.de_443 │ ├── www.hrt.hr │ ├── www.hrw.org │ ├── www.hsbc.co.uk │ ├── www.hsbc.com │ ├── www.hsbc.com.my │ ├── www.hse.ru │ ├── www.hsn.com │ ├── www.htc.com │ ├── www.hulu.com │ ├── www.humanservices.gov.au │ ├── www.humblebundle.com │ ├── www.husqvarna.com │ ├── www.hy-vee.com │ ├── www.hyatt.com │ ├── www.hyperdia.com │ ├── www.hyundaiusa.com │ ├── www.iaai.com │ ├── www.iaea.org │ ├── www.iafd.com │ ├── www.iastate.edu │ ├── www.iberia.com │ ├── www.ibge.gov.br │ ├── www.ibm.com │ ├── www.ibo.org │ ├── www.ibtimes.com │ ├── www.icann.org │ ├── www.icicibank.com │ ├── www.icicidirect.com │ ├── www.iciciprulife.com │ ├── www.icims.com │ ├── www.iconfinder.com │ ├── www.icontact.com │ ├── www.ics.uci.edu │ ├── www.icy-veins.com │ ├── www.ideacellular.com │ ├── www.idealist.org │ ├── www.idlebrain.com │ ├── www.ieee.org │ ├── www.ielts.org │ ├── www.ieltsessentials.com │ ├── www.ietf.org │ ├── www.ifixit.com │ ├── www.igi-global.com │ ├── www.ignou.ac.in │ ├── www.iherb.com │ ├── www.iinet.net.au │ ├── www.iis.net │ ├── www.iitb.ac.in │ ├── www.iitd.ac.in │ ├── www.ikea.com │ ├── www.ilo.org │ ├── www.ilovepdf.com │ ├── www.imaging-resource.com │ ├── www.imdb.com │ ├── www.imf.org │ ├── www.immigration.govt.nz │ ├── www.immihelp.com │ ├── www.imobiliare.ro │ ├── www.imore.com │ ├── www.imperial.ac.uk │ ├── www.in.gov │ ├── www.inboxdollars.com │ ├── www.inc.com │ ├── www.indeed.ca │ ├── www.indeed.co.uk │ ├── www.indeed.com │ ├── www.independent.ie │ ├── www.india.gov.in │ ├── www.indiainfoline.com │ ├── www.indiamart.com │ ├── www.indiana.edu │ ├── www.indiaresults.com │ ├── www.indiastudychannel.com │ ├── www.indiatimes.com │ ├── www.indiatvnews.com │ ├── www.indiewire.com │ ├── www.indusind.com │ ├── www.indystar.com │ ├── www.infineon.com │ ├── www.info.com │ ├── www.infobel.com │ ├── www.infoplease.com │ ├── www.infor.com │ ├── www.informit.com │ ├── www.infosys.com │ ├── www.infowars.com │ ├── www.infoworld.com │ ├── www.ing.be │ ├── www.ingentaconnect.com │ ├── www.ingrammicro.com │ ├── www.ingress.com │ ├── www.inkfrog.com │ ├── www.inkscape.org │ ├── www.inmotionhosting.com │ ├── www.inn.co.il │ ├── www.inquirer.net │ ├── www.inquisitr.com │ ├── www.insearchoftheworldsmostbeautifulwoman.com │ ├── www.insidehighered.com │ ├── www.insight.com │ ├── www.insperity.com │ ├── www.instructables.com │ ├── www.instyle.com │ ├── www.intel.com │ ├── www.intelius.com │ ├── www.interactivebrokers.com │ ├── www.intercom.com │ ├── www.interieur.gouv.fr │ ├── www.intermedia.net │ ├── www.internetdownloadmanager.com │ ├── www.interpals.net │ ├── www.interserver.net │ ├── www.intuit.com │ ├── www.invaluable.com │ ├── www.investopedia.com │ ├── www.investorplace.com │ ├── www.investors.com │ ├── www.iol.co.za │ ├── www.iop.org │ ├── www.ip2location.com │ ├── www.ipage.com │ ├── www.ipower.com │ ├── www.iproperty.com.my │ ├── www.irantalent.com │ ├── www.irctc.co.in │ ├── www.ird.govt.nz │ ├── www.irib.ir │ ├── www.irishexaminer.com │ ├── www.irishtimes.com │ ├── www.irna.ir │ ├── www.ironman.com │ ├── www.irs.gov │ ├── www.isbank.com.tr │ ├── www.isd.com │ ├── www.islamicfinder.org │ ├── www.iso.org │ ├── www.israelpost.co.il │ ├── www.isro.gov.in │ ├── www.ist.psu.edu_443 │ ├── www.itftennis.com │ ├── www.its.ac.id │ ├── www.itsnicethat.com │ ├── www.ittf.com │ ├── www.itu.edu.tr │ ├── www.itu.int │ ├── www.itv.com │ ├── www.iust.ac.ir │ ├── www.ixigo.com │ ├── www.izotope.com │ ├── www.jacquielawson.com │ ├── www.jal.co.jp │ ├── www.jambase.com │ ├── www.jamendo.com │ ├── www.jamieoliver.com │ ├── www.japan-guide.com │ ├── www.japanesepod101.com_443 │ ├── www.japantimes.co.jp │ ├── www.jasminedirectory.com │ ├── www.java.com │ ├── www.jazztel.com │ ├── www.jbc.org │ ├── www.jbhifi.com.au │ ├── www.jbl.com │ ├── www.jboss.org │ ├── www.jcpenney.com │ ├── www.jcrew.com │ ├── www.jeep.com │ ├── www.jensonusa.com │ ├── www.jetblue.com │ ├── www.jetro.go.jp │ ├── www.jetstar.com │ ├── www.jeunesseglobal.com │ ├── www.jezebel.com │ ├── www.jhu.edu │ ├── www.jigidi.com │ ├── www.jigsawplanet.com │ ├── www.jimmyjohns.com │ ├── www.jma.go.jp │ ├── www.jmi.ac.in │ ├── www.jnj.com │ ├── www.jnu.ac.in │ ├── www.jnu.edu.cn │ ├── www.joann.com │ ├── www.jobs.ac.uk │ ├── www.jobs.net │ ├── www.jobs.nhs.uk │ ├── www.jobstreet.com.my │ ├── www.jobthai.com │ ├── www.johnlewis.com │ ├── www.joomla.org │ ├── www.jorudan.co.jp │ ├── www.journals.uchicago.edu │ ├── www.jove.com │ ├── www.jpc.de │ ├── www.jpl.nasa.gov │ ├── www.jpmorgan.com │ ├── www.jpmorganchase.com │ ├── www.jpost.com │ ├── www.jsonline.com │ ├── www.jst.go.jp │ ├── www.jtb.co.jp │ ├── www.jumia.com.ng │ ├── www.juniper.net │ ├── www.juno.co.uk │ ├── www.juno.com │ ├── www.just-eat.co.uk │ ├── www.justanswer.com │ ├── www.justgiving.com │ ├── www.justice.gov │ ├── www.justinguitar.com │ ├── www.justjared.com │ ├── www.justonecookbook.com │ ├── www.juventus.com │ ├── www.jwpepper.com │ ├── www.k-state.edu │ ├── www.k12.com │ ├── www.k12reader.com │ ├── www.kaist.ac.kr │ ├── www.kali.org │ ├── www.kaltura.com │ ├── www.kansai-u.ac.jp │ ├── www.kansascity.com │ ├── www.karnataka.gov.in │ ├── www.kasikornbank.com │ ├── www.kaspersky.com │ ├── www.kau.edu.sa │ ├── www.kayak.com │ ├── www.kayako.com │ ├── www.kbb.com │ ├── www.kbstar.com │ ├── www.kcl.ac.uk │ ├── www.kddi.com │ ├── www.kdnuggets.com │ ├── www.keenspot.com │ ├── www.keil.com │ ├── www.keio.ac.jp │ ├── www.kela.fi │ ├── www.kennesaw.edu │ ├── www.kent.ac.uk │ ├── www.kent.edu │ ├── www.key.com │ ├── www.khaleejtimes.com │ ├── www.khanacademy.org │ ├── www.kickoff.com │ ├── www.kijiji.ca │ ├── www.kingarthurflour.com │ ├── www.kingcounty.gov │ ├── www.kingjamesbibleonline.org │ ├── www.kingston.com │ ├── www.kinokuniya.co.jp │ ├── www.kiplinger.com │ ├── www.kit.edu │ ├── www.kitco.com │ ├── www.kiwibank.co.nz │ ├── www.klm.com │ ├── www.kmart.com │ ├── www.knou.ac.kr │ ├── www.kobe-u.ac.jp │ ├── www.komando.com │ ├── www.kompass.com │ ├── www.kongregate.com │ ├── www.koreanair.com │ ├── www.koreatimes.co.kr │ ├── www.kotaku.com.au │ ├── www.kqed.org │ ├── www.ksl.com │ ├── www.ktb.co.th │ ├── www.kth.se │ ├── www.ku.edu │ ├── www.kuleuven.be │ ├── www.kvb.co.in │ ├── www.kvraudio.com │ ├── www.kw.com │ ├── www.kw.zain.com │ ├── www.kwsp.gov.my │ ├── www.kxan.com │ ├── www.kyoto-u.ac.jp │ ├── www.labcorp.com │ ├── www.lacity.org │ ├── www.lacounty.gov │ ├── www.lacourt.org │ ├── www.ladwp.com │ ├── www.lamar.edu │ ├── www.lamborghini.com │ ├── www.lampsplus.com │ ├── www.lan.com │ ├── www.lancaster.ac.uk │ ├── www.landrover.com │ ├── www.landsend.com │ ├── www.lanebryant.com │ ├── www.laptopmag.com │ ├── www.last.fm │ ├── www.latimes.com │ ├── www.latrobe.edu.au │ ├── www.lavasoft.com │ ├── www.law.cornell.edu │ ├── www.lawdepot.com │ ├── www.lbc.co.uk │ ├── www.lbl.gov │ ├── www.lboro.ac.uk │ ├── www.lds.org │ ├── www.le.ac.uk │ ├── www.leeds.ac.uk │ ├── www.legacy.com │ ├── www.legalzoom.com │ ├── www.lego.com │ ├── www.lendingtree.com │ ├── www.lenovo.com │ ├── www.letour.fr │ ├── www.letsrun.com │ ├── www.level3.com │ ├── www.lexis.com │ ├── www.lexisnexis.com │ ├── www.lexus.com │ ├── www.lg.com │ ├── www.lgbtqnation.com │ ├── www.lib.umich.edu │ ├── www.liberty.edu │ ├── www.libertymutual.com │ ├── www.librarything.com │ ├── www.libsyn.com │ ├── www.liebertpub.com │ ├── www.lifehack.org │ ├── www.lifesitenews.com │ ├── www.lifeway.com │ ├── www.lifewire.com │ ├── www.lightningmaps.org │ ├── www.lingq.com │ ├── www.linguee.com │ ├── www.linkedin.com │ ├── www.linode.com │ ├── www.linux.com │ ├── www.linuxquestions.org │ ├── www.lionbridge.com │ ├── www.lipsum.com │ ├── www.liquidweb.com │ ├── www.liquor.com │ ├── www.list.am │ ├── www.listal.com │ ├── www.literotica.com │ ├── www.litmos.com │ ├── www.liveabout.com │ ├── www.livejournal.com │ ├── www.liveleak.com │ ├── www.liverpoolecho.co.uk │ ├── www.liverpoolfc.com │ ├── www.livescience.com │ ├── www.livescore.com │ ├── www.livingsocial.com │ ├── www.livingspaces.com │ ├── www.llbean.com │ ├── www.lloydsbank.com │ ├── www.lmgtfy.com │ ├── www.loc.gov │ ├── www.local.ch │ ├── www.local.com │ ├── www.loewe.com │ ├── www.logitech.com │ ├── www.logos.com │ ├── www.lonelyplanet.com │ ├── www.lonestar.edu │ ├── www.lookfantastic.com │ ├── www.looperman.com │ ├── www.lordandtaylor.com │ ├── www.lotterypost.com │ ├── www.lowyat.net │ ├── www.lpu.in │ ├── www.lsac.org │ ├── www.lse.ac.uk │ ├── www.lsu.edu │ ├── www.lufthansa.com │ ├── www.lulu.com │ ├── www.lulus.com │ ├── www.lyft.com │ ├── www.lynda.com │ ├── www.lyngsat.com │ ├── www.lyricfind.com │ ├── www.lyrics.com │ ├── www.lyst.co.uk │ ├── www.macrumors.com │ ├── www.macsales.com │ ├── www.macworld.co.uk │ ├── www.macworld.com │ ├── www.made-in-china.com │ ├── www.madewell.com │ ├── www.madison.com │ ├── www.maharashtra.gov.in │ ├── www.mahidol.ac.th │ ├── www.mail.com │ ├── www.maine.edu │ ├── www.maine.gov │ ├── www.majorgeeks.com │ ├── www.majorleaguegaming.com │ ├── www.malaysiakini.com │ ├── www.manchester.ac.uk │ ├── www.mandatory.com │ ├── www.mandy.com │ ├── www.mango.com │ ├── www.manhunt.net │ ├── www.manoramaonline.com │ ├── www.mans.edu.eg │ ├── www.manulife.com │ ├── www.manutd.com │ ├── www.manythings.org │ ├── www.mapmyrun.com │ ├── www.mapquest.com │ ├── www.mapsofworld.com │ ├── www.marcos.com │ ├── www.maricopa.edu │ ├── www.marieclaire.com │ ├── www.marksandspencer.com │ ├── www.marriott.com │ ├── www.marthastewart.com │ ├── www.martinfowler.com │ ├── www.marutisuzuki.com │ ├── www.marvel.com │ ├── www.marxists.org │ ├── www.maryland.gov │ ├── www.mass.gov │ ├── www.masslive.com │ ├── www.massmutual.com │ ├── www.mastercard.com │ ├── www.masterclass.com │ ├── www.masterstudies.com │ ├── www.mataf.net │ ├── www.mathplayground.com │ ├── www.mathrubhumi.com │ ├── www.mathsisfun.com │ ├── www.mathway.com │ ├── www.mathworks.com │ ├── www.mavcsoport.hu │ ├── www.maxim.com │ ├── www.maximintegrated.com │ ├── www.maxon.net │ ├── www.maxpreps.com │ ├── www.mayoclinic.org │ ├── www.mba.com │ ├── www.mbs.jp │ ├── www.mbta.com │ ├── www.mbusa.com │ ├── www.mcafee.com │ ├── www.mcdonalds.com │ ├── www.mckesson.com │ ├── www.mckinsey.com │ ├── www.mcmaster.ca │ ├── www.mcmaster.com │ ├── www.mdpi.com │ ├── www.mec.ca │ ├── www.med.umich.edu │ ├── www.medallia.com │ ├── www.media.io │ ├── www.media.mit.edu │ ├── www.mediafire.com │ ├── www.mediawiki.org │ ├── www.medicare.gov │ ├── www.medici.tv │ ├── www.medicinenet.com │ ├── www.medscape.com │ ├── www.medtronic.com │ ├── www.meetup.com │ ├── www.megabus.com │ ├── www.megamillions.com │ ├── www.meijer.com │ ├── www.melaleuca.com │ ├── www.melia.com │ ├── www.meltwater.com │ ├── www.menards.com │ ├── www.menshealth.com │ ├── www.mensjournal.com │ ├── www.menswearhouse.com │ ├── www.mensxp.com │ ├── www.mercedes-benz.com │ ├── www.mercola.com │ ├── www.mercurynews.com │ ├── www.merriam-webster.com │ ├── www.merrickbank.com │ ├── www.met.hu │ ├── www.metacafe.com │ ├── www.metacritic.com │ ├── www.metafilter.com │ ├── www.metal-archives.com │ ├── www.metalinjection.net │ ├── www.meteo.be │ ├── www.meter.net │ ├── www.metlife.com │ ├── www.metmuseum.org │ ├── www.metoffice.gov.uk │ ├── www.metro.co.uk │ ├── www.metrolyrics.com │ ├── www.metservice.com │ ├── www.metu.edu.tr │ ├── www.mgm.gov.tr │ ├── www.mgmresorts.com │ ├── www.miamiherald.com │ ├── www.michaels.com │ ├── www.michigan.gov │ ├── www.microcenter.com │ ├── www.microchip.com │ ├── www.microfocus.com │ ├── www.micron.com │ ├── www.microsoft.com │ ├── www.mid-day.com │ ├── www.mid.ru │ ├── www.midwayusa.com │ ├── www.migrationsverket.se │ ├── www.milb.com │ ├── www.military.com │ ├── www.militarytimes.com │ ├── www.mindbodyonline.com │ ├── www.mindmeister.com │ ├── www.mindtools.com │ ├── www.minecraft.net │ ├── www.minecraftforum.net │ ├── www.miniaturemarket.com │ ├── www.miniclip.com │ ├── www.minitab.com │ ├── www.mint.com │ ├── www.minted.com │ ├── www.mirror.co.uk │ ├── www.missmalini.com │ ├── www.mit.edu │ ├── www.mitre.org │ ├── www.mixedmartialarts.com │ ├── www.mkcl.org │ ├── www.ml.com │ ├── www.mlb.com │ ├── www.mlbshop.com │ ├── www.mlbtraderumors.com │ ├── www.mlive.com │ ├── www.mlssoccer.com │ ├── www.mmafighting.com │ ├── www.mmamania.com │ ├── www.mmo-champion.com │ ├── www.mmorpg.com │ ├── www.mnn.com │ ├── www.mo.gov │ ├── www.mobikwik.com │ ├── www.mobile88.com │ ├── www.mobygames.com │ ├── www.modcloth.com │ ├── www.mofa.go.kr │ ├── www.moi.gov.tw │ ├── www.mom.gov.sg │ ├── www.moma.org │ ├── www.monash.edu │ ├── www.mondaq.com │ ├── www.moneris.com │ ├── www.moneycontrol.com │ ├── www.moneygram.com │ ├── www.moneysupermarket.com │ ├── www.monitor.co.ug │ ├── www.monki.com │ ├── www.monoprice.com │ ├── www.monster.ca │ ├── www.monster.co.uk │ ├── www.monster.com │ ├── www.monstercrawler.com │ ├── www.monsterindia.com │ ├── www.monstersandcritics.com │ ├── www.moo.com │ ├── www.moonpig.com │ ├── www.moosejaw.com │ ├── www.moph.go.th │ ├── www.morganstanley.com │ ├── www.morningstar.com │ ├── www.mortgagecalculator.org │ ├── www.motherjones.com │ ├── www.motorola.com │ ├── www.motorsport.com │ ├── www.motortrend.com │ ├── www.mouser.com │ ├── www.mouthshut.com │ ├── www.movavi.com │ ├── www.moviefone.com │ ├── www.mozilla.org │ ├── www.mpg.de │ ├── www.mps.it │ ├── www.mq.edu.au │ ├── www.mredllc.com │ ├── www.mrexcel.com │ ├── www.mrporngeek.com │ ├── www.mrskin.com │ ├── www.mskcc.org │ ├── www.msn.com │ ├── www.mt.com │ ├── www.mta.info │ ├── www.mtb.com │ ├── www.mtbr.com │ ├── www.mtggoldfish.com │ ├── www.mtgsalvation.com │ ├── www.mtr.com.hk │ ├── www.mtu.edu │ ├── www.mtv.com │ ├── www.mu.ac.in │ ├── www.muenchen.de │ ├── www.mun.ca │ ├── www.muni.cz │ ├── www.muscleandfitness.com │ ├── www.musicnotes.com │ ├── www.mwave.com.au │ ├── www.mweb.co.za │ ├── www.myer.com.au │ ├── www.myfantasyleague.com │ ├── www.myfico.com │ ├── www.myfitnesspal.com │ ├── www.myflorida.com │ ├── www.myfonts.com │ ├── www.mylife.com │ ├── www.mypoints.com │ ├── www.myprotein.com │ ├── www.myrealgames.com │ ├── www.mysanantonio.com │ ├── www.mysearch.com │ ├── www.mysql.com │ ├── www.myus.com │ ├── www.myvue.com │ ├── www.nab.com.au │ ├── www.nadra.gov.pk │ ├── www.nagoya-u.ac.jp │ ├── www.nairaland.com │ ├── www.namecheap.com │ ├── www.nap.edu │ ├── www.napaonline.com │ ├── www.nascar.com │ ├── www.nasdaq.com │ ├── www.nation.co.ke │ ├── www.national-lottery.co.uk │ ├── www.nationalarchives.gov.uk │ ├── www.nationalcar.com │ ├── www.nationalexpress.com │ ├── www.nationalgeographic.com │ ├── www.nationalpost.com │ ├── www.nationalrail.co.uk │ ├── www.nationalreview.com │ ├── www.nationstates.net │ ├── www.nationwide.co.uk │ ├── www.nationwide.com │ ├── www.native-instruments.com │ ├── www.naturalreaders.com │ ├── www.nature.com │ ├── www.natwest.com │ ├── www.nau.edu │ ├── www.navy.mil │ ├── www.nba.com │ ├── www.nbc.com │ ├── www.nbcchicago.com │ ├── www.nbcdfw.com │ ├── www.nbclosangeles.com │ ├── www.nbcnews.com │ ├── www.nbcnewyork.com │ ├── www.nbcsports.com │ ├── www.nber.org │ ├── www.nbg.gr │ ├── www.ncaa.com │ ├── www.ncdot.gov │ ├── www.nch.com.au │ ├── www.ncl.ac.uk │ ├── www.ncl.com │ ├── www.ncsecu.org │ ├── www.ncsoft.com │ ├── www.ncsu.edu │ ├── www.nctu.edu.tw │ ├── www.nd.edu_443 │ ├── www.ndtv.com │ ├── www.neb.com │ ├── www.nedbank.co.za │ ├── www.neimanmarcus.com │ ├── www.nejm.org │ ├── www.neopets.com │ ├── www.nespresso.com │ ├── www.net-a-porter.com │ ├── www.netapp.com │ ├── www.netcarshow.com │ ├── www.neteller.com │ ├── www.netflix.com │ ├── www.netgate.com │ ├── www.netgear.com │ ├── www.netsuite.com │ ├── www.netvibes.com │ ├── www.networksolutions.com │ ├── www.networkworld.com │ ├── www.newbalance.com │ ├── www.newcastle.edu.au │ ├── www.newegg.ca │ ├── www.newegg.com │ ├── www.newgrounds.com │ ├── www.newlook.com │ ├── www.newrelic.com │ ├── www.news.com.au │ ├── www.news12.com │ ├── www.newsarama.com │ ├── www.newscientist.com │ ├── www.newsday.com │ ├── www.newshub.co.nz │ ├── www.newsmax.com │ ├── www.newsnow.co.uk │ ├── www.newsobserver.com │ ├── www.newspapers.com │ ├── www.newstatesman.com │ ├── www.newvision.co.ug │ ├── www.newyorker.com │ ├── www.next.co.uk │ ├── www.nexusmods.com │ ├── www.nfl.com │ ├── www.nginx.com │ ├── www.nhk.or.jp │ ├── www.nhl.com │ ├── www.nhlbi.nih.gov │ ├── www.ni.com │ ├── www.niaaa.nih.gov │ ├── www.niams.nih.gov │ ├── www.nic.ir │ ├── www.nice.org.uk │ ├── www.niceincontact.com │ ├── www.nichd.nih.gov │ ├── www.nick.com.au │ ├── www.niddk.nih.gov │ ├── www.nifty.org │ ├── www.nih.gov │ ├── www.nii.ac.jp │ ├── www.nike.com │ ├── www.nikonusa.com │ ├── www.nimh.nih.gov │ ├── www.ning.com │ ├── www.nintendolife.com │ ├── www.nirsoft.net │ ├── www.nist.gov │ ├── www.nj.com │ ├── www.nla.gov.au │ ├── www.nlm.nih.gov │ ├── www.nme.com │ ├── www.nngroup.com │ ├── www.noip.com │ ├── www.nokia.com │ ├── www.nola.com │ ├── www.northeastern.edu │ ├── www.northwestern.edu │ ├── www.notonthehighstreet.com │ ├── www.nova.edu │ ├── www.npmjs.com │ ├── www.npr.org │ ├── www.nps.gov │ ├── www.nrl.com │ ├── www.ns.nl │ ├── www.nsf.gov │ ├── www.nsw.gov.au │ ├── www.nta.co.jp │ ├── www.ntt.com │ ├── www.ntu.edu.sg │ ├── www.ntv.co.jp │ ├── www.nuance.com │ ├── www.nudevista.com │ ├── www.nus.edu.sg │ ├── www.nvidia.com │ ├── www.nvshq.org │ ├── www.nwu.ac.za │ ├── www.nxp.com │ ├── www.ny.gov │ ├── www.nyc.gov │ ├── www.nydailynews.com │ ├── www.nypl.org │ ├── www.nysed.gov │ ├── www.nytimes.com │ ├── www.nyu.edu │ ├── www.nzherald.co.nz │ ├── www.o2.co.uk │ ├── www.oaed.gr │ ├── www.oas.org │ ├── www.ocado.com │ ├── www.oclc.org │ ├── www.ocregister.com │ ├── www.odeon.co.uk │ ├── www.odu.edu │ ├── www.offensive-security.com │ ├── www.office.co.uk │ ├── www.ohio.edu │ ├── www.ohsu.edu │ ├── www.oilandgasjobsearch.com │ ├── www.ok.gov │ ├── www.okcupid.com │ ├── www.okstate.edu │ ├── www.olivegarden.com │ ├── www.olympic.org │ ├── www.omaha.com │ ├── www.omegawatches.com │ ├── www.omgubuntu.co.uk │ ├── www.omniglot.com │ ├── www.on24.com │ ├── www.oneindia.com │ ├── www.onetravel.com │ ├── www.online-convert.com │ ├── www.online-tech-tips.com │ ├── www.online.citibank.co.in │ ├── www.onlineocr.net │ ├── www.onlyinyourstate.com │ ├── www.ontario.ca │ ├── www.onthisday.com │ ├── www.open.ac.uk │ ├── www.openair.com │ ├── www.openculture.com │ ├── www.opendns.com_443 │ ├── www.openoffice.org │ ├── www.openstack.org │ ├── www.openstreetmap.org │ ├── www.opensubtitles.org │ ├── www.opentable.com │ ├── www.opentext.com │ ├── www.opera.com │ ├── www.opm.gov │ ├── www.opodo.co.uk │ ├── www.opticsplanet.com_443 │ ├── www.optum.com │ ├── www.optus.com.au │ ├── www.oracle.com │ ├── www.orange.com │ ├── www.orbitz.com │ ├── www.oregon.gov │ ├── www.oregonlive.com │ ├── www.oreilly.com │ ├── www.orientaltrading.com │ ├── www.oriflame.com │ ├── www.orlandosentinel.com │ ├── www.osaka-u.ac.jp │ ├── www.osha.gov │ ├── www.osu.edu │ ├── www.otago.ac.nz │ ├── www.ou.edu │ ├── www.outlookindia.com │ ├── www.outsideonline.com │ ├── www.overclockers.co.uk │ ├── www.overdrive.com │ ├── www.owasp.org │ ├── www.ox.ac.uk │ ├── www.oxforddictionaries.com │ ├── www.pa.gov │ ├── www.padi.com │ ├── www.paessler.com │ ├── www.pagalguy.com │ ├── www.pageuppeople.com │ ├── www.paho.org │ ├── www.pakwheels.com_443 │ ├── www.paloaltonetworks.com │ ├── www.paltalk.com │ ├── www.panasonic.com │ ├── www.pandasecurity.com │ ├── www.pandora.com │ ├── www.pandora.net │ ├── www.pantone.com │ ├── www.papajohns.com │ ├── www.paradoxplaza.com │ ├── www.parallels.com │ ├── www.parcelforce.com │ ├── www.parents.com │ ├── www.paris.fr_443 │ ├── www.parliament.uk │ ├── www.partcommunity.com │ ├── www.parts-express.com │ ├── www.partycity.com │ ├── www.patagonia.com │ ├── www.patch.com │ ├── www.patreon.com │ ├── www.paulsmith.co.jp │ ├── www.paychex.com │ ├── www.paycom.com │ ├── www.paycor.com │ ├── www.paypal.com │ ├── www.payscale.com │ ├── www.payserve.com │ ├── www.pbs.org │ ├── www.pbtech.co.nz │ ├── www.pbteen.com │ ├── www.pc.gc.ca │ ├── www.pcc.edu │ ├── www.pccasegear.com │ ├── www.pcgamer.com │ ├── www.pch.com │ ├── www.pcmag.com │ ├── www.pcworld.co.uk │ ├── www.pcworld.com │ ├── www.pdfescape.com │ ├── www.pdfforge.org │ ├── www.pdfonline.com │ ├── www.pdftoword.com │ ├── www.pdx.edu │ ├── www.pearson.com │ ├── www.pearsoned.com │ ├── www.peekyou.com │ ├── www.pega.com │ ├── www.penfed.org │ ├── www.penguinrandomhouse.com │ ├── www.pennlive.com │ ├── www.penny-arcade.com │ ├── www.peopleadmin.com │ ├── www.pep.ph │ ├── www.pepboys.com │ ├── www.perfect-english-grammar.com │ ├── www.petco.com │ ├── www.petfinder.com │ ├── www.petmd.com │ ├── www.pets4homes.co.uk │ ├── www.petsmart.com │ ├── www.peugeot.com │ ├── www.pg.com │ ├── www.pgatour.com │ ├── www.pge.com │ ├── www.philippineairlines.com │ ├── www.philips.com │ ├── www.philstar.com │ ├── www.phish.net │ ├── www.phoenix.edu │ ├── www.phonearena.com │ ├── www.photoshopessentials.com │ ├── www.php.net │ ├── www.phpnuke.org │ ├── www.phrases.org.uk │ ├── www.physicsclassroom.com │ ├── www.physicsforums.com │ ├── www.piac.com.pk │ ├── www.pib.nic.in │ ├── www.pier1.com │ ├── www.pingdom.com │ ├── www.pingidentity.com │ ├── www.pinknews.co.uk │ ├── www.pinterest.com │ ├── www.pistonheads.com │ ├── www.pitt.edu │ ├── www.pivotaltracker.com │ ├── www.planetfitness.com │ ├── www.planetminecraft.com │ ├── www.planetsuzy.org │ ├── www.planetware.com │ ├── www.plannedparenthood.org │ ├── www.plantronics.com │ ├── www.playbill.com │ ├── www.playerauctions.com │ ├── www.playok.com │ ├── www.playstation.com │ ├── www.plos.org │ ├── www.pmi.org │ ├── www.pmnewsnigeria.com │ ├── www.pnas.org │ ├── www.pnet.co.za │ ├── www.poemhunter.com │ ├── www.pogo.com │ ├── www.pokemoncenter.com │ ├── www.pokernews.com │ ├── www.polar.com │ ├── www.polimi.it │ ├── www.politico.com │ ├── www.politifact.com │ ├── www.polyu.edu.hk │ ├── www.pond5.com │ ├── www.popsci.com │ ├── www.popularmechanics.com │ ├── www.porn.com │ ├── www.porsche.com │ ├── www.post-gazette.com │ ├── www.post.at │ ├── www.post.ch │ ├── www.post.japanpost.jp │ ├── www.postcrossing.com │ ├── www.posten.no │ ├── www.postgresql.org │ ├── www.posti.fi │ ├── www.postoffice.co.uk │ ├── www.potterybarn.com │ ├── www.powerschool.com │ ├── www.powerthesaurus.org │ ├── www.poznan.pl │ ├── www.practicefusion.com │ ├── www.pravda.ru │ ├── www.prc.gov.ph │ ├── www.premiumbeat.com │ ├── www.president.gov.ua │ ├── www.presonus.com │ ├── www.prestashop.com │ ├── www.prevention.com │ ├── www.priceline.com │ ├── www.primark.com │ ├── www.princess.com │ ├── www.princeton.edu │ ├── www.princetonreview.com │ ├── www.principal.com_443 │ ├── www.private.com │ ├── www.privateproperty.co.za │ ├── www.prnewswire.com │ ├── www.pro-football-reference.com │ ├── www.procore.com │ ├── www.progressive.com │ ├── www.prokerala.com │ ├── www.prometric.com │ ├── www.proprofs.com │ ├── www.proquest.com │ ├── www.prospects.ac.uk │ ├── www.providence.org │ ├── www.proz.com │ ├── www.prudential.com │ ├── www.prweb.com │ ├── www.pscu.com │ ├── www.pseg.com │ ├── www.psu.ac.th │ ├── www.psu.edu │ ├── www.pta.gov.pk │ ├── www.ptc.com │ ├── www.publix.com │ ├── www.purdue.edu │ ├── www.purplemath.com │ ├── www.pwc.com │ ├── www.python.org │ ├── www.qmul.ac.uk │ ├── www.qrz.com │ ├── www.qu.edu.qa │ ├── www.quackit.com │ ├── www.quadratec.com │ ├── www.qualtrics.com │ ├── www.qub.ac.uk │ ├── www.queensu.ca │ ├── www.queerty.com │ ├── www.questionpro.com │ ├── www.quetext.com │ ├── www.quia.com │ ├── www.quickenloans.com │ ├── www.qut.edu.au │ ├── www.qvc.com │ ├── www.r-bloggers.com │ ├── www.racingpost.com │ ├── www.rackspace.com │ ├── www.radiotimes.com │ ├── www.ragalahari.com │ ├── www.rakuten.com │ ├── www.ralphlauren.com │ ├── www.rammstein.de │ ├── www.rand.org │ ├── www.random.org │ ├── www.ranepa.ru │ ├── www.rarlab.com │ ├── www.rasmussen.edu │ ├── www.raspberrypi.org │ ├── www.ratemyprofessors.com │ ├── www.ratp.fr │ ├── www.ravelry.com │ ├── www.raymond.cc │ ├── www.rb.cz │ ├── www.rbcroyalbank.com │ ├── www.rbfcu.org │ ├── www.rcgroups.com │ ├── www.rci.com │ ├── www.rcn.com │ ├── www.rd.com │ ├── www.readinga-z.com │ ├── www.readingrockets.org │ ├── www.real.com │ ├── www.realestate.com.au │ ├── www.reallusion.com │ ├── www.realmadrid.com │ ├── www.realpage.com │ ├── www.realsimple.com │ ├── www.realtor.com │ ├── www.realvnc.com │ ├── www.recon.com │ ├── www.recreation.gov │ ├── www.recruit.net │ ├── www.red-gate.com │ ├── www.redbubble.com │ ├── www.redbull.com │ ├── www.redbus.in │ ├── www.redcafe.net │ ├── www.redcross.org │ ├── www.reddit.com │ ├── www.redhat.com │ ├── www.rediff.com │ ├── www.reebok.com │ ├── www.reed.co.uk │ ├── www.regions.com │ ├── www.regmovies.com │ ├── www.rei.com │ ├── www.rejseplanen.dk │ ├── www.remax.com │ ├── www.remita.net │ ├── www.renderosity.com │ ├── www.renderotica.com │ ├── www.renfe.com │ ├── www.rent.com │ ├── www.renweb.com │ ├── www.repairclinic.com │ ├── www.replicon.com │ ├── www.researchgate.net │ ├── www.reserveamerica.com │ ├── www.residentadvisor.net │ ├── www.restorationhardware.com │ ├── www.reuters.com │ ├── www.rev.com │ ├── www.reverbnation.com │ ├── www.reverso.net │ ├── www.reviewjournal.com │ ├── www.reviews.com │ ├── www.revolve.com │ ├── www.rfa.org │ ├── www.rfi.fr │ ├── www.rhino3d.com │ ├── www.rhymezone.com │ ├── www.rice.edu │ ├── www.ricksteves.com │ ├── www.rightmove.co.uk │ ├── www.ril.com │ ├── www.ringcentral.com │ ├── www.rinmarugames.com │ ├── www.rit.edu │ ├── www.riteaid.com │ ├── www.ritsumei.ac.jp │ ├── www.rivals.com │ ├── www.riverisland.com │ ├── www.rmit.edu.au │ ├── www.roadandtrack.com │ ├── www.roberthalf.com │ ├── www.roblox.com │ ├── www.roboform.com │ ├── www.roche.com │ ├── www.rochester.edu │ ├── www.rockauto.com │ ├── www.rocketlawyer.com │ ├── www.rockler.com │ ├── www.rockpapershotgun.com │ ├── www.rockstargames.com │ ├── www.rockwellautomation.com │ ├── www.rogerebert.com │ ├── www.roku.com │ ├── www.roland.com │ ├── www.rolex.com │ ├── www.rollingstone.com │ ├── www.romhacking.net │ ├── www.ronaldo7.net │ ├── www.rosettastone.com │ ├── www.rotary.org │ ├── www.rotowire.com │ ├── www.rotoworld.com │ ├── www.rottentomatoes.com │ ├── www.royalcaribbean.com │ ├── www.royalenfield.com │ ├── www.royalmail.com │ ├── www.rozee.pk │ ├── www.rpi.edu │ ├── www.rsc.org │ ├── www.rsl.ru │ ├── www.rstudio.com │ ├── www.rt.com │ ├── www.rt.ru │ ├── www.rtbf.be │ ├── www.rte.ie │ ├── www.ru.nl │ ├── www.ruc.edu.cn │ ├── www.rug.nl │ ├── www.runescape.com │ ├── www.runnersworld.com │ ├── www.rushlimbaugh.com │ ├── www.rutgers.edu │ ├── www.rwth-aachen.de │ ├── www.rxlist.com │ ├── www.ryanair.com │ ├── www.ryerson.ca │ ├── www.sabre.com │ ├── www.sabrehospitality.com │ ├── www.sacbee.com │ ├── www.sachsen.de │ ├── www.safeway.com │ ├── www.sage.com │ ├── www.sagepub.com │ ├── www.saglik.gov.tr │ ├── www.sainsburys.co.uk │ ├── www.saksfifthavenue.com │ ├── www.salary.com │ ├── www.salesforce.com │ ├── www.salliemae.com │ ├── www.salon.com │ ├── www.samsclub.com │ ├── www.samsung.com │ ├── www.sandiegouniontribune.com │ ├── www.sandisk.com │ ├── www.sans.org │ ├── www.santabanta.com │ ├── www.santander.co.uk │ ├── www.sap.com │ ├── www.sars.gov.za │ ├── www.sas.com │ ├── www.saudia.com │ ├── www.savethestudent.org │ ├── www.saveur.com │ ├── www.sba.gov │ ├── www.sbb.ch │ ├── www.sbmu.ac.ir │ ├── www.sbnation.com │ ├── www.sbs.com.au │ ├── www.sbtjapan.com │ ├── www.sc.edu │ ├── www.scad.edu │ ├── www.scarymommy.com │ ├── www.sce.com │ ├── www.schneider-electric.com │ ├── www.scholastic.com │ ├── www.schools.nyc.gov │ ├── www.schoolsfirstfcu.org │ ├── www.schwab.com │ ├── www.sciencealert.com │ ├── www.sciencedaily.com │ ├── www.sciencedirect.com │ ├── www.sciencemag.org │ ├── www.scientificamerican.com │ ├── www.scmp.com │ ├── www.scoreland.com │ ├── www.scotiabank.com │ ├── www.scotsman.com │ ├── www.screwfix.com │ ├── www.scribd.com │ ├── www.scu.edu │ ├── www.sdge.com │ ├── www.sdu.dk │ ├── www.seagate.com │ ├── www.seamless.com │ ├── www.sears.com │ ├── www.seatguru.com_443 │ ├── www.seattle.gov │ ├── www.seattlepi.com │ ├── www.seattletimes.com │ ├── www.seb.lt │ ├── www.sec.gov │ ├── www.seek.co.nz │ ├── www.seek.com.au │ ├── www.seetickets.com_443 │ ├── www.sej.co.jp │ ├── www.sejda.com │ ├── www.self.com │ ├── www.senate.gov │ ├── www.senecacollege.ca │ ├── www.seoul.go.kr │ ├── www.sephora.com │ ├── www.serif.com │ ├── www.sermoncentral.com │ ├── www.service-public.fr │ ├── www.seslisozluk.net │ ├── www.seventeen.com │ ├── www.sfchronicle.com │ ├── www.sfgate.com │ ├── www.sfimg.com │ ├── www.sfsu.edu │ ├── www.sfu.ca │ ├── www.sgx.com │ ├── www.shaadi.com │ ├── www.shacknews.com │ ├── www.shape.com │ ├── www.shapeways.com │ ├── www.shareasale.com │ ├── www.sharefile.com │ ├── www.shareit.com │ ├── www.sharekhan.com │ ├── www.shaw.ca │ ├── www.sheetmusicplus.com │ ├── www.sheffield.ac.uk │ ├── www.sheknows.com │ ├── www.shell.com │ ├── www.sherdog.com │ ├── www.sheridancollege.ca │ ├── www.shimano.com │ ├── www.sho.com │ ├── www.shockwave.com │ ├── www.shopify.com │ ├── www.shopmyexchange.com │ ├── www.shoprite.com │ ├── www.shoutmeloud.com │ ├── www.shrm.org │ ├── www.shure.com │ ├── www.shutterfly.com │ ├── www.shutterstock.com │ ├── www.si.com │ ├── www.si.edu │ ├── www.sidefx.com │ ├── www.siemens.com │ ├── www.sigmaaldrich.com │ ├── www.silkroad.com │ ├── www.silverscreenandroll.com │ ├── www.simon.com │ ├── www.simplyhired.com │ ├── www.sina.com │ ├── www.sinclair.edu │ ├── www.singaporeair.com │ ├── www.singaporepools.com.sg │ ├── www.sinica.edu.tw │ ├── www.siriusxm.com │ ├── www.site24x7.com │ ├── www.siteground.com │ ├── www.sitejabber.com │ ├── www.sitepoint.com │ ├── www.sixflags.com │ ├── www.sixt.com │ ├── www.sjsu.edu │ ├── www.skechers.com │ ├── www.sketchup.com │ ├── www.skf.com │ ├── www.sky.com │ ├── www.skylinewebcams.com │ ├── www.skype.com │ ├── www.skyrock.com │ ├── www.skyscanner.com │ ├── www.skyscrapercity.com │ ├── www.skysports.com │ ├── www.slate.com │ ├── www.slb.com │ ├── www.slideshare.net │ ├── www.slsp.sk │ ├── www.slt.lk │ ├── www.sltrib.com │ ├── www.slu.edu │ ├── www.smackjeeves.com │ ├── www.smartbrief.com │ ├── www.smartdraw.com │ ├── www.smartshanghai.com │ ├── www.smartsheet.com │ ├── www.smbc-comics.com │ ├── www.smc.edu │ ├── www.smh.com.au │ ├── www.smhi.se │ ├── www.smiletemplates.com │ ├── www.smithsonianmag.com │ ├── www.smugmug.com │ ├── www.snagajob.com │ ├── www.snapfish.com │ ├── www.sncf.com │ ├── www.snhu.edu │ ├── www.snopes.com │ ├── www.socalgas.com │ ├── www.soccerladuma.co.za │ ├── www.soccermanager.com │ ├── www.soccerstand.com │ ├── www.soccervista.com │ ├── www.soccerway.com │ ├── www.soft32.com │ ├── www.softpedia.com │ ├── www.solaredge.com │ ├── www.solarwinds.com │ ├── www.solidworks.com │ ├── www.somethingawful.com │ ├── www.songkick.com │ ├── www.sonos.com │ ├── www.sony.co.uk │ ├── www.sony.com │ ├── www.sony.net │ ├── www.sonymobile.com │ ├── www.soompi.com │ ├── www.sophos.com │ ├── www.sos.ca.gov │ ├── www.soumu.go.jp │ ├── www.soundonsound.com │ ├── www.soundsnap.com │ ├── www.southerncompany.com │ ├── www.southernliving.com │ ├── www.southindianbank.com │ ├── www.southwest.com │ ├── www.spa.gov.my │ ├── www.spa.gov.sa │ ├── www.space.com │ ├── www.spacex.com │ ├── www.spanishdict.com │ ├── www.spankwire.com │ ├── www.spareroom.co.uk │ ├── www.sparknotes.com │ ├── www.spc.noaa.gov │ ├── www.specialized.com │ ├── www.spectator.co.uk │ ├── www.speedhunters.com │ ├── www.speedtest.net │ ├── www.spicejet.com │ ├── www.spin.com │ ├── www.spine-health.com │ ├── www.spirit.com │ ├── www.spokeo.com │ ├── www.sportchek.ca │ ├── www.sportingnews.com │ ├── www.sportsbet.com.au │ ├── www.sportsdirect.com │ ├── www.sportsmansguide.com │ ├── www.sportsnet.ca │ ├── www.spotify.com │ ├── www.springer.com │ ├── www.sprint.com │ ├── www.sprouts.com │ ├── www.sqlservercentral.com │ ├── www.squarespace.com │ ├── www.squirt.org │ ├── www.ssa.gov │ ├── www.ssrn.com │ ├── www.st.com │ ├── www.stamps.com │ ├── www.standardbank.co.za │ ├── www.standardmedia.co.ke │ ├── www.stanford.edu │ ├── www.stanfordchildrens.org │ ├── www.staples.ca │ ├── www.starbucks.com │ ├── www.starcitygames.com │ ├── www.starcraft2.com │ ├── www.stardoll.com │ ├── www.starfall.com │ ├── www.startribune.com │ ├── www.starwars.com │ ├── www.starz.com │ ├── www.stata.com │ ├── www.statcan.gc.ca │ ├── www.state.gov │ ├── www.state.nj.us │ ├── www.statefarm.com │ ├── www.statesman.com │ ├── www.steinberg.net │ ├── www.stellarinfo.com │ ├── www.stereogum.com │ ├── www.stern.nyu.edu │ ├── www.stgeorge.com.au │ ├── www.stlouisfed.org │ ├── www.stltoday.com │ ├── www.stonybrook.edu │ ├── www.straitstimes.com │ ├── www.strava.com │ ├── www.streamate.com │ ├── www.streema.com │ ├── www.studentbeans.com │ ├── www.studentdoctor.net │ ├── www.stuff.co.nz │ ├── www.stumbleupon.com │ ├── www.styleforum.net │ ├── www.subaru.com │ ├── www.subway.com │ ├── www.success.com │ ├── www.successcds.net │ ├── www.successfactors.com │ ├── www.suicidegirls.com │ ├── www.sulekha.com │ ├── www.summitracing.com │ ├── www.sun-sentinel.com │ ├── www.sun.ac.za │ ├── www.sunglasshut.com │ ├── www.sunlife.com │ ├── www.suntrust.com │ ├── www.suny.edu │ ├── www.suomi.fi │ ├── www.supercheapauto.com.au │ ├── www.supercheats.com │ ├── www.supercoloring.com │ ├── www.superdrug.com │ ├── www.supersport.com │ ├── www.surfline.com │ ├── www.surrey.ac.uk │ ├── www.surveymonkey.com │ ├── www.sussex.ac.uk │ ├── www.suzuki.co.jp │ ├── www.svuonline.org │ ├── www.swansonvitamins.com │ ├── www.swarovski.com │ ├── www.swedbank.ee │ ├── www.sweetwater.com │ ├── www.swinglifestyle.com │ ├── www.swiss.com │ ├── www.swissinfo.ch │ ├── www.swtor.com │ ├── www.syfy.com │ ├── www.symantec.com │ ├── www.symbolab.com │ ├── www.synonym.com │ ├── www.synopsys.com │ ├── www.syracuse.com │ ├── www.syracuse.edu │ ├── www.t-mobile.com │ ├── www.t-nation.com │ ├── www.tableau.com │ ├── www.tafensw.edu.au │ ├── www.tagged.com │ ├── www.taichung.gov.tw │ ├── www.taiwannews.com.tw │ ├── www.taiwantrade.com │ ├── www.talbots.com │ ├── www.talentlms.com │ ├── www.talkbass.com │ ├── www.tampabay.com │ ├── www.tamu.edu │ ├── www.tangerine.ca │ ├── www.target.com │ ├── www.tarot.com │ ├── www.tatamotors.com │ ├── www.tatasky.com │ ├── www.tate.org.uk │ ├── www.tatrabanka.sk │ ├── www.tau.ac.il │ ├── www.tax.ny.gov │ ├── www.tbs.co.jp │ ├── www.tccd.edu │ ├── www.tcd.ie │ ├── www.tcm.com │ ├── www.td.com │ ├── www.tdameritrade.com │ ├── www.tdbank.com │ ├── www.teachingenglish.org.uk │ ├── www.team-bhp.com │ ├── www.teamtalk.com │ ├── www.teamviewer.com │ ├── www.tec-it.com │ ├── www.techbargains.com │ ├── www.techcrunch.com │ ├── www.techdata.com │ ├── www.techmahindra.com │ ├── www.technipages.com │ ├── www.technologyreview.com │ ├── www.techradar.com │ ├── www.techsmith.com │ ├── www.techspot.com │ ├── www.ted.com │ ├── www.tedbaker.com │ ├── www.teenvogue.com │ ├── www.tek-tips.com │ ├── www.telegraph.co.uk │ ├── www.telegraphindia.com │ ├── www.telekom.de │ ├── www.telekom.hu │ ├── www.telerik.com │ ├── www.telkom.co.za │ ├── www.telstra.com.au │ ├── www.telus.com │ ├── www.temple.edu │ ├── www.tempo.co │ ├── www.tennessean.com │ ├── www.tennis-warehouse.com │ ├── www.tennis.com │ ├── www.tensorflow.org │ ├── www.tesco.com │ ├── www.texas.gov │ ├── www.textures.com │ ├── www.tfw2005.com │ ├── www.thaiairways.com │ ├── www.theaa.com │ ├── www.theage.com.au │ ├── www.theatlantic.com │ ├── www.theaustralian.com.au │ ├── www.thebalance.com │ ├── www.thebalancecareers.com │ ├── www.thebalancesmb.com │ ├── www.thebetterindia.com │ ├── www.thecalculatorsite.com │ ├── www.thedailybeast.com │ ├── www.thedailymeal.com │ ├── www.thedailystar.net │ ├── www.thedrum.com │ ├── www.theepochtimes.com │ ├── www.thefreedictionary.com │ ├── www.thegearpage.net │ ├── www.theglobeandmail.com │ ├── www.thegospelcoalition.org │ ├── www.theguardian.com │ ├── www.thehartford.com │ ├── www.thehindu.com │ ├── www.thehindubusinessline.com │ ├── www.theiet.org │ ├── www.thekitchn.com │ ├── www.theknot.com │ ├── www.thelancet.com │ ├── www.thenation.com │ ├── www.thenational.ae │ ├── www.thenews.com.pk │ ├── www.thenewsminute.com │ ├── www.thenorthface.com │ ├── www.theoutnet.com │ ├── www.thepeninsulaqatar.com │ ├── www.thepetitionsite.com │ ├── www.therealreal.com │ ├── www.theregister.co.uk │ ├── www.theroot.com │ ├── www.thesaurus.com │ ├── www.thesimpledollar.com │ ├── www.thespruce.com │ ├── www.thesprucecrafts.com │ ├── www.thespruceeats.com │ ├── www.thesprucepets.com │ ├── www.thestar.com │ ├── www.thestar.com.my │ ├── www.thestranger.com │ ├── www.thestudentroom.co.uk │ ├── www.thesun.co.uk │ ├── www.thetimes.co.uk │ ├── www.thetrainline.com │ ├── www.thetrendspotter.net │ ├── www.theupsstore.com │ ├── www.theverge.com │ ├── www.theweathernetwork.com │ ├── www.thewindowsclub.com │ ├── www.thomann.de │ ├── www.thomascook.com │ ├── www.thomascook.in │ ├── www.thomsonreuters.com │ ├── www.thorlabs.com │ ├── www.thoughtco.com │ ├── www.threadless.com │ ├── www.three.co.uk │ ├── www.thrillist.com │ ├── www.thule.com │ ├── www.thumbtack.com │ ├── www.ti.com │ ├── www.tibia.com │ ├── www.tickets.com │ ├── www.tigerdirect.com │ ├── www.tigerdroppings.com │ ├── www.tikona.in │ ├── www.tillys.com │ ├── www.time4education.com │ ├── www.timeanddate.com │ ├── www.timeout.com │ ├── www.timesjobs.com │ ├── www.timesunion.com │ ├── www.timetrade.com │ ├── www.tineye.com │ ├── www.titech.ac.jp │ ├── www.tjx.com │ ├── www.tlc.com │ ├── www.tldp.org │ ├── www.tmz.com │ ├── www.tn.gov │ ├── www.tn.gov.in │ ├── www.tnt.com │ ├── www.toastmasters.org │ ├── www.today.com │ ├── www.toggl.com │ ├── www.tohoku.ac.jp │ ├── www.tomsguide.com │ ├── www.tomshardware.com │ ├── www.tomtom.com │ ├── www.tomtop.com │ ├── www.tonyrobbins.com │ ├── www.toolbox.com │ ├── www.topcashback.co.uk_443 │ ├── www.topgear.com │ ├── www.topman.com │ ├── www.topshop.com │ ├── www.tor.com │ ├── www.toronto.ca │ ├── www.torrentfreak.com │ ├── www.torrid.com │ ├── www.toryburch.com │ ├── www.toshiba.com │ ├── www.totalwar.com │ ├── www.touchnet.com │ ├── www.towson.edu │ ├── www.toyoko-inn.com │ ├── www.toyotafinancial.com │ ├── www.tp-link.com │ ├── www.tpg.com.au │ ├── www.tracfone.com │ ├── www.track-trace.com │ ├── www.tractorsupply.com │ ├── www.tradedoubler.com │ ├── www.tradeindia.com │ ├── www.trademe.co.nz │ ├── www.traderjoes.com │ ├── www.tradesy.com │ ├── www.trainingpeaks.com │ ├── www.transamerica.com │ ├── www.translated.net │ ├── www.transparent.com │ ├── www.transportation.gov │ ├── www.transunion.com │ ├── www.travelandleisure.com │ ├── www.travelchannel.com │ ├── www.travelchinaguide.com │ ├── www.travelers.com │ ├── www.traveller.com.au │ ├── www.travelocity.com │ ├── www.travelodge.co.uk │ ├── www.travelport.com │ ├── www.travelzoo.com │ ├── www.travian.com │ ├── www.treasury.gov │ ├── www.treehugger.com │ ├── www.trekbikes.com │ ├── www.trend.az │ ├── www.trendsmap.com │ ├── www.tribuneindia.com │ ├── www.trimble.com │ ├── www.tripadvisor.com │ ├── www.tripsavvy.com │ ├── www.trivago.com │ ├── www.trollandtoad.com │ ├── www.troweprice.com │ ├── www.trulia.com │ ├── www.trustedreviews.com │ ├── www.trustpilot.com │ ├── www.tsa.gov │ ├── www.tsheets.com │ ├── www.tsn.ca │ ├── www.tsp.gov │ ├── www.tsukuba.ac.jp │ ├── www.tu-berlin.de │ ├── www.tucows.com │ ├── www.tudelft.nl │ ├── www.tufts.edu │ ├── www.tugraz.at │ ├── www.tulane.edu │ ├── www.tumblr.com │ ├── www.tunecore.com │ ├── www.turbosquid.com │ ├── www.turkishairlines.com │ ├── www.turktelekom.com.tr │ ├── www.turnitin.com │ ├── www.tutor2u.net │ ├── www.tutorialrepublic.com │ ├── www.tv.com │ ├── www.tvguide.co.uk │ ├── www.tvguide.com │ ├── www.tvnz.co.nz │ ├── www.tweaktown.com │ ├── www.twitch.tv │ ├── www.typingclub.com │ ├── www.typingmaster.com │ ├── www.u-tokyo.ac.jp │ ├── www.ua.edu │ ├── www.uab.edu │ ├── www.ualberta.ca │ ├── www.ubc.ca │ ├── www.uber.com │ ├── www.ubergizmo.com │ ├── www.ubisoft.com_443 │ ├── www.ubs.com │ ├── www.uc.edu │ ├── www.ucalgary.ca │ ├── www.ucar.edu │ ├── www.ucas.com │ ├── www.ucc.ie │ ├── www.ucdavis.edu │ ├── www.ucdenver.edu │ ├── www.ucf.edu │ ├── www.uchicago.edu │ ├── www.ucl.ac.uk │ ├── www.ucla.edu │ ├── www.uclahealth.org │ ├── www.ucop.edu │ ├── www.ucr.edu │ ├── www.ucsb.edu │ ├── www.ucsc.edu │ ├── www.ucsf.edu │ ├── www.uct.ac.za │ ├── www.udacity.com │ ├── www.udemy.com │ ├── www.uefa.com │ ├── www.ufc.com │ ├── www.ufcu.org │ ├── www.uga.edu │ ├── www.ugc.ac.in │ ├── www.ugent.be │ ├── www.uh.edu │ ├── www.uhaul.com │ ├── www.ui.com │ ├── www.ui.edu.ng │ ├── www.uic.edu │ ├── www.uidaho.edu │ ├── www.uillinois.edu │ ├── www.uitm.edu.my │ ├── www.uj.ac.za │ ├── www.uj.edu.pl │ ├── www.ukulele-tabs.com │ ├── www.uky.edu │ ├── www.ukzn.ac.za │ ├── www.ul.com │ ├── www.ulaval.ca │ ├── www.uline.com │ ├── www.ulta.com │ ├── www.ultimate-guitar.com │ ├── www.ultratools.com │ ├── www.um.ac.ir │ ├── www.um.edu.my │ ├── www.umanitoba.ca │ ├── www.umass.edu │ ├── www.umassonline.net │ ├── www.umbc.edu │ ├── www.umd.edu │ ├── www.umich.edu │ ├── www.umontreal.ca │ ├── www.umsystem.edu │ ├── www.umuc.edu │ ├── www.un.org │ ├── www.uncc.edu │ ├── www.uncommongoods.com │ ├── www.underarmour.com │ ├── www.undp.org │ ├── www.unesco.org │ ├── www.unhcr.org │ ├── www.uni-bonn.de │ ├── www.uni-freiburg.de │ ├── www.uni-giessen.de │ ├── www.uni-hannover.de │ ├── www.uni-trier.de │ ├── www.uni.lodz.pl │ ├── www.unian.net │ ├── www.unicef.org │ ├── www.unila.ac.id │ ├── www.unimelb.edu.au │ ├── www.unimi.it │ ├── www.unionbankofindia.co.in │ ├── www.unipune.ac.in │ ├── www.uniraj.ac.in │ ├── www.unisa.ac.za │ ├── www.unitedbankofindia.com │ ├── www.universalorlando.com │ ├── www.universalstudioshollywood.com │ ├── www.universetoday.com │ ├── www.universityofcalifornia.edu │ ├── www.univie.ac.at │ ├── www.univision.com │ ├── www.unl.edu │ ├── www.unlv.edu │ ├── www.unm.edu │ ├── www.unpad.ac.id │ ├── www.unsw.edu.au │ ├── www.unt.edu │ ├── www.unv.org │ ├── www.uoa.gr │ ├── www.uob.com.sg │ ├── www.uoguelph.ca │ ├── www.uonbi.ac.ke │ ├── www.uoregon.edu │ ├── www.uottawa.ca │ ├── www.uow.edu.au │ ├── www.up.ac.za │ ├── www.upc.edu │ ├── www.upenn.edu │ ├── www.upi.com │ ├── www.uprinting.com │ ├── www.ups.com │ ├── www.upsc.gov.in │ ├── www.uptodate.com │ ├── www.upv.es │ ├── www.uq.edu.au │ ├── www.urbandictionary.com │ ├── www.urbanfonts.com │ ├── www.urbanoutfitters.com │ ├── www.uregina.ca │ ├── www.urmc.rochester.edu │ ├── www.us.hsbc.com │ ├── www.usa.canon.com │ ├── www.usa.gov │ ├── www.usa.philips.com │ ├── www.usaa.com │ ├── www.usajobs.gov │ ├── www.usask.ca │ ├── www.usatoday.com │ ├── www.usbank.com │ ├── www.usccb.org │ ├── www.uscis.gov │ ├── www.uscourts.gov │ ├── www.usda.gov │ ├── www.usembassy.gov │ ├── www.usertesting.com │ ├── www.usf.edu │ ├── www.usg.edu │ ├── www.usgs.gov │ ├── www.usherbrooke.ca │ ├── www.usingenglish.com │ ├── www.usj.co.jp │ ├── www.usm.my │ ├── www.usnews.com │ ├── www.usopen.com │ ├── www.usps.com │ ├── www.uspto.gov │ ├── www.ussoccer.com │ ├── www.ust.hk │ ├── www.usta.com │ ├── www.usu.edu │ ├── www.uswitch.com │ ├── www.ut.ac.id │ ├── www.ut.ac.ir │ ├── www.uta.edu │ ├── www.utah.gov │ ├── www.utahrealestate.com │ ├── www.utar.edu.my │ ├── www.utas.edu.au │ ├── www.utdallas.edu │ ├── www.utexas.edu │ ├── www.uthm.edu.my │ ├── www.utk.edu │ ├── www.utm.edu │ ├── www.utm.my │ ├── www.utoronto.ca │ ├── www.utorrent.com │ ├── www.uts.edu.au │ ├── www.utsa.edu │ ├── www.utsystem.edu │ ├── www.uu.se │ ├── www.uvic.ca │ ├── www.uvm.edu │ ├── www.uw.edu.pl │ ├── www.uwindsor.ca │ ├── www.uwo.ca │ ├── www.uzh.ch │ ├── www.va.gov │ ├── www.valueresearchonline.com │ ├── www.valvesoftware.com │ ├── www.vancouversun.com │ ├── www.vanguard.com │ ├── www.vanguardngr.com │ ├── www.vans.com │ ├── www.vbforums.com │ ├── www.vccs.edu │ ├── www.vcu.edu │ ├── www.vectorstock.com │ ├── www.veeam.com │ ├── www.vegasinsider.com │ ├── www.venus.com │ ├── www.veritas.com │ ├── www.verizon.com │ ├── www.verizonwireless.com │ ├── www.versace.com │ ├── www.verywellfamily.com │ ├── www.verywellfit.com │ ├── www.verywellhealth.com │ ├── www.verywellmind.com │ ├── www.vg247.com │ ├── www.viamichelin.com │ ├── www.viamichelin.fr │ ├── www.viarail.ca │ ├── www.viasat.com │ ├── www.viber.com │ ├── www.vice.com │ ├── www.victoriassecret.com │ ├── www.videohelp.com │ ├── www.videolan.org │ ├── www.vietcombank.com.vn │ ├── www.vietnamplus.vn │ ├── www.viewbug.com │ ├── www.virginaustralia.com │ ├── www.virginia.edu │ ├── www.virginmedia.com │ ├── www.virtualbox.org │ ├── www.visa.com │ ├── www.vistaprint.com │ ├── www.visualcv.com │ ├── www.vit.ac.in │ ├── www.vivastreet.co.uk │ ├── www.viz.com │ ├── www.vmware.com │ ├── www.voanews.com │ ├── www.voat.co │ ├── www.vocabulary.com │ ├── www.vodafone.co.uk │ ├── www.vodafone.com │ ├── www.vogue.com │ ├── www.volusion.com │ ├── www.volvocars.com │ ├── www.vonage.com │ ├── www.voyeurweb.com │ ├── www.vrbo.com │ ├── www.vsp.com │ ├── www.vst4free.com │ ├── www.vtu.ac.in │ ├── www.vu.edu.au │ ├── www.vu.edu.pk │ ├── www.vub.sk │ ├── www.vw.com │ ├── www.vwvortex.com │ ├── www.w3.org │ ├── www.wa.gov.au │ ├── www.wacom.com │ ├── www.wageworks.com │ ├── www.waitrose.com │ ├── www.waldenu.edu │ ├── www.walgreens.com │ ├── www.wampserver.com │ ├── www.wanderu.com │ ├── www.warbyparker.com │ ├── www.washingtonexaminer.com │ ├── www.washingtontimes.com │ ├── www.watchuseek.com │ ├── www.waves.com │ ├── www.wayfair.com │ ├── www.wbsedcl.in │ ├── www.wbur.org │ ├── www.wcvb.com │ ├── www.wd.com │ ├── www.weather.info_443 │ ├── www.weatherbug.com │ ├── www.weatherzone.com.au │ ├── www.webcams.travel │ ├── www.webcrawler.com │ ├── www.weber.com │ ├── www.webex.com │ ├── www.webfx.com │ ├── www.webindia123.com │ ├── www.webjet.com.au │ ├── www.webmail.co.za │ ├── www.webmd.com │ ├── www.webopedia.com │ ├── www.webpagetest.org │ ├── www.webstaurantstore.com │ ├── www.weebly.com │ ├── www.weforum.org │ ├── www.wegmans.com │ ├── www.weightwatchers.com │ ├── www.wellsfargo.com │ ├── www.wendys.com │ ├── www.westelm.com │ ├── www.westernunion.com │ ├── www.westjet.com │ ├── www.westmarine.com │ ├── www.westpac.co.nz │ ├── www.westpac.com.au │ ├── www.westword.com │ ├── www.wfaa.com │ ├── www.wgu.edu │ ├── www.whatismyip.com │ ├── www.whatsmydns.net │ ├── www.which.co.uk │ ├── www.whitehouse.gov │ ├── www.whitepages.com │ ├── www.who.int │ ├── www.wickes.co.uk │ ├── www.wien.gv.at │ ├── www.wiggle.co.uk │ ├── www.wikidot.com │ ├── www.wikihow.com │ ├── www.wikitree.com │ ├── www.wiley.com │ ├── www.wilko.com │ ├── www.williamhill.com │ ├── www.williamlong.info │ ├── www.williams-sonoma.com │ ├── www.wimp.com │ ├── www.wincalendar.com │ ├── www.wind.gr │ ├── www.windowscentral.com │ ├── www.winzip.com │ ├── www.wipo.int │ ├── www.wipro.com │ ├── www.wired.com │ ├── www.wireshark.org │ ├── www.wisc.edu │ ├── www.wisconsin.edu │ ├── www.wisecleaner.com │ ├── www.wisegeek.com │ ├── www.wizards.com │ ├── www.wlu.ca │ ├── www.wm.com │ ├── www.wm.edu │ ├── www.wmtransfer.com │ ├── www.wn.com │ ├── www.wnd.com │ ├── www.wolfram.com │ ├── www.wolframalpha.com │ ├── www.womansday.com │ ├── www.wonderhowto.com │ ├── www.wondershare.com │ ├── www.woodforest.com │ ├── www.wordplays.com │ ├── www.wordproject.org │ ├── www.wordreference.com │ ├── www.worldbank.org │ ├── www.worldcat.org │ ├── www.worldfirst.com │ ├── www.worldmarket.com │ ├── www.worldpay.com │ ├── www.worldscientific.com │ ├── www.worldsex.com │ ├── www.worldtimebuddy.com │ ├── www.wowhead.com │ ├── www.wowinterface.com │ ├── www.wpbeginner.com │ ├── www.wps.com │ ├── www.wral.com │ ├── www.wrestlinginc.com │ ├── www.wrike.com │ ├── www.wroclaw.pl │ ├── www.wsbtv.com │ ├── www.wsdot.wa.gov │ ├── www.wsj.com │ ├── www.wsop.com │ ├── www.wtatennis.com │ ├── www.wthr.com │ ├── www.wto.org │ ├── www.wu.ac.at │ ├── www.wufoo.com │ ├── www.wunderground.com │ ├── www.wwe.com │ ├── www.x-plane.org │ ├── www.x-rates.com │ ├── www.xbox.com │ ├── www.xda-developers.com │ ├── www.xe.com │ ├── www.xero.com │ ├── www.xerox.com │ ├── www.xfinity.com │ ├── www.xilinx.com │ ├── www.xing.com │ ├── www.xmind.net │ ├── www.xnxx.com │ ├── www.xoom.com │ ├── www.xsnews.nl │ ├── www.xvideos.com │ ├── www.y8.com │ ├── www.yachtworld.com │ ├── www.yahoo.com │ ├── www.yale.edu │ ├── www.yamaha.com │ ├── www.yell.com │ ├── www.yellowpages.com │ ├── www.yellowpages.com.au │ ├── www.yellowpages.com.eg │ ├── www.yelp.com │ ├── www.yesasia.com │ ├── www.yesbank.in │ ├── www.yeti.com │ ├── www.ynetnews.com │ ├── www.yogajournal.com │ ├── www.yonsei.ac.kr │ ├── www.yoox.com │ ├── www.yopmail.com │ ├── www.york.ac.uk │ ├── www.youporn.com │ ├── www.yourdictionary.com │ ├── www.yourtango.com │ ├── www.youtube.com │ ├── www.ypes.gr │ ├── www.yr.no │ ├── www.ysl.com │ ├── www.ysu.edu.cn │ ├── www.yugioh-card.com │ ├── www.yummly.com │ ├── www.zabbix.com │ ├── www.zacks.com │ ├── www.zamg.ac.at │ ├── www.zamzar.com │ ├── www.zawya.com │ ├── www.zdnet.com │ ├── www.zebra.com │ ├── www.zedo.com │ ├── www.zendesk.com │ ├── www.zenithbank.com │ ├── www.zennioptical.com │ ├── www.zerohedge.com │ ├── www.zhaopin.com │ ├── www.zillow.com │ ├── www.zip-codes.com │ ├── www.zipcar.com │ ├── www.zoho.com │ ├── www.zoominfo.com │ ├── www.zoopla.co.uk │ ├── www.zorpia.com │ ├── www.zotero.org │ ├── www.zougla.gr │ ├── www.zumiez.com │ ├── www1.udel.edu │ ├── xkcd.com │ ├── ycmou.digitaluniversity.ac │ ├── yoast.com │ ├── zeenews.india.com │ └── zone.msn.com ├── test_on_fetched_robotstxt.py ├── test_on_google_spec.py ├── test_protego.py ├── test_unquote.py └── top-10000-websites.txt └── tox.ini /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Contains commits to be ignored due to linting 2 | 3 | 9097889fe2820cd582f0ae873b1be48eae47f818 4 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "header, diff, tree" 3 | 4 | coverage: 5 | status: 6 | project: false 7 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_data/10minutemail.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | User-agent: * 5 | Allow: / 6 | 7 | Sitemap: https://10minutemail.com/sitemap.xml 8 | -------------------------------------------------------------------------------- /tests/test_data/4chan.org: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / 3 | 4 | User-agent: * 5 | Disallow: 6 | 7 | Sitemap: http://www.4chan.org/sitemap.xml 8 | -------------------------------------------------------------------------------- /tests/test_data/about.gitlab.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /resources/downloads/ 3 | Disallow: /analysts/includes/ 4 | Sitemap: https://about.gitlab.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/agendaweb.org: -------------------------------------------------------------------------------- 1 | # robots.txt 2 | User-agent:* 3 | Disallow: 4 | Allow: /*.css$ 5 | Allow: /*.js$ 6 | Sitemap: //www.agendaweb.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/akc.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /hidden_tag/ 4 | Disallow: /clubs-delegates/puppies/all-breeds/ 5 | Allow: /wp-admin/admin-ajax.php 6 | -------------------------------------------------------------------------------- /tests/test_data/akismet.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /login/ 3 | Disallow: /signup/ 4 | 5 | 6 | Sitemap: http://akismet.com/sitemap.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/alaska.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /images 3 | 4 | User-agent: PiplBot 5 | Disallow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/alison.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Disallow: /lms/scorm 4 | Crawl-delay: 5 5 | Sitemap: https://alison.com/sitemap.xml 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/appinventor.mit.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /ai2 4 | Disallow: /setup 5 | -------------------------------------------------------------------------------- /tests/test_data/archinect.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://archinect.com/sitemap.xml 2 | -------------------------------------------------------------------------------- /tests/test_data/asic.gov.au: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot 2 | Disallow: /about-asic/media-centre/find-a-media-release/archive/ -------------------------------------------------------------------------------- /tests/test_data/auto-data.net: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.auto-data.net/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/avaaz.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/avaaz.org -------------------------------------------------------------------------------- /tests/test_data/baseball.fantasysports.yahoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /user/ -------------------------------------------------------------------------------- /tests/test_data/basecamp.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /demos/ 4 | Sitemap: https://basecamp.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/beforeitsnews.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /dashboard/ 4 | -------------------------------------------------------------------------------- /tests/test_data/berniesanders.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/betanews.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/bethesda.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /community/ 3 | Sitemap: https://bethesda.net/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/bigcharts.marketwatch.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /RealMedia/ -------------------------------------------------------------------------------- /tests/test_data/bitcoin.org: -------------------------------------------------------------------------------- 1 | 2 | Sitemap: https://bitcoin.org/sitemap.xml 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/bitcointalk.org: -------------------------------------------------------------------------------- 1 | Sitemap: https://bitcointalk.org/sitemap.php 2 | -------------------------------------------------------------------------------- /tests/test_data/bitly.com: -------------------------------------------------------------------------------- 1 | # Welcome to Bitly =) 2 | # robots welcome; 3 | # API documentation can be found at https://dev.bitly.com/ 4 | User-Agent: * 5 | Disallow: 6 | -------------------------------------------------------------------------------- /tests/test_data/blog.mozilla.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/boingboing.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin 3 | Disallow: /wp-content/themes/2012/ads/ 4 | Disallow: /wp-content/themes/2012/ad_* 5 | -------------------------------------------------------------------------------- /tests/test_data/boxrec.com: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449 3 | 4 | User-agent: * 5 | Disallow: 6 | -------------------------------------------------------------------------------- /tests/test_data/breakingnewsenglish.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | User-agent: * 4 | Disallow: 5 | -------------------------------------------------------------------------------- /tests/test_data/brew.sh: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: 4 | Sitemap: https://brew.sh/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/brighthouse.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /structural/ 3 | Disallow: /content/residential/structural/ 4 | Disallow: /content/residential/ 5 | -------------------------------------------------------------------------------- /tests/test_data/bugmenot.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /vote.php 3 | Disallow: /removal.php -------------------------------------------------------------------------------- /tests/test_data/cad-comic.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/cafeastrology.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://cafeastrology.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/caniuse.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/caniuse.com -------------------------------------------------------------------------------- /tests/test_data/carousell.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /activity/ 3 | Disallow: /archive/ 4 | Disallow: /inbox/ 5 | Disallow: /join/ 6 | Disallow: /login/ -------------------------------------------------------------------------------- /tests/test_data/cdnjs.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://cdnjs.com/sitemap.xml 3 | Allow: * 4 | -------------------------------------------------------------------------------- /tests/test_data/celebitchy.com: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot-Image 2 | Disallow: / 3 | Disallow: /wp-content/uploads/ 4 | 5 | User-agent: Bingbot 6 | Disallow: /wp-content/uploads/ 7 | -------------------------------------------------------------------------------- /tests/test_data/clicky.com: -------------------------------------------------------------------------------- 1 | 2 | User-agent: * 3 | Disallow: /stats/ 4 | Disallow: /api/ 5 | -------------------------------------------------------------------------------- /tests/test_data/codeigniter.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/coinmarketcap.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/colorlib.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /wp-includes/ 4 | Disallow: /out/ 5 | Disallow: /wp/out/ 6 | -------------------------------------------------------------------------------- /tests/test_data/convertcase.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://convertcase.net/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/convertio.co: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://convertio.co/sitemap.xml 3 | Disallow: /tmp/ 4 | Disallow: /*/tmp/ -------------------------------------------------------------------------------- /tests/test_data/creativecommons.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wordpress 3 | -------------------------------------------------------------------------------- /tests/test_data/cvmkr.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/cvmkr.com -------------------------------------------------------------------------------- /tests/test_data/dailycaller.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 1 3 | 4 | Sitemap: https://dailycaller.com/sitemap 5 | -------------------------------------------------------------------------------- /tests/test_data/daz3d.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /newsletter/ 3 | Disallow: /admin/ 4 | Disallow: /forums/profile/ 5 | Disallow: /customer/ 6 | Disallow: /checkout/ 7 | -------------------------------------------------------------------------------- /tests/test_data/dd-wrt.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/designshack.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /preview/* 3 | Disallow: /out/ 4 | Crawl-delay: 10 5 | -------------------------------------------------------------------------------- /tests/test_data/dict.leo.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api/ 3 | 4 | User-agent: Mediapartners-Google 5 | Allow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/dilbert.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/discovermagazine.com: -------------------------------------------------------------------------------- 1 | Sitemap: sitemap.xml 2 | -------------------------------------------------------------------------------- /tests/test_data/drafthouse.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*/tickets/ 3 | Disallow: /s/* 4 | Disallow: /utils/* 5 | -------------------------------------------------------------------------------- /tests/test_data/drdo.gov.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.drdo.gov.in/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/dulfy.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/dyn.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /wp-includes/ 4 | Sitemap: https://dyn.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/elementor.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/emedicine.medscape.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /article/*_print 4 | Disallow: /druginfo* 5 | Disallow: /*dr-ln* -------------------------------------------------------------------------------- /tests/test_data/epguides.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /djk/family/ -------------------------------------------------------------------------------- /tests/test_data/eric.ed.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Host: eric.ed.gov 4 | Sitemap: https://files.eric.ed.gov/sitemaps/sitemap_index.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/evermotion.org_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /links 3 | Disallow: /libs/JavaScript/ 4 | Sitemap: https://evermotion.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/extremereach.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/fetlife.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /ads 3 | Disallow: /users 4 | Disallow: /posts 5 | Disallow: /groups 6 | Disallow: /explore 7 | Disallow: /home 8 | -------------------------------------------------------------------------------- /tests/test_data/ffmpeg.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/fileinfo.com: -------------------------------------------------------------------------------- 1 | # FileInfo.com robots.txt 2 | 3 | User-agent: * 4 | Disallow: /cgi-bin/ 5 | Disallow: /print/ 6 | 7 | User-agent: NinjaBot 8 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/filezilla-project.org: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /nightlies/ 3 | Disallow: /builds/ 4 | Disallow: /locales/ 5 | -------------------------------------------------------------------------------- /tests/test_data/filmschoolrejects.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/flatmates.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://flatmates.com.au/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/food.ndtv.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /recipes/email/ 4 | Disallow: /common/ 5 | Disallow: /default/ -------------------------------------------------------------------------------- /tests/test_data/football.fantasysports.yahoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /user/ -------------------------------------------------------------------------------- /tests/test_data/freemusicarchive.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /music/download 3 | Disallow: /music/listen 4 | Disallow: /search 5 | -------------------------------------------------------------------------------- /tests/test_data/freshdesk.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: */demo-thank-you 3 | Disallow: */thank-you -------------------------------------------------------------------------------- /tests/test_data/gameranx.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/gamesdonequick.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | Disallow: /tracker/donors/ 4 | Disallow: /tracker/donations/ 5 | -------------------------------------------------------------------------------- /tests/test_data/gawker.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /how-the-rich-get-into-ivies-behind-the-scenes-of-elite-1699066450 3 | -------------------------------------------------------------------------------- /tests/test_data/gearpatrol.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /75205299/ 3 | -------------------------------------------------------------------------------- /tests/test_data/geekologie.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/geoguessr.com: -------------------------------------------------------------------------------- 1 | User-Agent:* 2 | Disallow: /*/play 3 | Disallow: /results/ 4 | Disallow: /challenge/ 5 | Disallow: /me -------------------------------------------------------------------------------- /tests/test_data/geology.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | 4 | User-Agent: * 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/getbootstrap.com: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | 3 | # Allow crawling of all content 4 | User-agent: * 5 | Disallow: 6 | Sitemap: https://getbootstrap.com/sitemap.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/getfireshot.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/getfireshot.com -------------------------------------------------------------------------------- /tests/test_data/golang.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | -------------------------------------------------------------------------------- /tests/test_data/greyscalegorilla.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | Disallow: /wp-content/plugins/vimeography/lib/shared/assets/ 5 | -------------------------------------------------------------------------------- /tests/test_data/hentai2read.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin 3 | Disallow: /wp-login.php 4 | 5 | Crawl-delay: 180 6 | 7 | Sitemap: https://hentai2read.com/sitemap -------------------------------------------------------------------------------- /tests/test_data/heroku.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.heroku.com/sitemap.xml 2 | User-agent: * 3 | Disallow: */search 4 | -------------------------------------------------------------------------------- /tests/test_data/hiphopdx.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://hiphopdx.com/sitemap.xml 5 | Sitemap: https://hiphopdx.com/sitemap_news.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/hotcopper.com.au: -------------------------------------------------------------------------------- 1 | Sitemap: https://hotcopper.com.au/sitemap/sitemap.xml.gz 2 | 3 | User-agent: * 4 | Disallow: 5 | -------------------------------------------------------------------------------- /tests/test_data/httpd.apache.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /websrc 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/icai.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/ideas.repec.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | User-agent: Twitterbot 4 | Disallow: 5 | -------------------------------------------------------------------------------- /tests/test_data/illinoisstate.edu: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /search/ 3 | -------------------------------------------------------------------------------- /tests/test_data/in.news.yahoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://in.news.yahoo.com/sitemaps/news-sitemap_index_IN_en-IN.xml.gz 4 | -------------------------------------------------------------------------------- /tests/test_data/islamqa.info: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /jawal-zad/ 3 | Sitemap: https://islamqa.info/sitemap-index.xml -------------------------------------------------------------------------------- /tests/test_data/it.toolbox.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://it.toolbox.com/sitemap.xml 2 | 3 | User-agent: * 4 | Crawl-delay: 10 5 | Disallow: /4585/ -------------------------------------------------------------------------------- /tests/test_data/jang.com.pk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/javascript.info: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /node/ 3 | Disallow: /users/ 4 | Disallow: /content/ 5 | Disallow: /play/ 6 | Disallow: /profile/ 7 | -------------------------------------------------------------------------------- /tests/test_data/jquery.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/jqueryui.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/jsfiddle.net: -------------------------------------------------------------------------------- 1 | # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file 2 | -------------------------------------------------------------------------------- /tests/test_data/jsoneditoronline.org: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test_data/jsonformatter.curiousconcept.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/kb.iu.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/kb.iu.edu -------------------------------------------------------------------------------- /tests/test_data/kde.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/kde.org -------------------------------------------------------------------------------- /tests/test_data/kidshealth.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: * 3 | -------------------------------------------------------------------------------- /tests/test_data/king.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://king.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/kinsta.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/komonews.com: -------------------------------------------------------------------------------- 1 | user-agent: * 2 | allow: / -------------------------------------------------------------------------------- /tests/test_data/krakow.pl: -------------------------------------------------------------------------------- 1 | User-agent: InwegroBot 2 | Disallow: / 3 | 4 | User-Agent: * 5 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/kriesi.at: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://kriesi.at/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/krita.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://krita.org/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/laughingsquid.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/launchpad.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api/ 3 | Disallow: /+loggerhead/ 4 | Disallow: /*utouch* 5 | Disallow: /ubuntu/+source/utouch* 6 | -------------------------------------------------------------------------------- /tests/test_data/lerablog.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://lerablog.org/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/mailchimp.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search/*?* 3 | Disallow: /en/* 4 | 5 | Sitemap: https://mailchimp.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/matplotlib.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /examples/ # outdated location. Current examples are in /gallery/. 3 | -------------------------------------------------------------------------------- /tests/test_data/mediaget.com: -------------------------------------------------------------------------------- 1 | Host: mediaget.com 2 | Sitemap: http://mediaget.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/mfa.gov.ua: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/mfa.gov.ua -------------------------------------------------------------------------------- /tests/test_data/models.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search/ 3 | Disallow: /account/ 4 | -------------------------------------------------------------------------------- /tests/test_data/mom.me: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | sitemap: https://mom.me/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/movieweb.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /go/ 3 | Sitemap: https://movieweb.com/sitemap.xml 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/msu.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /honoredfaculty/directory/index.html 3 | Disallow: /honoredfaculty/directory/directory.html 4 | Disallow: /_files/ 5 | -------------------------------------------------------------------------------- /tests/test_data/muaban.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /chia-se/ 4 | Disallow: /trang-ca-nhan/ 5 | Sitemap: https://muaban.net/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/n4g.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://n4g.com/sitemaps/storyindex 4 | -------------------------------------------------------------------------------- /tests/test_data/nces.ed.gov: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /programs/coe/2005/section2/tableXLS.asp -------------------------------------------------------------------------------- /tests/test_data/netbeans.org: -------------------------------------------------------------------------------- 1 | Sitemap: https://netbeans.org/sitemap.xml.gz 2 | 3 | User-Agent: * 4 | Crawl-Delay: 5 5 | Disallow: /bugzilla/buglist.cgi* 6 | -------------------------------------------------------------------------------- /tests/test_data/news.am: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cache/ 3 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/news.sky.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Sitemap: https://news.sky.com/sitemap-index.xml -------------------------------------------------------------------------------- /tests/test_data/nginx.org: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /libxslt/ 3 | Sitemap: http://nginx.org/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/nodejs.org: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /dist/ 3 | Disallow: /docs/ 4 | Allow: /dist/latest/ 5 | Allow: /dist/latest/docs/api/ 6 | Allow: /api/ 7 | -------------------------------------------------------------------------------- /tests/test_data/ocw.mit.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/openvpn.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/paper.li: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /stop-mentions.html 3 | Disallow: /~/facebook-tab 4 | Sitemap: https://paper.li/~sitemaps/sitemap_index.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/paperpk.com: -------------------------------------------------------------------------------- 1 | # multiple brand selectors 2 | User-agent: * 3 | Allow: / 4 | Disallow: /cv/ 5 | Disallow: /admissions/blog/ 6 | Disallow: /shayari/ 7 | Disallow: /sms/ -------------------------------------------------------------------------------- /tests/test_data/parade.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | # Sitemap archive 5 | Sitemap: https://parade.com/sitemap.xml 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/paultan.org: -------------------------------------------------------------------------------- 1 | # BEGIN XML-SITEMAP-PLUGIN 2 | Sitemap: http://paultan.org/sitemap_index.xml 3 | # END XML-SITEMAP-PLUGIN 4 | -------------------------------------------------------------------------------- /tests/test_data/petapixel.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin 3 | Disallow: /wp-login.php 4 | 5 | Sitemap: https://petapixel.com/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/phet.colorado.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*/unsubscribe 3 | Disallow: /services/metadata/ 4 | Disallow: /*/services/metadata/documentation 5 | Disallow: /archive/ -------------------------------------------------------------------------------- /tests/test_data/photobucket.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | User-agent: rogerbot 5 | Allow: / 6 | Crawl-delay: 7 7 | -------------------------------------------------------------------------------- /tests/test_data/puchd.ac.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /includes/ 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/punchng.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://punchng.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/puzzles.usatoday.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | 4 | Sitemap: http://puzzles.usatoday.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/pydata.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/reason.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /botsn 3 | Disallow: /botsv 4 | Disallow: /admin/ 5 | Disallow: /wp-admin/ 6 | Sitemap: https://reason.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/reference.wolfram.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /searchsystem.html 3 | Disallow: /legacy/ 4 | Disallow: /cloudplatform/ -------------------------------------------------------------------------------- /tests/test_data/regex101.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /r/ 3 | 4 | User-agent: * 5 | Allow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/rubyonrails.org: -------------------------------------------------------------------------------- 1 | Sitemap: https://rubyonrails.org/sitemap.xml 2 | -------------------------------------------------------------------------------- /tests/test_data/runrepeat.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/screenrant.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | Sitemap: https://screenrant.com/sitemap.xml 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/scssoft.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /404 3 | -------------------------------------------------------------------------------- /tests/test_data/search.aol.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /bin 4 | Disallow: /language 5 | Disallow: /yhs 6 | Disallow: /aol 7 | -------------------------------------------------------------------------------- /tests/test_data/search.yahoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /bin 4 | Disallow: /language 5 | Disallow: /yhs 6 | Disallow: /aol 7 | -------------------------------------------------------------------------------- /tests/test_data/smallpdf.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /uploads/ 3 | Disallow: /download/ 4 | Disallow: /processed/ 5 | -------------------------------------------------------------------------------- /tests/test_data/songmeanings.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /query/ 3 | Sitemap: https://songmeanings.com/sitemap_index.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/soundcloud.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://a-v2.sndcdn.com/sitemap.txt 4 | -------------------------------------------------------------------------------- /tests/test_data/store.usps.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/store.usps.com -------------------------------------------------------------------------------- /tests/test_data/swappa.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | 4 | User-agent: 008 5 | Disallow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/talkingpointsmemo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://talkingpointsmemo.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/tatarstan.ru: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin 3 | Disallow: /press_mobile -------------------------------------------------------------------------------- /tests/test_data/techresources.oecd.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/techresources.oecd.org -------------------------------------------------------------------------------- /tests/test_data/thehun.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/themeisle.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/theweek.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Sitemap: /sitemap.xml 3 | Disallow: /mediakit 4 | Disallow: /*?*xhr= 5 | Disallow: /modules/* 6 | Sitemap: /sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/thewest.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://thewest.com.au/sitemap.xml 4 | Sitemap: https://thewest.com.au/news-sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/tinyurl.com: -------------------------------------------------------------------------------- 1 | user-agent: AhrefsBot 2 | disallow: / -------------------------------------------------------------------------------- /tests/test_data/tomcat.apache.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://tomcat.apache.org/sitemap.xml 3 | -------------------------------------------------------------------------------- /tests/test_data/townhall.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://townhall.com/sitemaps/sitemapindex-townhall.xml -------------------------------------------------------------------------------- /tests/test_data/trophymanager.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/tvtropes.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*?*action=source* 3 | -------------------------------------------------------------------------------- /tests/test_data/ub.ac.id: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | User-agent: * 9 | Disallow: /wp-admin/ 10 | Allow: /wp-admin/admin-ajax.php 11 | -------------------------------------------------------------------------------- /tests/test_data/ubuntu.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /search 3 | Disallow: /search* 4 | Sitemap: https://ubuntu.com/static/files/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/ubuntuforums.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | -------------------------------------------------------------------------------- /tests/test_data/uci.edu: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # http://code.google.com/web/controlcrawlindex/ 3 | 4 | User-agent: * 5 | Disallow: 6 | Sitemap: https://uci.edu/sitemap.php -------------------------------------------------------------------------------- /tests/test_data/uk.news.yahoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://uk.news.yahoo.com/sitemaps/news-sitemap_index_GB_en-GB.xml.gz 4 | -------------------------------------------------------------------------------- /tests/test_data/undip.ac.id: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/untappd.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/up.pt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /umib/ 3 | Disallow: /notivent/ 4 | 5 | Crawl-delay: 2 6 | -------------------------------------------------------------------------------- /tests/test_data/uploaded.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/uploaded.net -------------------------------------------------------------------------------- /tests/test_data/wall.alphacoders.com: -------------------------------------------------------------------------------- 1 | User-agent: Yandex 2 | Disallow: / 3 | 4 | User-agent: * 5 | Disallow: /crop.php 6 | Disallow: /by_color.php -------------------------------------------------------------------------------- /tests/test_data/wayne.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /manager/ 4 | Disallow: /cache/ 5 | -------------------------------------------------------------------------------- /tests/test_data/weather.gc.ca: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /scripts/ 4 | Disallow: /build/ 5 | Disallow: /template/ 6 | Disallow: /search/ 7 | -------------------------------------------------------------------------------- /tests/test_data/welcome.miami.edu: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /_assets/php/ 3 | Disallow: /_assets/apache-error-pages/ 4 | 5 | Sitemap: http://welcome.miami.edu/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/whatculture.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/whentowork.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /cgi-bin/ 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/wikitravel.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | User-agent: Mediapartners-Google 5 | Allow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/wlos.com: -------------------------------------------------------------------------------- 1 | user-agent: * 2 | allow: / -------------------------------------------------------------------------------- /tests/test_data/wordpress.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /support/rss 4 | Disallow: /archive/ 5 | Disallow: /?filters 6 | -------------------------------------------------------------------------------- /tests/test_data/worldofsolitaire.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://worldofsolitaire.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/writingexplained.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | -------------------------------------------------------------------------------- /tests/test_data/wsu.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/wustl.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.365online.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.365online.com -------------------------------------------------------------------------------- /tests/test_data/www.4icu.org: -------------------------------------------------------------------------------- 1 | User-agent: SemrushBot 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.4shared.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /rest/ 3 | Sitemap: https://www.4shared.com/web/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.8notes.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.8notes.com -------------------------------------------------------------------------------- /tests/test_data/www.aastocks.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.aastocks.com/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.abb.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 4 3 | Disallow: /*preferences=show* 4 | Disallow: /User/Login.ashx 5 | Disallow: /User/Login.aspx 6 | -------------------------------------------------------------------------------- /tests/test_data/www.aconvert.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /samples/ 3 | Disallow: /convert/ -------------------------------------------------------------------------------- /tests/test_data/www.act.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /content/dam/act/secured/ -------------------------------------------------------------------------------- /tests/test_data/www.adam4adam.com: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449 3 | 4 | User-agent: * 5 | -------------------------------------------------------------------------------- /tests/test_data/www.addgene.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /emta/ 3 | Disallow: /emta-addgene-public/ 4 | Disallow: /users/login/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.afr.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /search.html?* 4 | Disallow: /afr-newsletter-btb 5 | Disallow: /afr-newsletter-mw5 6 | Disallow: /smi-newsletter -------------------------------------------------------------------------------- /tests/test_data/www.airbaltic.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*?language=* 3 | 4 | Sitemap: https://www.airbaltic.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.airdroid.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow:/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.airliners.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /photo-details-carousel 4 | 5 | Sitemap: https://www.airliners.net/sitemap/index.xml -------------------------------------------------------------------------------- /tests/test_data/www.aleks.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /alekscgi/ 3 | Disallow: /webform/ 4 | Disallow: /donotscan/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.alternet.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-login.php 3 | Disallow: /wp-admin/ 4 | Crawl-delay: 600 -------------------------------------------------------------------------------- /tests/test_data/www.americanthinker.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /article/ 3 | Allow: /articles/2013/08/ 4 | Allow: /articles/2013/07/ 5 | Allow: /articles/2013/06/ -------------------------------------------------------------------------------- /tests/test_data/www.ametsoc.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-Delay: 5 3 | Disallow: /admin/ 4 | Disallow: /tasks/ 5 | Disallow: /requirements/ 6 | Disallow: /config/ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.amity.edu: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | 3 | Disallow: 4 | -------------------------------------------------------------------------------- /tests/test_data/www.amtrak.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.amtrak.com/sitemap.xml 2 | User-agent: * 3 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.ana.co.jp: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /amc-elite/ 3 | Disallow: /amc_e/ 4 | Disallow: /be/ 5 | 6 | Sitemap: http://www.ana.co.jp/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.androidauthority.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.andyroid.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.anime-expo.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ankara.edu.tr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.anonym.to: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /no/ 3 | 4 | User-agent: ia_archiver 5 | Disallow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/www.apache.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /websrc 3 | Crawl-Delay: 4 4 | -------------------------------------------------------------------------------- /tests/test_data/www.arkadium.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /team_member/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.asda.com: -------------------------------------------------------------------------------- 1 | # robots.txt for http://www.asda.com/ 2 | Sitemap: https://www.asda.com/sitemap.xml 3 | User-agent: * 4 | Disallow:/emails/* 5 | -------------------------------------------------------------------------------- /tests/test_data/www.asm.org: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.asrock.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /events/ 3 | Disallow: /*.xls$ -------------------------------------------------------------------------------- /tests/test_data/www.associatedbank.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /Unsub* 3 | -------------------------------------------------------------------------------- /tests/test_data/www.asstr.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /fassmpl/ 4 | Disallow: /~Killerwhale_Zeus/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.astrology-zodiac-signs.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://www.astrology-zodiac-signs.com/sitemap.xml 3 | -------------------------------------------------------------------------------- /tests/test_data/www.astrology.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /us/Backoffice 4 | 5 | # Sitemap files 6 | Sitemap: https://www.astrology.com/sitemap.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/www.astrosage.com: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Allow:* -------------------------------------------------------------------------------- /tests/test_data/www.asx.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | sitemap: http://www.asx.com.au/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.auburn.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /student_info/omega_phi_alpha/images/members 3 | Disallow: /academic/rotc/afrotc/CurrentCadets/ -------------------------------------------------------------------------------- /tests/test_data/www.audiobooks.com: -------------------------------------------------------------------------------- 1 | User-agent: 008 2 | Disallow: / 3 | 4 | User-agent:* 5 | Disallow: 6 | 7 | Sitemap: https://www.audiobooks.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.autosport.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /roc-live/ 3 | User-agent: AhrefsBot 4 | Disallow: / 5 | User-agent: MJ12bot 6 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.avast.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Disallow: /search-results* 4 | Sitemap: https://www.avast.com/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.avature.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.avature.net -------------------------------------------------------------------------------- /tests/test_data/www.avis.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.babylon.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /forms 3 | Disallow: /style 4 | Disallow: /templates 5 | 6 | Disallow: /*.BGL$ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.bakuelectronics.az: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.balenciaga.com: -------------------------------------------------------------------------------- 1 | # Disallow tricombot. 2 | User-agent: Tricombot 3 | Disallow: / 4 | 5 | User-agent: * 6 | Disallow: /yTos/ 7 | Disallow: /teaser.asp* -------------------------------------------------------------------------------- /tests/test_data/www.bankofscotland.co.uk: -------------------------------------------------------------------------------- 1 | # v 1.1 2 | # www.bankofscotland.co.uk 3 | User-agent: * 4 | Disallow: 5 | Sitemap: https://www.bankofscotland.co.uk/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.bankofthewest.com: -------------------------------------------------------------------------------- 1 | User-agent: * # applies to all robots 2 | Disallow: 3 | Sitemap: https://www.bankofthewest.com/sitemap_index.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.barbie.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: * -------------------------------------------------------------------------------- /tests/test_data/www.barchart.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /proxies/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.base64decode.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.base64decode.org/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.bcci.tv: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin -------------------------------------------------------------------------------- /tests/test_data/www.beeradvocate.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.behance.net: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.behance.net/sitemap 2 | -------------------------------------------------------------------------------- /tests/test_data/www.behindthename.com: -------------------------------------------------------------------------------- 1 | User-agent: * Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.behindthevoiceactors.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.behindthevoiceactors.com -------------------------------------------------------------------------------- /tests/test_data/www.benq.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: * 3 | Sitemap: https://www.benq.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.berkeley.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /directory/ 3 | Crawl-delay: 120 4 | -------------------------------------------------------------------------------- /tests/test_data/www.bg.ac.rs: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /test/ 4 | Sitemap: http://www.bg.ac.rs/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.bgsu.edu: -------------------------------------------------------------------------------- 1 | # test robots.txt -------------------------------------------------------------------------------- /tests/test_data/www.bharatmatrimony.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin/ 3 | 4 | User-agent: Yandex 5 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.biblestudytools.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: http://sitemaps.salemweb.net/sitemaps/sitemap-biblestudytools-14.xml -------------------------------------------------------------------------------- /tests/test_data/www.bikeforums.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.bim.com.tr: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.binsearch.info: -------------------------------------------------------------------------------- 1 | User-agent: SemrushBot 2 | Disallow: / 3 | 4 | User-agent: SemrushBot-SA 5 | Disallow: / 6 | 7 | User-agent: ia_archiver 8 | Disallow: / 9 | -------------------------------------------------------------------------------- /tests/test_data/www.bitcoin.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin/ -------------------------------------------------------------------------------- /tests/test_data/www.bitcomet.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | Disallow: /client/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.bjtu.edu.cn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.bjtu.edu.cn -------------------------------------------------------------------------------- /tests/test_data/www.blastingnews.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.blastingnews.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.blender.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.blender.org/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.blizzard.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.blogto.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /bookmarks/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.bluedart.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.bmfbovespa.com.br: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.bmwusa.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api 3 | Sitemap: https://www.bmwusa.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.bollywoodhungama.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /?s 4 | Sitemap: https://www.bollywoodhungama.com/sitemap_index.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.boston.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.broadway.com: -------------------------------------------------------------------------------- 1 | 2 | User-agent: * 3 | Disallow: /admin 4 | 5 | Allow: / 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.bukkit.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | #Disallow: / 3 | 4 | User-agent: Mediapartners-Google* 5 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.bvg.de: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.ca.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cac.gov.ng: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.cac.gov.ng -------------------------------------------------------------------------------- /tests/test_data/www.callofduty.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.cambridgeenglish.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: http://www.cambridgeenglish.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.cancer.gov: -------------------------------------------------------------------------------- 1 | sitemap: https://www.cancer.gov/sitemap.xml 2 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.canon.com: -------------------------------------------------------------------------------- 1 | # robots.txt for http://www.canon.com/ 2 | 3 | User-Agent: * 4 | Disallow: /js/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.capcom.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.capitalone.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /assets/bank/media/beneficiary-form.pdf 3 | 4 | sitemap: https://www.capitalone.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.caracol.com.co: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.cardkingdom.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.cardkingdom.com/sitemap-index.xml 2 | User-agent: * 3 | Disallow: /cart/add 4 | -------------------------------------------------------------------------------- /tests/test_data/www.carleton.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ -------------------------------------------------------------------------------- /tests/test_data/www.carnoc.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.carscoops.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.cbr.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | Sitemap: https://www.cbr.com/sitemap.xml 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.cccs.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cerner.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /page.aspx? 3 | -------------------------------------------------------------------------------- /tests/test_data/www.cgg.gov.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /master/ 3 | Allow: /master/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.channelstv.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 30 3 | Disallow: /wp-admin/ 4 | Disallow: /wp-includes/ 5 | Disallow: /channelsAPI/tvplayer/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.chat-avenue.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /chatrooms/ 3 | Disallow: /~chatave/forums/ 4 | Sitemap: https://www.chat-avenue.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.chessbomb.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.chessbomb.com/sitemap_index.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.china-airlines.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: 4 | 5 | SiteMap: https://www.china-airlines.com/us/en/sitemap.xml 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.chinadaily.com.cn: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /player/ 3 | Crawl-delay:30 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.chipotle.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.chipotle.com/sitemap.us.en.xml -------------------------------------------------------------------------------- /tests/test_data/www.chromium.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /feeds 3 | Allow: /_/rsrc/ 4 | Disallow: /_/ 5 | Sitemap: http://www.chromium.org:80/system/feeds/sitemap 6 | -------------------------------------------------------------------------------- /tests/test_data/www.chula.ac.th: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cibeg.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: http://www.cibeg.com -------------------------------------------------------------------------------- /tests/test_data/www.cision.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /us/?s=* 3 | 4 | Sitemap: https://www.cision.com/sitemap_index.xml 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.citehr.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.cityu.edu.hk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /tto/ 3 | Disallow: /cityualbum/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.civfanatics.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.classlink.com_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /?blackhole 3 | -------------------------------------------------------------------------------- /tests/test_data/www.cleancss.com: -------------------------------------------------------------------------------- 1 | User-agent: Yandex 2 | Crawl-delay: 5 # sets a 2 second time-out 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.clemson.edu: -------------------------------------------------------------------------------- 1 | User-agent: 80legs 2 | Disallow: / 3 | 4 | User-agent: 008 5 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.clickbank.com: -------------------------------------------------------------------------------- 1 | User-agent: : * 2 | Disallow: /Platinum-Summit-2018/ 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.clublexus.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cma-cgm.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api/* 3 | Disallow: /health-monitoring 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cmu.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cmufront/ 3 | Disallow: /myandrew 4 | Disallow: /homepage/health/2009/fall/nobel-prize.shtml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.coles.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /catalogues/* 3 | -------------------------------------------------------------------------------- /tests/test_data/www.collegenet.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://corp.collegenet.com/sitemap.xml 2 | 3 | -------------------------------------------------------------------------------- /tests/test_data/www.colostate.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.companieshouse.gov.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: http://www.companieshouse.gov.uk/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.concordia.ca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.concordia.ca -------------------------------------------------------------------------------- /tests/test_data/www.conduit.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.confirmit.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | # CMS 3 | Disallow: /admin/ 4 | 5 | SITEMAP: https://www.confirmit.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.congress.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 2 3 | User-Agent: Googlebot 4 | Disallow: /search/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.convertfiles.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /files/ 3 | Disallow: /blog/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.coolermaster.com_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.coolermater.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.copaair.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.copart.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap:https://www.copart.com/sitemap-index.xml -------------------------------------------------------------------------------- /tests/test_data/www.copyright.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.corel.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.corel.com/href.lang.sitemap.xml 2 | 3 | User-agent: * 4 | Allow: / 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.cplusplus.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /faq/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.cratejoy.com: -------------------------------------------------------------------------------- 1 | # 2 | # robots.txt 3 | # 4 | user-agent: * 5 | 6 | disallow: /admin/ 7 | disallow: /api/ 8 | disallow: /*?*sort_by= -------------------------------------------------------------------------------- /tests/test_data/www.creativecow.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.creditonebank.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | crawl-delay: 1 4 | Sitemap: https://www.creditonebank.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.crestron.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.crestron.com -------------------------------------------------------------------------------- /tests/test_data/www.crucial.com: -------------------------------------------------------------------------------- 1 | #US 2 | Sitemap: https://www.crucial.com/sitemap_10151.xml 3 | User-agent: * 4 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.cuni.cz: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.cuny.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.curtin.edu.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.cv-library.co.uk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.cv-library.co.uk -------------------------------------------------------------------------------- /tests/test_data/www.cybersource.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /developers/integration_methods/rest_api/ 3 | Sitemap: http://www.cybersource.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.dailynayadiganta.com: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Allow:/ -------------------------------------------------------------------------------- /tests/test_data/www.daimler.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /index.html 3 | Disallow: /overview-nav/ 4 | Disallow: /canvasnav/ 5 | Disallow: /system/ics/ -------------------------------------------------------------------------------- /tests/test_data/www.dal.ca: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: */search.html?*q=*$ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.datasciencecentral.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.datasciencecentral.com/sitemap.xml 2 | User-agent: ia_archiver 3 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.definitions.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /myvocabulary.php 3 | 4 | User-agent: msnbot 5 | Crawl-delay: 1 -------------------------------------------------------------------------------- /tests/test_data/www.deped.gov.ph: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.desiringgod.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /donate/create 3 | -------------------------------------------------------------------------------- /tests/test_data/www.desmos.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /status 3 | Disallow: /drive_api/* -------------------------------------------------------------------------------- /tests/test_data/www.devexpress.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /MyAccount/ 3 | Disallow: /App_Data/ 4 | SiteMap: https://www.devexpress.com/SiteMapGen.ashx -------------------------------------------------------------------------------- /tests/test_data/www.dezeen.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.di.fm: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /investors/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.dialog.lk: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.dialog.lk/siteindexDialog.xml -------------------------------------------------------------------------------- /tests/test_data/www.digitalriver.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /wp-includes/ 4 | 5 | 6 | Sitemap: https://www.digitalriver.com/sitemap.xml 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.disney.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.disney.com -------------------------------------------------------------------------------- /tests/test_data/www.diyphotography.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Disallow: *&preview_nonce=* 6 | -------------------------------------------------------------------------------- /tests/test_data/www.djangoproject.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin -------------------------------------------------------------------------------- /tests/test_data/www.dll-files.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://www.dll-files.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.dmdc.osd.mil: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: /identitymanagement/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.dofus.com: -------------------------------------------------------------------------------- 1 | # Allow all 2 | User-agent: * 3 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.dogpile.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /$ 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.dominos.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.dominos.com.au/sitemap.aspx -------------------------------------------------------------------------------- /tests/test_data/www.dowjones.com: -------------------------------------------------------------------------------- 1 | user-agent: * 2 | Sitemap: https://www.dowjones.com/sitemap_index.xml 3 | Disallow: /logos 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.downloadhelper.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.dpm.org.cn: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Disallow:/admin 3 | Disallow:/Public 4 | Disallow:/vr/ -------------------------------------------------------------------------------- /tests/test_data/www.drexel.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Disallow: /search -------------------------------------------------------------------------------- /tests/test_data/www.driveridentifier.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.dteenergy.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /eesg/ 4 | Disallow: /images/eesg/ -------------------------------------------------------------------------------- /tests/test_data/www.dwell.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.dwell.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.dx.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.dx.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.earthcam.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /clients/common/ 3 | Disallow: /search/ 4 | Disallow: /swf/ 5 | 6 | User-agent: revivebot 7 | Disallow: / 8 | 9 | -------------------------------------------------------------------------------- /tests/test_data/www.earthlink.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.easports.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Sitemap: https://www.easports.com/sitemap-index.xml 3 | Disallow: /services/ 4 | Disallow: /search -------------------------------------------------------------------------------- /tests/test_data/www.easybib.com: -------------------------------------------------------------------------------- 1 | # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file 2 | -------------------------------------------------------------------------------- /tests/test_data/www.eaton.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: */SearchResults/* 3 | Disallow: *.facets* 4 | Disallow: *.sort* 5 | Crawl-delay: 20 6 | -------------------------------------------------------------------------------- /tests/test_data/www.edaboard.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /css.php -------------------------------------------------------------------------------- /tests/test_data/www.edb.gov.hk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.edrawsoft.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /flow/ 3 | Disallow: /order/ 4 | Sitemap: https://www.edrawsoft.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.edutopia.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /user/* 3 | Disallow: /search? 4 | Disallow: /search$ 5 | Noindex: /user/* 6 | Noindex: /search? 7 | Noindex: /search$ -------------------------------------------------------------------------------- /tests/test_data/www.elderscrollsonline.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.eliteprospects.com: -------------------------------------------------------------------------------- 1 | Disallow: /intra7net/ 2 | Disallow: /intrateam/ 3 | Crawl-delay: 30 4 | Request-rate: 1/30 5 | -------------------------------------------------------------------------------- /tests/test_data/www.elkhabar.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.elte.hu: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449 3 | 4 | User-agent: * 5 | -------------------------------------------------------------------------------- /tests/test_data/www.emuparadise.me: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Sitemap: http://www.emuparadise.me/sitemap.php 3 | Allow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.endnote.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.enworld.org: -------------------------------------------------------------------------------- 1 | User-agent: Yahoo! Slurp 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.epfl.ch: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-login.php 3 | Disallow: /wp-admin/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.epicgames.com: -------------------------------------------------------------------------------- 1 | 2 | User-agent: * 3 | Disallow: /account 4 | Disallow: /exchange 5 | Disallow: /login 6 | Disallow: /logout 7 | -------------------------------------------------------------------------------- /tests/test_data/www.epsilon.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.epsilon.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.equifaxworkforce.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.equifaxworkforce.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.equityapartments.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /roommates/profile 3 | Disallow: /roommates/matches -------------------------------------------------------------------------------- /tests/test_data/www.err.ee: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.err.ee/sitemap -------------------------------------------------------------------------------- /tests/test_data/www.essence.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ethiopianairlines.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.ethiopianairlines.com -------------------------------------------------------------------------------- /tests/test_data/www.eurogamer.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /forum_moderation.php 3 | Disallow: /shopto.php 4 | Disallow: /m/ 5 | Disallow: /profiles/ 6 | Disallow: /showcase/ -------------------------------------------------------------------------------- /tests/test_data/www.europarl.europa.eu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.europarl.europa.eu -------------------------------------------------------------------------------- /tests/test_data/www.eveonline.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.examenglish.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.excel-easy.com: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.exlibrisgroup.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.expatica.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ezvid.com: -------------------------------------------------------------------------------- 1 | # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file 2 | -------------------------------------------------------------------------------- /tests/test_data/www.fakku.net: -------------------------------------------------------------------------------- 1 | Host: https://www.fakku.net 2 | Sitemap: https://www.fakku.net/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.famousbirthdays.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.famousbirthdays.com/fb-sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.federalbank.co.in: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.federalbank.co.in/sitemap.xml?groupId=10180&privateLayout=false 4 | -------------------------------------------------------------------------------- /tests/test_data/www.feedly.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /k/ 3 | Disallow: /e/ 4 | Disallow: /v3/ 5 | Disallow: /i/entry/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.femjoy.com: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # http://code.google.com/web/controlcrawlindex/ 3 | 4 | User-agent: * 5 | -------------------------------------------------------------------------------- /tests/test_data/www.ffonts.net: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.fido.ca: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | http://www.fido.ca/sitemap.xml 4 | http://www.fido.ca/product-sitemap.xml 5 | http://www.fido.ca/support-sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.filgoal.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.filgoal.com/sitemaps -------------------------------------------------------------------------------- /tests/test_data/www.filmaffinity.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*?FASID 3 | Disallow: /*&FASID 4 | Disallow: /*/sharerating 5 | Disallow: /flash/rats.swf 6 | -------------------------------------------------------------------------------- /tests/test_data/www.financialexpress.com: -------------------------------------------------------------------------------- 1 | Bye -------------------------------------------------------------------------------- /tests/test_data/www.findlaw.com: -------------------------------------------------------------------------------- 1 | # Findlaw robots.txt file 2 | 3 | User-Agent: * 4 | Disallow: 5 | Sitemap: https://www.findlaw.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.firstbanknigeria.com_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.firstdata.com: -------------------------------------------------------------------------------- 1 | User-agent: AhrefsBot 2 | Disallow: / 3 | 4 | User-agent: * 5 | Disallow: /en_us/lp/offer.html 6 | -------------------------------------------------------------------------------- /tests/test_data/www.fisglobal.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.fiu.edu: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.fiu.edu/_assets/sitemap.xml 2 | -------------------------------------------------------------------------------- /tests/test_data/www.flamingtext.com: -------------------------------------------------------------------------------- 1 | # robots.txt 2 | #User-agent: * 3 | #Disallow: /net-fu/jobs/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.flexjobs.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /admin/ 3 | Disallow: /Admin/ 4 | Disallow: /ADMIN/ 5 | Disallow: /*.asmx 6 | -------------------------------------------------------------------------------- /tests/test_data/www.flhsmv.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow:/frip/*/ 3 | Disallow:/robots.txt -------------------------------------------------------------------------------- /tests/test_data/www.flynas.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin 3 | Disallow: /umbraco 4 | Disallow: /en/aramcoltp 5 | Disallow: /ar/aramcoltp -------------------------------------------------------------------------------- /tests/test_data/www.flysat.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.football365.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.foreca.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.foreca.fi/sitemap-forecafi-2019-04-17.xml 2 | User-agent: * 3 | Allow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.forexfactory.com: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.formula1.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.formula1.com/content/fom-website/en.sitemap-index.xml 2 | User-Agent: * 3 | Disallow: 4 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.freepdfconvert.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /d/ 3 | Disallow: /result/ 4 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.freerepublic.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /perl/ 3 | Disallow: /search/ 4 | Disallow: /focus/f-news/search 5 | Disallow: /focus/keywords 6 | -------------------------------------------------------------------------------- /tests/test_data/www.ftb.ca.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /forms/archive 3 | Disallow: /tax-pros/ask-a-legal-expert.asp -------------------------------------------------------------------------------- /tests/test_data/www.fullsail.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.fullsail.edu -------------------------------------------------------------------------------- /tests/test_data/www.gamedesign.jp: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google* 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.gearboxsoftware.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.gizmochina.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.gizmochina.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.gks.ru: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | -------------------------------------------------------------------------------- /tests/test_data/www.glastonburyfestivals.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.globaltimes.cn: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://www.globaltimes.cn/newssitemap.xml 3 | Allow: / 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.globe.com.ph: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /globeone 4 | Disallow: /community 5 | Disallow: /shop 6 | 7 | Sitemap: https://www.globe.com.ph/sitemap.xml 8 | -------------------------------------------------------------------------------- /tests/test_data/www.gmx.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /mc/ -------------------------------------------------------------------------------- /tests/test_data/www.gnome.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.gnome.org/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.goabroad.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /api/ 4 | Disallow: /embassy/ 5 | Disallow: /blog/*? 6 | Disallow: /blog/wp-admin/ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.goair.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.goair.in/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.golf.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.golfnow.com: -------------------------------------------------------------------------------- 1 | # robots.txt for http://www.golfnow.com 2 | 3 | User-agent: * 4 | allow: / -------------------------------------------------------------------------------- /tests/test_data/www.gosugamers.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /dev-tools/ 3 | Disallow: /crew/ 4 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.gotquestions.org: -------------------------------------------------------------------------------- 1 | #robots.txt for gotquestions.org 2 | 3 | Sitemap: https://www.gotquestions.org/sitemapindex.xml -------------------------------------------------------------------------------- /tests/test_data/www.gov.uz: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.gq-magazine.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.gq-magazine.co.uk/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.graphpad.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /autoupdates/ 3 | Disallow: /ecommerce/ 4 | Disallow: /guides/prism/5/ 5 | Disallow: /guides/prism/6/ 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.greatandhra.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.gtaforums.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.gva.es: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.gva61.es/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.hangseng.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.hangseng.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.head-fi.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.head-fi.org -------------------------------------------------------------------------------- /tests/test_data/www.health.nsw.gov.au: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Disallow: /healthreform/2012/Pages/resources.aspx 4 | Disallow: /survey2016 5 | -------------------------------------------------------------------------------- /tests/test_data/www.healthstream.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /phase2/ 3 | Disallow: /*login-test? 4 | Disallow: /content/ -------------------------------------------------------------------------------- /tests/test_data/www.hentai-foundry.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.hentai-foundry.com -------------------------------------------------------------------------------- /tests/test_data/www.hidemyass.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.hidemyass.com/sitemap_index.xml 3 | 4 | Disallow: /legacy/ 5 | Disallow: /old/ 6 | Disallow: /vpn-config/ -------------------------------------------------------------------------------- /tests/test_data/www.hightail.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.hitachi.co.jp: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /media 3 | -------------------------------------------------------------------------------- /tests/test_data/www.hkbu.edu.hk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /uis/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.hm.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.honda.com: -------------------------------------------------------------------------------- 1 | #Blank robots.txt -------------------------------------------------------------------------------- /tests/test_data/www.honeywell.com: -------------------------------------------------------------------------------- 1 | Sitemap:https://www.honeywell.com/sitemap.xml 2 | User-agent: * 3 | -------------------------------------------------------------------------------- /tests/test_data/www.hotcelebshome.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.hsbc.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.hsbc.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.hyperdia.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | Sitemap: http://www.hyperdia.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.iaai.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /MyAuctionCenter/ 3 | #Sitemap: https://www.iaai.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.ibo.org: -------------------------------------------------------------------------------- 1 | 2 | User-agent: * 3 | -------------------------------------------------------------------------------- /tests/test_data/www.icann.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | -------------------------------------------------------------------------------- /tests/test_data/www.idealist.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.idealist.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.idlebrain.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ielts.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.ielts.org/sitemap.xml 3 | Allow: / 4 | Disallow: */error/ 5 | Sitemap: sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.iinet.net.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: * 3 | Disallow: /search/ 4 | Disallow: /_library/ 5 | 6 | Sitemap: /sitemap/_data/sitemap.xml 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /tests/test_data/www.ilovepdf.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /upload/ -------------------------------------------------------------------------------- /tests/test_data/www.imperial.ac.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /terminalfour/SiteManager/ 3 | Crawl-delay: 20 4 | 5 | User-agent: slurp 6 | Crawl-delay: 80 7 | -------------------------------------------------------------------------------- /tests/test_data/www.indeed.ca: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /$ 3 | Allow: /m/$ 4 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.indiastudychannel.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /tag/ 3 | 4 | Sitemap: https://www.indiastudychannel.com/sitemaps/index.aspx -------------------------------------------------------------------------------- /tests/test_data/www.indiatimes.com: -------------------------------------------------------------------------------- 1 | #robots_https.txt 2 | User-agent: * 3 | Allow: / 4 | Disallow: /*.php* 5 | Disallow: /whatshot -------------------------------------------------------------------------------- /tests/test_data/www.info.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /$ 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ingress.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /.well-known/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.inkfrog.com: -------------------------------------------------------------------------------- 1 | # Don't allow web crawlers to index Craft CMS 2 | User-agent: * 3 | Disallow: /craft/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.inkscape.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /media/ 3 | Disallow: /static/ 4 | Disallow: /get/ 5 | Crawl-delay: 86400 6 | -------------------------------------------------------------------------------- /tests/test_data/www.internetdownloadmanager.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search.html 3 | Disallow: /register/reseller_27.html 4 | -------------------------------------------------------------------------------- /tests/test_data/www.interpals.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Crawl-delay: 0.2 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ip2location.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.ird.govt.nz: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: https://www.ird.govt.nz/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.irib.ir: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://irib.ir/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.israelpost.co.il: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /fp/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.its.ac.id: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.itsnicethat.com: -------------------------------------------------------------------------------- 1 | # User-agent: * 2 | # Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.itu.edu.tr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /sitefinity/ -------------------------------------------------------------------------------- /tests/test_data/www.izotope.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /en/product-data/* 3 | Allow: / 4 | Sitemap: sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.jal.co.jp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.jal.co.jp -------------------------------------------------------------------------------- /tests/test_data/www.jamendo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /me/playlists 3 | Disallow: /me/settings 4 | Disallow: /me/favorite 5 | Disallow: /api 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.japan-guide.com: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot 2 | Disallow: 3 | 4 | User-agent: Mediapartners-Google 5 | Disallow: 6 | 7 | User-agent: * 8 | Disallow: /local/ 9 | -------------------------------------------------------------------------------- /tests/test_data/www.jetblue.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /manageflights/flightnotification/ 3 | Disallow: /vacations/Login 4 | Disallow: /boardingpass/ 5 | Allow: / 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.jeunesseglobal.com: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot 2 | Disallow: 3 | 4 | User-agent: AdsBot-Google 5 | Disallow: 6 | 7 | User-agent: Googlebot-Image 8 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.jimmyjohns.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.jimmyjohns.com/sitemap.xml 3 | 4 | User-agent: SemrushBot 5 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.jnj.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 10 3 | -------------------------------------------------------------------------------- /tests/test_data/www.jnu.edu.cn: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Disallow:/tz 3 | Disallow:/gg 4 | Disallow:/74 5 | Disallow:/71 6 | Disallow:/6e 7 | #限制校外访问的url,禁止收录 8 | Disallow:/*.psp*/ 9 | -------------------------------------------------------------------------------- /tests/test_data/www.jst.go.jp: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.jst.go.jp/sitemap4google.txt 2 | -------------------------------------------------------------------------------- /tests/test_data/www.juno.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.justinguitar.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /casein 3 | Disallow: /admin 4 | 5 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.justjared.com: -------------------------------------------------------------------------------- 1 | # BEGIN XML-SITEMAP-PLUGIN 2 | Sitemap: http://www.justjared.com/sitemapindex.xml 3 | # END XML-SITEMAP-PLUGIN -------------------------------------------------------------------------------- /tests/test_data/www.kaist.ac.kr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow : / 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.kaltura.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /content/ 3 | Disallow: /p/*/serveFlavor/ 4 | 5 | User-agent: Googlebot 6 | Disallow: /content/ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.kau.edu.sa: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://www.kau.edu.sa/SiteMap.xml 3 | Disallow: /cgi-bin/ 4 | Disallow: /WebResource.axd 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.keenspot.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /premium-services/ -------------------------------------------------------------------------------- /tests/test_data/www.keio.ac.jp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.keio.ac.jp -------------------------------------------------------------------------------- /tests/test_data/www.kennesaw.edu: -------------------------------------------------------------------------------- 1 | User-agent: bingbot 2 | Crawl-delay: 5 3 | -------------------------------------------------------------------------------- /tests/test_data/www.kobe-u.ac.jp: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /documents/info/public-info/achievements/ 3 | Disallow: /download/ -------------------------------------------------------------------------------- /tests/test_data/www.koreanair.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.koreanair.com -------------------------------------------------------------------------------- /tests/test_data/www.kvb.co.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /manager 4 | -------------------------------------------------------------------------------- /tests/test_data/www.kw.zain.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.kw.zain.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.kwsp.gov.my: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.kwsp.gov.my/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.lacounty.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/*/ 3 | Disallow: /wp-content/plugins/ 4 | Disallow: /wp-login.php/ 5 | Disallow: /wp-content/ai1wm-backups/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.landrover.com: -------------------------------------------------------------------------------- 1 | # robots.txt https://www.landrover.com/ 2 | 3 | Sitemap: https://www.landrover.com/sitemap.xml 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.lbl.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | User-agent: Baiduspider 6 | Disallow: /LBL-Programs/physics/ -------------------------------------------------------------------------------- /tests/test_data/www.letsrun.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /phptestnot/ 4 | Disallow: /static/ -------------------------------------------------------------------------------- /tests/test_data/www.lexis.com: -------------------------------------------------------------------------------- 1 | # /robots.txt file for http://web.lexis-nexis.com/ 1481 2 | 3 | User-agent: * 4 | Disallow: / 5 | Disallow: /clients/ 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.lexus.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://www.lexus.com/sitemap.xml 3 | Disallow: /Communication-Preferences/ 4 | Disallow: /Communication-Preferences-OptOut/ -------------------------------------------------------------------------------- /tests/test_data/www.lgbtqnation.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Disallow: /4564944 4 | Disallow: /debug -------------------------------------------------------------------------------- /tests/test_data/www.libsyn.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.lifesitenews.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.lifeway.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /$ 3 | Allow: /*/* 4 | Disallow: /* 5 | Disallow: /storefaqs -------------------------------------------------------------------------------- /tests/test_data/www.lightningmaps.org: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test_data/www.lionbridge.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.lipsum.com: -------------------------------------------------------------------------------- 1 | # robots.txt for http://www.lipsum.com/ 2 | 3 | User-agent: * 4 | Disallow: 5 | -------------------------------------------------------------------------------- /tests/test_data/www.liquor.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.liquor.com/sitemapindex.xml 2 | -------------------------------------------------------------------------------- /tests/test_data/www.listal.com: -------------------------------------------------------------------------------- 1 | User-agent: Yandex 2 | Crawl-delay: 1 3 | -------------------------------------------------------------------------------- /tests/test_data/www.liverpoolfc.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.local.com: -------------------------------------------------------------------------------- 1 | #robots.txt for all our sites 2 | User-agent: * 3 | Disallow: /contact_us.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.lufthansa.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.lufthansa.com -------------------------------------------------------------------------------- /tests/test_data/www.lyngsat.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.lyricfind.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /documentation/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.lyrics.com: -------------------------------------------------------------------------------- 1 | User-agent: msnbot 2 | Crawl-delay: 1 -------------------------------------------------------------------------------- /tests/test_data/www.macsales.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: http://eshop.macsales.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.malaysiakini.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /CHANGELOG.txt 3 | 4 | User-agent: Nutch 5 | Disallow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/www.manchester.ac.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /_contentlibrary/ 3 | Disallow: /medialibrary/ 4 | Disallow: /search/ -------------------------------------------------------------------------------- /tests/test_data/www.mango.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /web/ 3 | Disallow: /*/faqs.htm -------------------------------------------------------------------------------- /tests/test_data/www.manhunt.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /promo/ 3 | Disallow: /mh/ 4 | Disallow: /externalProfile/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.manulife.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.manulife.com/sitemap.xml 2 | Sitemap: https://www.manulife.com/fr.sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.marcos.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wordpress/wp-admin/ 3 | Allow: /wordpress/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.martinfowler.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.marutisuzuki.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: .js 3 | Allow: .css 4 | Disallow: /amp/ 5 | 6 | 7 | Sitemap: https://www.marutisuzuki.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.maryland.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /pages/search.aspx -------------------------------------------------------------------------------- /tests/test_data/www.masterstudies.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.masterstudies.com/sitemap.xml 2 | User-agent: * 3 | Disallow: /ajax_handler.php?action=get_form* -------------------------------------------------------------------------------- /tests/test_data/www.mathway.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /settings/ -------------------------------------------------------------------------------- /tests/test_data/www.media.io: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.media.io/sitemap.xml 2 | 3 | User-agent: * 4 | Disallow: /thankyou/ 5 | Disallow: /survey/ -------------------------------------------------------------------------------- /tests/test_data/www.medici.tv: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search/ 3 | Disallow: /apidoc/ 4 | Disallow: /api/ 5 | Disallow: /admin/ 6 | Disallow: /*/embed/ 7 | Disallow: /ru/ 8 | -------------------------------------------------------------------------------- /tests/test_data/www.metacafe.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.meter.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /_speedtest/ 3 | Disallow: /_debug 4 | -------------------------------------------------------------------------------- /tests/test_data/www.metservice.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /publicData/ 3 | Disallow: /dynamic/ 4 | Disallow: /beta/ 5 | Disallow: /internal/ 6 | Disallow: /help/* -------------------------------------------------------------------------------- /tests/test_data/www.mgmresorts.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /content/ccm -------------------------------------------------------------------------------- /tests/test_data/www.minecraft.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.minecraft.net/en-us/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.minted.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.mixedmartialarts.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /news-cb/ 3 | Disallow: /cb/ 4 | Disallow: /ns/ 5 | Disallow: /beta/ 6 | Disallow: /forums/frames/ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.mnn.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /search/ 4 | 5 | Sitemap: https://www.mnn.com/sitemap.xml 6 | Sitemap: https://www.mnn.com/googlenews.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/www.mo.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /meetings/open-meeting-details 3 | Disallow: /onlineservices_admin/* -------------------------------------------------------------------------------- /tests/test_data/www.mobikwik.com: -------------------------------------------------------------------------------- 1 | User-agent : * 2 | Allow : / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.mobile88.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.mobile88.com -------------------------------------------------------------------------------- /tests/test_data/www.monstercrawler.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /contact.php -------------------------------------------------------------------------------- /tests/test_data/www.monstersandcritics.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 15 3 | 4 | User-agent: Googlebot-News 5 | Disallow: /tags/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.motortrend.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | Disallow: /cars-for-sale/ -------------------------------------------------------------------------------- /tests/test_data/www.moviefone.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.mps.it: -------------------------------------------------------------------------------- 1 | # MPS 2 | user-agent: stress-agent 3 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.mtgsalvation.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api/ 3 | 4 | User-agent: * 5 | Disallow: /cp/ 6 | 7 | User-agent: Mediapartners-Google* 8 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.muni.cz: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.mweb.co.za: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /*.js$ 3 | Allow: /*.css$ 4 | 5 | Sitemap: https://www.mweb.co.za/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.myfitnesspal.com: -------------------------------------------------------------------------------- 1 | # See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file -------------------------------------------------------------------------------- /tests/test_data/www.myflorida.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.myflorida.com -------------------------------------------------------------------------------- /tests/test_data/www.mypoints.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /?* 4 | Disallow: /MyPoints-Rewards.txt -------------------------------------------------------------------------------- /tests/test_data/www.myrealgames.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin 3 | Disallow: /index-test.php -------------------------------------------------------------------------------- /tests/test_data/www.mysearch.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.myvue.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.myvue.com -------------------------------------------------------------------------------- /tests/test_data/www.nadra.gov.pk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.nairaland.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /nigeria? 3 | Disallow: /hopto 4 | Disallow: /login 5 | Disallow: /newpost -------------------------------------------------------------------------------- /tests/test_data/www.nascar.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.naturalreaders.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.naturalreaders.com -------------------------------------------------------------------------------- /tests/test_data/www.nd.edu_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /assets/ 3 | Disallow: /cache/ 4 | Disallow: /cgi-bin 5 | Disallow: /error/ 6 | Disallow: /offline/ -------------------------------------------------------------------------------- /tests/test_data/www.netcarshow.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | User-agent: MSIECrawler 4 | Disallow: / 5 | User-agent: 008 6 | Disallow: / 7 | -------------------------------------------------------------------------------- /tests/test_data/www.netgate.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: https://www.netgate.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.newshub.co.nz: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | Sitemap: https://www.newshub.co.nz/home.sitemapindex.xml -------------------------------------------------------------------------------- /tests/test_data/www.newvision.co.ug: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.nexusmods.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /ajax/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.nginx.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.nice.org.uk: -------------------------------------------------------------------------------- 1 | User-agent: bingbot 2 | Crawl-delay: 1 3 | User-agent: * 4 | Allow: / 5 | -------------------------------------------------------------------------------- /tests/test_data/www.nii.ac.jp: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | 3 | Disallow: /daigakuin/ 4 | 5 | Disallow: /en/group/IABM -------------------------------------------------------------------------------- /tests/test_data/www.nirsoft.net: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Disallow:/cgi-bin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.nps.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /ns/ 3 | Disallow: /search/ 4 | Disallow: /loader.cfm 5 | Disallow: /*loader.cfm* -------------------------------------------------------------------------------- /tests/test_data/www.nrl.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | #Sitemap 4 | Sitemap: https://www.nrl.com/sitemap/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.ntu.edu.sg: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.ntv.co.jp: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /mailmagazine/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.oaed.gr: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://localhost/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.okcupid.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /daisy 3 | Disallow: /mybestface 4 | Disallow: /stalker 5 | Disallow: /flagmod 6 | Disallow: /logout 7 | Disallow: /l/ 8 | -------------------------------------------------------------------------------- /tests/test_data/www.okstate.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.okstate.edu -------------------------------------------------------------------------------- /tests/test_data/www.olympic.org: -------------------------------------------------------------------------------- 1 | Allow: / 2 | user-agent: AhrefsBot 3 | Disallow: / 4 | SITEMAP: http://www.olympic.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.omgubuntu.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /wp-includes/ 4 | User-agent: ia_archiver 5 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.on24.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*?* 3 | Disallow: /wp-admin/ 4 | Allow: /wp-admin/admin-ajax.php 5 | -------------------------------------------------------------------------------- /tests/test_data/www.online-convert.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.online-tech-tips.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.openair.com: -------------------------------------------------------------------------------- 1 | # Allow all robots to spider everything by disallowing nothing 2 | 3 | User-agent: * 4 | Crawl-Delay: 20 5 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.openculture.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: http://www.openculture.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.opendns.com_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.openstack.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: https://www.openstack.org/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.opentext.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.opentext.com -------------------------------------------------------------------------------- /tests/test_data/www.optum.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.oriflame.com: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot 2 | Disallow: 3 | User-agent: Googlebot-image 4 | Disallow: 5 | User-agent: * 6 | Disallow: / 7 | -------------------------------------------------------------------------------- /tests/test_data/www.osaka-u.ac.jp: -------------------------------------------------------------------------------- 1 | User-Agent: Googlebot 2 | Disallow: /ja/banner/ 3 | Disallow: /en/banner/ -------------------------------------------------------------------------------- /tests/test_data/www.osu.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /assets/pdf/Investigation-Report.pdf -------------------------------------------------------------------------------- /tests/test_data/www.ou.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /content 3 | Disallow: /content/ipad 4 | Disallow: /ipad -------------------------------------------------------------------------------- /tests/test_data/www.outlookindia.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Disallow: /pwa/* -------------------------------------------------------------------------------- /tests/test_data/www.owasp.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 30 3 | -------------------------------------------------------------------------------- /tests/test_data/www.oxforddictionaries.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.pa.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp/wp-admin/ 3 | Allow: /wp/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.paessler.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /manuals/prtg/differences_between_prtg_on_premises_and_prtg_in_the_cloud -------------------------------------------------------------------------------- /tests/test_data/www.paho.org: -------------------------------------------------------------------------------- 1 | User-agent: PowerMapper 2 | Allow: / 3 | User-agent: * 4 | Allow: / 5 | Disallow: /oldhq/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.parallels.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /typo3/ 3 | Disallow: /r/ 4 | Disallow: /files/ 5 | Disallow: /index.php?* 6 | Disallow: /tmp/ 7 | -------------------------------------------------------------------------------- /tests/test_data/www.partcommunity.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /community/ -------------------------------------------------------------------------------- /tests/test_data/www.payserve.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.pch.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.pch.com -------------------------------------------------------------------------------- /tests/test_data/www.pdfescape.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /open/RadPdf.axd -------------------------------------------------------------------------------- /tests/test_data/www.pdftoword.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.pearsoned.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin 3 | Allow: / 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.penny-arcade.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /feed/ 3 | Disallow: /feed/podcasts-* 4 | Disallow: /feed/show-* 5 | Allow: /feed/podcasts 6 | Allow: /feed/show 7 | -------------------------------------------------------------------------------- /tests/test_data/www.peopleadmin.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.pets4homes.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | 4 | User-agent: * 5 | Allow: / 6 | -------------------------------------------------------------------------------- /tests/test_data/www.pgatour.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.pge.com: -------------------------------------------------------------------------------- 1 | User-agent: NinjaBot 2 | User-Agent: W3C-checklink 3 | Allow: / 4 | User-Agent:* 5 | Disallow: /mobile/ 6 | Disallow: /mobile_jbri/ 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.photoshopessentials.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.phrases.org.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /thesaurus/ 4 | 5 | User-agent: NinjaBot 6 | Allow: / 7 | -------------------------------------------------------------------------------- /tests/test_data/www.pingdom.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /feature/ 3 | Noindex: /feature/ 4 | Disallow: /signup-b/ 5 | Noindex: /signup-b/ -------------------------------------------------------------------------------- /tests/test_data/www.pinknews.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.playok.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /slownik/ 3 | Disallow: /p/ 4 | Disallow: /*/stat.phtml 5 | Disallow: /*/game.phtml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.plos.org: -------------------------------------------------------------------------------- 1 | # http://www.robotstxt.org 2 | User-agent: * 3 | Disallow: /menu 4 | -------------------------------------------------------------------------------- /tests/test_data/www.politifact.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.polyu.edu.hk: -------------------------------------------------------------------------------- 1 | User-Agent: 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.post.at: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /sendungsverfolgung.php? 3 | Disallow: /sendungsverfolgung.php/ 4 | Disallow: /downloads/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.post.ch: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /api/ 3 | Disallow: /*.xml$ 4 | Disallow: /*.xml?* 5 | Disallow: /-/media/post/noindex/* -------------------------------------------------------------------------------- /tests/test_data/www.post.japanpost.jp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.post.japanpost.jp -------------------------------------------------------------------------------- /tests/test_data/www.posten.no: -------------------------------------------------------------------------------- 1 | # Hovedinnstillinger 2 | User-agent: * 3 | Sitemap: https://www.posten.no/sitemap.xml 4 | 5 | # ...URL 6 | 7 | -------------------------------------------------------------------------------- /tests/test_data/www.powerschool.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.president.gov.ua: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: * 3 | Disallow: /docs/ 4 | Host: www.president.gov.ua 5 | -------------------------------------------------------------------------------- /tests/test_data/www.presonus.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /deployscripts 3 | Disallow: /sql_changes 4 | Disallow: /logmein-verification-code.txt -------------------------------------------------------------------------------- /tests/test_data/www.princetonreview.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.procore.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /snapengage/ 3 | Disallow: /testflight/ 4 | Sitemap: https://www.procore.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.prometric.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.prometric.com/google_sitemap.xml User-agent: * Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.pta.gov.pk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.quackit.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.queerty.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp/wp-admin/ 3 | Allow: /wp/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.quetext.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.rammstein.de: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wordpress/wp-admin/ 3 | Allow: /wordpress/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.rarlab.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /rar/ 3 | Disallow: /far/ 4 | Disallow: /farnew.htm 5 | Disallow: /shop2rarlab.php 6 | -------------------------------------------------------------------------------- /tests/test_data/www.raspberrypi.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /forums/*?f=11 3 | Disallow: /forums/*?f=101 4 | -------------------------------------------------------------------------------- /tests/test_data/www.raymond.cc: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /blog/wp-admin/ 3 | Sitemap: https://www.raymond.cc/blog/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.rd.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.rd.com/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.recon.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://www.recon.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.recreation.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.redcafe.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /find-new/ 3 | Disallow: /search/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.rejseplanen.dk: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.renfe.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.reviews.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /go/* 3 | Disallow: /wp-admin/ 4 | Disallow: /wp-login 5 | Sitemap: https://www.reviews.com/sitemap_index.xml 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.ritsumei.ac.jp: -------------------------------------------------------------------------------- 1 | User-Agent:* 2 | Disallow: /campusweb 3 | -------------------------------------------------------------------------------- /tests/test_data/www.roboform.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /dist/ 3 | Disallow: /php/pums/ 4 | Disallow: /php/ams/ 5 | Disallow: /php/rtss/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.rockauto.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /info/ 3 | Disallow: /cgi-bin/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.rockpapershotgun.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.rockpapershotgun.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.rockstargames.com: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.rogerebert.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.rogerebert.com/sitemaps/sitemap.xml.gz 2 | -------------------------------------------------------------------------------- /tests/test_data/www.roku.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.romhacking.net: -------------------------------------------------------------------------------- 1 | # /robots.txt file for Yahoo 2 | 3 | User-agent: Slurp 4 | Crawl-delay: 300 5 | 6 | User-agent: * 7 | Disallow: /download/ 8 | -------------------------------------------------------------------------------- /tests/test_data/www.ronaldo7.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*.swf$ -------------------------------------------------------------------------------- /tests/test_data/www.rosettastone.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.rosettastone.com/sitemap.xml 2 | 3 | User-agent: * 4 | Disallow: /institutional/ 5 | Disallow: /kids/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.rottentomatoes.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Sitemap: https://www.rottentomatoes.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.rsl.ru: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.rstudio.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ruc.edu.cn: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.sabre.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp/wp-admin/ 3 | Allow: /wp/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.sachsen.de: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cps/rde/xchg/ -------------------------------------------------------------------------------- /tests/test_data/www.safeway.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.salliemae.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.salliemae.com/sitemap.xml 3 | Sitemap: https://www.salliemae.com/sitemap-blog.xml -------------------------------------------------------------------------------- /tests/test_data/www.santander.co.uk: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: http://www.santander.co.uk/uk/sitemap.xml 4 | Sitemap: http://www.santander.co.uk/info/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.saudia.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.saudia.com -------------------------------------------------------------------------------- /tests/test_data/www.scoreland.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /members/ 4 | Disallow: /cgi/secured/ 5 | Disallow: /tmp/ 6 | Disallow: /private/ -------------------------------------------------------------------------------- /tests/test_data/www.seoul.go.kr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.serif.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.serif.com -------------------------------------------------------------------------------- /tests/test_data/www.sermoncentral.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | sitemap: https://www.sermoncentral.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.seslisozluk.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: http://www.seslisozluk.net/sitemap.xml 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.sfimg.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.sfimg.com -------------------------------------------------------------------------------- /tests/test_data/www.sharefile.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /try/ 3 | Disallow: /archive/ 4 | Disallow: /akamaitest 5 | Sitemap: https://www.sharefile.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.shareit.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.sherdog.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.sheridancollege.ca: -------------------------------------------------------------------------------- 1 | Sitemap: http://sitemap.xml 2 | Sitemap: http://www.sheridancollege.ca/sitemap.xml 3 | -------------------------------------------------------------------------------- /tests/test_data/www.shopmyexchange.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /account/ 4 | 5 | Disallow: /cart/ 6 | 7 | Sitemap: https://www.shopmyexchange.com/index.xml 8 | -------------------------------------------------------------------------------- /tests/test_data/www.shoprite.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /sitecore -------------------------------------------------------------------------------- /tests/test_data/www.sina.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.site24x7.com: -------------------------------------------------------------------------------- 1 | # $Id: Robots.txt $ 2 | User-agent: * 3 | Disallow: /app/ 4 | Disallow: /api/ 5 | Disallow: /new/ 6 | Disallow: /abtest/ -------------------------------------------------------------------------------- /tests/test_data/www.skylinewebcams.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.slate.com: -------------------------------------------------------------------------------- 1 | User-agent: feedjira 2 | Disallow: / 3 | 4 | User-agent: magpie-crawler 5 | Disallow: / 6 | 7 | User-agent: * 8 | Disallow: /bullpen/ 9 | -------------------------------------------------------------------------------- /tests/test_data/www.slb.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.sltrib.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.sltrib.com/arcio/sitemap/ 2 | User-agent: * 3 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.smbc-comics.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /comiccontrol/ -------------------------------------------------------------------------------- /tests/test_data/www.smh.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.snopes.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.socalgas.com: -------------------------------------------------------------------------------- 1 | User-agent: Baiduspider 2 | Disallow: / 3 | 4 | User-agent: * 5 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.soccermanager.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.soccervista.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /go/ 3 | Disallow: /obr/ 4 | Disallow: /dothebet.php 5 | Disallow: /oempv/* -------------------------------------------------------------------------------- /tests/test_data/www.soccerway.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /banners/ 3 | Disallow: /lib/ -------------------------------------------------------------------------------- /tests/test_data/www.soft32.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Disallow: /publishers 4 | 5 | User-agent: Yandex 6 | Crawl-delay: 3 7 | 8 | Sitemap: https://www.soft32.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.soompi.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /community/ 4 | Disallow: /wp-admin/ 5 | 6 | Sitemap: https://www.soompi.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.soumu.go.jp: -------------------------------------------------------------------------------- 1 | User-agent: ia_archiver 2 | Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.southerncompany.com: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Disallow: 3 | Allow: / 4 | Disallow: /search.html 5 | Disallow: /content 6 | Allow: /content/dam/southern-company -------------------------------------------------------------------------------- /tests/test_data/www.southindianbank.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.spa.gov.my: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.sparknotes.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Crawl-delay: 0.5 3 | -------------------------------------------------------------------------------- /tests/test_data/www.spectator.co.uk: -------------------------------------------------------------------------------- 1 | User-agent: Twitterbot 2 | Disallow: * 3 | Allow: /content/* 4 | 5 | User-agent: * 6 | Crawl-delay: 2 7 | Disallow: /?s= 8 | Disallow: /search 9 | -------------------------------------------------------------------------------- /tests/test_data/www.speedhunters.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.speedtest.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.speedtest.net -------------------------------------------------------------------------------- /tests/test_data/www.spicejet.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.spirit.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.spirit.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.sportsnet.ca: -------------------------------------------------------------------------------- 1 | User-agent: Twitterbot 2 | Disallow: 3 | 4 | User-agent: * 5 | Disallow: 6 | Crawl-delay: 10 7 | -------------------------------------------------------------------------------- /tests/test_data/www.standardmedia.co.ke: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test_data/www.stanford.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.starcraft2.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.state.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.studentbeans.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /my 3 | Disallow: /social 4 | Disallow: /styleguide 5 | Disallow: */login.json 6 | -------------------------------------------------------------------------------- /tests/test_data/www.studentdoctor.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.subway.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /sitecore 3 | Disallow: /Sitecore 4 | Disallow: /suboftheday 5 | Disallow: /mywayrewards -------------------------------------------------------------------------------- /tests/test_data/www.suicidegirls.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /members/sean/ 3 | Disallow: /search/ 4 | Disallow: /*?next 5 | 6 | crawl-delay: 1 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.suomi.fi: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /palaute 3 | Disallow: /respons 4 | Disallow: /feedback 5 | 6 | Sitemap: https://www.suomi.fi/sitemap_index.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/www.supersport.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /feedback 4 | Disallow: /search 5 | Disallow: /support 6 | Sitemap: https://www.supersport.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.swarovski.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: https://www.swarovski.com/sitemapindex.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.swinglifestyle.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /groups/ 3 | 4 | -------------------------------------------------------------------------------- /tests/test_data/www.symbolab.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.symbolab.com/sitemap.xml 3 | -------------------------------------------------------------------------------- /tests/test_data/www.synonym.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Disallow:/carambola 4 | Noindex:/carambola 5 | Sitemap: https://www.synonym.com/sitemaps/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.syracuse.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search/ 3 | Disallow: /tmp/ 4 | Disallow: /static/ 5 | 6 | Sitemap: https://www.syracuse.edu/sitemapindex.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/www.tafensw.edu.au: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | Sitemap: https://www.tafensw.edu.au/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.talentlms.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tamu.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /customers -------------------------------------------------------------------------------- /tests/test_data/www.tarot.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: 4 | 5 | # Sitemap files 6 | Sitemap: https://gfx.tarot.com/tarot_sitemap.xml 7 | -------------------------------------------------------------------------------- /tests/test_data/www.tatamotors.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tbs.co.jp: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /newsi/ 3 | Disallow: /sports/football/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.teamtalk.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.techbargains.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /outbound/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.telegraphindia.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.tempo.co: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /ajax/ 3 | Sitemap: http://www.tempo.co/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.tennis.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: http://www.tennis.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tensorflow.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /versions/* 3 | Sitemap: https://www.tensorflow.org/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.texas.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.textures.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cdn-cgi/ 3 | Crawl-delay: 5 4 | Sitemap: http://www.textures.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.theage.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.thecalculatorsite.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /results.php 3 | Disallow: /results_temperature.php 4 | Disallow: /counter.php -------------------------------------------------------------------------------- /tests/test_data/www.thegospelcoalition.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.theiet.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /_Incapsula_Resource* 3 | -------------------------------------------------------------------------------- /tests/test_data/www.thepetitionsite.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /servlets/do_logoff.html 3 | -------------------------------------------------------------------------------- /tests/test_data/www.thomascook.in: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / 3 | Disallow: /campaigns/ 4 | Sitemap: https://www.thomascook.in/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.threadless.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cart/ 3 | Disallow: /search/* -------------------------------------------------------------------------------- /tests/test_data/www.tineye.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search 3 | Disallow: /parse 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tlc.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /anonymous 3 | Disallow: /affiliate 4 | Sitemap: https://www.tlc.com/sitemap/index.xml.gz -------------------------------------------------------------------------------- /tests/test_data/www.tldp.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.tn.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Allow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tomtom.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: http://download.tomtom.com/open/feeds/sitemap/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tonyrobbins.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | Sitemap: https://www.tonyrobbins.com/sitemap_index.xml 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.toolbox.com: -------------------------------------------------------------------------------- 1 | Sitemap: https://www.toolbox.com/sitemap.xml 2 | 3 | User-agent: * 4 | Crawl-delay: 10 5 | Disallow: /4585/ -------------------------------------------------------------------------------- /tests/test_data/www.topman.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | sitemap: https://www.topman.com/sitemap1.xml 3 | Disallow: */category/*test 4 | Disallow: *ResetPasswordLink* 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.topshop.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | sitemap: https://www.topshop.com/sitemap1.xml 3 | Disallow: */category/*test 4 | Disallow: *ResetPasswordLink* 5 | -------------------------------------------------------------------------------- /tests/test_data/www.torrentfreak.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.totalwar.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tp-link.com: -------------------------------------------------------------------------------- 1 | User-agent:* 2 | Disallow:/ir/ 3 | Disallow:/iq/ 4 | Disallow:/*/search/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.tpg.com.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.tradedoubler.com: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | User-agent: * 6 | Disallow: /wp-admin/ 7 | Allow: /wp-admin/admin-ajax.php 8 | -------------------------------------------------------------------------------- /tests/test_data/www.travian.com: -------------------------------------------------------------------------------- 1 | # robots.txt für travian.com 2 | 3 | User-agent: * 4 | Disallow: 5 | 6 | Sitemap: https://www.travian.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.tribuneindia.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.troweprice.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /content/dam/emailcampaigns/client/ 3 | Disallow: /money-market/ 4 | Disallow: /content/money-market/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.tsheets.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /pages/ 3 | Disallow: /wp-includes/ 4 | Sitemap: https://www.tsheets.com/sitemap_index.xml -------------------------------------------------------------------------------- /tests/test_data/www.tsukuba.ac.jp: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.tunecore.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.turktelekom.com.tr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.turktelekom.com.tr -------------------------------------------------------------------------------- /tests/test_data/www.typingclub.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Allow: / -------------------------------------------------------------------------------- /tests/test_data/www.ualberta.ca: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /Components/ 3 | Disallow: /sitecore/ 4 | Disallow: /Layout/ 5 | Disallow: /layouts/ 6 | Disallow: /*/sitecore/content/ -------------------------------------------------------------------------------- /tests/test_data/www.ubisoft.com_443: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | -------------------------------------------------------------------------------- /tests/test_data/www.ucdenver.edu: -------------------------------------------------------------------------------- 1 |  2 | User-agent: * 3 | Disallow: /wdts 4 | Allow: / 5 | 6 | User-agent: * 7 | Disallow: /about/newsroom/workroom/ 8 | -------------------------------------------------------------------------------- /tests/test_data/www.ucla.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /pdf/viewbook.pdf 3 | -------------------------------------------------------------------------------- /tests/test_data/www.uga.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /_resources/email/ 3 | Disallow: /_resources/includes/ 4 | Disallow: /_heroes/ -------------------------------------------------------------------------------- /tests/test_data/www.ui.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.uic.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.uic.edu -------------------------------------------------------------------------------- /tests/test_data/www.uillinois.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.uillinois.edu -------------------------------------------------------------------------------- /tests/test_data/www.ukzn.ac.za: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ulaval.ca: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /calendrier.html 3 | Disallow: /no_cache/calendrier.html 4 | Disallow: /no_cache 5 | Disallow: /no_cache.html 6 | -------------------------------------------------------------------------------- /tests/test_data/www.ultratools.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Disallow: /cgi-bin/ 4 | Sitemap: http://www.ultratools.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.um.ac.ir: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.um.ac.ir/sitemap.xml 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /tests/test_data/www.um.edu.my: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.umassonline.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin 3 | Sitemap: http://www.umassonline.net/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.umbc.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /blogs # these are very old and should not be searchable in general 3 | -------------------------------------------------------------------------------- /tests/test_data/www.umich.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin/ 3 | Disallow: /pingtest/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.umontreal.ca: -------------------------------------------------------------------------------- 1 | # Accepter l'indexation 2 | User-agent: * 3 | Crawl-delay: 1 4 | Disallow: 5 | 6 | -------------------------------------------------------------------------------- /tests/test_data/www.umsystem.edu: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /index.php/ 3 | Disallow: /u/ 4 | Disallow: /apps/ur/dbcontrol/ -------------------------------------------------------------------------------- /tests/test_data/www.unhcr.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.uni.lodz.pl: -------------------------------------------------------------------------------- 1 | #User-agent: * 2 | #Disallow: / 3 | #Disallow: / 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.unila.ac.id: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.unionbankofindia.co.in: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Sitemap: https://www.unionbankofindia.co.in/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.uniraj.ac.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/www.unm.edu: -------------------------------------------------------------------------------- 1 | # robots.txt 2 | User-agent: * 3 | Disallow: 4 | Disallow: /cgi-bin/ 5 | Disallow: /tmp/ -------------------------------------------------------------------------------- /tests/test_data/www.unpad.ac.id: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.up.ac.za: -------------------------------------------------------------------------------- 1 | # robots.txt 2 | User-agent: * 3 | 4 | Disallow: */limit/100 -------------------------------------------------------------------------------- /tests/test_data/www.upi.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Disallow: /sponsors/ 4 | Disallow: /photo-request/ 5 | Disallow: /photo_request/ 6 | Disallow: /account/ 7 | 8 | -------------------------------------------------------------------------------- /tests/test_data/www.ups.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /media/en/Marketplace_Integration.pdf 3 | Disallow: /tracking?loc=* 4 | -------------------------------------------------------------------------------- /tests/test_data/www.uq.edu.au: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /study/search.html 3 | -------------------------------------------------------------------------------- /tests/test_data/www.urbandictionary.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.urbandictionary.com/sitemap.xml.gz 2 | Sitemap: https://www.urbandictionary.com/sitemap-https.xml.gz 3 | -------------------------------------------------------------------------------- /tests/test_data/www.us.hsbc.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 4 | Sitemap: https://www.us.hsbc.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.usa.canon.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /internet/contenthandler/ 3 | 4 | SITEMAP: https://www.usa.canon.com/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.usembassy.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.usopen.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Sitemap: https://www.usopen.com/content/us-open.sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ussoccer.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | 4 | SITEMAP: https://www.ussoccer.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.ut.ac.ir: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.utah.gov: -------------------------------------------------------------------------------- 1 | # www.robotstxt.org/ 2 | # www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449 3 | 4 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.utar.edu.my: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: -------------------------------------------------------------------------------- /tests/test_data/www.utas.edu.au: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /users/ 3 | Disallow: /demo/ 4 | Disallow: /user/ 5 | Disallow: /_designs/ 6 | Disallow: /_admin/ 7 | Disallow: /_edit/ -------------------------------------------------------------------------------- /tests/test_data/www.utk.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /admin.php 3 | Disallow: /themes/ 4 | Disallow: /masthead/ -------------------------------------------------------------------------------- /tests/test_data/www.utm.my: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.utorrent.com: -------------------------------------------------------------------------------- 1 | User-agent: * -------------------------------------------------------------------------------- /tests/test_data/www.uw.edu.pl: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.uw.edu.pl/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.vanguardngr.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /xmlrpc.php 4 | 5 | Sitemap: http://www.vanguardngr.com/sitemap_index.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.vccs.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.vg247.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | 5 | Sitemap: https://www.vg247.com/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.viber.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp/wp-admin/ 3 | Allow: /wp/wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.victoriassecret.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.victoriassecret.com -------------------------------------------------------------------------------- /tests/test_data/www.viewbug.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /my-account/ 3 | Allow: / 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/www.visualcv.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: https://www.visualcv.com/sitemap.xml 3 | disallow: */pdf$ 4 | disallow: */pdf/* 5 | -------------------------------------------------------------------------------- /tests/test_data/www.voyeurweb.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | Host: www.voyeurweb.com 4 | Sitemap: https://www.voyeurweb.com/public/sitemaps/sitemap.xml 5 | Disallow: /away/ -------------------------------------------------------------------------------- /tests/test_data/www.vsp.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Sitemap: https://www.vsp.com/sitemap.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.vtu.ac.in: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /wp-includes/ 4 | -------------------------------------------------------------------------------- /tests/test_data/www.vu.edu.pk: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | Disallow: /cgi-bin/ 4 | Disallow: /images/ 5 | Sitemap: http://www.vu.edu.pk/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.vub.sk: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /components 3 | Disallow: /thumb 4 | Sitemap: https://www.vub.sk/sitemap.xml 5 | -------------------------------------------------------------------------------- /tests/test_data/www.wacom.com: -------------------------------------------------------------------------------- 1 | User-agent: * Disallow: / -------------------------------------------------------------------------------- /tests/test_data/www.wampserver.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.weatherzone.com.au: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | -------------------------------------------------------------------------------- /tests/test_data/www.webcams.travel: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /flag/ 3 | Disallow: /suggest/ 4 | Disallow: /linking/ 5 | Sitemap: https://www.webcams.travel/sitemap.xml 6 | -------------------------------------------------------------------------------- /tests/test_data/www.webcrawler.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: /$ 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.webmd.com: -------------------------------------------------------------------------------- 1 | # Robots.txt file WebMD 2 | # Updated: September 2017 3 | 4 | User-agent: * 5 | Disallow: /search/search_results/ 6 | -------------------------------------------------------------------------------- /tests/test_data/www.westpac.co.nz: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /bank/ 3 | Disallow: /rednews/tag/ 4 | Disallow: /lightbox/ 5 | Sitemap: http://www.westpac.co.nz/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.whatsmydns.net: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: 3 | -------------------------------------------------------------------------------- /tests/test_data/www.whitehouse.gov: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.wikidot.com: -------------------------------------------------------------------------------- 1 | Sitemap: http://www.wikidot.com/sitemap.xml 2 | User-agent: voltron 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /tests/test_data/www.wimp.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*? 3 | Allow: /wp-admin/admin-ajax.php 4 | Crawl-delay: 3 5 | -------------------------------------------------------------------------------- /tests/test_data/www.wincalendar.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | 4 | User-agent: * 5 | Disallow: 6 | -------------------------------------------------------------------------------- /tests/test_data/www.winzip.com: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /cgi-bin/ 3 | Disallow: /mac/de/ 4 | Sitemap: https://www.winzip.com/sitemap.xml -------------------------------------------------------------------------------- /tests/test_data/www.wireshark.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /awstats/ 3 | Disallow: /cgi-bin/ 4 | Disallow: /mailman/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.wisc.edu: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /pubs/home -------------------------------------------------------------------------------- /tests/test_data/www.wizards.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /dnd/ -------------------------------------------------------------------------------- /tests/test_data/www.wnd.com: -------------------------------------------------------------------------------- 1 | # robots.txt 2016-04-25: ajo 2 | User-agent: * 3 | Disallow: /markets 4 | User-agent: yacybot 5 | Crawl-delay: 2 6 | -------------------------------------------------------------------------------- /tests/test_data/www.wolframalpha.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Sitemap: http://sitemaps.wolfram.com/www.wolframalpha.com/sitemap_index.xml 3 | -------------------------------------------------------------------------------- /tests/test_data/www.wondershare.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /news/ 3 | Disallow: /thankyou/ 4 | Disallow: /survey/ 5 | -------------------------------------------------------------------------------- /tests/test_data/www.woodforest.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /Home/ErrorPages/ 3 | Disallow: /WFNB/ 4 | Allow: / 5 | 6 | Sitemap: https://www.woodforest.com/Sitemap.ashx 7 | -------------------------------------------------------------------------------- /tests/test_data/www.wsop.com: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.wsop.com -------------------------------------------------------------------------------- /tests/test_data/www.wu.ac.at: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /typo3/ 3 | Disallow: /typo3_src/ 4 | 5 | User-agent: Bingbot 6 | Disallow: / 7 | -------------------------------------------------------------------------------- /tests/test_data/www.wufoo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.x-rates.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /auth/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.xda-developers.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /search/* 3 | Disallow: */tag/* 4 | Disallow: *?s=* 5 | Disallow: /tip-us/* 6 | -------------------------------------------------------------------------------- /tests/test_data/www.xmind.net: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /account/ 3 | Disallow: /xmind/ 4 | Allow: / 5 | -------------------------------------------------------------------------------- /tests/test_data/www.yonsei.ac.kr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: http://www.yonsei.ac.kr/sitemap_index.xml 4 | -------------------------------------------------------------------------------- /tests/test_data/www.ypes.gr: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Disallow: /suggest/?* 4 | Disallow: /readme.html 5 | Allow: /wp-admin/admin-ajax.php -------------------------------------------------------------------------------- /tests/test_data/www.ysu.edu.cn: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /*system*/*resource*/ 3 | -------------------------------------------------------------------------------- /tests/test_data/www.zabbix.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Disallow: /partners/ 4 | Disallow: /customers/ 5 | Disallow: /forum/special/visitor-messages 6 | -------------------------------------------------------------------------------- /tests/test_data/www.zamg.ac.at: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/protego/14defd6228255d68d6fab80654b439a732aa7cd5/tests/test_data/www.zamg.ac.at -------------------------------------------------------------------------------- /tests/test_data/www.zedo.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /wp-admin/ 3 | Allow: /wp-admin/admin-ajax.php 4 | -------------------------------------------------------------------------------- /tests/test_data/www.zip-codes.com: -------------------------------------------------------------------------------- 1 | User-agent: Mediapartners-Google 2 | Disallow: 3 | 4 | User-agent: * 5 | Disallow: 6 | Disallow: /cgi-bin/ 7 | Disallow: /admin/ -------------------------------------------------------------------------------- /tests/test_data/www.zotero.org: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /trac/ 3 | Disallow: /people/ 4 | Disallow: /*/following 5 | Disallow: /*/followers 6 | -------------------------------------------------------------------------------- /tests/test_data/www1.udel.edu: -------------------------------------------------------------------------------- 1 | User-Agent: * 2 | Disallow: /images 3 | -------------------------------------------------------------------------------- /tests/test_data/xkcd.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /personal/ -------------------------------------------------------------------------------- /tests/test_data/ycmou.digitaluniversity.ac: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /tests/test_data/zone.msn.com: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /toolbox/ 3 | Disallow: /utility/auth/Login.aspx 4 | Disallow: /en/utility/siteindex.aspx --------------------------------------------------------------------------------