├── feeds.feedblitz.com.txt
├── as-web.jp.txt
├── ganglia.info.txt
├── autocrypt.org.txt
├── lukew.com.txt
├── what-if.xkcd.com.txt
├── soundcity.tv.txt
├── crimemagazine.com.txt
├── ht.ly.txt
├── the-magazine.org.txt
├── chareidi.org.txt
├── pxlnv.com.txt
├── snip.ly.txt
├── .dxy.cn.txt
├── alex.mullr.net.txt
├── blog.lepine.pro.txt
├── opensource.org.txt
├── techmeme.com.txt
├── blog.fefe.de.txt
├── danluu.com.txt
├── axesslab.com.txt
├── bitelia.com.txt
├── blog.spu.edu.txt
├── briefly.co.za.txt
├── digitalcourage.de.txt
├── ecetia.com.txt
├── extracine.com.txt
├── ghanaweb.com.txt
├── help.fivefilters.org.txt
├── jjahnke.net.txt
├── jollinger.com.txt
├── kumailplus.com.txt
├── luxuo.com.txt
├── mattcutts.com.txt
├── roy.gbiv.com.txt
├── achgut.com.txt
├── backlinko.com.txt
├── facta.co.jp.txt
├── meowni.ca.txt
├── motorfull.com.txt
├── 512pixels.net.txt
├── apple.news.txt
├── elance.com.txt
├── futurism.com.txt
├── getnews.jp.txt
├── hacf.fr.txt
├── jamesclear.com.txt
├── kont.me.txt
├── lostgarden.com.txt
├── mbl.is.txt
├── n.survol.fr.txt
├── philosophynow.org.txt
├── scotthelme.co.uk.txt
├── acidcow.com.txt
├── alseraj.net.txt
├── avantivictoirerao.com.txt
├── blogs.gnome.org.txt
├── carlchenet.com.txt
├── doc.wallabag.org.txt
├── gsmarena.com.txt
├── kachestvo.ru.txt
├── monkeyzen.com.txt
├── paquier.xyz.txt
├── summitroute.com.txt
├── .readthedocs.io.txt
├── allafrica.com.txt
├── eternabuenosaires.com.txt
├── fakirpresse.info.txt
├── fivefilters.org.txt
├── fok.nl.txt
├── gist.github.com.txt
├── indehekken.net.txt
├── macdrifter.com.txt
├── marksdailyapple.com.txt
├── nifi.apache.org.txt
├── osmand.net.txt
├── retro-games.fr.txt
├── staltz.com.txt
├── theteaspot.com.txt
├── wallabag.org.txt
├── zerokspot.com.txt
├── altaonline.com.txt
├── blog.imirhil.fr.txt
├── dagogtid.no.txt
├── dailydot.com.txt
├── dr-b.io.txt
├── drdobbs.com.txt
├── flyingmachinestudios.com.txt
├── gizmovil.com.txt
├── goodfil.ms.txt
├── ishadeed.com.txt
├── je-suis-papa.com.txt
├── jungle-world.com.txt
├── rust-lang-nursery.github.io.txt
├── stjv.fr.txt
├── vg.no.txt
├── LICENSE.txt
├── appleweblog.com.txt
├── az.lib.ru.txt
├── blogs.forbes.com.txt
├── brucelawson.co.uk.txt
├── doc.rust-lang.ru.txt
├── dropbox.com.txt
├── gihyo.jp.txt
├── help.sharegate.com.txt
├── hiperpop.com.txt
├── hipertextual.com.txt
├── krone.at.txt
├── panic.com.txt
├── perell.com.txt
├── portertech.ca.txt
├── swcarpentry.github.io.txt
├── tofugu.com.txt
├── urbandictionary.com.txt
├── warnerbros.fr.txt
├── zataz.com.txt
├── altfoto.com.txt
├── chaperonsetvous.fr.txt
├── cucharasonica.com.txt
├── dansdata.com.txt
├── doc.rust-lang.org.txt
├── interviewmagazine.com.txt
├── jvt.me.txt
├── najlepsze-ksiazki.pl.txt
├── news.rub.de.txt
├── pentaxforums.com.txt
├── phototrend.fr.txt
├── renenekuda.cz.txt
├── tldp.org.txt
├── vot-tak.tv.txt
├── .about.com.txt
├── blogs.lse.ac.uk.txt
├── coalicionporelevangelio.org.txt
├── crn.de.txt
├── elfster.com.txt
├── fiftytwo.in.txt
├── finexpert.e15.cz.txt
├── marigold.cz.txt
├── monkeyuser.com.txt
├── news.techmeme.com.txt
├── python.org.txt
├── utdailybeacon.com.txt
├── worldwidewords.org.txt
├── .tweakblogs.net.txt
├── caseinterview.com.txt
├── gizmologia.com.txt
├── happyassassin.net.txt
├── iansommerville.com.txt
├── iplaysoft.com.txt
├── jobbank.gc.ca.txt
├── lefilrouge.media.txt
├── matt.might.net.txt
├── mein-mmo.de.txt
├── radionz.co.nz.txt
├── roomescapeartist.com.txt
├── sivers.org.txt
├── sports.ru.txt
├── stumbleupon.com.txt
├── thenews.coop.txt
├── turnoff.us.txt
├── vedonlyonti.com.txt
├── betabeat.com.txt
├── cashless.pl.txt
├── cfclrk.com.txt
├── china-gadgets.de.txt
├── davidwalsh.name.txt
├── democracynow.org.txt
├── dissentmagazine.org.txt
├── earvingad.github.io.txt
├── explosm.net.txt
├── freelancer.com.txt
├── fs.blog.txt
├── gauchiste.fr.txt
├── izismile.com.txt
├── lado.mx.txt
├── make.wordpress.org.txt
├── mbk-news.appspot.com.txt
├── mytotalretail.com.txt
├── openstreetmap.org.txt
├── php.net.txt
├── vakarm.net.txt
├── archiloque.net.txt
├── berlingske.dk.txt
├── carnegie.ru.txt
├── cooper.com.txt
├── doughellmann.com.txt
├── fokus.se.txt
├── geenstijl.nl.txt
├── hackertarget.com.txt
├── ianlewis.org.txt
├── juliareda.eu.txt
├── kresus.org.txt
├── letraslibres.com.txt
├── longform.org.txt
├── oschina.net.txt
├── reactjs.org.txt
├── signal.org.txt
├── slrlounge.com.txt
├── yosoy.red.txt
├── .mitpress.mit.edu.txt
├── aerobuzz.fr.txt
├── alexduner.com.txt
├── archdaily.com.txt
├── c.newsnow.com.txt
├── codeproject.com.txt
├── declassifieduk.org.txt
├── elblogsalmon.com.txt
├── esglobal.org.txt
├── gold.ac.uk.txt
├── nextcloud.com.txt
├── optimizesmart.com.txt
├── pastepad.fivefilters.org.txt
├── pixellibre.net.txt
├── radishzz.cc.txt
├── sme.sk.txt
├── sourcebooks.com.txt
├── sprengsatz.de.txt
├── stopgame.ru.txt
├── thefilmexperience.net.txt
├── triplebyte.com.txt
├── vivirmexico.com.txt
├── w3.org.txt
├── www.seriouseats.com.txt
├── a11ywithlindsey.com.txt
├── blog.eng.xogrp.com.txt
├── blog.nightly.mozilla.org.txt
├── c.newsnow.co.uk.txt
├── cabinetmagazine.org.txt
├── caffereggio.net.txt
├── economie.gouv.fr.txt
├── gnu.org.txt
├── lvsl.fr.txt
├── lwlies.com.txt
├── marriedtothesea.com.txt
├── miops.com.txt
├── missnumerique.com.txt
├── pmf.silvrback.com.txt
├── pogue.blogs.nytimes.com.txt
├── queerty.com.txt
├── renverse.co.txt
├── thebaffler.com.txt
├── theses.enc.sorbonne.fr.txt
├── tidbits.com.txt
├── viget.com.txt
├── yourerie.com.txt
├── zaknrw.de.txt
├── 9to5mac.com.txt
├── abplive.com.txt
├── aps.dz.txt
├── brookings.edu.txt
├── car-it.com.txt
├── cn.engadget.com.txt
├── dictionary.reference.com.txt
├── ericsuh.com.txt
├── f-droid.org.txt
├── github.blog.txt
├── grumpygamer.com.txt
├── hmercer.com.txt
├── laughingsquid.com.txt
├── lawfareblog.com.txt
├── nf-farn.de.txt
├── osmc.tv.txt
├── prolost.com.txt
├── radar.oreilly.com.txt
├── the-tls.co.uk.txt
├── theoaklandpress.com.txt
├── thisamericanlife.org.txt
├── wenow.com.txt
├── .sodexo.com.txt
├── blogs.reuters.com.txt
├── crimethinc.com.txt
├── good.is.txt
├── highscalability.com.txt
├── lesecolohumanistes.fr.txt
├── marketresearchdirect.com.txt
├── mebedo.de.txt
├── philosophyforlife.org.txt
├── redalemeden.com.txt
├── redtimmy.com.txt
├── saltyworld.net.txt
├── sfweekly.com.txt
├── techno-science.net.txt
├── tthfanfic.org.txt
├── web-libre.org.txt
├── americanthinker.com.txt
├── autoactu.com.txt
├── bobbyromeo.com.txt
├── brettterpstra.com.txt
├── code.google.com.txt
├── domo-blog.fr.txt
├── fair.org.txt
├── historic-uk.com.txt
├── keycloak.org.txt
├── mcorbin.fr.txt
├── mises.org.txt
├── motorcyclistonline.com.txt
├── pymotw.com.txt
├── riffreporter.de.txt
├── rnd.de.txt
├── scottohara.me.txt
├── share.ez.no.txt
├── sqlite.org.txt
├── thingiverse.com.txt
├── varsity.co.uk.txt
├── will-self.com.txt
├── brandingstrategyinsider.com.txt
├── catb.org.txt
├── forbiddenstories.org.txt
├── hanselman.com.txt
├── iphonetweak.fr.txt
├── m.theregister.co.uk.txt
├── publications.parliament.uk.txt
├── robertsspaceindustries.com.txt
├── smarthomebeginner.com.txt
├── taxacc.jp.txt
├── theintercept.com.txt
├── therumpus.net.txt
├── .stanford.edu.txt
├── 5by5.tv.txt
├── batenka.ru.txt
├── bez.es.txt
├── business-standard.com.txt
├── derekseaman.com.txt
├── houstonchronicle.com.txt
├── hs.fi.txt
├── indiehackers.com.txt
├── instagr.am.txt
├── kathimerini.gr.txt
├── labs.mwrinfosecurity.com.txt
├── luminous-landscape.com.txt
├── lupa.cz.txt
├── mobilenet.cz.txt
├── scnsrc.me.txt
├── singularityhub.com.txt
├── zoomit.ir.txt
├── 43folders.com.txt
├── brooksreview.net.txt
├── bzg.fr.txt
├── chomsky.info.txt
├── ciperchile.cl.txt
├── dcurt.is.txt
├── developers.facebook.com.txt
├── elmalpensante.com.txt
├── feinschwarz.net.txt
├── gorky.media.txt
├── groups.drupal.org.txt
├── indiatimes.com.txt
├── openthemagazine.com.txt
├── scinfolex.com.txt
├── soundonsound.com.txt
├── spin.com.txt
├── toolsandtoys.net.txt
├── trailer.web-view.net.txt
├── wphive.com.txt
├── xlsemanal.com.txt
├── amptoons.com.txt
├── bernama.com.txt
├── book.douban.com.txt
├── caravanmagazine.in.txt
├── delong.typepad.com.txt
├── gameswirtschaft.de.txt
├── informationclearinghouse.info.txt
├── itavisen.no.txt
├── keyboardmag.com.txt
├── kingarthurflour.com.txt
├── lifehack.org.txt
├── m.xkcd.com.txt
├── mainichi.jp.txt
├── marcvidal.net.txt
├── news.jp.txt
├── nj.com.txt
├── nplusonemag.com.txt
├── onlinewelten.com.txt
├── parliament.uk.txt
├── pinterest.com.txt
├── plzkthxbai.com.txt
├── rancher.com.txt
├── revdennismccarty.com.txt
├── thenetworkgarden.blogs.com.txt
├── theodinproject.com.txt
├── thethaovanhoa.vn.txt
├── wikiwand.com.txt
├── .mozilla.org.txt
├── a.tldrnewsletter.com.txt
├── askingbox.de.txt
├── dadall.info.txt
├── fairphone.com.txt
├── linuxnix.com.txt
├── mactechnews.de.txt
├── medialens.org.txt
├── moo.nac.uci.edu.txt
├── pjmedia.com.txt
├── real.gr.txt
├── thepointmag.com.txt
├── ux.artu.tv.txt
├── wpbeginner.com.txt
├── alexwlchan.net.txt
├── arduino-tutorial.de.txt
├── ascarter.net.txt
├── buquad.com.txt
├── hazlitt.net.txt
├── health.com.txt
├── histoire-filante.fr.txt
├── mesec.cz.txt
├── pandemicequityinitiative.com.txt
├── pandodaily.com.txt
├── philstar.com.txt
├── racjonalista.pl.txt
├── rom-game.fr.txt
├── singaporeanstocksinvestor.blogspot.com.txt
├── stadt-muenster.de.txt
├── tbray.org.txt
├── thesocialitefamily.com.txt
├── tokyo-np.co.jp.txt
├── web.gekisaka.jp.txt
├── writerunboxed.com.txt
├── .fivefilters.org.txt
├── .robweychert.com.txt
├── 24a11y.com.txt
├── blog.native-instruments.com.txt
├── deia.com.txt
├── greaterwrong.com.txt
├── halo.bungie.org.txt
├── jacobin.com.txt
├── jandan.net.txt
├── leb.fbi.gov.txt
├── microsiervos.com.txt
├── proskauer.com.txt
├── roughtype.com.txt
├── slog.thestranger.com.txt
├── utiliser-lightroom.com.txt
├── www2.cnrs.fr.txt
├── .repubblica.it.txt
├── bjango.com.txt
├── blog.robertelder.org.txt
├── blog.sentry.io.txt
├── cars.com.txt
├── cohost.org.txt
├── devblogs.microsoft.com.txt
├── edge.org.txt
├── gocomics.com.txt
├── gurusblog.com.txt
├── isource.com.txt
├── maitre-eolas.fr.txt
├── manga-news.com.txt
├── nosalty.hu.txt
├── protonmail.com.txt
├── ruhlman.com.txt
├── shifteleven.com.txt
├── timesofisrael.com.txt
├── wordpress.org.txt
├── wz-newsline.de.txt
├── .allthingsd.com.txt
├── 37signals.com.txt
├── 7newsbelize.com.txt
├── agirpourlatransition.ademe.fr.txt
├── bbva.es.txt
├── bdaily.co.uk.txt
├── blog.chriszacharias.com.txt
├── blog.twitter.com.txt
├── blog.wells.ee.txt
├── borderhouseblog.com.txt
├── business2community.com.txt
├── cert-bund.de.txt
├── ciaosamin.com.txt
├── communities-dominate.blogs.com.txt
├── eckerd.edu.txt
├── hespress.com.txt
├── hometheaterreview.com.txt
├── literaryreview.co.uk.txt
├── maxim.com.txt
├── palmbeachpost.com.txt
├── phys.org.txt
├── propakistani.pk.txt
├── retractionwatch.com.txt
├── robots.thoughtbot.com.txt
├── spiderum.com.txt
├── techpinions.com.txt
├── thewirecutter.com.txt
├── tuaw.com.txt
├── wpmayor.com.txt
├── alsacreations.com.txt
├── cjr.org.txt
├── filamentgroup.com.txt
├── fortelabs.co.txt
├── geeksofdoom.com.txt
├── higcapital.com.txt
├── humantransit.org.txt
├── mforum.cari.com.my.txt
├── mikeash.com.txt
├── neunetz.com.txt
├── seattletransitblog.com.txt
├── spectrejournal.com.txt
├── squashed.tumblr.com.txt
├── tijd.be.txt
├── vk.com.txt
├── voltairenet.org.txt
├── vozpopuli.com.txt
├── wochenanzeiger.de.txt
├── adslzone.net.txt
├── basicthinking.de.txt
├── blog.eleven-labs.com.txt
├── diagonalperiodico.net.txt
├── ecranlarge.com.txt
├── engineering.tumblr.com.txt
├── inhabitat.com.txt
├── muycomputerpro.com.txt
├── mysqlblog.fivefarmers.com.txt
├── newcriterion.com.txt
├── papodehomem.com.br.txt
├── rasgolatente.es.txt
├── resilience.org.txt
├── sayidaty.net.txt
├── sdxcentral.com.txt
├── toolinux.com.txt
├── blog.landr.com.txt
├── blog.pinboard.in.txt
├── boundlessline.org.txt
├── devlinsangle.blogspot.co.at.txt
├── eff.org.txt
├── gurumed.org.txt
├── hvg.hu.txt
├── ilyabirman.ru.txt
├── interconnected.org.txt
├── jp.motorsport.com.txt
├── lehollandaisvolant.net.txt
├── maritimedanmark.dk.txt
├── novinky.cz.txt
├── rachelandrew.co.uk.txt
├── rezeptwelt.de.txt
├── stefanjudis.com.txt
├── techcommunity.microsoft.com.txt
├── visualcapitalist.com.txt
├── .ietf.org.txt
├── .philhist.unibas.ch.txt
├── 24.ae.txt
├── acroswing.fr.txt
├── angrymetalguy.com.txt
├── cleafy.com.txt
├── donnahay.com.au.txt
├── gawker.com.txt
├── glazman.org.txt
├── gnppn.fr.txt
├── hiphopleeft.nl.txt
├── icannabis.tumblr.com.txt
├── labs.ripe.net.txt
├── nojesguiden.se.txt
├── openai.com.txt
├── photopills.com.txt
├── researchandmarkets.com.txt
├── rpgsite.net.txt
├── timeshighereducation.com.txt
├── useit.com.txt
├── vitispr.com.txt
├── yostivanich.com.txt
├── accaglobal.com.txt
├── audiobookshelf.org.txt
├── ebay.com.txt
├── grafikart.fr.txt
├── inessential.com.txt
├── mintpressnews.com.txt
├── omiliya.org.txt
├── popehat.com.txt
├── searchenginejournal.com.txt
├── terrestres.org.txt
├── thecounter.org.txt
├── touilleur-express.fr.txt
├── valdaiclub.com.txt
├── vc.ru.txt
├── .craigslist.org.txt
├── accesstoinsight.org.txt
├── addendum.org.txt
├── americandrink.net.txt
├── blog.dropbox.com.txt
├── cnrs.fr.txt
├── commonwealmagazine.org.txt
├── da.feedsportal.com.txt
├── firstthings.com.txt
├── granta.com.txt
├── haberler.com.txt
├── itwire.com.txt
├── journal.markusthoma.com.txt
├── lezephyrmag.com.txt
├── libcom.org.txt
├── msdn.microsoft.com.txt
├── nakedsecurity.sophos.com.txt
├── nextdraft.com.txt
├── politifact.com.txt
├── snob.ru.txt
├── timeshighereducation.co.uk.txt
├── wiki.guildwars.com.txt
├── 36kr.com.txt
├── amandala.com.bz.txt
├── annouchka.fr.txt
├── econlog.econlib.org.txt
├── exoplanets.nasa.gov.txt
├── folklore.org.txt
├── gizmodo.uol.com.br.txt
├── healthletter.mayoclinic.com.txt
├── linuxjournal.com.txt
├── macg.co.txt
├── marco.org.txt
├── nicj.net.txt
├── pastebin.com.txt
├── splinternews.com.txt
├── thesimpledollar.com.txt
├── web.dev.txt
├── wiki.guildwars2.com.txt
├── wmpoweruser.com.txt
├── .livejournal.com.txt
├── .redbullmusicacademy.com.txt
├── actualitte.com.txt
├── albayan.ae.txt
├── annatravelling.wordpress.com.txt
├── blog.kaelig.fr.txt
├── bostonreview.net.txt
├── ekultura.hu.txt
├── fictionpress.com.txt
├── franceculture.fr.txt
├── gofugyourself.com.txt
├── kottke.org.txt
├── linkedin.com.txt
├── n-tv.de.txt
├── redmas.com.co.txt
├── warriordudimanche.net.txt
├── alternet.org.txt
├── blog.mozilla.org.txt
├── coffeecircle.com.txt
├── csswizardry.com.txt
├── enikos.gr.txt
├── framablog.org.txt
├── information.dk.txt
├── itsfoss.com.txt
├── kickstarter.com.txt
├── linux.com.txt
├── marmiton.org.txt
├── mentalfloss.com.txt
├── mirrorfootball.co.uk.txt
├── news.rambler.ru.txt
├── parislemon.com.txt
├── sec.gov.txt
├── shahinkalantari.com.txt
├── thinkspot.com.txt
├── venturebeat.com.txt
├── watoday.com.au.txt
├── aftenposten.no.txt
├── autoblog.com.txt
├── dummies.com.txt
├── fmhy.net.txt
├── globalgrind.com.txt
├── hacks.mozilla.org.txt
├── jalopnik.com.txt
├── labs.bishopfox.com.txt
├── lesswrong.com.txt
├── mlssoccer.com.txt
├── tvtropes.org.txt
├── whatever.scalzi.com.txt
├── andy-bell.design.txt
├── arxiv-vanity.com.txt
└── asymco.com.txt
/feeds.feedblitz.com.txt:
--------------------------------------------------------------------------------
1 | http_header(referer): http://feedblitz.com
2 |
--------------------------------------------------------------------------------
/as-web.jp.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://www.as-web.jp/f1/1275289
4 |
--------------------------------------------------------------------------------
/ganglia.info.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@id = "mid")]
2 | test_url: http://ganglia.info/
3 |
--------------------------------------------------------------------------------
/autocrypt.org.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@class='section']
3 |
4 | prune: no
5 |
--------------------------------------------------------------------------------
/lukew.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 |
3 | test_url: https://www.lukew.com/ff/entry.asp?1995
4 |
--------------------------------------------------------------------------------
/what-if.xkcd.com.txt:
--------------------------------------------------------------------------------
1 | autodetect_next_page: no
2 | test_url: http://what-if.xkcd.com/1/
--------------------------------------------------------------------------------
/soundcity.tv.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: sharing
2 |
3 | test_url: http://soundcity.tv/feed/
4 |
--------------------------------------------------------------------------------
/crimemagazine.com.txt:
--------------------------------------------------------------------------------
1 | autodetect_next_page: no
2 | test_url: http://www.crimemagazine.com/son-sam
--------------------------------------------------------------------------------
/ht.ly.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //iframe[@id='hootFrame']/@src
2 |
3 | test_url: http://ht.ly/bOiZV
--------------------------------------------------------------------------------
/the-magazine.org.txt:
--------------------------------------------------------------------------------
1 | tidy: no
2 |
3 | test_url: http://the-magazine.org/1/alone-together-again
--------------------------------------------------------------------------------
/chareidi.org.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm
--------------------------------------------------------------------------------
/pxlnv.com.txt:
--------------------------------------------------------------------------------
1 | date: //main//time/@datetime
2 |
3 | test_url: https://pxlnv.com/blog/bullshit-web/
4 |
--------------------------------------------------------------------------------
/snip.ly.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //meta[@property="og:url"]/@content
2 |
3 | test_url: http://snip.ly/qa1R
--------------------------------------------------------------------------------
/.dxy.cn.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 | prune: no
3 |
4 | test_url: http://neurosurg.dxy.cn/article/87224
--------------------------------------------------------------------------------
/alex.mullr.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="entry"]
2 | test_url: http://alex.mullr.net/blog/2011/05/on-spotify/
--------------------------------------------------------------------------------
/blog.lepine.pro.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 |
3 | test_url: http://blog.lepine.pro/bus-de-donnees-datapipeline
4 |
--------------------------------------------------------------------------------
/opensource.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='content clear-block']
2 | test_url: http://opensource.org/node/537
--------------------------------------------------------------------------------
/techmeme.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link_in_feed: //b/a
2 |
3 | test_url: http://www.techmeme.com/feed.xml
4 |
--------------------------------------------------------------------------------
/blog.fefe.de.txt:
--------------------------------------------------------------------------------
1 | title: //h2
2 | date: //h3
3 | body: //ul
4 |
5 | test_url: http://blog.fefe.de/?ts=b063bf55
--------------------------------------------------------------------------------
/danluu.com.txt:
--------------------------------------------------------------------------------
1 | body: /html/body/main
2 |
3 | prune: no
4 |
5 | test_url: https://danluu.com/look-stupid/
6 |
--------------------------------------------------------------------------------
/axesslab.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@id="main-content"]
2 |
3 | test_url: https://axesslab.com/disabled-buttons-suck/
4 |
--------------------------------------------------------------------------------
/bitelia.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://bitelia.com/2011/09/klout-midiendo-influencia
--------------------------------------------------------------------------------
/blog.spu.edu.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post']
2 | test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/
--------------------------------------------------------------------------------
/briefly.co.za.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/7.4
2 |
3 | test_url: https://briefly.co.za/rss/south-africa.rss
4 |
--------------------------------------------------------------------------------
/digitalcourage.de.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 | test_url: https://digitalcourage.de/blog/2020/corona-apps_gastbeitrag
3 |
--------------------------------------------------------------------------------
/ecetia.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://ecetia.com/2011/09/vida-de-jugon-vii-las-tres-es
--------------------------------------------------------------------------------
/extracine.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://extracine.com/2011/09/straw-dogs-la-original
--------------------------------------------------------------------------------
/ghanaweb.com.txt:
--------------------------------------------------------------------------------
1 | http_header(User-Agent): PHP/7.4
2 |
3 | test_url: https://cdn.ghanaweb.com/feed/newsfeed.xml
4 |
--------------------------------------------------------------------------------
/help.fivefilters.org.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="title"]/h3
2 | date: substring-after(//div[@class="meta"], ": ")
3 |
--------------------------------------------------------------------------------
/jjahnke.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry']
2 | prune: no
3 |
4 | test_url: http://www.jjahnke.net/rundbr87.html#2514
--------------------------------------------------------------------------------
/jollinger.com.txt:
--------------------------------------------------------------------------------
1 | body: //body
2 |
3 | test_url: http://www.jollinger.com/photo/enlargers/guide-to-enlargers.htm
4 |
--------------------------------------------------------------------------------
/kumailplus.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class = "entry-full"]
2 |
3 | test_url: http://www.kumailplus.com/2011/12/02/24308
--------------------------------------------------------------------------------
/luxuo.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post-content']
2 | prune: no
3 |
4 | test_url: http://www.luxuo.com/watches/feed
--------------------------------------------------------------------------------
/mattcutts.com.txt:
--------------------------------------------------------------------------------
1 | date: //*[@class = 'published']
2 | test_url: http://www.mattcutts.com/blog/internet-censorship-sopa/
--------------------------------------------------------------------------------
/roy.gbiv.com.txt:
--------------------------------------------------------------------------------
1 | strip_comments: no
2 | test_url: http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven
--------------------------------------------------------------------------------
/achgut.com.txt:
--------------------------------------------------------------------------------
1 | http_header(User-Agent): Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
2 |
3 |
--------------------------------------------------------------------------------
/backlinko.com.txt:
--------------------------------------------------------------------------------
1 | body: //main
2 | strip: //footer
3 | prune: no
4 |
5 | test_url: https://backlinko.com/ecommerce-seo
6 |
--------------------------------------------------------------------------------
/facta.co.jp.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='content']
2 |
3 | test_url: http://facta.co.jp/blog/archives/20111026001026.html
4 |
--------------------------------------------------------------------------------
/meowni.ca.txt:
--------------------------------------------------------------------------------
1 | author: //meta[@name="author"]/@content
2 |
3 | test_url: https://meowni.ca/posts/2017-puppeteer-tests/
4 |
--------------------------------------------------------------------------------
/motorfull.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://motorfull.com/2011/09/aparca-valeo-park4u-remote
--------------------------------------------------------------------------------
/512pixels.net.txt:
--------------------------------------------------------------------------------
1 | title: //meta[@property='og:title']/@content
2 | test_url: http://www.512pixels.net/blog/2014/10/the-move
3 |
--------------------------------------------------------------------------------
/apple.news.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //p//a[contains(., 'Click here')]
2 | test_url: https://apple.news/AHQREjzH0Ts6iikKhNe6o8w
3 |
--------------------------------------------------------------------------------
/elance.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='jobDesc-bd']/p
2 |
3 | test_url: http://www.elance.com/j/xml-technical-intergration/23687172/
--------------------------------------------------------------------------------
/futurism.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: tracking-wider
2 |
3 | test_url: https://futurism.com/the-byte/china-ai-prosecutor-crimes
--------------------------------------------------------------------------------
/getnews.jp.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post']
2 | strip: //ul[@id='bookmark_single']
3 | test_url: http://getnews.jp/archives/117312
--------------------------------------------------------------------------------
/hacf.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="post-content"]
2 |
3 | test_url: https://www.hacf.fr/un-beau-dashboard-tout-simplement/
4 |
--------------------------------------------------------------------------------
/jamesclear.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'entry-content')]
2 |
3 | test_url: https://jamesclear.com/procrastination
4 |
--------------------------------------------------------------------------------
/kont.me.txt:
--------------------------------------------------------------------------------
1 | http_header(User-agent): twitterbot
2 |
3 | test_url: https://kont.me/%C3%A9loge-d%C3%A9croissance-individuelle
4 |
--------------------------------------------------------------------------------
/lostgarden.com.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 | convert_double_br_tags: yes
3 | test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html
--------------------------------------------------------------------------------
/mbl.is.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="frett-main"]
2 | test_url: http://mbl.is/frettir/innlent/2012/02/21/litill_munur_a_fargjaldaverdi/
--------------------------------------------------------------------------------
/n.survol.fr.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | date: //header//time/@datetime
3 |
4 | test_url: https://n.survol.fr/n/gerer-son-potager
5 |
--------------------------------------------------------------------------------
/philosophynow.org.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: welcome_box
2 |
3 | test_url: https://philosophynow.org/issues/141/Time_and_Being
4 |
--------------------------------------------------------------------------------
/scotthelme.co.uk.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post-content']
2 |
3 | test_url: https://scotthelme.co.uk/can-you-get-pwned-with-css/
--------------------------------------------------------------------------------
/acidcow.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[starts-with(@id, 'news-id-')]
2 |
3 | test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html
--------------------------------------------------------------------------------
/alseraj.net.txt:
--------------------------------------------------------------------------------
1 | title: //*[@id='normalfontyellow']
2 | test_url: http://www.alseraj.net/cgi-bin/pros/av/LeqaTextDisplay.cgi?display&2
--------------------------------------------------------------------------------
/avantivictoirerao.com.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://www.avantivictoirerao.com/society/markup-html-tags-and-formatting/
4 |
--------------------------------------------------------------------------------
/blogs.gnome.org.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/7.2
2 |
3 | test_url: https://blogs.gnome.org/aday/2017/08/08/the-gnome-way/
4 |
--------------------------------------------------------------------------------
/carlchenet.com.txt:
--------------------------------------------------------------------------------
1 | date: //time/@datetime
2 |
3 | test_url: https://carlchenet.com/foss-passive-consumerism-kills-our-community/
4 |
--------------------------------------------------------------------------------
/doc.wallabag.org.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(@class, 'normal')]
2 |
3 | test_url: https://doc.wallabag.org/en/user/filters.html
4 |
--------------------------------------------------------------------------------
/gsmarena.com.txt:
--------------------------------------------------------------------------------
1 | next_page_link: //a[@class='pages-next']
2 |
3 | test_url: http://www.gsmarena.com/samsung_galaxy_j2-review-1348.php
--------------------------------------------------------------------------------
/kachestvo.ru.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'inner_content')]
2 |
3 | test_url: http://kachestvo.ru/promtovar/odezhda/denim.html
--------------------------------------------------------------------------------
/monkeyzen.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://monkeyzen.com/2011/09/siluetas-de-clasicos-a-modo-de-vinilos
--------------------------------------------------------------------------------
/paquier.xyz.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post']
2 |
3 | test_url: http://paquier.xyz/postgresql-2/postgres-10-incompatible-changes/
4 |
--------------------------------------------------------------------------------
/summitroute.com.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://summitroute.com/blog/2015/12/24/instagram_bounty_case_study_for_defense/
4 |
--------------------------------------------------------------------------------
/.readthedocs.io.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@role='main']
3 | test_url: http://docs.readthedocs.io/en/latest/getting_started.html
4 |
--------------------------------------------------------------------------------
/allafrica.com.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/7.0
2 |
3 | test_url: http://allafrica.com/tools/headlines/rdf/latest/headlines.rdf
4 |
--------------------------------------------------------------------------------
/eternabuenosaires.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://eternabuenosaires.com/2011/09/calle-adolfo-bioy-casares
--------------------------------------------------------------------------------
/fakirpresse.info.txt:
--------------------------------------------------------------------------------
1 | author: //a[@class="url fn spip_in"]
2 |
3 | test_url: https://www.fakirpresse.info/les-patrons-ca-osent-tout
4 |
--------------------------------------------------------------------------------
/fivefilters.org.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(@class, 'container')]
2 | prune: no
3 |
4 | test_url: http://fivefilters.org/kindle-it/
5 |
--------------------------------------------------------------------------------
/fok.nl.txt:
--------------------------------------------------------------------------------
1 | # skip cookie warning
2 | single_page_link: concat(//form/@action, '?allowcookies=yes')
3 |
4 | test_url: http://fok.nl/687116
--------------------------------------------------------------------------------
/gist.github.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="highlight"]/pre
2 |
3 | prune: no
4 | tidy: no
5 |
6 | test_url: https://gist.github.com/1258908
--------------------------------------------------------------------------------
/indehekken.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post-entry']/p
2 |
3 | test_url: http://www.indehekken.net/you-only-sing-when-youre-rowing/
4 |
--------------------------------------------------------------------------------
/macdrifter.com.txt:
--------------------------------------------------------------------------------
1 | title: substring-before(//title,' « Macdrifter')
2 | test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/
--------------------------------------------------------------------------------
/marksdailyapple.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: wwsgd
2 | test_url: http://www.marksdailyapple.com/are-detoxes-and-cleanses-safe-and-effective/
--------------------------------------------------------------------------------
/nifi.apache.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 |
3 | test_url: https://nifi.apache.org/docs/nifi-docs/html/getting-started.html
4 |
--------------------------------------------------------------------------------
/osmand.net.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="article"]//h2
2 | date: //meta/@pubdate
3 |
4 | test_url: https://osmand.net/blog/guideline-pt
5 |
--------------------------------------------------------------------------------
/retro-games.fr.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //link[@rel='amphtml']
2 |
3 | test_url: https://www.retro-games.fr/shadow-hearts-ps2-7476
4 |
5 |
--------------------------------------------------------------------------------
/staltz.com.txt:
--------------------------------------------------------------------------------
1 | author: //h2[@class="name-title"]
2 |
3 | test_url: https://staltz.com/a-plan-to-rescue-the-web-from-the-internet.html
4 |
--------------------------------------------------------------------------------
/theteaspot.com.txt:
--------------------------------------------------------------------------------
1 | body: //main[@id='MainContent']
2 |
3 | prune: no
4 |
5 | test_url: https://www.theteaspot.com/pages/about-tea
6 |
--------------------------------------------------------------------------------
/wallabag.org.txt:
--------------------------------------------------------------------------------
1 | date: //meta[@name="date"]/@content
2 |
3 | test_url: https://wallabag.org/news/20241103-new-release-wallabag-2610/
4 |
--------------------------------------------------------------------------------
/zerokspot.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id="primarycontent"]
3 | test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/
--------------------------------------------------------------------------------
/altaonline.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: embed-pullquote
2 |
3 | test_url: https://www.altaonline.com/dispatches/a40395942/the-beach-rats/
4 |
--------------------------------------------------------------------------------
/blog.imirhil.fr.txt:
--------------------------------------------------------------------------------
1 | date: //section[@id="post"]//h1//time
2 |
3 | test_url: https://blog.imirhil.fr/2019/11/13/first-party-tracker.html
4 |
--------------------------------------------------------------------------------
/dagogtid.no.txt:
--------------------------------------------------------------------------------
1 | title: //span[@class = 'overskriftEkstrastor']
2 | author: //em/a
3 |
4 | test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414
--------------------------------------------------------------------------------
/dailydot.com.txt:
--------------------------------------------------------------------------------
1 | tidy: no
2 | body: //article
3 |
4 | test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/
--------------------------------------------------------------------------------
/dr-b.io.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='card-body']
2 |
3 | test_url: https://dr-b.io/post/Synology-DSM-7-with-Lets-Encrypt-and-DNS-Challenge
4 |
5 |
--------------------------------------------------------------------------------
/drdobbs.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //a[contains(@href, '/article/print')]
2 | test_url: http://www.drdobbs.com/architecture-and-design/240001128
--------------------------------------------------------------------------------
/flyingmachinestudios.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: linenos
2 | test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/
--------------------------------------------------------------------------------
/gizmovil.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://gizmovil.com/2011/09/hipertextual-labs-receptor-bluetooth-nokia-bh-214
--------------------------------------------------------------------------------
/goodfil.ms.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: gutter
2 | test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/
--------------------------------------------------------------------------------
/ishadeed.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | author: //h2[@class="post-author__name"]
3 |
4 | test_url: https://ishadeed.com/article/cascade-layers/
5 |
--------------------------------------------------------------------------------
/je-suis-papa.com.txt:
--------------------------------------------------------------------------------
1 | strip: //noscript
2 |
3 | test_url: http://www.je-suis-papa.com/pandacraft-kit-educatif-creatif-abonnement-12-ans/
4 |
--------------------------------------------------------------------------------
/jungle-world.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[contains(@class,'story')]
3 | test_url: http://jungle-world.com/artikel/2015/02/51207.html
4 |
--------------------------------------------------------------------------------
/rust-lang-nursery.github.io.txt:
--------------------------------------------------------------------------------
1 | body: //main
2 |
3 | test_url: https://rust-lang-nursery.github.io/rust-cookbook/web/clients/apis.html
4 |
--------------------------------------------------------------------------------
/stjv.fr.txt:
--------------------------------------------------------------------------------
1 | date: //time[contains(@class, 'published')]/@datetime
2 |
3 | test_url: https://www.stjv.fr/2018/01/au-sujet-quantic-dream/
4 |
--------------------------------------------------------------------------------
/vg.no.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='artikkelspalte']
2 | strip_id_or_class: breadcrumb
3 | test_url: http://www.vg.no/spill/artikkel.php?artid=10003628
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | These files are released to the public domain.
2 |
3 | See https://creativecommons.org/publicdomain/zero/1.0/ for more information.
4 |
--------------------------------------------------------------------------------
/appleweblog.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://appleweblog.com/2011/09/encontrada-vulnerabilidad-grave-en-skype-para-ios
--------------------------------------------------------------------------------
/az.lib.ru.txt:
--------------------------------------------------------------------------------
1 | body: //xxx7
2 | tidy: no
3 | prune: no
4 |
5 | test_url: http://az.lib.ru/s/shepkinakupernik_t_l/text_1910_inesa_di_kastro.shtml
6 |
--------------------------------------------------------------------------------
/blogs.forbes.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry']
2 | test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/
--------------------------------------------------------------------------------
/brucelawson.co.uk.txt:
--------------------------------------------------------------------------------
1 | title: //h2[@itemprop="title"]
2 |
3 | test_url: https://www.brucelawson.co.uk/2018/the-practical-value-of-semantic-html/
4 |
--------------------------------------------------------------------------------
/doc.rust-lang.ru.txt:
--------------------------------------------------------------------------------
1 | body: //main
2 |
3 | prune: no
4 | tidy: no
5 |
6 | test_url: https://doc.rust-lang.ru/book/ch10-03-lifetime-syntax.html
7 |
--------------------------------------------------------------------------------
/dropbox.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //a[@id='download_button_link']
2 |
3 | test_url: https://www.dropbox.com/s/qmocfrco2t0d28o/Fluffbeast.docx
4 |
--------------------------------------------------------------------------------
/gihyo.jp.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]
2 |
3 | test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010
--------------------------------------------------------------------------------
/help.sharegate.com.txt:
--------------------------------------------------------------------------------
1 | body: //body//article
2 | prune: no
3 |
4 | test_url: https://help.sharegate.com/en/articles/12185777-release-25-9-2
5 |
--------------------------------------------------------------------------------
/hiperpop.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://hiperpop.com/2011/09/marc-anthony-celebra-su-cumpleanos-con-jennifer-lopez
--------------------------------------------------------------------------------
/hipertextual.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://hipertextual.com/2011/09/banda-ancha-en-america-latina-insignificante
3 |
--------------------------------------------------------------------------------
/krone.at.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' kmm-article-box ')]
2 |
3 | test_url: https://www.krone.at/1941895
4 |
--------------------------------------------------------------------------------
/panic.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry']
2 | date: //h3[@class='postDate']
3 | test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/
--------------------------------------------------------------------------------
/perell.com.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(@class, 'BlogItem-main')]
2 |
3 | test_url: https://www.perell.com/blog/50-ideas-that-changed-my-life
4 |
--------------------------------------------------------------------------------
/portertech.ca.txt:
--------------------------------------------------------------------------------
1 | author: //*[(@class = "author")]
2 | date: //*[(@class = "date")]
3 | test_url: http://portertech.ca/2012/12/10/iac-morning-market/
--------------------------------------------------------------------------------
/swcarpentry.github.io.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | prune: no
4 |
5 | test_url: https://swcarpentry.github.io/shell-novice/02-filedir/index.html
6 |
--------------------------------------------------------------------------------
/tofugu.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry-content']
2 |
3 | test_url: http://www.tofugu.com/2015/07/20/interview-with-toriena-japanese-chiptune/
4 |
--------------------------------------------------------------------------------
/urbandictionary.com.txt:
--------------------------------------------------------------------------------
1 | title: //title
2 | body: //table[@id='entries']
3 | test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
4 |
--------------------------------------------------------------------------------
/warnerbros.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="article-body"]
2 | test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait
3 |
--------------------------------------------------------------------------------
/zataz.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="blog-title"]
2 |
3 | test_url: https://www.zataz.com/bientot-noel-securisons-le-prochain-pc-de-la-famille/
4 |
--------------------------------------------------------------------------------
/altfoto.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://altfoto.com/2011/09/nikon-presenta-su-nuevo-sistema-nikon-1-y-dos-nuevas-camaras
--------------------------------------------------------------------------------
/chaperonsetvous.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, "excerpt")]
2 |
3 | test_url: https://www.chaperonsetvous.fr/legalite-ca-commence-a-la-creche-3/
4 |
--------------------------------------------------------------------------------
/cucharasonica.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://cucharasonica.com/2011/09/queen-busca-candidatos-para-su-propia-banda-tributo
--------------------------------------------------------------------------------
/dansdata.com.txt:
--------------------------------------------------------------------------------
1 | autodetect_next_page: no
2 | tidy: no
3 | prune: no
4 | body: //div[@class='NoOverflow']
5 | test_url: http://www.dansdata.com/gz129.htm
--------------------------------------------------------------------------------
/doc.rust-lang.org.txt:
--------------------------------------------------------------------------------
1 | body: //main
2 |
3 | prune: no
4 | tidy: no
5 |
6 | test_url: https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html
7 |
--------------------------------------------------------------------------------
/interviewmagazine.com.txt:
--------------------------------------------------------------------------------
1 | title: //title
2 | body: //div[contains(@class, 'block')]
3 |
4 | test_url: http://www.interviewmagazine.com/film/spike-jonze
--------------------------------------------------------------------------------
/jvt.me.txt:
--------------------------------------------------------------------------------
1 | author: //div[@class="post-details"]//a[contains(@class, "p-name")]
2 |
3 | test_url: https://www.jvt.me/posts/2019/10/20/indieweb-talk/
4 |
--------------------------------------------------------------------------------
/najlepsze-ksiazki.pl.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | strip: //footer
4 |
5 | test_url: https://najlepsze-ksiazki.pl/najlepsze-ksiazki-science-fiction/
6 |
--------------------------------------------------------------------------------
/news.rub.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' content-inner-inner-wrapper ')]
2 |
3 | http_header(Cookie): has_js=1
4 |
--------------------------------------------------------------------------------
/pentaxforums.com.txt:
--------------------------------------------------------------------------------
1 | next_page_link: //a[contains(., 'Next:')]
2 | test_url: http://www.pentaxforums.com/reviews/long-exposure-handhelds/introduction.html
--------------------------------------------------------------------------------
/phototrend.fr.txt:
--------------------------------------------------------------------------------
1 | author: //div[contains(@class, "vcard")]
2 |
3 | test_url: https://phototrend.fr/2019/04/mise-a-jour-luminar-3-1-0-accent-ai-2-0/
4 |
--------------------------------------------------------------------------------
/renenekuda.cz.txt:
--------------------------------------------------------------------------------
1 | title: //*[@class='entry-title']
2 | body: //div[@class='entry-content']
3 | test_url: http://www.renenekuda.cz/recept-na-produktivitu/
--------------------------------------------------------------------------------
/tldp.org.txt:
--------------------------------------------------------------------------------
1 | title: //title
2 | body: //h2 | //p | //ul
3 | prune: no
4 | tidy: no
5 |
6 | test_url: http://www.tldp.org/HOWTO/Plug-and-Play-HOWTO-7.html
--------------------------------------------------------------------------------
/vot-tak.tv.txt:
--------------------------------------------------------------------------------
1 | # prevent duplicated images
2 | strip: //img[@fallback]
3 |
4 | test_url: https://vot-tak.tv/novosti/16-08-2021-nevzlin-intervyu/
5 |
--------------------------------------------------------------------------------
/.about.com.txt:
--------------------------------------------------------------------------------
1 | title: //*[@id='title']//h1
2 | body: //*[(@id = "articlebody")]
3 | test_url: http://nutrition.about.com/od/changeyourdiet/qt/healthysnacks.htm
--------------------------------------------------------------------------------
/blogs.lse.ac.uk.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/8.3
2 |
3 | test_url: https://blogs.lse.ac.uk/politicsandpolicy/stop-working-on-increasing-road-safety/
4 |
--------------------------------------------------------------------------------
/coalicionporelevangelio.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="section"]
2 |
3 | prune: no
4 |
5 | test_url: https://www.coalicionporelevangelio.org/curso/el-dinero/
6 |
--------------------------------------------------------------------------------
/crn.de.txt:
--------------------------------------------------------------------------------
1 | author: //p[contains(@class,'author')]/a
2 | date: //div[contains(@class,'date')]
3 | test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html
--------------------------------------------------------------------------------
/elfster.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: elf-call-out-ssg
2 | strip_id_or_class: cta-box
3 |
4 | test_url: https://www.elfster.com/content/secret-santa-rules/
5 |
--------------------------------------------------------------------------------
/fiftytwo.in.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'story-wrapper')]
2 | strip: //svg
3 |
4 | prune: no
5 |
6 | test_url: https://fiftytwo.in/story/kingdom/
7 |
--------------------------------------------------------------------------------
/finexpert.e15.cz.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: article-linktoanother
2 |
3 | test_url: http://finexpert.e15.cz/budiz-teplo-eu-stedre-zadotuje-nejen-plynovy-kotel
--------------------------------------------------------------------------------
/marigold.cz.txt:
--------------------------------------------------------------------------------
1 | http_header(accept): */*
2 |
3 | test_url: https://www.marigold.cz/item/projektovy-manazer-je-v-cesku-sproste-slovo-ke-skode-projektu
4 |
--------------------------------------------------------------------------------
/monkeyuser.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ', normalize-space(@class), ' '), ' content ')]
2 |
3 | test_url: https://www.monkeyuser.com/2019/v-201/
4 |
--------------------------------------------------------------------------------
/news.techmeme.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='main']/div[@class='item']
2 | strip: //div[@class='right']
3 |
4 | test_url: http://news.techmeme.com/110516/fh-rip
--------------------------------------------------------------------------------
/python.org.txt:
--------------------------------------------------------------------------------
1 | body: //article[contains(concat(' ',normalize-space(@class),' '),' text ')]
2 |
3 | test_url: https://www.python.org/about/success/cog/
4 |
--------------------------------------------------------------------------------
/utdailybeacon.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='blox-story-text']
2 | test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html
3 |
--------------------------------------------------------------------------------
/worldwidewords.org.txt:
--------------------------------------------------------------------------------
1 | title: //p[@id='content']
2 |
3 | body: //div[@class='contentblock']
4 | test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm
--------------------------------------------------------------------------------
/.tweakblogs.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="article"]
2 | author: //p[@class="author"]/a
3 |
4 | test_url: http://harryl.tweakblogs.net/blog/11988/voorstellen
5 |
--------------------------------------------------------------------------------
/caseinterview.com.txt:
--------------------------------------------------------------------------------
1 | body: (//div[@data-elementor-type="wp-post"])[1]
2 | strip: //footer
3 |
4 | prune: no
5 |
6 | test_url: https://caseinterview.com/mece
7 |
--------------------------------------------------------------------------------
/gizmologia.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://gizmologia.com/2011/09/amd-trinity-el-sucesor-de-llano-en-una-demostracion-muy-interesante
--------------------------------------------------------------------------------
/happyassassin.net.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/7.2
2 |
3 | test_url: https://www.happyassassin.net/2014/01/25/uefi-boot-how-does-that-actually-work-then/
4 |
--------------------------------------------------------------------------------
/iansommerville.com.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): PHP/5.3
2 |
3 | test_url: http://iansommerville.com/blog/a-guide-to-scottish-delicacies-for-tgo-challengers/
4 |
--------------------------------------------------------------------------------
/iplaysoft.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']//div[@class='entry-banner' or @class='entry-content']
2 | test_url: http://www.iplaysoft.com/webbrowserpassview.html
--------------------------------------------------------------------------------
/jobbank.gc.ca.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='formatCont_en']
2 |
3 | prune: no
4 |
5 | test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922
--------------------------------------------------------------------------------
/lefilrouge.media.txt:
--------------------------------------------------------------------------------
1 |
2 | strip_id_or_class: the_champ_sharing_container
3 |
4 | test_url: https://lefilrouge.media/etats-unis-nevada-burning-man-festival/
5 |
--------------------------------------------------------------------------------
/matt.might.net.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | author: string("Matt Might")
3 | strip: //h1/following-sibling::div
4 |
5 | test_url: http://matt.might.net/articles/oo-cesk/
--------------------------------------------------------------------------------
/mein-mmo.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='gp-entry-text']
2 | strip: //aside
3 |
4 | test_url: http://mein-mmo.de/pokemon-go-15-staerksten-pokemon-der-2-generation/
5 |
--------------------------------------------------------------------------------
/radionz.co.nz.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='body']
2 | title: //div[@class='newsstory']/h2
3 | test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d
--------------------------------------------------------------------------------
/roomescapeartist.com.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | strip: //svg
4 |
5 | test_url: https://roomescapeartist.com/2020/05/18/locked-amsterdam-submarine-hivemind-review/
6 |
--------------------------------------------------------------------------------
/sivers.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']//article
2 |
3 | strip: //header
4 | strip: //footer
5 |
6 | prune: no
7 |
8 | test_url: https://sivers.org/exex
9 |
--------------------------------------------------------------------------------
/sports.ru.txt:
--------------------------------------------------------------------------------
1 | title: //meta[@property="twitter:title"]/@content
2 | date: //time/@datetime
3 |
4 | test_url: https://www.sports.ru/football/blogs/3326825.html
5 |
--------------------------------------------------------------------------------
/stumbleupon.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //iframe[@id='tb-stumble-frame']/@src
2 |
3 | test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
--------------------------------------------------------------------------------
/thenews.coop.txt:
--------------------------------------------------------------------------------
1 | body: //div[@itemprop='articleBody']
2 |
3 | test_url: http://www.thenews.coop/98221/news/co-operatives/jeremy-corbyn-needs-co-op-movement/
4 |
--------------------------------------------------------------------------------
/turnoff.us.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class='post-title']
2 | body: //article[@class='post-content']
3 | test_url: https://turnoff.us/geek/the-depressed-developer-13/
4 |
--------------------------------------------------------------------------------
/vedonlyonti.com.txt:
--------------------------------------------------------------------------------
1 | strip: //style
2 |
3 | test_url: https://vedonlyonti.com/veikkausvihjeet/pitkavetovihjeet/nba-boston-celtics-cleveland-cavaliers-8-5-2024
4 |
--------------------------------------------------------------------------------
/betabeat.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="entry-content"]
2 | test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/
--------------------------------------------------------------------------------
/cashless.pl.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' post-page-content ')]
2 |
3 | test_url: https://www.cashless.pl/5465-revolut-kryzys
4 |
--------------------------------------------------------------------------------
/cfclrk.com.txt:
--------------------------------------------------------------------------------
1 | body: //body
2 |
3 | strip: //header
4 | strip: //nav
5 |
6 | prune: no
7 |
8 | test_url: https://www.cfclrk.com/articles/github_identities.html
9 |
--------------------------------------------------------------------------------
/china-gadgets.de.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: bcac-item
2 | strip_id_or_class: ez-toc-container
3 |
4 | test_url: https://www.china-gadgets.de/blitzwolf-bw-v7-beamer/
5 |
--------------------------------------------------------------------------------
/davidwalsh.name.txt:
--------------------------------------------------------------------------------
1 | author: //article//span[@itemprop="name"]
2 | strip: //div[@class="article-block"]
3 |
4 | test_url: https://davidwalsh.name/optional-chaining
5 |
--------------------------------------------------------------------------------
/democracynow.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'blog_body')]
2 |
3 | prune: no
4 |
5 | test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a
--------------------------------------------------------------------------------
/dissentmagazine.org.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | author: //a[@rel='author']
4 |
5 | test_url: https://www.dissentmagazine.org/article/why-the-left-needs-liberals
6 |
--------------------------------------------------------------------------------
/earvingad.github.io.txt:
--------------------------------------------------------------------------------
1 | body: //main
2 |
3 | strip: //header
4 | strip: //footer
5 |
6 | prune: no
7 |
8 | test_url: https://earvingad.github.io/posts/headscale/
9 |
--------------------------------------------------------------------------------
/explosm.net.txt:
--------------------------------------------------------------------------------
1 | body: //*[@id="comic"]/div/div[2]/div/span
2 | author: //*[@id="comic"]/div/div[2]/div/div/div[1]
3 |
4 | test_url: http://explosm.net/comics/3955/
5 |
--------------------------------------------------------------------------------
/freelancer.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="projectDetailsContent"]//td
2 |
3 | test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html
--------------------------------------------------------------------------------
/fs.blog.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]
2 |
3 | prune: no
4 |
5 | test_url: https://fs.blog/inside-a-miracle/
6 |
--------------------------------------------------------------------------------
/gauchiste.fr.txt:
--------------------------------------------------------------------------------
1 | date: //meta[@name="date"]/@content
2 |
3 | test_url: https://gauchiste.fr/post/2019/11/08/Un-catadioptre-vous-sauvera-(peut-%C3%AAtre)-la-vie
4 |
--------------------------------------------------------------------------------
/izismile.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[starts-with(@id, 'news-id-')]
2 | prune: no
3 |
4 | test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html
--------------------------------------------------------------------------------
/lado.mx.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //a[starts-with(@href, 'http') and contains(., 'Leer noticia completa')]
2 |
3 | test_url: https://lado.mx/noticia.php?id=15249243
4 |
--------------------------------------------------------------------------------
/make.wordpress.org.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: glossary-item-hidden-content
2 |
3 | test_url: https://make.wordpress.org/core/2020/12/21/bug-scrub-schedule-for-5-7/
4 |
--------------------------------------------------------------------------------
/mbk-news.appspot.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'evo-entry-content')]
2 |
3 | prune: no
4 |
5 | test_url: https://mbk-news.appspot.com/suzhet/ee-imya-budet/
6 |
--------------------------------------------------------------------------------
/mytotalretail.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@itemprop="content"]
3 |
4 | test_url: https://www.mytotalretail.com/article/how-site-search-could-kill-amazon/
5 |
--------------------------------------------------------------------------------
/openstreetmap.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 | strip: //div[@class='standard-form']
3 | test_url: https://www.openstreetmap.org/user/woodpeck/diary/393947
4 |
--------------------------------------------------------------------------------
/php.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 | strip_id_or_class: manualnavbar
3 |
4 | prune: no
5 |
6 | test_url: http://www.php.net/manual/en/migration5.incompatible.php
--------------------------------------------------------------------------------
/vakarm.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="block_news_main_content_preview"]
2 |
3 | test_url: https://www.vakarm.net/news/read/Choual-History-X-tout-un-cinema/10270/2
4 |
--------------------------------------------------------------------------------
/archiloque.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 |
3 | date: //span[@id='revdate']
4 |
5 | test_url: https://archiloque.net/blog/task-engine-ruby/task-engine-ruby.html
6 |
--------------------------------------------------------------------------------
/berlingske.dk.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class='headline']
2 | body: //div[contains(@class, 'article-wrapper')]
3 | test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa
--------------------------------------------------------------------------------
/carnegie.ru.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-body ')]
2 | prune: no
3 |
4 | test_url: https://carnegie.ru/commentary/86428
5 |
--------------------------------------------------------------------------------
/cooper.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class,'post-body')]
2 | date: //abbr[@class='published']
3 |
4 | test_url: http://www.cooper.com/journal/2015/6/creating-personas
5 |
--------------------------------------------------------------------------------
/doughellmann.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //a[.="Read more…"]/@href
2 |
3 | test_url: https://doughellmann.com/blog/2017/02/06/getopt-command-line-option-parsing-pymotw-3/
4 |
--------------------------------------------------------------------------------
/fokus.se.txt:
--------------------------------------------------------------------------------
1 | title: //h2[contains(@class, 'entry-title')]
2 | body: //div[contains(@class, 'entry-content')]
3 | test_url: http://www.fokus.se/2017/03/olosta-karnfragor/
4 |
--------------------------------------------------------------------------------
/geenstijl.nl.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id = 'article']
2 | strip: //div[@id = 'klasbox']
3 | test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html
--------------------------------------------------------------------------------
/hackertarget.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '), ' btx-post-body ')]
2 |
3 | test_url: https://hackertarget.com/ssh-examples-tunnels/
4 |
--------------------------------------------------------------------------------
/ianlewis.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class), ' '),' post-content ')]
2 |
3 | test_url: https://www.ianlewis.org/en/almighty-pause-container
4 |
--------------------------------------------------------------------------------
/juliareda.eu.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | strip: //aside[contains(@class, "mashsb-container")]
4 |
5 | test_url: https://juliareda.eu/2019/02/eu-copyright-final-text/
6 |
--------------------------------------------------------------------------------
/kresus.org.txt:
--------------------------------------------------------------------------------
1 | title: //h2[@class="entry-title"]
2 | date: //time[@class="published"]/@datetime
3 |
4 | test_url: https://kresus.org/blog/kresus-version-0-14-0.html
5 |
--------------------------------------------------------------------------------
/letraslibres.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: concat(//link[@rel="canonical"]/@href, "?page=full")
2 |
3 | test_url: http://www.letraslibres.com/revista/dossier/quien-manda-en-europa
4 |
--------------------------------------------------------------------------------
/longform.org.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //div[@class="post"]/div[@class="title"]/a
2 |
3 | test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/
--------------------------------------------------------------------------------
/oschina.net.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | strip_id_or_class: syntaxhighlighter
3 | test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print
--------------------------------------------------------------------------------
/reactjs.org.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | strip: //header
4 | strip: //aside
5 |
6 | prune: no
7 |
8 | test_url: https://reactjs.org/docs/uncontrolled-components.html
9 |
--------------------------------------------------------------------------------
/signal.org.txt:
--------------------------------------------------------------------------------
1 | title: //h2[@class="post-title"]
2 | author: //p[contains(@class, "body2")]//a
3 |
4 | test_url: https://signal.org/blog/signal-private-group-system/
5 |
--------------------------------------------------------------------------------
/slrlounge.com.txt:
--------------------------------------------------------------------------------
1 | replace_string():
3 |
4 | test_url: https://www.slrlounge.com/flash-outdoors-ambient-balancing-natural/
5 |
--------------------------------------------------------------------------------
/yosoy.red.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://yosoy.red/2021/01/22/politica/
4 | test_contains: es fundamental entender que los sistemas son creados por humanos
5 |
--------------------------------------------------------------------------------
/.mitpress.mit.edu.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: wp-block-pullquote
2 |
3 | test_url: https://thereader.mitpress.mit.edu/a-master-perfumers-reflections-on-patchouli-and-vetiver/
4 |
--------------------------------------------------------------------------------
/aerobuzz.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' chapo ')]
2 |
3 | test_url: https://www.aerobuzz.fr/depose-minute/aerodromes-de-proximite/
4 |
--------------------------------------------------------------------------------
/alexduner.com.txt:
--------------------------------------------------------------------------------
1 | body: //section[@class='content']
2 | date: //span[1]
3 | author: //h1[@id='sitetitle']
4 | test_url: http://alexduner.com/blog/something-i-learned-today
5 |
--------------------------------------------------------------------------------
/archdaily.com.txt:
--------------------------------------------------------------------------------
1 | date: //div[@class='post_date']
2 |
3 | body: //div[@class='post_content']
4 |
5 | test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up
--------------------------------------------------------------------------------
/c.newsnow.com.txt:
--------------------------------------------------------------------------------
1 | # Also as c.newsnow.co.uk.txt
2 |
3 | single_page_link: //div[@id="js-retrieval-msg"]//a
4 |
5 | test_url: http://c.newsnow.com/A/1041394538?-15254:37150
6 |
--------------------------------------------------------------------------------
/codeproject.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="contentdiv"]
2 | date: //span[@class="date"]
3 | test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code
--------------------------------------------------------------------------------
/declassifieduk.org.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: related-post
2 |
3 | test_url: https://declassifieduk.org/british-warmongering-is-driving-europe-towards-catastrophe-in-ukraine/
4 |
--------------------------------------------------------------------------------
/elblogsalmon.com.txt:
--------------------------------------------------------------------------------
1 | replace_string(sf-src): src
2 |
3 | test_url: https://www.elblogsalmon.com/economia/no-todo-fue-mal-con-el-euro-datos-que-indican-que-fue-una-buena-idea
4 |
--------------------------------------------------------------------------------
/esglobal.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='blog-content']
2 |
3 | test_url: http://www.esglobal.org/el-caos-en-el-este-los-socios-de-la-ue-necesitan-que-se-les-preste-atencion/
4 |
--------------------------------------------------------------------------------
/gold.ac.uk.txt:
--------------------------------------------------------------------------------
1 | body: //article//div[contains(concat(' ',normalize-space(@class),' '), ' rich-content ')]
2 |
3 | test_url: https://www.gold.ac.uk/news/carbon-neutral-plan/
4 |
--------------------------------------------------------------------------------
/nextcloud.com.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="blog-title-and-subtitle"]//h1
2 | date: //time/@datetime
3 |
4 | test_url: https://nextcloud.com/blog/celebrating-2-years-nextcloud/
5 |
--------------------------------------------------------------------------------
/optimizesmart.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="content"]
2 |
3 | prune: no
4 |
5 | test_url: https://www.optimizesmart.com/how-to-set-up-conversion-tracking-in-google-analytics-4/
6 |
--------------------------------------------------------------------------------
/pastepad.fivefilters.org.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id='ff-pastepad-content']
3 | prune: no
4 | # todo: add test file
5 | test_url: http://pastepad.fivefilters.org/test.html
--------------------------------------------------------------------------------
/pixellibre.net.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="entry-title"]
2 | author: //a[@class='url fn n']
3 |
4 | test_url: https://pixellibre.net/2017/10/vie-privee-smartphones-applications/
5 |
--------------------------------------------------------------------------------
/radishzz.cc.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-content ')]
2 |
3 | prune: no
4 |
5 | test_url: https://radishzz.cc/posts/384776b2/
6 |
--------------------------------------------------------------------------------
/sme.sk.txt:
--------------------------------------------------------------------------------
1 | title: //meta[@property='og:title']/@content
2 | date: //p[@class='autor_line']/b/text()
3 | test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html
--------------------------------------------------------------------------------
/sourcebooks.com.txt:
--------------------------------------------------------------------------------
1 | #grab the actual content div
2 | body: //div[@class='rt-article']
3 |
4 | test_url: http://www.sourcebooks.com/blog/happy-27th-birthday-sourcebooks.html
5 |
--------------------------------------------------------------------------------
/sprengsatz.de.txt:
--------------------------------------------------------------------------------
1 | title: //h2
2 | author: string('Michael Spreng')
3 | date: //div[@class='date']
4 | body: //div[@class='entry']
5 | test_url: http://www.sprengsatz.de/?p=3691
--------------------------------------------------------------------------------
/stopgame.ru.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(concat(' ',normalize-space(@class),' '),' article ')]
2 |
3 | test_url: https://stopgame.ru/show/113377/phoenotopia_awakening_review
4 |
--------------------------------------------------------------------------------
/thefilmexperience.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='body']
2 | test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html
--------------------------------------------------------------------------------
/triplebyte.com.txt:
--------------------------------------------------------------------------------
1 | body: //article[contains(concat(' ',normalize-space(@class), ' '), ' blog-post ')]
2 |
3 | test_url: https://triplebyte.com/blog/marissa-mayer-interview
4 |
--------------------------------------------------------------------------------
/vivirmexico.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social
--------------------------------------------------------------------------------
/w3.org.txt:
--------------------------------------------------------------------------------
1 | date: //time[@class="entry-date"]/@datetime
2 |
3 | test_url: https://www.w3.org/blog/2019/05/w3c-and-whatwg-to-work-together-to-advance-the-open-web-platform/
4 |
--------------------------------------------------------------------------------
/www.seriouseats.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'recipe-wrapper')]
2 |
3 | test_url: https://www.seriouseats.com/recipes/2014/02/braised-short-ribs-from-daniel.html
4 |
--------------------------------------------------------------------------------
/a11ywithlindsey.com.txt:
--------------------------------------------------------------------------------
1 | body: //main[@id="main-content"]
2 | author: "Lindsey Kopacz"
3 |
4 | test_url: https://www.a11ywithlindsey.com/blog/javascript-accessibility-accordions/
5 |
--------------------------------------------------------------------------------
/blog.eng.xogrp.com.txt:
--------------------------------------------------------------------------------
1 | title: //article[contains(@class, 'type_text')]//h2
2 | test_url: http://blog.eng.xogrp.com/post/154005485319/node-js-promise-enterprise-grade-first-of-all
3 |
--------------------------------------------------------------------------------
/blog.nightly.mozilla.org.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | date: //time/@datetime
3 |
4 | test_url: https://blog.nightly.mozilla.org/2018/06/27/protecting-your-privacy-in-firefox-pre-release/
5 |
--------------------------------------------------------------------------------
/c.newsnow.co.uk.txt:
--------------------------------------------------------------------------------
1 | # Also as c.newsnow.com.txt
2 |
3 | single_page_link: //div[@id="js-retrieval-msg"]//a
4 |
5 | test_url: http://c.newsnow.co.uk/A/1041394538?-15254:37150
6 |
--------------------------------------------------------------------------------
/cabinetmagazine.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'blog-content')]
2 |
3 | prune: no
4 |
5 | test_url: https://www.cabinetmagazine.org/kiosk/komska_yuliya_4_march_2021.php
6 |
--------------------------------------------------------------------------------
/caffereggio.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="pf-content"]
2 |
3 | test_url: http://www.caffereggio.net/2017/02/09/apoyo-pablo-iglesias-podemos-unidos-podemos-vicenc-navarro-publico/
4 |
--------------------------------------------------------------------------------
/economie.gouv.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'txtVisu')]
2 | prune: no
3 |
4 | test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance
--------------------------------------------------------------------------------
/gnu.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' top-level-extent ')]
2 | prune: no
3 |
4 | test_url: https://www.gnu.org/software/bash/manual/bash.html
5 |
--------------------------------------------------------------------------------
/lvsl.fr.txt:
--------------------------------------------------------------------------------
1 | title: //h1[contains(@class,'entry-title')]
2 | body: //div[contains(@class,'body-content')]
3 | test_url: http://lvsl.fr/peuple-manifestant-saez-a-t-pondu-hexagone
4 |
--------------------------------------------------------------------------------
/lwlies.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' singleColRight ')]
2 |
3 | test_url: https://lwlies.com/articles/rope-alfred-hitchcock-masterpiece/
4 |
--------------------------------------------------------------------------------
/marriedtothesea.com.txt:
--------------------------------------------------------------------------------
1 | body: //img[contains(@src, '.gif')]
2 |
3 | test_url: http://www.marriedtothesea.com
4 | test_url: http://www.marriedtothesea.com/index.php?date=010818
5 |
--------------------------------------------------------------------------------
/miops.com.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@id="relatedblogs"]
2 | strip: //div[@class="product-card"]
3 |
4 | test_url: https://www.miops.com/blogs/news/master-tips-to-take-lightning-photos
5 |
--------------------------------------------------------------------------------
/missnumerique.com.txt:
--------------------------------------------------------------------------------
1 | strip: //figure/noscript
2 |
3 | test_url: https://www.missnumerique.com/blog/la-proxiphotographie-la-solution-nature-pour-exprimer-son-sens-artistique/
4 |
--------------------------------------------------------------------------------
/pmf.silvrback.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' all_external_links ')]
2 | test_url: https://pmf.silvrback.com/fixing-tethering-on-android-kitkat
--------------------------------------------------------------------------------
/pogue.blogs.nytimes.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="content"]/div[1]
2 |
3 | title: //h1[@class="entry-title"]
4 | test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/
--------------------------------------------------------------------------------
/queerty.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='copy']
2 | title: //h1[@class='hed']
3 | test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/
--------------------------------------------------------------------------------
/renverse.co.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'article-texte')]
2 |
3 | prune: no
4 |
5 | test_url: https://renverse.co/infos-locales/article/rejoignez-la-zad-du-geissberg-3486
6 |
--------------------------------------------------------------------------------
/thebaffler.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='pico']
2 |
3 | strip_id_or_class: wp-block-pullquote
4 |
5 | test_url: https://thebaffler.com/latest/hong-kong-literatures-growing-pains-chu
6 |
--------------------------------------------------------------------------------
/theses.enc.sorbonne.fr.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="head"]
2 | author: //div[@class="name"]
3 | body: //article[@id="text"]
4 |
5 | test_url: http://theses.enc.sorbonne.fr/2014/sidre
6 |
--------------------------------------------------------------------------------
/tidbits.com.txt:
--------------------------------------------------------------------------------
1 | author: //span[@class='fn']
2 | date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
3 | test_url: http://tidbits.com/article/12651
--------------------------------------------------------------------------------
/viget.com.txt:
--------------------------------------------------------------------------------
1 | body: (//article)[1]
2 |
3 | strip_id_or_class: sharer
4 |
5 | prune: no
6 |
7 | test_url: https://www.viget.com/articles/understanding-futures-in-rust-part-1/
8 |
--------------------------------------------------------------------------------
/yourerie.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@itemprop="articleBody"]
2 | test_url: http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA
3 |
--------------------------------------------------------------------------------
/zaknrw.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single-content ')]
2 |
3 | test_url: http://www.zaknrw.de/medium/audience-development-und-diversitaet
4 |
--------------------------------------------------------------------------------
/9to5mac.com.txt:
--------------------------------------------------------------------------------
1 | strip: //p[preceding::hr]/span[@class="embed-youtube"]
2 | strip: //hr
3 |
4 | test_url: https://9to5mac.com/2017/04/14/toshiba-semiconductor-business-apple-foxconn/
5 |
--------------------------------------------------------------------------------
/abplive.com.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0
2 |
3 | test_url: https://www.abplive.com/news/india/feed
4 |
--------------------------------------------------------------------------------
/aps.dz.txt:
--------------------------------------------------------------------------------
1 | # author: HolgerAusB | version 2023-02-06
2 |
3 | body: //div[@class='itemBody']
4 |
5 | test_url: https://www.aps.dz/algerie/151377-communique-du-conseil-des-ministres
6 |
--------------------------------------------------------------------------------
/brookings.edu.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: inline-widget
2 |
3 | test_url: https://www.brookings.edu/blog/techtank/2019/06/26/why-data-ownership-is-the-wrong-approach-to-protecting-privacy/
4 |
--------------------------------------------------------------------------------
/car-it.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry-content']
2 | strip: //div[@class='yasr-auto-insert-visitor']
3 |
4 | test_url: https://www.car-it.com/die-utopie-von-level-5/id-0071510
5 |
6 |
--------------------------------------------------------------------------------
/cn.engadget.com.txt:
--------------------------------------------------------------------------------
1 | title: //h2[@class="posttitle"]
2 | body: //div[@class="postbody"]
3 | prune: no
4 |
5 | test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/
6 |
--------------------------------------------------------------------------------
/dictionary.reference.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'source-data')]
2 | strip: //button
3 |
4 | prune: no
5 |
6 | test_url: http://dictionary.reference.com/browse/propaganda
7 |
--------------------------------------------------------------------------------
/ericsuh.com.txt:
--------------------------------------------------------------------------------
1 | date: //h6[@class='datetime']/child::text()
2 | author: string("Eric J. Suh")
3 | footnotes: yes
4 | test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html
--------------------------------------------------------------------------------
/f-droid.org.txt:
--------------------------------------------------------------------------------
1 | title: //h2[@class="post-title"]
2 | author: //header[@class="post-header"]//img/@alt
3 |
4 | test_url: https://f-droid.org/en/2020/01/16/tracking-the-trackers.html
5 |
--------------------------------------------------------------------------------
/github.blog.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="post__header-content"]//h1
2 |
3 | test_url: https://github.blog/2020-02-12-supercharge-your-command-line-experience-github-cli-is-now-in-beta/
4 |
--------------------------------------------------------------------------------
/grumpygamer.com.txt:
--------------------------------------------------------------------------------
1 | title: //h3[@class="post-title"]
2 | author: "Ron Gilbert"
3 | body: //div[@class="grumpypost"]
4 |
5 | test_url: https://grumpygamer.com/scope_budget_schedule
6 |
--------------------------------------------------------------------------------
/hmercer.com.txt:
--------------------------------------------------------------------------------
1 | title: //*[@class='ptitle']
2 | date: //span[@class='date']
3 | body: //div[@class='body']
4 | prune: no
5 | test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/
--------------------------------------------------------------------------------
/laughingsquid.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class='entry-title']
2 | body: //div[@class='entry-content']
3 | test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/
--------------------------------------------------------------------------------
/lawfareblog.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'node-body')]//div[@class='field-items']
2 | title: //div[@class='title']
3 |
4 | test_url: https://lawfareblog.com/limits-panopticon
5 |
--------------------------------------------------------------------------------
/nf-farn.de.txt:
--------------------------------------------------------------------------------
1 | body: //article[contains(concat(" ",normalize-space(@class)," ")," view-mode-full ")]
2 |
3 | prune: no
4 |
5 | test_url: https://www.nf-farn.de/maer-ueberbevoelkerung
6 |
--------------------------------------------------------------------------------
/osmc.tv.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="post-title"]
2 | date: //time[@class="post-date"]/@datetime
3 |
4 | test_url: https://osmc.tv/2019/11/osmcs-november-update-is-here-with-kodi-18-5/
5 |
--------------------------------------------------------------------------------
/prolost.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='body']
2 | title: //h2[@class='title']
3 | date: //span[@class='posted-on']
4 | test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html
--------------------------------------------------------------------------------
/radar.oreilly.com.txt:
--------------------------------------------------------------------------------
1 | date://span[@class='date']
2 | body://div[@class='entry-body']
3 | test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html
--------------------------------------------------------------------------------
/the-tls.co.uk.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'tls-article-body')]
2 |
3 | test_url: https://www.the-tls.co.uk/articles/katrina-history-1915-2015-andy-horowitz-review-peter-coates/
4 |
--------------------------------------------------------------------------------
/theoaklandpress.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='fullstory']
2 | strip: //div[@id='page_leftbar']
3 | test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt
--------------------------------------------------------------------------------
/thisamericanlife.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 |
3 | test_url: https://www.thisamericanlife.org/282/transcript
4 | test_contains: I was part of sending an innocent man to jail
5 |
--------------------------------------------------------------------------------
/wenow.com.txt:
--------------------------------------------------------------------------------
1 | title: //article//h1
2 |
3 | strip: //a[@class="post-previous"]
4 | strip: //aside
5 |
6 | test_url: https://www.wenow.com/2021/06/07/empreinte-carbone-de-la-viande/
7 |
--------------------------------------------------------------------------------
/.sodexo.com.txt:
--------------------------------------------------------------------------------
1 | body: //main[@id='main']
2 |
3 | strip: //button
4 |
5 | test_url: https://uk.sodexo.com/home/media/news-room/newsList-area/uk-press-releases/SSAFA-Friendly-to-Forces.html
6 |
--------------------------------------------------------------------------------
/blogs.reuters.com.txt:
--------------------------------------------------------------------------------
1 | title: //div[@id='single']/h1
2 | body: //div[@id='postcontent']
3 | test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/
--------------------------------------------------------------------------------
/crimethinc.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="readingtext"]
2 | title: substring-after(substring-after(//title, ':'), ':')
3 | test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php
--------------------------------------------------------------------------------
/good.is.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="title"]/div/h1
2 | body: //div[@class="body"]
3 | date: //li[@class="date-time"]
4 | test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/
--------------------------------------------------------------------------------
/highscalability.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='journal-entry-text']
2 |
3 | test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html
--------------------------------------------------------------------------------
/lesecolohumanistes.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]/*[not(self::figure)]
2 |
3 | test_url: https://lesecolohumanistes.fr/interdependance/
4 |
--------------------------------------------------------------------------------
/marketresearchdirect.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="product_tabs-0"]
2 |
3 | test_url: https://www.marketresearchdirect.com/consumer-goods/smart-lighting-market-in-india-2018-2023-market-report
4 |
--------------------------------------------------------------------------------
/mebedo.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' av_two_third ')]
2 |
3 | test_url: https://www.mebedo.de/themen/faq-zur-verantwortlichen-elektrofachkraft-vefk/
4 |
--------------------------------------------------------------------------------
/philosophyforlife.org.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 | replace_string(data-src=):src=
3 |
4 | test_url: https://www.philosophyforlife.org/blog/mind-palaces-the-art-of-psycho-technics-or-soul-craft
5 |
--------------------------------------------------------------------------------
/redalemeden.com.txt:
--------------------------------------------------------------------------------
1 | author: //header//h1[@class="full-name"]
2 | date: //div[@class="post-metadata"]//time/@date
3 |
4 | test_url: https://redalemeden.com/blog/2019/we-need-chrome-no-more
5 |
--------------------------------------------------------------------------------
/redtimmy.com.txt:
--------------------------------------------------------------------------------
1 | title: //h4[@class='entry-title']
2 | body: //div[@class='post-content']
3 |
4 | test_url: https://www.redtimmy.com/docker/a-tale-of-escaping-a-hardened-docker-container/
5 |
--------------------------------------------------------------------------------
/saltyworld.net.txt:
--------------------------------------------------------------------------------
1 | body: //article[contains(@class, 'hentry')]
2 |
3 | prune: no
4 | # Remove the related-posts section
5 | strip_id_or_class: relpost
6 |
7 | test_url: https://saltyworld.net/contrapoints/
8 |
--------------------------------------------------------------------------------
/sfweekly.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'content_body')]
2 | strip_id_or_class: det_rel
3 | test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/
--------------------------------------------------------------------------------
/techno-science.net.txt:
--------------------------------------------------------------------------------
1 | title://div[@class="news"]/div[@class="titre"]
2 | body://div[@class="news"]/div[@class="texte"]
3 | test_url: http://www.techno-science.net/?onglet=news&news=14808
4 |
--------------------------------------------------------------------------------
/tthfanfic.org.txt:
--------------------------------------------------------------------------------
1 | title: //h2
2 | author: //a[starts-with(@href, '/AuthorStories')]
3 | body: //div[@id='storyinnerbody']
4 | test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm
--------------------------------------------------------------------------------
/web-libre.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='template_article']
2 |
3 | strip_id_or_class: article_more
4 | strip: //hr
5 |
6 | test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html
--------------------------------------------------------------------------------
/americanthinker.com.txt:
--------------------------------------------------------------------------------
1 | # Avoid duplicating content
2 | strip_id_or_class: mrf-hidden
3 |
4 | test_url: https://www.americanthinker.com/articles/2019/10/hillarys_health_coming_up_again.html
5 |
--------------------------------------------------------------------------------
/autoactu.com.txt:
--------------------------------------------------------------------------------
1 |
2 | body: //div[@id="bloc_actu"]/parent::*
3 | title: //div[@id="content"]/h1[1]
4 |
5 | test_url: http://www.autoactu.com/thomas-owsianski-nomme-president-d-audi-chine.shtml
6 |
--------------------------------------------------------------------------------
/bobbyromeo.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: adsbygoogle
2 | strip_id_or_class: yarpp-related
3 |
4 | test_url: http://bobbyromeo.com/technology/xiaomi-smart-1080p-wifi-ip-camera-rtsp-streaming-hack/
5 |
--------------------------------------------------------------------------------
/brettterpstra.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post full']
2 | title: //h1
3 | author: substring-after(//title, '- ')
4 | date: //span[@class='date']
5 | test_url: http://brettterpstra.com/byword-for-ios/
--------------------------------------------------------------------------------
/code.google.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="gc-pagecontent"]
2 | strip: //a[@class="backtotop"]
3 | prune: no
4 |
5 | test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html
--------------------------------------------------------------------------------
/domo-blog.fr.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@id="extras"]
2 | strip: //div[@class="herald-da"]
3 |
4 | test_url: https://www.domo-blog.fr/economisez-energie-argent-avec-domotique-chauffe-eau-shelly-1-pro/
5 |
--------------------------------------------------------------------------------
/fair.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]
2 |
3 | test_url: https://fair.org/home/still-manufacturing-consent-an-interview-with-noam-chomsky/
4 |
--------------------------------------------------------------------------------
/historic-uk.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'content__inner__text')]
2 |
3 | prune: no
4 |
5 | test_url: https://www.historic-uk.com/HistoryUK/HistoryofEngland/Tragic-Demise-Edward-II/
6 |
--------------------------------------------------------------------------------
/keycloak.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 |
3 | strip_id_or_class: preamble
4 | strip_id_or_class: sidebarblock
5 |
6 | test_url: https://www.keycloak.org/docs/12.0/authorization_services/
7 |
--------------------------------------------------------------------------------
/mcorbin.fr.txt:
--------------------------------------------------------------------------------
1 | title: //div[contains(concat(' ',normalize-space(@class),' '),' post-header ')]//h2
2 | body: //div[@id="post"]/div[2]
3 | test_url: https://www.mcorbin.fr/posts/2023-07-04-metriques/
--------------------------------------------------------------------------------
/mises.org.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: 'book-ad'
2 | strip_id_or_class: 'bigger pullquote'
3 | strip_id_or_class: 'subscribe'
4 | strip_id_or_class: 'blog-link'
5 | test_url: http://mises.org/daily/4804
--------------------------------------------------------------------------------
/motorcyclistonline.com.txt:
--------------------------------------------------------------------------------
1 | http_header(cookie): bonnier_consent=true
2 |
3 | strip_id_or_class: arcAdsBox
4 |
5 | author: //div[contains(concat(' ',normalize-space(@class),' '),' by_author ')]
6 |
--------------------------------------------------------------------------------
/pymotw.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[starts-with(@id, 'module-')]
2 |
3 | test_url: https://pymotw.com/3/configparser/
4 | test_url: https://pymotw.com/3/shlex/
5 | test_url: https://pymotw.com/3/sys/
6 |
--------------------------------------------------------------------------------
/riffreporter.de.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 | strip: //svg
3 | prune: no
4 |
5 | test_url: https://www.riffreporter.de/de/wissen/klimakrise-artensterben-biodiversitaet-droht-sechstes-massenaussterben
6 |
--------------------------------------------------------------------------------
/rnd.de.txt:
--------------------------------------------------------------------------------
1 | body: //article/div[contains(concat(' ',normalize-space(@class),' '),' a__in ')]
2 |
3 | strip_id_or_class: a__rel-a-li
4 | strip_id_or_class: ord--0
5 | strip_id_or_class: a-hd__aut
6 |
--------------------------------------------------------------------------------
/scottohara.me.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | author: "Scott O'Hara"
3 | date: //time[@itemprop="datePublished"]/@datetime
4 |
5 | test_url: https://www.scottohara.me/blog/2019/01/21/how-do-you-figure.html
6 |
--------------------------------------------------------------------------------
/share.ez.no.txt:
--------------------------------------------------------------------------------
1 | author: //div[@class="entry-user"]//a
2 | body: //div[@class="attribute-long"]
3 |
4 | test_url: http://share.ez.no/blogs/jean-luc-chassaing/how-one-should-code-in-ez-platform
5 |
--------------------------------------------------------------------------------
/sqlite.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='ff-body']
2 |
3 | replace_string(
):
4 |
5 | prune: no
6 |
7 | test_url: http://www.sqlite.org/fileformat2.html
--------------------------------------------------------------------------------
/thingiverse.com.txt:
--------------------------------------------------------------------------------
1 | tidy: yes
2 | autodetect_on_failure: yes
3 | prune: yes
4 | body: /html/head/title
5 |
6 | test_url: https://www.thingiverse.com/thing:3868321
7 | test_contains: Panther Origami
--------------------------------------------------------------------------------
/varsity.co.uk.txt:
--------------------------------------------------------------------------------
1 | # Facebook comments are wrapped in an h2 (oddly). Without the strip rule below, the line 'Comments' is preserved by the text parser
2 |
3 | strip: //h2
4 | test_url: http://www.varsity.co.uk/reviews/2662
--------------------------------------------------------------------------------
/will-self.com.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@class="widget-area"]
2 | title: //*[@class="entry-title"]
3 | date: //time[@class="entry-date"]
4 | test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/
--------------------------------------------------------------------------------
/brandingstrategyinsider.com.txt:
--------------------------------------------------------------------------------
1 | date://h2[@class="date-header"]
2 | body://div[@class="entry-content"]
3 | test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html
--------------------------------------------------------------------------------
/catb.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='article']
2 | strip: //div[@class='revhistory']
3 | strip: //div[@class='toc']
4 | tidy: no
5 | prune: no
6 |
7 | test_url: http://catb.org/~esr/faqs/smart-questions.html
--------------------------------------------------------------------------------
/forbiddenstories.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'complexe-content')]
2 |
3 | prune: no
4 |
5 | test_url: https://forbiddenstories.org/story-killers/gauri-lankesh-in-the-age-of-false-news/
6 |
--------------------------------------------------------------------------------
/hanselman.com.txt:
--------------------------------------------------------------------------------
1 | date: //span[@class="item-date"]
2 | body: //div[@class="item-content"]
3 | strip_comments: no
4 | test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx
--------------------------------------------------------------------------------
/iphonetweak.fr.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class='fond_titre']/h1[@class='post-title']
2 | body: //div[@class="post-chapo"]
3 |
4 | test_url: http://iphonetweak.fr/2016/05/20/apple-watch-deja-jailbreakee
5 |
--------------------------------------------------------------------------------
/m.theregister.co.uk.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@class='wptl btm']
2 | body: //div[@id='article']//h2 | //div[@id='body']
3 |
4 | test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
5 |
--------------------------------------------------------------------------------
/publications.parliament.uk.txt:
--------------------------------------------------------------------------------
1 | author: //meta[@name="Author"]
2 | date: //meta[@name="Date"]
3 | strip: //h5
4 | test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm
--------------------------------------------------------------------------------
/robertsspaceindustries.com.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: 'sharedaddy'
2 | strip_id_or_class: 'respond'
3 | strip_id_or_class: 'meta'
4 | test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/
--------------------------------------------------------------------------------
/smarthomebeginner.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' inner-post-entry ')]
2 |
3 | test_url: https://www.smarthomebeginner.com/docker-home-media-server-2018-basic/
4 |
--------------------------------------------------------------------------------
/taxacc.jp.txt:
--------------------------------------------------------------------------------
1 | title: substring-before(//title, '/朝日税理士法人')
2 |
3 | strip: //h3[contains(., 'カテゴリー')]
4 | strip: //h3[contains(., '月別アーカイブ')]
5 |
6 | test_url: https://www.taxacc.jp/blog/202512/
7 |
--------------------------------------------------------------------------------
/theintercept.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="Headline"]
2 | body: //div[@class="PostContent"]
3 |
4 | test_url: https://theintercept.com/2014/10/30/inside-story-matt-taibbis-departure-first-look-media/
5 |
--------------------------------------------------------------------------------
/therumpus.net.txt:
--------------------------------------------------------------------------------
1 | title: /html/body/div/div[2]/div/div/h1
2 |
3 | body: /html/body/div/div[2]/div/div/div[2]
4 | test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes
--------------------------------------------------------------------------------
/.stanford.edu.txt:
--------------------------------------------------------------------------------
1 | title: //div[@id='aueditable']/h1
2 | body: //div[@id='content']
3 | strip: //div[@id='message' or @id='linklist']
4 | prune: no
5 | test_url: http://plato.stanford.edu/entries/supervenience/
--------------------------------------------------------------------------------
/5by5.tv.txt:
--------------------------------------------------------------------------------
1 | body: //*[@id="episode"]
2 | prune: no
3 | tidy: no
4 |
5 | autodetect_next_page: no
6 | strip_id_or_class: player
7 |
8 | strip://*[@id="header"]
9 | test_url: http://5by5.tv/buildanalyze/60
--------------------------------------------------------------------------------
/batenka.ru.txt:
--------------------------------------------------------------------------------
1 | body: //section[@itemprop="articleBody"]
2 |
3 | strip: //section[contains(@class, 'hide-mobile')]
4 |
5 | prune: no
6 |
7 | test_url: https://batenka.ru/unity/sect/white-brotherhood/
8 |
--------------------------------------------------------------------------------
/bez.es.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='text_art']
2 | strip: //div[@class='cab_datos_opinion']
3 | strip: //div[@class='sumario2_left']
4 |
5 | test_url: http://www.bez.es/382758623/otros-fracasos-empresas.html
6 |
--------------------------------------------------------------------------------
/business-standard.com.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0
2 |
3 | test_url: https://www.business-standard.com/rss/opinion-105.rss
4 |
--------------------------------------------------------------------------------
/derekseaman.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='entry-content']
2 |
3 | prune: no
4 | tidy: no
5 |
6 | test_url: https://www.derekseaman.com/2019/09/how-to-pi-hole-plus-dnscrypt-setup-on-raspberry-pi-4.html
7 |
--------------------------------------------------------------------------------
/houstonchronicle.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='subsection_wrap']
2 | next_page_link: //ul[@class='pagination']//a[contains(text(), '»')]
3 |
4 | test_url: http://www.houstonchronicle.com/nasa/adrift/1/
5 |
--------------------------------------------------------------------------------
/hs.fi.txt:
--------------------------------------------------------------------------------
1 | prune: yes
2 | tidy: yes
3 | replace_string(