├── feeds.feedblitz.com.txt ├── as-web.jp.txt ├── ganglia.info.txt ├── autocrypt.org.txt ├── lukew.com.txt ├── what-if.xkcd.com.txt ├── soundcity.tv.txt ├── crimemagazine.com.txt ├── ht.ly.txt ├── the-magazine.org.txt ├── chareidi.org.txt ├── pxlnv.com.txt ├── snip.ly.txt ├── .dxy.cn.txt ├── alex.mullr.net.txt ├── blog.lepine.pro.txt ├── opensource.org.txt ├── techmeme.com.txt ├── blog.fefe.de.txt ├── danluu.com.txt ├── axesslab.com.txt ├── bitelia.com.txt ├── blog.spu.edu.txt ├── briefly.co.za.txt ├── digitalcourage.de.txt ├── ecetia.com.txt ├── extracine.com.txt ├── ghanaweb.com.txt ├── help.fivefilters.org.txt ├── jjahnke.net.txt ├── jollinger.com.txt ├── kumailplus.com.txt ├── luxuo.com.txt ├── mattcutts.com.txt ├── roy.gbiv.com.txt ├── achgut.com.txt ├── backlinko.com.txt ├── facta.co.jp.txt ├── meowni.ca.txt ├── motorfull.com.txt ├── 512pixels.net.txt ├── apple.news.txt ├── elance.com.txt ├── futurism.com.txt ├── getnews.jp.txt ├── hacf.fr.txt ├── jamesclear.com.txt ├── kont.me.txt ├── lostgarden.com.txt ├── mbl.is.txt ├── n.survol.fr.txt ├── philosophynow.org.txt ├── scotthelme.co.uk.txt ├── acidcow.com.txt ├── alseraj.net.txt ├── avantivictoirerao.com.txt ├── blogs.gnome.org.txt ├── carlchenet.com.txt ├── doc.wallabag.org.txt ├── gsmarena.com.txt ├── kachestvo.ru.txt ├── monkeyzen.com.txt ├── paquier.xyz.txt ├── summitroute.com.txt ├── .readthedocs.io.txt ├── allafrica.com.txt ├── eternabuenosaires.com.txt ├── fakirpresse.info.txt ├── fivefilters.org.txt ├── fok.nl.txt ├── gist.github.com.txt ├── indehekken.net.txt ├── macdrifter.com.txt ├── marksdailyapple.com.txt ├── nifi.apache.org.txt ├── osmand.net.txt ├── retro-games.fr.txt ├── staltz.com.txt ├── theteaspot.com.txt ├── wallabag.org.txt ├── zerokspot.com.txt ├── altaonline.com.txt ├── blog.imirhil.fr.txt ├── dagogtid.no.txt ├── dailydot.com.txt ├── dr-b.io.txt ├── drdobbs.com.txt ├── flyingmachinestudios.com.txt ├── gizmovil.com.txt ├── goodfil.ms.txt ├── ishadeed.com.txt ├── je-suis-papa.com.txt ├── 
jungle-world.com.txt ├── rust-lang-nursery.github.io.txt ├── stjv.fr.txt ├── vg.no.txt ├── LICENSE.txt ├── appleweblog.com.txt ├── az.lib.ru.txt ├── blogs.forbes.com.txt ├── brucelawson.co.uk.txt ├── doc.rust-lang.ru.txt ├── dropbox.com.txt ├── gihyo.jp.txt ├── help.sharegate.com.txt ├── hiperpop.com.txt ├── hipertextual.com.txt ├── krone.at.txt ├── panic.com.txt ├── perell.com.txt ├── portertech.ca.txt ├── swcarpentry.github.io.txt ├── tofugu.com.txt ├── urbandictionary.com.txt ├── warnerbros.fr.txt ├── zataz.com.txt ├── altfoto.com.txt ├── chaperonsetvous.fr.txt ├── cucharasonica.com.txt ├── dansdata.com.txt ├── doc.rust-lang.org.txt ├── interviewmagazine.com.txt ├── jvt.me.txt ├── najlepsze-ksiazki.pl.txt ├── news.rub.de.txt ├── pentaxforums.com.txt ├── phototrend.fr.txt ├── renenekuda.cz.txt ├── tldp.org.txt ├── vot-tak.tv.txt ├── .about.com.txt ├── blogs.lse.ac.uk.txt ├── coalicionporelevangelio.org.txt ├── crn.de.txt ├── elfster.com.txt ├── fiftytwo.in.txt ├── finexpert.e15.cz.txt ├── marigold.cz.txt ├── monkeyuser.com.txt ├── news.techmeme.com.txt ├── python.org.txt ├── utdailybeacon.com.txt ├── worldwidewords.org.txt ├── .tweakblogs.net.txt ├── caseinterview.com.txt ├── gizmologia.com.txt ├── happyassassin.net.txt ├── iansommerville.com.txt ├── iplaysoft.com.txt ├── jobbank.gc.ca.txt ├── lefilrouge.media.txt ├── matt.might.net.txt ├── mein-mmo.de.txt ├── radionz.co.nz.txt ├── roomescapeartist.com.txt ├── sivers.org.txt ├── sports.ru.txt ├── stumbleupon.com.txt ├── thenews.coop.txt ├── turnoff.us.txt ├── vedonlyonti.com.txt ├── betabeat.com.txt ├── cashless.pl.txt ├── cfclrk.com.txt ├── china-gadgets.de.txt ├── davidwalsh.name.txt ├── democracynow.org.txt ├── dissentmagazine.org.txt ├── earvingad.github.io.txt ├── explosm.net.txt ├── freelancer.com.txt ├── fs.blog.txt ├── gauchiste.fr.txt ├── izismile.com.txt ├── lado.mx.txt ├── make.wordpress.org.txt ├── mbk-news.appspot.com.txt ├── mytotalretail.com.txt ├── openstreetmap.org.txt ├── php.net.txt ├── 
vakarm.net.txt ├── archiloque.net.txt ├── berlingske.dk.txt ├── carnegie.ru.txt ├── cooper.com.txt ├── doughellmann.com.txt ├── fokus.se.txt ├── geenstijl.nl.txt ├── hackertarget.com.txt ├── ianlewis.org.txt ├── juliareda.eu.txt ├── kresus.org.txt ├── letraslibres.com.txt ├── longform.org.txt ├── oschina.net.txt ├── reactjs.org.txt ├── signal.org.txt ├── slrlounge.com.txt ├── yosoy.red.txt ├── .mitpress.mit.edu.txt ├── aerobuzz.fr.txt ├── alexduner.com.txt ├── archdaily.com.txt ├── c.newsnow.com.txt ├── codeproject.com.txt ├── declassifieduk.org.txt ├── elblogsalmon.com.txt ├── esglobal.org.txt ├── gold.ac.uk.txt ├── nextcloud.com.txt ├── optimizesmart.com.txt ├── pastepad.fivefilters.org.txt ├── pixellibre.net.txt ├── radishzz.cc.txt ├── sme.sk.txt ├── sourcebooks.com.txt ├── sprengsatz.de.txt ├── stopgame.ru.txt ├── thefilmexperience.net.txt ├── triplebyte.com.txt ├── vivirmexico.com.txt ├── w3.org.txt ├── www.seriouseats.com.txt ├── a11ywithlindsey.com.txt ├── blog.eng.xogrp.com.txt ├── blog.nightly.mozilla.org.txt ├── c.newsnow.co.uk.txt ├── cabinetmagazine.org.txt ├── caffereggio.net.txt ├── economie.gouv.fr.txt ├── gnu.org.txt ├── lvsl.fr.txt ├── lwlies.com.txt ├── marriedtothesea.com.txt ├── miops.com.txt ├── missnumerique.com.txt ├── pmf.silvrback.com.txt ├── pogue.blogs.nytimes.com.txt ├── queerty.com.txt ├── renverse.co.txt ├── thebaffler.com.txt ├── theses.enc.sorbonne.fr.txt ├── tidbits.com.txt ├── viget.com.txt ├── yourerie.com.txt ├── zaknrw.de.txt ├── 9to5mac.com.txt ├── abplive.com.txt ├── aps.dz.txt ├── brookings.edu.txt ├── car-it.com.txt ├── cn.engadget.com.txt ├── dictionary.reference.com.txt ├── ericsuh.com.txt ├── f-droid.org.txt ├── github.blog.txt ├── grumpygamer.com.txt ├── hmercer.com.txt ├── laughingsquid.com.txt ├── lawfareblog.com.txt ├── nf-farn.de.txt ├── osmc.tv.txt ├── prolost.com.txt ├── radar.oreilly.com.txt ├── the-tls.co.uk.txt ├── theoaklandpress.com.txt ├── thisamericanlife.org.txt ├── wenow.com.txt ├── .sodexo.com.txt ├── 
blogs.reuters.com.txt ├── crimethinc.com.txt ├── good.is.txt ├── highscalability.com.txt ├── lesecolohumanistes.fr.txt ├── marketresearchdirect.com.txt ├── mebedo.de.txt ├── philosophyforlife.org.txt ├── redalemeden.com.txt ├── redtimmy.com.txt ├── saltyworld.net.txt ├── sfweekly.com.txt ├── techno-science.net.txt ├── tthfanfic.org.txt ├── web-libre.org.txt ├── americanthinker.com.txt ├── autoactu.com.txt ├── bobbyromeo.com.txt ├── brettterpstra.com.txt ├── code.google.com.txt ├── domo-blog.fr.txt ├── fair.org.txt ├── historic-uk.com.txt ├── keycloak.org.txt ├── mcorbin.fr.txt ├── mises.org.txt ├── motorcyclistonline.com.txt ├── pymotw.com.txt ├── riffreporter.de.txt ├── rnd.de.txt ├── scottohara.me.txt ├── share.ez.no.txt ├── sqlite.org.txt ├── thingiverse.com.txt ├── varsity.co.uk.txt ├── will-self.com.txt ├── brandingstrategyinsider.com.txt ├── catb.org.txt ├── forbiddenstories.org.txt ├── hanselman.com.txt ├── iphonetweak.fr.txt ├── m.theregister.co.uk.txt ├── publications.parliament.uk.txt ├── robertsspaceindustries.com.txt ├── smarthomebeginner.com.txt ├── taxacc.jp.txt ├── theintercept.com.txt ├── therumpus.net.txt ├── .stanford.edu.txt ├── 5by5.tv.txt ├── batenka.ru.txt ├── bez.es.txt ├── business-standard.com.txt ├── derekseaman.com.txt ├── houstonchronicle.com.txt ├── hs.fi.txt ├── indiehackers.com.txt ├── instagr.am.txt ├── kathimerini.gr.txt ├── labs.mwrinfosecurity.com.txt ├── luminous-landscape.com.txt ├── lupa.cz.txt ├── mobilenet.cz.txt ├── scnsrc.me.txt ├── singularityhub.com.txt ├── zoomit.ir.txt ├── 43folders.com.txt ├── brooksreview.net.txt ├── bzg.fr.txt ├── chomsky.info.txt ├── ciperchile.cl.txt ├── dcurt.is.txt ├── developers.facebook.com.txt ├── elmalpensante.com.txt ├── feinschwarz.net.txt ├── gorky.media.txt ├── groups.drupal.org.txt ├── indiatimes.com.txt ├── openthemagazine.com.txt ├── scinfolex.com.txt ├── soundonsound.com.txt ├── spin.com.txt ├── toolsandtoys.net.txt ├── trailer.web-view.net.txt ├── wphive.com.txt ├── xlsemanal.com.txt 
├── amptoons.com.txt ├── bernama.com.txt ├── book.douban.com.txt ├── caravanmagazine.in.txt ├── delong.typepad.com.txt ├── gameswirtschaft.de.txt ├── informationclearinghouse.info.txt ├── itavisen.no.txt ├── keyboardmag.com.txt ├── kingarthurflour.com.txt ├── lifehack.org.txt ├── m.xkcd.com.txt ├── mainichi.jp.txt ├── marcvidal.net.txt ├── news.jp.txt ├── nj.com.txt ├── nplusonemag.com.txt ├── onlinewelten.com.txt ├── parliament.uk.txt ├── pinterest.com.txt ├── plzkthxbai.com.txt ├── rancher.com.txt ├── revdennismccarty.com.txt ├── thenetworkgarden.blogs.com.txt ├── theodinproject.com.txt ├── thethaovanhoa.vn.txt ├── wikiwand.com.txt ├── .mozilla.org.txt ├── a.tldrnewsletter.com.txt ├── askingbox.de.txt ├── dadall.info.txt ├── fairphone.com.txt ├── linuxnix.com.txt ├── mactechnews.de.txt ├── medialens.org.txt ├── moo.nac.uci.edu.txt ├── pjmedia.com.txt ├── real.gr.txt ├── thepointmag.com.txt ├── ux.artu.tv.txt ├── wpbeginner.com.txt ├── alexwlchan.net.txt ├── arduino-tutorial.de.txt ├── ascarter.net.txt ├── buquad.com.txt ├── hazlitt.net.txt ├── health.com.txt ├── histoire-filante.fr.txt ├── mesec.cz.txt ├── pandemicequityinitiative.com.txt ├── pandodaily.com.txt ├── philstar.com.txt ├── racjonalista.pl.txt ├── rom-game.fr.txt ├── singaporeanstocksinvestor.blogspot.com.txt ├── stadt-muenster.de.txt ├── tbray.org.txt ├── thesocialitefamily.com.txt ├── tokyo-np.co.jp.txt ├── web.gekisaka.jp.txt ├── writerunboxed.com.txt ├── .fivefilters.org.txt ├── .robweychert.com.txt ├── 24a11y.com.txt ├── blog.native-instruments.com.txt ├── deia.com.txt ├── greaterwrong.com.txt ├── halo.bungie.org.txt ├── jacobin.com.txt ├── jandan.net.txt ├── leb.fbi.gov.txt ├── microsiervos.com.txt ├── proskauer.com.txt ├── roughtype.com.txt ├── slog.thestranger.com.txt ├── utiliser-lightroom.com.txt ├── www2.cnrs.fr.txt ├── .repubblica.it.txt ├── bjango.com.txt ├── blog.robertelder.org.txt ├── blog.sentry.io.txt ├── cars.com.txt ├── cohost.org.txt ├── devblogs.microsoft.com.txt ├── edge.org.txt 
├── gocomics.com.txt ├── gurusblog.com.txt ├── isource.com.txt ├── maitre-eolas.fr.txt ├── manga-news.com.txt ├── nosalty.hu.txt ├── protonmail.com.txt ├── ruhlman.com.txt ├── shifteleven.com.txt ├── timesofisrael.com.txt ├── wordpress.org.txt ├── wz-newsline.de.txt ├── .allthingsd.com.txt ├── 37signals.com.txt ├── 7newsbelize.com.txt ├── agirpourlatransition.ademe.fr.txt ├── bbva.es.txt ├── bdaily.co.uk.txt ├── blog.chriszacharias.com.txt ├── blog.twitter.com.txt ├── blog.wells.ee.txt ├── borderhouseblog.com.txt ├── business2community.com.txt ├── cert-bund.de.txt ├── ciaosamin.com.txt ├── communities-dominate.blogs.com.txt ├── eckerd.edu.txt ├── hespress.com.txt ├── hometheaterreview.com.txt ├── literaryreview.co.uk.txt ├── maxim.com.txt ├── palmbeachpost.com.txt ├── phys.org.txt ├── propakistani.pk.txt ├── retractionwatch.com.txt ├── robots.thoughtbot.com.txt ├── spiderum.com.txt ├── techpinions.com.txt ├── thewirecutter.com.txt ├── tuaw.com.txt ├── wpmayor.com.txt ├── alsacreations.com.txt ├── cjr.org.txt ├── filamentgroup.com.txt ├── fortelabs.co.txt ├── geeksofdoom.com.txt ├── higcapital.com.txt ├── humantransit.org.txt ├── mforum.cari.com.my.txt ├── mikeash.com.txt ├── neunetz.com.txt ├── seattletransitblog.com.txt ├── spectrejournal.com.txt ├── squashed.tumblr.com.txt ├── tijd.be.txt ├── vk.com.txt ├── voltairenet.org.txt ├── vozpopuli.com.txt ├── wochenanzeiger.de.txt ├── adslzone.net.txt ├── basicthinking.de.txt ├── blog.eleven-labs.com.txt ├── diagonalperiodico.net.txt ├── ecranlarge.com.txt ├── engineering.tumblr.com.txt ├── inhabitat.com.txt ├── muycomputerpro.com.txt ├── mysqlblog.fivefarmers.com.txt ├── newcriterion.com.txt ├── papodehomem.com.br.txt ├── rasgolatente.es.txt ├── resilience.org.txt ├── sayidaty.net.txt ├── sdxcentral.com.txt ├── toolinux.com.txt ├── blog.landr.com.txt ├── blog.pinboard.in.txt ├── boundlessline.org.txt ├── devlinsangle.blogspot.co.at.txt ├── eff.org.txt ├── gurumed.org.txt ├── hvg.hu.txt ├── ilyabirman.ru.txt ├── 
interconnected.org.txt ├── jp.motorsport.com.txt ├── lehollandaisvolant.net.txt ├── maritimedanmark.dk.txt ├── novinky.cz.txt ├── rachelandrew.co.uk.txt ├── rezeptwelt.de.txt ├── stefanjudis.com.txt ├── techcommunity.microsoft.com.txt ├── visualcapitalist.com.txt ├── .ietf.org.txt ├── .philhist.unibas.ch.txt ├── 24.ae.txt ├── acroswing.fr.txt ├── angrymetalguy.com.txt ├── cleafy.com.txt ├── donnahay.com.au.txt ├── gawker.com.txt ├── glazman.org.txt ├── gnppn.fr.txt ├── hiphopleeft.nl.txt ├── icannabis.tumblr.com.txt ├── labs.ripe.net.txt ├── nojesguiden.se.txt ├── openai.com.txt ├── photopills.com.txt ├── researchandmarkets.com.txt ├── rpgsite.net.txt ├── timeshighereducation.com.txt ├── useit.com.txt ├── vitispr.com.txt ├── yostivanich.com.txt ├── accaglobal.com.txt ├── audiobookshelf.org.txt ├── ebay.com.txt ├── grafikart.fr.txt ├── inessential.com.txt ├── mintpressnews.com.txt ├── omiliya.org.txt ├── popehat.com.txt ├── searchenginejournal.com.txt ├── terrestres.org.txt ├── thecounter.org.txt ├── touilleur-express.fr.txt ├── valdaiclub.com.txt ├── vc.ru.txt ├── .craigslist.org.txt ├── accesstoinsight.org.txt ├── addendum.org.txt ├── americandrink.net.txt ├── blog.dropbox.com.txt ├── cnrs.fr.txt ├── commonwealmagazine.org.txt ├── da.feedsportal.com.txt ├── firstthings.com.txt ├── granta.com.txt ├── haberler.com.txt ├── itwire.com.txt ├── journal.markusthoma.com.txt ├── lezephyrmag.com.txt ├── libcom.org.txt ├── msdn.microsoft.com.txt ├── nakedsecurity.sophos.com.txt ├── nextdraft.com.txt ├── politifact.com.txt ├── snob.ru.txt ├── timeshighereducation.co.uk.txt ├── wiki.guildwars.com.txt ├── 36kr.com.txt ├── amandala.com.bz.txt ├── annouchka.fr.txt ├── econlog.econlib.org.txt ├── exoplanets.nasa.gov.txt ├── folklore.org.txt ├── gizmodo.uol.com.br.txt ├── healthletter.mayoclinic.com.txt ├── linuxjournal.com.txt ├── macg.co.txt ├── marco.org.txt ├── nicj.net.txt ├── pastebin.com.txt ├── splinternews.com.txt ├── thesimpledollar.com.txt ├── web.dev.txt ├── 
wiki.guildwars2.com.txt ├── wmpoweruser.com.txt ├── .livejournal.com.txt ├── .redbullmusicacademy.com.txt ├── actualitte.com.txt ├── albayan.ae.txt ├── annatravelling.wordpress.com.txt ├── blog.kaelig.fr.txt ├── bostonreview.net.txt ├── ekultura.hu.txt ├── fictionpress.com.txt ├── franceculture.fr.txt ├── gofugyourself.com.txt ├── kottke.org.txt ├── linkedin.com.txt ├── n-tv.de.txt ├── redmas.com.co.txt ├── warriordudimanche.net.txt ├── alternet.org.txt ├── blog.mozilla.org.txt ├── coffeecircle.com.txt ├── csswizardry.com.txt ├── enikos.gr.txt ├── framablog.org.txt ├── information.dk.txt ├── itsfoss.com.txt ├── kickstarter.com.txt ├── linux.com.txt ├── marmiton.org.txt ├── mentalfloss.com.txt ├── mirrorfootball.co.uk.txt ├── news.rambler.ru.txt ├── parislemon.com.txt ├── sec.gov.txt ├── shahinkalantari.com.txt ├── thinkspot.com.txt ├── venturebeat.com.txt ├── watoday.com.au.txt ├── aftenposten.no.txt ├── autoblog.com.txt ├── dummies.com.txt ├── fmhy.net.txt ├── globalgrind.com.txt ├── hacks.mozilla.org.txt ├── jalopnik.com.txt ├── labs.bishopfox.com.txt ├── lesswrong.com.txt ├── mlssoccer.com.txt ├── tvtropes.org.txt ├── whatever.scalzi.com.txt ├── andy-bell.design.txt ├── arxiv-vanity.com.txt └── asymco.com.txt /feeds.feedblitz.com.txt: -------------------------------------------------------------------------------- 1 | http_header(referer): http://feedblitz.com 2 | -------------------------------------------------------------------------------- /as-web.jp.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | 3 | test_url: https://www.as-web.jp/f1/1275289 4 | -------------------------------------------------------------------------------- /ganglia.info.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@id = "mid")] 2 | test_url: http://ganglia.info/ 3 | -------------------------------------------------------------------------------- /autocrypt.org.txt: 
-------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@class='section'] 3 | 4 | prune: no 5 | -------------------------------------------------------------------------------- /lukew.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | 3 | test_url: https://www.lukew.com/ff/entry.asp?1995 4 | -------------------------------------------------------------------------------- /what-if.xkcd.com.txt: -------------------------------------------------------------------------------- 1 | autodetect_next_page: no 2 | test_url: http://what-if.xkcd.com/1/ -------------------------------------------------------------------------------- /soundcity.tv.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: sharing 2 | 3 | test_url: http://soundcity.tv/feed/ 4 | -------------------------------------------------------------------------------- /crimemagazine.com.txt: -------------------------------------------------------------------------------- 1 | autodetect_next_page: no 2 | test_url: http://www.crimemagazine.com/son-sam -------------------------------------------------------------------------------- /ht.ly.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //iframe[@id='hootFrame']/@src 2 | 3 | test_url: http://ht.ly/bOiZV -------------------------------------------------------------------------------- /the-magazine.org.txt: -------------------------------------------------------------------------------- 1 | tidy: no 2 | 3 | test_url: http://the-magazine.org/1/alone-together-again -------------------------------------------------------------------------------- /chareidi.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm 
-------------------------------------------------------------------------------- /pxlnv.com.txt: -------------------------------------------------------------------------------- 1 | date: //main//time/@datetime 2 | 3 | test_url: https://pxlnv.com/blog/bullshit-web/ 4 | -------------------------------------------------------------------------------- /snip.ly.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //meta[@property="og:url"]/@content 2 | 3 | test_url: http://snip.ly/qa1R -------------------------------------------------------------------------------- /.dxy.cn.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | prune: no 3 | 4 | test_url: http://neurosurg.dxy.cn/article/87224 -------------------------------------------------------------------------------- /alex.mullr.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="entry"] 2 | test_url: http://alex.mullr.net/blog/2011/05/on-spotify/ -------------------------------------------------------------------------------- /blog.lepine.pro.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | 3 | test_url: http://blog.lepine.pro/bus-de-donnees-datapipeline 4 | -------------------------------------------------------------------------------- /opensource.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='content clear-block'] 2 | test_url: http://opensource.org/node/537 -------------------------------------------------------------------------------- /techmeme.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link_in_feed: //b/a 2 | 3 | test_url: http://www.techmeme.com/feed.xml 4 | -------------------------------------------------------------------------------- 
/blog.fefe.de.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | date: //h3 3 | body: //ul 4 | 5 | test_url: http://blog.fefe.de/?ts=b063bf55 -------------------------------------------------------------------------------- /danluu.com.txt: -------------------------------------------------------------------------------- 1 | body: /html/body/main 2 | 3 | prune: no 4 | 5 | test_url: https://danluu.com/look-stupid/ 6 | -------------------------------------------------------------------------------- /axesslab.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@id="main-content"] 2 | 3 | test_url: https://axesslab.com/disabled-buttons-suck/ 4 | -------------------------------------------------------------------------------- /bitelia.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://bitelia.com/2011/09/klout-midiendo-influencia -------------------------------------------------------------------------------- /blog.spu.edu.txt: -------------------------------------------------------------------------------- 1 | body://div[@class='post'] 2 | test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/ -------------------------------------------------------------------------------- /briefly.co.za.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): PHP/7.4 2 | 3 | test_url: https://briefly.co.za/rss/south-africa.rss 4 | -------------------------------------------------------------------------------- /digitalcourage.de.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | test_url: https://digitalcourage.de/blog/2020/corona-apps_gastbeitrag 3 | -------------------------------------------------------------------------------- /ecetia.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://ecetia.com/2011/09/vida-de-jugon-vii-las-tres-es -------------------------------------------------------------------------------- /extracine.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://extracine.com/2011/09/straw-dogs-la-original -------------------------------------------------------------------------------- /ghanaweb.com.txt: -------------------------------------------------------------------------------- 1 | http_header(User-Agent): PHP/7.4 2 | 3 | test_url: https://cdn.ghanaweb.com/feed/newsfeed.xml 4 | -------------------------------------------------------------------------------- /help.fivefilters.org.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="title"]/h3 2 | date: substring-after(//div[@class="meta"], ": ") 3 | -------------------------------------------------------------------------------- /jjahnke.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry'] 2 | prune: no 3 | 4 | test_url: http://www.jjahnke.net/rundbr87.html#2514 -------------------------------------------------------------------------------- /jollinger.com.txt: -------------------------------------------------------------------------------- 1 | body: //body 2 | 3 | test_url: http://www.jollinger.com/photo/enlargers/guide-to-enlargers.htm 4 | -------------------------------------------------------------------------------- /kumailplus.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class = "entry-full"] 2 | 3 | test_url: http://www.kumailplus.com/2011/12/02/24308 -------------------------------------------------------------------------------- /luxuo.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class='post-content'] 2 | prune: no 3 | 4 | test_url: http://www.luxuo.com/watches/feed -------------------------------------------------------------------------------- /mattcutts.com.txt: -------------------------------------------------------------------------------- 1 | date: //*[@class = 'published'] 2 | test_url: http://www.mattcutts.com/blog/internet-censorship-sopa/ -------------------------------------------------------------------------------- /roy.gbiv.com.txt: -------------------------------------------------------------------------------- 1 | strip_comments: no 2 | test_url: http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven -------------------------------------------------------------------------------- /achgut.com.txt: -------------------------------------------------------------------------------- 1 | http_header(User-Agent): Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 2 | 3 | -------------------------------------------------------------------------------- /backlinko.com.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | strip: //footer 3 | prune: no 4 | 5 | test_url: https://backlinko.com/ecommerce-seo 6 | -------------------------------------------------------------------------------- /facta.co.jp.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='content'] 2 | 3 | test_url: http://facta.co.jp/blog/archives/20111026001026.html 4 | -------------------------------------------------------------------------------- /meowni.ca.txt: -------------------------------------------------------------------------------- 1 | author: //meta[@name="author"]/@content 2 | 3 | test_url: https://meowni.ca/posts/2017-puppeteer-tests/ 4 | 
-------------------------------------------------------------------------------- /motorfull.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://motorfull.com/2011/09/aparca-valeo-park4u-remote -------------------------------------------------------------------------------- /512pixels.net.txt: -------------------------------------------------------------------------------- 1 | title: //meta[@property='og:title']/@content 2 | test_url: http://www.512pixels.net/blog/2014/10/the-move 3 | -------------------------------------------------------------------------------- /apple.news.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //p//a[contains(., 'Click here')] 2 | test_url: https://apple.news/AHQREjzH0Ts6iikKhNe6o8w 3 | -------------------------------------------------------------------------------- /elance.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='jobDesc-bd']/p 2 | 3 | test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ -------------------------------------------------------------------------------- /futurism.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: tracking-wider 2 | 3 | test_url: https://futurism.com/the-byte/china-ai-prosecutor-crimes -------------------------------------------------------------------------------- /getnews.jp.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='post'] 2 | strip: //ul[@id='bookmark_single'] 3 | test_url: http://getnews.jp/archives/117312 -------------------------------------------------------------------------------- /hacf.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="post-content"] 2 | 3 | test_url: 
https://www.hacf.fr/un-beau-dashboard-tout-simplement/ 4 | -------------------------------------------------------------------------------- /jamesclear.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'entry-content')] 2 | 3 | test_url: https://jamesclear.com/procrastination 4 | -------------------------------------------------------------------------------- /kont.me.txt: -------------------------------------------------------------------------------- 1 | http_header(User-agent): twitterbot 2 | 3 | test_url: https://kont.me/%C3%A9loge-d%C3%A9croissance-individuelle 4 | -------------------------------------------------------------------------------- /lostgarden.com.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | convert_double_br_tags: yes 3 | test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html -------------------------------------------------------------------------------- /mbl.is.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="frett-main"] 2 | test_url: http://mbl.is/frettir/innlent/2012/02/21/litill_munur_a_fargjaldaverdi/ -------------------------------------------------------------------------------- /n.survol.fr.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | date: //header//time/@datetime 3 | 4 | test_url: https://n.survol.fr/n/gerer-son-potager 5 | -------------------------------------------------------------------------------- /philosophynow.org.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: welcome_box 2 | 3 | test_url: https://philosophynow.org/issues/141/Time_and_Being 4 | -------------------------------------------------------------------------------- /scotthelme.co.uk.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class='post-content'] 2 | 3 | test_url: https://scotthelme.co.uk/can-you-get-pwned-with-css/ -------------------------------------------------------------------------------- /acidcow.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[starts-with(@id, 'news-id-')] 2 | 3 | test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html -------------------------------------------------------------------------------- /alseraj.net.txt: -------------------------------------------------------------------------------- 1 | title: //*[@id='normalfontyellow'] 2 | test_url: http://www.alseraj.net/cgi-bin/pros/av/LeqaTextDisplay.cgi?display&2 -------------------------------------------------------------------------------- /avantivictoirerao.com.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | 3 | test_url: https://www.avantivictoirerao.com/society/markup-html-tags-and-formatting/ 4 | -------------------------------------------------------------------------------- /blogs.gnome.org.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): PHP/7.2 2 | 3 | test_url: https://blogs.gnome.org/aday/2017/08/08/the-gnome-way/ 4 | -------------------------------------------------------------------------------- /carlchenet.com.txt: -------------------------------------------------------------------------------- 1 | date: //time/@datetime 2 | 3 | test_url: https://carlchenet.com/foss-passive-consumerism-kills-our-community/ 4 | -------------------------------------------------------------------------------- /doc.wallabag.org.txt: -------------------------------------------------------------------------------- 1 | body: //section[contains(@class, 'normal')] 2 | 3 | test_url: https://doc.wallabag.org/en/user/filters.html 4 | 
-------------------------------------------------------------------------------- /gsmarena.com.txt: -------------------------------------------------------------------------------- 1 | next_page_link: //a[@class='pages-next'] 2 | 3 | test_url: http://www.gsmarena.com/samsung_galaxy_j2-review-1348.php -------------------------------------------------------------------------------- /kachestvo.ru.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'inner_content')] 2 | 3 | test_url: http://kachestvo.ru/promtovar/odezhda/denim.html -------------------------------------------------------------------------------- /monkeyzen.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://monkeyzen.com/2011/09/siluetas-de-clasicos-a-modo-de-vinilos -------------------------------------------------------------------------------- /paquier.xyz.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='post'] 2 | 3 | test_url: http://paquier.xyz/postgresql-2/postgres-10-incompatible-changes/ 4 | -------------------------------------------------------------------------------- /summitroute.com.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | 3 | test_url: https://summitroute.com/blog/2015/12/24/instagram_bounty_case_study_for_defense/ 4 | -------------------------------------------------------------------------------- /.readthedocs.io.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@role='main'] 3 | test_url: http://docs.readthedocs.io/en/latest/getting_started.html 4 | -------------------------------------------------------------------------------- /allafrica.com.txt: -------------------------------------------------------------------------------- 1 | 
http_header(user-agent): PHP/7.0 2 | 3 | test_url: http://allafrica.com/tools/headlines/rdf/latest/headlines.rdf 4 | -------------------------------------------------------------------------------- /eternabuenosaires.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://eternabuenosaires.com/2011/09/calle-adolfo-bioy-casares -------------------------------------------------------------------------------- /fakirpresse.info.txt: -------------------------------------------------------------------------------- 1 | author: //a[@class="url fn spip_in"] 2 | 3 | test_url: https://www.fakirpresse.info/les-patrons-ca-osent-tout 4 | -------------------------------------------------------------------------------- /fivefilters.org.txt: -------------------------------------------------------------------------------- 1 | body: //section[contains(@class, 'container')] 2 | prune: no 3 | 4 | test_url: http://fivefilters.org/kindle-it/ 5 | -------------------------------------------------------------------------------- /fok.nl.txt: -------------------------------------------------------------------------------- 1 | # skip cookie warning 2 | single_page_link: concat(//form/@action, '?allowcookies=yes') 3 | 4 | test_url: http://fok.nl/687116 -------------------------------------------------------------------------------- /gist.github.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="highlight"]/pre 2 | 3 | prune: no 4 | tidy: no 5 | 6 | test_url: https://gist.github.com/1258908 -------------------------------------------------------------------------------- /indehekken.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='post-entry']/p 2 | 3 | test_url: http://www.indehekken.net/you-only-sing-when-youre-rowing/ 4 | 
-------------------------------------------------------------------------------- /macdrifter.com.txt: -------------------------------------------------------------------------------- 1 | title: substring-before(//title,' « Macdrifter') 2 | test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ -------------------------------------------------------------------------------- /marksdailyapple.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: wwsgd 2 | test_url: http://www.marksdailyapple.com/are-detoxes-and-cleanses-safe-and-effective/ -------------------------------------------------------------------------------- /nifi.apache.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | 3 | test_url: https://nifi.apache.org/docs/nifi-docs/html/getting-started.html 4 | -------------------------------------------------------------------------------- /osmand.net.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="article"]//h2 2 | date: //meta/@pubdate 3 | 4 | test_url: https://osmand.net/blog/guideline-pt 5 | -------------------------------------------------------------------------------- /retro-games.fr.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //link[@rel='amphtml'] 2 | 3 | test_url: https://www.retro-games.fr/shadow-hearts-ps2-7476 4 | 5 | -------------------------------------------------------------------------------- /staltz.com.txt: -------------------------------------------------------------------------------- 1 | author: //h2[@class="name-title"] 2 | 3 | test_url: https://staltz.com/a-plan-to-rescue-the-web-from-the-internet.html 4 | -------------------------------------------------------------------------------- /theteaspot.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //main[@id='MainContent'] 2 | 3 | prune: no 4 | 5 | test_url: https://www.theteaspot.com/pages/about-tea 6 | -------------------------------------------------------------------------------- /wallabag.org.txt: -------------------------------------------------------------------------------- 1 | date: //meta[@name="date"]/@content 2 | 3 | test_url: https://wallabag.org/news/20241103-new-release-wallabag-2610/ 4 | -------------------------------------------------------------------------------- /zerokspot.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@id="primarycontent"] 3 | test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ -------------------------------------------------------------------------------- /altaonline.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: embed-pullquote 2 | 3 | test_url: https://www.altaonline.com/dispatches/a40395942/the-beach-rats/ 4 | -------------------------------------------------------------------------------- /blog.imirhil.fr.txt: -------------------------------------------------------------------------------- 1 | date: //section[@id="post"]//h1//time 2 | 3 | test_url: https://blog.imirhil.fr/2019/11/13/first-party-tracker.html 4 | -------------------------------------------------------------------------------- /dagogtid.no.txt: -------------------------------------------------------------------------------- 1 | title: //span[@class = 'overskriftEkstrastor'] 2 | author: //em/a 3 | 4 | test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414 -------------------------------------------------------------------------------- /dailydot.com.txt: -------------------------------------------------------------------------------- 1 | tidy: no 2 | body: //article 3 | 4 | test_url: 
http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ -------------------------------------------------------------------------------- /dr-b.io.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='card-body'] 2 | 3 | test_url: https://dr-b.io/post/Synology-DSM-7-with-Lets-Encrypt-and-DNS-Challenge 4 | 5 | -------------------------------------------------------------------------------- /drdobbs.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //a[contains(@href, '/article/print')] 2 | test_url: http://www.drdobbs.com/architecture-and-design/240001128 -------------------------------------------------------------------------------- /flyingmachinestudios.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: linenos 2 | test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/ -------------------------------------------------------------------------------- /gizmovil.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://gizmovil.com/2011/09/hipertextual-labs-receptor-bluetooth-nokia-bh-214 -------------------------------------------------------------------------------- /goodfil.ms.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: gutter 2 | test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/ -------------------------------------------------------------------------------- /ishadeed.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: //h2[@class="post-author__name"] 3 | 4 | test_url: https://ishadeed.com/article/cascade-layers/ 5 | 
-------------------------------------------------------------------------------- /je-suis-papa.com.txt: -------------------------------------------------------------------------------- 1 | strip: //noscript 2 | 3 | test_url: http://www.je-suis-papa.com/pandacraft-kit-educatif-creatif-abonnement-12-ans/ 4 | -------------------------------------------------------------------------------- /jungle-world.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[contains(@class,'story')] 3 | test_url: http://jungle-world.com/artikel/2015/02/51207.html 4 | -------------------------------------------------------------------------------- /rust-lang-nursery.github.io.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | test_url: https://rust-lang-nursery.github.io/rust-cookbook/web/clients/apis.html 4 | -------------------------------------------------------------------------------- /stjv.fr.txt: -------------------------------------------------------------------------------- 1 | date: //time[contains(@class, 'published')]/@datetime 2 | 3 | test_url: https://www.stjv.fr/2018/01/au-sujet-quantic-dream/ 4 | -------------------------------------------------------------------------------- /vg.no.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='artikkelspalte'] 2 | strip_id_or_class: 'breadcrumb' 3 | test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | These files are released to the public domain. 2 | 3 | See https://creativecommons.org/publicdomain/zero/1.0/ for more information. 
4 | -------------------------------------------------------------------------------- /appleweblog.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://appleweblog.com/2011/09/encontrada-vulnerabilidad-grave-en-skype-para-ios -------------------------------------------------------------------------------- /az.lib.ru.txt: -------------------------------------------------------------------------------- 1 | body: //xxx7 2 | tidy: no 3 | prune: no 4 | 5 | test_url: http://az.lib.ru/s/shepkinakupernik_t_l/text_1910_inesa_di_kastro.shtml 6 | -------------------------------------------------------------------------------- /blogs.forbes.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry'] 2 | test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/ -------------------------------------------------------------------------------- /brucelawson.co.uk.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@itemprop="title"] 2 | 3 | test_url: https://www.brucelawson.co.uk/2018/the-practical-value-of-semantic-html/ 4 | -------------------------------------------------------------------------------- /doc.rust-lang.ru.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | prune: no 4 | tidy: no 5 | 6 | test_url: https://doc.rust-lang.ru/book/ch10-03-lifetime-syntax.html 7 | -------------------------------------------------------------------------------- /dropbox.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //a[@id='download_button_link'] 2 | 3 | test_url: https://www.dropbox.com/s/qmocfrco2t0d28o/Fluffbeast.docx 4 | -------------------------------------------------------------------------------- /gihyo.jp.txt: 
-------------------------------------------------------------------------------- 1 | single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] 2 | 3 | test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 -------------------------------------------------------------------------------- /help.sharegate.com.txt: -------------------------------------------------------------------------------- 1 | body: //body//article 2 | prune: no 3 | 4 | test_url: https://help.sharegate.com/en/articles/12185777-release-25-9-2 5 | -------------------------------------------------------------------------------- /hiperpop.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://hiperpop.com/2011/09/marc-anthony-celebra-su-cumpleanos-con-jennifer-lopez -------------------------------------------------------------------------------- /hipertextual.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://hipertextual.com/2011/09/banda-ancha-en-america-latina-insignificante 3 | -------------------------------------------------------------------------------- /krone.at.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' kmm-article-box ')] 2 | 3 | test_url: https://www.krone.at/1941895 4 | -------------------------------------------------------------------------------- /panic.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry'] 2 | date: //h3[@class='postDate'] 3 | test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ -------------------------------------------------------------------------------- /perell.com.txt: -------------------------------------------------------------------------------- 1 | body: 
//section[contains(@class, 'BlogItem-main')] 2 | 3 | test_url: https://www.perell.com/blog/50-ideas-that-changed-my-life 4 | -------------------------------------------------------------------------------- /portertech.ca.txt: -------------------------------------------------------------------------------- 1 | author: //*[(@class = "author")] 2 | date: //*[(@class = "date")] 3 | test_url: http://portertech.ca/2012/12/10/iac-morning-market/ -------------------------------------------------------------------------------- /swcarpentry.github.io.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | prune: no 4 | 5 | test_url: https://swcarpentry.github.io/shell-novice/02-filedir/index.html 6 | -------------------------------------------------------------------------------- /tofugu.com.txt: -------------------------------------------------------------------------------- 1 | body://div[@class='entry-content'] 2 | 3 | test_url: http://www.tofugu.com/2015/07/20/interview-with-toriena-japanese-chiptune/ 4 | -------------------------------------------------------------------------------- /urbandictionary.com.txt: -------------------------------------------------------------------------------- 1 | title: //title 2 | body: //table[@id='entries'] 3 | test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass 4 | -------------------------------------------------------------------------------- /warnerbros.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="article-body"] 2 | test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait 3 | -------------------------------------------------------------------------------- /zataz.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="blog-title"] 2 | 3 | test_url: 
https://www.zataz.com/bientot-noel-securisons-le-prochain-pc-de-la-famille/ 4 | -------------------------------------------------------------------------------- /altfoto.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://altfoto.com/2011/09/nikon-presenta-su-nuevo-sistema-nikon-1-y-dos-nuevas-camaras -------------------------------------------------------------------------------- /chaperonsetvous.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, "excerpt")] 2 | 3 | test_url: https://www.chaperonsetvous.fr/legalite-ca-commence-a-la-creche-3/ 4 | -------------------------------------------------------------------------------- /cucharasonica.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://cucharasonica.com/2011/09/queen-busca-candidatos-para-su-propia-banda-tributo -------------------------------------------------------------------------------- /dansdata.com.txt: -------------------------------------------------------------------------------- 1 | autodetect_next_page: no 2 | tidy: no 3 | prune: no 4 | body: //div[@class='NoOverflow'] 5 | test_url: http://www.dansdata.com/gz129.htm -------------------------------------------------------------------------------- /doc.rust-lang.org.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | prune: no 4 | tidy: no 5 | 6 | test_url: https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html 7 | -------------------------------------------------------------------------------- /interviewmagazine.com.txt: -------------------------------------------------------------------------------- 1 | title: //title 2 | body: //div[contains(@class, 'block')] 3 | 4 | test_url: http://www.interviewmagazine.com/film/spike-jonze 
-------------------------------------------------------------------------------- /jvt.me.txt: -------------------------------------------------------------------------------- 1 | author: //div[@class="post-details"]//a[contains(@class, "p-name")] 2 | 3 | test_url: https://www.jvt.me/posts/2019/10/20/indieweb-talk/ 4 | -------------------------------------------------------------------------------- /najlepsze-ksiazki.pl.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | strip: //footer 4 | 5 | test_url: https://najlepsze-ksiazki.pl/najlepsze-ksiazki-science-fiction/ 6 | -------------------------------------------------------------------------------- /news.rub.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' content-inner-inner-wrapper ')] 2 | 3 | http_header(Cookie): has_js=1 4 | -------------------------------------------------------------------------------- /pentaxforums.com.txt: -------------------------------------------------------------------------------- 1 | next_page_link: //a[contains(., 'Next:')] 2 | test_url: http://www.pentaxforums.com/reviews/long-exposure-handhelds/introduction.html -------------------------------------------------------------------------------- /phototrend.fr.txt: -------------------------------------------------------------------------------- 1 | author: //div[contains(@class, "vcard")] 2 | 3 | test_url: https://phototrend.fr/2019/04/mise-a-jour-luminar-3-1-0-accent-ai-2-0/ 4 | -------------------------------------------------------------------------------- /renenekuda.cz.txt: -------------------------------------------------------------------------------- 1 | title: //*[@class='entry-title'] 2 | body: //div[@class='entry-content'] 3 | test_url: http://www.renenekuda.cz/recept-na-produktivitu/ 
-------------------------------------------------------------------------------- /tldp.org.txt: -------------------------------------------------------------------------------- 1 | title: //title 2 | body: //h2 | //p | //ul 3 | prune: no 4 | tidy: no 5 | 6 | test_url: http://www.tldp.org/HOWTO/Plug-and-Play-HOWTO-7.html -------------------------------------------------------------------------------- /vot-tak.tv.txt: -------------------------------------------------------------------------------- 1 | # prevent duplicated images 2 | strip: //img[@fallback] 3 | 4 | test_url: https://vot-tak.tv/novosti/16-08-2021-nevzlin-intervyu/ 5 | -------------------------------------------------------------------------------- /.about.com.txt: -------------------------------------------------------------------------------- 1 | title: //*[@id='title']//h1 2 | body: //*[(@id = "articlebody")] 3 | test_url: http://nutrition.about.com/od/changeyourdiet/qt/healthysnacks.htm -------------------------------------------------------------------------------- /blogs.lse.ac.uk.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): PHP/8.3 2 | 3 | test_url: https://blogs.lse.ac.uk/politicsandpolicy/stop-working-on-increasing-road-safety/ 4 | -------------------------------------------------------------------------------- /coalicionporelevangelio.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="section"] 2 | 3 | prune: no 4 | 5 | test_url: https://www.coalicionporelevangelio.org/curso/el-dinero/ 6 | -------------------------------------------------------------------------------- /crn.de.txt: -------------------------------------------------------------------------------- 1 | author: //p[contains(@class,'author')]/a 2 | date: //div[contains(@class,'date')] 3 | test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html 
-------------------------------------------------------------------------------- /elfster.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: elf-call-out-ssg 2 | strip_id_or_class: cta-box 3 | 4 | test_url: https://www.elfster.com/content/secret-santa-rules/ 5 | -------------------------------------------------------------------------------- /fiftytwo.in.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'story-wrapper')] 2 | strip: //svg 3 | 4 | prune: no 5 | 6 | test_url: https://fiftytwo.in/story/kingdom/ 7 | -------------------------------------------------------------------------------- /finexpert.e15.cz.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: article-linktoanother 2 | 3 | test_url: http://finexpert.e15.cz/budiz-teplo-eu-stedre-zadotuje-nejen-plynovy-kotel -------------------------------------------------------------------------------- /marigold.cz.txt: -------------------------------------------------------------------------------- 1 | http_header(accept): */* 2 | 3 | test_url: https://www.marigold.cz/item/projektovy-manazer-je-v-cesku-sproste-slovo-ke-skode-projektu 4 | -------------------------------------------------------------------------------- /monkeyuser.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ', normalize-space(@class), ' '), ' content ')] 2 | 3 | test_url: https://www.monkeyuser.com/2019/v-201/ 4 | -------------------------------------------------------------------------------- /news.techmeme.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='main']/div[@class='item'] 2 | strip: //div[@class='right'] 3 | 4 | test_url: http://news.techmeme.com/110516/fh-rip 
-------------------------------------------------------------------------------- /python.org.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(concat(' ',normalize-space(@class),' '),' text ')] 2 | 3 | test_url: https://www.python.org/about/success/cog/ 4 | -------------------------------------------------------------------------------- /utdailybeacon.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='blox-story-text'] 2 | test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html 3 | -------------------------------------------------------------------------------- /worldwidewords.org.txt: -------------------------------------------------------------------------------- 1 | title: //p[@id='content'] 2 | 3 | body: //div[@class='contentblock'] 4 | test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm -------------------------------------------------------------------------------- /.tweakblogs.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="article"] 2 | author: //p[@class="author"]/a 3 | 4 | test_url: http://harryl.tweakblogs.net/blog/11988/voorstellen 5 | -------------------------------------------------------------------------------- /caseinterview.com.txt: -------------------------------------------------------------------------------- 1 | body: (//div[@data-elementor-type="wp-post"])[1] 2 | strip: //footer 3 | 4 | prune: no 5 | 6 | test_url: https://caseinterview.com/mece 7 | -------------------------------------------------------------------------------- /gizmologia.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://gizmologia.com/2011/09/amd-trinity-el-sucesor-de-llano-en-una-demostracion-muy-interesante 
-------------------------------------------------------------------------------- /happyassassin.net.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): PHP/7.2 2 | 3 | test_url: https://www.happyassassin.net/2014/01/25/uefi-boot-how-does-that-actually-work-then/ 4 | -------------------------------------------------------------------------------- /iansommerville.com.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): PHP/5.3 2 | 3 | test_url: http://iansommerville.com/blog/a-guide-to-scottish-delicacies-for-tgo-challengers/ 4 | -------------------------------------------------------------------------------- /iplaysoft.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content']//div[@class='entry-banner' or @class='entry-content'] 2 | test_url: http://www.iplaysoft.com/webbrowserpassview.html -------------------------------------------------------------------------------- /jobbank.gc.ca.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='formatCont_en'] 2 | 3 | prune: no 4 | 5 | test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 -------------------------------------------------------------------------------- /lefilrouge.media.txt: -------------------------------------------------------------------------------- 1 | 2 | strip_id_or_class: the_champ_sharing_container 3 | 4 | test_url: https://lefilrouge.media/etats-unis-nevada-burning-man-festival/ 5 | -------------------------------------------------------------------------------- /matt.might.net.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: string("Matt Might") 3 | strip: //h1/following-sibling::div 4 | 5 | test_url: http://matt.might.net/articles/oo-cesk/ 
-------------------------------------------------------------------------------- /mein-mmo.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='gp-entry-text'] 2 | strip: //aside 3 | 4 | test_url: http://mein-mmo.de/pokemon-go-15-staerksten-pokemon-der-2-generation/ 5 | -------------------------------------------------------------------------------- /radionz.co.nz.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='body'] 2 | title: //div[@class='newsstory']/h2 3 | test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d -------------------------------------------------------------------------------- /roomescapeartist.com.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | 3 | strip: //svg 4 | 5 | test_url: https://roomescapeartist.com/2020/05/18/locked-amsterdam-submarine-hivemind-review/ 6 | -------------------------------------------------------------------------------- /sivers.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content']//article 2 | 3 | strip: //header 4 | strip: //footer 5 | 6 | prune: no 7 | 8 | test_url: https://sivers.org/exex 9 | -------------------------------------------------------------------------------- /sports.ru.txt: -------------------------------------------------------------------------------- 1 | title: //meta[@property="twitter:title"]/@content 2 | date: //time/@datetime 3 | 4 | test_url: https://www.sports.ru/football/blogs/3326825.html 5 | -------------------------------------------------------------------------------- /stumbleupon.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //iframe[@id='tb-stumble-frame']/@src 2 | 3 | test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ 
-------------------------------------------------------------------------------- /thenews.coop.txt: -------------------------------------------------------------------------------- 1 | body: //div[@itemprop='articleBody'] 2 | 3 | test_url: http://www.thenews.coop/98221/news/co-operatives/jeremy-corbyn-needs-co-op-movement/ 4 | -------------------------------------------------------------------------------- /turnoff.us.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='post-title'] 2 | body: //article[@class='post-content'] 3 | test_url: https://turnoff.us/geek/the-depressed-developer-13/ 4 | -------------------------------------------------------------------------------- /vedonlyonti.com.txt: -------------------------------------------------------------------------------- 1 | strip: //style 2 | 3 | test_url: https://vedonlyonti.com/veikkausvihjeet/pitkavetovihjeet/nba-boston-celtics-cleveland-cavaliers-8-5-2024 4 | -------------------------------------------------------------------------------- /betabeat.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="entry-content"] 2 | test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/ -------------------------------------------------------------------------------- /cashless.pl.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' post-page-content ')] 2 | 3 | test_url: https://www.cashless.pl/5465-revolut-kryzys 4 | -------------------------------------------------------------------------------- /cfclrk.com.txt: -------------------------------------------------------------------------------- 1 | body: //body 2 | 3 | strip: //header 4 | strip: //nav 5 | 6 | prune: no 7 | 8 | test_url: https://www.cfclrk.com/articles/github_identities.html 9 | 
-------------------------------------------------------------------------------- /china-gadgets.de.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: bcac-item 2 | strip_id_or_class: ez-toc-container 3 | 4 | test_url: https://www.china-gadgets.de/blitzwolf-bw-v7-beamer/ 5 | -------------------------------------------------------------------------------- /davidwalsh.name.txt: -------------------------------------------------------------------------------- 1 | author: //article//span[@itemprop="name"] 2 | strip: //div[@class="article-block"] 3 | 4 | test_url: https://davidwalsh.name/optional-chaining 5 | -------------------------------------------------------------------------------- /democracynow.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'blog_body')] 2 | 3 | prune: no 4 | 5 | test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a -------------------------------------------------------------------------------- /dissentmagazine.org.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | author: //a[@rel='author'] 4 | 5 | test_url: https://www.dissentmagazine.org/article/why-the-left-needs-liberals 6 | -------------------------------------------------------------------------------- /earvingad.github.io.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | strip: //header 4 | strip: //footer 5 | 6 | prune: no 7 | 8 | test_url: https://earvingad.github.io/posts/headscale/ 9 | -------------------------------------------------------------------------------- /explosm.net.txt: -------------------------------------------------------------------------------- 1 | body: //*[@id="comic"]/div/div[2]/div/span 2 | author: //*[@id="comic"]/div/div[2]/div/div/div[1] 3 | 4 | test_url: http://explosm.net/comics/3955/ 
5 | -------------------------------------------------------------------------------- /freelancer.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="projectDetailsContent"]//td 2 | 3 | test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html -------------------------------------------------------------------------------- /fs.blog.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 2 | 3 | prune: no 4 | 5 | test_url: https://fs.blog/inside-a-miracle/ 6 | -------------------------------------------------------------------------------- /gauchiste.fr.txt: -------------------------------------------------------------------------------- 1 | date: //meta[@name="date"]/@content 2 | 3 | test_url: https://gauchiste.fr/post/2019/11/08/Un-catadioptre-vous-sauvera-(peut-%C3%AAtre)-la-vie 4 | -------------------------------------------------------------------------------- /izismile.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[starts-with(@id, 'news-id-')] 2 | prune: no 3 | 4 | test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html -------------------------------------------------------------------------------- /lado.mx.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //a[starts-with(@href, 'http') and contains(., 'Leer noticia completa')] 2 | 3 | test_url: https://lado.mx/noticia.php?id=15249243 4 | -------------------------------------------------------------------------------- /make.wordpress.org.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: glossary-item-hidden-content 2 | 3 | test_url: 
https://make.wordpress.org/core/2020/12/21/bug-scrub-schedule-for-5-7/ 4 | -------------------------------------------------------------------------------- /mbk-news.appspot.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'evo-entry-content')] 2 | 3 | prune: no 4 | 5 | test_url: https://mbk-news.appspot.com/suzhet/ee-imya-budet/ 6 | -------------------------------------------------------------------------------- /mytotalretail.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@itemprop="content"] 3 | 4 | test_url: https://www.mytotalretail.com/article/how-site-search-could-kill-amazon/ 5 | -------------------------------------------------------------------------------- /openstreetmap.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | strip: //div[@class='standard-form'] 3 | test_url: https://www.openstreetmap.org/user/woodpeck/diary/393947 4 | -------------------------------------------------------------------------------- /php.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | strip_id_or_class: manualnavbar 3 | 4 | prune: no 5 | 6 | test_url: http://www.php.net/manual/en/migration5.incompatible.php -------------------------------------------------------------------------------- /vakarm.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="block_news_main_content_preview"] 2 | 3 | test_url: https://www.vakarm.net/news/read/Choual-History-X-tout-un-cinema/10270/2 4 | -------------------------------------------------------------------------------- /archiloque.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | 3 | date: //span[@id='revdate'] 4 | 
5 | test_url: https://archiloque.net/blog/task-engine-ruby/task-engine-ruby.html 6 | -------------------------------------------------------------------------------- /berlingske.dk.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='headline'] 2 | body: //div[contains(@class, 'article-wrapper')] 3 | test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa -------------------------------------------------------------------------------- /carnegie.ru.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-body ')] 2 | prune: no 3 | 4 | test_url: https://carnegie.ru/commentary/86428 5 | -------------------------------------------------------------------------------- /cooper.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class,'post-body')] 2 | date: //abbr[@class='published'] 3 | 4 | test_url: http://www.cooper.com/journal/2015/6/creating-personas 5 | -------------------------------------------------------------------------------- /doughellmann.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //a[.="Read more…"]/@href 2 | 3 | test_url: https://doughellmann.com/blog/2017/02/06/getopt-command-line-option-parsing-pymotw-3/ 4 | -------------------------------------------------------------------------------- /fokus.se.txt: -------------------------------------------------------------------------------- 1 | title: //h2[contains(@class, 'entry-title')] 2 | body: //div[contains(@class, 'entry-content')] 3 | test_url: http://www.fokus.se/2017/03/olosta-karnfragor/ 4 | -------------------------------------------------------------------------------- /geenstijl.nl.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id = 
'article'] 2 | strip: //div[@id = 'klasbox'] 3 | test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html -------------------------------------------------------------------------------- /hackertarget.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '), ' btx-post-body ')] 2 | 3 | test_url: https://hackertarget.com/ssh-examples-tunnels/ 4 | -------------------------------------------------------------------------------- /ianlewis.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class), ' '),' post-content ')] 2 | 3 | test_url: https://www.ianlewis.org/en/almighty-pause-container 4 | -------------------------------------------------------------------------------- /juliareda.eu.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | strip: //aside[contains(@class, "mashsb-container")] 4 | 5 | test_url: https://juliareda.eu/2019/02/eu-copyright-final-text/ 6 | -------------------------------------------------------------------------------- /kresus.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="entry-title"] 2 | date: //time[@class="published"]/@datetime 3 | 4 | test_url: https://kresus.org/blog/kresus-version-0-14-0.html 5 | -------------------------------------------------------------------------------- /letraslibres.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: concat(link[@rel="canonical"], "?page=full") 2 | 3 | test_url: http://www.letraslibres.com/revista/dossier/quien-manda-en-europa 4 | -------------------------------------------------------------------------------- /longform.org.txt: 
-------------------------------------------------------------------------------- 1 | single_page_link: //div[@class="post"]/div[@class="title"]/a 2 | 3 | test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ -------------------------------------------------------------------------------- /oschina.net.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | strip_id_or_class: syntaxhighlighter 3 | test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print -------------------------------------------------------------------------------- /reactjs.org.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | strip: //header 4 | strip: //aside 5 | 6 | prune: no 7 | 8 | test_url: https://reactjs.org/docs/uncontrolled-components.html 9 | -------------------------------------------------------------------------------- /signal.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="post-title"] 2 | author: //p[contains(@class, "body2")]//a 3 | 4 | test_url: https://signal.org/blog/signal-private-group-system/ 5 | -------------------------------------------------------------------------------- /slrlounge.com.txt: -------------------------------------------------------------------------------- 1 | replace_string(): 3 | 4 | test_url: https://www.slrlounge.com/flash-outdoors-ambient-balancing-natural/ 5 | -------------------------------------------------------------------------------- /yosoy.red.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | 3 | test_url: https://yosoy.red/2021/01/22/politica/ 4 | test_contains: es fundamental entender que los sistemas son creados por humanos 5 | -------------------------------------------------------------------------------- /.mitpress.mit.edu.txt: 
-------------------------------------------------------------------------------- 1 | strip_id_or_class: wp-block-pullquote 2 | 3 | test_url: https://thereader.mitpress.mit.edu/a-master-perfumers-reflections-on-patchouli-and-vetiver/ 4 | -------------------------------------------------------------------------------- /aerobuzz.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' chapo ')] 2 | 3 | test_url: https://www.aerobuzz.fr/depose-minute/aerodromes-de-proximite/ 4 | -------------------------------------------------------------------------------- /alexduner.com.txt: -------------------------------------------------------------------------------- 1 | body: //section[@class='content'] 2 | date: //span[1] 3 | author: //h1[@id='sitetitle'] 4 | test_url: http://alexduner.com/blog/something-i-learned-today 5 | -------------------------------------------------------------------------------- /archdaily.com.txt: -------------------------------------------------------------------------------- 1 | date: //div[@class='post_date'] 2 | 3 | body: //div[@class='post_content'] 4 | 5 | test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up -------------------------------------------------------------------------------- /c.newsnow.com.txt: -------------------------------------------------------------------------------- 1 | # Also as c.newsnow.co.uk.txt 2 | 3 | single_page_link: //div[@id="js-retrieval-msg"]//a 4 | 5 | test_url: http://c.newsnow.com/A/1041394538?-15254:37150 6 | -------------------------------------------------------------------------------- /codeproject.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="contentdiv"] 2 | date: //span[@class="date"] 3 | test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code 
-------------------------------------------------------------------------------- /declassifieduk.org.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: related-post 2 | 3 | test_url: https://declassifieduk.org/british-warmongering-is-driving-europe-towards-catastrophe-in-ukraine/ 4 | -------------------------------------------------------------------------------- /elblogsalmon.com.txt: -------------------------------------------------------------------------------- 1 | replace_string(sf-src): src 2 | 3 | test_url: https://www.elblogsalmon.com/economia/no-todo-fue-mal-con-el-euro-datos-que-indican-que-fue-una-buena-idea 4 | -------------------------------------------------------------------------------- /esglobal.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='blog-content'] 2 | 3 | test_url: http://www.esglobal.org/el-caos-en-el-este-los-socios-de-la-ue-necesitan-que-se-les-preste-atencion/ 4 | -------------------------------------------------------------------------------- /gold.ac.uk.txt: -------------------------------------------------------------------------------- 1 | body: //article//div[contains(concat(' ',normalize-space(@class),' '), ' rich-content ')] 2 | 3 | test_url: https://www.gold.ac.uk/news/carbon-neutral-plan/ 4 | -------------------------------------------------------------------------------- /nextcloud.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="blog-title-and-subtitle"]//h1 2 | date: //time/@datetime 3 | 4 | test_url: https://nextcloud.com/blog/celebrating-2-years-nextcloud/ 5 | -------------------------------------------------------------------------------- /optimizesmart.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="content"] 2 | 3 | prune: no 4 | 5 | test_url: 
https://www.optimizesmart.com/how-to-set-up-conversion-tracking-in-google-analytics-4/ 6 | -------------------------------------------------------------------------------- /pastepad.fivefilters.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@id='ff-pastepad-content'] 3 | prune: no 4 | # todo: add test file 5 | test_url: http://pastepad.fivefilters.org/test.html -------------------------------------------------------------------------------- /pixellibre.net.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="entry-title"] 2 | author: //a[@class='url fn n'] 3 | 4 | test_url: https://pixellibre.net/2017/10/vie-privee-smartphones-applications/ 5 | -------------------------------------------------------------------------------- /radishzz.cc.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-content ')] 2 | 3 | prune: no 4 | 5 | test_url: https://radishzz.cc/posts/384776b2/ 6 | -------------------------------------------------------------------------------- /sme.sk.txt: -------------------------------------------------------------------------------- 1 | title: //meta[@property='og:title']/@content 2 | date: //p[@class='autor_line']/b/text() 3 | test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html -------------------------------------------------------------------------------- /sourcebooks.com.txt: -------------------------------------------------------------------------------- 1 | #grab the actual content div 2 | body: //div[@class='rt-article'] 3 | 4 | test_url: http://www.sourcebooks.com/blog/happy-27th-birthday-sourcebooks.html 5 | -------------------------------------------------------------------------------- /sprengsatz.de.txt: 
-------------------------------------------------------------------------------- 1 | title: //h2 2 | author: string('Michael Spreng') 3 | date: //div[@class='date'] 4 | body: //div[@class='entry'] 5 | test_url: http://www.sprengsatz.de/?p=3691 -------------------------------------------------------------------------------- /stopgame.ru.txt: -------------------------------------------------------------------------------- 1 | body: //section[contains(concat(' ',normalize-space(@class),' '),' article ')] 2 | 3 | test_url: https://stopgame.ru/show/113377/phoenotopia_awakening_review 4 | -------------------------------------------------------------------------------- /thefilmexperience.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='body'] 2 | test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html -------------------------------------------------------------------------------- /triplebyte.com.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(concat(' ',normalize-space(@class), ' '), ' blog-post ')] 2 | 3 | test_url: https://triplebyte.com/blog/marissa-mayer-interview 4 | -------------------------------------------------------------------------------- /vivirmexico.com.txt: -------------------------------------------------------------------------------- 1 | body: //*[(@class = "historia")] 2 | test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social -------------------------------------------------------------------------------- /w3.org.txt: -------------------------------------------------------------------------------- 1 | date: //time[@class="entry-date"]/@datetime 2 | 3 | test_url: https://www.w3.org/blog/2019/05/w3c-and-whatwg-to-work-together-to-advance-the-open-web-platform/ 4 | 
-------------------------------------------------------------------------------- /www.seriouseats.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'recipe-wrapper')] 2 | 3 | test_url: https://www.seriouseats.com/recipes/2014/02/braised-short-ribs-from-daniel.html 4 | -------------------------------------------------------------------------------- /a11ywithlindsey.com.txt: -------------------------------------------------------------------------------- 1 | body: //main[@id="main-content"] 2 | author: "Lindsey Kopacz" 3 | 4 | test_url: https://www.a11ywithlindsey.com/blog/javascript-accessibility-accordions/ 5 | -------------------------------------------------------------------------------- /blog.eng.xogrp.com.txt: -------------------------------------------------------------------------------- 1 | title: //article[contains(@class, 'type_text')]//h2 2 | test_url: http://blog.eng.xogrp.com/post/154005485319/node-js-promise-enterprise-grade-first-of-all 3 | -------------------------------------------------------------------------------- /blog.nightly.mozilla.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | date: //time/@datetime 3 | 4 | test_url: https://blog.nightly.mozilla.org/2018/06/27/protecting-your-privacy-in-firefox-pre-release/ 5 | -------------------------------------------------------------------------------- /c.newsnow.co.uk.txt: -------------------------------------------------------------------------------- 1 | # Also as c.newsnow.com.txt 2 | 3 | single_page_link: //div[@id="js-retrieval-msg"]//a 4 | 5 | test_url: http://c.newsnow.co.uk/A/1041394538?-15254:37150 6 | -------------------------------------------------------------------------------- /cabinetmagazine.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'blog-content')] 2 | 3 | prune: no 4 | 
5 | test_url: https://www.cabinetmagazine.org/kiosk/komska_yuliya_4_march_2021.php 6 | -------------------------------------------------------------------------------- /caffereggio.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="pf-content"] 2 | 3 | test_url: http://www.caffereggio.net/2017/02/09/apoyo-pablo-iglesias-podemos-unidos-podemos-vicenc-navarro-publico/ 4 | -------------------------------------------------------------------------------- /economie.gouv.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'txtVisu')] 2 | prune: no 3 | 4 | test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance -------------------------------------------------------------------------------- /gnu.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' top-level-extent ')] 2 | prune: no 3 | 4 | test_url: https://www.gnu.org/software/bash/manual/bash.html 5 | -------------------------------------------------------------------------------- /lvsl.fr.txt: -------------------------------------------------------------------------------- 1 | title: //h1[contains(@class,'entry-title')] 2 | body: //div[contains(@class,'body-content')] 3 | test_url: http://lvsl.fr/peuple-manifestant-saez-a-t-pondu-hexagone 4 | -------------------------------------------------------------------------------- /lwlies.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' singleColRight ')] 2 | 3 | test_url: https://lwlies.com/articles/rope-alfred-hitchcock-masterpiece/ 4 | -------------------------------------------------------------------------------- /marriedtothesea.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //img[contains(@src, '.gif')] 2 | 3 | test_url: http://www.marriedtothesea.com 4 | test_url: http://www.marriedtothesea.com/index.php?date=010818 5 | -------------------------------------------------------------------------------- /miops.com.txt: -------------------------------------------------------------------------------- 1 | strip: //div[@id="relatedblogs"] 2 | strip: //div[@class="product-card"] 3 | 4 | test_url: https://www.miops.com/blogs/news/master-tips-to-take-lightning-photos 5 | -------------------------------------------------------------------------------- /missnumerique.com.txt: -------------------------------------------------------------------------------- 1 | strip: //figure/noscript 2 | 3 | test_url: https://www.missnumerique.com/blog/la-proxiphotographie-la-solution-nature-pour-exprimer-son-sens-artistique/ 4 | -------------------------------------------------------------------------------- /pmf.silvrback.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' all_external_links ')] 2 | test_url: https://pmf.silvrback.com/fixing-tethering-on-android-kitkat -------------------------------------------------------------------------------- /pogue.blogs.nytimes.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="content"]/div[1] 2 | 3 | title: //h1[@class="entry-title"] 4 | test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ -------------------------------------------------------------------------------- /queerty.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='copy'] 2 | title: //h1[@class='hed'] 3 | test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ 
-------------------------------------------------------------------------------- /renverse.co.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'article-texte')] 2 | 3 | prune: no 4 | 5 | test_url: https://renverse.co/infos-locales/article/rejoignez-la-zad-du-geissberg-3486 6 | -------------------------------------------------------------------------------- /thebaffler.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='pico'] 2 | 3 | strip_id_or_class: wp-block-pullquote 4 | 5 | test_url: https://thebaffler.com/latest/hong-kong-literatures-growing-pains-chu 6 | -------------------------------------------------------------------------------- /theses.enc.sorbonne.fr.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="head"] 2 | author: //div[@class="name"] 3 | body: //article[@id="text"] 4 | 5 | test_url: http://theses.enc.sorbonne.fr/2014/sidre 6 | -------------------------------------------------------------------------------- /tidbits.com.txt: -------------------------------------------------------------------------------- 1 | author: //span[@class='fn'] 2 | date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') 3 | test_url: http://tidbits.com/article/12651 -------------------------------------------------------------------------------- /viget.com.txt: -------------------------------------------------------------------------------- 1 | body: (//article)[1] 2 | 3 | strip_id_or_class: sharer 4 | 5 | prune: no 6 | 7 | test_url: https://www.viget.com/articles/understanding-futures-in-rust-part-1/ 8 | -------------------------------------------------------------------------------- /yourerie.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@itemprop="articleBody"] 2 | test_url: 
http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA 3 | -------------------------------------------------------------------------------- /zaknrw.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single-content ')] 2 | 3 | test_url: http://www.zaknrw.de/medium/audience-development-und-diversitaet 4 | -------------------------------------------------------------------------------- /9to5mac.com.txt: -------------------------------------------------------------------------------- 1 | strip: //p[preceding::hr]/span[@class="embed-youtube"] 2 | strip: //hr 3 | 4 | test_url: https://9to5mac.com/2017/04/14/toshiba-semiconductor-business-apple-foxconn/ 5 | -------------------------------------------------------------------------------- /abplive.com.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0 2 | 3 | test_url: https://www.abplive.com/news/india/feed 4 | -------------------------------------------------------------------------------- /aps.dz.txt: -------------------------------------------------------------------------------- 1 | # author: HolgerAusB | version 2023-02-06 2 | 3 | body: //div[@class='itemBody'] 4 | 5 | test_url: https://www.aps.dz/algerie/151377-communique-du-conseil-des-ministres 6 | -------------------------------------------------------------------------------- /brookings.edu.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: inline-widget 2 | 3 | test_url: https://www.brookings.edu/blog/techtank/2019/06/26/why-data-ownership-is-the-wrong-approach-to-protecting-privacy/ 4 | -------------------------------------------------------------------------------- /car-it.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class='entry-content'] 2 | strip: //div[@class='yasr-auto-insert-visitor'] 3 | 4 | test_url: https://www.car-it.com/die-utopie-von-level-5/id-0071510 5 | 6 | -------------------------------------------------------------------------------- /cn.engadget.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="posttitle"] 2 | body: //div[@class="postbody"] 3 | prune: no 4 | 5 | test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/ 6 | -------------------------------------------------------------------------------- /dictionary.reference.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'source-data')] 2 | strip: //button 3 | 4 | prune: no 5 | 6 | test_url: http://dictionary.reference.com/browse/propaganda 7 | -------------------------------------------------------------------------------- /ericsuh.com.txt: -------------------------------------------------------------------------------- 1 | date: //h6[@class='datetime']/child::text() 2 | author: string("Eric J. 
Suh") 3 | footnotes: yes 4 | test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html -------------------------------------------------------------------------------- /f-droid.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="post-title"] 2 | author: //header[@class="post-header"]//img/@alt 3 | 4 | test_url: https://f-droid.org/en/2020/01/16/tracking-the-trackers.html 5 | -------------------------------------------------------------------------------- /github.blog.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="post__header-content"]//h1 2 | 3 | test_url: https://github.blog/2020-02-12-supercharge-your-command-line-experience-github-cli-is-now-in-beta/ 4 | -------------------------------------------------------------------------------- /grumpygamer.com.txt: -------------------------------------------------------------------------------- 1 | title: //h3[@class="post-title"] 2 | author: "Ron Gilbert" 3 | body: //div[@class="grumpypost"] 4 | 5 | test_url: https://grumpygamer.com/scope_budget_schedule 6 | -------------------------------------------------------------------------------- /hmercer.com.txt: -------------------------------------------------------------------------------- 1 | title: //*[@class='ptitle'] 2 | date: //span[@class='date'] 3 | body: //div[@class='body'] 4 | prune: no 5 | test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ -------------------------------------------------------------------------------- /laughingsquid.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='entry-title'] 2 | body: //div[@class='entry-content'] 3 | test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ -------------------------------------------------------------------------------- /lawfareblog.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'node-body')]//div[@class='field-items'] 2 | title: //div[@class='title'] 3 | 4 | test_url: https://lawfareblog.com/limits-panopticon 5 | -------------------------------------------------------------------------------- /nf-farn.de.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(concat(" ",normalize-space(@class)," ")," view-mode-full ")] 2 | 3 | prune: no 4 | 5 | test_url: https://www.nf-farn.de/maer-ueberbevoelkerung 6 | -------------------------------------------------------------------------------- /osmc.tv.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="post-title"] 2 | date: //time[@class="post-date"]/@datetime 3 | 4 | test_url: https://osmc.tv/2019/11/osmcs-november-update-is-here-with-kodi-18-5/ 5 | -------------------------------------------------------------------------------- /prolost.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='body'] 2 | title: //h2[@class='title'] 3 | date: //span[@class='posted-on'] 4 | test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html -------------------------------------------------------------------------------- /radar.oreilly.com.txt: -------------------------------------------------------------------------------- 1 | date: //span[@class='date'] 2 | body: //div[@class='entry-body'] 3 | test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html -------------------------------------------------------------------------------- /the-tls.co.uk.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'tls-article-body')] 2 | 3 | test_url: 
https://www.the-tls.co.uk/articles/katrina-history-1915-2015-andy-horowitz-review-peter-coates/ 4 | -------------------------------------------------------------------------------- /theoaklandpress.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='fullstory'] 2 | strip: //div[@id='page_leftbar'] 3 | test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt -------------------------------------------------------------------------------- /thisamericanlife.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | 3 | test_url: https://www.thisamericanlife.org/282/transcript 4 | test_contains: I was part of sending an innocent man to jail 5 | -------------------------------------------------------------------------------- /wenow.com.txt: -------------------------------------------------------------------------------- 1 | title: //article//h1 2 | 3 | strip: //a[@class="post-previous"] 4 | strip: //aside 5 | 6 | test_url: https://www.wenow.com/2021/06/07/empreinte-carbone-de-la-viande/ 7 | -------------------------------------------------------------------------------- /.sodexo.com.txt: -------------------------------------------------------------------------------- 1 | body: //main[@id='main'] 2 | 3 | strip: //button 4 | 5 | test_url: https://uk.sodexo.com/home/media/news-room/newsList-area/uk-press-releases/SSAFA-Friendly-to-Forces.html 6 | -------------------------------------------------------------------------------- /blogs.reuters.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='single']/h1 2 | body: //div[@id='postcontent'] 3 | test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ -------------------------------------------------------------------------------- /crimethinc.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class="readingtext"] 2 | title: substring-after(substring-after(//title, ':'), ':') 3 | test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php -------------------------------------------------------------------------------- /good.is.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="title"]/div/h1 2 | body: //div[@class="body"] 3 | date: //li[@class="date-time"] 4 | test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ -------------------------------------------------------------------------------- /highscalability.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='journal-entry-text'] 2 | 3 | test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html -------------------------------------------------------------------------------- /lesecolohumanistes.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]/*[not(self::figure)] 2 | 3 | test_url: https://lesecolohumanistes.fr/interdependance/ 4 | -------------------------------------------------------------------------------- /marketresearchdirect.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="product_tabs-0"] 2 | 3 | test_url: https://www.marketresearchdirect.com/consumer-goods/smart-lighting-market-in-india-2018-2023-market-report 4 | -------------------------------------------------------------------------------- /mebedo.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' av_two_third ')] 2 | 3 | test_url: 
https://www.mebedo.de/themen/faq-zur-verantwortlichen-elektrofachkraft-vefk/ 4 | -------------------------------------------------------------------------------- /philosophyforlife.org.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | replace_string(data-src=):src= 3 | 4 | test_url: https://www.philosophyforlife.org/blog/mind-palaces-the-art-of-psycho-technics-or-soul-craft 5 | -------------------------------------------------------------------------------- /redalemeden.com.txt: -------------------------------------------------------------------------------- 1 | author: //header//h1[@class="full-name"] 2 | date: //div[@class="post-metadata"]//time/@date 3 | 4 | test_url: https://redalemeden.com/blog/2019/we-need-chrome-no-more 5 | -------------------------------------------------------------------------------- /redtimmy.com.txt: -------------------------------------------------------------------------------- 1 | title: //h4[@class='entry-title'] 2 | body: //div[@class='post-content'] 3 | 4 | test_url: https://www.redtimmy.com/docker/a-tale-of-escaping-a-hardened-docker-container/ 5 | -------------------------------------------------------------------------------- /saltyworld.net.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(@class, 'hentry')] 2 | 3 | prune: no 4 | # remove related 5 | strip_id_or_class: relpost 6 | 7 | test_url: https://saltyworld.net/contrapoints/ 8 | -------------------------------------------------------------------------------- /sfweekly.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'content_body')] 2 | strip_id_or_class: det_rel 3 | test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ -------------------------------------------------------------------------------- /techno-science.net.txt: 
-------------------------------------------------------------------------------- 1 | title://div[@class="news"]/div[@class="titre"] 2 | body://div[@class="news"]/div[@class="texte"] 3 | test_url: http://www.techno-science.net/?onglet=news&news=14808 4 | -------------------------------------------------------------------------------- /tthfanfic.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | author: //a[starts-with(@href, '/AuthorStories')] 3 | body: //div[@id='storyinnerbody'] 4 | test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm -------------------------------------------------------------------------------- /web-libre.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='template_article'] 2 | 3 | strip_id_or_class: article_more 4 | strip: //hr 5 | 6 | test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html -------------------------------------------------------------------------------- /americanthinker.com.txt: -------------------------------------------------------------------------------- 1 | # Avoid duplicating content 2 | strip_id_or_class: mrf-hidden 3 | 4 | test_url: https://www.americanthinker.com/articles/2019/10/hillarys_health_coming_up_again.html 5 | -------------------------------------------------------------------------------- /autoactu.com.txt: -------------------------------------------------------------------------------- 1 | 2 | body: //div[@id="bloc_actu"]/parent::* 3 | title: //div[@id="content"]/h1[1] 4 | 5 | test_url: http://www.autoactu.com/thomas-owsianski-nomme-president-d-audi-chine.shtml 6 | -------------------------------------------------------------------------------- /bobbyromeo.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: adsbygoogle 2 | strip_id_or_class: yarpp-related 3 | 4 | test_url: 
http://bobbyromeo.com/technology/xiaomi-smart-1080p-wifi-ip-camera-rtsp-streaming-hack/ 5 | -------------------------------------------------------------------------------- /brettterpstra.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='post full'] 2 | title: //h1 3 | author: substring-after(//title, '- ') 4 | date: //span[@class='date'] 5 | test_url: http://brettterpstra.com/byword-for-ios/ -------------------------------------------------------------------------------- /code.google.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="gc-pagecontent"] 2 | strip: //a[@class="backtotop"] 3 | prune: no 4 | 5 | test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html -------------------------------------------------------------------------------- /domo-blog.fr.txt: -------------------------------------------------------------------------------- 1 | strip: //div[@id="extras"] 2 | strip: //div[@class="herald-da"] 3 | 4 | test_url: https://www.domo-blog.fr/economisez-energie-argent-avec-domotique-chauffe-eau-shelly-1-pro/ 5 | -------------------------------------------------------------------------------- /fair.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 2 | 3 | test_url: https://fair.org/home/still-manufacturing-consent-an-interview-with-noam-chomsky/ 4 | -------------------------------------------------------------------------------- /historic-uk.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'content__inner__text')] 2 | 3 | prune: no 4 | 5 | test_url: https://www.historic-uk.com/HistoryUK/HistoryofEngland/Tragic-Demise-Edward-II/ 6 | -------------------------------------------------------------------------------- 
/keycloak.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | 3 | strip_id_or_class: preamble 4 | strip_id_or_class: sidebarblock 5 | 6 | test_url: https://www.keycloak.org/docs/12.0/authorization_services/ 7 | -------------------------------------------------------------------------------- /mcorbin.fr.txt: -------------------------------------------------------------------------------- 1 | title: //div[contains(concat(' ',normalize-space(@class),' '),' post-header ')]//h2 2 | body: //div[@id="post"]/div[2] 3 | test_url: https://www.mcorbin.fr/posts/2023-07-04-metriques/ -------------------------------------------------------------------------------- /mises.org.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: 'book-ad' 2 | strip_id_or_class: 'bigger pullquote' 3 | strip_id_or_class: 'subscribe' 4 | strip_id_or_class: 'blog-link' 5 | test_url: http://mises.org/daily/4804 -------------------------------------------------------------------------------- /motorcyclistonline.com.txt: -------------------------------------------------------------------------------- 1 | http_header(cookie): bonnier_consent=true 2 | 3 | strip_id_or_class: arcAdsBox 4 | 5 | author: //div[contains(concat(' ',normalize-space(@class),' '),' by_author ')] 6 | -------------------------------------------------------------------------------- /pymotw.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[starts-with(@id, 'module-')] 2 | 3 | test_url: https://pymotw.com/3/configparser/ 4 | test_url: https://pymotw.com/3/shlex/ 5 | test_url: https://pymotw.com/3/sys/ 6 | -------------------------------------------------------------------------------- /riffreporter.de.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | strip: //svg 3 | prune: no 4 | 5 | test_url: 
https://www.riffreporter.de/de/wissen/klimakrise-artensterben-biodiversitaet-droht-sechstes-massenaussterben 6 | -------------------------------------------------------------------------------- /rnd.de.txt: -------------------------------------------------------------------------------- 1 | body: //article/div[contains(concat(' ',normalize-space(@class),' '),' a__in ')] 2 | 3 | strip_id_or_class: a__rel-a-li 4 | strip_id_or_class: ord--0 5 | strip_id_or_class: a-hd__aut 6 | -------------------------------------------------------------------------------- /scottohara.me.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: "Scott O'Hara" 3 | date: //time[@itemprop="datePublished"]/@datetime 4 | 5 | test_url: https://www.scottohara.me/blog/2019/01/21/how-do-you-figure.html 6 | -------------------------------------------------------------------------------- /share.ez.no.txt: -------------------------------------------------------------------------------- 1 | author: //div[@class="entry-user"]//a 2 | body: //div[@class="attribute-long"] 3 | 4 | test_url: http://share.ez.no/blogs/jean-luc-chassaing/how-one-should-code-in-ez-platform 5 | -------------------------------------------------------------------------------- /sqlite.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='ff-body'] 2 | 3 | replace_string(

):

4 | 5 | prune: no 6 | 7 | test_url: http://www.sqlite.org/fileformat2.html -------------------------------------------------------------------------------- /thingiverse.com.txt: -------------------------------------------------------------------------------- 1 | tidy: yes 2 | autodetect_on_failure: yes 3 | prune: yes 4 | body: /html/head/title 5 | 6 | test_url: https://www.thingiverse.com/thing:3868321 7 | test_contains: Panther Origami -------------------------------------------------------------------------------- /varsity.co.uk.txt: -------------------------------------------------------------------------------- 1 | # FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser 2 | 3 | strip: //h2 4 | test_url: http://www.varsity.co.uk/reviews/2662 -------------------------------------------------------------------------------- /will-self.com.txt: -------------------------------------------------------------------------------- 1 | strip: //div[@class="widget-area"] 2 | title: //*[@class="entry-title"] 3 | date: //time[@class="entry-date"] 4 | test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ -------------------------------------------------------------------------------- /brandingstrategyinsider.com.txt: -------------------------------------------------------------------------------- 1 | date://h2[@class="date-header"] 2 | body://div[@class="entry-content"] 3 | test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html -------------------------------------------------------------------------------- /catb.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='article'] 2 | strip: //div[@class='revhistory'] 3 | strip: //div[@class='toc'] 4 | tidy: no 5 | prune: no 6 | 7 | test_url: http://catb.org/~esr/faqs/smart-questions.html -------------------------------------------------------------------------------- 
/forbiddenstories.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'complexe-content')] 2 | 3 | prune: no 4 | 5 | test_url: https://forbiddenstories.org/story-killers/gauri-lankesh-in-the-age-of-false-news/ 6 | -------------------------------------------------------------------------------- /hanselman.com.txt: -------------------------------------------------------------------------------- 1 | date: //span[@class="item-date"] 2 | body: //div[@class="item-content"] 3 | strip_comments: no 4 | test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx -------------------------------------------------------------------------------- /iphonetweak.fr.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='fond_titre']/h1[@class='post-title'] 2 | body: //div[@class="post-chapo"] 3 | 4 | test_url: http://iphonetweak.fr/2016/05/20/apple-watch-deja-jailbreakee 5 | -------------------------------------------------------------------------------- /m.theregister.co.uk.txt: -------------------------------------------------------------------------------- 1 | strip: //div[@class='wptl btm'] 2 | body: //div[@id='article']//h2 | //div[@id='body'] 3 | 4 | test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/ 5 | -------------------------------------------------------------------------------- /publications.parliament.uk.txt: -------------------------------------------------------------------------------- 1 | author: //meta[@name="Author"] 2 | date: //meta[@name="Date"] 3 | strip: //h5 4 | test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm -------------------------------------------------------------------------------- /robertsspaceindustries.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: 'sharedaddy' 2 | 
strip_id_or_class: 'respond' 3 | strip_id_or_class: 'meta' 4 | test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/ -------------------------------------------------------------------------------- /smarthomebeginner.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' inner-post-entry ')] 2 | 3 | test_url: https://www.smarthomebeginner.com/docker-home-media-server-2018-basic/ 4 | -------------------------------------------------------------------------------- /taxacc.jp.txt: -------------------------------------------------------------------------------- 1 | title: substring-before(//title, '/朝日税理士法人') 2 | 3 | strip: //h3[contains(., 'カテゴリー')] 4 | strip: //h3[contains(., '月別アーカイブ')] 5 | 6 | test_url: https://www.taxacc.jp/blog/202512/ 7 | -------------------------------------------------------------------------------- /theintercept.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="Headline"] 2 | body: //div[@class="PostContent"] 3 | 4 | test_url: https://theintercept.com/2014/10/30/inside-story-matt-taibbis-departure-first-look-media/ 5 | -------------------------------------------------------------------------------- /therumpus.net.txt: -------------------------------------------------------------------------------- 1 | title: /html/body/div/div[2]/div/div/h1 2 | 3 | body: /html/body/div/div[2]/div/div/div[2] 4 | test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes -------------------------------------------------------------------------------- /.stanford.edu.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='aueditable']/h1 2 | body: //div[@id='content'] 3 | strip: //div[@id='message' or @id='linklist'] 4 | prune: no 5 | test_url: http://plato.stanford.edu/entries/supervenience/ 
-------------------------------------------------------------------------------- /5by5.tv.txt: -------------------------------------------------------------------------------- 1 | body: //*[@id="episode"] 2 | prune: no 3 | tidy: no 4 | 5 | autodetect_next_page: no 6 | strip_id_or_class: player 7 | 8 | strip://*[@id="header"] 9 | test_url: http://5by5.tv/buildanalyze/60 -------------------------------------------------------------------------------- /batenka.ru.txt: -------------------------------------------------------------------------------- 1 | body: //section[@itemprop="articleBody"] 2 | 3 | strip: //section[contains(@class, 'hide-mobile')] 4 | 5 | prune: no 6 | 7 | test_url: https://batenka.ru/unity/sect/white-brotherhood/ 8 | -------------------------------------------------------------------------------- /bez.es.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='text_art'] 2 | strip: //div[@class='cab_datos_opinion'] 3 | strip: //div[@class='sumario2_left'] 4 | 5 | test_url: http://www.bez.es/382758623/otros-fracasos-empresas.html 6 | -------------------------------------------------------------------------------- /business-standard.com.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0 2 | 3 | test_url: https://www.business-standard.com/rss/opinion-105.rss 4 | -------------------------------------------------------------------------------- /derekseaman.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry-content'] 2 | 3 | prune: no 4 | tidy: no 5 | 6 | test_url: https://www.derekseaman.com/2019/09/how-to-pi-hole-plus-dnscrypt-setup-on-raspberry-pi-4.html 7 | -------------------------------------------------------------------------------- /houstonchronicle.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class='subsection_wrap'] 2 | next_page_link: //ul[@class='pagination']//a[contains(text(), '»')] 3 | 4 | test_url: http://www.houstonchronicle.com/nasa/adrift/1/ 5 | -------------------------------------------------------------------------------- /hs.fi.txt: -------------------------------------------------------------------------------- 1 | prune: yes 2 | tidy: yes 3 | replace_string('), '>') 2 | body: //div[@class='NewsArticleContent'] 3 | test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html -------------------------------------------------------------------------------- /medialens.org.txt: -------------------------------------------------------------------------------- 1 | author: //meta[@property="og:site_name"]/@content 2 | prune: no 3 | test_url: https://www.medialens.org/2012/the-illusion-of-democracy/ 4 | test_contains: In an era of permanent war, economic meltdown 5 | -------------------------------------------------------------------------------- /moo.nac.uci.edu.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='header']//h1[1] 2 | 3 | body: //div[@id='content'] 4 | 5 | strip_id_or_class: toc 6 | 7 | prune: no 8 | 9 | test_url: http://moo.nac.uci.edu/~hjm/HOWTO_move_data.html 10 | -------------------------------------------------------------------------------- /pjmedia.com.txt: -------------------------------------------------------------------------------- 1 | find_string:display:none 2 | replace_string: .. 
3 | 4 | single_page_link: //div[@class='single-page-button']//a 5 | 6 | test_url: https://pjmedia.com/eddriscoll/2016/08/31/tom-wolfe-kingdom-of-speech/ 7 | -------------------------------------------------------------------------------- /real.gr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'article-photo-wrapper')] 2 | prune: no 3 | 4 | test_url: http://www.real.gr/DefaultArthro.aspx?page=arthro&id=360962&catID=1 5 | test_contains: Επισήμως το αποψινό υπουργικό 6 | -------------------------------------------------------------------------------- /thepointmag.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='article'] 2 | strip_id_or_class: z-max 3 | strip_id_or_class: readLaterMenu 4 | 5 | test_url: https://thepointmag.com/2016/examined-life/can-liberal-education-save-the-sciences 6 | -------------------------------------------------------------------------------- /ux.artu.tv.txt: -------------------------------------------------------------------------------- 1 | author: ("Arturo Toledo") 2 | title: //div[@class="post"]/h2 3 | body: //div[@class="entry"] 4 | 5 | # Remove Twitter button 6 | strip: //div[@class="entry"]/p[2]/a/img 7 | test_url: http://ux.artu.tv/?p=192 -------------------------------------------------------------------------------- /wpbeginner.com.txt: -------------------------------------------------------------------------------- 1 | # Required to load the feed properly 2 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0 3 | 4 | test_url: https://www.wpbeginner.com/feed/ 5 | -------------------------------------------------------------------------------- /alexwlchan.net.txt: -------------------------------------------------------------------------------- 1 | body: //article[1] 2 | date: //time/@datetime 3 | 4 | strip: //hgroup 5 | strip: //picture/source 6 
| 7 | prune: no 8 | 9 | test_url: https://alexwlchan.net/2025/personal-archive-of-the-web/ 10 | -------------------------------------------------------------------------------- /arduino-tutorial.de.txt: -------------------------------------------------------------------------------- 1 | // Stripping of Crayon Syntax Highlighter in duplicate 2 | strip: //textarea[contains(@class, 'crayon-plain')] 3 | 4 | test_url: https://www.arduino-tutorial.de/ein-altes-handy-als-arduino-input/ 5 | -------------------------------------------------------------------------------- /ascarter.net.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='article_title'] 2 | author: //span[@class='author'] 3 | date: //h2[@class='dateline'] 4 | body: //div[@class='article_body'] 5 | test_url: http://ascarter.net/2012/02/20/enough-is-enough.html -------------------------------------------------------------------------------- /buquad.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: //h2/a 3 | date: substring-after(//h2, '|') 4 | strip_id_or_class: 'attachment' 5 | strip: //h3 6 | 7 | body: //div[@class='entry'] 8 | test_url: http://buquad.com/2012/04/09/paul-ryan/ -------------------------------------------------------------------------------- /hazlitt.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='article-wrap'] 2 | title: //h1[@class='article-title'] 3 | author: //div[@class='article-footer']//div[@class='author-name']/a 4 | 5 | test_url: https://hazlitt.net/longreads/real-lolita 6 | -------------------------------------------------------------------------------- /health.com.txt: -------------------------------------------------------------------------------- 1 | http_header(cookie): euConsent=true; euConsentId=61c78ceb-c244-4016-b707-3c640ba09311 2 | 3 | test_url: 
https://www.health.com/news/cell-phone-elbow-new-ill-wired-age 4 | test_contains: As symptoms progress 5 | -------------------------------------------------------------------------------- /histoire-filante.fr.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="entry-title"] 2 | date: //header[@class="entry-header"]//time[@class="entry-date published"]/@datetime 3 | 4 | test_url: https://histoire-filante.fr/2019/09/05/notre-itineraire/ 5 | -------------------------------------------------------------------------------- /mesec.cz.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='urs'] | //div[@itemprop='articleBody'] 2 | strip_id_or_class: promo-in-article 3 | 4 | test_url: http://www.mesec.cz/aktuality/ceske-drahy-pridaji-zamestnancum-jejich-mzdy-vzrostou-o-1-7/ 5 | -------------------------------------------------------------------------------- /pandemicequityinitiative.com.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | strip: //svg 4 | 5 | test_url: https://www.pandemicequityinitiative.com/hold-misinformation-lies-betrayal 6 | test_contains: Here’s the truth: the pandemic is not over 7 | -------------------------------------------------------------------------------- /pandodaily.com.txt: -------------------------------------------------------------------------------- 1 | tidy: no 2 | body: //article 3 | date: //time/@datetime 4 | strip_id_or_class: sharedaddy 5 | test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ -------------------------------------------------------------------------------- /philstar.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@property="content:encoded"] 2 | 3 | test_url: 
http://www.philstar.com/headlines/2017/03/06/1678561/samuel-martires-named-new-sc-justice 4 | test_url: http://www.philstar.com/rss/breakingnews 5 | -------------------------------------------------------------------------------- /racjonalista.pl.txt: -------------------------------------------------------------------------------- 1 | author: /html/body/center/b 2 | date: /html/body/table/tr[2]/td/i 3 | single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1] 4 | 5 | test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa -------------------------------------------------------------------------------- /rom-game.fr.txt: -------------------------------------------------------------------------------- 1 | author: //div[@id="main"]//a[contains(@href, "/auteurs/")] 2 | date: //meta[@itemprop="datePublished"]/@content 3 | 4 | test_url: https://www.rom-game.fr/news/3371-Goldeneye+007+devient+Goldeneye+25.html 5 | -------------------------------------------------------------------------------- /singaporeanstocksinvestor.blogspot.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='post-body'] 2 | strip: //div[@id='lws_0'] 3 | prune: no 4 | 5 | test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html -------------------------------------------------------------------------------- /stadt-muenster.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='inhalts-spalte'] 2 | prune: no 3 | strip: //div[@class='footerlinks'] 4 | test_url: https://www.stadt-muenster.de/immobilien/wohnbaupotenzialflaechen/sozialgerechte-bodennutzung.html 5 | -------------------------------------------------------------------------------- /tbray.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='centercontent'] 2 | strip: //div[@id='rightcontent'] 3 | date: 
substring-before( //div[@id='cats'], '·') 4 | title: //h1 5 | test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money -------------------------------------------------------------------------------- /thesocialitefamily.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' strates-single ')] 2 | 3 | test_url: https://www.thesocialitefamily.com/familles/mulhouse/chez-anne-hubert-la-cerise-sur-le-gateau/ 4 | -------------------------------------------------------------------------------- /tokyo-np.co.jp.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="content-area"] 2 | 3 | strip_id_or_class: cmp-lst016 4 | strip_id_or_class: cmp-misc010 5 | strip_id_or_class: cmp-hdg005 6 | 7 | test_url: https://www.tokyo-np.co.jp/article/454831 8 | -------------------------------------------------------------------------------- /web.gekisaka.jp.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="geki_contentitem"] 2 | 3 | title: substring-before(//meta[@property='og:title']/@content , ' | ゲキサカ') 4 | 5 | test_url: https://web.gekisaka.jp/news/japan/detail/?441149-441149-fl= 6 | -------------------------------------------------------------------------------- /writerunboxed.com.txt: -------------------------------------------------------------------------------- 1 | http_header(User-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0 2 | 3 | test_url: https://writerunboxed.com/2021/09/23/writerslife-the-411-on-writing-retreats/ 4 | -------------------------------------------------------------------------------- /.fivefilters.org.txt: -------------------------------------------------------------------------------- 1 | # Pastepad specific 2 | title: //div[@id='ff-pastepad-content']//h1 3 | body: 
//div[@id='ff-pastepad-content'] 4 | prune: no 5 | # todo: add test file 6 | test_url: http://pastepad.fivefilters.org/test.html 7 | -------------------------------------------------------------------------------- /.robweychert.com.txt: -------------------------------------------------------------------------------- 1 | body: //article[@id='post'] 2 | title: //h1[@class='title'] 3 | prune: no 4 | strip_id_or_class: metadata 5 | strip: //footer 6 | 7 | test_url: https://v6.robweychert.com/blog/2023/02/field-day-nyc/ 8 | 9 | -------------------------------------------------------------------------------- /24a11y.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: substring-after(//div[contains(@class, "author-description")]//h2[@class="author-title"], 'About') 3 | 4 | test_url: https://www.24a11y.com/2018/web-components-still-need-to-be-accessible/ 5 | -------------------------------------------------------------------------------- /blog.native-instruments.com.txt: -------------------------------------------------------------------------------- 1 | body: //section[contains(@class, 'post-main')] 2 | 3 | prune: no 4 | 5 | test_url: https://blog.native-instruments.com/mario-kruselj-a-k-a-evildragon-talks-creativity-kontakt-scripting-and-more/ 6 | -------------------------------------------------------------------------------- /deia.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='widget full_article'] 2 | strip: //div[@class='Herramientas'] 3 | 4 | test_url: http://www.deia.com/2015/10/03/politica/euskadi/el-envilecimiento-de-la-politica-y-de-los-medios-de-comunicacion 5 | -------------------------------------------------------------------------------- /greaterwrong.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' post-body ')] 2 | 3 | 
prune: no 4 | 5 | test_url: https://www.greaterwrong.com/posts/BHqzGLNyQHjDXhEc8/is-stupidity-expanding-some-hypotheses 6 | -------------------------------------------------------------------------------- /halo.bungie.org.txt: -------------------------------------------------------------------------------- 1 | title:substring-before(id("maincontent")/table, 'Posted') 2 | body:id("maincontent")/p 3 | # eventually convert linebreaks better 4 | 5 | test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html -------------------------------------------------------------------------------- /jacobin.com.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(@class, 'po__article')] 2 | 3 | strip: //aside 4 | strip: //footer 5 | 6 | prune: no 7 | 8 | test_url: https://www.jacobin.com/2019/07/noam-chomsky-interview-climate-change-imperialism 9 | -------------------------------------------------------------------------------- /jandan.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content']//div[@class = 'post f'] 2 | strip_id_or_class: comment-big 3 | strip_id_or_class: avatar 4 | strip: //div[@class='time_s'] 5 | 6 | test_url: http://jandan.net/2011/04/03/iphone-5-sony.html -------------------------------------------------------------------------------- /leb.fbi.gov.txt: -------------------------------------------------------------------------------- 1 | body: //section[@id="main-content"] 2 | body: //div[@id="main-content-container"] 3 | 4 | prune: no 5 | 6 | test_url: https://leb.fbi.gov/articles/featured-articles/current-state-of-interview-and-interrogation 7 | -------------------------------------------------------------------------------- /microsiervos.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //article 3 | 4 | strip: //div[@class='compartir-post'] 5 | 6 | 
test_url: https://www.microsiervos.com/archivo/espacio/pequeno-vistazo-interior-edificio-ensamblado-vehiculos-chino.html 7 | -------------------------------------------------------------------------------- /proskauer.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'article-content__copy')] 2 | strip: //section[@id='cookie'] 3 | 4 | insert_detected_image: no 5 | 6 | test_url: https://www.proskauer.com/pub/launching-a-hedge-fund-in-a-crisis 7 | -------------------------------------------------------------------------------- /roughtype.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='content'] 2 | strip: //p[@class='postmeta']/following::* 3 | strip: //p[@class='postmeta'] 4 | strip: //p[@align='left'] 5 | test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php -------------------------------------------------------------------------------- /slog.thestranger.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: postCategory 2 | title: //h3[@class='postTitle'] 3 | body: //div[@class='postBody'] 4 | test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone -------------------------------------------------------------------------------- /utiliser-lightroom.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="titrearticle"] 2 | body: //div[@class="article"] 3 | 4 | test_url: http://www.utiliser-lightroom.com/blog/2019/04/23/skylum-flex-et-airmagic-pour-les-utilisateurs-de-lightroom/ 5 | -------------------------------------------------------------------------------- /www2.cnrs.fr.txt: -------------------------------------------------------------------------------- 1 | # Site configuration for CNRS press releases 2 | 3 | body: 
//div[@id="contenu"]//h2[@id="chapeau"] | //div[@id="contenu"]/div[@id="textContenu"] 4 | 5 | test_url: http://www2.cnrs.fr/presse/communique/5327.htm 6 | -------------------------------------------------------------------------------- /.repubblica.it.txt: -------------------------------------------------------------------------------- 1 | body: //div[@itemprop="articleBody"] 2 | 3 | prune: no 4 | 5 | test_url: https://rep.repubblica.it/pwa/longform/2020/05/27/news/rsa_morti_coronavirus_trivulzio-257664697/?ref=RHPPTP-BH-I257793309-C12-P7-S1.12-T1 6 | -------------------------------------------------------------------------------- /bjango.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='articlehead'] 2 | body: //div[@class='column'] 3 | strip: //h1 4 | strip: //div[@class='help'] 5 | 6 | #no author or date/time provided in current layout 7 | test_url: http://bjango.com/articles/actions/ -------------------------------------------------------------------------------- /blog.robertelder.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' blog-article ')] 2 | 3 | strip: //table[@class='leads-table'] 4 | 5 | test_url: https://blog.robertelder.org/detect-keyup-event-linux-terminal/ 6 | -------------------------------------------------------------------------------- /blog.sentry.io.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | author: //aside/a[contains(@href, '/authors/')]/span 3 | 4 | strip: //article/div[1] 5 | strip: //article/ul[1] 6 | strip: //aside 7 | 8 | test_url: https://blog.sentry.io/gdpr-sentry-and-you/ 9 | -------------------------------------------------------------------------------- /cars.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[contains(@class, 
'basicInfo')]//h1 2 | 3 | body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox'] 4 | 5 | prune: no 6 | 7 | test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779 -------------------------------------------------------------------------------- /cohost.org.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' co-project-display-name ')] 3 | date: //time 4 | prune: no 5 | strip_id_or_class: co-comment-box 6 | strip_id_or_class: co-thread-footer 7 | -------------------------------------------------------------------------------- /devblogs.microsoft.com.txt: -------------------------------------------------------------------------------- 1 | title: //article//h1 2 | body: //article 3 | 4 | strip_id_or_class: entry-meta 5 | strip_id_or_class: post-card 6 | strip: //footer 7 | 8 | test_url: https://devblogs.microsoft.com/oldnewthing/?p=42203 9 | -------------------------------------------------------------------------------- /edge.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="block-system-main"] 2 | title: //div[@id="block-system-main"]//h1 3 | 4 | prune: no 5 | 6 | test_url: https://www.edge.org/responses/what-scientific-term-or%C2%A0concept-ought-to-be-more-widely-known 7 | -------------------------------------------------------------------------------- /gocomics.com.txt: -------------------------------------------------------------------------------- 1 | body: //a[@class="photo"]/img[@class="strip"] 2 | author: //meta[@name="author"]/@content 3 | date: //meta[@property="gocomics:publish_date"]/@content 4 | 5 | test_url: http://www.gocomics.com/garfield/2015/06/13 6 | -------------------------------------------------------------------------------- /gurusblog.com.txt: -------------------------------------------------------------------------------- 1 | title: 
//h1[@class='entry-title'] 2 | body: //div[@class='content'] 3 | 4 | test_url: https://www.gurusblog.com/archives/las-subastas-de-joyas-de-christies-y-sothebys-rompen-todos-los-records-historicos/15/11/2018/ 5 | -------------------------------------------------------------------------------- /isource.com.txt: -------------------------------------------------------------------------------- 1 | # Remove social buttons 2 | strip: //div[@id='temp_Content_Right'] 3 | 4 | # Remove duplicate article title 5 | strip: //*[(@class='storytitle')] 6 | test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ -------------------------------------------------------------------------------- /maitre-eolas.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="post-content"] 2 | author: //meta[@name="author"]/@content 3 | date: //meta[@name="date"]/@content 4 | 5 | test_url: http://www.maitre-eolas.fr/post/2018/05/13/Pour-en-finir-avec-les-fiches-S 6 | -------------------------------------------------------------------------------- /manga-news.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class='actu-title'] 2 | 3 | body: //div[@class='actu-content'] 4 | 5 | prune: no 6 | 7 | test_url: http://www.manga-news.com/index.php/actus/2016/02/02/Deux-retours-dans-le-planning-des-editions-Pika 8 | -------------------------------------------------------------------------------- /nosalty.hu.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='tab-recept']//h1 2 | body: //div[@id='tab-recept']//div[contains(@class, 'column-container')] 3 | strip_id_or_class: ajanlo-box 4 | prune: no 5 | 6 | test_url: http://www.nosalty.hu/recept/szupergyors-fank -------------------------------------------------------------------------------- /protonmail.com.txt: 
-------------------------------------------------------------------------------- 1 | title: //h1[@class="entry-title"] 2 | author: //header[@class="entry-header"]//a[@class="url fn n"] 3 | 4 | strip: //div[@class="csbtns"] 5 | 6 | test_url: https://protonmail.com/blog/biometric-authentication/ 7 | -------------------------------------------------------------------------------- /ruhlman.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='entry-title'] 2 | author: //span[@class='author vcard'] 3 | date: //abbr[@class='published'] 4 | body: //div[@class='entry-content'] 5 | 6 | test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ -------------------------------------------------------------------------------- /shifteleven.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[ @class='entry-content' ] 2 | 3 | strip: //div[ contains(@class, 'sharing') ] 4 | 5 | date: //div[ @class='entry-meta' ]/a 6 | test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit -------------------------------------------------------------------------------- /timesofisrael.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: banner-placeholder 2 | strip_id_or_class: newsletter-article 3 | 4 | test_url: https://www.timesofisrael.com/in-foul-mouthed-tirade-trump-fumes-at-israel-for-striking-iran-violating-ceasefire/ 5 | -------------------------------------------------------------------------------- /wordpress.org.txt: -------------------------------------------------------------------------------- 1 | author: //div[@class="meta"]/a[1] 2 | 3 | strip: //div[@class="meta"] 4 | strip: //div[@id="likes-other-gravatars"] 5 | strip: //noscript 6 | 7 | test_url: https://wordpress.org/news/2018/07/update-on-gutenberg/ 8 | --------------------------------------------------------------------------------
/wz-newsline.de.txt: -------------------------------------------------------------------------------- 1 | title://h1 2 | 3 | date://p[@class='articleDate'] 4 | body://div[@class='articleBody wzStandardArticle'] 5 | test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 -------------------------------------------------------------------------------- /.allthingsd.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content-left']/div[@class='post'] 2 | strip_id_or_class: social 3 | strip_id_or_class: atd-disqus-disclaimer 4 | tidy: no 5 | 6 | test_url: http://mediamemo.allthingsd.com/20110516/bit-ly-gets-a-new-boss/ -------------------------------------------------------------------------------- /37signals.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='post_header']//h2/a 2 | author: //span[@class='author'] 3 | date: //span[@class='date'] 4 | body: //div[@id='Content'] 5 | 6 | test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department -------------------------------------------------------------------------------- /7newsbelize.com.txt: -------------------------------------------------------------------------------- 1 | title: //*[@id='sstitle'] 2 | body: //div[@id='sstory'] 3 | strip_id_or_class: newsoptions 4 | prune: no 5 | 6 | test_url: http://www.7newsbelize.com/sstory.php?nid=25654 7 | test_url: http://www.7newsbelize.com/7news.xml -------------------------------------------------------------------------------- /agirpourlatransition.ademe.fr.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | 3 | strip: //div[@class="o-block-projet-content-avis"] 4 | 5 | test_url: https://agirpourlatransition.ademe.fr/particuliers/maison/sante/pourquoi-conseille-daerer-logement-tous-jours 6 | 
-------------------------------------------------------------------------------- /bbva.es.txt: -------------------------------------------------------------------------------- 1 | body: (//main//article)[1] 2 | 3 | strip_id_or_class: breadcrumb 4 | strip: //div[@data-component="index"] 5 | 6 | prune: no 7 | 8 | test_url: https://www.bbva.es/finanzas-vistazo/aprendemos-juntos/juan-antonio-madrid.html 9 | -------------------------------------------------------------------------------- /bdaily.co.uk.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' Artivent__content ')] 2 | 3 | prune: no 4 | 5 | test_url: https://bdaily.co.uk/articles/2022/07/06/bupa-launches-new-paralympicsgb-partnership-at-salford-hq 6 | -------------------------------------------------------------------------------- /blog.chriszacharias.com.txt: -------------------------------------------------------------------------------- 1 | author: //h2[@id="nav_title"]//a 2 | date: //time[@class="article_time"]/@datetime 3 | 4 | strip: //figure[contains(@class, "kudo")] 5 | 6 | test_url: http://blog.chriszacharias.com/a-conspiracy-to-kill-ie6 7 | -------------------------------------------------------------------------------- /blog.twitter.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="component-wrapper"] 2 | 3 | strip_id_or_class: tweet-error-text 4 | 5 | test_url: https://blog.twitter.com/developer/en_us/topics/tools/2018/new-developer-requirements-to-protect-our-platform.html 6 | -------------------------------------------------------------------------------- /blog.wells.ee.txt: -------------------------------------------------------------------------------- 1 | title: //h2/a[@class="no-link title"] 2 | author: //h2[@id="blog_owner"] 3 | date: //time 4 | strip: //h2/a[@class="no-link title"] 5 | test_url: http://blog.wells.ee/retina 6 | 
test_url: http://blog.wells.ee/skeuomorphism -------------------------------------------------------------------------------- /borderhouseblog.com.txt: -------------------------------------------------------------------------------- 1 | title://h1 2 | author://div[@class="meta"]/span/a 3 | date://div[@class="date"] 4 | body://div[@class="content article"] 5 | strip://div[@class="content article"]/h1 6 | 7 | test_url: http://borderhouseblog.com/?p=7832 -------------------------------------------------------------------------------- /business2community.com.txt: -------------------------------------------------------------------------------- 1 | date: substring-after(//p[@class='byline'],'Published') 2 | 3 | strip: //div[@class='article-meta'] 4 | 5 | test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340 6 | -------------------------------------------------------------------------------- /cert-bund.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='announcement-top information']|//div[@class='announcement revisionhistory']|//div[@class='announcement description'] 2 | 3 | test_url: https://www.cert-bund.de/advisoryshort/CB-K18-1118%20UPDATE%205 4 | -------------------------------------------------------------------------------- /ciaosamin.com.txt: -------------------------------------------------------------------------------- 1 | body://div[contains(@class, 'entry-content')] 2 | date://h2[contains(@class, 'date-header')] 3 | title://h3[contains(@class, 'post-title')] 4 | test_url: http://ciaosamin.com/ciao/2015/12/28/recipe-million-dollar-caramels 5 | -------------------------------------------------------------------------------- /communities-dominate.blogs.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="entry-body"] 2 | test_url: 
http://communities-dominate.blogs.com/brands/2012/03/brutal-truth-about-lumia-cannot-sustain-even-1-to-1-replacement-of-symbian-windows-phone-strategy-do.html -------------------------------------------------------------------------------- /eckerd.edu.txt: -------------------------------------------------------------------------------- 1 | title: //h1[contains(@class, 'entry-title')] 2 | body: //div[contains(@class, 'main-content')] 3 | date: //span[contains(@class, 'meta-date')] 4 | test_url: https://www.eckerd.edu/news/blog/eckerd-college-data-science-minor/ 5 | -------------------------------------------------------------------------------- /hespress.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] 2 | 3 | prune: no 4 | tidy: no 5 | 6 | test_url: http://hespress.com/videos/73684.html 7 | test_url: http://hespress.com/permalink/73678.html -------------------------------------------------------------------------------- /hometheaterreview.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='entry-body'] 2 | strip_id_or_class: paginate 3 | strip: //p[contains(., 'Additional Resources')] 4 | test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ -------------------------------------------------------------------------------- /literaryreview.co.uk.txt: -------------------------------------------------------------------------------- 1 | date: //meta[@itemprop="datePublished"]/@content 2 | title: //meta[@property="og:title"]/@content 3 | author: (//h1[@itemprop="author"])[1] 4 | 5 | test_url: https://literaryreview.co.uk/the-woolworths-poltergeist 6 | -------------------------------------------------------------------------------- /maxim.com.txt: -------------------------------------------------------------------------------- 1 | body: 
//div[contains(@class, 'field-name-body') or contains(@class, 'featured-image')] 2 | 3 | test_url: http://www.maxim.com/rss-feeds.xml 4 | test_url: http://www.maxim.com/entertainment/article/second-city-chicago-goes-flames -------------------------------------------------------------------------------- /palmbeachpost.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' story-text ')] 2 | 3 | test_url: http://www.palmbeachpost.com/news/crime--law/new-pbso-releases-sketch-person-interest-shooting/IcToH2Mij0hAw3EIFnw5tL/ 4 | -------------------------------------------------------------------------------- /phys.org.txt: -------------------------------------------------------------------------------- 1 | http_header(User-Agent): Mastodon/4.3.2 (http.rb/5.2.0; +https://mastodon.social/) Bot 2 | 3 | test_url: https://phys.org/news/2025-03-dark-universe-telescope.html 4 | test_url: https://phys.org/rss-feed/breaking/space-news/ 5 | -------------------------------------------------------------------------------- /propakistani.pk.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' content-post ')] 2 | test_url: https://propakistani.pk/2017/06/23/eid-mubarak-happy-holidays-everyone/ 3 | test_contains: away from work for Eid Holidays 4 | -------------------------------------------------------------------------------- /retractionwatch.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 3 | test_url: https://retractionwatch.com/2018/07/30/how-institutions-gaslight-whistleblowers-and-what-can-be-done/ 4 | -------------------------------------------------------------------------------- /robots.thoughtbot.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //section[@class='post text'] 2 | title: //h1[@class='title'] 3 | date: //p[@class='post-date'] 4 | strip: //section[@class='meta-info'] 5 | test_url: http://robots.thoughtbot.com/post/32455387133/four-phase-test -------------------------------------------------------------------------------- /spiderum.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' editor ')] 2 | 3 | prune: no 4 | 5 | test_url: https://spiderum.com/bai-dang/Dung-luot-mang-xa-hoi-trong-thoi-gian-ranh-nua-Hay-lam-5-dieu-nay-PvXeTJ9tAR3D 6 | -------------------------------------------------------------------------------- /techpinions.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="post"] 2 | 3 | strip: //div[@class="post-meta"] 4 | strip: //div[@id="socialicons"] 5 | strip: //div[@id="authorbox"] 6 | 7 | test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 -------------------------------------------------------------------------------- /thewirecutter.com.txt: -------------------------------------------------------------------------------- 1 | body: //main/div/article 2 | 3 | strip: //header 4 | strip: //aside 5 | 6 | author: //p[@data-scp="author_name"] 7 | date: //time/@datetime 8 | 9 | test_url: https://thewirecutter.com/money/credit-cards/ 10 | -------------------------------------------------------------------------------- /tuaw.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='posttitle'] 2 | author: //span[@class='author']/a 3 | date: //span[@class='timestamp'] 4 | body: //div[@class='body'] 5 | 6 | test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ 
-------------------------------------------------------------------------------- /wpmayor.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='nrelate_flyout_placeholder'] 2 | 3 | strip_id_or_class: share 4 | 5 | prune: no 6 | 7 | test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/ 8 | test_url: http://www.wpmayor.com/feed/ -------------------------------------------------------------------------------- /alsacreations.com.txt: -------------------------------------------------------------------------------- 1 | author: //div[@class="auteur-meta"]//a[contains(@href, '/profil/')] 2 | date: //div[@class="auteur-meta"]//time/@datetime 3 | 4 | test_url: https://www.alsacreations.com/tuto/lire/1771-css-grid-layout-en-production.html 5 | -------------------------------------------------------------------------------- /cjr.org.txt: -------------------------------------------------------------------------------- 1 | body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] 2 | prune: no 3 | 4 | single_page_link: //li[@class='print']/a 5 | 6 | test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php -------------------------------------------------------------------------------- /filamentgroup.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id="content"]//h1 2 | author: //p[@class="article_author"]//a[contains(@href, "/about")] 3 | 4 | strip: //p[@class="articles_viewall"] 5 | 6 | test_url: https://www.filamentgroup.com/lab/select-css.html 7 | -------------------------------------------------------------------------------- /fortelabs.co.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' postContent ')] 2 | 3 | author: //li[@itemprop='author'] 4 | 5 | prune: no 6 | 7 | test_url: 
https://fortelabs.co/blog/the-ultimate-guide-to-summarizing-books/ 8 | -------------------------------------------------------------------------------- /geeksofdoom.com.txt: -------------------------------------------------------------------------------- 1 | author: substring-after(//span[@class='storyauthor'],'Posted by') 2 | date: //span[@class='storydate'] 3 | test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ -------------------------------------------------------------------------------- /higcapital.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2[contains(@class, 'news-release-title')] 2 | body: //article 3 | strip: //div[contains(@class, 'intro')] 4 | strip: //div[contains(@class, 'contact')] 5 | 6 | test_url: https://higcapital.com/news/release/1251 7 | -------------------------------------------------------------------------------- /humantransit.org.txt: -------------------------------------------------------------------------------- 1 | title: //h3[@class="entry-header"] 2 | date: //h2[@class="date-header"] 3 | body: //div[contains(@class, 'entry')] 4 | 5 | test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html -------------------------------------------------------------------------------- /mforum.cari.com.my.txt: -------------------------------------------------------------------------------- 1 | body: (//td[starts-with(@id, 'postmessage_')])[1] 2 | 3 | prune: no 4 | 5 | test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033 6 | test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0 -------------------------------------------------------------------------------- /mikeash.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class="blogtitle"] 2 | strip: //div[@class="blogtitle"] 3 | 4 | author: 
substring-after(//span[@class="blogheader"], 'Author: ') 5 | test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html -------------------------------------------------------------------------------- /neunetz.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'entry-content')] 2 | prune: no 3 | 4 | strip_id_or_class: sharedaddy 5 | 6 | test_url: https://neunetz.com/2019/09/13/apple-tv-quasi-geschenkt/ 7 | test_contains: Apple muss hier noch aufholen 8 | -------------------------------------------------------------------------------- /seattletransitblog.com.txt: -------------------------------------------------------------------------------- 1 | title: //h3[@class="storytitle"] 2 | date: //div[@class='meta'] 3 | body: //div[@class='storycontent'] 4 | 5 | test_url: http://seattletransitblog.com/2012/06/19/times-st-louis-interested-in-buying-waterfront-streetcars/ -------------------------------------------------------------------------------- /spectrejournal.com.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | prune: no 4 | 5 | strip_id_or_class: elementor-widget-share-buttons 6 | 7 | test_url: https://spectrejournal.com/roundtable-on-china/ 8 | test_contains: I know you write in pretty mainstream media 9 | -------------------------------------------------------------------------------- /squashed.tumblr.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='content'] 2 | date: substring-before( //div[@class='unit dateAndNotes'], 'with') 3 | title: //h3 4 | test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending -------------------------------------------------------------------------------- /tijd.be.txt: -------------------------------------------------------------------------------- 1 | strip: 
//div[@class="ac_paragraph-hidden"] 2 | 3 | test_url: https://www.tijd.be/politiek-economie/internationaal/algemeen/ik-ben-vooral-bezorgd-dat-corona-uitbreekt-in-afrika/10204834.html 4 | test_contains: U klinkt gealarmeerder 5 | -------------------------------------------------------------------------------- /vk.com.txt: -------------------------------------------------------------------------------- 1 | # use this to avoid meta redirect to badbrowser.php 2 | # user agent will cause a redirect to m.vk.com (mobile version) instead 3 | http_header(user-agent): PHP/7.2 4 | 5 | test_url: https://vk.com/noamchomskyvk?w=wall433994637_327 6 | -------------------------------------------------------------------------------- /voltairenet.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1[contains(concat(' ',normalize-space(@class),' '),' titre_serif_1 ')] 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' texte_sans ')] 3 | 4 | test_url: http://www.voltairenet.org/article195149.html -------------------------------------------------------------------------------- /vozpopuli.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@class='mce-body'] 3 | 4 | strip: //aside 5 | 6 | test_url: https://www.vozpopuli.com/opinion/pandemia-racismo-iglesias_0_1365163963.html 7 | test_contains: Ande yo caliente y muérase la gente 8 | -------------------------------------------------------------------------------- /wochenanzeiger.de.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: kleinertext 2 | strip: //h3[text() = 'Weiterlesen'] 3 | strip: //div[starts-with(@class, 'box') and contains(@class, '_simple')] 4 | 5 | test_url: https://www.wochenanzeiger.de/article/238700.html 6 | 7 | -------------------------------------------------------------------------------- /adslzone.net.txt: 
-------------------------------------------------------------------------------- 1 | title: //h1[@class='entry-title'] 2 | body: //article 3 | 4 | strip: //div[@class='io-word-count'] 5 | strip: //aside 6 | strip_id_or_class: breadcrumb 7 | 8 | test_url: https://www.adslzone.net/2017/12/19/xiaomi-sneaky-santa-17/ 9 | -------------------------------------------------------------------------------- /basicthinking.de.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | date: //span[@class='date'] 3 | body: //div[@class='entry'] 4 | 5 | strip: //div[@class='zusatz'] 6 | 7 | test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ -------------------------------------------------------------------------------- /blog.eleven-labs.com.txt: -------------------------------------------------------------------------------- 1 | body://div[@class='post_inner_wrapper'] 2 | date://div[@class='sub_page_caption'] 3 | strip://div[contains(concat(' ',normalize-space(@class),' '),' post_img ')] 4 | test_url: http://blog.eleven-labs.com/en/cqrs-pattern-2/ 5 | -------------------------------------------------------------------------------- /diagonalperiodico.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='field__items'] 2 | title: //div[@class='art_titulo'] 3 | 4 | test_url: https://www.diagonalperiodico.net/global/27947-cuanto-mas-tiempo-nos-aferremos-este-sistema-peor-y-menores-seran-nuestras-opciones 5 | -------------------------------------------------------------------------------- /ecranlarge.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' content-description ')] 2 | 3 | test_url: https://www.ecranlarge.com/films/news/997080-star-wars-les-derniers-jedi-de-nouvelles-photos-de-rey-et-luke-en-mode-badass 4 | 
-------------------------------------------------------------------------------- /engineering.tumblr.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | body: //div[@class="post_content"] 3 | author: //p[@class="author"]/a 4 | date: //p[@class="date"] 5 | strip: //h2 6 | strip: //header 7 | test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose -------------------------------------------------------------------------------- /inhabitat.com.txt: -------------------------------------------------------------------------------- 1 | # set body 2 | body: //div[@class='post-listing'] 3 | 4 | # remove clutter 5 | strip: //a/big 6 | strip: //a/em 7 | strip: //p/em 8 | test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ -------------------------------------------------------------------------------- /muycomputerpro.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='entry-title'] 2 | body: //div[@id='mvp-content-main'] 3 | 4 | strip: //div[@class='recomienda-wrap'] 5 | 6 | test_url: https://www.muycomputerpro.com/2017/09/20/fundacion-universidad-empresa-hyperloop-one 7 | -------------------------------------------------------------------------------- /mysqlblog.fivefarmers.com.txt: -------------------------------------------------------------------------------- 1 | http_header(User-Agent): Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 2 | 3 | test_url: http://mysqlblog.fivefarmers.com/2012/08/16/understanding-mysql_config_editors-security-aspects/ 4 | 5 | -------------------------------------------------------------------------------- /newcriterion.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-text-column ')] 2 | title: 
//meta[@property="og:title"]/@content 3 | 4 | prune: no 5 | 6 | test_url: https://newcriterion.com/issues/2020/12/acheiropoieta 7 | -------------------------------------------------------------------------------- /papodehomem.com.br.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="page_title"] 2 | body: //div[@class="entry arquivo"] 3 | author: //span[@class="author"] 4 | footnotes: yes 5 | prune: yes 6 | test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/ -------------------------------------------------------------------------------- /rasgolatente.es.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='entry-title'] 2 | author: //div[@class='entry-author'] 3 | date: //div[@class='entry-time'] 4 | body: //div[@class='entry-content'] 5 | 6 | test_url: http://rasgolatente.es/estupidez-psicologia-estupidos/ 7 | -------------------------------------------------------------------------------- /resilience.org.txt: -------------------------------------------------------------------------------- 1 | # Article's main content 2 | body: //section[@class="post-content"] 3 | 4 | # A test URL, e.g. 
the article you used to write the file 5 | test_url: https://www.resilience.org/stories/2021-10-14/rural-resilience-all-in-good-time/ 6 | -------------------------------------------------------------------------------- /sayidaty.net.txt: -------------------------------------------------------------------------------- 1 | date: //meta[@property='article:published_time']/@content 2 | body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')] 3 | 4 | test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed -------------------------------------------------------------------------------- /sdxcentral.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry-content'] 2 | author: //div[@class='entry-byline']//a[@class='entry-author'] 3 | strip: //aside 4 | 5 | test_url: https://www.sdxcentral.com/articles/news/pandemic-muddles-sd-wan-supply-chain/2020/05/ 6 | -------------------------------------------------------------------------------- /toolinux.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2[contains(@class,'news')] 2 | body: //div[contains(@class,'articleContent')] 3 | date: substring-after(//div[@class = 'SupaDate']/text(), 'le') 4 | 5 | test_url: http://www.toolinux.com/Wi-Fi-Linksys-WRT-la-legende-de 6 | -------------------------------------------------------------------------------- /blog.landr.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[contains(concat(' ',normalize-space(@class),' '),' landr_single_header_title ')] 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 3 | test_url: https://blog.landr.com/how-to-eq-vocals/ 4 | -------------------------------------------------------------------------------- /blog.pinboard.in.txt: 
-------------------------------------------------------------------------------- 1 | title: //a[@class="blog_title"] 2 | date: //p[@class="when"]/a 3 | body: //div[@class="blog_entry"] 4 | strip_id_or_class:blog_title 5 | strip_id_or_class:when 6 | test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ -------------------------------------------------------------------------------- /boundlessline.org.txt: -------------------------------------------------------------------------------- 1 | title: substring-before(//title, '|') 2 | body: //div[@class="entry"] 3 | # Remove the author's picture 4 | strip: //div[@class="entry"]/a[1] 5 | test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html -------------------------------------------------------------------------------- /devlinsangle.blogspot.co.at.txt: -------------------------------------------------------------------------------- 1 | date: //h2[@class='date-header'] 2 | body: //div[@class='post hentry'] 3 | title: //h3 4 | strip: //div[@class='post-footer'] 5 | 6 | test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html -------------------------------------------------------------------------------- /eff.org.txt: -------------------------------------------------------------------------------- 1 | author: //meta[@name="author"]/@content 2 | 3 | wrap_in(blockquote): //p[contains(concat(' ',normalize-space(@class),' '),' pull-quote ')] 4 | 5 | test_url: https://www.eff.org/deeplinks/2018/02/john-perry-barlow-internet-pioneer-1947-2018 6 | -------------------------------------------------------------------------------- /gurumed.org.txt: -------------------------------------------------------------------------------- 1 | prune: no 2 | body: //div[@class='entry'] 3 | strip: //div[@class='addthis_toolbox'] 4 | strip: //div[@class='yarpp-related'] 5 | 6 | test_url: 
http://www.gurumed.org/2015/06/22/nous-entrons-dsormais-dans-la-sixime-extinction-massive/ 7 | -------------------------------------------------------------------------------- /hvg.hu.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='pg-content']//h1 2 | body: //div[@id='articleBody0'] 3 | replace_string():

4 | 5 | single_page_link: //div[@class="up-header"]/a 6 | 7 | prune: no 8 | 9 | test_url: http://hvg.hu/w/20111125_sparta -------------------------------------------------------------------------------- /ilyabirman.ru.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='published visible e2-smart-title']//span 2 | author: //span[@id='e2-blog-title'] 3 | date: //p[@class='super-h'] 4 | body: //div[@class='text published visible'] 5 | test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ -------------------------------------------------------------------------------- /interconnected.org.txt: -------------------------------------------------------------------------------- 1 | body: //article[1] 2 | date: //meta[@property="og:updated_time"]/@content 3 | 4 | strip: //h2[1] 5 | strip: //h5[1] 6 | 7 | prune: no 8 | tidy: no 9 | 10 | test_url: https://interconnected.org/home/2020/09/10/streak 11 | -------------------------------------------------------------------------------- /jp.motorsport.com.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: relatedContent 2 | strip_id_or_class: text-body-lg 3 | strip: //p[contains(@class, "mt-1")] 4 | 5 | test_url: https://jp.motorsport.com/super-formula/news/sf-r12-kobayashi-on-2026-season/10779252/#goog_rewarded 6 | -------------------------------------------------------------------------------- /lehollandaisvolant.net.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="blog-post-title"] 2 | date: //time[contains(@itemprop, "datePublished")]/@datetime 3 | 4 | test_url: https://lehollandaisvolant.net/?d=2019/12/07/11/41/38-non-respirer-nemet-pas-plus-de-co2-dans-latmosphere 5 | -------------------------------------------------------------------------------- /maritimedanmark.dk.txt: -------------------------------------------------------------------------------- 
1 | body: //main[contains(concat(' ',normalize-space(@class),' '),' relative ')] 2 | 3 | strip: //div[contains(@class, 'js-banner-promoted')] 4 | 5 | test_url: https://www.maritimedanmark.dk/nkts-nye-kabelskib-kan-operere-pa-metanol 6 | -------------------------------------------------------------------------------- /novinky.cz.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'tpl-king-top-content')] 2 | 3 | prune: no 4 | strip_id_or_class: ogm-advert 5 | 6 | test_url: https://www.novinky.cz/koronavirus/clanek/zlom-v-boji-s-koronavirem-svycari-chteji-ockovat-uz-na-podzim-40321864 7 | -------------------------------------------------------------------------------- /rachelandrew.co.uk.txt: -------------------------------------------------------------------------------- 1 | date: //time/@datetime 2 | 3 | strip: //aside[@class="article-meta"] 4 | strip: //div[@class="keep-in-touch"] 5 | 6 | test_url: https://www.rachelandrew.co.uk/archives/2019/01/30/html-css-and-our-vanishing-industry-entry-points/ 7 | -------------------------------------------------------------------------------- /rezeptwelt.de.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='step-content'] | //div[@class='global-active ingredients-box'] 2 | title: //div[@class='step-1-container'] 3 | 4 | tidy: no 5 | test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372 -------------------------------------------------------------------------------- /stefanjudis.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@class="c-markdown"] 3 | date: (//dl[@class="c-pageDetails"]//time[@class="c-prettyDate"]/@datetime)[1] 4 | 5 | test_url: https://www.stefanjudis.com/today-i-learned/css-content-accepts-alternative-text/ 6 | 
-------------------------------------------------------------------------------- /techcommunity.microsoft.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' lia-message-body-content ')] 2 | 3 | test_url: https://techcommunity.microsoft.com/t5/Azure-Active-Directory-Identity/Your-Pa-word-doesn-t-matter/ba-p/731984 4 | -------------------------------------------------------------------------------- /visualcapitalist.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@id='mvp-content-main'] | //article//header 3 | 4 | strip_id_or_class: mvp-post-cat 5 | 6 | test_url: https://www.visualcapitalist.com/from-greek-to-latin-visualizing-the-evolution-of-the-alphabet/ 7 | -------------------------------------------------------------------------------- /.ietf.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content'] 2 | date: //time/@datetime 3 | 4 | insert_detected_image: no 5 | 6 | strip_id_or_class: pilcrow 7 | 8 | prune: no 9 | 10 | test_url: https://datatracker.ietf.org/doc/html/draft-ietf-oauth-sd-jwt-vc-01 11 | -------------------------------------------------------------------------------- /.philhist.unibas.ch.txt: -------------------------------------------------------------------------------- 1 | body: (//div[contains(concat(" ",normalize-space(@class)," ")," content-unibas ")])[1] 2 | 3 | prune: no 4 | 5 | test_url: https://dg.philhist.unibas.ch/de/studium/werkzeugkasten-geschichte/wissenschaftliches-arbeiten/quellenanalyse/ 6 | -------------------------------------------------------------------------------- /24.ae.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='DivTitle'] 2 | body: //div[@id='divImages' or @id='Divkhabarcontent'] 3 | author: //div[@id='DivAuthor'] 4 | 5 
| prune: no 6 | 7 | test_url: http://24.ae/article.aspx?ArticleId=123304 8 | test_url: http://24.ae/rss.aspx?pageId=30 9 | -------------------------------------------------------------------------------- /acroswing.fr.txt: -------------------------------------------------------------------------------- 1 | tidy:no 2 | date: //time[@class='updated'] 3 | dissolve: //ul[@class='video-gallery']/li 4 | dissolve: //ul[@class='video-gallery'] 5 | test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php -------------------------------------------------------------------------------- /angrymetalguy.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class,'entry_content')] 2 | 3 | author: //a[@rel="author"] 4 | 5 | date: //time[contains(@class, 'published')] 6 | 7 | test_url: https://www.angrymetalguy.com/mental-health-awareness-interview-with-aln-of-mizmor/ 8 | -------------------------------------------------------------------------------- /cleafy.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'full blog')] 2 | title: //h1[@class='clabtitle'] 3 | 4 | strip_id_or_class: author-block 5 | 6 | test_url: https://www.cleafy.com/cleafy-labs/on-device-fraud-on-the-rise-exposing-a-recent-copybara-fraud-campaign 7 | -------------------------------------------------------------------------------- /donnahay.com.au.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' recipe-panel ')] 2 | title: //div[(contains(concat(' ',normalize-space(@class),' '),' recipe-title__mobile '))] 3 | strip_id_or_class: modal 4 | strip_id_or_class: hide-print 5 | -------------------------------------------------------------------------------- /gawker.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@class="post-body"] 2 | 3 | # Remove 'content is restricted' 4 | strip: //div[@id='agegate_IDHERE'] 5 | 6 | http_header(user-agent): PHP/5.3 7 | 8 | test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy -------------------------------------------------------------------------------- /glazman.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="post-title"] 2 | date: //meta[@name="date"]/@content 3 | author: //meta[@name="dc.publisher"]/@content 4 | 5 | test_url: http://www.glazman.org/weblog/dotclear/index.php?post/2019/01/23/WebExtensions-v3-considered-harmful 6 | -------------------------------------------------------------------------------- /gnppn.fr.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="entry-title"] 2 | author: //p[@class="site-title"] 3 | date: //time[@class="entry-date published"]/@datetime 4 | 5 | test_url: https://gnppn.fr/2018/10/21/quotidiens-nationaux-et-abonnement-en-ligne-prime-a-la-penibilite/ 6 | -------------------------------------------------------------------------------- /hiphopleeft.nl.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class = 'pd'] 2 | strip: //div[@id = 'overzicht-albumrecensies'] 3 | strip: //div[@id = 'jc'] 4 | test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 -------------------------------------------------------------------------------- /icannabis.tumblr.com.txt: -------------------------------------------------------------------------------- 1 | tidy:no 2 | prune:no 3 | 4 | body://div[contains(@id,'content')] 5 | 6 | strip_id_or_class:meta 7 | strip_id_or_class:notes 8 | strip_id_or_class:pagination 9 | test_url: 
http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000 -------------------------------------------------------------------------------- /labs.ripe.net.txt: -------------------------------------------------------------------------------- 1 | body: //article[@class="prose"] 2 | 3 | test_url: https://labs.ripe.net/author/kathleen_moriarty/the-llm-misinformation-problem-i-was-not-expecting/ 4 | test_contains: The problems around vetting AI results won't be going away anytime soon. 5 | -------------------------------------------------------------------------------- /nojesguiden.se.txt: -------------------------------------------------------------------------------- 1 | author: //span[@class='meta']/span[@class='username'] 2 | body: //div[@class='article-content'] 3 | 4 | strip_id_or_class: 'article-actions' 5 | test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i -------------------------------------------------------------------------------- /openai.com.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | strip: //nav 3 | strip: //*[@aria-hidden="true"] 4 | 5 | # strip additional header-line in text 6 | strip: //main/text() 7 | 8 | prune: no 9 | 10 | test_url: https://openai.com/index/learning-to-reason-with-llms/ 11 | -------------------------------------------------------------------------------- /photopills.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | body: //article[contains(@class, 'post')] 3 | author: //p[@class="author"]//a 4 | 5 | strip: //p[@class="tags"] 6 | strip: //div[@class="freebie"] 7 | 8 | test_url: https://www.photopills.com/articles/depth-of-field-guide 9 | -------------------------------------------------------------------------------- /researchandmarkets.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' 
',normalize-space(@class),' '),' order-description-container ')] 2 | 3 | prune: no 4 | 5 | test_url: https://www.researchandmarkets.com/reports/5264361/retail-point-of-sale-market-by-product-fixed 6 | -------------------------------------------------------------------------------- /rpgsite.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='news-text'] 2 | prune: no 3 | test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy 4 | test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork -------------------------------------------------------------------------------- /timeshighereducation.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class="col-md-12 radix-layouts-contentheader panel-panel"] 2 | strip: //div[@class="htmlContent subscribe_box"] 3 | test_url: https://www.timeshighereducation.com/blog/jeremy-corbyn-serious-about-free-higher-education 4 | -------------------------------------------------------------------------------- /useit.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | 3 | date: substring-after(//p[@class='overline']/strong, ',') 4 | body: //div[@class="maintext"] 5 | strip: //p[@class='overline'] 6 | strip: //h1 7 | tidy: no 8 | test_url: http://www.useit.com/alertbox/mobile-startup-screen.html -------------------------------------------------------------------------------- /vitispr.com.txt: -------------------------------------------------------------------------------- 1 | strip: //*[(@id = "ja-search")] 2 | body: //*[(@id = "ja-mainbody")] 3 | body: //*[(@id = "content-mass-bottom")] 4 | strip://h3[contains(span,'Related Posts')] 5 | strip://img 6 | test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot 
-------------------------------------------------------------------------------- /yostivanich.com.txt: -------------------------------------------------------------------------------- 1 | title://div[@class='entry-title'] 2 | body://div[@class='entry-content'] 3 | strip_comments:yes 4 | convert_double_br_tags:yes 5 | test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ -------------------------------------------------------------------------------- /accaglobal.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'article-container')] 2 | 3 | prune: no 4 | 5 | test_url: https://www.accaglobal.com/gb/en/student/exam-support-resources/professional-exams-study-resources/p5/technical-articles/economic-value-added-part1.html 6 | -------------------------------------------------------------------------------- /audiobookshelf.org.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | title: //article/h1[1] 3 | 4 | strip: //div[@class='nuxt-content']/hr[1] 5 | strip: //a[@aria-hidden="true"] 6 | strip: //h1/a 7 | 8 | prune: no 9 | 10 | test_url: https://www.audiobookshelf.org/guides/users/ 11 | -------------------------------------------------------------------------------- /ebay.com.txt: -------------------------------------------------------------------------------- 1 | body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] 2 | 3 | strip_image_src: imgLoading_30x30.gif 4 | 5 | test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 -------------------------------------------------------------------------------- /grafikart.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' formatted ')] 2 | date: 
//div[contains(concat(' ',normalize-space(@class),' '),' tutoriel_date ')] 3 | 4 | test_url: https://www.grafikart.fr/tutoriels/json-web-token-presentation-958 5 | -------------------------------------------------------------------------------- /inessential.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='weblogPost']/h3[1] 2 | author: ("Brent Simmons") 3 | date: //span[@class="weblogPostDisplayDate"] 4 | body: //div[@class='weblogPostBody'] 5 | test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo -------------------------------------------------------------------------------- /mintpressnews.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 2 | 3 | test_url: https://www.mintpressnews.com/mega-group-maxwells-mossad-spy-story-jeffrey-epstein-scandal/261172/ 4 | test_contains: One of those businessmen 5 | -------------------------------------------------------------------------------- /omiliya.org.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='squeeze']/h1 2 | strip: //div[@id='squeeze']/h1 3 | author: //div[@class='submitted']/a 4 | strip: //div[@class='submitted']/a 5 | convert_double_br_tags: yes 6 | 7 | 8 | 9 | test_url: http://omiliya.org/content/predchuvstvie.html -------------------------------------------------------------------------------- /popehat.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='entry-title'] 2 | author: //div[@class='entry-author'] 3 | date: //div[@class='entry-time'] 4 | body: //div[@class='entry-content'] 5 | 6 | test_url: https://popehat.com/2015/12/16/eric-posner-the-first-amendments-nemesis/ 7 | -------------------------------------------------------------------------------- 
/searchenginejournal.com.txt: -------------------------------------------------------------------------------- 1 | strip: //ul[contains(@id, "social")] 2 | strip: //div[contains(@class, "ts-fab-wrapper")] 3 | strip: //div[contains(@id, 'gpt-ad')] 4 | 5 | test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/ 6 | -------------------------------------------------------------------------------- /terrestres.org.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' td-post-content ')] 2 | 3 | strip_id_or_class: td-tags 4 | 5 | test_url: https://www.terrestres.org/2020/06/04/lutter-avec-le-droit-contre-les-derives-de-letat-durgence-sanitaire/ 6 | -------------------------------------------------------------------------------- /thecounter.org.txt: -------------------------------------------------------------------------------- 1 | body: //section[@id="main-article-content"] 2 | 3 | strip_id_or_class: article-pullquote 4 | strip_id_or_class: hide-on-amp 5 | 6 | prune: no 7 | 8 | test_url: https://thecounter.org/familiar-refrain-united-farm-workers-grapples-how-grow-ufw/ 9 | -------------------------------------------------------------------------------- /touilleur-express.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '), ' post_ctn ')] 2 | 3 | strip_id_or_class: yarpp-related 4 | 5 | test_url: https://www.touilleur-express.fr/2022/03/08/le-chiffrement-de-bout-en-bout-et-la-signature-denveloppe/ -------------------------------------------------------------------------------- /valdaiclub.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'dk-entry__body')] 2 | author: //meta[@property="author"]/@content 3 | title: //meta[@property="og:title"]/@content 4 | 5 | 
prune: no 6 | 7 | test_url: https://valdaiclub.com/a/highlights/do-empires-have-allies/ 8 | -------------------------------------------------------------------------------- /vc.ru.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' l-entry ')] 2 | 3 | strip_id_or_class: l-hidden 4 | strip_id_or_class: l-entry__banner 5 | 6 | test_url: https://vc.ru/finance/128687-60-poleznyh-saytov-dlya-poiska-investiciy-v-startap 7 | -------------------------------------------------------------------------------- /.craigslist.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class='postingtitle'] 2 | date: //p[@class='postinginfo']/date 3 | 4 | body: //figure[@class='iw'] | //section[@class='cltags' or @id='postingbody'] 5 | prune: no 6 | tidy: no 7 | 8 | test_url: http://sfbay.craigslist.org/hhh/index.rss -------------------------------------------------------------------------------- /accesstoinsight.org.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id='H_docTitle'] 2 | 3 | body: //div[@id='H_meta' or @id='H_content' or @id='F_footer'] 4 | 5 | strip_id_or_class: F_toenail 6 | 7 | prune: no 8 | 9 | test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html -------------------------------------------------------------------------------- /addendum.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@class='copy__content'] | //div[@class='heroStage__introText'] 3 | 4 | test_url: https://www.addendum.org/pendler/problem-pendlerpauschale 5 | test_url: https://www.addendum.org/repraesentation/zusammensetzung-nationalrat/ 6 | -------------------------------------------------------------------------------- /americandrink.net.txt: 
-------------------------------------------------------------------------------- 1 | title: //div[@class='head']/h2/a 2 | author: //div[@class='head']/a 3 | date: //div[@class='head']/p[@class='date']/a 4 | body: //div[@class='copy'] 5 | strip: //p[@class='meta'] 6 | test_url: http://americandrink.net/post/10567188712/free-the-hooch -------------------------------------------------------------------------------- /blog.dropbox.com.txt: -------------------------------------------------------------------------------- 1 | body: (//article)[1] 2 | 3 | strip_id_or_class: quote-plank 4 | strip_id_or_class: article-next-prev-plank 5 | strip_id_or_class: author-bios__description-mobile 6 | 7 | test_url: https://blog.dropbox.com/topics/work-culture/meet-the-lasermonks 8 | -------------------------------------------------------------------------------- /cnrs.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, "introduction")] | //div[contains(@class, 'field__items')] 2 | date: //div[contains(@class, 'main-column')]//time[contains(@class, "datetime")]/@datetime 3 | 4 | test_url: https://www.cnrs.fr/fr/cnrsinfo/lespace-au-quotidien 5 | -------------------------------------------------------------------------------- /commonwealmagazine.org.txt: -------------------------------------------------------------------------------- 1 | body: //article//div[contains(@class, 'paragraphs-items-full')] 2 | 3 | strip_id_or_class: paragraphs-item-pullquote 4 | 5 | prune: no 6 | 7 | test_url: https://www.commonwealmagazine.org/albert-schweitzer-racism-africa-medicine-Lambarene 8 | -------------------------------------------------------------------------------- /da.feedsportal.com.txt: -------------------------------------------------------------------------------- 1 | single_page_link: //a 2 | tidy: no 3 | prune: no 4 | 5 | test_url: 
http://da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm 6 | -------------------------------------------------------------------------------- /firstthings.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@class='articleTitle'] 2 | author: //div[@class='articleAuthor'] 3 | body: //div[@class='articleContent'] 4 | prune: no 5 | convert_double_br_tags: yes 6 | 7 | test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand -------------------------------------------------------------------------------- /granta.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-content ')] 2 | 3 | prune: no 4 | 5 | strip_id_or_class: social-share-container 6 | 7 | test_url: https://granta.com/having-and-being-had/ 8 | test_url: https://granta.com/feed 9 | -------------------------------------------------------------------------------- /haberler.com.txt: -------------------------------------------------------------------------------- 1 | title: //div[@id="habermetni"]/h1[@id="haber_baslik"] 2 | body: //div[@id="habermetni"]/p 3 | strip: //img[@class='newsDetailLeft'] 4 | strip_image_src: /haber-resimleri/ 5 | test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ -------------------------------------------------------------------------------- /itwire.com.txt: -------------------------------------------------------------------------------- 1 | author: //a[@rel="author"] 2 | date: //li[@class="itemDateCreated"] 3 | strip: //div[contains(@class, 'legend-rounded')] 4 | 5 | test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands 6 | 
-------------------------------------------------------------------------------- /journal.markusthoma.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry-content'] 2 | strip_id_or_class: bcac-item 3 | strip_id_or_class: crp_related 4 | strip_id_or_class: optin-after-content 5 | strip: //h2 6 | 7 | test_url: https://journal.markusthoma.com/gegenlicht-fotografie/ 8 | 9 | -------------------------------------------------------------------------------- /lezephyrmag.com.txt: -------------------------------------------------------------------------------- 1 | 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')] 3 | 4 | strip_id_or_class: swp_social_panel 5 | 6 | test_url: https://lezephyrmag.com/libre/paroles/jean-roch-waro-voyager-avec-la-musique-est-un-acte-poetique/ 7 | -------------------------------------------------------------------------------- /libcom.org.txt: -------------------------------------------------------------------------------- 1 | date: //span[contains(@class, 'page-date')] 2 | body: //div[@id='node-page'] 3 | strip_id_or_class: book-navigation 4 | prune: no 5 | 6 | test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1 7 | test_url: http://libcom.org/library-latest/feed -------------------------------------------------------------------------------- /msdn.microsoft.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="mainBody"] 2 | prune: no 3 | test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx 4 | test_url: https://msdn.microsoft.com/library/hh191443(vs.110).aspx 5 | test_contains: An async method typically contains 6 | -------------------------------------------------------------------------------- /nakedsecurity.sophos.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='entry-content'] 2 | title: 
//div[@class='entry-title'] 3 | strip: //div[@class='entry-sharing'] 4 | 5 | test_url: https://nakedsecurity.sophos.com/2016/04/04/new-ransomware-with-an-old-trick-petya-parties-like-its-1989/ 6 | -------------------------------------------------------------------------------- /nextdraft.com.txt: -------------------------------------------------------------------------------- 1 | body: //section[contains(concat(' ',normalize-space(@class),' '),' page-content-wrap ')] 2 | strip_id_or_class: social-sharing-wrap 3 | strip_id_or_class: current-edition-title-date 4 | 5 | test_url: https://nextdraft.com/archives/n20221208/ 6 | 7 | -------------------------------------------------------------------------------- /politifact.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="content"] 2 | 3 | strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"] 4 | test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ -------------------------------------------------------------------------------- /snob.ru.txt: -------------------------------------------------------------------------------- 1 | body: //article[contains(concat(' ',normalize-space(@class),' '),' entry__body ')] 2 | 3 | strip_id_or_class: entry__tags 4 | strip_id_or_class: entry__likes 5 | 6 | prune: no 7 | 8 | test_url: https://snob.ru/society/masony-v-lozhe-i-vokrug-nee-chast-1/ 9 | -------------------------------------------------------------------------------- /timeshighereducation.co.uk.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@class="storytext"] 3 | strip: //div[@id="thelogin"] 4 | strip: //*[@class="hide"] 5 | strip: //div[@id="anchored"] 6 | test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 
-------------------------------------------------------------------------------- /wiki.guildwars.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@id='content'] 3 | strip_id_or_class: editsection 4 | strip_id_or_class: toc 5 | strip: //div[@id='siteNotice'] 6 | strip: //div[@id='content']//table[last()] 7 | prune: no 8 | test_url: http://wiki.guildwars.com/wiki/Monk -------------------------------------------------------------------------------- /36kr.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[contains(@class, 'entry-title')] 2 | date: //meta[@name='weibo: article:create_at']/@content 3 | body: //div[contains(@class, 'mainContent')] 4 | strip_id_or_class: related_topics 5 | 6 | prune: no 7 | 8 | test_url: http://www.36kr.com/p/207879.html -------------------------------------------------------------------------------- /amandala.com.bz.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='content']//div[contains(@class, 'content')] 2 | strip_id_or_class: widget 3 | strip: //a[contains(@href, 'upm_export=')] 4 | 5 | test_url: http://amandala.com.bz/news/feed/ 6 | test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/ -------------------------------------------------------------------------------- /annouchka.fr.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: code-block 2 | strip_id_or_class: hellocoton_vote 3 | 4 | strip: //a[@class='c3'] 5 | strip: //a[@class='c4'] 6 | strip: //a[@class='c5'] 7 | 8 | test_url: http://www.annouchka.fr/5-conseils-pour-profiter-un-peu-plus-de-ses-enfants/ 9 | -------------------------------------------------------------------------------- /econlog.econlib.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="title"] 2 | 
author: //div[@class="hosted"]/a 3 | date: substring-after(//div[@class="dateline"]/text(), '|') 4 | 5 | strip: //a[@class="top" and @href="#"] 6 | test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html -------------------------------------------------------------------------------- /exoplanets.nasa.gov.txt: -------------------------------------------------------------------------------- 1 | strip_id_or_class: related 2 | strip_id_or_class: related_content_module 3 | strip_id_or_class: multimedia_teaser 4 | 5 | test_url: https://exoplanets.nasa.gov/news/1672/discovery-alert-first-six-star-system-where-all-six-stars-undergo-eclipses/ 6 | -------------------------------------------------------------------------------- /folklore.org.txt: -------------------------------------------------------------------------------- 1 | author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2] 2 | date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2] 3 | body: //div[@class='main'] 4 | test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt -------------------------------------------------------------------------------- /gizmodo.uol.com.br.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | 3 | body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p 4 | 5 | test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/ 6 | test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/ 7 | -------------------------------------------------------------------------------- /healthletter.mayoclinic.com.txt: -------------------------------------------------------------------------------- 1 | body: //main//div[contains(@class, 'article')] 2 | 3 | prune: no 4 | 5 | test_url: https://healthletter.mayoclinic.com/issues/june-2018/heat-illness 6 | test_url: https://healthletter.mayoclinic.com/issues/january-2019/freshening-bad-breath 7 | 
-------------------------------------------------------------------------------- /linuxjournal.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[@class='content-area'] 2 | next_page_link: //a[@title='Go to next page'] 3 | author: //a[@title='View user profile.'] 4 | strip_id_or_class: comments 5 | 6 | test_url: http://www.linuxjournal.com/content/be-mechanicwith-android-and-linux 7 | -------------------------------------------------------------------------------- /macg.co.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' print:shadow-none ')]//article 2 | strip: //footer 3 | strip_id_or_class: comments 4 | 5 | test_url: https://www.macg.co/ailleurs/2024/10/microsoft-donne-des-yeux-et-une-voix-lia-de-copilot-146279 6 | -------------------------------------------------------------------------------- /marco.org.txt: -------------------------------------------------------------------------------- 1 | tidy: no 2 | prune: no 3 | date: //article//time[@pubdate] 4 | title: //article/header/h2 5 | body: //article 6 | strip: //header 7 | test_url: http://www.marco.org/2012/09/08/businessweek-gruber 8 | test_url: http://www.marco.org/2012/04/24/might-upgrade-someday -------------------------------------------------------------------------------- /nicj.net.txt: -------------------------------------------------------------------------------- 1 | strip: //h1 2 | strip: //div[@class="info"] 3 | strip: //div[@class="osd-sms-wrapper"] 4 | strip: //div[@id="sidebar"] 5 | strip: //div[@id="postnavi"] 6 | strip: //form[@id="commentform"] 7 | 8 | test_url: https://nicj.net/cumulative-layout-shift-in-practice/ 9 | -------------------------------------------------------------------------------- /pastebin.com.txt: -------------------------------------------------------------------------------- 1 | 
title://div[@class="paste_box_line1"]/h1 2 | author://div[@class="paste_box_line2"]/a 3 | body://div[@class="text"] 4 | date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') 5 | dissolve://li 6 | test_url: http://pastebin.com/LAykd1es -------------------------------------------------------------------------------- /splinternews.com.txt: -------------------------------------------------------------------------------- 1 | title: //head/title 2 | author: //meta[@name="author"]/@content 3 | body: //div[contains(@class, 'post-content')] 4 | strip: //div[contains(@class, 'content-summary')] 5 | 6 | test_url: https://splinternews.com/jonathan-chait-has-no-excuse-1831076209 7 | -------------------------------------------------------------------------------- /thesimpledollar.com.txt: -------------------------------------------------------------------------------- 1 | title: //h3[@class='post-title']/a[@class='post-title-link'] 2 | body: //div[@class='post-content'] 3 | author: //div[@class='post-meta-under-title']/a 4 | test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ -------------------------------------------------------------------------------- /web.dev.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, "w-post-content")] 2 | author: //a[@class="w-author__name-link"] 3 | date: //header//time 4 | 5 | strip: //div[@class="w-post-breadcrumbs"] 6 | strip: //div[@class="w-chips"] 7 | 8 | test_url: https://web.dev/content-visibility/ 9 | -------------------------------------------------------------------------------- /wiki.guildwars2.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //div[@id='content'] 3 | strip_id_or_class: editsection 4 | strip_id_or_class: toc 5 | strip: //div[@id='siteNotice'] 6 | strip: //div[@id='content']//table[last()] 7 | prune: no 8 | test_url: 
http://wiki.guildwars2.com/wiki/Guardian -------------------------------------------------------------------------------- /wmpoweruser.com.txt: -------------------------------------------------------------------------------- 1 | date://*[@class="entry-date"] 2 | author://*[@class="author vcard"] 3 | strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"] 4 | test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ -------------------------------------------------------------------------------- /.livejournal.com.txt: -------------------------------------------------------------------------------- 1 | title: //title 2 | strip_image_src: 'l-stat.livejournal.com' 3 | strip_image_src: 'www.livejournal.com' 4 | strip_image_src: 'l-userpic.livejournal.com' 5 | test_url: http://news.livejournal.com/136664.html 6 | test_url: http://stelazin.livejournal.com/91363.html -------------------------------------------------------------------------------- /.redbullmusicacademy.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' body ')] 2 | title: //h1[contains(concat(' ',normalize-space(@class),' '),' headline-main ')] 3 | 4 | test_url: https://daily.redbullmusicacademy.com/2014/11/pizzicato-five-feature 5 | 6 | -------------------------------------------------------------------------------- /actualitte.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article ')] 2 | author: //p[@class="auteur"]/a 3 | 4 | test_url: https://www.actualitte.com/article/monde-edition/jean-miguel-pire-n-est-plus-le-conseiller-culture-livre-et-lecture-de-nyssen/84556 5 | -------------------------------------------------------------------------------- /albayan.ae.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[@id='main-column']//div[@class='content'] 2 | 3 | strip_id_or_class: social-buttons 4 | 5 | prune: no 6 | 7 | test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 8 | test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout -------------------------------------------------------------------------------- /annatravelling.wordpress.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="title"] 2 | 3 | author: ("Anna Manasova") 4 | # is ignored, unfortunately 5 | 6 | date: //p[@class="date"] 7 | 8 | body: //div[@class="entry"] 9 | test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ -------------------------------------------------------------------------------- /blog.kaelig.fr.txt: -------------------------------------------------------------------------------- 1 | body: //*[contains(@class, 'post_content')] 2 | author: string('Kaelig Deloumeau-Prigent') 3 | title: //h1[@class='title'] 4 | date: //span[@class='date'] 5 | test_url: http://blog.kaelig.fr/post/24877648508/pr%C3%A9processeurs-css-renoncer-par-choix-ou-par 6 | -------------------------------------------------------------------------------- /bostonreview.net.txt: -------------------------------------------------------------------------------- 1 | body: //div[@itemprop="articleBody"] 2 | 3 | prune: no 4 | 5 | strip_id_or_class: pullquote 6 | 7 | test_url: https://bostonreview.net/articles/vivian-gornick-sex-love-and-letters-review/ 8 | test_contains: Upon its publication, first in French in 1949 9 | -------------------------------------------------------------------------------- /ekultura.hu.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class='style6 nevek'] 2 | 3 | body: //div[@class='bal3'] 4 | 5 | 6 | prune: yes 7 | 8 | tidy: yes 9 | 
convert_double_br_tags: yes 10 | 11 | test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december -------------------------------------------------------------------------------- /fictionpress.com.txt: -------------------------------------------------------------------------------- 1 | body: id('storytext') 2 | author: //a[starts-with(@href, '/u/')] 3 | #next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") 4 | strip_id_or_class: 'a2a_kit' 5 | test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew -------------------------------------------------------------------------------- /franceculture.fr.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' text-zone ')] 2 | src_lazy_load_attr: data-dejavu-src 3 | 4 | test_url: https://www.franceculture.fr/emissions/entendez-vous-leco/paroles-deconomistes-24-passer-leconomie-le-role-de-luniversite 5 | -------------------------------------------------------------------------------- /gofugyourself.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'featured-embed-holder')] | //div[contains(@class, 'article-content') and @itemprop="articleBody"] 2 | 3 | strip_id_or_class: wp-polls 4 | 5 | test_url: https://www.gofugyourself.com/can-maren-morris-be-a-cinderella-03-2019 6 | -------------------------------------------------------------------------------- /kottke.org.txt: -------------------------------------------------------------------------------- 1 | title: //h2 2 | author: //*[@id='main']/div/a[1] 3 | date: substring-before(substring-after(//div[@class='meta'],'•'),'•') 4 | body: //div[@id='main'] 5 | strip: //div[@class='meta'] 6 | test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters 
-------------------------------------------------------------------------------- /linkedin.com.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' prose ')] 3 | 4 | strip: //header 5 | 6 | prune: no 7 | tidy: no 8 | 9 | test_url: https://www.linkedin.com/pulse/20140328055547-78273192-how-to-become-seo-expert 10 | -------------------------------------------------------------------------------- /n-tv.de.txt: -------------------------------------------------------------------------------- 1 | date: //span[@class='article__date'] 2 | title: //title 3 | body: //div[@class='article__text'] 4 | 5 | test_url: https://www.n-tv.de/politik/Die-alten-Maenner-werden-es-nie-blicken-article21447318.html 6 | 7 | # Last update: 2019-12-12 8 | # Tested with: Wallabag 2.3.8 -------------------------------------------------------------------------------- /redmas.com.co.txt: -------------------------------------------------------------------------------- 1 | http_header(user-agent): Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0 2 | 3 | test_url: https://redmas.com.co/tecnologia/Clientes-prepago-pospago-y-hogar-de-Claro-tendran-acceso-sin-costo-a-100-GB-en-la-nube-20231004-0032.html 4 | -------------------------------------------------------------------------------- /warriordudimanche.net.txt: -------------------------------------------------------------------------------- 1 | title: //article[contains(concat(' ',normalize-space(@class),' '),' article ')]//header//h1 2 | body: //article[contains(concat(' ',normalize-space(@class),' '),' article ')]//section 3 | 4 | test_url: http://warriordudimanche.net/article458/589065212a599 -------------------------------------------------------------------------------- /alternet.org.txt: -------------------------------------------------------------------------------- 1 | single_page_link: 
//div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')] 2 | 3 | test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest 4 | test_url: http://feeds.feedblitz.com/alternet -------------------------------------------------------------------------------- /blog.mozilla.org.txt: -------------------------------------------------------------------------------- 1 | title: //main[@id="content"]//h1 2 | author: //address 3 | date: //div[@class="entry-info"]//time/@datetime 4 | body: //div[@class="entry-content"] 5 | 6 | test_url: https://blog.mozilla.org/blog/2018/06/07/parlez-vous-deutsch-rhagor-o-leisiau-i-common-voice/ 7 | -------------------------------------------------------------------------------- /coffeecircle.com.txt: -------------------------------------------------------------------------------- 1 | tidy: no 2 | prune: no 3 | body: //div[@class='post--title'] | //div[@class='blog-content'] 4 | strip_id_or_class: tag 5 | strip_id_or_class: photoset-grid 6 | strip_id_or_class: newsletter 7 | 8 | test_url: https://www.coffeecircle.com/de/b/cold-brew-eiskaffee 9 | -------------------------------------------------------------------------------- /csswizardry.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | author: //b[@itemprop="author"] 3 | date: //time[@itemprop="datePublished"]/@datetime 4 | 5 | strip: //nav[@class="c-pagination"] 6 | strip: //a[@href="/services/"] 7 | 8 | test_url: https://csswizardry.com/2019/05/self-host-your-static-assets/ 9 | -------------------------------------------------------------------------------- /enikos.gr.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id='article']//div[contains(@class, 'inside')] 2 | 3 | strip_id_or_class: tags 4 | strip_id_or_class: actions 5 | strip_id_or_class: google-ads 6 | 7 | prune: no 8 | 9 | test_url: 
http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html -------------------------------------------------------------------------------- /framablog.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | 3 | strip: //div[contains(@class, 'pdfprnt-buttons')] 4 | strip: //div[contains(@class, 'social-sharing')] 5 | strip: //div[@id="diaspora-button-container"] 6 | 7 | test_url: https://framablog.org/2018/05/31/peertube-vers-la-version-1-et-au-dela/ 8 | -------------------------------------------------------------------------------- /information.dk.txt: -------------------------------------------------------------------------------- 1 | title: //meta[@property='og:title']/@content 2 | author: //*[@property='dc:creator'] 3 | date: //*[@property='dc:date']/@content 4 | body: //div[@id='page-content']//div[contains(@class, 'article-body')] 5 | 6 | tidy: no 7 | test_url: http://www.information.dk/282307 -------------------------------------------------------------------------------- /itsfoss.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1 2 | body: //article[contains(@class,'type-post')]/div[@itemprop='text'] 3 | strip: //a[@rel='dofollow'] 4 | strip: //div[contains(@class,'zem_rp_wrap')] 5 | strip: //div[@id='shr_canvas2'] 6 | test_url: https://itsfoss.com/fix-gvfsd-smb-high-cpu-ubuntu/ 7 | -------------------------------------------------------------------------------- /kickstarter.com.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@id='name'] 2 | body: //*[@id='leftcol'] 3 | 4 | strip_id_or_class: 'share-box' 5 | strip_id_or_class: 'project-faqs' 6 | strip_id_or_class: 'report-issue-wrap' 7 | test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone -------------------------------------------------------------------------------- /linux.com.txt: 
-------------------------------------------------------------------------------- 1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' article-content ')] 2 | 3 | author: //div[contains(concat(' ',normalize-space(@class),' '),' td-post-author-name ')]/a 4 | 5 | test_url: https://www.linux.com/tutorials/lf-networking-releases-onap-dublin/ 6 | -------------------------------------------------------------------------------- /marmiton.org.txt: -------------------------------------------------------------------------------- 1 | title: //h1[@class="main-title"] 2 | author: //span[@class="recipe-author__name"] 3 | body: //div[@id="sticky-desktop-only"] 4 | strip: //div[@id="bloc-video"] 5 | 6 | test_url: http://www.marmiton.org/recettes/recette_gateau-au-chocolat-fondant-rapide_166352.aspx 7 | -------------------------------------------------------------------------------- /mentalfloss.com.txt: -------------------------------------------------------------------------------- 1 | body: //div[contains(@class, 'article-content')][1] 2 | author: //div[contains(@class, 'author')][1] 3 | 4 | strip_id_or_class: date-text 5 | strip_id_or_class: after-article-subscribe-nl 6 | strip_id_or_class: eyebrow-text 7 | strip_id_or_class: eyebrow-tail 8 | -------------------------------------------------------------------------------- /mirrorfootball.co.uk.txt: -------------------------------------------------------------------------------- 1 | # Remove extra links 2 | strip: //*[@class='appended_html'] 3 | test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html -------------------------------------------------------------------------------- /news.rambler.ru.txt: -------------------------------------------------------------------------------- 1 | body: //article 2 | title: //h1 3 | author: //span[@class='b-article-source-dropdown'] 4 | 
strip: //span[@class='b-article-photo-incut__source'] 5 | strip: //a[@class='b-read-more b-read-more_bottom'] 6 | 7 | 8 | tidy:no 9 | test_url: http://news.rambler.ru/12972208/ -------------------------------------------------------------------------------- /parislemon.com.txt: -------------------------------------------------------------------------------- 1 | title: //h2[@class="post-title"] 2 | author: substring-after(//div[@class="description"],'Words by ') 3 | date: //li[@class="date"] 4 | strip: //h2[@class="post-title"] 5 | body: //div[@class="copy"] 6 | test_url: http://parislemon.com/post/13462682469/the-15-inch-air -------------------------------------------------------------------------------- /sec.gov.txt: -------------------------------------------------------------------------------- 1 | body: //div[@id="contentDiv"] 2 | prune: no 3 | http_header(user-agent): Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:140.0) Gecko/20100101 Firefox/140.0 4 | test_url: https://www.sec.gov/Archives/edgar/data/1544204/000154420425000003/0001544204-25-000003-index.htm 5 | -------------------------------------------------------------------------------- /shahinkalantari.com.txt: -------------------------------------------------------------------------------- 1 | body: //main 2 | 3 | # force right-to-left text orientation, needed for wallabag 4 | find_string: