):
4 |
5 | prune: no
6 |
7 | test_url: http://www.sqlite.org/fileformat2.html
--------------------------------------------------------------------------------
/squashed.tumblr.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='content']
2 | date: substring-before( //div[@class='unit dateAndNotes'], 'with')
3 | title: //h3
4 | test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending
--------------------------------------------------------------------------------
/stadt-muenster.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='inhalts-spalte']
2 | prune: no
3 | strip: //div[@class='footerlinks']
4 | test_url: https://www.stadt-muenster.de/immobilien/wohnbaupotenzialflaechen/sozialgerechte-bodennutzung.html
5 |
--------------------------------------------------------------------------------
/staltz.com.txt:
--------------------------------------------------------------------------------
1 | author: //h2[@class="name-title"]
2 |
3 | test_url: https://staltz.com/a-plan-to-rescue-the-web-from-the-internet.html
4 |
--------------------------------------------------------------------------------
/stefanjudis.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@class="c-markdown"]
3 | date: (//dl[@class="c-pageDetails"]//time[@class="c-prettyDate"]/@datetime)[1]
4 |
5 | test_url: https://www.stefanjudis.com/today-i-learned/css-content-accepts-alternative-text/
6 |
--------------------------------------------------------------------------------
/stjv.fr.txt:
--------------------------------------------------------------------------------
1 | date: //time[contains(@class, 'published')]/@datetime
2 |
3 | test_url: https://www.stjv.fr/2018/01/au-sujet-quantic-dream/
4 |
--------------------------------------------------------------------------------
/stopgame.ru.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(concat(' ',normalize-space(@class),' '),' article ')]
2 |
3 | test_url: https://stopgame.ru/show/113377/phoenotopia_awakening_review
4 |
--------------------------------------------------------------------------------
/stumbleupon.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link: //iframe[@id='tb-stumble-frame']/@src
2 |
3 | test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
--------------------------------------------------------------------------------
/summitroute.com.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://summitroute.com/blog/2015/12/24/instagram_bounty_case_study_for_defense/
4 |
--------------------------------------------------------------------------------
/swcarpentry.github.io.txt:
--------------------------------------------------------------------------------
1 | body: //article
2 |
3 | prune: no
4 |
5 | test_url: https://swcarpentry.github.io/shell-novice/02-filedir/index.html
6 |
--------------------------------------------------------------------------------
/tbray.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='centercontent']
2 | strip: //div[@id='rightcontent']
3 | date: substring-before( //div[@id='cats'], '·')
4 | title: //h1
5 | test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money
--------------------------------------------------------------------------------
/techcommunity.microsoft.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' lia-message-body-content ')]
2 |
3 | test_url: https://techcommunity.microsoft.com/t5/Azure-Active-Directory-Identity/Your-Pa-word-doesn-t-matter/ba-p/731984
4 |
--------------------------------------------------------------------------------
/techmeme.com.txt:
--------------------------------------------------------------------------------
1 | single_page_link_in_feed: //b/a
2 |
3 | test_url: http://www.techmeme.com/feed.xml
4 |
--------------------------------------------------------------------------------
/techno-science.net.txt:
--------------------------------------------------------------------------------
1 | title://div[@class="news"]/div[@class="titre"]
2 | body://div[@class="news"]/div[@class="texte"]
3 | test_url: http://www.techno-science.net/?onglet=news&news=14808
4 |
--------------------------------------------------------------------------------
/techpinions.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="post"]
2 |
3 | strip: //div[@class="post-meta"]
4 | strip: //div[@id="socialicons"]
5 | strip: //div[@id="authorbox"]
6 |
7 | test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572
--------------------------------------------------------------------------------
/terrestres.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' td-post-content ')]
2 |
3 | strip_id_or_class: td-tags
4 |
5 | test_url: https://www.terrestres.org/2020/06/04/lutter-avec-le-droit-contre-les-derives-de-letat-durgence-sanitaire/
6 |
--------------------------------------------------------------------------------
/the-magazine.org.txt:
--------------------------------------------------------------------------------
1 | tidy: no
2 |
3 | test_url: http://the-magazine.org/1/alone-together-again
--------------------------------------------------------------------------------
/the-tls.co.uk.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'tls-article-body')]
2 |
3 | test_url: https://www.the-tls.co.uk/articles/katrina-history-1915-2015-andy-horowitz-review-peter-coates/
4 |
--------------------------------------------------------------------------------
/thebaffler.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='pico']
2 |
3 | strip_id_or_class: wp-block-pullquote
4 |
5 | test_url: https://thebaffler.com/latest/hong-kong-literatures-growing-pains-chu
6 |
--------------------------------------------------------------------------------
/thecounter.org.txt:
--------------------------------------------------------------------------------
1 | body: //section[@id="main-article-content"]
2 |
3 | strip_id_or_class: article-pullquote
4 | strip_id_or_class: hide-on-amp
5 |
6 | prune: no
7 |
8 | test_url: https://thecounter.org/familiar-refrain-united-farm-workers-grapples-how-grow-ufw/
9 |
--------------------------------------------------------------------------------
/thefilmexperience.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='body']
2 | test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html
--------------------------------------------------------------------------------
/theintercept.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="Headline"]
2 | body: //div[@class="PostContent"]
3 |
4 | test_url: https://theintercept.com/2014/10/30/inside-story-matt-taibbis-departure-first-look-media/
5 |
--------------------------------------------------------------------------------
/thenetworkgarden.blogs.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="beta-inner"]
2 | title: //h3[@class="entry-header"]
3 |
4 | test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html
--------------------------------------------------------------------------------
/thenews.coop.txt:
--------------------------------------------------------------------------------
1 | body: //div[@itemprop='articleBody']
2 |
3 | test_url: http://www.thenews.coop/98221/news/co-operatives/jeremy-corbyn-needs-co-op-movement/
4 |
--------------------------------------------------------------------------------
/theoaklandpress.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='fullstory']
2 | strip: //div[@id='page_leftbar']
3 | test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt
--------------------------------------------------------------------------------
/theodinproject.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' lesson-content ')]
2 |
3 | prune: no
4 |
5 | test_url: https://www.theodinproject.com/lessons/foundations-command-line-basics
6 |
--------------------------------------------------------------------------------
/thepointmag.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='article']
2 | strip_id_or_class: z-max
3 | strip_id_or_class: readLaterMenu
4 |
5 | test_url: https://thepointmag.com/2016/examined-life/can-liberal-education-save-the-sciences
6 |
--------------------------------------------------------------------------------
/therumpus.net.txt:
--------------------------------------------------------------------------------
1 | title: /html/body/div/div[2]/div/div/h1
2 |
3 | body: /html/body/div/div[2]/div/div/div[2]
4 | test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes
--------------------------------------------------------------------------------
/theses.enc.sorbonne.fr.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="head"]
2 | author: //div[@class="name"]
3 | body: //article[@id="text"]
4 |
5 | test_url: http://theses.enc.sorbonne.fr/2014/sidre
6 |
--------------------------------------------------------------------------------
/thesimpledollar.com.txt:
--------------------------------------------------------------------------------
1 | title: //h3[@class='post-title']/a[@class='post-title-link']
2 | body: //div[@class='post-content']
3 | author: //div[@class='post-meta-under-title']/a
4 | test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/
--------------------------------------------------------------------------------
/thesocialitefamily.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' strates-single ')]
2 |
3 | test_url: https://www.thesocialitefamily.com/familles/mulhouse/chez-anne-hubert-la-cerise-sur-le-gateau/
4 |
--------------------------------------------------------------------------------
/theteaspot.com.txt:
--------------------------------------------------------------------------------
1 | body: //main[@id='MainContent']
2 |
3 | prune: no
4 |
5 | test_url: https://www.theteaspot.com/pages/about-tea
6 |
--------------------------------------------------------------------------------
/thethaovanhoa.vn.txt:
--------------------------------------------------------------------------------
1 | strip:/html/body/form/div[2]/div[3]/div/div/div/div/div/div/div/div/div/div[2]/div[3]/div[2]/div/p[2]
2 | test_url: http://thethaovanhoa.vn/151N20110519085606745T129/levante-quyet-giu-caicedo.htm
--------------------------------------------------------------------------------
/thewirecutter.com.txt:
--------------------------------------------------------------------------------
1 | body: //main/div/article
2 |
3 | strip: //header
4 | strip: //aside
5 |
6 | author: //p[@data-scp="author_name"]
7 | date: //time/@datetime
8 |
9 | test_url: https://thewirecutter.com/money/credit-cards/
10 |
--------------------------------------------------------------------------------
/thingiverse.com.txt:
--------------------------------------------------------------------------------
1 | tidy: yes
2 | autodetect_on_failure: yes
3 | prune: yes
4 | body: /html/head/title
5 |
6 | test_url: https://www.thingiverse.com/thing:3868321
7 | test_contains: Panther Origami
--------------------------------------------------------------------------------
/thinkspot.com.txt:
--------------------------------------------------------------------------------
1 | # This only appears after JS is processed
2 | body: //div[contains(@class, 'desc_div')]
3 |
4 | insert_detected_image: no
5 |
6 | prune: no
7 |
8 | test_url: https://www.thinkspot.com/discourse/OJumEP/post/spencer-t-folmar/telling-stories-that-liberate/9YtZ6zl
9 |
--------------------------------------------------------------------------------
/thisamericanlife.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='content']
2 |
3 | test_url: https://www.thisamericanlife.org/282/transcript
4 | test_contains: I was part of sending an innocent man to jail
5 |
--------------------------------------------------------------------------------
/tidbits.com.txt:
--------------------------------------------------------------------------------
1 | author: //span[@class='fn']
2 | date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
3 | test_url: http://tidbits.com/article/12651
--------------------------------------------------------------------------------
/tijd.be.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@class="ac_paragraph-hidden"]
2 |
3 | test_url: https://www.tijd.be/politiek-economie/internationaal/algemeen/ik-ben-vooral-bezorgd-dat-corona-uitbreekt-in-afrika/10204834.html
4 | test_contains: U klinkt gealarmeerder
5 |
--------------------------------------------------------------------------------
/timeshighereducation.co.uk.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@class="storytext"]
3 | strip: //div[@id="thelogin"]
4 | strip: //*[@class="hide"]
5 | strip: //div[@id="anchored"]
6 | test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1
--------------------------------------------------------------------------------
/timeshighereducation.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="col-md-12 radix-layouts-contentheader panel-panel"]
2 | strip: //div[@class="htmlContent subscribe_box"]
3 | test_url: https://www.timeshighereducation.com/blog/jeremy-corbyn-serious-about-free-higher-education
4 |
--------------------------------------------------------------------------------
/tldp.org.txt:
--------------------------------------------------------------------------------
1 | title: //title
2 | body: //h2 | //p | //ul
3 | prune: no
4 | tidy: no
5 |
6 | test_url: http://www.tldp.org/HOWTO/Plug-and-Play-HOWTO-7.html
--------------------------------------------------------------------------------
/tofugu.com.txt:
--------------------------------------------------------------------------------
1 | body://div[@class='entry-content']
2 |
3 | test_url: http://www.tofugu.com/2015/07/20/interview-with-toriena-japanese-chiptune/
4 |
--------------------------------------------------------------------------------
/toolinux.com.txt:
--------------------------------------------------------------------------------
1 | title: //h2[contains(@class,'news')]
2 | body: //div[contains(@class,'articleContent')]
3 | date: substring-after(//div[@class = 'SupaDate']/text(), 'le')
4 |
5 | test_url: http://www.toolinux.com/Wi-Fi-Linksys-WRT-la-legende-de
6 |
--------------------------------------------------------------------------------
/toolsandtoys.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class='post']
2 |
3 | strip: //div[@class='social']
4 | strip: //span[@class='next']
5 | strip: //span[@class='previous']
6 | test_url: http://toolsandtoys.net/noble-tonic-02/
--------------------------------------------------------------------------------
/touilleur-express.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '), ' post_ctn ')]
2 |
3 | strip_id_or_class: yarpp-related
4 |
5 | test_url: https://www.touilleur-express.fr/2022/03/08/le-chiffrement-de-bout-en-bout-et-la-signature-denveloppe/
--------------------------------------------------------------------------------
/trailer.web-view.net.txt:
--------------------------------------------------------------------------------
1 | title: concat(substring-before(//title,':'),': ',//div[@class='Date2'])
2 | test_url: http://trailer.web-view.net/Show/0XC4EFE5D648B716BA2E134BC7CE61B9CC001E04F11E9434438186735DBD637488.htm
--------------------------------------------------------------------------------
/triplebyte.com.txt:
--------------------------------------------------------------------------------
1 | body: //article[contains(concat(' ',normalize-space(@class), ' '), ' blog-post ')]
2 |
3 | test_url: https://triplebyte.com/blog/marissa-mayer-interview
4 |
--------------------------------------------------------------------------------
/tthfanfic.org.txt:
--------------------------------------------------------------------------------
1 | title: //h2
2 | author: //a[starts-with(@href, '/AuthorStories')]
3 | body: //div[@id='storyinnerbody']
4 | test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm
--------------------------------------------------------------------------------
/tuaw.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class='posttitle']
2 | author: //span[@class='author']/a
3 | date: //span[@class='timestamp']
4 | body: //div[@class='body']
5 |
6 | test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/
--------------------------------------------------------------------------------
/turnoff.us.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class='post-title']
2 | body: //article[@class='post-content']
3 | test_url: https://turnoff.us/geek/the-depressed-developer-13/
4 |
--------------------------------------------------------------------------------
/tvtropes.org.txt:
--------------------------------------------------------------------------------
1 | #The examples are useful - keep
2 | #strip: //div[contains(concat(' ',normalize-space(@class),' '),' folder ')]
3 | #strip: //div[contains(concat(' ',normalize-space(@class),' '),' folderlabel ')]
4 |
5 | test_url: https://tvtropes.org/pmwiki/pmwiki.php/Main/Masquerade
6 |
--------------------------------------------------------------------------------
/urbandictionary.com.txt:
--------------------------------------------------------------------------------
1 | title: //title
2 | body: //table[@id='entries']
3 | test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
4 |
--------------------------------------------------------------------------------
/useit.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 |
3 | date: substring-after(//p[@class='overline']/strong, ',')
4 | body: //div[@class="maintext"]
5 | strip: //p[@class='overline']
6 | strip: //h1
7 | tidy: no
8 | test_url: http://www.useit.com/alertbox/mobile-startup-screen.html
--------------------------------------------------------------------------------
/utdailybeacon.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='blox-story-text']
2 | test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html
3 |
--------------------------------------------------------------------------------
/utiliser-lightroom.com.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class="titrearticle"]
2 | body: //div[@class="article"]
3 |
4 | test_url: http://www.utiliser-lightroom.com/blog/2019/04/23/skylum-flex-et-airmagic-pour-les-utilisateurs-de-lightroom/
5 |
--------------------------------------------------------------------------------
/ux.artu.tv.txt:
--------------------------------------------------------------------------------
1 | author: ("Arturo Toledo")
2 | title: //div[@class="post"]/h2
3 | body: //div[@class="entry"]
4 |
5 | # Remove Twitter button
6 | strip: //div[@class="entry"]/p[2]/a/img
7 | test_url: http://ux.artu.tv/?p=192
--------------------------------------------------------------------------------
/vakarm.net.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="block_news_main_content_preview"]
2 |
3 | test_url: https://www.vakarm.net/news/read/Choual-History-X-tout-un-cinema/10270/2
4 |
--------------------------------------------------------------------------------
/valdaiclub.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'dk-entry__body')]
2 | author: //meta[@property="author"]/@content
3 | title: //meta[@property="og:title"]/@content
4 |
5 | prune: no
6 |
7 | test_url: https://valdaiclub.com/a/highlights/do-empires-have-allies/
8 |
--------------------------------------------------------------------------------
/varsity.co.uk.txt:
--------------------------------------------------------------------------------
1 | # FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser
2 |
3 | strip: //h2
4 | test_url: http://www.varsity.co.uk/reviews/2662
--------------------------------------------------------------------------------
/vc.ru.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' l-entry ')]
2 |
3 | strip_id_or_class: l-hidden
4 | strip_id_or_class: l-entry__banner
5 |
6 | test_url: https://vc.ru/finance/128687-60-poleznyh-saytov-dlya-poiska-investiciy-v-startap
7 |
--------------------------------------------------------------------------------
/vedonlyonti.com.txt:
--------------------------------------------------------------------------------
1 | strip: //style
2 |
3 | test_url: https://vedonlyonti.com/veikkausvihjeet/pitkavetovihjeet/nba-boston-celtics-cleveland-cavaliers-8-5-2024
4 |
--------------------------------------------------------------------------------
/venturebeat.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="entry-title"]
2 | author: //div[@class="author-name"]
3 | date: //span[@class="the-time"]
4 | body: //div[@class="entry-content"]
5 | strip: //div[@class="vb-gallery"]
6 | test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1
--------------------------------------------------------------------------------
/vg.no.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='artikkelspalte']
2 | strip_id_or_class: breadcrumb
3 | test_url: http://www.vg.no/spill/artikkel.php?artid=10003628
--------------------------------------------------------------------------------
/viget.com.txt:
--------------------------------------------------------------------------------
1 | body: (//article)[1]
2 |
3 | strip_id_or_class: sharer
4 |
5 | prune: no
6 |
7 | test_url: https://www.viget.com/articles/understanding-futures-in-rust-part-1/
8 |
--------------------------------------------------------------------------------
/visualcapitalist.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id='mvp-content-main'] | //article//header
3 |
4 | strip_id_or_class: mvp-post-cat
5 |
6 | test_url: https://www.visualcapitalist.com/from-greek-to-latin-visualizing-the-evolution-of-the-alphabet/
7 |
--------------------------------------------------------------------------------
/vitispr.com.txt:
--------------------------------------------------------------------------------
1 | strip: //*[(@id = "ja-search")]
2 | body: //*[(@id = "ja-mainbody")]
3 | body: //*[(@id = "content-mass-bottom")]
4 | strip://h3[contains(span,'Related Posts')]
5 | strip://img
6 | test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot
--------------------------------------------------------------------------------
/vivirmexico.com.txt:
--------------------------------------------------------------------------------
1 | body: //*[(@class = "historia")]
2 | test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social
--------------------------------------------------------------------------------
/vk.com.txt:
--------------------------------------------------------------------------------
1 | # Send this user agent to avoid the meta redirect to badbrowser.php;
2 | # with it, the site redirects to m.vk.com (mobile version) instead.
3 | http_header(user-agent): PHP/7.2
4 |
5 | test_url: https://vk.com/noamchomskyvk?w=wall433994637_327
6 |
--------------------------------------------------------------------------------
/voltairenet.org.txt:
--------------------------------------------------------------------------------
1 | title: //h1[contains(concat(' ',normalize-space(@class),' '),' titre_serif_1 ')]
2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' texte_sans ')]
3 |
4 | test_url: http://www.voltairenet.org/article195149.html
--------------------------------------------------------------------------------
/vot-tak.tv.txt:
--------------------------------------------------------------------------------
1 | # prevent duplicated images
2 | strip: //img[@fallback]
3 |
4 | test_url: https://vot-tak.tv/novosti/16-08-2021-nevzlin-intervyu/
5 |
--------------------------------------------------------------------------------
/vozpopuli.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@class='mce-body']
3 |
4 | strip: //aside
5 |
6 | test_url: https://www.vozpopuli.com/opinion/pandemia-racismo-iglesias_0_1365163963.html
7 | test_contains: Ande yo caliente y muérase la gente
8 |
--------------------------------------------------------------------------------
/w3.org.txt:
--------------------------------------------------------------------------------
1 | date: //time[@class="entry-date"]/@datetime
2 |
3 | test_url: https://www.w3.org/blog/2019/05/w3c-and-whatwg-to-work-together-to-advance-the-open-web-platform/
4 |
--------------------------------------------------------------------------------
/wallabag.org.txt:
--------------------------------------------------------------------------------
1 | date: //meta[@name="date"]/@content
2 |
3 | test_url: https://wallabag.org/news/20241103-new-release-wallabag-2610/
--------------------------------------------------------------------------------
/warnerbros.fr.txt:
--------------------------------------------------------------------------------
1 | body: //div[@class="article-body"]
2 | test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait
3 |
--------------------------------------------------------------------------------
/warriordudimanche.net.txt:
--------------------------------------------------------------------------------
1 | title: //article[contains(concat(' ',normalize-space(@class),' '),' article ')]//header//h1
2 | body: //article[contains(concat(' ',normalize-space(@class),' '),' article ')]//section
3 |
4 | test_url: http://warriordudimanche.net/article458/589065212a599
--------------------------------------------------------------------------------
/watoday.com.au.txt:
--------------------------------------------------------------------------------
1 | # strip the breadcrumb
2 | strip: //div//ul
3 | strip: //aside
4 | strip_id_or_class: adWrapper
5 | strip_id_or_class: noPrint
6 |
7 | test_url: https://www.watoday.com.au/business/the-economy/no-one-died-behind-the-typo-on-the-new-50-note-20190618-p51yqg.html
8 |
--------------------------------------------------------------------------------
/web-libre.org.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='template_article']
2 |
3 | strip_id_or_class: article_more
4 | strip: //hr
5 |
6 | test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html
--------------------------------------------------------------------------------
/web.dev.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, "w-post-content")]
2 | author: //a[@class="w-author__name-link"]
3 | date: //header//time
4 |
5 | strip: //div[@class="w-post-breadcrumbs"]
6 | strip: //div[@class="w-chips"]
7 |
8 | test_url: https://web.dev/content-visibility/
9 |
--------------------------------------------------------------------------------
/wenow.com.txt:
--------------------------------------------------------------------------------
1 | title: //article//h1
2 |
3 | strip: //a[@class="post-previous"]
4 | strip: //aside
5 |
6 | test_url: https://www.wenow.com/2021/06/07/empreinte-carbone-de-la-viande/
7 |
--------------------------------------------------------------------------------
/what-if.xkcd.com.txt:
--------------------------------------------------------------------------------
1 | autodetect_next_page: no
2 | test_url: http://what-if.xkcd.com/1/
--------------------------------------------------------------------------------
/whatever.scalzi.com.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@class="navigation"]
2 | strip: //div[@id="sidebar"]
3 | strip: //div[@id="post-extra-content"]
4 | strip: //div[@id="footer"]
5 | strip: //div[contains(@class, "sharing")]
6 |
7 | test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/
--------------------------------------------------------------------------------
/wiki.guildwars.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id='content']
3 | strip_id_or_class: editsection
4 | strip_id_or_class: toc
5 | strip: //div[@id='siteNotice']
6 | strip: //div[@id='content']//table[last()]
7 | prune: no
8 | test_url: http://wiki.guildwars.com/wiki/Monk
--------------------------------------------------------------------------------
/wiki.guildwars2.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id='content']
3 | strip_id_or_class: editsection
4 | strip_id_or_class: toc
5 | strip: //div[@id='siteNotice']
6 | strip: //div[@id='content']//table[last()]
7 | prune: no
8 | test_url: http://wiki.guildwars2.com/wiki/Guardian
--------------------------------------------------------------------------------
/wikiwand.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id="fullContent"]
2 |
3 | strip: //div[@data-aaad]
4 |
5 | prune: no
6 |
7 | test_url: https://www.wikiwand.com/en/Maslow%27s_hierarchy_of_needs
8 | test_contains: Personal security
9 |
--------------------------------------------------------------------------------
/will-self.com.txt:
--------------------------------------------------------------------------------
1 | strip: //div[@class="widget-area"]
2 | title: //*[@class="entry-title"]
3 | date: //time[@class="entry-date"]
4 | test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/
--------------------------------------------------------------------------------
/wmpoweruser.com.txt:
--------------------------------------------------------------------------------
1 | date: //*[@class="entry-date"]
2 | author: //*[@class="author vcard"]
3 | strip: //*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"]
4 | test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/
--------------------------------------------------------------------------------
/wochenanzeiger.de.txt:
--------------------------------------------------------------------------------
1 | strip_id_or_class: kleinertext
2 | strip: //h3[text() = 'Weiterlesen']
3 | strip: //div[starts-with(@class, 'box') and contains(@class, '_simple')]
4 |
5 | test_url: https://www.wochenanzeiger.de/article/238700.html
6 |
7 |
--------------------------------------------------------------------------------
/wordpress.org.txt:
--------------------------------------------------------------------------------
1 | author: //div[@class="meta"]/a[1]
2 |
3 | strip: //div[@class="meta"]
4 | strip: //div[@id="likes-other-gravatars"]
5 | strip: //noscript
6 |
7 | test_url: https://wordpress.org/news/2018/07/update-on-gutenberg/
8 |
--------------------------------------------------------------------------------
/worldwidewords.org.txt:
--------------------------------------------------------------------------------
1 | title: //p[@id='content']
2 |
3 | body: //div[@class='contentblock']
4 | test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm
--------------------------------------------------------------------------------
/wpbeginner.com.txt:
--------------------------------------------------------------------------------
1 | # Required to load the feed properly
2 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0
3 |
4 | test_url: https://www.wpbeginner.com/feed/
5 |
--------------------------------------------------------------------------------
/wphive.com.txt:
--------------------------------------------------------------------------------
1 | # Required to load the feed properly
2 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0
3 |
4 | test_url: https://www.wphive.com/feed/
5 |
--------------------------------------------------------------------------------
/wpmayor.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@id='nrelate_flyout_placeholder']
2 |
3 | strip_id_or_class: share
4 |
5 | prune: no
6 |
7 | test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/
8 | test_url: http://www.wpmayor.com/feed/
--------------------------------------------------------------------------------
/writerunboxed.com.txt:
--------------------------------------------------------------------------------
1 | http_header(user-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0
2 |
3 | test_url: https://writerunboxed.com/2021/09/23/writerslife-the-411-on-writing-retreats/
4 |
--------------------------------------------------------------------------------
/www.seriouseats.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(@class, 'recipe-wrapper')]
2 |
3 | test_url: https://www.seriouseats.com/recipes/2014/02/braised-short-ribs-from-daniel.html
4 |
--------------------------------------------------------------------------------
/www2.cnrs.fr.txt:
--------------------------------------------------------------------------------
1 | # Site configuration for CNRS press releases
2 |
3 | body: //div[@id="contenu"]//h2[@id="chapeau"] | //div[@id="contenu"]/div[@id="textContenu"]
4 |
5 | test_url: http://www2.cnrs.fr/presse/communique/5327.htm
6 |
--------------------------------------------------------------------------------
/wz-newsline.de.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 |
3 | date: //p[@class='articleDate']
4 | body: //div[@class='articleBody wzStandardArticle']
5 | test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050
--------------------------------------------------------------------------------
/xlsemanal.com.txt:
--------------------------------------------------------------------------------
1 | body: //section[contains(concat(' ',normalize-space(@class),' '),' post-content ')]
2 |
3 | test_url: http://www.xlsemanal.com/firmas/20171126/perez-reverte-el-hombre-que-si-estaba-alli.html
4 |
--------------------------------------------------------------------------------
/yosoy.red.txt:
--------------------------------------------------------------------------------
1 | prune: no
2 |
3 | test_url: https://yosoy.red/2021/01/22/politica/
4 | test_contains: es fundamental entender que los sistemas son creados por humanos
5 |
--------------------------------------------------------------------------------
/yostivanich.com.txt:
--------------------------------------------------------------------------------
1 | title: //div[@class='entry-title']
2 | body: //div[@class='entry-content']
3 | strip_comments: yes
4 | convert_double_br_tags: yes
5 | test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/
--------------------------------------------------------------------------------
/yourerie.com.txt:
--------------------------------------------------------------------------------
1 | body: //div[@itemprop="articleBody"]
2 | test_url: http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA
3 |
--------------------------------------------------------------------------------
/zaknrw.de.txt:
--------------------------------------------------------------------------------
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single-content ')]
2 |
3 | test_url: http://www.zaknrw.de/medium/audience-development-und-diversitaet
4 |
--------------------------------------------------------------------------------
/zataz.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1[@class="blog-title"]
2 |
3 | test_url: https://www.zataz.com/bientot-noel-securisons-le-prochain-pc-de-la-famille/
4 |
--------------------------------------------------------------------------------
/zerokspot.com.txt:
--------------------------------------------------------------------------------
1 | title: //h1
2 | body: //div[@id="primarycontent"]
3 | test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/
--------------------------------------------------------------------------------
/zoomit.ir.txt:
--------------------------------------------------------------------------------
1 | body: //div[@itemprop="image" or @itemprop="description" or @itemprop="articleBody"]
2 |
3 | test_url: http://www.zoomit.ir/2017/2/25/152960/tiny-engine-turns-natural-gas-into-hydrogen/
4 |
--------------------------------------------------------------------------------