├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── wrong_charset.md └── workflows │ ├── cd.yml │ └── ci.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── large_datasets.rs └── large_payload.rs ├── src ├── assets.rs ├── cd.rs ├── consts.rs ├── entity.rs ├── lib.rs ├── md.rs ├── md │ ├── plugins.rs │ └── structs.rs ├── normalizer.rs ├── performance.rs ├── tests │ ├── cd.rs │ ├── data │ │ ├── largesets │ │ │ ├── None │ │ │ │ ├── sample-1.gif │ │ │ │ ├── sample-1.jpg │ │ │ │ ├── sample-1.mp4 │ │ │ │ ├── sample-1.png │ │ │ │ ├── sample-1.webp │ │ │ │ ├── sample-1.xlsx │ │ │ │ ├── sample-2.png │ │ │ │ └── sample-3.png │ │ │ ├── ascii │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── _chromium_iso-8859-1_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.rst │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── book-stats.json │ │ │ │ ├── books.json │ │ │ │ ├── dummy-1.pem │ │ │ │ ├── empty.json │ │ │ │ ├── iris-utf-8.csv │ │ │ │ ├── iris-utf-8.json │ │ │ │ ├── parchments.json │ │ │ │ ├── playlist.m3u │ │ │ │ └── simple.json │ │ │ ├── big5 │ │ │ │ ├── 0804.blogspot.com.xml │ │ │ │ ├── _chromium_Big5_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── blog.worren.net.xml │ │ │ │ ├── carbonxiv.blogspot.com.xml │ │ │ │ ├── catshadow.blogspot.com.xml │ │ │ │ ├── coolloud.org.tw.xml │ │ │ │ ├── digitalwall.com.xml │ │ │ │ ├── ebao.us.xml │ │ │ │ ├── fudesign.blogspot.com.xml │ │ │ │ ├── kafkatseng.blogspot.com.xml │ │ │ │ ├── ke207.blogspot.com.xml │ │ │ │ ├── leavesth.blogspot.com.xml │ │ │ │ ├── letterlego.blogspot.com.xml │ │ │ │ ├── linyijen.blogspot.com.xml │ │ │ │ ├── marilynwu.blogspot.com.xml │ │ │ │ ├── myblog.pchome.com.tw.xml │ │ │ │ ├── oui-design.com.xml │ │ │ │ ├── sanwenji.blogspot.com.xml │ │ │ │ ├── sinica.edu.tw.xml │ │ │ │ ├── sylvia1976.blogspot.com.xml │ │ │ │ ├── tlkkuo.blogspot.com.xml │ │ │ │ ├── 
unoriginalblog.com.xml │ │ │ │ ├── upsaid.com.xml │ │ │ │ ├── willythecop.blogspot.com.xml │ │ │ │ └── ytc.blogspot.com.xml │ │ │ ├── euc-jp │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── aivy.co.jp.xml │ │ │ │ ├── akaname.main.jp.xml │ │ │ │ ├── arclamp.jp.xml │ │ │ │ ├── aristrist.s57.xrea.com.xml │ │ │ │ ├── artifact-jp.com.xml │ │ │ │ ├── atom.ycf.nanet.co.jp.xml │ │ │ │ ├── azito.under.jp.xml │ │ │ │ ├── azoz.org.xml │ │ │ │ ├── blog.kabu-navi.com.atom.xml │ │ │ │ ├── blog.kabu-navi.com.xml │ │ │ │ ├── bphrs.net.xml │ │ │ │ ├── ch.kitaguni.tv.xml │ │ │ │ ├── club.h14m.org.xml │ │ │ │ ├── contents-factory.com.xml │ │ │ │ ├── furusatonoeki.cutegirl.jp.xml │ │ │ │ ├── manana.moo.jp.xml │ │ │ │ ├── mimizun.com.xml │ │ │ │ ├── misuzilla.org.xml │ │ │ │ ├── overcube.com.atom.xml │ │ │ │ ├── overcube.com.xml │ │ │ │ ├── pinkupa.com.xml │ │ │ │ ├── rdf.ycf.nanet.co.jp.xml │ │ │ │ ├── siesta.co.jp.aozora.xml │ │ │ │ ├── tls.org.xml │ │ │ │ └── yukiboh.moo.jp.xml │ │ │ ├── euc-kr │ │ │ │ ├── _chromium_windows-949_with_no_encoding_specified.html │ │ │ │ ├── _ude_euc1.txt │ │ │ │ ├── _ude_euc2.txt │ │ │ │ ├── acnnewswire.net.xml │ │ │ │ ├── alogblog.com.xml │ │ │ │ ├── arts.egloos.com.xml │ │ │ │ ├── birder.egloos.com.xml │ │ │ │ ├── blog.bd-lab.com.xml │ │ │ │ ├── blog.empas.com.xml │ │ │ │ ├── blog.rss.naver.com.xml │ │ │ │ ├── calmguy.egloos.com.xml │ │ │ │ ├── chisato.info.xml │ │ │ │ ├── console.linuxstudy.pe.kr.xml │ │ │ │ ├── critique.or.kr.xml │ │ │ │ ├── epitaph.egloos.com.xml │ │ │ │ ├── ittrend.egloos.com.xml │ │ │ │ ├── jely.egloos.com.xml │ │ │ │ ├── jely.pe.kr.xml │ │ │ │ ├── jowchung.oolim.net.xml │ │ │ │ ├── kina.egloos.com.xml │ │ │ │ ├── lennon81.egloos.com.xml │ │ │ │ ├── oroll.egloos.com.xml │ │ │ │ ├── poliplus.egloos.com.xml │ │ │ │ ├── scarletkh2.egloos.com.xml │ │ │ │ ├── siwoo.org.xml │ │ │ │ ├── sparcs.kaist.ac.kr.xml │ │ │ │ ├── tori02.egloos.com.xml │ │ │ │ ├── willis.egloos.com.xml │ │ │ │ ├── xenix.egloos.com.xml │ │ │ │ ├── yunho.egloos.com.xml │ │ │ │ 
└── zangsalang.egloos.com.xml │ │ │ ├── gb18030 │ │ │ │ └── _ude_1.txt │ │ │ ├── gbk │ │ │ │ ├── 14.blog.westca.com.xml │ │ │ │ ├── 2.blog.westca.com.xml │ │ │ │ ├── _chromium_gb18030_with_no_encoding_specified.html.xml │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── acnnewswire.net.xml │ │ │ │ ├── bbs.blogsome.com.xml │ │ │ │ ├── cappuccinos.3322.org.xml │ │ │ │ ├── chen56.blogcn.com.xml │ │ │ │ ├── cindychen.com.xml │ │ │ │ ├── cnblog.org.xml │ │ │ │ ├── coverer.com.xml │ │ │ │ ├── eighthday.blogspot.com.xml │ │ │ │ ├── godthink.blogsome.com.xml │ │ │ │ ├── jjgod.3322.org.xml │ │ │ │ ├── lily.blogsome.com.xml │ │ │ │ ├── luciferwang.blogcn.com.xml │ │ │ │ ├── pda.blogsome.com.xml │ │ │ │ ├── softsea.net.xml │ │ │ │ ├── w3cn.org.xml │ │ │ │ └── xy15400.blogcn.com.xml │ │ │ ├── ibm866 │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── aif.ru.health.xml │ │ │ │ ├── aug32.hole.ru.xml │ │ │ │ ├── aviaport.ru.xml │ │ │ │ ├── blog.mlmaster.com.xml │ │ │ │ ├── forum.template-toolkit.ru.1.xml │ │ │ │ ├── forum.template-toolkit.ru.4.xml │ │ │ │ ├── forum.template-toolkit.ru.6.xml │ │ │ │ ├── forum.template-toolkit.ru.8.xml │ │ │ │ ├── forum.template-toolkit.ru.9.xml │ │ │ │ ├── greek.ru.xml │ │ │ │ ├── intertat.ru.xml │ │ │ │ ├── janulalife.blogspot.com.xml │ │ │ │ ├── kapranoff.ru.xml │ │ │ │ ├── money.rin.ru.xml │ │ │ │ ├── music.peeps.ru.xml │ │ │ │ ├── newsru.com.xml │ │ │ │ └── susu.ac.ru.xml │ │ │ ├── iso-2022-jp │ │ │ │ └── _ude_1.txt │ │ │ ├── iso-8859-1,windows-1252 │ │ │ │ ├── _mozilla_bug421271_text.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── _ude_3.txt │ │ │ │ ├── _ude_4.txt │ │ │ │ ├── _ude_5.txt │ │ │ │ ├── _ude_6.txt │ │ │ │ ├── anzeige-value-stars.html │ │ │ │ └── github_bug_9.txt │ │ │ ├── iso-8859-2 │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_10.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── _ude_3.txt │ │ │ │ ├── _ude_4.txt │ │ │ │ ├── _ude_5.txt │ │ │ │ ├── _ude_6.txt │ │ │ │ ├── _ude_7.txt │ │ │ │ ├── _ude_8.txt │ │ │ │ ├── _ude_9.txt │ │ │ │ ├── auto-apro.hu.xml │ │ │ │ 
├── cigartower.hu.xml │ │ │ │ ├── escience.hu.xml │ │ │ │ ├── hirtv.hu.xml │ │ │ │ ├── honositomuhely.hu.xml │ │ │ │ ├── saraspatak.hu.xml │ │ │ │ ├── shamalt.uw.hu.mk.xml │ │ │ │ ├── shamalt.uw.hu.mr.xml │ │ │ │ ├── shamalt.uw.hu.mv.xml │ │ │ │ ├── shamalt.uw.hu.xml │ │ │ │ └── ugyanmar.blogspot.com.xml │ │ │ ├── iso-8859-4 │ │ │ │ └── _ude_1.txt │ │ │ ├── iso-8859-5 │ │ │ │ ├── _chromium_ISO-8859-5_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── aero-bg.com.xml │ │ │ │ ├── aif.ru.health.xml │ │ │ │ ├── aug32.hole.ru.xml │ │ │ │ ├── aviaport.ru.xml │ │ │ │ ├── bbc.co.uk.popshow.xml │ │ │ │ ├── blog.mlmaster.com.xml │ │ │ │ ├── bpm.cult.bg.2.xml │ │ │ │ ├── bpm.cult.bg.4.xml │ │ │ │ ├── bpm.cult.bg.9.xml │ │ │ │ ├── bpm.cult.bg.medusa.4.xml │ │ │ │ ├── bpm.cult.bg.xml │ │ │ │ ├── debian.gabrovo.com.news.xml │ │ │ │ ├── debian.gabrovo.com.xml │ │ │ │ ├── doncho.net.comments.xml │ │ │ │ ├── ecloga.cult.bg.xml │ │ │ │ ├── forum.template-toolkit.ru.1.xml │ │ │ │ ├── forum.template-toolkit.ru.4.xml │ │ │ │ ├── forum.template-toolkit.ru.6.xml │ │ │ │ ├── forum.template-toolkit.ru.8.xml │ │ │ │ ├── forum.template-toolkit.ru.9.xml │ │ │ │ ├── greek.ru.xml │ │ │ │ ├── ide.li.xml │ │ │ │ ├── intertat.ru.xml │ │ │ │ ├── janulalife.blogspot.com.xml │ │ │ │ ├── kapranoff.ru.xml │ │ │ │ ├── linux-bg.org.xml │ │ │ │ ├── money.rin.ru.xml │ │ │ │ ├── music.peeps.ru.xml │ │ │ │ ├── newsru.com.xml │ │ │ │ └── susu.ac.ru.xml │ │ │ ├── iso-8859-6 │ │ │ │ ├── _chromium_ISO-8859-6_with_no_encoding_specified.html │ │ │ │ └── _ude_1.txt │ │ │ ├── iso-8859-7 │ │ │ │ ├── _chromium_ISO-8859-7_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── _ude_3.txt │ │ │ │ ├── _ude_greek.txt │ │ │ │ ├── disabled.gr.xml │ │ │ │ ├── hotstation.gr.xml │ │ │ │ ├── naftemporiki.gr.bus.xml │ │ │ │ ├── naftemporiki.gr.cmm.xml │ │ │ │ ├── naftemporiki.gr.fin.xml │ │ │ │ ├── naftemporiki.gr.mrk.xml │ │ │ │ ├── naftemporiki.gr.mrt.xml │ │ │ │ ├── 
naftemporiki.gr.spo.xml │ │ │ │ └── naftemporiki.gr.wld.xml │ │ │ ├── koi8-r │ │ │ │ ├── _chromium_KOI8-R_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── aif.ru.health.xml │ │ │ │ ├── aug32.hole.ru.xml │ │ │ │ ├── aviaport.ru.xml │ │ │ │ ├── blog.mlmaster.com.xml │ │ │ │ ├── forum.template-toolkit.ru.1.xml │ │ │ │ ├── forum.template-toolkit.ru.4.xml │ │ │ │ ├── forum.template-toolkit.ru.6.xml │ │ │ │ ├── forum.template-toolkit.ru.8.xml │ │ │ │ ├── forum.template-toolkit.ru.9.xml │ │ │ │ ├── greek.ru.xml │ │ │ │ ├── intertat.ru.xml │ │ │ │ ├── janulalife.blogspot.com.xml │ │ │ │ ├── kapranoff.ru.xml │ │ │ │ ├── koi.kinder.ru.xml │ │ │ │ ├── money.rin.ru.xml │ │ │ │ ├── music.peeps.ru.xml │ │ │ │ ├── newsru.com.xml │ │ │ │ └── susu.ac.ru.xml │ │ │ ├── shift_jis │ │ │ │ ├── 10e.org.xml │ │ │ │ ├── 1affliate.com.xml │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── _ude_3.txt │ │ │ │ ├── _ude_4.txt │ │ │ │ ├── accessories-brand.com.xml │ │ │ │ ├── amefoot.net.xml │ │ │ │ ├── andore.com.inami.xml │ │ │ │ ├── andore.com.money.xml │ │ │ │ ├── andore.com.xml │ │ │ │ ├── blog.inkase.net.xml │ │ │ │ ├── blog.paseri.ne.jp.xml │ │ │ │ ├── bloglelife.com.xml │ │ │ │ ├── brag.zaka.to.xml │ │ │ │ ├── celeb.lalalu.com.xml │ │ │ │ ├── clickablewords.com.xml │ │ │ │ ├── do.beginnersrack.com.xml │ │ │ │ ├── dogsinn.jp.xml │ │ │ │ ├── grebeweb.net.xml │ │ │ │ ├── milliontimes.jp.xml │ │ │ │ ├── moon-light.ne.jp.xml │ │ │ │ ├── nextbeaut.com.xml │ │ │ │ ├── ooganemochi.com.xml │ │ │ │ ├── perth-on.net.xml │ │ │ │ ├── sakusaka-silk.net.xml │ │ │ │ ├── setsuzei119.jp.xml │ │ │ │ ├── tamuyou.haun.org.xml │ │ │ │ └── yasuhisa.com.xml │ │ │ ├── utf-16be │ │ │ │ └── bom-utf-16-be.srt │ │ │ ├── utf-16le │ │ │ │ └── bom-utf-16-le.srt │ │ │ ├── utf-8 │ │ │ │ ├── _chromium_UTF-8_with_no_encoding_specified.html │ │ │ │ ├── _mozilla_bug306272_text.html │ │ │ │ ├── _mozilla_bug426271_text-utf-8.html │ │ │ │ ├── _ude_1.md │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ 
│ ├── _ude_3.txt │ │ │ │ ├── _ude_4.txt │ │ │ │ ├── _ude_5.txt │ │ │ │ ├── _ude_6.txt │ │ │ │ ├── _ude_greek.txt │ │ │ │ ├── _ude_he1.txt │ │ │ │ ├── _ude_he2.txt │ │ │ │ ├── _ude_he3.txt │ │ │ │ ├── _ude_russian.txt │ │ │ │ ├── anitabee.blogspot.com.xml │ │ │ │ ├── balatonblog.typepad.com.xml │ │ │ │ ├── bom-utf-8.srt │ │ │ │ ├── boobooo.blogspot.com.xml │ │ │ │ ├── howto.diveintomark.org.xml │ │ │ │ ├── linuxbox.hu.xml │ │ │ │ ├── pihgy.hu.xml │ │ │ │ ├── reddit_wsb.csv │ │ │ │ ├── weblabor.hu.2.xml │ │ │ │ └── weblabor.hu.xml │ │ │ ├── windows-1250 │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_10.txt │ │ │ │ ├── _ude_11.txt │ │ │ │ ├── _ude_12.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── _ude_3.txt │ │ │ │ ├── _ude_4.txt │ │ │ │ ├── _ude_5.txt │ │ │ │ ├── _ude_6.txt │ │ │ │ ├── _ude_7.txt │ │ │ │ ├── _ude_8.txt │ │ │ │ ├── _ude_9.txt │ │ │ │ ├── bbc.co.uk.hu.forum.xml │ │ │ │ ├── bbc.co.uk.hu.learningenglish.xml │ │ │ │ ├── bbc.co.uk.hu.pressreview.xml │ │ │ │ ├── bbc.co.uk.hu.xml │ │ │ │ ├── objektivhir.hu.xml │ │ │ │ └── torokorszag.blogspot.com.xml │ │ │ ├── windows-1251 │ │ │ │ ├── _chromium_windows-1251_with_no_encoding_specified.html │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── aif.ru.health.xml │ │ │ │ ├── anthropology.ru.xml │ │ │ │ ├── aug32.hole.ru.xml │ │ │ │ ├── aviaport.ru.xml │ │ │ │ ├── bbc.co.uk.popshow.xml │ │ │ │ ├── blog.mlmaster.com.xml │ │ │ │ ├── bpm.cult.bg.2.xml │ │ │ │ ├── bpm.cult.bg.3.xml │ │ │ │ ├── bpm.cult.bg.4.xml │ │ │ │ ├── bpm.cult.bg.9.xml │ │ │ │ ├── bpm.cult.bg.medusa.4.xml │ │ │ │ ├── bpm.cult.bg.xml │ │ │ │ ├── cp1251.longCamelCase.txt │ │ │ │ ├── debian.gabrovo.com.news.xml │ │ │ │ ├── debian.gabrovo.com.xml │ │ │ │ ├── doncho.net.comments.xml │ │ │ │ ├── doncho.net.xml │ │ │ │ ├── ecloga.cult.bg.xml │ │ │ │ ├── forum.template-toolkit.ru.1.xml │ │ │ │ ├── forum.template-toolkit.ru.4.xml │ │ │ │ ├── forum.template-toolkit.ru.6.xml │ │ │ │ ├── forum.template-toolkit.ru.8.xml │ │ │ │ ├── forum.template-toolkit.ru.9.xml │ │ │ 
│ ├── greek.ru.xml │ │ │ │ ├── ide.li.xml │ │ │ │ ├── informator.org.xml │ │ │ │ ├── intertat.ru.xml │ │ │ │ ├── janulalife.blogspot.com.xml │ │ │ │ ├── kapranoff.ru.xml │ │ │ │ ├── linux-bg.org.xml │ │ │ │ ├── money.rin.ru.xml │ │ │ │ ├── music.peeps.ru.xml │ │ │ │ ├── newsru.com.xml │ │ │ │ └── rinennor.org.xml │ │ │ ├── windows-1254 │ │ │ │ ├── _chromium_windows-1254_with_no_encoding_specified.html │ │ │ │ └── _ude_1.txt │ │ │ ├── windows-1255,iso-8859-8 │ │ │ │ ├── _chromium_ISO-8859-8_with_no_encoding_specified.html │ │ │ │ ├── _chromium_windows-1255_with_no_encoding_specified.html │ │ │ │ ├── _ude_he1.txt │ │ │ │ ├── _ude_he2.txt │ │ │ │ ├── _ude_he3.txt │ │ │ │ ├── carshops.co.il.xml │ │ │ │ ├── exego.net.2.xml │ │ │ │ ├── hagada.org.il.xml │ │ │ │ ├── halemo.net.edoar.xml │ │ │ │ ├── hevra.org.il.xml │ │ │ │ ├── hydepark.hevre.co.il.7957.xml │ │ │ │ ├── info.org.il.xml │ │ │ │ ├── infomed.co.il.xml │ │ │ │ ├── law.co.il.xml │ │ │ │ ├── maakav.org.xml │ │ │ │ ├── neviim.net.xml │ │ │ │ ├── notes.co.il.50.xml │ │ │ │ ├── notes.co.il.6.xml │ │ │ │ ├── notes.co.il.7.xml │ │ │ │ ├── notes.co.il.8.xml │ │ │ │ ├── pcplus.co.il.xml │ │ │ │ ├── sharks.co.il.xml │ │ │ │ └── whatsup.org.il.xml │ │ │ ├── windows-1256 │ │ │ │ ├── _chromium_windows-1256_with_no_encoding_specified.html │ │ │ │ └── _ude_1.txt │ │ │ ├── windows-1257 │ │ │ │ └── _ude_1.txt │ │ │ ├── windows-1258 │ │ │ │ ├── _ude_1.txt │ │ │ │ └── _ude_2.txt │ │ │ ├── windows-874 │ │ │ │ ├── _mozilla_bug488426_text.html │ │ │ │ ├── opentle.org.xml │ │ │ │ ├── pharmacy.kku.ac.th.analyse1.xml │ │ │ │ ├── pharmacy.kku.ac.th.centerlab.xml │ │ │ │ ├── pharmacy.kku.ac.th.healthinfo-ne.xml │ │ │ │ └── trickspot.boxchart.com.xml │ │ │ └── x-mac-cyrillic │ │ │ │ ├── _ude_1.txt │ │ │ │ ├── _ude_2.txt │ │ │ │ ├── aif.ru.health.xml │ │ │ │ ├── aug32.hole.ru.xml │ │ │ │ ├── aviaport.ru.xml │ │ │ │ ├── blog.mlmaster.com.xml │ │ │ │ ├── forum.template-toolkit.ru.4.xml │ │ │ │ ├── forum.template-toolkit.ru.6.xml │ │ │ │ ├── 
forum.template-toolkit.ru.8.xml │ │ │ │ ├── forum.template-toolkit.ru.9.xml │ │ │ │ ├── greek.ru.xml │ │ │ │ ├── intertat.ru.xml │ │ │ │ ├── kapranoff.ru.xml │ │ │ │ ├── koi.kinder.ru.xml │ │ │ │ ├── money.rin.ru.xml │ │ │ │ ├── music.peeps.ru.xml │ │ │ │ ├── newsru.com.xml │ │ │ │ └── susu.ac.ru.xml │ │ └── samples │ │ │ ├── NOTICE.md │ │ │ ├── sample-arabic-1.txt │ │ │ ├── sample-arabic.txt │ │ │ ├── sample-bulgarian.txt │ │ │ ├── sample-chinese.txt │ │ │ ├── sample-english.bom.txt │ │ │ ├── sample-french-1.txt │ │ │ ├── sample-french.txt │ │ │ ├── sample-greek-2.txt │ │ │ ├── sample-greek.txt │ │ │ ├── sample-hebrew-2.txt │ │ │ ├── sample-hebrew-3.txt │ │ │ ├── sample-korean.txt │ │ │ ├── sample-polish.txt │ │ │ ├── sample-russian-2.txt │ │ │ ├── sample-russian-3.txt │ │ │ ├── sample-russian.txt │ │ │ ├── sample-spanish.txt │ │ │ └── sample-turkish.txt │ ├── detection_base.rs │ ├── detection_edge_case.rs │ ├── detection_full.rs │ ├── detection_large_payload.rs │ ├── entity.rs │ ├── md.rs │ ├── mod.rs │ └── utils.rs └── utils.rs └── tests └── normalizer.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | src/tests/data/** linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us fix something bad like an exception 4 | title: "[BUG]" 5 | labels: bug, help wanted 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug/exception is. 12 | 13 | **To Reproduce** 14 | Give us the target text file. Host it somewhere with untouched encoding. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Logs** 20 | If applicable, add console outputs to help explain your problem. 
21 | 22 | **Desktop (please complete the following information):** 23 | - OS: [e.g. Linux, Windows or Mac] 24 | - Rust version [e.g. 1.7] 25 | - Package version [e.g. 1.0.0] 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[Proposal]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/wrong_charset.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Wrong charset / Detection issue 3 | about: Create a report to help us improve the detection mechanism 4 | title: "[DETECTION]" 5 | labels: help wanted, detection 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Notice** 11 | I hereby announce that my raw input is not: 12 | - Too small content (<=32 characters) as I do know that ANY charset detector heavily depends on content 13 | - Encoded in a deprecated/abandoned encoding that is not even supported by the `encoding` Rust library 14 | 15 | **Provide the file** 16 | An accessible way of retrieving the file concerned. Host it somewhere with untouched encoding.
17 | 18 | **Verbose output** 19 | Using the CLI, run `normalizer -v ./my-file.txt` and paste the result in here. 20 | 21 | ``` 22 | (venv) >normalizer -v ./data/sample.1.ar.srt 23 | 2021-05-21 08:38:44,050 | DEBUG | ascii does not fit given bytes sequence at ALL. 'ascii' codec can't decode byte 0xca in position 54: ordinal not in range(128) 24 | 2021-05-21 08:38:44,051 | DEBUG | big5 does not fit given bytes sequence at ALL. 'big5' codec can't decode byte 0xc9 in position 60: illegal multibyte sequence 25 | 2021-05-21 08:38:44,051 | DEBUG | big5hkscs does not fit given bytes sequence at ALL. 'big5hkscs' codec can't decode byte 0xc9 in position 60: illegal multibyte sequence 26 | .... 27 | ``` 28 | 29 | **Expected encoding** 30 | A clear and concise description of what you expected as encoding. Any more details about how the current guess is wrong 31 | are very much appreciated. 32 | 33 | **Desktop (please complete the following information):** 34 | - OS: [e.g. Linux, Windows or Mac] 35 | - Rust version [e.g. 1.7] 36 | - Package version [e.g. 1.0.0] 37 | 38 | **Additional context** 39 | Add any other context about the problem here.
40 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Delivery 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+' # Trigger when a new tag is pushed following SemVer pattern 7 | 8 | jobs: 9 | pre_flight_check: 10 | name: Preflight Checks 11 | uses: ./.github/workflows/ci.yml 12 | 13 | publish: 14 | needs: [pre_flight_check] 15 | name: Publish to Crates.io 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | 21 | - name: Login 22 | run: cargo login ${{ secrets.CRATES_IO_TOKEN }} 23 | 24 | - name: Publish 25 | run: cargo publish 26 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | workflow_call: 9 | 10 | env: 11 | CARGO_TERM_COLOR: always 12 | 13 | jobs: 14 | lint: 15 | name: 🎨 Linters 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Formatting 20 | run: cargo fmt --check 21 | - name: Clippy 22 | run: rustup component add clippy && cargo clippy 23 | tests: 24 | name: ✅ Tests 25 | needs: 26 | - lint 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v3 30 | - name: Run tests 31 | run: cargo test --release --verbose 32 | perfomance: 33 | name: ⚡ Performance & 📈 Coverage 34 | needs: 35 | - lint 36 | - tests 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v3 40 | - name: Run performance & coverage tool 41 | run: cargo run --bin performance --features performance --release 42 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | /target 2 | .idea/ 3 | .idea 4 | .DS_Store 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 
96 | #Pipfile.lock 97 | 98 | # celery beat schedule file 99 | celerybeat-schedule 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea/ 132 | char-dataset/ 133 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at ahmed.tahri@cloudnursery.dev. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | If you’re reading this, you’re probably interested in contributing to Charset Normalizer. 4 | Thank you very much! Open source projects live-and-die based on the support they receive from others, 5 | and the fact that you’re even considering contributing to this project is very generous of you. 6 | 7 | ## Questions 8 | 9 | The GitHub issue tracker is for *bug reports* and *feature requests*. 10 | Questions are allowed only when no answer is provided in the docs.
11 | 12 | ## Good Bug Reports 13 | 14 | Please be aware of the following things when filing bug reports: 15 | 16 | 1. Avoid raising duplicate issues. *Please* use the GitHub issue search feature 17 | to check whether your bug report or feature request has been mentioned in 18 | the past. Duplicate bug reports and feature requests are a huge maintenance 19 | burden on the limited resources of the project. If it is clear from your 20 | report that you would have struggled to find the original, that's ok, but 21 | if searching for a selection of words in your issue title would have found 22 | the duplicate then the issue will likely be closed extremely abruptly. 23 | 2. When filing bug reports about exceptions or tracebacks, please include the 24 | *complete* traceback. Partial tracebacks, or just the exception text, are 25 | not helpful. Issues that do not contain complete tracebacks may be closed 26 | without warning. 27 | 3. Make sure you provide a suitable amount of information to work with. This 28 | means you should provide: 29 | 30 | - Guidance on **how to reproduce the issue**. Ideally, this should be a 31 | *small* code sample that can be run immediately by the maintainers. 32 | Failing that, let us know what you're doing, how often it happens, what 33 | environment you're using, etc. Be thorough: it prevents us needing to ask 34 | further questions. 35 | - Tell us **what you expected to happen**. When we run your example code, 36 | what are we expecting to happen? What does "success" look like for your 37 | code? 38 | - Tell us **what actually happens**. It's not helpful for you to say "it 39 | doesn't work" or "it fails". Tell us *how* it fails: do you get an 40 | exception? A None answer? How was the actual result 41 | different from your expected result? 42 | - Tell us **what version of Charset Normalizer you're using**, and 43 | **how you installed it**. Different versions of Charset Normalizer behave 44 | differently and have different bugs. 
45 | 46 | If you do not provide all of these things, it will take us much longer to 47 | fix your problem. If we ask you to clarify these, and you never respond, we 48 | will close your issue without fixing it. 49 | 50 | 51 | ## What PR are we accepting? 52 | 53 | Mostly anything, from cosmetic changes to detection-mechanism improvements, on the sole condition that you do not break 54 | backward compatibility. 55 | 56 | ## What PR may be doomed? 57 | 58 | - Adding support for a charset/encoding that the Rust `encoding` crate does not support 59 | > If you looked carefully at the project, you would see that it aims to be generic whenever possible. So adding a specific prober is out of the question. 60 | 61 | - Of course, if the CI/CD checks are failing 62 | > Getting the discussion started often means making the minimum effort to get it Green! (Be reassured, maintainers will look into it, given a reasonable amount of time) 63 | 64 | - Submitting a PR without any description OR viable commit description 65 | > This is obvious, maintainers need to understand as fast as possible what you are trying to submit without putting in too much effort. 66 | 67 | ## How to run tests locally? 68 | 69 | It is essential that you run the mandatory checks prior to any submission. 70 | Run: 71 | * `cargo fmt` to check and auto-fix formatting, 72 | * `cargo clippy` to lint your code, 73 | * `cargo test` to run all tests.
74 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "charset-normalizer-rs" 3 | version = "1.0.6" 4 | authors = ["Nikolay Yarovoy "] 5 | edition = "2021" 6 | description = "Truly universal encoding detector in pure Rust - port of Python version" 7 | license-file = "LICENSE" 8 | documentation = "https://docs.rs/charset-normalizer-rs" 9 | readme = "README.md" 10 | repository = "https://github.com/nickspring/charset-normalizer-rs" 11 | keywords = ["encoding", "charset", "detector", "conversion", "normalizer"] 12 | categories = ["encoding", "internationalization", "localization"] 13 | exclude = [ 14 | "/src/tests/data/**", 15 | "/CONTRIBUTING.md", 16 | "/CODE_OF_CONDUCT.md", 17 | "/.github/**", 18 | ".gitattributes" 19 | ] 20 | 21 | [dependencies] 22 | ahash = "0.8.3" 23 | bitflags = "2.4.0" 24 | cached = "0.46.0" 25 | chardet = { version = "0.2.4", optional = true } 26 | chardetng = { version = "0.1.17", optional = true } 27 | clap = { version = "4.4.2", features = ["derive"] } 28 | counter = "0.5.7" 29 | dialoguer = "0.10.4" 30 | encoding = "0.2.33" 31 | env_logger = "0.10.0" 32 | icu_normalizer = "1.3.2" 33 | icu_properties = "1.3.2" 34 | log = "0.4.20" 35 | once_cell = "1.18.0" 36 | ordered-float = "3.9.1" 37 | regex = "1.9.3" 38 | serde = { version = "1.0.188", features = ["derive"] } 39 | serde_json = "1.0.107" 40 | strsim = "0.10.0" 41 | unicode_names2 = "1.1.0" 42 | 43 | [dev-dependencies] 44 | assert_cmd = "2.0.12" 45 | criterion = "0.3" 46 | predicates = "3.0.3" 47 | 48 | [[bench]] 49 | name = "large_payload" 50 | harness = false 51 | 52 | [[bench]] 53 | name = "large_datasets" 54 | harness = false 55 | 56 | [features] 57 | performance = ["chardet", "chardetng"] 58 | 59 | [[bin]] 60 | name = "performance" 61 | path = "src/performance.rs" 62 | required-features = ["performance"] 63 | 64 | [[bin]] 65 | name = 
"normalizer" 66 | path = "src/normalizer.rs" 67 | 68 | [profile.release] 69 | opt-level = 3 70 | lto = "fat" 71 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 YAROVOY Nikolay (Rust version). 4 | Copyright (c) 2019 TAHRI Ahmed R. (author of original Python version) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
-------------------------------------------------------------------------------- /benches/large_datasets.rs: -------------------------------------------------------------------------------- 1 | use charset_normalizer_rs::from_path; 2 | use charset_normalizer_rs::utils::get_large_test_datasets; 3 | use criterion::BenchmarkId; 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 5 | use std::path::PathBuf; 6 | 7 | fn bench_foo(paths: &[String]) { 8 | for path in paths { 9 | let _ = from_path(&PathBuf::from(path), None); 10 | } 11 | } 12 | 13 | pub fn large_datasets(c: &mut Criterion) { 14 | let paths: Vec = get_large_test_datasets() 15 | .unwrap() 16 | .iter() 17 | .map(|v| v.0.clone()) 18 | .collect(); 19 | 20 | let mut group = c.benchmark_group("sample-size-example"); 21 | group.significance_level(0.1).sample_size(10); 22 | group.bench_with_input(BenchmarkId::new("large_datasets", ""), &paths, |b, s| { 23 | b.iter(|| { 24 | bench_foo(s); 25 | black_box(()) 26 | }); 27 | }); 28 | } 29 | 30 | criterion_group!(benches, large_datasets); 31 | criterion_main!(benches); 32 | -------------------------------------------------------------------------------- /benches/large_payload.rs: -------------------------------------------------------------------------------- 1 | use charset_normalizer_rs::consts::TOO_BIG_SEQUENCE; 2 | use charset_normalizer_rs::from_bytes; 3 | use criterion::BenchmarkId; 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 5 | 6 | pub fn large_payload(c: &mut Criterion) { 7 | let mut payload = b"hello simple ascii " 8 | .repeat(TOO_BIG_SEQUENCE) 9 | .as_slice() 10 | .to_vec(); 11 | payload.extend("我没有埋怨,磋砣的只是一些时间。 磋砣的只是一些时间。".as_bytes()); 12 | c.bench_with_input(BenchmarkId::new("large_payload", ""), &payload, |b, s| { 13 | b.iter(|| black_box(from_bytes(s, None))); 14 | }); 15 | } 16 | 17 | criterion_group!(benches, large_payload); 18 | criterion_main!(benches); 19 | 
-------------------------------------------------------------------------------- /src/assets.rs: -------------------------------------------------------------------------------- 1 | use crate::entity::Language; 2 | use ahash::HashMap; 3 | 4 | use once_cell::sync::Lazy; 5 | use std::iter::FromIterator; 6 | 7 | pub(crate) static LANGUAGES: Lazy<[(Language, &'static str, bool, bool); 41]> = Lazy::new(|| { 8 | [ 9 | // language, alphabet, have_accents, pure_latin 10 | (Language::English, "eationsrhldcmufpgwbyvkjxzq", false, true, ), 11 | (Language::English, "eationsrhldcumfpgwybvkxjzq", false, true, ), 12 | (Language::German, "enirstadhulgocmbfkwzpvüäöj", true, true, ), 13 | (Language::French, "easnitrluodcpmévgfbhqàxèyj", true, true, ), 14 | (Language::Dutch, "enairtodslghvmukcpbwjzfyxë", true, true, ), 15 | (Language::Italian, "eiaonltrscdupmgvfbzhqèàkyò", true, true, ), 16 | (Language::Polish, "aioenrzwsctkydpmuljłgbhąęó", true, true, ), 17 | (Language::Spanish, "eaonsrildtcumpbgvfyóhqíjzá", true, true, ), 18 | (Language::Russian, "оаеинстрвлкмдпугяызбйьчхжц", false, false, ), 19 | (Language::Japanese, "人一大亅丁丨竹笑口日今二彳行十土丶寸寺時乙丿乂气気冂巾亠市目儿見八小凵県月彐門間木東山出本中刀分耳又取最言田心思刂前京尹事生厶云会未来白冫楽灬馬尸尺駅明耂者了阝都高卜占厂广店子申奄亻俺上方冖学衣艮食自", false, false, ), 20 | (Language::Japanese, "ーンス・ルトリイアラックドシレジタフロカテマィグバムプオコデニウメサビナブャエュチキズダパミェョハセベガモツネボソノァヴワポペピケゴギザホゲォヤヒユヨヘゼヌゥゾヶヂヲヅヵヱヰヮヽ゠ヾヷヿヸヹヺ", false, false, ), 21 | (Language::Japanese, "のにるたとはしいをでてがなれからさっりすあもこまうくよきんめおけそつだやえどわちみせじばへびずろほげむべひょゆぶごゃねふぐぎぼゅづざぞぬぜぱぽぷぴぃぁぇぺゞぢぉぅゐゝゑ゛゜ゎゔ゚ゟ゙ゕゖ", false, false, ), 22 | (Language::Portuguese, "aeosirdntmuclpgvbfhãqéçází", true, true, ), 23 | (Language::Swedish, "eanrtsildomkgvhfupäcböåyjx", true, true, ), 24 | (Language::Chinese, "的一是不了在人有我他这个们中来上大为和国地到以说时要就出会可也你对生能而子那得于着下自之年过发后作里用道行所然家种事成方多经么去法学如都同现当没动面起看定天分还进好小部其些主样理心她本前开但因只从想实", false, false, ), 25 | (Language::Ukrainian, "оаніирвтесклудмпзяьбгйчхцї", false, false, ), 26 | (Language::Norwegian, "erntasioldgkmvfpubhåyjøcæw", false, true, ), 27 | (Language::Finnish, 
"aintesloukämrvjhpydögcbfwz", true, true, ), 28 | (Language::Vietnamese, "nhticgaoumlràđsevpbyưdákộế", true, true, ), 29 | (Language::Czech, "oeantsilvrkdumpíchzáyjběéř", true, true, ), 30 | (Language::Hungarian, "eatlsnkriozáégmbyvdhupjöfc", true, true, ), 31 | (Language::Korean, "이다에의는로하을가고지서한은기으년대사시를리도인스일", false, false, ), 32 | (Language::Indonesian, "aneirtusdkmlgpbohyjcwfvzxq", false, true, ), 33 | (Language::Turkish, "aeinrlıkdtsmyuobüşvgzhcpçğ", true, true, ), 34 | (Language::Romanian, "eiarntulocsdpmăfvîgbșțzhâj", true, true, ), 35 | (Language::Farsi, "ایردنهومتبسلکشزفگعخقجآپحطص", false, false, ), 36 | (Language::Arabic, "اليمونرتبةعدسفهكقأحجشطصىخإ", false, false, ), 37 | (Language::Danish, "erntaisdlogmkfvubhpåyøæcjw", false, true, ), 38 | (Language::Serbian, "аиоенрсуткјвдмплгзбaieonцш", false, false, ), 39 | (Language::Lithuanian, "iasoretnukmlpvdjgėbyųšžcąį", false, true, ), 40 | (Language::Slovene, "eaionrsltjvkdpmuzbghčcšžfy", false, true, ), 41 | (Language::Slovak, "oaenirvtslkdmpuchjbzáyýíčé", true, true, ), 42 | (Language::Hebrew, "יוהלרבתמאשנעםדקחפסכגטצןזך", false, false, ), 43 | (Language::Bulgarian, "аиоентрсвлкдпмзгяъубчцйжщх", false, false, ), 44 | (Language::Croatian, "aioenrjstuklvdmpgzbcčhšžćf", true, true, ), 45 | (Language::Hindi, "करसनतमहपयलवजदगबशटअएथभडचधषइ", false, false, ), 46 | (Language::Estonian, "aiestlunokrdmvgpjhäbõüfcöy", true, true, ), 47 | (Language::Thai, "านรอกเงมยลวดทสตะปบคหแจพชขใ", false, false, ), 48 | (Language::Greek, "ατοιενρσκηπςυμλίόάγέδήωχθύ", false, false, ), 49 | (Language::Tamil, "கதபடரமலனவறயளசநஇணஅஆழஙஎஉஒஸ", false, false, ), 50 | (Language::Kazakh, "аыентрлідсмқкобиуғжңзшйпгө", false, false, ), 51 | ] 52 | }); 53 | pub(crate) static LANGUAGE_SUPPORTED_COUNT: Lazy = Lazy::new(|| LANGUAGES.len()); // 41 54 | 55 | pub(crate) static ENCODING_TO_LANGUAGE: Lazy> = Lazy::new(|| { 56 | HashMap::from_iter([ 57 | ("euc-kr", Language::Korean), 58 | ("big5", Language::Chinese), 59 | ("hz", Language::Chinese), 60 | ("gbk", 
Language::Chinese), 61 | ("gb18030", Language::Chinese), 62 | ("euc-jp", Language::Japanese), 63 | ("iso-2022-jp", Language::Japanese), 64 | ("shift_jis", Language::Japanese), 65 | ]) 66 | }); 67 | -------------------------------------------------------------------------------- /src/md.rs: -------------------------------------------------------------------------------- 1 | use cached::proc_macro::cached; 2 | use log::{log_enabled, trace}; 3 | use ordered_float::OrderedFloat; 4 | 5 | pub(crate) mod plugins; 6 | pub(crate) mod structs; 7 | 8 | use plugins::{ 9 | ArchaicUpperLowerPlugin, CjkInvalidStopPlugin, MessDetectorPlugin, SuperWeirdWordPlugin, 10 | SuspiciousDuplicateAccentPlugin, SuspiciousRangePlugin, TooManyAccentuatedPlugin, 11 | TooManySymbolOrPunctuationPlugin, UnprintablePlugin, 12 | }; 13 | use structs::MessDetectorChar; 14 | 15 | // 16 | // Mess detection module 17 | // 18 | 19 | // Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. 
20 | #[cached(size = 2048)] 21 | pub(crate) fn mess_ratio( 22 | decoded_sequence: String, 23 | maximum_threshold: Option>, 24 | ) -> f32 { 25 | let maximum_threshold = f32::from(maximum_threshold.unwrap_or(OrderedFloat(0.2))); 26 | let mut detectors: Vec> = vec![ 27 | Box::::default(), 28 | Box::::default(), 29 | Box::::default(), 30 | Box::::default(), 31 | Box::::default(), 32 | Box::::default(), 33 | Box::::default(), 34 | Box::::default(), 35 | ]; 36 | 37 | let mut mean_mess_ratio: Option = None; 38 | let early_calc_period: usize = match decoded_sequence.chars().count() { 39 | ..=510 => 32, 40 | 511..=1023 => 64, 41 | _ => 128, 42 | }; 43 | // Traverse through chars and detectors 44 | for (index, ch) in decoded_sequence 45 | .chars() 46 | .chain(std::iter::once('\n')) 47 | .enumerate() 48 | { 49 | let mess_char = MessDetectorChar::new(ch); 50 | detectors 51 | .iter_mut() 52 | .filter(|detector| detector.eligible(&mess_char)) 53 | .for_each(|detector| detector.feed(&mess_char)); 54 | 55 | if index % early_calc_period == early_calc_period - 1 { 56 | let early_mess_ratio: f32 = detectors.iter().map(|x| x.ratio()).sum(); 57 | if early_mess_ratio >= maximum_threshold { 58 | mean_mess_ratio = Some(early_mess_ratio); 59 | break; 60 | } 61 | } 62 | } 63 | let return_ratio = mean_mess_ratio.unwrap_or(detectors.iter().map(|x| x.ratio()).sum()); 64 | 65 | if log_enabled!(log::Level::Trace) { 66 | trace!( 67 | "Mess-detector extended-analysis start: early_calc_period={}, mean_mess_ratio={}, maximum_threshold={} \ 68 | {}", 69 | early_calc_period, 70 | return_ratio, 71 | maximum_threshold, 72 | detectors 73 | .iter() 74 | .filter(|d| d.ratio() > 0.0) 75 | .map(|d| format!("{} produces ratio: {}", d.name(), d.ratio())) 76 | .collect::>() 77 | .join("===") 78 | ); 79 | } 80 | 81 | return_ratio 82 | } 83 | -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.gif -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.jpg -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.mp4 -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.png -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.webp -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-1.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-1.xlsx -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-2.png -------------------------------------------------------------------------------- /src/tests/data/largesets/None/sample-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/None/sample-3.png -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/_chromium_iso-8859-1_with_no_encoding_specified.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | iso-8859-1 5 | 6 | 7 |

The World Wide Web Consortium (W3C)

8 |

Leading the Web to Its Full Potential...

9 | 10 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/_ude_1.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 |

4 | 5 | urllib3 6 | 7 |

8 |

9 | PyPI Version 10 | Python Versions 11 | Join our Discord 12 | Coverage Status 13 | Build Status on GitHub 14 | Documentation Status 15 |

16 | 17 | urllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the 18 | Python ecosystem already uses urllib3 and you should too. 19 | urllib3 brings many critical features that are missing from the Python 20 | standard libraries: 21 | 22 | - Thread safety. 23 | - Connection pooling. 24 | - Client-side SSL/TLS verification. 25 | - File uploads with multipart encoding. 26 | - Helpers for retrying requests and dealing with HTTP redirects. 27 | - Support for gzip, deflate, and brotli encoding. 28 | - Proxy support for HTTP and SOCKS. 29 | - 100% test coverage. 30 | 31 | urllib3 is powerful and easy to use: 32 | 33 | .. code-block:: pycon 34 | 35 | >>> import urllib3 36 | >>> http = urllib3.PoolManager() 37 | >>> resp = http.request("GET", "http://httpbin.org/robots.txt") 38 | >>> resp.status 39 | 200 40 | >>> resp.data 41 | b"User-agent: *\nDisallow: /deny\n" 42 | 43 | 44 | Installing 45 | ---------- 46 | 47 | urllib3 can be installed with `pip `_: 48 | 49 | .. code-block:: bash 50 | 51 | $ python -m pip install urllib3 52 | 53 | Alternatively, you can grab the latest source code from `GitHub `_: 54 | 55 | .. code-block:: bash 56 | 57 | $ git clone git://github.com/urllib3/urllib3.git 58 | $ python setup.py install 59 | 60 | 61 | Documentation 62 | ------------- 63 | 64 | urllib3 has usage and reference documentation at `urllib3.readthedocs.io `_. 65 | 66 | 67 | Community 68 | --------- 69 | 70 | urllib3 has a `community Discord channel `_ for asking questions and 71 | collaborating with other contributors. Drop by and say hello 👋 72 | 73 | 74 | Contributing 75 | ------------ 76 | 77 | urllib3 happily accepts contributions. Please see our 78 | `contributing documentation `_ 79 | for some tips on getting started. 80 | 81 | 82 | Security Disclosures 83 | -------------------- 84 | 85 | To report a security vulnerability, please use the 86 | `Tidelift security contact `_. 87 | Tidelift will coordinate the fix and disclosure with maintainers. 
88 | 89 | 90 | Maintainers 91 | ----------- 92 | 93 | - `@sethmlarson `__ (Seth M. Larson) 94 | - `@pquentin `__ (Quentin Pradet) 95 | - `@theacodes `__ (Thea Flowers) 96 | - `@haikuginger `__ (Jess Shapiro) 97 | - `@lukasa `__ (Cory Benfield) 98 | - `@sigmavirus24 `__ (Ian Stapleton Cordasco) 99 | - `@shazow `__ (Andrey Petrov) 100 | 101 | 👋 102 | 103 | 104 | Sponsorship 105 | ----------- 106 | 107 | If your company benefits from this library, please consider `sponsoring its 108 | development `_. 109 | 110 | 111 | For Enterprise 112 | -------------- 113 | 114 | .. |tideliftlogo| image:: https://nedbatchelder.com/pix/Tidelift_Logos_RGB_Tidelift_Shorthand_On-White_small.png 115 | :width: 75 116 | :alt: Tidelift 117 | 118 | .. list-table:: 119 | :widths: 10 100 120 | 121 | * - |tideliftlogo| 122 | - Professional support for urllib3 is available as part of the `Tidelift 123 | Subscription`_. Tidelift gives software development teams a single source for 124 | purchasing and maintaining their software, with professional grade assurances 125 | from the experts who know it best, while seamlessly integrating with existing 126 | tools. 127 | 128 | .. _Tidelift Subscription: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme 129 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/_ude_1.txt: -------------------------------------------------------------------------------- 1 | Bahasa Indonesia adalah bahasa resmi Republik Indonesia dan bahasa persatuan bangsa Indonesia.[8][9] Bahasa Indonesia adalah salah satu dari banyak varietas bahasa Melayu.[10] Bahasa Indonesia diresmikan penggunaannya setelah Proklamasi Kemerdekaan Indonesia, tepatnya sehari sesudahnya, bersamaan dengan mulai berlakunya konstitusi. Di Timor Leste, bahasa Indonesia berstatus sebagai bahasa kerja. 
2 | 3 | Dasar bahasa Indonesia baku adalah bahasa Melayu Riau.[11][12] [13] [14][15][16] Dalam perkembangannya, bahasa ini mengalami perubahan akibat penggunaannya sebagai bahasa kerja di lingkungan administrasi kolonial dan berbagai proses pembakuan sejak awal abad ke-20. Penamaan "bahasa Indonesia" diawali sejak dicanangkannya Sumpah Pemuda pada 28 Oktober 1928, untuk menghindari kesan "imperialisme bahasa" apabila nama bahasa Melayu tetap digunakan.[17] Proses ini menyebabkan berbedanya bahasa Indonesia saat ini dari varian bahasa Melayu yang digunakan di Riau dan kepulauan maupun Semenanjung Malaya. Hingga saat ini, bahasa Indonesia merupakan bahasa yang hidup, yang terus menghasilkan kata-kata baru, baik melalui penciptaan maupun penyerapan dari bahasa daerah dan bahasa asing. 4 | 5 | Meskipun dipahami dan dituturkan oleh lebih dari 90% warga Indonesia, bahasa Indonesia bukanlah bahasa ibu bagi kebanyakan penuturnya. Sebagian besar warga Indonesia menggunakan salah satu dari 748 bahasa yang ada di Indonesia sebagai bahasa ibu.[18] Istilah "bahasa Indonesia" paling umum dikaitkan dengan bahasa baku yang digunakan dalam situasi formal.[15] Ragam bahasa baku tersebut berhubungan diglosik dengan bentuk-bentuk bahasa Melayu vernakular yang digunakan sebagai peranti komunikasi sehari-hari.[15] Artinya, penutur bahasa Indonesia kerap kali menggunakan versi sehari-hari (colloquial) dan/atau mencampuradukkan dengan dialek Melayu lainnya atau bahasa ibunya. Meskipun demikian, bahasa Indonesia digunakan sangat luas di perguruan-perguruan, di media massa, sastra, perangkat lunak, surat-menyurat resmi, dan berbagai forum publik lainnya,[19] sehingga dapatlah dikatakan bahwa bahasa Indonesia digunakan oleh semua warga Indonesia. 
6 | 7 | Fonologi dan tata bahasa bahasa Indonesia dianggap relatif mudah.[20] Menurut sebagian peneliti, dasar-dasar yang penting untuk komunikasi dasar dapat dipelajari hanya dalam kurun waktu beberapa minggu.[21] 8 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/book-stats.json: -------------------------------------------------------------------------------- 1 | { 2 | "books_count": 1000, 3 | "topbooks_count": 100 4 | } 5 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/dummy-1.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFijCCBHKgAwIBAgITAP/Q5kMAYCMtjq1aFvOJ1hHy1TANBgkqhkiG9w0BAQsF 3 | ADBDMQswCQYDVQQGEwJVUzESMBAGA1UEChMJZ29vZCBndXlzMSAwHgYDVQQDExdD 4 | QSBpbnRlcm1lZGlhdGUgKFJTQSkgQTAeFw0yMTA4MDIyMjA1MzBaFw0yMTEwMzEy 5 | MjA1MjhaMC0xKzApBgNVBAMTImRyeS1ydW4tYXV0aHotZGVhY3RpdmF0aW9uLmd3 6 | MS53dGYwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDEBREkoTsQmFQI 7 | /vW8Y8NZBCQxw2Sznb8iNL+IeeAIq4HA0H+83rEbJ+Nz/AQB5I4d3xexrdBVMAFU 8 | Q6oDK/8a+6m3L+d6ZQ+gQ4BWXmU3NprcH+IuB5DEM04Itxkc/NhQUMjNgjafUU38 9 | +Pk9183TIz5zoVb8mzo8UrC8Y/SABLYPVbG9rcNpJrg9QU/qufWws/g8edze6tq+ 10 | HNGMoQkT0pNDLxO171qmzGvBEZhbQYgXrlPzhAOpZKWLQG9UWC0kPszjFR2H5dlR 11 | ak1acD2M8GT29g4+o2uhSg4LeYnu12FBwVlnlYEI52U0W4jnjLwUVdj8C/cWv+U8 12 | Kkn5GUadAgMBAAGjggKLMIIChzAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYI 13 | KwYBBQUHAwEGCCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFLxzlJ46 14 | E5XNGJXDjeJ8llfAR3b5MB8GA1UdIwQYMBaAFOTVDEkrqAfZuudoLYOiqn6/b6kp 15 | MGYGCCsGAQUFBwEBBFowWDAiBggrBgEFBQcwAYYWaHR0cDovLzEyNy4wLjAuMTo0 16 | MDAyLzAyBggrBgEFBQcwAoYmaHR0cDovLzEyNy4wLjAuMTo0MDAwL2FjbWUvaXNz 17 | dWVyLWNlcnQwLQYDVR0RBCYwJIIiZHJ5LXJ1bi1hdXRoei1kZWFjdGl2YXRpb24u 18 | Z3cxLnd0ZjAnBgNVHR8EIDAeMBygGqAYhhZodHRwOi8vZXhhbXBsZS5jb20vY3Js 19 | MEAGA1UdIAQ5MDcwCAYGZ4EMAQIBMCsGAyoDBDAkMCIGCCsGAQUFBwIBFhZodHRw 20 | 
Oi8vZXhhbXBsZS5jb20vY3BzMIIBBAYKKwYBBAHWeQIEAgSB9QSB8gDwAHYAFuhp 21 | wdGV6tfD+Jca4/B2AfeM4badMahSGLaDfzGoFQgAAAF7CR10gQAABAMARzBFAiBB 22 | naWMBFJeNPJnZatbhNPmkV9bxiAhc/wAAY17PGsCDAIhAM5ho0LknpV4Mu8KbaHb 23 | uGkS4kqq0Xlj9o0EzsBDfqxFAHYA3Zk0/KXnJIDJVmh9gTSZCEmySfe1adjHvKs/ 24 | XMHzbmQAAAF7CR12dAAABAMARzBFAiEAyjJRBCeyy+2QVZEIIKFha6p9IIGMNbEO 25 | CUBmPe3pkX0CIE5g+zlZ3Sv6yoV7FGQP54pC8f/VLbbWPqpN350ls/7cMA0GCSqG 26 | SIb3DQEBCwUAA4IBAQCOF8x80zYy62OsyjDeJhT4Qzc04yIwMHY7JBDd1a9VqZ2S 27 | HU5z0wkLWjOljMqQ/ilT17d7hNUgSIe7ySh5H24OlwoSjMi21EDIWflP5k6BpNMI 28 | Cd0OBgbO6u2IJtxpbEQ9ItXt0XkybxwCaHhtJ7tZBIV66WDpkJQrGagTc60AKNhK 29 | 7njItel21ptoeewjd05npXdxx8p0E8fwqfWRJFCdyGLKIYzx7iEGqP5FbfxuBvla 30 | JvOr3gFSpx7/Nt+QoVC9zq6qwcMZi8SI0F7D3fKppIA4V2bxXUHQOPRxqdBgTLor 31 | 49EFobCAuhTP4lE5GxrfdO17Xp2aBLf2e5r0LpND 32 | -----END CERTIFICATE----- 33 | -----BEGIN CERTIFICATE----- 34 | MIIFUDCCAzigAwIBAgIRAKLompG71lfO4fH4/13saQYwDQYJKoZIhvcNAQELBQAw 35 | OTELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCWdvb2QgZ3V5czEWMBQGA1UEAxMNQ0Eg 36 | cm9vdCAoUlNBKTAeFw0yMDAxMDExMjAwMDBaFw00MDAxMDExMjAwMDBaMEMxCzAJ 37 | BgNVBAYTAlVTMRIwEAYDVQQKEwlnb29kIGd1eXMxIDAeBgNVBAMTF0NBIGludGVy 38 | bWVkaWF0ZSAoUlNBKSBBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA 39 | tdBBK6gfh77lUexax3cOgEMl94ODtStoHlxXXB1QZIQLlqsHhaTby+32vogMpyVa 40 | YtYcBAY6x6X21qRPJpYX5fGU7wMEiEai8EWwxflLvGrzj8VkIADRoKkSKDhDeEBb 41 | oPZlLw+lRmd/BjIEc1mAyUG8DUSep1LNHJa5Ulx6Z/TpV71HmR3DnlqGZXdgKmX9 42 | UAIJUerGn8fPVcGizFRtK6BV4w88otizvWQ/nltxkub7K54hvcKlvUA7N5u56cdR 43 | vIsKciov8P7cJ2J9wO606GsztOgqJ3HvxR9PBUWAtqD+D/r4q+wb1B0MtvniGqJo 44 | dX7pGwfNUhBy7H45ypxV+QIDAQABo4IBRzCCAUMwDgYDVR0PAQH/BAQDAgGGMB0G 45 | A1UdJQQWMBQGCCsGAQUFBwMCBggrBgEFBQcDATASBgNVHRMBAf8ECDAGAQH/AgEA 46 | MB0GA1UdDgQWBBTk1QxJK6gH2brnaC2Doqp+v2+pKTAfBgNVHSMEGDAWgBS6Dxvq 47 | tq6ikrb5C1vuYrqqHqaHcDBYBggrBgEFBQcBAQRMMEowIwYIKwYBBQUHMAGGF2h0 48 | dHA6Ly9leGFtcGxlLmNvbS9vY3NwMCMGCCsGAQUFBzAChhdodHRwOi8vZXhhbXBs 49 | ZS5jb20vcm9vdDAnBgNVHR8EIDAeMBygGqAYhhZodHRwOi8vZXhhbXBsZS5jb20v 50 | 
Y3JsMDsGA1UdIAQ0MDIwBAYCKgMwKgYCLQYwJDAiBggrBgEFBQcCARYWaHR0cDov 51 | L2V4YW1wbGUuY29tL2NwczANBgkqhkiG9w0BAQsFAAOCAgEAeayTk2bni4WKrx4p 52 | 2PgFYf+YmfufvMHbSWsOVH4iePAiTr1xft8IpA/Tv6WmU25lZTslw2kKFSZaTQ/5 53 | YsjR7V9dZGMt/Npglhqm1/gOYL51FoNf9sQRsrfRYcNDYt5F6VPQ33z/QmD85ASq 54 | wM9qcJNKo9Tr5nh+C8HbASG5dddPygytcLNQ7R21AOKythHZt9pknLn4/FL7Bqej 55 | dRqvRSOpb8yQT9IlF2XF6hUX6SWaswpU6peTXa1hP4sUmTaUfaO7SgF8WJdBZral 56 | rY29aKYe59D0pwIbf4WNWWX2DSa7fdvKO1dgYZTM+tzvHiJ11JIBjJyAVxGwpSJq 57 | FqZ5LUfVKCnDLuFMpKKrjgHs5XzjqtNwcU4l85ekLTBlSso+8iQLHAhG7LHAPcKV 58 | dGpkApN8ubXIxF2+Zj2tkpUTKv77Jriom5Jh/CPU9nQM22pquBTYOztXLN5SCL4x 59 | TwpNQ215muUvKveKBsE4gxUWIDI2gH0pmBbm9VJ090oXUJt39LGijD8rsbfzDcHz 60 | jN6fVnpxGG6JMinry0JZO/dgQP5kH+AAATkvniAGzCzF4ghCb5j+YlngADRdsW9m 61 | CvSLlKRvXQ3iN1e6Bexn9IaatUzgxLgDmOI7NCl5KFlWvhZkZiMnPTsKoGP4m/6U 62 | +tmD1GfhXpfU+KlbYL0F9xZcOEM= 63 | -----END CERTIFICATE----- 64 | 65 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/empty.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/parchments.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "/contexts/Parchment", 3 | "@id": "/parchments", 4 | "@type": "hydra:Collection", 5 | "hydra:member": [], 6 | "hydra:totalItems": 0 7 | } 8 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/playlist.m3u: -------------------------------------------------------------------------------- 1 | #EXTM3U 2 | #EXT-X-VERSION:3 3 | #EXT-X-INDEPENDENT-SEGMENTS 4 | #EXT-X-STREAM-INF:BANDWIDTH=2197800,AVERAGE-BANDWIDTH=2340800,CODECS="avc1.64001f,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=30.000 5 | 
hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_2000-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 6 | #EXT-X-STREAM-INF:BANDWIDTH=1375000,AVERAGE-BANDWIDTH=1460800,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000 7 | hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_1200-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 8 | #EXT-X-STREAM-INF:BANDWIDTH=3226300,AVERAGE-BANDWIDTH=3440800,CODECS="avc1.64001f,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=30.000 9 | hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_3000-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 10 | #EXT-X-STREAM-INF:BANDWIDTH=860750,AVERAGE-BANDWIDTH=910800,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000 11 | hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_700-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 12 | #EXT-X-STREAM-INF:BANDWIDTH=552200,AVERAGE-BANDWIDTH=580800,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000 13 | 
hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_400-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 14 | #EXT-X-STREAM-INF:BANDWIDTH=395780,AVERAGE-BANDWIDTH=422400,CODECS="avc1.64000d,mp4a.40.2",RESOLUTION=416x234,FRAME-RATE=30.000 15 | hdntl=exp=1643972337~acl=%2f*~id=4f438024-f654-4af3-9c6a-101482650e58~data=hdntl~hmac=8c42f3c7b0dd204018864ea1d327f3d3d11760a1921b3a0ab0167da03819320c/master_256-archive.m3u8?aka_me_session_id=AAAAAAAAAADxBv1hAAAAADTfiy+oginOE4jN+hFJrIKRPQ67pCViSQHn8EchSozFyglB+7GJKsiCvCbjdp2kPr1SQn0oYjH3&aka_media_format_type=hls&startTime=1643803194&endTime=1643809839 16 | -------------------------------------------------------------------------------- /src/tests/data/largesets/ascii/simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "list_of_things": [ 4 | { 5 | "id": 1, 6 | "tags": [ 7 | "test 1", 8 | "test 2", 9 | "test 3" 10 | ] 11 | }, 12 | { 13 | "id": 2, 14 | "tags": [ 15 | "test 4", 16 | "test 5", 17 | "test 6" 18 | ] 19 | } 20 | ] 21 | }, 22 | "meta": "Hey friends!" 
23 | } -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/0804.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/0804.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/_chromium_Big5_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/_chromium_Big5_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/blog.worren.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/blog.worren.net.xml -------------------------------------------------------------------------------- 
/src/tests/data/largesets/big5/carbonxiv.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/carbonxiv.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/catshadow.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/catshadow.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/coolloud.org.tw.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/coolloud.org.tw.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/digitalwall.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/digitalwall.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/ebao.us.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/ebao.us.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/fudesign.blogspot.com.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/fudesign.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/kafkatseng.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/kafkatseng.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/ke207.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/ke207.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/leavesth.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/leavesth.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/letterlego.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/letterlego.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/linyijen.blogspot.com.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/linyijen.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/marilynwu.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/marilynwu.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/myblog.pchome.com.tw.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/myblog.pchome.com.tw.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/oui-design.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/oui-design.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/sanwenji.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/sanwenji.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/sinica.edu.tw.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/sinica.edu.tw.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/sylvia1976.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/sylvia1976.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/tlkkuo.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/tlkkuo.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/unoriginalblog.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/unoriginalblog.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/upsaid.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/upsaid.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/willythecop.blogspot.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/willythecop.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/big5/ytc.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/big5/ytc.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/aivy.co.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/aivy.co.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/akaname.main.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/akaname.main.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/arclamp.jp.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/arclamp.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/aristrist.s57.xrea.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/aristrist.s57.xrea.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/artifact-jp.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/artifact-jp.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/atom.ycf.nanet.co.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/atom.ycf.nanet.co.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/azito.under.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/azito.under.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/azoz.org.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/azoz.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/blog.kabu-navi.com.atom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/blog.kabu-navi.com.atom.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/blog.kabu-navi.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/blog.kabu-navi.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/bphrs.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/bphrs.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/ch.kitaguni.tv.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/ch.kitaguni.tv.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/club.h14m.org.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/club.h14m.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/contents-factory.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/contents-factory.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/furusatonoeki.cutegirl.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/furusatonoeki.cutegirl.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/manana.moo.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/manana.moo.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/mimizun.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/mimizun.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/misuzilla.org.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/misuzilla.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/overcube.com.atom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/overcube.com.atom.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/overcube.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/overcube.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/pinkupa.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/pinkupa.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/rdf.ycf.nanet.co.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/rdf.ycf.nanet.co.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/siesta.co.jp.aozora.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/siesta.co.jp.aozora.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/tls.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/tls.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-jp/yukiboh.moo.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-jp/yukiboh.moo.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/_chromium_windows-949_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/_chromium_windows-949_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/_ude_euc1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/_ude_euc1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/_ude_euc2.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/_ude_euc2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/acnnewswire.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/acnnewswire.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/alogblog.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/alogblog.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/arts.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/arts.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/birder.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/birder.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/blog.bd-lab.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/blog.bd-lab.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/blog.empas.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/blog.empas.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/blog.rss.naver.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/blog.rss.naver.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/calmguy.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/calmguy.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/chisato.info.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/chisato.info.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/console.linuxstudy.pe.kr.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/console.linuxstudy.pe.kr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/critique.or.kr.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/critique.or.kr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/epitaph.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/epitaph.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/ittrend.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/ittrend.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/jely.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/jely.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/jely.pe.kr.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/jely.pe.kr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/jowchung.oolim.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/jowchung.oolim.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/kina.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/kina.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/lennon81.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/lennon81.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/oroll.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/oroll.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/poliplus.egloos.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/poliplus.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/scarletkh2.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/scarletkh2.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/siwoo.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/siwoo.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/sparcs.kaist.ac.kr.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/sparcs.kaist.ac.kr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/tori02.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/tori02.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/willis.egloos.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/willis.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/xenix.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/xenix.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/yunho.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/yunho.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/euc-kr/zangsalang.egloos.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/euc-kr/zangsalang.egloos.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gb18030/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gb18030/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/14.blog.westca.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/14.blog.westca.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/2.blog.westca.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/2.blog.westca.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/_chromium_gb18030_with_no_encoding_specified.html.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/_chromium_gb18030_with_no_encoding_specified.html.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/acnnewswire.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/acnnewswire.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/bbs.blogsome.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/bbs.blogsome.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/cappuccinos.3322.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/cappuccinos.3322.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/chen56.blogcn.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/chen56.blogcn.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/cindychen.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/cindychen.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/cnblog.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/cnblog.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/coverer.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/coverer.com.xml 
-------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/eighthday.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/eighthday.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/godthink.blogsome.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/godthink.blogsome.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/jjgod.3322.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/jjgod.3322.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/lily.blogsome.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/lily.blogsome.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/luciferwang.blogcn.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/luciferwang.blogcn.com.xml -------------------------------------------------------------------------------- 
/src/tests/data/largesets/gbk/pda.blogsome.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/pda.blogsome.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/softsea.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/softsea.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/w3cn.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/w3cn.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/gbk/xy15400.blogcn.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/gbk/xy15400.blogcn.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/aif.ru.health.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/aif.ru.health.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/aug32.hole.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/aug32.hole.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/aviaport.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/aviaport.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/blog.mlmaster.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/blog.mlmaster.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/forum.template-toolkit.ru.1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/forum.template-toolkit.ru.1.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/forum.template-toolkit.ru.4.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/forum.template-toolkit.ru.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/forum.template-toolkit.ru.6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/forum.template-toolkit.ru.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/forum.template-toolkit.ru.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/forum.template-toolkit.ru.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/forum.template-toolkit.ru.9.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/forum.template-toolkit.ru.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/greek.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/greek.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/intertat.ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/intertat.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/janulalife.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/janulalife.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/kapranoff.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/kapranoff.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/money.rin.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/money.rin.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/music.peeps.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/music.peeps.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/newsru.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/newsru.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/ibm866/susu.ac.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/ibm866/susu.ac.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-2022-jp/_ude_1.txt: -------------------------------------------------------------------------------- 1 | ======================================================================== 2 | $B%3%s%=!<%k(J $B%"%W%j%1!<%7%g%s(J : universalchardet $B%W%m%8%'%/%H$N35MW(J 3 | ======================================================================== 4 | 5 | $B$3$N(J universalchardet $B%"%W%j%1!<%7%g%s$O!"(JAppWizard $B$K$h$C$F:n@.$5$l$^$7$?!#(J 6 | 7 | $B$3$N%U%!%$%k$K$O!"(Juniversalchardet $B%"%W%j%1!<%7%g%s$r9=@.$9$k3F%U%!%$%k$N(J 8 | $BFbMF$N35N,$,5-=R$5$l$F$$$^$9!#(J 9 | 10 | 11 | universalchardet.vcproj 12 | $B$3$l$O!"%"%W%j%1!<%7%g%s(J $B%&%#%6!<%I$G@8@.$5$l$k(J VC++ $B%W%m%8%'%/%H$N%a%$%s$N(J 13 | $B%W%m%8%'%/%H(J $B%U%!%$%k$G$9!#(J 14 | $B%U%!%$%k$r@8@.$7$?(J Visual C++ $B$N%P!<%8%g%s>pJs$H!"%"%W%j%1!<%7%g%s(J 15 | $B%&%#%6!<%I$GA*Br$7$?%W%i%C%H%U%)!<%`!"9=@.!"$*$h$S%W%m%8%'%/%H$N5!G=$K4X$9$k(J 16 | $B>pJs$,5-=R$5$l$F$$$^$9!#(J 17 | 18 | universalchardet.cpp 19 | $B$3$l$O!"%a%$%s$N%"%W%j%1!<%7%g%s(J $B%=!<%9(J $B%U%!%$%k$G$9!#(J 20 | 21 | ///////////////////////////////////////////////////////////////////////////// 22 | $B$=$NB>$NI8=`%U%!%$%k(J : 23 | 24 | StdAfx.h, StdAfx.cpp 25 | $B$3$l$i$N%U%!%$%k$O!"%3%s%Q%$%k:Q$_%X%C%@!<(J (PCH) $B%U%!%$%k(J 26 | universalchardet.pch $B$H%W%j%3%s%Q%$%k:Q$_7?%U%!%$%k(J StdAfx.obj $B$r(J 27 | 
$B%S%k%I$9$k$?$a$K;HMQ$7$^$9!#(J 28 | 29 | ///////////////////////////////////////////////////////////////////////////// 30 | $B$=$NB>$N%a%b(J : 31 | 32 | AppWizard $B$G$O(J "TODO:" $B%3%a%s%H$r;HMQ$7$F!"%f!<%6!<$,DI2C$^$?$O%+%9%?%^%$%:$9$k(J 33 | $B%=!<%9ItJ,$r<($7$^$9!#(J 34 | 35 | ///////////////////////////////////////////////////////////////////////////// 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_mozilla_bug421271_text.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_mozilla_bug421271_text.html -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_3.txt 
-------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_4.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_5.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/_ude_6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/_ude_6.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/anzeige-value-stars.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/anzeige-value-stars.html -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-1,windows-1252/github_bug_9.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-1,windows-1252/github_bug_9.txt 
-------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_10.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_10.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_3.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_4.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_5.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_5.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_6.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_7.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_8.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/_ude_9.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/_ude_9.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/auto-apro.hu.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/auto-apro.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/cigartower.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/cigartower.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/escience.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/escience.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/hirtv.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/hirtv.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/honositomuhely.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/honositomuhely.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/saraspatak.hu.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/saraspatak.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mk.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mk.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mr.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mv.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.mv.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/shamalt.uw.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-2/ugyanmar.blogspot.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-2/ugyanmar.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-4/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-4/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/_chromium_ISO-8859-5_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/_chromium_ISO-8859-5_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/aero-bg.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/aero-bg.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/aif.ru.health.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/aif.ru.health.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/aug32.hole.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/aug32.hole.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/aviaport.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/aviaport.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bbc.co.uk.popshow.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bbc.co.uk.popshow.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/blog.mlmaster.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/blog.mlmaster.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bpm.cult.bg.2.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bpm.cult.bg.2.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bpm.cult.bg.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bpm.cult.bg.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bpm.cult.bg.9.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bpm.cult.bg.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bpm.cult.bg.medusa.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bpm.cult.bg.medusa.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/bpm.cult.bg.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/bpm.cult.bg.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/debian.gabrovo.com.news.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/debian.gabrovo.com.news.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/debian.gabrovo.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/debian.gabrovo.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/doncho.net.comments.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/doncho.net.comments.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/ecloga.cult.bg.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/ecloga.cult.bg.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.1.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.4.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.9.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/forum.template-toolkit.ru.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/greek.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/greek.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/ide.li.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/ide.li.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/intertat.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/intertat.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/janulalife.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/janulalife.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/kapranoff.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/kapranoff.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/linux-bg.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/linux-bg.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/money.rin.ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/money.rin.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/music.peeps.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/music.peeps.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/newsru.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/newsru.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-5/susu.ac.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-5/susu.ac.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-6/_chromium_ISO-8859-6_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-6/_chromium_ISO-8859-6_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-6/_ude_1.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-6/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/_chromium_ISO-8859-7_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/_chromium_ISO-8859-7_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/_ude_3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/_ude_3.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/_ude_greek.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/_ude_greek.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/disabled.gr.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/disabled.gr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/hotstation.gr.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/hotstation.gr.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.bus.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.bus.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.cmm.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.cmm.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.fin.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.fin.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.mrk.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.mrk.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.mrt.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.mrt.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.spo.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.spo.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/iso-8859-7/naftemporiki.gr.wld.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/iso-8859-7/naftemporiki.gr.wld.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/_chromium_KOI8-R_with_no_encoding_specified.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/_chromium_KOI8-R_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/aif.ru.health.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/aif.ru.health.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/aug32.hole.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/aug32.hole.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/aviaport.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/aviaport.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/blog.mlmaster.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/blog.mlmaster.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.1.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.9.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/forum.template-toolkit.ru.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/greek.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/greek.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/intertat.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/intertat.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/janulalife.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/janulalife.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/kapranoff.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/kapranoff.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/koi.kinder.ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/koi.kinder.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/money.rin.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/money.rin.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/music.peeps.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/music.peeps.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/newsru.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/newsru.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/koi8-r/susu.ac.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/koi8-r/susu.ac.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/10e.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/10e.org.xml 
-------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/1affliate.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/1affliate.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/_ude_3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/_ude_3.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/_ude_4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/_ude_4.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/accessories-brand.com.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/accessories-brand.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/amefoot.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/amefoot.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/andore.com.inami.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/andore.com.inami.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/andore.com.money.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/andore.com.money.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/andore.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/andore.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/blog.inkase.net.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/blog.inkase.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/blog.paseri.ne.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/blog.paseri.ne.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/bloglelife.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/bloglelife.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/brag.zaka.to.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/brag.zaka.to.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/celeb.lalalu.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/celeb.lalalu.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/clickablewords.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/clickablewords.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/do.beginnersrack.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/do.beginnersrack.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/dogsinn.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/dogsinn.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/grebeweb.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/grebeweb.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/milliontimes.jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/milliontimes.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/moon-light.ne.jp.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/moon-light.ne.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/nextbeaut.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/nextbeaut.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/ooganemochi.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/ooganemochi.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/perth-on.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/perth-on.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/sakusaka-silk.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/sakusaka-silk.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/setsuzei119.jp.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/setsuzei119.jp.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/tamuyou.haun.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/tamuyou.haun.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/shift_jis/yasuhisa.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/shift_jis/yasuhisa.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-16be/bom-utf-16-be.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/utf-16be/bom-utf-16-be.srt -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-16le/bom-utf-16-le.srt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/utf-16le/bom-utf-16-le.srt -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_chromium_UTF-8_with_no_encoding_specified.html: -------------------------------------------------------------------------------- 1 | 2 | UTF-8 3 | 4 | 
麦蒂的天赋极高,即使是著名的“麦黑”弗兰.布林巴里也直率的表示,麦蒂的天赋足可以排进NBA的前10位。但是在麦蒂的职业生涯中,他却从未取得比季后赛第一轮更好的成绩,现在还甚至被广大的球迷和专家口诛笔伐称为“毒瘤”,称他不在场上的时候火箭反而能够打得更好——作为一个年薪2000万超级球星来说,麦蒂似乎已经失去了他的天赋所应该给他和球队带来的场上价值。 5 | 6 | “麦蒂一直认为,他的天赋可以让他做到一些事情。”在与凯尔特人的比赛之后,阿泰斯特如是说。“但是我们有时候会忘记,其实努力比天赋更重要,我们必须努力才行。” 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_mozilla_bug306272_text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 306272 5 | 6 | 7 | 8 | Antti Näyhä <Antti.Nayha@somewhere.fi> 9 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_mozilla_bug426271_text-utf-8.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 日本語エンコードテスト 5 | 6 | 7 | これはUTF-8です昔々、ある所に子供のいない老夫婦が住んでいた。ある日、お婆さんが川で洗濯をしていると、大きな桃が流れて来たので、お爺さんと食べようと持ち帰った。二人で桃を割ると中から男の子が生まれたので、「桃太郎」と名付けて大事に育てた。 8 | 9 | 成長した桃太郎は、鬼ヶ島の鬼が人々を苦しめていることを知り、鬼退治を決意する。両親から黍団子を餞別に貰い、道中にそれを分け与えてイヌ、サル、キジを家来に従える。鬼ヶ島で鬼と戦い、見事に勝利を収め、鬼が方々から奪っていった財宝を持ち帰り、お爺さん・お婆さんの元に返り、幸せに暮らしたという。出典: フリー百科事典『ウィキペディア(Wikipedia)』 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_1.txt: -------------------------------------------------------------------------------- 1 | 역사적 예수 연구자들은 복음서나 사도들의 서신서 속의 교리적 예수가 아닌, 역사적 인간으로서의 예수를 추구한다. 20세기 이후 역사적 예수에 대한 연구는 마커스 보그, 가톨릭 수사 출신으로 환속한 도미닉 크로산 등의 예수 세미나 운동 시작을 통해 진행되고 있다. 대한민국에서는 한국 기독교 연구소(소장 김준우)에서 크로산 등의 신학 문서들을 출판하여, 역사적 예수에 대한 연구 성과들을 소개하고 있다. 2 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_2.txt: -------------------------------------------------------------------------------- 1 | 북조선 사람들은 흔히 자국을 조선(朝鮮)이나 공화국(共和國)이라고 부른다. 지역적으로 한반도의 남쪽(대한민국)을 의미하는 남조선(南朝鮮)에 대응하여 북조선(北朝鮮)이라고도 부른다. 2 | 3 | 대한민국에서는 남·북의 대치 상황과 맞물려 공식 명칭인 '조선민주주의인민공화국'을 잘 사용하지 않는다. 
한국 사람들은 대개 ‘북한(北韓)’이라고 부르며, 짧게 ‘북’으로 부를 때도 있다. 나이가 많은 세대에서는 간혹 ‘이북(以北)’이라는 표현을 쓰기도 하며, 제한적으로 '북조선'이라 부르기도 한다. 과거 대한민국 정부에서는 북조선(북한)을 옛 소련의 괴뢰정권으로 비하하는 ‘북괴(北傀)’로 비칭하였으나 관계 개선과 함께 잘 쓰이지 않게 되었다.[3] 간혹 일부 반공주의 보수단체들은 북괴라는 표현을 쓰기도 한다. 4 | 5 | 대한민국에서는 조선이나 북조선이란 표현을 기피하며[출처 필요], 조선은 조선 왕조나 단군조선 등을 가리키는 말로 쓴다. 남북간의 교류가 활성화되면서 특별한 색채가 없는 ‘북측’이나 ‘북쪽’이란 표현도 많이 쓰이고 있는 추세이다. 대한민국에서는 한국 전쟁 이전의 행정 구역인 평안남도·평안북도·함경남도·함경북도·황해도를 ‘이북 5도(以北五道)’라 부르기도 한다. 6 | 7 | 공식적인 영어 명칭은 DPRK(Democratic People's Republic of Korea)이며 보통 '조선반도(한반도) 북쪽'을 의미하는 North Korea라고 부른다. 8 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_3.txt: -------------------------------------------------------------------------------- 1 | \\\\\\\{ssss } siaaaaaaaaa ssssi à è ì 2 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_4.txt: -------------------------------------------------------------------------------- 1 | ======================================================================== 2 | コンソール アプリケーション : universalchardet プロジェクトの概要 3 | ======================================================================== 4 | 5 | この universalchardet アプリケーションは、AppWizard によって作成されました。 6 | 7 | このファイルには、universalchardet アプリケーションを構成する各ファイルの 8 | 内容の概略が記述されています。 9 | 10 | 11 | universalchardet.vcproj 12 | これは、アプリケーション ウィザードで生成される VC++ プロジェクトのメインの 13 | プロジェクト ファイルです。 14 | ファイルを生成した Visual C++ のバージョン情報と、アプリケーション 15 | ウィザードで選択したプラットフォーム、構成、およびプロジェクトの機能に関する 16 | 情報が記述されています。 17 | 18 | universalchardet.cpp 19 | これは、メインのアプリケーション ソース ファイルです。 20 | 21 | ///////////////////////////////////////////////////////////////////////////// 22 | その他の標準ファイル : 23 | 24 | StdAfx.h, StdAfx.cpp 25 | これらのファイルは、コンパイル済みヘッダー (PCH) ファイル 26 | universalchardet.pch とプリコンパイル済み型ファイル StdAfx.obj を 27 | ビルドするために使用します。 28 | 29 | ///////////////////////////////////////////////////////////////////////////// 30 | その他のメモ : 31 | 32 | AppWizard 
では "TODO:" コメントを使用して、ユーザーが追加またはカスタマイズする 33 | ソース部分を示します。 34 | 35 | ///////////////////////////////////////////////////////////////////////////// 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_5.txt: -------------------------------------------------------------------------------- 1 | 仙人洞文化係話到萬年大源盆地一隻叫仙人洞嗰溶洞發現嗰史前文化。九十年代到許裡尋到嘍距今距今1萬年嗰穀植矽石標本,咁一吖子就搦人類栽禾嗰歷史提早嘍5000年,仙人洞遺跡也就成為世界頭上嗰「稻作之源」。 2 | 3 | 萬年仙人洞人種出世界首棵水稻 贛鄱是世界的稻作起源中心區 4 | 5 | 1隻分類: 江西嗰歷史 6 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_6.txt: -------------------------------------------------------------------------------- 1 | English is a West Germanic language originally spoken by the inhabitants of early medieval England.[3][4][5] It is named after the Angles, one of the ancient Germanic peoples that migrated to the area of Great Britain, which later took their name, England. Both names derive from Anglia, a peninsula on the Baltic Sea. English is most closely related to Frisian and Low Saxon, while its vocabulary has been significantly influenced by other Germanic languages, particularly Old Norse (a North Germanic language), as well as Latin and French.[6][7][8] 2 | 3 | English has developed over the course of more than 1,400 years. The earliest forms of English, a group of West Germanic (Ingvaeonic) dialects brought to Great Britain by Anglo-Saxon settlers in the 5th century, are collectively called Old English. 
Middle English began in the late 11th century with the Norman conquest of England; this was a period in which English was influenced by Old French, in particular through its Old Norman dialect.[9][10] Early Modern English began in the late 15th century with the introduction of the printing press to London, the printing of the King James Bible and the start of the Great Vowel Shift.[11] 4 | 5 | Modern English has been spreading around the world since the 17th century by the worldwide influence of the British Empire and the United States. Through all types of printed and electronic media of these countries, English has become the leading language of international discourse and the lingua franca in many regions and professional contexts such as science, navigation and law.[3] Modern English grammar is the result of a gradual change from a typical Indo-European dependent marking pattern, with a rich inflectional morphology and relatively free word order, to a mostly analytic pattern with little inflection, a fairly fixed subject–verb–object word order and a complex syntax.[12] Modern English relies more on auxiliary verbs and word order for the expression of complex tenses, aspect and mood, as well as passive constructions, interrogatives and some negation. 6 | 7 | English is the largest language by number of speakers,[13] and the third most-spoken native language in the world, after Standard Chinese and Spanish.[14] It is the most widely learned second language and is either the official language or one of the official languages in almost 60 sovereign states. There are more people who have learned it as a second language than there are native speakers. 
As of 2005, it was estimated that there were over 2 billion speakers of English.[15] English is the majority native language in the United States, the United Kingdom, Canada, Australia, New Zealand and Ireland, an official and the main language of Singapore, and it is widely spoken in some areas of the Caribbean, Africa, South Asia, Southeast Asia, and Oceania.[16] It is a co-official language of the United Nations, the European Union and many other world and regional international organisations. It is the most widely spoken Germanic language, accounting for at least 70% of speakers of this Indo-European branch. English speakers are called "Anglophones". There is much variability among the many accents and dialects of English used in different countries and regions—in terms of phonetics and phonology, and sometimes also vocabulary, idioms, grammar, and spelling— but it does not typically prevent understanding by speakers of other dialects and accents, although mutual unintelligibility can occur at extreme ends of the dialect continuum. 8 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_greek.txt: -------------------------------------------------------------------------------- 1 | Η ελληνική αποτελεί τη μητρική γλώσσα περίπου 12 εκατομμυρίων ανθρώπων, κυρίως στην Ελλάδα και στην Κύπρο. Αποτελεί επίσης την μητρική γλώσσα αυτοχθόνων πληθυσμών στην Αλβανία, στη Βουλγαρία, στην ΠΓΔΜ και στην Τουρκία. Εξαιτίας της μετανάστευσης η γλώσσα μιλιέται ακόμα σε χώρες-προορισμούς ελληνόφωνων πληθυσμών μεταξύ των οποίων η Αυστραλία, ο Καναδάς, η Γερμανία, το Ηνωμένο Βασίλειο, η Ρωσία, η Σερβία και οι Ηνωμένες Πολιτείες. Συνολικά υπολογίζεται ότι ο συνολικός αριθμός ανθρώπων που μιλάνε τα ελληνικά σαν πρώτη ή δεύτερη γλώσσα είναι γύρω στα 20 εκατομμύρια. 
2 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_he1.txt: -------------------------------------------------------------------------------- 1 | השם עבר מופיע בתנ"ך כשמו של סבו של אברהם אבינו. המושג "עברי" נזכר בתנ"ך פעמים רבות, אולם שפתם של העברים אינה נקראת עברית. כיום מכנים את שפת התנ"ך "לשון המקרא" (או "לשון הקודש") כדי להבדיל אותה מלשון חז"ל המכונה גם "לשון חכמים", שהיא בעצם ניב מאוחר של עברית. המונח כתב עברי מציין בלשונם של חז"ל דווקא את הכתב הארמי על שם "עבר הנהר". 2 | 3 | הקובץ המפורסם ביותר שנכתב בשפה העברית הוא התנ"ך, אם כי בו עצמו לא נזכר שמה של השפה. עם זאת, במלכים ב' יח, כו, ובישעיהו לו, יא, מסופר כי שליחי חזקיהו המלך מבקשים מרבשקה, שליחו של סנחריב מלך אשור, לדבר עמם ב"ארמית" ולא ב"יהודית", כדי שהעם (שכנראה לא דיבר ארמית) לא יבין את דבריהם, ונראה שזה היה שמה של השפה, או לפחות שמו של הניב שדובר באזור ירושלים. 4 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_he2.txt: -------------------------------------------------------------------------------- 1 | העברית היא שפה המשתייכת לקבוצת הלשונות השמיות הצפון מערביות, ומהווה את אחד הדיאלקטים של השפה הכנענית. שפה זו הייתה מדוברת החל מהאלף ה-2 לפני הספירה באזור הקרוי הלבנט, שהיום נמצא בשטחן של לבנון, סוריה, ארץ ישראל וירדן. טקסטים מהתקופה הזו שהתגלו בירדן ובלבנון חושפים קרבה רבה בין השפה העברית לשפה הפיניקית והמואבית. 2 | 3 | בעברית נכתבו רוב ספרי התנ"ך, כל המשנה, רוב הספרים החיצוניים ורוב המגילות הגנוזות. המקרא נכתב בעברית מקראית, ואילו המשנה נכתבה בניב הקרוי לשון חז"ל. בתקופה מסוימת בסוף המאה ה-2 לספירה או קצת מאוחר יותר (החוקרים חלוקים בשאלה זו) פסקו רוב היהודים מלהשתמש בעברית כבשפת דיבור. מאות שנים לאחר חתימת המשנה כאשר חדלו היהודים להשתמש בעברית כבר נכתבו התלמודים בארמית. עם זאת ישנן עדויות שאף במאה ה-8 לספירה שפת הדיבור בטבריה שם פעלו בעלי המסורה הייתה עברית. 
4 | 5 | גם כשהשפה העברית לא שימשה שפת דיבור, עדיין שימשה לאורך הדורות, במה שמכונה תקופת הביניים של העברית, כשפת הכתב העיקרית של היהודים, בעיקר בעניינים הלכתיים: כתיבת פרוטוקולים של בתי דין, קובצי הלכות, פרשנות לכתבי קודש ועוד. גם כתיבת מכתבים וחוזים בין גברים יהודים נעשתה לעתים קרובות בעברית. ספרות הלכתית לנשים בקהילות אשכנזיות נכתבה ביידיש (למשל ספר ההלכות "צאינה וראינה"), כיוון שהנשים, בניגוד לגברים, לא למדו עברית. חיבורים יהודיים בעלי אופי חילוני או לא-הלכתי נכתבו בשפות יהודיות או בשפות זרות, לדוגמה: הרמב"ם כתב את ספרו "משנה תורה" בעברית, על אף שספרו הפילוסופי המפורסם "מורה נבוכים" שיועד למשכילי זמנו נכתב בערבית יהודית. עם זאת, "מורה נבוכים", כמו ספרים אחרים בנושאים חילוניים, תורגמו לעברית כשהיה בהם עניין לקהילות יהודיות דוברות שפות אחרות. אחת המשפחות היהודיות המפורסמות שעסקו בתרגום מערבית-יהודית לעברית בימי הביניים היא משפחת אבן תיבון. 6 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_he3.txt: -------------------------------------------------------------------------------- 1 | אין כמעט ניבים אזוריים עבריים. למעשה, השפה הנשמעת בפי דוברים ילידיים זהה כמעט בכל חלקי ישראל. אפשר להבחין בשוני בין הניבים המדוברים בפי עדות יהודיות שונות (אתנולקטים), אולם שוני זה מתבטא בעיקר בפונולוגיה, ולא בתחביר או במורפולוגיה. שוני מסוים בתחביר ובמורפולוגיה קיים בין ניבים מעמדיים של השפה (סוציולקטים), אולם שוני זה אינו גדול (יחסית). 2 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/_ude_russian.txt: -------------------------------------------------------------------------------- 1 | В гимназии он не был в числе первых учеников (исключение составляли математика и латынь). 
Укоренившаяся система механического заучивания материала учащимися (которая, как он считал, наносит вред самому духу учёбы и творческому мышлению), а также авторитарное отношение учителей к ученикам вызывало у Альберта Эйнштейна неприятие, поэтому он часто вступал в споры со своими преподавателями. 2 | 3 | После окончательного разорения отца семейства в 1894 году Эйнштейны переехали из Мюнхена в итальянский город Павию, близ Милана. Сам Альберт оставался в Мюнхене ещё некоторое время, чтобы окончить все шесть классов гимназии. Так и не получив аттестата зрелости, в 1895 году он присоединился к своей семье в Милане. 4 | 5 | Осенью 1895 г. Альберт Эйнштейн прибыл в Швейцарию, чтобы сдать вступительные экзамены в Высшее техническое училище (Политехникум) в Цюрихе и стать преподавателем физики. Блестяще проявив себя на экзамене по математике, он в то же время провалил экзамены по ботанике и французскому языку, что не позволило ему поступить в Цюрихский Политехникум. Однако директор училища посоветовал молодому человеку поступить в выпускной класс школы в Аарау (Швейцария), чтобы получить аттестат и повторить поступление. 6 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/bom-utf-8.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:06,500 --> 00:00:09,000 3 | About 2 months ago I found myself on 4 | the comment section of YouTube 5 | 6 | 2 7 | 00:00:11,000 --> 00:00:17,000 8 | And I was commenting, 9 | unfortunately I was commenting, 10 | on a video about the famous Ayn Rand 11 | 12 | 3 13 | 00:00:19,000 --> 00:00:24,000 14 | And I 15 | posted underneath against 16 | this woman's tirades, 17 | against what is essentially 18 | the human race. 
19 | 20 | 4 21 | 00:00:25,000 --> 00:00:31,000 22 | that, this monetary system seems to have no point, seems to actually hinder people 23 | 24 | 5 25 | 00:00:31,000 --> 00:00:36,000 26 | and hinder progress, and one of the responses I got, I didn't answer it, was: 27 | 28 | 6 29 | 00:00:37,000 --> 00:00:43,000 30 | what actually money creates is an incentive to invent the new items, that's the driving force behind it 31 | 32 | 7 33 | 00:00:43,000 --> 00:00:50,000 34 | So what I thought I do is instead if answering on a YouTube comment is organize a global awareness day 35 | 36 | -------------------------------------------------------------------------------- /src/tests/data/largesets/utf-8/howto.diveintomark.org.xml: -------------------------------------------------------------------------------- 1 | 2 | howto.diveintomark.org 3 | 1 out of 3 ain't bad 4 | tag:howto.diveintomark.org,2005:0 5 | 6 | 7 | 2005-11-05T05:02:33Z 8 | Copyright 2005, licensed under the Creative Commons Attribution-ShareAlike 2.5 license 9 | 10 | Mark Pilgrim 11 | mark@diveintomark.org 12 | http://diveintomark.org/ 13 | 14 | 15 | <![CDATA[HOWTO Use Your Mac From Anywhere]]> 16 | 17 | tag:howto.diveintomark.org,2005:6 18 | 2005-11-03T21:28:59Z 19 | 2005-11-03T21:28:59Z 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | <![CDATA[HOWTO Backup Your DVD Movies]]> 36 | 37 | tag:howto.diveintomark.org,2005:4 38 | 2005-10-25T13:41:50Z 39 | 2005-10-25T13:41:50Z 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | <![CDATA[HOWTO Put Porn On Your iPod]]> 52 | 53 | tag:howto.diveintomark.org,2005:3 54 | 2005-10-14T03:41:13Z 55 | 2005-10-14T03:41:13Z 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | <![CDATA[HOWTO Rip DVD Movies To Your iPod Using Free Software]]> 65 | 66 | tag:howto.diveintomark.org,2005:1 67 | 2005-10-14T02:03:08Z 68 | 2005-10-14T02:03:08Z 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 
-------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_10.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_10.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_11.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_11.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_12.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_12.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_3.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_3.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_4.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_5.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_6.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_7.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_8.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_8.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/_ude_9.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/_ude_9.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/bbc.co.uk.hu.forum.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/bbc.co.uk.hu.forum.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/bbc.co.uk.hu.learningenglish.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/bbc.co.uk.hu.learningenglish.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/bbc.co.uk.hu.pressreview.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/bbc.co.uk.hu.pressreview.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/bbc.co.uk.hu.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/bbc.co.uk.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/objektivhir.hu.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/objektivhir.hu.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1250/torokorszag.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1250/torokorszag.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/_chromium_windows-1251_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/_chromium_windows-1251_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/_ude_2.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/aif.ru.health.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/aif.ru.health.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/anthropology.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/anthropology.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/aug32.hole.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/aug32.hole.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/aviaport.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/aviaport.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bbc.co.uk.popshow.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bbc.co.uk.popshow.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/blog.mlmaster.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/blog.mlmaster.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.2.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.3.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.3.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.9.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.medusa.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.medusa.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/bpm.cult.bg.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/bpm.cult.bg.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/cp1251.longCamelCase.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/cp1251.longCamelCase.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/debian.gabrovo.com.news.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/debian.gabrovo.com.news.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/debian.gabrovo.com.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/debian.gabrovo.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/doncho.net.comments.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/doncho.net.comments.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/doncho.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/doncho.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/ecloga.cult.bg.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/ecloga.cult.bg.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.1.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.4.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.9.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/forum.template-toolkit.ru.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/greek.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/greek.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/ide.li.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/ide.li.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/informator.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/informator.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/intertat.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/intertat.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/janulalife.blogspot.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/janulalife.blogspot.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/kapranoff.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/kapranoff.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/linux-bg.org.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/linux-bg.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/money.rin.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/money.rin.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/music.peeps.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/music.peeps.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/newsru.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/newsru.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1251/rinennor.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1251/rinennor.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1254/_chromium_windows-1254_with_no_encoding_specified.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1254/_chromium_windows-1254_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1254/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1254/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/_chromium_ISO-8859-8_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/_chromium_ISO-8859-8_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/_chromium_windows-1255_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/_chromium_windows-1255_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he1.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/_ude_he3.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/carshops.co.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/carshops.co.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/exego.net.2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/exego.net.2.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/hagada.org.il.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/hagada.org.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/halemo.net.edoar.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/halemo.net.edoar.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/hevra.org.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/hevra.org.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/hydepark.hevre.co.il.7957.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/hydepark.hevre.co.il.7957.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/info.org.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/info.org.il.xml -------------------------------------------------------------------------------- 
/src/tests/data/largesets/windows-1255,iso-8859-8/infomed.co.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/infomed.co.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/law.co.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/law.co.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/maakav.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/maakav.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/neviim.net.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/neviim.net.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.50.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.50.xml 
-------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.7.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.7.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/notes.co.il.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/pcplus.co.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/pcplus.co.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/sharks.co.il.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/sharks.co.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1255,iso-8859-8/whatsup.org.il.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1255,iso-8859-8/whatsup.org.il.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1256/_chromium_windows-1256_with_no_encoding_specified.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1256/_chromium_windows-1256_with_no_encoding_specified.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1256/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1256/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1257/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1257/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1258/_ude_1.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1258/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-1258/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-1258/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/_mozilla_bug488426_text.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/_mozilla_bug488426_text.html -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/opentle.org.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/opentle.org.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.analyse1.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.analyse1.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.centerlab.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.centerlab.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.healthinfo-ne.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/pharmacy.kku.ac.th.healthinfo-ne.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/windows-874/trickspot.boxchart.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/windows-874/trickspot.boxchart.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/_ude_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/_ude_1.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/_ude_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/_ude_2.txt -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/aif.ru.health.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/aif.ru.health.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/aug32.hole.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/aug32.hole.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/aviaport.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/aviaport.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/blog.mlmaster.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/blog.mlmaster.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.4.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.6.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.6.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.8.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.9.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/forum.template-toolkit.ru.9.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/greek.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/greek.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/intertat.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/intertat.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/kapranoff.ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/kapranoff.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/koi.kinder.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/koi.kinder.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/money.rin.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/money.rin.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/music.peeps.ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/music.peeps.ru.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/newsru.com.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/newsru.com.xml -------------------------------------------------------------------------------- /src/tests/data/largesets/x-mac-cyrillic/susu.ac.ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/largesets/x-mac-cyrillic/susu.ac.ru.xml -------------------------------------------------------------------------------- /src/tests/data/samples/NOTICE.md: -------------------------------------------------------------------------------- 1 | Included and Redistributed Files 2 | --------------------------------- 3 | 4 | 17 files are included in the source distribution tar. They are used to verify the standard functions of 5 | this library. They are mandatory to run `pytest` but not required to make the lib usable after install. 6 | They DO NOT guarantee that the detection-coverage will not regress. 7 | 8 | Those are EITHER pulled from Wikipedia _(CC-BY-SA)_ OR public domain archive. 9 | You SHALL NOT modify any of those files without explicit approval. 10 | -------------------------------------------------------------------------------- /src/tests/data/samples/sample-arabic-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-arabic-1.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-arabic.txt: -------------------------------------------------------------------------------- 1 | بالموازاة مع ذلك وللإشارة إلى المنطقة المغاربية بشكل عام، كان المؤرخون العرب في القرون الوسطى يستعملون لفظ "بلاد المغرب" بينما الأوروبيون يستعملون لفظ "الساحل البربري" للدلالة على ثلاثة أقاليم: المغرب الأدنى (إفريقية أو تونس الحالية)، المغرب الأوسط (الجزائر الحالية)، المغرب الأقصى (المملكة المغربية الحالية). 
2 | 3 | أحيانًا كان يُشار للبلاد بتسمية مرتبطة بعاصمتها: كـ "موريطنية الطنجية" التي كانت عاصمتها طنجة وكذا "مملكة مراكش" و"مملكة فاس" نسبة إلى عواصمها المعروفة آنذاك، وكانت الظهائر والمعاهدات الدولية يوقّعها سلاطين المغرب تارة باسم سلطان مراكش وتارة باسم سلطان فاس. 4 | 5 | تمت الإشارة للبلاد لاحقًا باسم المغرب الأقصى باللغة العربية حيث اعتَقد الناس في العالم القديم أن الشمس تشرق من اليابان (باللغة الصينية نيهون: مكان شروق الشمس) وتغرب في المملكة المغربية (باللغة العربية المغرب: مكان غروب الشمس). بينما اشتَقت البلاد اسمها في اللغات الأوروبية من الكلمة اللاتينية مرك (باللغة اللاتينية: Morroch) وهي تصحيف 6 | 7 | -------------------------------------------------------------------------------- /src/tests/data/samples/sample-bulgarian.txt: -------------------------------------------------------------------------------- 1 | Член 26 2 | 1. Bсеки човек има право на образование. Oбразованието трябва да бъде безплатно, поне що се отнася до началното и основното образование. Hачалното образование трябва да бъде задължително. Tехническото и професионалното образование трябва да бъдат общодостъпни, а висшето образование трябва да бъде еднакво достъпно за всички на основата на техните способности. 3 | 2. Oбразованието трябва да бъде насочено към цялостно развитие на човешката личност и заcилване на уважението към правата на човека и основните свободи. Tо трябва да съдейства за разбирателството, тъпримостта и приятелството между всички народи, расови или религиозни групи, както и за осъществяване дейността на Oрганизацията на Oбединените нации за поддържане на мира. 4 | 3. Pодителите имат право, с приоритет, да избират вида образование, което да получат техните деца. 5 | Член 27 6 | 1. Bсеки човек има право свободно да участва в културния живот на обществото, да се наслаждава на изкуствата, да участва в научния напредък и да се ползва от неговите достижения. 7 | 2. 
Bсеки човек има право на закрила на моралните и материалните си интереси, които са резултат от каквото и да е научно, литературно или художествено произведение, на което той е автор. -------------------------------------------------------------------------------- /src/tests/data/samples/sample-chinese.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-chinese.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-english.bom.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:06,500 --> 00:00:09,000 3 | About 2 months ago I found myself on 4 | the comment section of YouTube 5 | 6 | 2 7 | 00:00:11,000 --> 00:00:17,000 8 | And I was commenting, 9 | unfortunately I was commenting, 10 | on a video about the famous Ayn Rand 11 | 12 | 3 13 | 00:00:19,000 --> 00:00:24,000 14 | And I 15 | posted underneath against 16 | this woman's tirades, 17 | against what is essentially 18 | the human race. 
19 | 20 | 4 21 | 00:00:25,000 --> 00:00:31,000 22 | that, this monetary system seems to have no point, seems to actually hinder people 23 | 24 | 5 25 | 00:00:31,000 --> 00:00:36,000 26 | and hinder progress, and one of the responses I got, I didn't answer it, was: 27 | 28 | 6 29 | 00:00:37,000 --> 00:00:43,000 30 | what actually money creates is an incentive to invent the new items, that's the driving force behind it 31 | 32 | 7 33 | 00:00:43,000 --> 00:00:50,000 34 | So what I thought I do is instead if answering on a YouTube comment is organize a global awareness day 35 | 36 | -------------------------------------------------------------------------------- /src/tests/data/samples/sample-french-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-french-1.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-french.txt: -------------------------------------------------------------------------------- 1 | JEAN-BAPTISTE POQUELIN MOLIÈRE 2 | NÉ LE 15 JANVIER 1622, MORT LE 17 FÉVRIER 1673 3 | 4 | 5 | «Quel est le plus grand des écrivains de mon règne? demandait Louis XIV 6 | à Boileau.--Sire, c'est Molière.» 7 | 8 | Non-seulement Despréaux ne se trompait pas, mais de tous les écrivains 9 | que la France a produits, sans excepter Voltaire lui-même, imprégné de 10 | l'esprit anglais par son séjour à Londres, c'est incontestablement 11 | Molière ou Poquelin qui reproduit avec l'exactitude la plus vive et la 12 | plus complète le fond du génie français. 13 | 14 | En raison de cette identité de son génie avec le nôtre, il exerça sur 15 | l'époque subséquente, sur le dix-huitième siècle, sur l'époque même où 16 | nous écrivons, la plus active, la plus redoutable influence. Tout ce 17 | qu'il a voulu détruire est en ruine. 
Les types qu'il a créés ne peuvent 18 | mourir. Le sens de la vie pratique, qu'il a recommandé d'après Gassendi, 19 | a fini par l'emporter sur les idées qui imposaient à la société 20 | française. Il n'y a pas de superstition qu'il n'ait attaquée, pas de 21 | crédulité qu'il n'ait saisie corps à corps pour la terrasser, pas de 22 | formule qu'il ne se soit efforcé de détruire. A-t-il, comme l'exprime si 23 | bien Swift, _déchiré l'étoffe avec la doublure_? l'histoire le dira. Ce 24 | qui est certain, c'est que l'élève de Lucrèce, le protégé de Louis XIV, 25 | poursuivait un but déterminé vers lequel il a marché d'un pas ferme, 26 | obstiné, tantôt foulant aux pieds les obstacles, tantôt les tournant 27 | avec adresse. Le sujet de _Tartuffe_ est dans Lucrèce; à Lucrèce 28 | appartient ce vers, véritable devise de Molière: 29 | 30 | _Et religionis..... nodos solvere curo[1]._ 31 | 32 | La puissance de Molière sur les esprits a été telle, qu'une légende 33 | inexacte, calomnieuse de son vivant, romanesque après sa mort, s'est 34 | formée autour de cette gloire populaire. Il est un mythe comme Jules 35 | César et Apollon. 36 | 37 | [1] Ce que je veux, c'est rompre les entraves qui nous enchaînent 38 | (_religionis.... quod religat_). 39 | 40 | Dates, événements, réalités, souvenirs, sont venus se confondre dans un 41 | inextricable chaos où la figure de Molière a disparu. Tous les vices 42 | jusqu'à l'ivrognerie, jusqu'à l'inceste et au vol, lui furent imputés de 43 | son vivant. Les vertus les plus éthérées lui furent attribuées par les 44 | prêtres de son culte. Homme d'action, sans cesse en face du public, du 45 | roi ou de sa troupe, occupé de son gouvernement et de la création de ses 46 | œuvres, il n'a laissé aucune trace de sa propre vie, aucun document 47 | biographique, à peine une lettre. 
Les pamphlets pour et contre lui 48 | composaient déjà une bibliothèque, lorsqu'un écouteur aux portes, nommé 49 | Grimarest, collecteur d'anas, aimant l'exagération des récits et 50 | incapable de critique, prétendit, trente-deux ans après la mort du 51 | comédien populaire, raconter et expliquer sa vie. Vers la même époque, 52 | une comédienne, à ce que l'on croit du moins, forcée de se réfugier en 53 | Hollande, jetait dans un libelle les souvenirs de coulisse qu'elle avait 54 | pu recueillir sur l'intérieur du ménage de Molière et de sa femme. Enfin 55 | quelques détails authentiques, semés dans l'édition de ses œuvres 56 | publiée par Lagrange en 1682, complètent l'ensemble des documents 57 | comtemporains qui ont servi de base à cette légende de Molière, 58 | excellente à consulter, mais qu'il est bon de soumettre à l'examen le 59 | plus scrupuleux. 60 | -------------------------------------------------------------------------------- /src/tests/data/samples/sample-greek-2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-greek-2.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-greek.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-greek.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-hebrew-2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-hebrew-2.txt 
-------------------------------------------------------------------------------- /src/tests/data/samples/sample-hebrew-3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-hebrew-3.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-korean.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-korean.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-russian-2.txt: -------------------------------------------------------------------------------- 1 | В гимназии он не был в числе первых учеников (исключение составляли математика и латынь). Укоренившаяся система механического заучивания материала учащимися (которая, как он считал, наносит вред самому духу учёбы и творческому мышлению), а также авторитарное отношение учителей к ученикам вызывало у Альберта Эйнштейна неприятие, поэтому он часто вступал в споры со своими преподавателями. 2 | 3 | После окончательного разорения отца семейства в 1894 году Эйнштейны переехали из Мюнхена в итальянский город Павию, близ Милана. Сам Альберт оставался в Мюнхене ещё некоторое время, чтобы окончить все шесть классов гимназии. Так и не получив аттестата зрелости, в 1895 году он присоединился к своей семье в Милане. 4 | 5 | Осенью 1895 г. Альберт Эйнштейн прибыл в Швейцарию, чтобы сдать вступительные экзамены в Высшее техническое училище (Политехникум) в Цюрихе и стать преподавателем физики. Блестяще проявив себя на экзамене по математике, он в то же время провалил экзамены по ботанике и французскому языку, что не позволило ему поступить в Цюрихский Политехникум. 
Однако директор училища посоветовал молодому человеку поступить в выпускной класс школы в Аарау (Швейцария), чтобы получить аттестат и повторить поступление. 6 | -------------------------------------------------------------------------------- /src/tests/data/samples/sample-russian-3.txt: -------------------------------------------------------------------------------- 1 | Москва́ (произношение (инф.)) — столица России, город федерального значения, административный центр Центрального федерального округа и центр Московской области, в состав которой не входит[6]. Крупнейший по численности населения город России и её субъект — 12 655 050[3] человек (2021), самый населённый из городов, полностью расположенных в Европе, занимает 22 место среди городов мира по численности населения[7], крупнейший русскоязычный город в мире. Центр Московской городской агломерации. 2 | 3 | Историческая столица Великого княжества Московского, Русского царства, Российской империи (в 1728—1732 годах[8][9][10][11]), Советской России и СССР. Город-герой. В Москве находятся федеральные органы государственной власти Российской Федерации (за исключением Конституционного суда), посольства иностранных государств, штаб-квартиры большинства крупнейших российских коммерческих организаций и общественных объединений. 4 | 5 | Расположена на западе России, на реке Москве в центре Восточно-Европейской равнины, в междуречье Оки и Волги. Как субъект федерации, Москва граничит с Московской и Калужской областями. 6 | 7 | Москва — популярный туристический центр России. Кремль, Красная площадь, Новодевичий монастырь и Церковь Вознесения в Коломенском входят в список объектов всемирного наследия ЮНЕСКО[12]. Она является важнейшим транспортным узлом: город обслуживают 6 аэропортов, 10 железнодорожных вокзалов, 3 речных порта (имеется речное сообщение с морями бассейнов Атлантического и Северного Ледовитого океанов). С 1935 года в Москве работает метрополитен. Москва — спортивный центр страны. 
В 1980 году в Москве прошли XXII летние Олимпийские игры, а в 2018 город стал одним из хозяев чемпионата мира по футболу. -------------------------------------------------------------------------------- /src/tests/data/samples/sample-russian.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-russian.txt -------------------------------------------------------------------------------- /src/tests/data/samples/sample-turkish.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nickspring/charset-normalizer-rs/c426e168ebc8cb780e79f3856ca8707e2efa6961/src/tests/data/samples/sample-turkish.txt -------------------------------------------------------------------------------- /src/tests/detection_edge_case.rs: -------------------------------------------------------------------------------- 1 | use crate::from_bytes; 2 | 3 | #[test] 4 | fn test_undefined_unicode_ranges() { 5 | let tests = [b"\xef\xbb\xbf\xf0\x9f\xa9\xb3".as_slice()]; 6 | 7 | for input in tests { 8 | let result = from_bytes(input, None); 9 | let best_guess = result.get_best(); 10 | assert!( 11 | best_guess.is_some(), 12 | "Dead-simple ASCII detection has failed! Input: {:?}", 13 | &input 14 | ); 15 | assert_eq!( 16 | best_guess.unwrap().encoding(), 17 | "utf-8", 18 | "UTF-8 payload wrongly detected! Input: {:?}", 19 | &input 20 | ); 21 | assert_eq!( 22 | best_guess.unwrap().unicode_ranges().len(), 23 | 0, 24 | "This property in that edge case, should return a empty list. 
Input: {:?}", 25 | &input 26 | ); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/tests/detection_full.rs: -------------------------------------------------------------------------------- 1 | use crate::from_path; 2 | use crate::tests::FILES_SAMPLES; 3 | use crate::utils::{get_large_test_datasets, is_multi_byte_encoding}; 4 | use std::path::PathBuf; 5 | 6 | #[test] 7 | fn test_elementary_detection() { 8 | for sample in &*FILES_SAMPLES { 9 | let filename = sample.0; 10 | let encoding = &sample.1; 11 | let language = sample.2; 12 | 13 | let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); 14 | path.push(format!("src/tests/data/samples/{}", filename)); 15 | let result = from_path(&path, None); 16 | 17 | assert!(result.is_ok()); 18 | let result = result.unwrap(); 19 | let best_guess = result.get_best(); 20 | assert!( 21 | best_guess.is_some(), 22 | "Elementary detection has failed upon '{}'", 23 | filename 24 | ); 25 | let enc = best_guess.unwrap().encoding(); // presence asserted above, so a miss reports the sample name 26 | let languages = best_guess.unwrap().languages(); 27 | 28 | assert!( 29 | encoding.contains(&enc), 30 | "Elementary charset detection has failed upon '{}', {} NOT IN {:?}", 31 | filename, 32 | enc, 33 | encoding 34 | ); 35 | assert!( 36 | languages.contains(&language), 37 | "Elementary language detection has failed upon '{}', {} NOT IN {:?}", 38 | filename, 39 | language, 40 | languages 41 | ); 42 | } 43 | } 44 | 45 | #[test] 46 | fn test_largesets() { 47 | for (path, encoding) in get_large_test_datasets().unwrap() { 48 | let result = from_path(&PathBuf::from(path.clone()), None); 49 | assert!(result.is_ok()); 50 | 51 | let result = result.unwrap(); 52 | let best_guess = result.get_best(); 53 | let mut guess_encoding = "None"; 54 | if best_guess.is_some() { 55 | guess_encoding = best_guess.unwrap().encoding(); 56 | } 57 | let fail = !encoding.contains(&guess_encoding.to_string()) 58 | && (guess_encoding == "None" 59 | || encoding 60 | .iter() 61 | 
.any(|x| is_multi_byte_encoding(guess_encoding) != is_multi_byte_encoding(x))); 62 | 63 | assert!(!fail, "Problems with {}", path); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/tests/detection_large_payload.rs: -------------------------------------------------------------------------------- 1 | use crate::consts::TOO_BIG_SEQUENCE; 2 | use crate::from_bytes; 3 | 4 | #[test] 5 | fn test_large_payload_utf8_sig_basic_entry() { 6 | let mut payload = b"\xef\xbb\xbf".as_slice().to_vec(); 7 | payload.extend(b"0".repeat(TOO_BIG_SEQUENCE + 1).as_slice().to_vec()); 8 | 9 | let result = from_bytes(&payload, None); 10 | let best_guess = result.get_best(); 11 | assert!( 12 | best_guess.is_some(), 13 | "Large U8 payload case detection completely failed" 14 | ); 15 | assert_eq!( 16 | best_guess.unwrap().encoding(), 17 | "utf-8", 18 | "Large U8 payload case detection wrongly detected!" 19 | ); 20 | assert!(best_guess.unwrap().bom(), "SIG/BOM property should be True"); 21 | assert_eq!( 22 | best_guess.unwrap().raw().len(), 23 | payload.len(), 24 | "Large payload should remain untouched when accessed through .raw" 25 | ); 26 | } 27 | 28 | #[test] 29 | fn test_large_payload_ascii_sig_basic_entry() { 30 | let payload = b"0".repeat(TOO_BIG_SEQUENCE + 1).as_slice().to_vec(); 31 | 32 | let result = from_bytes(&payload, None); 33 | let best_guess = result.get_best(); 34 | assert!( 35 | best_guess.is_some(), 36 | "Large ASCII payload case detection completely failed" 37 | ); 38 | assert_eq!( 39 | best_guess.unwrap().encoding(), 40 | "ascii", 41 | "Large ASCII payload case detection wrongly detected!" 
42 | ); 43 | assert!( 44 | !best_guess.unwrap().bom(), 45 | "SIG/BOM property should be False" 46 | ); 47 | assert_eq!( 48 | best_guess.unwrap().raw().len(), 49 | payload.len(), 50 | "Large payload should remain untouched when accessed through .raw" 51 | ); 52 | } 53 | 54 | #[test] 55 | fn test_misleading_large_sequence() { 56 | let mut payload = b"hello simple ascii " 57 | .repeat(TOO_BIG_SEQUENCE) 58 | .as_slice() 59 | .to_vec(); 60 | payload.extend("我没有埋怨,磋砣的只是一些时间。 磋砣的只是一些时间。".as_bytes()); 61 | 62 | let result = from_bytes(&payload, None); 63 | assert!(!result.is_empty(), "No results"); 64 | let best_guess = result.get_best(); 65 | assert!(best_guess.is_some(), "Best guess is exists"); 66 | assert_eq!( 67 | best_guess.unwrap().encoding(), 68 | "utf-8", 69 | "Best guess is not utf-8" 70 | ); 71 | assert!( 72 | best_guess.unwrap().decoded_payload().is_some(), 73 | "Decoded content is empty" 74 | ); 75 | } 76 | -------------------------------------------------------------------------------- /src/tests/entity.rs: -------------------------------------------------------------------------------- 1 | use crate::entity::{CharsetMatch, CharsetMatches, CoherenceMatch, Language}; 2 | 3 | #[test] 4 | fn test_charset_matches() { 5 | /////////////////////////////////////////////////////////////////////////////////////////// 6 | // CharsetMatches tests 7 | /////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | let mut c_matches = CharsetMatches::new(Some(vec![CharsetMatch::new( 10 | &[0xD0, 0xA2, 0xD0, 0xB5, 0xD1, 0x81, 0xD1, 0x82], 11 | "utf-8", 12 | 0.01, 13 | false, 14 | &vec![ 15 | CoherenceMatch { 16 | language: &Language::Russian, 17 | score: 0.99, 18 | }, 19 | CoherenceMatch { 20 | language: &Language::Ukrainian, 21 | score: 0.8, 22 | }, 23 | ], 24 | None, 25 | )])); 26 | assert_eq!(c_matches.len(), 1); 27 | 28 | // append new CharsetMatch 29 | c_matches.append(CharsetMatch::new( 30 | &[0xD0, 0xA2, 0xD0, 0xB5, 0xD1, 0x81, 
0xD1, 0x82], 31 | "utf-16le", 32 | 0.011, 33 | false, 34 | &vec![ 35 | CoherenceMatch { 36 | language: &Language::Russian, 37 | score: 0.95, 38 | }, 39 | CoherenceMatch { 40 | language: &Language::Kazakh, 41 | score: 0.7, 42 | }, 43 | ], 44 | None, 45 | )); 46 | assert_eq!(c_matches.len(), 2); 47 | 48 | // check best match 49 | assert!(c_matches.get_best().is_some()); 50 | assert_eq!(c_matches.get_best().unwrap().encoding(), "utf-8"); 51 | 52 | // check get by encoding 53 | assert!(c_matches.get_by_encoding("utf-8").is_some()); 54 | assert_eq!( 55 | c_matches 56 | .get_by_encoding("utf-8") 57 | .unwrap() 58 | .decoded_payload() 59 | .unwrap(), 60 | "Тест" 61 | ); 62 | 63 | // test indexation impl 64 | assert_eq!(c_matches[0].encoding(), "utf-8"); 65 | 66 | // test iteration 67 | let mut i = 0; 68 | for _ in c_matches.iter_mut() { 69 | i += 1; 70 | } 71 | assert_eq!(i, 2); 72 | let mut i = 0; 73 | for _ in c_matches.iter_mut() { 74 | i += 1; 75 | } 76 | assert_eq!(i, 2); 77 | assert_eq!(c_matches.len(), 2); 78 | 79 | /////////////////////////////////////////////////////////////////////////////////////////// 80 | // CharsetMatch tests 81 | /////////////////////////////////////////////////////////////////////////////////////////// 82 | 83 | // PartialEq test 84 | assert_ne!(c_matches[0], c_matches[1]); 85 | assert_eq!( 86 | c_matches[1], 87 | CharsetMatch::new( 88 | &[0xD0, 0xA2, 0xD0, 0xB5, 0xD1, 0x81, 0xD1, 0x82], 89 | "utf-16le", 90 | 0.044, 91 | true, 92 | &vec!( 93 | CoherenceMatch { 94 | language: &Language::Russian, 95 | score: 0.1, 96 | }, 97 | CoherenceMatch { 98 | language: &Language::Kazakh, 99 | score: 0.5, 100 | }, 101 | ), 102 | None, 103 | ) 104 | ); 105 | 106 | // most_probably_language 107 | assert_eq!(c_matches[0].most_probably_language(), &Language::Russian); 108 | 109 | // languages 110 | assert!(c_matches[0].languages().contains(&&Language::Ukrainian)); 111 | 112 | // multi_byte_usage 113 | for m in c_matches.iter_mut() { 114 | 
assert_eq!(m.multi_byte_usage(), 0.5); 115 | } 116 | 117 | // chaos_percents 118 | assert_eq!(c_matches[0].chaos_percents(), 1.0); 119 | assert_eq!(c_matches[1].chaos_percents(), 1.1); 120 | 121 | // coherence_percents 122 | assert_eq!(c_matches[0].coherence_percents(), 99.0); 123 | assert_eq!(c_matches[1].coherence_percents(), 95.0); 124 | 125 | // unicode_ranges 126 | for m in c_matches.iter_mut() { 127 | if m.encoding() == "utf-8" { 128 | assert!(m.unicode_ranges().contains(&String::from("Cyrillic"))); 129 | } else { 130 | assert!(m 131 | .unicode_ranges() 132 | .contains(&String::from("CJK Unified Ideographs"))); 133 | } 134 | } 135 | 136 | // encoding_aliases 137 | assert!(c_matches[0].encoding_aliases().contains(&"unicode11utf8")); 138 | } 139 | -------------------------------------------------------------------------------- /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | use crate::entity::Language; 3 | use once_cell::sync::Lazy; 4 | mod cd; 5 | mod detection_base; 6 | mod detection_edge_case; 7 | mod detection_full; 8 | mod detection_large_payload; 9 | mod entity; 10 | mod md; 11 | mod utils; 12 | 13 | pub static FILES_SAMPLES: Lazy<Vec<(&'static str, Vec<&'static str>, &'static Language)>> = 14 | Lazy::new(|| { 15 | vec![ 16 | ( 17 | "sample-turkish.txt", 18 | vec!["windows-1254"], 19 | &Language::Turkish, 20 | ), 21 | ("sample-chinese.txt", vec!["big5"], &Language::Chinese), 22 | ( 23 | "sample-french-1.txt", 24 | vec!["iso-8859-1", "windows-1252"], 25 | &Language::French, 26 | ), 27 | ( 28 | "sample-arabic-1.txt", 29 | vec!["windows-1256"], 30 | &Language::Arabic, 31 | ), 32 | ("sample-arabic.txt", vec!["utf-8"], &Language::Arabic), 33 | ( 34 | "sample-greek.txt", 35 | vec!["windows-1253", "iso-8859-7"], 36 | &Language::Greek, 37 | ), 38 | ("sample-french.txt", vec!["utf-8"], &Language::French), 39 | ("sample-russian-3.txt", vec!["utf-8"], &Language::Russian), 40 | ( 41 | "sample-greek-2.txt", 42 | 
vec!["windows-1253", "iso-8859-7"], 43 | &Language::Greek, 44 | ), 45 | ( 46 | "sample-hebrew-2.txt", 47 | vec!["windows-1255", "iso-8859-8"], 48 | &Language::Hebrew, 49 | ), 50 | ( 51 | "sample-hebrew-3.txt", 52 | vec!["windows-1255", "iso-8859-8"], 53 | &Language::Hebrew, 54 | ), 55 | ("sample-bulgarian.txt", vec!["utf-8"], &Language::Bulgarian), 56 | ("sample-english.bom.txt", vec!["utf-8"], &Language::English), 57 | ("sample-spanish.txt", vec!["utf-8"], &Language::Spanish), 58 | ("sample-korean.txt", vec!["euc-kr"], &Language::Korean), 59 | ("sample-russian-2.txt", vec!["utf-8"], &Language::Russian), 60 | ( 61 | "sample-russian.txt", 62 | vec!["x-mac-cyrillic"], 63 | &Language::Russian, 64 | ), 65 | ("sample-polish.txt", vec!["utf-8"], &Language::Polish), 66 | ] 67 | }); 68 | -------------------------------------------------------------------------------- /tests/normalizer.rs: -------------------------------------------------------------------------------- 1 | use assert_cmd::Command; 2 | use predicates::prelude::*; 3 | use std::ffi::OsString; 4 | use std::fs; 5 | use std::path::PathBuf; 6 | 7 | fn get_sample_path(sample_name: &str) -> OsString { 8 | let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); 9 | path.push(format!("src/tests/data/samples/{}", sample_name)); 10 | path.as_os_str().to_os_string() 11 | } 12 | 13 | #[test] 14 | fn test_cli_single_file() { 15 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 16 | cmd.args(&[get_sample_path("sample-arabic-1.txt")]) 17 | .assert() 18 | .success() 19 | .code(predicate::eq(0)) 20 | .stdout(predicate::str::contains("language\": \"Arabic\"")); 21 | } 22 | 23 | #[test] 24 | fn test_cli_version_output_success() { 25 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 26 | cmd.args(["--version"]) 27 | .assert() 28 | .success() 29 | .code(predicate::eq(0)) 30 | .stdout(predicate::str::contains( 31 | "The Real First Universal Charset Detector", 32 | )); 33 | } 34 | 35 | #[test] 36 | fn 
test_cli_single_file_normalize() { 37 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 38 | cmd.args(&[ 39 | get_sample_path("sample-arabic-1.txt"), 40 | OsString::from("--normalize"), 41 | ]) 42 | .assert() 43 | .success() 44 | .code(predicate::eq(0)) 45 | .stdout(predicate::str::contains("language\": \"Arabic\"")); 46 | 47 | let normalized_path = &get_sample_path("sample-arabic-1.windows-1256.txt"); 48 | assert!(fs::metadata(normalized_path).is_ok()); 49 | fs::remove_file(normalized_path).expect("Normalized file is not exists"); 50 | } 51 | 52 | #[test] 53 | fn test_cli_single_verbose_file() { 54 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 55 | cmd.args(&[ 56 | get_sample_path("sample-arabic-1.txt"), 57 | OsString::from("--verbose"), 58 | ]) 59 | .assert() 60 | .success() 61 | .code(predicate::eq(0)) 62 | .stdout(predicate::str::contains("language\": \"Arabic\"")); 63 | } 64 | 65 | #[test] 66 | fn test_cli_multiple_files() { 67 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 68 | cmd.args(&[ 69 | get_sample_path("sample-arabic-1.txt"), 70 | get_sample_path("sample-french.txt"), 71 | get_sample_path("sample-chinese.txt"), 72 | ]) 73 | .assert() 74 | .success() 75 | .code(predicate::eq(0)); 76 | } 77 | 78 | #[test] 79 | fn test_cli_multiple_files_with_alternative() { 80 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 81 | cmd.args(&[ 82 | OsString::from("-a"), 83 | get_sample_path("sample-arabic-1.txt"), 84 | get_sample_path("sample-french.txt"), 85 | get_sample_path("sample-chinese.txt"), 86 | ]) 87 | .assert() 88 | .success() 89 | .code(predicate::eq(0)); 90 | } 91 | 92 | #[test] 93 | fn test_cli_multiple_files_with_minimal_output() { 94 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 95 | cmd.args(&[ 96 | OsString::from("-m"), 97 | get_sample_path("sample-arabic-1.txt"), 98 | get_sample_path("sample-french.txt"), 99 | get_sample_path("sample-chinese.txt"), 100 | ]) 101 | .assert() 102 | .success() 103 | 
.code(predicate::eq(0)); 104 | } 105 | 106 | #[test] 107 | fn test_cli_non_existent_file() { 108 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 109 | cmd.args(&[get_sample_path("non-exists-file.txt")]) 110 | .assert() 111 | .failure() 112 | .code(predicate::gt(0)); 113 | } 114 | 115 | #[test] 116 | fn test_cli_replace_without_normalize() { 117 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 118 | cmd.args(&[ 119 | OsString::from("--replace"), 120 | get_sample_path("sample-arabic-1.txt"), 121 | ]) 122 | .assert() 123 | .failure() 124 | .code(predicate::gt(0)); 125 | } 126 | 127 | #[test] 128 | fn test_cli_force_replace_without_replace() { 129 | let mut cmd = Command::cargo_bin("normalizer").unwrap(); 130 | cmd.args(&[ 131 | OsString::from("--force"), // --force given without --replace must be rejected 132 | get_sample_path("sample-arabic-1.txt"), 133 | ]) 134 | .assert() 135 | .failure() 136 | .code(predicate::gt(0)); 137 | } 138 | --------------------------------------------------------------------------------