├── examples ├── report │ ├── img │ │ ├── errors.png │ │ └── warnings.png │ ├── csv │ │ ├── wordcountlt1000.csv │ │ ├── error-OPF-031.csv │ │ ├── error-RSC-008.csv │ │ ├── warning-OPF-003.csv │ │ ├── warning-OPF-055.csv │ │ ├── error-OPF-030.csv │ │ ├── error-CSS-020.csv │ │ ├── error-HTM-003.csv │ │ ├── error-OPF-073.csv │ │ ├── error-RSC-007.csv │ │ ├── error-RSC-011.csv │ │ ├── error-RSC-016.csv │ │ ├── error-RSC-020.csv │ │ ├── warning-PKG-010.csv │ │ ├── error-OPF-032.csv │ │ ├── error-RSC-012.csv │ │ ├── warnings.csv │ │ ├── error-NCX-001.csv │ │ ├── error-RSC-005.csv │ │ ├── errors.csv │ │ └── errorsorwarnings.csv │ ├── report.md │ ├── css │ │ └── github-markdown.css │ └── report.html └── example.csv ├── run-ace.sh ├── .gitignore ├── README.md ├── extract.py ├── report.py └── css └── github-markdown.css /examples/report/img/errors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ebooks-qa/master/examples/report/img/errors.png -------------------------------------------------------------------------------- /examples/report/img/warnings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ebooks-qa/master/examples/report/img/warnings.png -------------------------------------------------------------------------------- /examples/report/csv/wordcountlt1000.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | -------------------------------------------------------------------------------- /examples/report/csv/error-OPF-031.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | 
./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-008.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 3 | -------------------------------------------------------------------------------- /examples/report/csv/warning-OPF-003.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 3 | -------------------------------------------------------------------------------- /examples/report/csv/warning-OPF-055.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-OPF-030.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Alice's_Adventures_in_Wonderland_3.epub,,Alice's Adventures in Wonderland 3,"['Lewis Carroll, 
John Tenniel']",Wikisource / KB / Newgen Knowledgeworks,3.2,False,2,0,OPF-030 RSC-005,,27196 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-CSS-020.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-HTM-003.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-OPF-073.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-007.csv: -------------------------------------------------------------------------------- 1 | 
fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-011.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-016.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-020.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 
RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/warning-PKG-010.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 3 | -------------------------------------------------------------------------------- /examples/report/csv/error-OPF-032.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Max_Havelaar_1.epub,mult001maxh01,Max Havelaar of de koffiveilingen der Nederlandsche Handelmaatschappy 1,['Multatuli'],dbnl,2.0.1,False,3,0,OPF-032 RSC-005 RSC-012,,125528 3 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 4 | -------------------------------------------------------------------------------- /examples/report/csv/error-RSC-012.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Max_Havelaar_1.epub,mult001maxh01,Max Havelaar of de koffiveilingen der Nederlandsche Handelmaatschappy 1,['Multatuli'],dbnl,2.0.1,False,3,0,OPF-032 RSC-005 RSC-012,,125528 3 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 4 | 
#!/bin/bash

# Run the DAISY Ace tool on every EPUB file found under a directory.
#
# Usage: run-ace.sh dirIn dirOut
#   dirIn   directory that is searched for *.epub files
#   dirOut  directory in which one Ace output directory per EPUB is created
#
# Note: find is invoked without a depth limit, so EPUBs in subdirectories
# of dirIn are processed as well.

if [ "$#" -ne 2 ] ; then
    echo "Usage: run-ace.sh dirIn dirOut" >&2
    exit 1
fi

if ! [ -d "$1" ] ; then
    echo "dirIn must be a directory" >&2
    exit 1
fi

dirIn="$1"
# dirOut, normalise to absolute path (quoted so paths with spaces survive)
dirOut="$(readlink -f "$2")"

# NUL-delimited file names keep names with spaces or newlines intact
while IFS= read -d $'\0' -r file ; do
    # File basename with only the final extension removed; cutting at the
    # first dot would map "a.b.epub" and "a.c.epub" to the same name
    bName=$(basename "$file")
    bName="${bName%.*}"
    # Ace output directory for this EPUB
    dirOutEpub="$dirOut/$bName"
    ace -o "$dirOutEpub" "$file"
    #echo "$dirOutEpub" "$file"
done < <(find "$dirIn" -name '*.epub' -type f -print0)
-------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Alice's_Adventures_in_Wonderland_1.epub,7e824858-f120-11eb-9a03-0242ac130003,Alice's Adventures in Wonderland 1,['Lewis Carroll'],Wikisource,3.2,False,1,0,NCX-001,,27036 3 | ./samples/Max_Havelaar_1.epub,mult001maxh01,Max Havelaar of de koffiveilingen der Nederlandsche Handelmaatschappy 1,['Multatuli'],dbnl,2.0.1,False,3,0,OPF-032 RSC-005 RSC-012,,125528 4 | ./samples/Lotgevallen_van_Ferdinand_Huyck_2.epub,e6ae2224-f09b-11eb-9a03-0242ac130003,De lotgevallen van Ferdinand Huyck 2,['Jacob van Lennep'],dbnl / KB / Newgen Knowledgeworks,3.2,False,1,0,NCX-001,,218333 5 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 6 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 7 | ./samples/Alice_in_Wonderland_4.epub,urn:uuid:d692392c-3eaa-4b7c-a045-c6d6d51307b2,Alice’s Adventures in Wonderland,['Lewis Caroll'],KB National Library of the Netherlands,3.2,False,1,0,RSC-005,,25949 8 | ./samples/Alice's_Adventures_in_Wonderland_3.epub,,Alice's Adventures in Wonderland 3,"['Lewis Carroll, John Tenniel']",Wikisource / KB / Newgen Knowledgeworks,3.2,False,2,0,OPF-030 RSC-005,,27196 9 | -------------------------------------------------------------------------------- /examples/report/csv/errorsorwarnings.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | 
./samples/Alice's_Adventures_in_Wonderland_1.epub,7e824858-f120-11eb-9a03-0242ac130003,Alice's Adventures in Wonderland 1,['Lewis Carroll'],Wikisource,3.2,False,1,0,NCX-001,,27036 3 | ./samples/Max_Havelaar_1.epub,mult001maxh01,Max Havelaar of de koffiveilingen der Nederlandsche Handelmaatschappy 1,['Multatuli'],dbnl,2.0.1,False,3,0,OPF-032 RSC-005 RSC-012,,125528 4 | ./samples/Lotgevallen_van_Ferdinand_Huyck_2.epub,e6ae2224-f09b-11eb-9a03-0242ac130003,De lotgevallen van Ferdinand Huyck 2,['Jacob van Lennep'],dbnl / KB / Newgen Knowledgeworks,3.2,False,1,0,NCX-001,,218333 5 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 6 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 7 | ./samples/Alice_in_Wonderland_4.epub,urn:uuid:d692392c-3eaa-4b7c-a045-c6d6d51307b2,Alice’s Adventures in Wonderland,['Lewis Caroll'],KB National Library of the Netherlands,3.2,False,1,0,RSC-005,,25949 8 | ./samples/Alice's_Adventures_in_Wonderland_3.epub,,Alice's Adventures in Wonderland 3,"['Lewis Carroll, John Tenniel']",Wikisource / KB / Newgen Knowledgeworks,3.2,False,2,0,OPF-030 RSC-005,,27196 9 | -------------------------------------------------------------------------------- /examples/example.csv: -------------------------------------------------------------------------------- 1 | fileName,identifier,title,author,publisher,epubVersion,epubStatus,noErrors,noWarnings,errors,warnings,wordCount 2 | ./samples/Testbook_Formaat.epub,urn:uuid:33011D53-E2BB-4885-B7EB-72E99AEED70B,Test book epub3,['Ferdinand Rusch'],Formaat,3.2,True,0,0,,,2779 3 | ./samples/Alice's_Adventures_in_Wonderland_1.epub,7e824858-f120-11eb-9a03-0242ac130003,Alice's Adventures in Wonderland 1,['Lewis 
Carroll'],Wikisource,3.2,False,1,0,NCX-001,,27036 4 | ./samples/Open_a_GLAM_Lab_2.epub,61572367834730,Open a GLAM Lab 2,[],,3.2,True,0,0,,,31777 5 | ./samples/Max_Havelaar_1.epub,mult001maxh01,Max Havelaar of de koffiveilingen der Nederlandsche Handelmaatschappy 1,['Multatuli'],dbnl,2.0.1,False,3,0,OPF-032 RSC-005 RSC-012,,125528 6 | ./samples/Lotgevallen_van_Ferdinand_Huyck_2.epub,e6ae2224-f09b-11eb-9a03-0242ac130003,De lotgevallen van Ferdinand Huyck 2,['Jacob van Lennep'],dbnl / KB / Newgen Knowledgeworks,3.2,False,1,0,NCX-001,,218333 7 | ./samples/Lotgevallen_van_Ferdinand_Huyck_1.epub,lenn006lotg01,De lotgevallen van Ferdinand Huyck 1,['Jacob van Lennep'],dbnl,2.0.1,True,0,0,,,217263 8 | ./samples/Open_a_GLAM_Lab_1.epub,urn:uuid:b1818c01-4aab-42ae-b9c8-2c21457156c8,Open a GLAM Lab 1,['International GLAM Labs Community'],Glamlabs.io,3.2,False,8,1,CSS-020 HTM-003 OPF-073 RSC-005 RSC-007 RSC-011 RSC-016 RSC-020,PKG-010,30439 9 | ./samples/Eva_1.epub,brug004eva_01,Eva 1,['Carry van Bruggen'],dbnl,2.0.1,False,5,2,OPF-031 OPF-032 RSC-005 RSC-008 RSC-012,OPF-003 OPF-055,68221 10 | ./samples/Alice_in_Wonderland_4.epub,urn:uuid:d692392c-3eaa-4b7c-a045-c6d6d51307b2,Alice’s Adventures in Wonderland,['Lewis Caroll'],KB National Library of the Netherlands,3.2,False,1,0,RSC-005,,25949 11 | ./samples/Max_Havelaar_A.epub,urn:uuid:C8C09083-B435-4498-8C4F-F2AF8CBC4106,Max Havelaar 2,['Multatuli'],"KB, DBNL",3.2,True,0,0,,,125275 12 | ./samples/Alice's_Adventures_in_Wonderland_2.epub,urn:uuid:650E8F77-B80F-4C8E-BAEC-D985FE0581A0,Alice's Adventures in Wonderland 2,"['Lewis Carroll, John Tenniel']",Wikisource / KB / Formaat,3.2,True,0,0,,,27238 13 | ./samples/Alice's_Adventures_in_Wonderland_3.epub,,Alice's Adventures in Wonderland 3,"['Lewis Carroll, John Tenniel']",Wikisource / KB / Newgen Knowledgeworks,3.2,False,2,0,OPF-030 RSC-005,,27196 14 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # If you're thinking of un-ignoring any of these artefacts in a lower 2 | # level .gitignore please think again. The non-Eclipse / Maven options 3 | # below are recommended candidates from http://help.github.com/ignore-files/ 4 | # 5 | # As a general rule please don't commit: 6 | # IDE generated files, it upsets the IDEs of others 7 | # Compiled / built files (exes, jars, etc.), it's a source repository 8 | # Test data larger than a few KB, we'll go for bigger test files in the testbed 9 | # 10 | # Remember, we'd like to keep the git repo light and small enough for people to 11 | # download quickly and easily. 12 | # 13 | # Any questions then get in touch: 14 | # 15 | # Carl Wilson Open Planets Foundation 16 | # carlwilson@GitHub carl( AT )openplanetsfoundation.org. 17 | 18 | # Eclipse Files # 19 | ################# 20 | .externalToolBuilders 21 | .settings 22 | .classpath 23 | .project 24 | *.md.html 25 | bin 26 | .pydevproject 27 | 28 | # Netbeans Files # 29 | ################# 30 | nbactions.xml 31 | 32 | # project build directories # 33 | ############################# 34 | target 35 | build 36 | dist 37 | pyi-build 38 | __pycache__/ 39 | 40 | # Compiled Source # 41 | ################### 42 | *.com 43 | *.class 44 | *.dll 45 | *.exe 46 | *.o 47 | *.so 48 | *.pyc 49 | 50 | # PyInstaller bits # 51 | #################### 52 | # *.spec 53 | 54 | # Vagrant bits # 55 | #################### 56 | .vagrant/ 57 | 58 | # Packages # 59 | ############ 60 | # Better to unpack and commit the raw source 61 | # git has its own built in compression methods 62 | *.7z 63 | *.dmg 64 | *.gz 65 | *.iso 66 | *.jar 67 | *.rar 68 | *.tar 69 | *.war 70 | *.zip 71 | *.dsc 72 | *.deb 73 | *.changes 74 | *.egg-info 75 | 76 | # Logs and databases # 77 | ###################### 78 | *.log 79 | *.sql 80 | *.sqlite 81 | 82 | # Vue Backup Files # 83 | ###################### 84 | .~*.vue 85 | 86 | # OS Generated 
files # 87 | ###################### 88 | .DS_Store* 89 | ehthumbs.db 90 | Icon? 91 | Thumbs.db 92 | .directory 93 | 94 | # Files from gh-pages # 95 | ####################### 96 | /_site 97 | 98 | # Pytest coverage file 99 | .coverage 100 | coverage.xml 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About this repo 2 | 3 | This repo contains scripts and resources for automated quality assessment of e-books (for now only EPUB; PDF may follow later). Scripts require Python 3.x and do *not* work with Python 2.x! 4 | 5 | ## Dependencies 6 | 7 | - [Epubcheck Python wrapper](https://github.com/titusz/epubcheck) (`pip install epubcheck`) (tested with v. 4.2.6) 8 | - [tika-python](https://github.com/chrismattmann/tika-python) (`pip install tika`) 9 | - [pandas](https://pandas.pydata.org/) (`pip install pandas`) 10 | - [matplotlib](https://matplotlib.org/) (`pip install matplotlib`) 11 | - [python-tabulate](https://github.com/astanin/python-tabulate) (`pip install tabulate`) 12 | 13 | ## extract.py 14 | 15 | This script recursively walks through a directory tree, and runs Epubcheck for each EPUB file (identified by its file extension). It then extracts all validation error and warning codes, removing duplicate codes, and writes them to a comma-delimited text file. Note that the script only reports on *unique* errors and warnings. For example, if an EPUB contains multiple missing referenced resources (error code `RSC-007`), any duplicate instances are removed. 16 | 17 | The script also reports some basic metadata (identifier, author, title, publisher) and a word count for each file. The word count can be a useful heuristic for identifying EPUBs that contain only images without any actual text (particularly common for illustrated children's books of some publishers). For these books the word count is typically less than 1000. 
18 | 19 | 20 | ### Usage 21 | ``` 22 | python3 extract.py rootDir prefixOut 23 | ``` 24 | 25 | ### Output 26 | 27 | The script generates two output files (the names are based on the user-specified value of prefixOut): 28 | 29 | 1. A comma-delimited text file (\$prefixOut.csv) with, for each EPUB, the following columns: 30 | 31 | - **fileName**: full path to file 32 | - **identifier**: identifier 33 | - **title**: title 34 | - **author**: author name 35 | - **publisher**: publisher name 36 | - **epubVersion**: EPUB version string 37 | - **epubStatus**: EpubCheck validation outcome 38 | - **noErrors**: number of *unique* errors reported by EpubCheck 39 | - **noWarnings**: number of *unique* warnings reported by EpubCheck 40 | - **errors**: space-delimited list of *unique* errors reported by EpubCheck 41 | - **warnings**: space-delimited list of *unique* warnings reported by EpubCheck 42 | - **wordCount**: word count (based on extracted text with Apache Tika) 43 | 44 | Errors and warnings are reported as codes; the meaning of these codes can be found in EpubCheck's [default MessageBundle.properties file](https://github.com/w3c/epubcheck/blob/main/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties). 45 | 46 | 2. A text file (\$prefixOut_ec.txt) with the full Epubcheck output of all processed files. 47 | 48 | ## report.py 49 | 50 | ### Usage 51 | 52 | ``` 53 | python3 report.py inputFile dirOut 54 | ``` 55 | 56 | Here *inputFile* is the CSV file produced by *extract.py*, and *dirOut* is the name of a directory where all output is written. 
#! /usr/bin/env python
"""Validate EPUB files with Epubcheck and summarise the results as CSV.

Usage: extract.py rootDir prefixOut

Walks rootDir recursively, runs Epubcheck on every file with an .epub
extension, and writes one CSV row per EPUB to <prefixOut>.csv. The
messages reported by Epubcheck are written to <prefixOut>_ec.txt.
"""

import sys
import os
from epubcheck import EpubCheck
import csv
from lxml import etree
from tika import parser


# Attributes of the Epubcheck metadata object that validate() copies into
# the 'meta' dictionary; the order here is the insertion order of the keys.
_META_FIELDS = (
    'publisher', 'title', 'creator', 'date', 'subject', 'description',
    'rights', 'identifier', 'language', 'nSpines', 'checkSum',
    'renditionLayout', 'renditionOrientation', 'renditionSpread',
    'ePubVersion', 'isScripted', 'hasFixedFormat', 'isBackwardCompatible',
    'hasAudio', 'hasVideo', 'charsCount', 'embeddedFonts', 'refFonts',
    'hasEncryption', 'hasSignatures', 'contributors',
)


def validate(epub):
    """Validate file with Epubcheck.

    Args:
        epub: path to the EPUB file.

    Returns:
        A dictionary with the keys 'file' (the path that was passed in),
        'valid' (Epubcheck's validity flag), 'meta' (dictionary of metadata
        attributes), and 'errors', 'warnings' and 'infos' (lists of message
        dictionaries, each with the keys 'id', 'level', 'location' and
        'message').
    """
    ecOut = EpubCheck(epub)
    ecOutMeta = ecOut.meta

    # Dictionary for Epubcheck results
    ecResults = {}
    ecResults['file'] = epub
    ecResults['valid'] = ecOut.valid

    # Metadata
    meta = {}
    for field in _META_FIELDS:
        meta[field] = getattr(ecOutMeta, field)

    # Validation errors and warnings; ERROR and FATAL both count as errors,
    # anything that is neither an error nor a WARNING is kept as info
    errors = []
    warnings = []
    infos = []
    for ecOutMessage in ecOut.messages:
        message = {
            'id': ecOutMessage.id,
            'level': ecOutMessage.level,
            'location': ecOutMessage.location,
            'message': ecOutMessage.message,
        }
        if ecOutMessage.level in ['ERROR', 'FATAL']:
            errors.append(message)
        elif ecOutMessage.level == 'WARNING':
            warnings.append(message)
        else:
            infos.append(message)

    ecResults['meta'] = meta
    ecResults['errors'] = errors
    ecResults['warnings'] = warnings
    ecResults['infos'] = infos

    return ecResults


def _uniqueIds(messages):
    """Return the 'id' values of messages, duplicates removed, order kept."""
    seen = set()
    unique = []
    for message in messages:
        if message['id'] not in seen:
            seen.add(message['id'])
            unique.append(message['id'])
    return unique


def _findEpubs(rootDir):
    """Return paths of all files below rootDir that have an .epub extension."""
    epubs = []
    for root, _subdirs, files in os.walk(rootDir):
        for filename in files:
            # Case-insensitive, so '.epub', '.EPUB' and '.Epub' all match
            if filename.lower().endswith('.epub'):
                epubs.append(os.path.join(root, filename))
    return epubs


def _countWords(epub):
    """Extract text from epub with Tika and return the number of words."""
    parsed = parser.from_file(os.path.normpath(epub))
    # NOTE(review): Tika appears to report content as None when it extracts
    # no text; treat that (and a missing key) as an empty document instead
    # of failing on None.strip()
    extractedText = (parsed.get("content") or "").strip()
    return len(extractedText.split())


def main():
    """Command-line entry point: validate all EPUBs and write the output files.

    Reads rootDir and prefixOut from sys.argv; exits with status 1 if they
    are missing.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("USAGE: extract.py rootDir prefixOut\n")
        # Non-zero status so calling scripts can detect the usage error
        sys.exit(1)

    # Command line args
    rootDir = sys.argv[1]
    prefixOut = sys.argv[2]

    # Output files
    outFile = prefixOut + ".csv"
    ecFile = prefixOut + "_ec.txt"

    headerItems = ['fileName', 'identifier', 'title', 'author', 'publisher',
                   'epubVersion', 'epubStatus', 'noErrors', 'noWarnings',
                   'errors', 'warnings', 'wordCount']

    # newline='' leaves line endings to the csv writer, so the requested
    # '\n' terminator is not translated by the platform. The with-statement
    # closes both files even if validation or text extraction fails.
    with open(outFile, 'w', encoding='utf-8', newline='') as fOut, \
            open(ecFile, 'w', encoding='utf-8') as fECFull:

        # Create CSV writer object and write header row
        csvOut = csv.writer(fOut, lineterminator='\n')
        csvOut.writerow(headerItems)

        for epub in _findEpubs(rootDir):
            # Run Epubcheck
            ecResults = validate(epub)

            epubStatus = ecResults['valid']
            epubMeta = ecResults['meta']
            epubErrors = ecResults['errors']
            epubWarnings = ecResults['warnings']

            epubVersion = epubMeta['ePubVersion']
            identifier = epubMeta['identifier']
            title = epubMeta['title']
            author = epubMeta['creator']
            publisher = epubMeta['publisher']

            # Unique error and warning codes
            epubErrorsUnique = _uniqueIds(epubErrors)
            epubWarningsUnique = _uniqueIds(epubWarnings)

            # Number of unique error and warning codes
            noErrors = len(epubErrorsUnique)
            noWarnings = len(epubWarningsUnique)

            # Create space-separated strings of unique errors / warnings
            errors = ' '.join(epubErrorsUnique)
            warnings = ' '.join(epubWarningsUnique)

            # Extract text with Tika and count words
            noWords = _countWords(epub)

            # Put all items that are to be written to a list and write row
            rowItems = [epub, identifier, title, author, publisher,
                        epubVersion, epubStatus, noErrors, noWarnings,
                        errors, warnings, noWords]
            csvOut.writerow(rowItems)

            # Write the Epubcheck messages for this file: a separator line,
            # the file path, then one tab-separated line per message
            # (errors first, then warnings, then infos)
            fECFull.write('****\n')
            fECFull.write(epub + '\n')
            for message in epubErrors + epubWarnings + ecResults['infos']:
                fECFull.write('{}\t{}\t{}\t{}\n'.format(
                    message['level'], message['id'],
                    message['location'], message['message']))


if __name__ == "__main__":
    main()
|:--------|:-------------------------------------------------------------------------------------------------|--------:|-----------------:| 38 | | RSC-005 | Error while parsing file: %1$s | 5 | 41.67 | 39 | | NCX-001 | NCX identifier ("%1$s") does not match OPF identifier ("%2$s"). | 2 | 16.67 | 40 | | OPF-032 | Guide references "%1$s" which is not a valid "OPS Content Document". | 2 | 16.67 | 41 | | RSC-012 | Fragment identifier is not defined. | 2 | 16.67 | 42 | | CSS-020 | CSS font selector declaration uses unexpected font-size value "%1$s". | 1 | 8.33 | 43 | | HTM-003 | External entities are not allowed in EPUB v3 documents. External entity declaration found: %1$s. | 1 | 8.33 | 44 | | OPF-073 | External identifiers must not appear in the document type declaration. | 1 | 8.33 | 45 | | RSC-007 | Referenced resource "%1$s" could not be found in the EPUB. | 1 | 8.33 | 46 | | RSC-011 | Found a reference to a resource that is not a spine item. | 1 | 8.33 | 47 | | RSC-016 | Fatal Error while parsing file: %1$s | 1 | 8.33 | 48 | | RSC-020 | "%1$s" is not a valid URI. | 1 | 8.33 | 49 | | OPF-031 | File listed in reference element in guide was not declared in OPF manifest: %1$s. | 1 | 8.33 | 50 | | RSC-008 | Referenced resource "%1$s" is not declared in the OPF manifest. | 1 | 8.33 | 51 | | OPF-030 | The unique-identifier "%1$s" was not found. 
| 1 | 8.33 | 52 | 53 | ![](./img/errors.png) 54 | 55 | 56 | ## CSV subsets for each error 57 | 58 | | Code | File | 59 | |:--------|:---------------------------------------------| 60 | | RSC-005 | [error-RSC-005.csv](./csv/error-RSC-005.csv) | 61 | | NCX-001 | [error-NCX-001.csv](./csv/error-NCX-001.csv) | 62 | | OPF-032 | [error-OPF-032.csv](./csv/error-OPF-032.csv) | 63 | | RSC-012 | [error-RSC-012.csv](./csv/error-RSC-012.csv) | 64 | | CSS-020 | [error-CSS-020.csv](./csv/error-CSS-020.csv) | 65 | | HTM-003 | [error-HTM-003.csv](./csv/error-HTM-003.csv) | 66 | | OPF-073 | [error-OPF-073.csv](./csv/error-OPF-073.csv) | 67 | | RSC-007 | [error-RSC-007.csv](./csv/error-RSC-007.csv) | 68 | | RSC-011 | [error-RSC-011.csv](./csv/error-RSC-011.csv) | 69 | | RSC-016 | [error-RSC-016.csv](./csv/error-RSC-016.csv) | 70 | | RSC-020 | [error-RSC-020.csv](./csv/error-RSC-020.csv) | 71 | | OPF-031 | [error-OPF-031.csv](./csv/error-OPF-031.csv) | 72 | | RSC-008 | [error-RSC-008.csv](./csv/error-RSC-008.csv) | 73 | | OPF-030 | [error-OPF-030.csv](./csv/error-OPF-030.csv) | 74 | 75 | ## Frequency of validation warnings 76 | 77 | | Code | Description | Count | % of all EPUBs | 78 | |:--------|:-------------------------------------------------------------------------------------------------------|--------:|-----------------:| 79 | | PKG-010 | Filename contains spaces, therefore URI escaping is necessary. Consider removing spaces from filename. | 1 | 8.33 | 80 | | OPF-003 | Item "%1$s" exists in the EPUB, but is not declared in the OPF manifest. | 1 | 8.33 | 81 | | OPF-055 | %1$s tag is empty. 
| 1 | 8.33 | 82 | 83 | ![](./img/warnings.png) 84 | 85 | 86 | ## CSV subsets for each warning 87 | 88 | | Code | File | 89 | |:--------|:-------------------------------------------------| 90 | | PKG-010 | [warning-PKG-010.csv](./csv/warning-PKG-010.csv) | 91 | | OPF-003 | [warning-OPF-003.csv](./csv/warning-OPF-003.csv) | 92 | | OPF-055 | [warning-OPF-055.csv](./csv/warning-OPF-055.csv) | 93 | 94 | ## Detailed statistics 95 | 96 | 97 | ### All EPUBs 98 | 99 | | | noErrors | noWarnings | wordCount | 100 | |:------|-----------:|-------------:|------------:| 101 | | count | 12 | 12 | 12 | 102 | | mean | 1.75 | 0.25 | 77252.8 | 103 | | std | 2.49089 | 0.621582 | 76391.1 | 104 | | min | 0 | 0 | 2779 | 105 | | 25% | 0 | 0 | 27156 | 106 | | 50% | 1 | 0 | 31108 | 107 | | 75% | 2.25 | 0 | 125338 | 108 | | max | 8 | 2 | 218333 | 109 | 110 | ### EPUBs with errors 111 | 112 | | | noErrors | noWarnings | wordCount | 113 | |:------|-----------:|-------------:|------------:| 114 | | count | 7 | 7 | 7 | 115 | | mean | 3 | 0.428571 | 74671.7 | 116 | | std | 2.64575 | 0.786796 | 73166.6 | 117 | | min | 1 | 0 | 25949 | 118 | | 25% | 1 | 0 | 27116 | 119 | | 50% | 2 | 0 | 30439 | 120 | | 75% | 4 | 0.5 | 96874.5 | 121 | | max | 8 | 2 | 218333 | 122 | 123 | ### EPUBs with warnings 124 | 125 | | | noErrors | noWarnings | wordCount | 126 | |:------|-----------:|-------------:|------------:| 127 | | count | 2 | 2 | 2 | 128 | | mean | 6.5 | 1.5 | 49330 | 129 | | std | 2.12132 | 0.707107 | 26715.9 | 130 | | min | 5 | 1 | 30439 | 131 | | 25% | 5.75 | 1.25 | 39884.5 | 132 | | 50% | 6.5 | 1.5 | 49330 | 133 | | 75% | 7.25 | 1.75 | 58775.5 | 134 | | max | 8 | 2 | 68221 | 135 | 136 | ### EPUBs with errors or warnings 137 | 138 | | | noErrors | noWarnings | wordCount | 139 | |:------|-----------:|-------------:|------------:| 140 | | count | 7 | 7 | 7 | 141 | | mean | 3 | 0.428571 | 74671.7 | 142 | | std | 2.64575 | 0.786796 | 73166.6 | 143 | | min | 1 | 0 | 25949 | 144 | | 25% | 1 | 0 | 27116 | 145 
| | 50% | 2 | 0 | 30439 | 146 | | 75% | 4 | 0.5 | 96874.5 | 147 | | max | 8 | 2 | 218333 | 148 | 149 | ### EPUBs with less than 1000 words 150 | 151 | | | noErrors | noWarnings | wordCount | 152 | |:------|-----------:|-------------:|------------:| 153 | | count | 0 | 0 | 0 | 154 | | mean | nan | nan | nan | 155 | | std | nan | nan | nan | 156 | | min | nan | nan | nan | 157 | | 25% | nan | nan | nan | 158 | | 50% | nan | nan | nan | 159 | | 75% | nan | nan | nan | 160 | | max | nan | nan | nan | 161 | -------------------------------------------------------------------------------- /report.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import shutil 6 | import urllib.request 7 | import codecs 8 | import datetime 9 | import pandas as pd 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import matplotlib.pylab as pylab 13 | import markdown 14 | from tabulate import tabulate 15 | 16 | """ 17 | Analyses output CSV generated by extract.py, and generates reports in Markdown and HTML 18 | """ 19 | 20 | # Set defaults for pyplot 21 | params = {'legend.fontsize': 'x-large', 22 | 'figure.figsize': (8, 6), 23 | 'axes.labelsize': '18', 24 | 'axes.titlesize':'x-large', 25 | 'xtick.labelsize':'x-large', 26 | 'ytick.labelsize':'x-large'} 27 | pylab.rcParams.update(params) 28 | 29 | def dfToMarkdown(dataframe, headers='keys'): 30 | """Convert Data Frame to Markdown table with optionally custom headers""" 31 | mdOut = dataframe.pipe(tabulate, headers=headers, tablefmt='pipe') 32 | return mdOut 33 | 34 | def main(): 35 | if len(sys.argv) < 3: 36 | sys.stderr.write("USAGE: report.py \n") 37 | sys.exit() 38 | else: 39 | fileEcResults=sys.argv[1] 40 | dirOut=os.path.normpath(sys.argv[2]) 41 | 42 | if not os.path.isfile(fileEcResults): 43 | sys.stderr.write("Input file does not exist\n") 44 | sys.exit() 45 | 46 | if not os.path.isdir(dirOut): 47 | os.makedirs(dirOut) 48 
| 49 | dirCSS = os.path.join(dirOut, 'css') 50 | dirCSV = os.path.join(dirOut, 'csv') 51 | dirImg = os.path.join(dirOut, 'img') 52 | 53 | if not os.path.isdir(dirCSS): 54 | os.makedirs(dirCSS) 55 | 56 | if not os.path.isdir(dirCSV): 57 | os.makedirs(dirCSV) 58 | 59 | 60 | if not os.path.isdir(dirImg): 61 | os.makedirs(dirImg) 62 | 63 | # Copy style sheet to CSS dir 64 | try: 65 | cssIn = os.path.join(sys.path[0], 'css', 'github-markdown.css') 66 | cssOut = os.path.join(dirCSS, 'github-markdown.css') 67 | shutil.copyfile(cssIn, cssOut) 68 | except: 69 | sys.stderr.write("Cannot copy style sheet\n") 70 | sys.exit() 71 | 72 | # Download Epubcheck MessageBundle.properties file 73 | try: 74 | response = urllib.request.urlopen('https://raw.githubusercontent.com/w3c/epubcheck/master/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties') 75 | mbProperties = response.read().decode("utf-8", errors="ignore").split('\n') 76 | 77 | except: 78 | sys.stderr.write("Cannot read Epubcheck MessageBundle.properties file\n") 79 | sys.exit() 80 | 81 | # Dictionary that links error/warning codes to descriptions 82 | messageLookup={} 83 | 84 | for line in mbProperties: 85 | line.strip() 86 | if not line.startswith('#') and line != '': 87 | lineSplit = line.split('=') 88 | # Replace underscores with '-' (which are output by Epubcheck) 89 | code = lineSplit[0].replace('_', '-') 90 | desc = lineSplit[1] 91 | messageLookup[code] = desc 92 | 93 | # Markdown-formatted string that is used to write report 94 | mdString = '' 95 | mdString += '# EPUB analysis report\n' 96 | 97 | mdString += '\nReport generated: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n' 98 | mdString += '\nInput file: ' + fileEcResults + '\n' 99 | 100 | 101 | # Read CSV to Data Frame 102 | epubsAll = pd.read_csv(fileEcResults, index_col=0, encoding="utf-8") 103 | 104 | # Create lists to store all individual error and warning codes 105 | errorsAll = [] 106 | warningsAll = [] 107 | 108 | # 
Iterate over rows and extract errors and warnings fields 109 | for index, row in epubsAll.iterrows(): 110 | errorsRow = row["errors"] 111 | warningsRow = row["warnings"] 112 | 113 | if not pd.isnull(errorsRow): 114 | # Split individual error codes into list 115 | errorsAsList = errorsRow.split(' ') 116 | 117 | # Add error codes to errorsAll 118 | for error in errorsAsList: 119 | if error != '': 120 | errorsAll.append(error) 121 | 122 | if not pd.isnull(warningsRow): 123 | # Split individual warning codes into list 124 | warningsAsList = warningsRow.split(' ') 125 | 126 | # Add warning codes to warningAll 127 | for warning in warningsAsList: 128 | if warning != '': 129 | warningsAll.append(warning) 130 | 131 | 132 | # Errors and Warnings lists have different size and are not linked to a file, 133 | # so we create separate series for them 134 | errors = pd.Series(np.array(errorsAll)) 135 | warnings = pd.Series(np.array(warningsAll)) 136 | 137 | # Number of files 138 | noEpubs = len(epubsAll) 139 | 140 | # EPUBs with errors 141 | epubsWithErrors = epubsAll[epubsAll.noErrors > 0] 142 | noEpubsWithErrors = len(epubsWithErrors) 143 | # Write to CSV 144 | epubsWithErrors.to_csv(os.path.join(dirCSV, 'errors.csv'), encoding='utf-8') 145 | 146 | # EPUBs with warnings 147 | epubsWithWarnings = epubsAll[epubsAll.noWarnings > 0] 148 | noEpubsWithWarnings = len(epubsWithWarnings) 149 | # Write to CSV 150 | epubsWithWarnings.to_csv(os.path.join(dirCSV, 'warnings.csv'), encoding='utf-8') 151 | 152 | # EPUBs with errors or warnings 153 | epubsWithErrorsOrWarnings = epubsAll[(epubsAll.noErrors > 0) | (epubsAll.noWarnings > 0)] 154 | noEpubsWithErrorsOrWarnings = len(epubsWithErrorsOrWarnings) 155 | # Write to CSV 156 | epubsWithErrorsOrWarnings.to_csv(os.path.join(dirCSV, 'errorsorwarnings.csv'), encoding='utf-8') 157 | 158 | # EPUBs with word count < 1000 159 | epubsWithWClt1000 = epubsAll[epubsAll.wordCount < 1000] 160 | noEpubsWithWClt1000 = len(epubsWithWClt1000) 161 | # Write 
to CSV 162 | epubsWithWClt1000.to_csv(os.path.join(dirCSV, 'wordcountlt1000.csv'), encoding='utf-8') 163 | 164 | # Create summary table 165 | summaryTable = [ 166 | ['EPUBs', noEpubs, ''], 167 | ['EPUBs with errors', noEpubsWithErrors, round(100*noEpubsWithErrors/noEpubs, 2)], 168 | ['EPUBs with warnings', noEpubsWithWarnings, round(100*noEpubsWithWarnings/noEpubs, 2)], 169 | ['EPUBs with errors or warnings', noEpubsWithErrorsOrWarnings, round(100*noEpubsWithErrorsOrWarnings/noEpubs, 2)], 170 | ['EPUBs with less than 1000 words', noEpubsWithWClt1000, round(100*noEpubsWithWClt1000/noEpubs, 2)]] 171 | 172 | headers = ['', 'Count', '% of all EPUBs'] 173 | 174 | mdString += '\n\n## Summary\n\n' 175 | mdString += tabulate(summaryTable, headers, tablefmt='pipe') 176 | 177 | # Create table with links to generated CSV files 178 | csvTable = [ 179 | ['EPUBs with errors', '[errors.csv](./csv/errors.csv)'], 180 | ['EPUBs with warnings', '[warnings.csv](./csv/warnings.csv)'], 181 | ['EPUBs with errors or warnings', '[errorsorwarnings.csv](./csv/errorsorwarnings.csv)'], 182 | ['EPUBs with less than 1000 words', '[wordcountlt1000.csv](./csv/wordcountlt1000.csv)']] 183 | 184 | headers = ['', 'File'] 185 | 186 | mdString += '\n\n## CSV subsets\n\n' 187 | mdString += tabulate(csvTable, headers, tablefmt='pipe') 188 | 189 | # Frequency of EPUB versions 190 | epubVCounts = epubsAll['epubVersion'].value_counts().to_frame() 191 | 192 | # Add column with relative frequencies 193 | versionRelFrequencies = [] 194 | for i, row in epubVCounts.iterrows(): 195 | relFrequency = 100*row[0]/noEpubs 196 | versionRelFrequencies.append(round(relFrequency, 2)) 197 | 198 | epubVCounts.insert(1, '%', versionRelFrequencies) 199 | 200 | mdString += '\n\n## EPUB versions\n\n' 201 | mdString += dfToMarkdown(epubVCounts,['epubVersion', 'Count', '% of all EPUBs']) 202 | 203 | # Frequency of errors 204 | errorCounts = errors.value_counts().to_frame(name="count") 205 | 206 | if not errorCounts.empty: 207 | # 
Insert columns with error descriptions and relative frequencies 208 | # also report CSV file of all EPUBs for each error code 209 | errorDescriptions = [] 210 | errorRelFrequencies = [] 211 | errorLinkTable = [] 212 | errorLinkheaders = ['Code', 'File'] 213 | 214 | for i, row in errorCounts.iterrows(): 215 | description = messageLookup.get(i, "n/a") 216 | errorDescriptions.append(description) 217 | 218 | relFrequency = 100*row["count"]/noEpubs 219 | errorRelFrequencies.append(round(relFrequency, 2)) 220 | 221 | # Select all corresponding records with this error and write to CSV 222 | records = epubsWithErrors[epubsWithErrors['errors'].str.contains(str(i))] 223 | fName = 'error-' + str(i) + '.csv' 224 | records.to_csv(os.path.join(dirCSV, fName), encoding='utf-8') 225 | # Add link to link table 226 | errorLinkTable.append([str(i), '[' + fName + '](' + './csv/' + fName + ')']) 227 | 228 | errorCounts.insert(0, 'description', errorDescriptions) 229 | errorCounts.insert(2, '%', errorRelFrequencies) 230 | 231 | mdString += '\n\n## Frequency of validation errors\n\n' 232 | mdString += dfToMarkdown(errorCounts,['Code', 'Description', 'Count', '% of all EPUBs']) 233 | 234 | mdString += '\n\n![](./img/errors.png)\n' 235 | 236 | mdString += '\n\n## CSV subsets for each error\n\n' 237 | mdString += tabulate(errorLinkTable, errorLinkheaders, tablefmt='pipe') 238 | 239 | # Frequency of warnings 240 | warningCounts = warnings.value_counts().to_frame(name="count") 241 | 242 | if not warningCounts.empty: 243 | # Insert columns with warning descriptions and relative frequencies 244 | # also report CSV file of all EPUBs for each warning code 245 | warningDescriptions = [] 246 | warningRelFrequencies = [] 247 | warningLinkTable = [] 248 | warningLinkheaders = ['Code', 'File'] 249 | 250 | for i, row in warningCounts.iterrows(): 251 | description = messageLookup.get(i, "n/a") 252 | warningDescriptions.append(description) 253 | 254 | relFrequency = 100*row["count"]/noEpubs 255 | 
warningRelFrequencies.append(round(relFrequency, 2)) 256 | 257 | # Select all corresponding records with this warning and write to CSV 258 | records = epubsWithWarnings[epubsWithWarnings['warnings'].str.contains(str(i))] 259 | fName = 'warning-' + str(i) + '.csv' 260 | records.to_csv(os.path.join(dirCSV, fName), encoding='utf-8') 261 | # Add link to link table 262 | warningLinkTable.append([str(i), '[' + fName + '](' + './csv/' + fName + ')']) 263 | 264 | warningCounts.insert(0, 'description', warningDescriptions) 265 | warningCounts.insert(2, '%', warningRelFrequencies) 266 | 267 | mdString += '\n\n## Frequency of validation warnings\n\n' 268 | mdString += dfToMarkdown(warningCounts,['Code', 'Description', 'Count', '% of all EPUBs']) 269 | 270 | mdString += '\n\n![](./img/warnings.png)\n' 271 | 272 | mdString += '\n\n## CSV subsets for each warning\n\n' 273 | mdString += tabulate(warningLinkTable, warningLinkheaders, tablefmt='pipe') 274 | 275 | if not errorCounts.empty: 276 | # Plot of errors 277 | ecPlot = errorCounts.sort_values(by="count").plot(kind='barh', 278 | y='count', 279 | lw=2.5, 280 | figsize=(8,8)) 281 | 282 | ecPlot.set_xlabel('Count') 283 | ecPlot.set_ylabel('Error') 284 | 285 | fig = ecPlot.get_figure() 286 | fig.savefig(os.path.join(dirImg, 'errors.png')) 287 | 288 | if not warningCounts.empty: 289 | # Plot of warnings 290 | wcPlot = warningCounts.sort_values(by="count").plot(kind='barh', 291 | y='count', 292 | lw=2.5, 293 | figsize=(8,8)) 294 | 295 | wcPlot.set_xlabel('Count') 296 | wcPlot.set_ylabel('Warning') 297 | 298 | fig = wcPlot.get_figure() 299 | fig.savefig(os.path.join(dirImg, 'warnings.png')) 300 | 301 | # Write detailed statistics 302 | mdString += '\n\n## Detailed statistics\n' 303 | 304 | mdString += '\n\n### All EPUBs\n\n' 305 | mdString += dfToMarkdown(epubsAll.describe()) 306 | 307 | mdString += '\n\n### EPUBs with errors\n\n' 308 | mdString += dfToMarkdown(epubsWithErrors.describe()) 309 | 310 | mdString += '\n\n### EPUBs with 
warnings\n\n' 311 | mdString += dfToMarkdown(epubsWithWarnings.describe()) 312 | 313 | mdString += '\n\n### EPUBs with errors or warnings\n\n' 314 | mdString += dfToMarkdown(epubsWithErrorsOrWarnings.describe()) 315 | 316 | mdString += '\n\n### EPUBs with less than 1000 words\n\n' 317 | mdString += dfToMarkdown(epubsWithWClt1000.describe()) 318 | 319 | mdString += '\n' 320 | # Write Markdown report 321 | 322 | # Open output report (Markdown format) for writing 323 | try: 324 | reportMD = os.path.join(dirOut, 'report.md') 325 | fOut = codecs.open(reportMD, "w", "utf-8") 326 | except: 327 | sys.stderr.write("Cannot write output report\n") 328 | sys.exit() 329 | 330 | fOut.write(mdString) 331 | fOut.close() 332 | 333 | # Convert report to html 334 | 335 | try: 336 | reportHTML = os.path.join(dirOut, 'report.html') 337 | fHTML = codecs.open(reportHTML, 'w', 'utf-8') 338 | except: 339 | sys.stderr.write("Cannot write HTML report\n") 340 | sys.exit() 341 | 342 | fHTML.write(""" 343 | 344 | 345 | 346 | Report 347 | 348 | 363 | 364 | 365 | \n""") 366 | 367 | HTML = markdown.markdown(mdString, 368 | output_format='html5', 369 | output=fHTML, 370 | encoding='utf-8', 371 | extensions=['extra']) 372 | 373 | 374 | fHTML.write(HTML) 375 | fHTML.write("""\n\n\n\n""") 376 | 377 | fHTML.close() 378 | 379 | 380 | 381 | main() 382 | 383 | -------------------------------------------------------------------------------- /css/github-markdown.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: octicons-link; 3 | src: 
url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAZwABAAAAAACFQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEU0lHAAAGaAAAAAgAAAAIAAAAAUdTVUIAAAZcAAAACgAAAAoAAQAAT1MvMgAAAyQAAABJAAAAYFYEU3RjbWFwAAADcAAAAEUAAACAAJThvmN2dCAAAATkAAAABAAAAAQAAAAAZnBnbQAAA7gAAACyAAABCUM+8IhnYXNwAAAGTAAAABAAAAAQABoAI2dseWYAAAFsAAABPAAAAZwcEq9taGVhZAAAAsgAAAA0AAAANgh4a91oaGVhAAADCAAAABoAAAAkCA8DRGhtdHgAAAL8AAAADAAAAAwGAACfbG9jYQAAAsAAAAAIAAAACABiATBtYXhwAAACqAAAABgAAAAgAA8ASm5hbWUAAAToAAABQgAAAlXu73sOcG9zdAAABiwAAAAeAAAAME3QpOBwcmVwAAAEbAAAAHYAAAB/aFGpk3jaTY6xa8JAGMW/O62BDi0tJLYQincXEypYIiGJjSgHniQ6umTsUEyLm5BV6NDBP8Tpts6F0v+k/0an2i+itHDw3v2+9+DBKTzsJNnWJNTgHEy4BgG3EMI9DCEDOGEXzDADU5hBKMIgNPZqoD3SilVaXZCER3/I7AtxEJLtzzuZfI+VVkprxTlXShWKb3TBecG11rwoNlmmn1P2WYcJczl32etSpKnziC7lQyWe1smVPy/Lt7Kc+0vWY/gAgIIEqAN9we0pwKXreiMasxvabDQMM4riO+qxM2ogwDGOZTXxwxDiycQIcoYFBLj5K3EIaSctAq2kTYiw+ymhce7vwM9jSqO8JyVd5RH9gyTt2+J/yUmYlIR0s04n6+7Vm1ozezUeLEaUjhaDSuXHwVRgvLJn1tQ7xiuVv/ocTRF42mNgZGBgYGbwZOBiAAFGJBIMAAizAFoAAABiAGIAznjaY2BkYGAA4in8zwXi+W2+MjCzMIDApSwvXzC97Z4Ig8N/BxYGZgcgl52BCSQKAA3jCV8CAABfAAAAAAQAAEB42mNgZGBg4f3vACQZQABIMjKgAmYAKEgBXgAAeNpjYGY6wTiBgZWBg2kmUxoDA4MPhGZMYzBi1AHygVLYQUCaawqDA4PChxhmh/8ODDEsvAwHgMKMIDnGL0x7gJQCAwMAJd4MFwAAAHjaY2BgYGaA4DAGRgYQkAHyGMF8NgYrIM3JIAGVYYDT+AEjAwuDFpBmA9KMDEwMCh9i/v8H8sH0/4dQc1iAmAkALaUKLgAAAHjaTY9LDsIgEIbtgqHUPpDi3gPoBVyRTmTddOmqTXThEXqrob2gQ1FjwpDvfwCBdmdXC5AVKFu3e5MfNFJ29KTQT48Ob9/lqYwOGZxeUelN2U2R6+cArgtCJpauW7UQBqnFkUsjAY/kOU1cP+DAgvxwn1chZDwUbd6CFimGXwzwF6tPbFIcjEl+vvmM/byA48e6tWrKArm4ZJlCbdsrxksL1AwWn/yBSJKpYbq8AXaaTb8AAHja28jAwOC00ZrBeQNDQOWO//sdBBgYGRiYWYAEELEwMTE4uzo5Zzo5b2BxdnFOcALxNjA6b2ByTswC8jYwg0VlNuoCTWAMqNzMzsoK1rEhNqByEyerg5PMJlYuVueETKcd/89uBpnpvIEVomeHLoMsAAe1Id4AAAAAAAB42oWQT07CQBTGv0JBhagk7HQzKxca2sJCE1hDt4QF+9JOS0nbaaYDCQfwCJ7Au3AHj+LO13FMmm6cl7785vven0kBjHCBhfpYuNa5Ph1c0e2Xu3jEvWG7UdPDLZ4N92nOm+EBXuAbHmIMSRMs+4aUEd4Nd3CHD8NdvOLTsA2GL8M9PODbcL+hD7C1xoaHeLJSEao0FEW14ckxC+TU8TxvsY6X0eLPmRhry2WVioLpkrbp84LLQPGI7c6sOiUzpWIWS5GzlSgUzzLBSikOPFTOXqly7rqx
0Z1Q5BAIoZBSFihQYQOOBEdkCOgXTOHA07HAGjGWiIjaPZNW13/+lm6S9FT7rLHFJ6fQbkATOG1j2OFMucKJJsxIVfQORl+9Jyda6Sl1dUYhSCm1dyClfoeDve4qMYdLEbfqHf3O/AdDumsjAAB42mNgYoAAZQYjBmyAGYQZmdhL8zLdDEydARfoAqIAAAABAAMABwAKABMAB///AA8AAQAAAAAAAAAAAAAAAAABAAAAAA==) format('woff'); 4 | } 5 | 6 | .markdown-body { 7 | -ms-text-size-adjust: 100%; 8 | -webkit-text-size-adjust: 100%; 9 | line-height: 1.5; 10 | color: #24292e; 11 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; 12 | font-size: 16px; 13 | line-height: 1.5; 14 | word-wrap: break-word; 15 | } 16 | 17 | .markdown-body .pl-c { 18 | color: #6a737d; 19 | } 20 | 21 | .markdown-body .pl-c1, 22 | .markdown-body .pl-s .pl-v { 23 | color: #005cc5; 24 | } 25 | 26 | .markdown-body .pl-e, 27 | .markdown-body .pl-en { 28 | color: #6f42c1; 29 | } 30 | 31 | .markdown-body .pl-smi, 32 | .markdown-body .pl-s .pl-s1 { 33 | color: #24292e; 34 | } 35 | 36 | .markdown-body .pl-ent { 37 | color: #22863a; 38 | } 39 | 40 | .markdown-body .pl-k { 41 | color: #d73a49; 42 | } 43 | 44 | .markdown-body .pl-s, 45 | .markdown-body .pl-pds, 46 | .markdown-body .pl-s .pl-pse .pl-s1, 47 | .markdown-body .pl-sr, 48 | .markdown-body .pl-sr .pl-cce, 49 | .markdown-body .pl-sr .pl-sre, 50 | .markdown-body .pl-sr .pl-sra { 51 | color: #032f62; 52 | } 53 | 54 | .markdown-body .pl-v, 55 | .markdown-body .pl-smw { 56 | color: #e36209; 57 | } 58 | 59 | .markdown-body .pl-bu { 60 | color: #b31d28; 61 | } 62 | 63 | .markdown-body .pl-ii { 64 | color: #fafbfc; 65 | background-color: #b31d28; 66 | } 67 | 68 | .markdown-body .pl-c2 { 69 | color: #fafbfc; 70 | background-color: #d73a49; 71 | } 72 | 73 | .markdown-body .pl-c2::before { 74 | content: "^M"; 75 | } 76 | 77 | .markdown-body .pl-sr .pl-cce { 78 | font-weight: bold; 79 | color: #22863a; 80 | } 81 | 82 | .markdown-body .pl-ml { 83 | color: #735c0f; 84 | } 85 | 86 | .markdown-body .pl-mh, 87 | .markdown-body .pl-mh .pl-en, 
88 | .markdown-body .pl-ms { 89 | font-weight: bold; 90 | color: #005cc5; 91 | } 92 | 93 | .markdown-body .pl-mi { 94 | font-style: italic; 95 | color: #24292e; 96 | } 97 | 98 | .markdown-body .pl-mb { 99 | font-weight: bold; 100 | color: #24292e; 101 | } 102 | 103 | .markdown-body .pl-md { 104 | color: #b31d28; 105 | background-color: #ffeef0; 106 | } 107 | 108 | .markdown-body .pl-mi1 { 109 | color: #22863a; 110 | background-color: #f0fff4; 111 | } 112 | 113 | .markdown-body .pl-mc { 114 | color: #e36209; 115 | background-color: #ffebda; 116 | } 117 | 118 | .markdown-body .pl-mi2 { 119 | color: #f6f8fa; 120 | background-color: #005cc5; 121 | } 122 | 123 | .markdown-body .pl-mdr { 124 | font-weight: bold; 125 | color: #6f42c1; 126 | } 127 | 128 | .markdown-body .pl-ba { 129 | color: #586069; 130 | } 131 | 132 | .markdown-body .pl-sg { 133 | color: #959da5; 134 | } 135 | 136 | .markdown-body .pl-corl { 137 | text-decoration: underline; 138 | color: #032f62; 139 | } 140 | 141 | .markdown-body .octicon { 142 | display: inline-block; 143 | vertical-align: text-top; 144 | fill: currentColor; 145 | } 146 | 147 | .markdown-body a { 148 | background-color: transparent; 149 | } 150 | 151 | .markdown-body a:active, 152 | .markdown-body a:hover { 153 | outline-width: 0; 154 | } 155 | 156 | .markdown-body strong { 157 | font-weight: inherit; 158 | } 159 | 160 | .markdown-body strong { 161 | font-weight: bolder; 162 | } 163 | 164 | .markdown-body h1 { 165 | font-size: 2em; 166 | margin: 0.67em 0; 167 | } 168 | 169 | .markdown-body img { 170 | border-style: none; 171 | } 172 | 173 | .markdown-body code, 174 | .markdown-body kbd, 175 | .markdown-body pre { 176 | font-family: monospace, monospace; 177 | font-size: 1em; 178 | } 179 | 180 | .markdown-body hr { 181 | box-sizing: content-box; 182 | height: 0; 183 | overflow: visible; 184 | } 185 | 186 | .markdown-body input { 187 | font: inherit; 188 | margin: 0; 189 | } 190 | 191 | .markdown-body input { 192 | overflow: visible; 193 
| } 194 | 195 | .markdown-body [type="checkbox"] { 196 | box-sizing: border-box; 197 | padding: 0; 198 | } 199 | 200 | .markdown-body * { 201 | box-sizing: border-box; 202 | } 203 | 204 | .markdown-body input { 205 | font-family: inherit; 206 | font-size: inherit; 207 | line-height: inherit; 208 | } 209 | 210 | .markdown-body a { 211 | color: #0366d6; 212 | text-decoration: none; 213 | } 214 | 215 | .markdown-body a:hover { 216 | text-decoration: underline; 217 | } 218 | 219 | .markdown-body strong { 220 | font-weight: 600; 221 | } 222 | 223 | .markdown-body hr { 224 | height: 0; 225 | margin: 15px 0; 226 | overflow: hidden; 227 | background: transparent; 228 | border: 0; 229 | border-bottom: 1px solid #dfe2e5; 230 | } 231 | 232 | .markdown-body hr::before { 233 | display: table; 234 | content: ""; 235 | } 236 | 237 | .markdown-body hr::after { 238 | display: table; 239 | clear: both; 240 | content: ""; 241 | } 242 | 243 | .markdown-body table { 244 | border-spacing: 0; 245 | border-collapse: collapse; 246 | } 247 | 248 | .markdown-body td, 249 | .markdown-body th { 250 | padding: 0; 251 | } 252 | 253 | .markdown-body h1, 254 | .markdown-body h2, 255 | .markdown-body h3, 256 | .markdown-body h4, 257 | .markdown-body h5, 258 | .markdown-body h6 { 259 | margin-top: 0; 260 | margin-bottom: 0; 261 | } 262 | 263 | .markdown-body h1 { 264 | font-size: 32px; 265 | font-weight: 600; 266 | } 267 | 268 | .markdown-body h2 { 269 | font-size: 24px; 270 | font-weight: 600; 271 | } 272 | 273 | .markdown-body h3 { 274 | font-size: 20px; 275 | font-weight: 600; 276 | } 277 | 278 | .markdown-body h4 { 279 | font-size: 16px; 280 | font-weight: 600; 281 | } 282 | 283 | .markdown-body h5 { 284 | font-size: 14px; 285 | font-weight: 600; 286 | } 287 | 288 | .markdown-body h6 { 289 | font-size: 12px; 290 | font-weight: 600; 291 | } 292 | 293 | .markdown-body p { 294 | margin-top: 0; 295 | margin-bottom: 10px; 296 | } 297 | 298 | .markdown-body blockquote { 299 | margin: 0; 300 | } 301 | 
302 | .markdown-body ul, 303 | .markdown-body ol { 304 | padding-left: 0; 305 | margin-top: 0; 306 | margin-bottom: 0; 307 | } 308 | 309 | .markdown-body ol ol, 310 | .markdown-body ul ol { 311 | list-style-type: lower-roman; 312 | } 313 | 314 | .markdown-body ul ul ol, 315 | .markdown-body ul ol ol, 316 | .markdown-body ol ul ol, 317 | .markdown-body ol ol ol { 318 | list-style-type: lower-alpha; 319 | } 320 | 321 | .markdown-body dd { 322 | margin-left: 0; 323 | } 324 | 325 | .markdown-body code { 326 | font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 327 | font-size: 12px; 328 | } 329 | 330 | .markdown-body pre { 331 | margin-top: 0; 332 | margin-bottom: 0; 333 | font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 334 | font-size: 12px; 335 | } 336 | 337 | .markdown-body .octicon { 338 | vertical-align: text-bottom; 339 | } 340 | 341 | .markdown-body .pl-0 { 342 | padding-left: 0 !important; 343 | } 344 | 345 | .markdown-body .pl-1 { 346 | padding-left: 4px !important; 347 | } 348 | 349 | .markdown-body .pl-2 { 350 | padding-left: 8px !important; 351 | } 352 | 353 | .markdown-body .pl-3 { 354 | padding-left: 16px !important; 355 | } 356 | 357 | .markdown-body .pl-4 { 358 | padding-left: 24px !important; 359 | } 360 | 361 | .markdown-body .pl-5 { 362 | padding-left: 32px !important; 363 | } 364 | 365 | .markdown-body .pl-6 { 366 | padding-left: 40px !important; 367 | } 368 | 369 | .markdown-body::before { 370 | display: table; 371 | content: ""; 372 | } 373 | 374 | .markdown-body::after { 375 | display: table; 376 | clear: both; 377 | content: ""; 378 | } 379 | 380 | .markdown-body>*:first-child { 381 | margin-top: 0 !important; 382 | } 383 | 384 | .markdown-body>*:last-child { 385 | margin-bottom: 0 !important; 386 | } 387 | 388 | .markdown-body a:not([href]) { 389 | color: inherit; 390 | text-decoration: none; 391 | } 392 | 393 | .markdown-body .anchor { 394 | float: left; 395 | 
padding-right: 4px; 396 | margin-left: -20px; 397 | line-height: 1; 398 | } 399 | 400 | .markdown-body .anchor:focus { 401 | outline: none; 402 | } 403 | 404 | .markdown-body p, 405 | .markdown-body blockquote, 406 | .markdown-body ul, 407 | .markdown-body ol, 408 | .markdown-body dl, 409 | .markdown-body table, 410 | .markdown-body pre { 411 | margin-top: 0; 412 | margin-bottom: 16px; 413 | } 414 | 415 | .markdown-body hr { 416 | height: 0.25em; 417 | padding: 0; 418 | margin: 24px 0; 419 | background-color: #e1e4e8; 420 | border: 0; 421 | } 422 | 423 | .markdown-body blockquote { 424 | padding: 0 1em; 425 | color: #6a737d; 426 | border-left: 0.25em solid #dfe2e5; 427 | } 428 | 429 | .markdown-body blockquote>:first-child { 430 | margin-top: 0; 431 | } 432 | 433 | .markdown-body blockquote>:last-child { 434 | margin-bottom: 0; 435 | } 436 | 437 | .markdown-body kbd { 438 | display: inline-block; 439 | padding: 3px 5px; 440 | font-size: 11px; 441 | line-height: 10px; 442 | color: #444d56; 443 | vertical-align: middle; 444 | background-color: #fafbfc; 445 | border: solid 1px #c6cbd1; 446 | border-bottom-color: #959da5; 447 | border-radius: 3px; 448 | box-shadow: inset 0 -1px 0 #959da5; 449 | } 450 | 451 | .markdown-body h1, 452 | .markdown-body h2, 453 | .markdown-body h3, 454 | .markdown-body h4, 455 | .markdown-body h5, 456 | .markdown-body h6 { 457 | margin-top: 24px; 458 | margin-bottom: 16px; 459 | font-weight: 600; 460 | line-height: 1.25; 461 | } 462 | 463 | .markdown-body h1 .octicon-link, 464 | .markdown-body h2 .octicon-link, 465 | .markdown-body h3 .octicon-link, 466 | .markdown-body h4 .octicon-link, 467 | .markdown-body h5 .octicon-link, 468 | .markdown-body h6 .octicon-link { 469 | color: #1b1f23; 470 | vertical-align: middle; 471 | visibility: hidden; 472 | } 473 | 474 | .markdown-body h1:hover .anchor, 475 | .markdown-body h2:hover .anchor, 476 | .markdown-body h3:hover .anchor, 477 | .markdown-body h4:hover .anchor, 478 | .markdown-body h5:hover 
.anchor, 479 | .markdown-body h6:hover .anchor { 480 | text-decoration: none; 481 | } 482 | 483 | .markdown-body h1:hover .anchor .octicon-link, 484 | .markdown-body h2:hover .anchor .octicon-link, 485 | .markdown-body h3:hover .anchor .octicon-link, 486 | .markdown-body h4:hover .anchor .octicon-link, 487 | .markdown-body h5:hover .anchor .octicon-link, 488 | .markdown-body h6:hover .anchor .octicon-link { 489 | visibility: visible; 490 | } 491 | 492 | .markdown-body h1 { 493 | padding-bottom: 0.3em; 494 | font-size: 2em; 495 | border-bottom: 1px solid #eaecef; 496 | } 497 | 498 | .markdown-body h2 { 499 | padding-bottom: 0.3em; 500 | font-size: 1.5em; 501 | border-bottom: 1px solid #eaecef; 502 | } 503 | 504 | .markdown-body h3 { 505 | font-size: 1.25em; 506 | } 507 | 508 | .markdown-body h4 { 509 | font-size: 1em; 510 | } 511 | 512 | .markdown-body h5 { 513 | font-size: 0.875em; 514 | } 515 | 516 | .markdown-body h6 { 517 | font-size: 0.85em; 518 | color: #6a737d; 519 | } 520 | 521 | .markdown-body ul, 522 | .markdown-body ol { 523 | padding-left: 2em; 524 | } 525 | 526 | .markdown-body ul ul, 527 | .markdown-body ul ol, 528 | .markdown-body ol ol, 529 | .markdown-body ol ul { 530 | margin-top: 0; 531 | margin-bottom: 0; 532 | } 533 | 534 | .markdown-body li { 535 | word-wrap: break-all; 536 | } 537 | 538 | .markdown-body li>p { 539 | margin-top: 16px; 540 | } 541 | 542 | .markdown-body li+li { 543 | margin-top: 0.25em; 544 | } 545 | 546 | .markdown-body dl { 547 | padding: 0; 548 | } 549 | 550 | .markdown-body dl dt { 551 | padding: 0; 552 | margin-top: 16px; 553 | font-size: 1em; 554 | font-style: italic; 555 | font-weight: 600; 556 | } 557 | 558 | .markdown-body dl dd { 559 | padding: 0 16px; 560 | margin-bottom: 16px; 561 | } 562 | 563 | .markdown-body table { 564 | display: block; 565 | width: 100%; 566 | overflow: auto; 567 | } 568 | 569 | .markdown-body table th { 570 | font-weight: 600; 571 | } 572 | 573 | .markdown-body table th, 574 | .markdown-body 
table td { 575 | padding: 6px 13px; 576 | border: 1px solid #dfe2e5; 577 | } 578 | /* Table rows: white background with a top border; even rows are tinted. */ 579 | .markdown-body table tr { 580 | background-color: #fff; 581 | border-top: 1px solid #c6cbd1; 582 | } 583 | 584 | .markdown-body table tr:nth-child(2n) { 585 | background-color: #f6f8fa; 586 | } 587 | /* Images never exceed the container width; align=left/right images get side padding. */ 588 | .markdown-body img { 589 | max-width: 100%; 590 | box-sizing: content-box; 591 | background-color: #fff; 592 | } 593 | 594 | .markdown-body img[align=right] { 595 | padding-left: 20px; 596 | } 597 | 598 | .markdown-body img[align=left] { 599 | padding-right: 20px; 600 | } 601 | /* Inline code: padded box with a translucent background. */ 602 | .markdown-body code { 603 | padding: 0.2em 0.4em; 604 | margin: 0; 605 | font-size: 85%; 606 | background-color: rgba(27,31,35,0.05); 607 | border-radius: 3px; 608 | } 609 | 610 | .markdown-body pre { 611 | word-wrap: normal; 612 | } 613 | /* Code directly inside pre drops the inline-code padding, background and border. */ 614 | .markdown-body pre>code { 615 | padding: 0; 616 | margin: 0; 617 | font-size: 100%; 618 | word-break: normal; 619 | white-space: pre; 620 | background: transparent; 621 | border: 0; 622 | } 623 | 624 | .markdown-body .highlight { 625 | margin-bottom: 16px; 626 | } 627 | 628 | .markdown-body .highlight pre { 629 | margin-bottom: 0; 630 | word-break: normal; 631 | } 632 | /* Code blocks: padded, tinted box with overflow: auto. */ 633 | .markdown-body .highlight pre, 634 | .markdown-body pre { 635 | padding: 16px; 636 | overflow: auto; 637 | font-size: 85%; 638 | line-height: 1.45; 639 | background-color: #f6f8fa; 640 | border-radius: 3px; 641 | } 642 | /* max-width takes none (its initial value) to mean "no limit"; the former value auto is not valid for max-width and was discarded. */ 643 | .markdown-body pre code { 644 | display: inline; 645 | max-width: none; 646 | padding: 0; 647 | margin: 0; 648 | overflow: visible; 649 | line-height: inherit; 650 | word-wrap: normal; 651 | background-color: transparent; 652 | border: 0; 653 | } 654 | 655 | .markdown-body .full-commit .btn-outline:not(:disabled):hover { 656 | color: #005cc5; 657 | border-color: #005cc5; 658 | } 659 | /* Second kbd rule in this stylesheet: sets the font via the font shorthand (11px monospace stack). */ 660 | .markdown-body kbd { 661 | display: inline-block; 662 | padding: 3px 5px; 663 | font: 11px "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 664 | 
line-height: 10px; 665 | color: #444d56; 666 | vertical-align: middle; 667 | background-color: #fafbfc; 668 | border: solid 1px #d1d5da; 669 | border-bottom-color: #c6cbd1; 670 | border-radius: 3px; 671 | box-shadow: inset 0 -1px 0 #c6cbd1; 672 | } 673 | 674 | .markdown-body :checked+.radio-label { 675 | position: relative; 676 | z-index: 1; 677 | border-color: #0366d6; 678 | } 679 | /* Task-list items: no list marker, 3px gap between consecutive items, input pulled left by a negative margin. */ 680 | .markdown-body .task-list-item { 681 | list-style-type: none; 682 | } 683 | 684 | .markdown-body .task-list-item+.task-list-item { 685 | margin-top: 3px; 686 | } 687 | 688 | .markdown-body .task-list-item input { 689 | margin: 0 0.2em 0.25em -1.6em; 690 | vertical-align: middle; 691 | } 692 | 693 | .markdown-body hr { 694 | border-bottom-color: #eee; 695 | } 696 | -------------------------------------------------------------------------------- /examples/report/css/github-markdown.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: octicons-link; 3 | src: 
url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAZwABAAAAAACFQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEU0lHAAAGaAAAAAgAAAAIAAAAAUdTVUIAAAZcAAAACgAAAAoAAQAAT1MvMgAAAyQAAABJAAAAYFYEU3RjbWFwAAADcAAAAEUAAACAAJThvmN2dCAAAATkAAAABAAAAAQAAAAAZnBnbQAAA7gAAACyAAABCUM+8IhnYXNwAAAGTAAAABAAAAAQABoAI2dseWYAAAFsAAABPAAAAZwcEq9taGVhZAAAAsgAAAA0AAAANgh4a91oaGVhAAADCAAAABoAAAAkCA8DRGhtdHgAAAL8AAAADAAAAAwGAACfbG9jYQAAAsAAAAAIAAAACABiATBtYXhwAAACqAAAABgAAAAgAA8ASm5hbWUAAAToAAABQgAAAlXu73sOcG9zdAAABiwAAAAeAAAAME3QpOBwcmVwAAAEbAAAAHYAAAB/aFGpk3jaTY6xa8JAGMW/O62BDi0tJLYQincXEypYIiGJjSgHniQ6umTsUEyLm5BV6NDBP8Tpts6F0v+k/0an2i+itHDw3v2+9+DBKTzsJNnWJNTgHEy4BgG3EMI9DCEDOGEXzDADU5hBKMIgNPZqoD3SilVaXZCER3/I7AtxEJLtzzuZfI+VVkprxTlXShWKb3TBecG11rwoNlmmn1P2WYcJczl32etSpKnziC7lQyWe1smVPy/Lt7Kc+0vWY/gAgIIEqAN9we0pwKXreiMasxvabDQMM4riO+qxM2ogwDGOZTXxwxDiycQIcoYFBLj5K3EIaSctAq2kTYiw+ymhce7vwM9jSqO8JyVd5RH9gyTt2+J/yUmYlIR0s04n6+7Vm1ozezUeLEaUjhaDSuXHwVRgvLJn1tQ7xiuVv/ocTRF42mNgZGBgYGbwZOBiAAFGJBIMAAizAFoAAABiAGIAznjaY2BkYGAA4in8zwXi+W2+MjCzMIDApSwvXzC97Z4Ig8N/BxYGZgcgl52BCSQKAA3jCV8CAABfAAAAAAQAAEB42mNgZGBg4f3vACQZQABIMjKgAmYAKEgBXgAAeNpjYGY6wTiBgZWBg2kmUxoDA4MPhGZMYzBi1AHygVLYQUCaawqDA4PChxhmh/8ODDEsvAwHgMKMIDnGL0x7gJQCAwMAJd4MFwAAAHjaY2BgYGaA4DAGRgYQkAHyGMF8NgYrIM3JIAGVYYDT+AEjAwuDFpBmA9KMDEwMCh9i/v8H8sH0/4dQc1iAmAkALaUKLgAAAHjaTY9LDsIgEIbtgqHUPpDi3gPoBVyRTmTddOmqTXThEXqrob2gQ1FjwpDvfwCBdmdXC5AVKFu3e5MfNFJ29KTQT48Ob9/lqYwOGZxeUelN2U2R6+cArgtCJpauW7UQBqnFkUsjAY/kOU1cP+DAgvxwn1chZDwUbd6CFimGXwzwF6tPbFIcjEl+vvmM/byA48e6tWrKArm4ZJlCbdsrxksL1AwWn/yBSJKpYbq8AXaaTb8AAHja28jAwOC00ZrBeQNDQOWO//sdBBgYGRiYWYAEELEwMTE4uzo5Zzo5b2BxdnFOcALxNjA6b2ByTswC8jYwg0VlNuoCTWAMqNzMzsoK1rEhNqByEyerg5PMJlYuVueETKcd/89uBpnpvIEVomeHLoMsAAe1Id4AAAAAAAB42oWQT07CQBTGv0JBhagk7HQzKxca2sJCE1hDt4QF+9JOS0nbaaYDCQfwCJ7Au3AHj+LO13FMmm6cl7785vven0kBjHCBhfpYuNa5Ph1c0e2Xu3jEvWG7UdPDLZ4N92nOm+EBXuAbHmIMSRMs+4aUEd4Nd3CHD8NdvOLTsA2GL8M9PODbcL+hD7C1xoaHeLJSEao0FEW14ckxC+TU8TxvsY6X0eLPmRhry2WVioLpkrbp84LLQPGI7c6sOiUzpWIWS5GzlSgUzzLBSikOPFTOXqly7rqx
0Z1Q5BAIoZBSFihQYQOOBEdkCOgXTOHA07HAGjGWiIjaPZNW13/+lm6S9FT7rLHFJ6fQbkATOG1j2OFMucKJJsxIVfQORl+9Jyda6Sl1dUYhSCm1dyClfoeDve4qMYdLEbfqHf3O/AdDumsjAAB42mNgYoAAZQYjBmyAGYQZmdhL8zLdDEydARfoAqIAAAABAAMABwAKABMAB///AA8AAQAAAAAAAAAAAAAAAAABAAAAAA==) format('woff'); 4 | } 5 | 6 | .markdown-body { 7 | -ms-text-size-adjust: 100%; 8 | -webkit-text-size-adjust: 100%; 9 | line-height: 1.5; 10 | color: #24292e; 11 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; 12 | font-size: 16px; 13 | line-height: 1.5; 14 | word-wrap: break-word; 15 | } 16 | 17 | .markdown-body .pl-c { 18 | color: #6a737d; 19 | } 20 | 21 | .markdown-body .pl-c1, 22 | .markdown-body .pl-s .pl-v { 23 | color: #005cc5; 24 | } 25 | 26 | .markdown-body .pl-e, 27 | .markdown-body .pl-en { 28 | color: #6f42c1; 29 | } 30 | 31 | .markdown-body .pl-smi, 32 | .markdown-body .pl-s .pl-s1 { 33 | color: #24292e; 34 | } 35 | 36 | .markdown-body .pl-ent { 37 | color: #22863a; 38 | } 39 | 40 | .markdown-body .pl-k { 41 | color: #d73a49; 42 | } 43 | 44 | .markdown-body .pl-s, 45 | .markdown-body .pl-pds, 46 | .markdown-body .pl-s .pl-pse .pl-s1, 47 | .markdown-body .pl-sr, 48 | .markdown-body .pl-sr .pl-cce, 49 | .markdown-body .pl-sr .pl-sre, 50 | .markdown-body .pl-sr .pl-sra { 51 | color: #032f62; 52 | } 53 | 54 | .markdown-body .pl-v, 55 | .markdown-body .pl-smw { 56 | color: #e36209; 57 | } 58 | 59 | .markdown-body .pl-bu { 60 | color: #b31d28; 61 | } 62 | 63 | .markdown-body .pl-ii { 64 | color: #fafbfc; 65 | background-color: #b31d28; 66 | } 67 | 68 | .markdown-body .pl-c2 { 69 | color: #fafbfc; 70 | background-color: #d73a49; 71 | } 72 | 73 | .markdown-body .pl-c2::before { 74 | content: "^M"; 75 | } 76 | 77 | .markdown-body .pl-sr .pl-cce { 78 | font-weight: bold; 79 | color: #22863a; 80 | } 81 | 82 | .markdown-body .pl-ml { 83 | color: #735c0f; 84 | } 85 | 86 | .markdown-body .pl-mh, 87 | .markdown-body .pl-mh .pl-en, 
88 | .markdown-body .pl-ms { 89 | font-weight: bold; 90 | color: #005cc5; 91 | } 92 | 93 | .markdown-body .pl-mi { 94 | font-style: italic; 95 | color: #24292e; 96 | } 97 | 98 | .markdown-body .pl-mb { 99 | font-weight: bold; 100 | color: #24292e; 101 | } 102 | /* The following pl-* classes pair a text colour with a background colour. */ 103 | .markdown-body .pl-md { 104 | color: #b31d28; 105 | background-color: #ffeef0; 106 | } 107 | 108 | .markdown-body .pl-mi1 { 109 | color: #22863a; 110 | background-color: #f0fff4; 111 | } 112 | 113 | .markdown-body .pl-mc { 114 | color: #e36209; 115 | background-color: #ffebda; 116 | } 117 | 118 | .markdown-body .pl-mi2 { 119 | color: #f6f8fa; 120 | background-color: #005cc5; 121 | } 122 | 123 | .markdown-body .pl-mdr { 124 | font-weight: bold; 125 | color: #6f42c1; 126 | } 127 | 128 | .markdown-body .pl-ba { 129 | color: #586069; 130 | } 131 | 132 | .markdown-body .pl-sg { 133 | color: #959da5; 134 | } 135 | 136 | .markdown-body .pl-corl { 137 | text-decoration: underline; 138 | color: #032f62; 139 | } 140 | /* Icons: inline-block, filled with the current text colour. */ 141 | .markdown-body .octicon { 142 | display: inline-block; 143 | vertical-align: text-top; 144 | fill: currentColor; 145 | } 146 | 147 | .markdown-body a { 148 | background-color: transparent; 149 | } 150 | 151 | .markdown-body a:active, 152 | .markdown-body a:hover { 153 | outline-width: 0; 154 | } 155 | /* NOTE(review): two consecutive strong rules with the same selector; the later declaration (bolder) takes precedence over inherit. */ 156 | .markdown-body strong { 157 | font-weight: inherit; 158 | } 159 | 160 | .markdown-body strong { 161 | font-weight: bolder; 162 | } 163 | 164 | .markdown-body h1 { 165 | font-size: 2em; 166 | margin: 0.67em 0; 167 | } 168 | 169 | .markdown-body img { 170 | border-style: none; 171 | } 172 | /* NOTE(review): the doubled "monospace, monospace" looks like the normalize.css idiom — confirm before simplifying. */ 173 | .markdown-body code, 174 | .markdown-body kbd, 175 | .markdown-body pre { 176 | font-family: monospace, monospace; 177 | font-size: 1em; 178 | } 179 | 180 | .markdown-body hr { 181 | box-sizing: content-box; 182 | height: 0; 183 | overflow: visible; 184 | } 185 | 186 | .markdown-body input { 187 | font: inherit; 188 | margin: 0; 189 | } 190 | 191 | .markdown-body input { 192 | overflow: visible; 193 
| } 194 | 195 | .markdown-body [type="checkbox"] { 196 | box-sizing: border-box; 197 | padding: 0; 198 | } 199 | /* Every descendant of the container uses border-box sizing. */ 200 | .markdown-body * { 201 | box-sizing: border-box; 202 | } 203 | 204 | .markdown-body input { 205 | font-family: inherit; 206 | font-size: inherit; 207 | line-height: inherit; 208 | } 209 | /* Links: blue, underlined only on hover. */ 210 | .markdown-body a { 211 | color: #0366d6; 212 | text-decoration: none; 213 | } 214 | 215 | .markdown-body a:hover { 216 | text-decoration: underline; 217 | } 218 | 219 | .markdown-body strong { 220 | font-weight: 600; 221 | } 222 | /* hr as a zero-height box with a 1px bottom border; the pseudo-elements below add generated table boxes and ::after clears floats. */ 223 | .markdown-body hr { 224 | height: 0; 225 | margin: 15px 0; 226 | overflow: hidden; 227 | background: transparent; 228 | border: 0; 229 | border-bottom: 1px solid #dfe2e5; 230 | } 231 | 232 | .markdown-body hr::before { 233 | display: table; 234 | content: ""; 235 | } 236 | 237 | .markdown-body hr::after { 238 | display: table; 239 | clear: both; 240 | content: ""; 241 | } 242 | /* Tables: collapsed borders, no cell spacing, no cell padding. */ 243 | .markdown-body table { 244 | border-spacing: 0; 245 | border-collapse: collapse; 246 | } 247 | 248 | .markdown-body td, 249 | .markdown-body th { 250 | padding: 0; 251 | } 252 | /* Headings: zero vertical margins here; per-level px sizes and weight 600 follow. */ 253 | .markdown-body h1, 254 | .markdown-body h2, 255 | .markdown-body h3, 256 | .markdown-body h4, 257 | .markdown-body h5, 258 | .markdown-body h6 { 259 | margin-top: 0; 260 | margin-bottom: 0; 261 | } 262 | 263 | .markdown-body h1 { 264 | font-size: 32px; 265 | font-weight: 600; 266 | } 267 | 268 | .markdown-body h2 { 269 | font-size: 24px; 270 | font-weight: 600; 271 | } 272 | 273 | .markdown-body h3 { 274 | font-size: 20px; 275 | font-weight: 600; 276 | } 277 | 278 | .markdown-body h4 { 279 | font-size: 16px; 280 | font-weight: 600; 281 | } 282 | 283 | .markdown-body h5 { 284 | font-size: 14px; 285 | font-weight: 600; 286 | } 287 | 288 | .markdown-body h6 { 289 | font-size: 12px; 290 | font-weight: 600; 291 | } 292 | 293 | .markdown-body p { 294 | margin-top: 0; 295 | margin-bottom: 10px; 296 | } 297 | 298 | .markdown-body blockquote { 299 | margin: 0; 300 | } 301 | 
302 | .markdown-body ul, 303 | .markdown-body ol { 304 | padding-left: 0; 305 | margin-top: 0; 306 | margin-bottom: 0; 307 | } 308 | /* Nested ordered lists: lower-roman markers at the second level, lower-alpha at the third. */ 309 | .markdown-body ol ol, 310 | .markdown-body ul ol { 311 | list-style-type: lower-roman; 312 | } 313 | 314 | .markdown-body ul ul ol, 315 | .markdown-body ul ol ol, 316 | .markdown-body ol ul ol, 317 | .markdown-body ol ol ol { 318 | list-style-type: lower-alpha; 319 | } 320 | 321 | .markdown-body dd { 322 | margin-left: 0; 323 | } 324 | /* code and pre share the same 12px monospace font stack. */ 325 | .markdown-body code { 326 | font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 327 | font-size: 12px; 328 | } 329 | 330 | .markdown-body pre { 331 | margin-top: 0; 332 | margin-bottom: 0; 333 | font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 334 | font-size: 12px; 335 | } 336 | 337 | .markdown-body .octicon { 338 | vertical-align: text-bottom; 339 | } 340 | /* pl-0 … pl-6: padding-left steps from 0 to 40px, forced with !important. */ 341 | .markdown-body .pl-0 { 342 | padding-left: 0 !important; 343 | } 344 | 345 | .markdown-body .pl-1 { 346 | padding-left: 4px !important; 347 | } 348 | 349 | .markdown-body .pl-2 { 350 | padding-left: 8px !important; 351 | } 352 | 353 | .markdown-body .pl-3 { 354 | padding-left: 16px !important; 355 | } 356 | 357 | .markdown-body .pl-4 { 358 | padding-left: 24px !important; 359 | } 360 | 361 | .markdown-body .pl-5 { 362 | padding-left: 32px !important; 363 | } 364 | 365 | .markdown-body .pl-6 { 366 | padding-left: 40px !important; 367 | } 368 | /* Generated table boxes before and after the container; ::after also clears floats. */ 369 | .markdown-body::before { 370 | display: table; 371 | content: ""; 372 | } 373 | 374 | .markdown-body::after { 375 | display: table; 376 | clear: both; 377 | content: ""; 378 | } 379 | /* The container's first and last children lose their outer vertical margin. */ 380 | .markdown-body>*:first-child { 381 | margin-top: 0 !important; 382 | } 383 | 384 | .markdown-body>*:last-child { 385 | margin-bottom: 0 !important; 386 | } 387 | /* Anchors without an href inherit the text colour and are not underlined. */ 388 | .markdown-body a:not([href]) { 389 | color: inherit; 390 | text-decoration: none; 391 | } 392 | 393 | .markdown-body .anchor { 394 | float: left; 395 | 
padding-right: 4px; 396 | margin-left: -20px; 397 | line-height: 1; 398 | } 399 | 400 | .markdown-body .anchor:focus { 401 | outline: none; 402 | } 403 | /* Block-level content: no top margin, 16px bottom margin. */ 404 | .markdown-body p, 405 | .markdown-body blockquote, 406 | .markdown-body ul, 407 | .markdown-body ol, 408 | .markdown-body dl, 409 | .markdown-body table, 410 | .markdown-body pre { 411 | margin-top: 0; 412 | margin-bottom: 16px; 413 | } 414 | /* Horizontal rule: a 0.25em-high bar filled with a background colour, no border. */ 415 | .markdown-body hr { 416 | height: 0.25em; 417 | padding: 0; 418 | margin: 24px 0; 419 | background-color: #e1e4e8; 420 | border: 0; 421 | } 422 | /* Blockquote: grey text with a left border; first/last child margins are trimmed below. */ 423 | .markdown-body blockquote { 424 | padding: 0 1em; 425 | color: #6a737d; 426 | border-left: 0.25em solid #dfe2e5; 427 | } 428 | 429 | .markdown-body blockquote>:first-child { 430 | margin-top: 0; 431 | } 432 | 433 | .markdown-body blockquote>:last-child { 434 | margin-bottom: 0; 435 | } 436 | /* kbd: inline-block box with a border and an inset bottom shadow. */ 437 | .markdown-body kbd { 438 | display: inline-block; 439 | padding: 3px 5px; 440 | font-size: 11px; 441 | line-height: 10px; 442 | color: #444d56; 443 | vertical-align: middle; 444 | background-color: #fafbfc; 445 | border: solid 1px #c6cbd1; 446 | border-bottom-color: #959da5; 447 | border-radius: 3px; 448 | box-shadow: inset 0 -1px 0 #959da5; 449 | } 450 | /* Spacing, weight and line-height shared by all heading levels. */ 451 | .markdown-body h1, 452 | .markdown-body h2, 453 | .markdown-body h3, 454 | .markdown-body h4, 455 | .markdown-body h5, 456 | .markdown-body h6 { 457 | margin-top: 24px; 458 | margin-bottom: 16px; 459 | font-weight: 600; 460 | line-height: 1.25; 461 | } 462 | /* The link icon inside a heading is hidden by default (visibility: hidden). */ 463 | .markdown-body h1 .octicon-link, 464 | .markdown-body h2 .octicon-link, 465 | .markdown-body h3 .octicon-link, 466 | .markdown-body h4 .octicon-link, 467 | .markdown-body h5 .octicon-link, 468 | .markdown-body h6 .octicon-link { 469 | color: #1b1f23; 470 | vertical-align: middle; 471 | visibility: hidden; 472 | } 473 | 474 | .markdown-body h1:hover .anchor, 475 | .markdown-body h2:hover .anchor, 476 | .markdown-body h3:hover .anchor, 477 | .markdown-body h4:hover .anchor, 478 | .markdown-body h5:hover 
.anchor, 479 | .markdown-body h6:hover .anchor { 480 | text-decoration: none; 481 | } 482 | /* Hovering a heading makes its link icon visible. */ 483 | .markdown-body h1:hover .anchor .octicon-link, 484 | .markdown-body h2:hover .anchor .octicon-link, 485 | .markdown-body h3:hover .anchor .octicon-link, 486 | .markdown-body h4:hover .anchor .octicon-link, 487 | .markdown-body h5:hover .anchor .octicon-link, 488 | .markdown-body h6:hover .anchor .octicon-link { 489 | visibility: visible; 490 | } 491 | /* Heading sizes in em; h1 and h2 also get a bottom border. */ 492 | .markdown-body h1 { 493 | padding-bottom: 0.3em; 494 | font-size: 2em; 495 | border-bottom: 1px solid #eaecef; 496 | } 497 | 498 | .markdown-body h2 { 499 | padding-bottom: 0.3em; 500 | font-size: 1.5em; 501 | border-bottom: 1px solid #eaecef; 502 | } 503 | 504 | .markdown-body h3 { 505 | font-size: 1.25em; 506 | } 507 | 508 | .markdown-body h4 { 509 | font-size: 1em; 510 | } 511 | 512 | .markdown-body h5 { 513 | font-size: 0.875em; 514 | } 515 | 516 | .markdown-body h6 { 517 | font-size: 0.85em; 518 | color: #6a737d; 519 | } 520 | /* Lists: 2em left padding; nested lists carry no vertical margins. */ 521 | .markdown-body ul, 522 | .markdown-body ol { 523 | padding-left: 2em; 524 | } 525 | 526 | .markdown-body ul ul, 527 | .markdown-body ul ol, 528 | .markdown-body ol ol, 529 | .markdown-body ol ul { 530 | margin-top: 0; 531 | margin-bottom: 0; 532 | } 533 | /* word-wrap accepts normal | break-word; the former value break-all belongs to word-break and made this declaration invalid. */ 534 | .markdown-body li { 535 | word-wrap: break-word; 536 | } 537 | 538 | .markdown-body li>p { 539 | margin-top: 16px; 540 | } 541 | 542 | .markdown-body li+li { 543 | margin-top: 0.25em; 544 | } 545 | /* Definition lists: italic, semi-bold terms; padded definitions. */ 546 | .markdown-body dl { 547 | padding: 0; 548 | } 549 | 550 | .markdown-body dl dt { 551 | padding: 0; 552 | margin-top: 16px; 553 | font-size: 1em; 554 | font-style: italic; 555 | font-weight: 600; 556 | } 557 | 558 | .markdown-body dl dd { 559 | padding: 0 16px; 560 | margin-bottom: 16px; 561 | } 562 | /* Tables are laid out as full-width blocks with overflow: auto. */ 563 | .markdown-body table { 564 | display: block; 565 | width: 100%; 566 | overflow: auto; 567 | } 568 | 569 | .markdown-body table th { 570 | font-weight: 600; 571 | } 572 | 573 | .markdown-body table th, 574 | .markdown-body 
table td { 575 | padding: 6px 13px; 576 | border: 1px solid #dfe2e5; 577 | } 578 | /* Table rows: white background with a top border; even rows are tinted. */ 579 | .markdown-body table tr { 580 | background-color: #fff; 581 | border-top: 1px solid #c6cbd1; 582 | } 583 | 584 | .markdown-body table tr:nth-child(2n) { 585 | background-color: #f6f8fa; 586 | } 587 | /* Images never exceed the container width; align=left/right images get side padding. */ 588 | .markdown-body img { 589 | max-width: 100%; 590 | box-sizing: content-box; 591 | background-color: #fff; 592 | } 593 | 594 | .markdown-body img[align=right] { 595 | padding-left: 20px; 596 | } 597 | 598 | .markdown-body img[align=left] { 599 | padding-right: 20px; 600 | } 601 | /* Inline code: padded box with a translucent background. */ 602 | .markdown-body code { 603 | padding: 0.2em 0.4em; 604 | margin: 0; 605 | font-size: 85%; 606 | background-color: rgba(27,31,35,0.05); 607 | border-radius: 3px; 608 | } 609 | 610 | .markdown-body pre { 611 | word-wrap: normal; 612 | } 613 | /* Code directly inside pre drops the inline-code padding, background and border. */ 614 | .markdown-body pre>code { 615 | padding: 0; 616 | margin: 0; 617 | font-size: 100%; 618 | word-break: normal; 619 | white-space: pre; 620 | background: transparent; 621 | border: 0; 622 | } 623 | 624 | .markdown-body .highlight { 625 | margin-bottom: 16px; 626 | } 627 | 628 | .markdown-body .highlight pre { 629 | margin-bottom: 0; 630 | word-break: normal; 631 | } 632 | /* Code blocks: padded, tinted box with overflow: auto. */ 633 | .markdown-body .highlight pre, 634 | .markdown-body pre { 635 | padding: 16px; 636 | overflow: auto; 637 | font-size: 85%; 638 | line-height: 1.45; 639 | background-color: #f6f8fa; 640 | border-radius: 3px; 641 | } 642 | /* max-width takes none (its initial value) to mean "no limit"; the former value auto is not valid for max-width and was discarded. */ 643 | .markdown-body pre code { 644 | display: inline; 645 | max-width: none; 646 | padding: 0; 647 | margin: 0; 648 | overflow: visible; 649 | line-height: inherit; 650 | word-wrap: normal; 651 | background-color: transparent; 652 | border: 0; 653 | } 654 | 655 | .markdown-body .full-commit .btn-outline:not(:disabled):hover { 656 | color: #005cc5; 657 | border-color: #005cc5; 658 | } 659 | /* Second kbd rule in this stylesheet: sets the font via the font shorthand (11px monospace stack). */ 660 | .markdown-body kbd { 661 | display: inline-block; 662 | padding: 3px 5px; 663 | font: 11px "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; 664 | 
line-height: 10px; 665 | color: #444d56; 666 | vertical-align: middle; 667 | background-color: #fafbfc; 668 | border: solid 1px #d1d5da; 669 | border-bottom-color: #c6cbd1; 670 | border-radius: 3px; 671 | box-shadow: inset 0 -1px 0 #c6cbd1; 672 | } 673 | 674 | .markdown-body :checked+.radio-label { 675 | position: relative; 676 | z-index: 1; 677 | border-color: #0366d6; 678 | } 679 | /* Task-list items: no list marker, 3px gap between consecutive items, input pulled left by a negative margin. */ 680 | .markdown-body .task-list-item { 681 | list-style-type: none; 682 | } 683 | 684 | .markdown-body .task-list-item+.task-list-item { 685 | margin-top: 3px; 686 | } 687 | 688 | .markdown-body .task-list-item input { 689 | margin: 0 0.2em 0.25em -1.6em; 690 | vertical-align: middle; 691 | } 692 | 693 | .markdown-body hr { 694 | border-bottom-color: #eee; 695 | } 696 | -------------------------------------------------------------------------------- /examples/report/report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Report 6 | 7 | 22 | 23 | 24 | 25 |

EPUB analysis report

26 |

Report generated: 2022-11-07 15:50:28

27 |

Input file: ./example.csv

28 |

Summary

29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |
| | Count | % of all EPUBs |
|---|---|---|
| EPUBs | 12 | |
| EPUBs with errors | 7 | 58.33 |
| EPUBs with warnings | 2 | 16.67 |
| EPUBs with errors or warnings | 7 | 58.33 |
| EPUBs with less than 1000 words | 0 | 0.0 |
65 |

CSV subsets

66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 |
| | File |
|---|---|
| EPUBs with errors | errors.csv |
| EPUBs with warnings | warnings.csv |
| EPUBs with errors or warnings | errorsorwarnings.csv |
| EPUBs with less than 1000 words | wordcountlt1000.csv |
92 |

EPUB versions

93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 |
| epubVersion | Count | % of all EPUBs |
|---|---|---|
| 3.2 | 9 | 75 |
| 2.0.1 | 3 | 25 |
114 |

Frequency of validation errors

115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 |
| Code | Description | Count | % of all EPUBs |
|---|---|---|---|
| RSC-005 | Error while parsing file: %1$s | 5 | 41.67 |
| NCX-001 | NCX identifier ("%1$s") does not match OPF identifier ("%2$s"). | 2 | 16.67 |
| OPF-032 | Guide references "%1$s" which is not a valid "OPS Content Document". | 2 | 16.67 |
| RSC-012 | Fragment identifier is not defined. | 2 | 16.67 |
| CSS-020 | CSS font selector declaration uses unexpected font-size value "%1$s". | 1 | 8.33 |
| HTM-003 | External entities are not allowed in EPUB v3 documents. External entity declaration found: %1$s. | 1 | 8.33 |
| OPF-073 | External identifiers must not appear in the document type declaration. | 1 | 8.33 |
| RSC-007 | Referenced resource "%1$s" could not be found in the EPUB. | 1 | 8.33 |
| RSC-011 | Found a reference to a resource that is not a spine item. | 1 | 8.33 |
| RSC-016 | Fatal Error while parsing file: %1$s | 1 | 8.33 |
| RSC-020 | "%1$s" is not a valid URI. | 1 | 8.33 |
| OPF-031 | File listed in reference element in guide was not declared in OPF manifest: %1$s. | 1 | 8.33 |
| RSC-008 | Referenced resource "%1$s" is not declared in the OPF manifest. | 1 | 8.33 |
| OPF-030 | The unique-identifier "%1$s" was not found. | 1 | 8.33 |
211 |

212 |

CSV subsets for each error

213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 |
| Code | File |
|---|---|
| RSC-005 | error-RSC-005.csv |
| NCX-001 | error-NCX-001.csv |
| OPF-032 | error-OPF-032.csv |
| RSC-012 | error-RSC-012.csv |
| CSS-020 | error-CSS-020.csv |
| HTM-003 | error-HTM-003.csv |
| OPF-073 | error-OPF-073.csv |
| RSC-007 | error-RSC-007.csv |
| RSC-011 | error-RSC-011.csv |
| RSC-016 | error-RSC-016.csv |
| RSC-020 | error-RSC-020.csv |
| OPF-031 | error-OPF-031.csv |
| RSC-008 | error-RSC-008.csv |
| OPF-030 | error-OPF-030.csv |
279 |

Frequency of validation warnings

280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 |
| Code | Description | Count | % of all EPUBs |
|---|---|---|---|
| PKG-010 | Filename contains spaces, therefore URI escaping is necessary. Consider removing spaces from filename. | 1 | 8.33 |
| OPF-003 | Item "%1$s" exists in the EPUB, but is not declared in the OPF manifest. | 1 | 8.33 |
| OPF-055 | %1$s tag is empty. | 1 | 8.33 |
310 |

311 |

CSV subsets for each warning

312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 |
| Code | File |
|---|---|
| PKG-010 | warning-PKG-010.csv |
| OPF-003 | warning-OPF-003.csv |
| OPF-055 | warning-OPF-055.csv |
334 |

Detailed statistics

335 |

All EPUBs

336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 |
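| | noErrors | noWarnings | wordCount |
|---|---|---|---|
| count | 12 | 12 | 12 |
| mean | 1.75 | 0.25 | 77252.8 |
| std | 2.49089 | 0.621582 | 76391.1 |
| min | 0 | 0 | 2779 |
| 25% | 0 | 0 | 27156 |
| 50% | 1 | 0 | 31108 |
| 75% | 2.25 | 0 | 125338 |
| max | 8 | 2 | 218333 |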
396 |

EPUBs with errors

397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 |
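| | noErrors | noWarnings | wordCount |
|---|---|---|---|
| count | 7 | 7 | 7 |
| mean | 3 | 0.428571 | 74671.7 |
| std | 2.64575 | 0.786796 | 73166.6 |
| min | 1 | 0 | 25949 |
| 25% | 1 | 0 | 27116 |
| 50% | 2 | 0 | 30439 |
| 75% | 4 | 0.5 | 96874.5 |
| max | 8 | 2 | 218333 |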
457 |

EPUBs with warnings

458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 |
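| | noErrors | noWarnings | wordCount |
|---|---|---|---|
| count | 2 | 2 | 2 |
| mean | 6.5 | 1.5 | 49330 |
| std | 2.12132 | 0.707107 | 26715.9 |
| min | 5 | 1 | 30439 |
| 25% | 5.75 | 1.25 | 39884.5 |
| 50% | 6.5 | 1.5 | 49330 |
| 75% | 7.25 | 1.75 | 58775.5 |
| max | 8 | 2 | 68221 |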
518 |

EPUBs with errors or warnings

519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 |
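| | noErrors | noWarnings | wordCount |
|---|---|---|---|
| count | 7 | 7 | 7 |
| mean | 3 | 0.428571 | 74671.7 |
| std | 2.64575 | 0.786796 | 73166.6 |
| min | 1 | 0 | 25949 |
| 25% | 1 | 0 | 27116 |
| 50% | 2 | 0 | 30439 |
| 75% | 4 | 0.5 | 96874.5 |
| max | 8 | 2 | 218333 |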
579 |

EPUBs with less than 1000 words

580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 |
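| | noErrors | noWarnings | wordCount |
|---|---|---|---|
| count | 0 | 0 | 0 |
| mean | nan | nan | nan |
| std | nan | nan | nan |
| min | nan | nan | nan |
| 25% | nan | nan | nan |
| 50% | nan | nan | nan |
| 75% | nan | nan | nan |
| max | nan | nan | nan |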
640 |
641 | 642 | 643 | --------------------------------------------------------------------------------