├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── docs
├── README.md
├── _config.yml
├── _includes
│ ├── foot.html
│ ├── head.html
│ └── nav.html
├── _layouts
│ ├── default.html
│ └── default_exp.html
├── access_control.md
├── assets
│ ├── css
│ │ └── style.css
│ ├── favicon.ico
│ └── philo.png
├── configure_web_app.md
├── database_loading.md
├── encoding_spec.md
├── index.md
├── installation.md
├── query_syntax.md
└── specific_installations
│ ├── redhat_installation.md
│ └── ubuntu_installation.md
├── extras
├── FrenchStopwords.txt
├── artfl_theme.scss
├── exportResults.py
├── load_config.py
├── metadata_extractor.py
├── philodb_convert-4.6_to_4.7.py
├── plain_text_load_script.py
├── python2_to_python3_port.py
├── rebuild_app.py
├── tol_theme.scss
├── utilities
│ ├── extract_metadata.py
│ ├── fix_drama.py
│ ├── fix_notes.py
│ ├── list_xpath_in_header.py
│ ├── tei_cleanup.py
│ ├── update_toms.py
│ └── xml_cleanup.py
├── vf_theme.scss
└── web_config_convert_4_6_to_4_7.py
├── install.sh
├── libphilo
├── Makefile
├── README
├── args.c
├── args.h
├── blockmap.c
├── blockmap.h
├── c.h
├── db
│ ├── Makefile
│ ├── bitsvector.c
│ ├── bitsvector.h
│ ├── corpus_search.c
│ ├── db.c
│ ├── db.h
│ ├── dbspecs.h
│ ├── dbspecs2.h
│ ├── dbspecs4.h
│ ├── mergewords.c
│ ├── pack.c
│ ├── pack.dSYM
│ │ └── Contents
│ │ │ ├── Info.plist
│ │ │ └── Resources
│ │ │ └── DWARF
│ │ │ └── pack
│ ├── pack.h
│ ├── parsedb.c
│ ├── test_search.py
│ ├── unpack.c
│ ├── unpack.h
│ └── validate-index.c
├── gmap.c
├── gmap.h
├── level.c
├── level.h
├── log.h
├── out.c
├── out.h
├── plugin
│ ├── Makefile
│ ├── Makefile.in
│ ├── dbplugins.h
│ ├── hit.h
│ ├── hitcmp.c
│ ├── hitcmp.h
│ ├── hitcmp_cooc.c
│ ├── hitcmp_cooc.h
│ ├── hitcmp_phrase.c
│ ├── hitcmp_phrase.h
│ ├── hitcmp_proxy.c
│ ├── hitcmp_proxy.h
│ ├── hitcmp_sent.c
│ ├── hitcmp_sent.h
│ ├── hitcon.h
│ ├── hitcrp.c
│ ├── hitcrp.h
│ ├── hitdef.c
│ ├── hitdef.h
│ ├── hitman.c
│ ├── hitman.h
│ ├── hitout.c
│ ├── hitout.h
│ ├── method.c
│ ├── method.h
│ ├── plugin.c
│ ├── plugin.h
│ └── searchmethods.h
├── retreive.c
├── retreive.h
├── search.c
├── search.h
├── search.py
├── search4.c
├── word.c
└── word.h
├── python
├── LICENSE
├── README
├── philologic
│ ├── Config.py
│ ├── TagCensus.py
│ ├── __init__.py
│ ├── loadtime
│ │ ├── LoadFilters.py
│ │ ├── LoadOptions.py
│ │ ├── Loader.py
│ │ ├── OHCOVector.py
│ │ ├── Parser.py
│ │ ├── PhiloLoad.py
│ │ ├── PlainTextParser.py
│ │ ├── PostFilters.py
│ │ └── __init__.py
│ ├── runtime
│ │ ├── DB.py
│ │ ├── FragmentParser.py
│ │ ├── HitList.py
│ │ ├── HitWrapper.py
│ │ ├── MetadataQuery.py
│ │ ├── ObjectFormatter.py
│ │ ├── Query.py
│ │ ├── QuerySyntax.py
│ │ ├── WSGIHandler.py
│ │ ├── __init__.py
│ │ ├── access_control.py
│ │ ├── citations.py
│ │ ├── collocation_scores.py
│ │ ├── find_similar_words.py
│ │ ├── get_text.py
│ │ ├── link.py
│ │ ├── pages.py
│ │ ├── reports
│ │ │ ├── __init__.py
│ │ │ ├── aggregation.py
│ │ │ ├── bibliography.py
│ │ │ ├── collocation.py
│ │ │ ├── concordance.py
│ │ │ ├── filter_word_by_property.py
│ │ │ ├── frequency.py
│ │ │ ├── generate_word_frequency.py
│ │ │ ├── kwic.py
│ │ │ ├── landing_page.py
│ │ │ ├── navigation.py
│ │ │ ├── table_of_contents.py
│ │ │ └── time_series.py
│ │ └── web_config.py
│ ├── shlax.py
│ ├── shlaxtree.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── convert_entities.py
│ │ ├── load_module.py
│ │ ├── metadata_type_handler.py
│ │ ├── pretty_print.py
│ │ └── sort.py
└── pyproject.toml
└── www
├── .htaccess
├── app
├── .env
├── .gitignore
├── index.html
├── misconfiguration.html
├── package-lock.json
├── package.json
├── public
│ └── favicon.ico
├── src
│ ├── App.vue
│ ├── assets
│ │ ├── language.png
│ │ ├── logo.png
│ │ ├── philo.png
│ │ └── styles
│ │ │ └── theme.module.scss
│ ├── components
│ │ ├── AccessControl.vue
│ │ ├── Aggregation.vue
│ │ ├── Bibliography.vue
│ │ ├── Citations.vue
│ │ ├── Collocation.vue
│ │ ├── Concordance.vue
│ │ ├── ExportResults.vue
│ │ ├── Facets.vue
│ │ ├── Header.vue
│ │ ├── Kwic.vue
│ │ ├── LandingPage.vue
│ │ ├── LocaleChanger.vue
│ │ ├── Pages.vue
│ │ ├── ResultsBibliography.vue
│ │ ├── ResultsSummary.vue
│ │ ├── SearchArguments.vue
│ │ ├── SearchForm.vue
│ │ ├── SearchTips.vue
│ │ ├── TableOfContents.vue
│ │ ├── TextNavigation.vue
│ │ └── TimeSeries.vue
│ ├── i18n.js
│ ├── locales
│ │ ├── en.json
│ │ └── fr.json
│ ├── main.js
│ ├── mixins.js
│ ├── router
│ │ └── index.js
│ └── store
│ │ └── index.js
└── vite.config.js
├── dispatcher.py
├── favicon.ico
├── reports
├── __init__.py
├── aggregation.py
├── bibliography.py
├── collocation.py
├── concordance.py
├── kwic.py
├── navigation.py
├── table_of_contents.py
├── time_series.py
└── word_property_filter.py
├── scripts
├── __init__.py
├── access_request.py
├── alignment_to_text.py
├── autocomplete_metadata.py
├── autocomplete_term.py
├── export_results.py
├── get_academic_citation.py
├── get_bibliography.py
├── get_filter_list.py
├── get_frequency.py
├── get_header.py
├── get_hitlist_stats.py
├── get_landing_page_content.py
├── get_more_context.py
├── get_neighboring_words.py
├── get_notes.py
├── get_query_terms.py
├── get_sorted_frequency.py
├── get_sorted_kwic.py
├── get_table_of_contents.py
├── get_term_groups.py
├── get_text_object.py
├── get_total_results.py
├── get_web_config.py
├── get_word_frequency.py
├── lookup_word.py
└── resolve_cite.py
└── webApp.py
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 4.7 ###
2 | - New aggregation report
3 | - New metadata stats in search results
4 | - Results bibliography in concordance and KWIC results.
5 | - Databases should be 50% to 80% (or more) smaller than in 4.6
6 | - Significant speed-ups for:
7 | * Collocations: in some cases 3-4X
8 | * Sorted KWICs: between 6X and 25X (or more) depending on use case, with no more limits on the size of the sort as a result.
9 | * Faceted browsing (frequencies): anywhere from 3X to 100X (or more)
10 | * Landing page browsing: 10X faster or more on large corpora
11 | - Export results to CSV
12 | - Web config has been simplified with the use of global variables for citations
13 | - Some breaking changes to web config: you should not use a 4.6 config
14 | - Revamped Web UI: move to VueJS and Bootstrap 5.
15 | - Cleaner URLs for queries
16 | - Faster database loads
17 | - New generic dictionary lookup code
18 | - Support for date and integer types for metadata fields.
19 |
20 | ### 4.6 ###
21 | - Port PhiloLogic4 codebase to Python3
22 | - Switch load time compression from Gzip to LZ4: big speed-up in loading large databases
23 | - Lib reorganization
24 |
25 | #### 4.0 => 4.5 ####
26 | - Completely rewritten parser: can now parse broken XML
27 | - Massive lib reorg
28 | - A new system wide config
29 | - Loading process completely revamped: use philoload4 command
30 | - Completely rewritten collocations: faster and more accurate
31 | - Added relative frequencies to frequencies in facets
32 | - Added sorted KWIC
33 | - Added support for regexes in quoted term searches (aka exact matches)
34 | - Added ability to filter out words in query expansion through a popup using the NOT syntax
35 | - Added configurable citations for all reports
36 | - Added concordance results sorting by metadata
37 | - Added approximate word searches using Levenshtein distance
38 | - Redesigned facets and time series
39 | - Bug fixes and optimizations everywhere...
40 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:22.04
2 |
3 | ENV DEBIAN_FRONTEND=noninteractive
4 |
5 | # Install dependencies
6 | RUN apt update && apt install -y curl && curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
7 |
8 | RUN apt-get update && apt-get upgrade -y && \
9 | apt-get install -y --no-install-recommends libxml2-dev libxslt-dev zlib1g-dev apache2 libgdbm-dev liblz4-tool brotli ripgrep gcc make python3-dev wget sudo nodejs python3.10-venv && \
10 | apt-get clean && rm -rf /var/lib/apt
11 |
12 | # Install pip
13 | RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
14 |
15 | # Install PhiloLogic
16 | COPY . /PhiloLogic4
17 | WORKDIR /PhiloLogic4
18 | RUN sh install.sh && mkdir /var/www/html/philologic
19 |
20 | RUN a2enmod rewrite && a2enmod cgi && a2enmod brotli
21 |
22 |
23 | # Configure global variables
24 | RUN sed -i 's/database_root = None/database_root = "\/var\/www\/html\/philologic\/"/' /etc/philologic/philologic4.cfg && \
25 | sed -i 's/url_root = None/url_root = "http:\/\/localhost\/philologic\/"/' /etc/philologic/philologic4.cfg
26 |
27 | RUN echo "#!/bin/bash\nservice apache2 stop\nrm /var/run/apache2/*\napachectl -D FOREGROUND" > /autostart.sh && chmod +x /autostart.sh
28 |
29 | # Set up Apache
30 | RUN perl -i -p0e 's/\n\tOptions Indexes FollowSymLinks\n\tAllowOverride None/\n\tOptions Indexes FollowSymLinks\n\tAllowOverride all/smg' /etc/apache2/apache2.conf
31 | EXPOSE 80
32 | CMD ["/autostart.sh"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | PhiloLogic 4.7
2 | ===========
3 |
4 | PhiloLogic is an XML database/search engine/web app that is designed for the particular difficulties of TEI XML. For a more theoretical
5 | description, you can refer to [our research publications](http://jtei.revues.org/817) or [our blog](http://artfl.blogspot.com).
6 |
7 | Note that as of version 4.7.3, PhiloLogic can parse plain text files. See the documentation for more details.
8 |
9 | ### See [documentation](https://artfl-project.github.io/PhiloLogic4/)
10 |
11 | ### IMPORTANT ###
12 | * PhiloLogic 4.7 will only work on Unix-based systems (Linux, *BSD). MacOS is not officially supported and is not guaranteed to work.
13 | * PhiloLogic 4.7 will only run on the Apache Webserver
14 | * PhiloLogic 4.7 has only been tested on Python 3.8 and up. For a Python 2 version, use the [latest PhiloLogic 4.5 release](https://github.com/ARTFL-Project/PhiloLogic4/releases/tag/v4.5.9).
15 | * The PhiloLogic 4.7 Web App will only work on recent versions of web browsers: Chrome, Firefox, Safari, Opera, Edge. No support for Internet Explorer.
16 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | For PhiloLogic documentation, please visit https://artfl-project.github.io/PhiloLogic4/
2 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | title: PhiloLogic4 • Fulltext Search Engine for TEI-XML
2 | description: The BEST
3 | theme: jekyll-theme-slate
4 | highlighter: rouge
5 | url: https://artfl-project.github.io/PhiloLogic4/
6 |
--------------------------------------------------------------------------------
/docs/_includes/foot.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |