├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── README.md ├── _config.yml ├── _includes │ ├── foot.html │ ├── head.html │ └── nav.html ├── _layouts │ ├── default.html │ └── default_exp.html ├── access_control.md ├── assets │ ├── css │ │ └── style.css │ ├── favicon.ico │ └── philo.png ├── configure_web_app.md ├── database_loading.md ├── encoding_spec.md ├── index.md ├── installation.md ├── query_syntax.md └── specific_installations │ ├── redhat_installation.md │ └── ubuntu_installation.md ├── extras ├── FrenchStopwords.txt ├── artfl_theme.scss ├── exportResults.py ├── load_config.py ├── metadata_extractor.py ├── philodb_convert-4.6_to_4.7.py ├── plain_text_load_script.py ├── python2_to_python3_port.py ├── rebuild_app.py ├── tol_theme.scss ├── utilities │ ├── extract_metadata.py │ ├── fix_drama.py │ ├── fix_notes.py │ ├── list_xpath_in_header.py │ ├── tei_cleanup.py │ ├── update_toms.py │ └── xml_cleanup.py ├── vf_theme.scss └── web_config_convert_4_6_to_4_7.py ├── install.sh ├── libphilo ├── Makefile ├── README ├── args.c ├── args.h ├── blockmap.c ├── blockmap.h ├── c.h ├── db │ ├── Makefile │ ├── bitsvector.c │ ├── bitsvector.h │ ├── corpus_search.c │ ├── db.c │ ├── db.h │ ├── dbspecs.h │ ├── dbspecs2.h │ ├── dbspecs4.h │ ├── mergewords.c │ ├── pack.c │ ├── pack.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── pack │ ├── pack.h │ ├── parsedb.c │ ├── test_search.py │ ├── unpack.c │ ├── unpack.h │ └── validate-index.c ├── gmap.c ├── gmap.h ├── level.c ├── level.h ├── log.h ├── out.c ├── out.h ├── plugin │ ├── Makefile │ ├── Makefile.in │ ├── dbplugins.h │ ├── hit.h │ ├── hitcmp.c │ ├── hitcmp.h │ ├── hitcmp_cooc.c │ ├── hitcmp_cooc.h │ ├── hitcmp_phrase.c │ ├── hitcmp_phrase.h │ ├── hitcmp_proxy.c │ ├── hitcmp_proxy.h │ ├── hitcmp_sent.c │ ├── hitcmp_sent.h │ ├── hitcon.h │ ├── hitcrp.c │ ├── hitcrp.h │ ├── hitdef.c │ ├── hitdef.h │ ├── hitman.c │ ├── hitman.h │ ├── hitout.c │ ├── hitout.h │ ├── method.c │ ├── method.h │ ├── plugin.c │ ├── plugin.h │ └── searchmethods.h ├── retreive.c ├── retreive.h ├── search.c ├── search.h ├── search.py ├── search4.c ├── word.c └── word.h ├── python ├── LICENSE ├── README ├── philologic │ ├── Config.py │ ├── TagCensus.py │ ├── __init__.py │ ├── loadtime │ │ ├── LoadFilters.py │ │ ├── LoadOptions.py │ │ ├── Loader.py │ │ ├── OHCOVector.py │ │ ├── Parser.py │ │ ├── PhiloLoad.py │ │ ├── PlainTextParser.py │ │ ├── PostFilters.py │ │ └── __init__.py │ ├── runtime │ │ ├── DB.py │ │ ├── FragmentParser.py │ │ ├── HitList.py │ │ ├── HitWrapper.py │ │ ├── MetadataQuery.py │ │ ├── ObjectFormatter.py │ │ ├── Query.py │ │ ├── QuerySyntax.py │ │ ├── WSGIHandler.py │ │ ├── __init__.py │ │ ├── access_control.py │ │ ├── citations.py │ │ ├── collocation_scores.py │ │ ├── find_similar_words.py │ │ ├── get_text.py │ │ ├── link.py │ │ ├── pages.py │ │ ├── reports │ │ │ ├── __init__.py │ │ │ ├── aggregation.py │ │ │ ├── bibliography.py │ │ │ ├── collocation.py │ │ │ ├── concordance.py │ │ │ ├── filter_word_by_property.py │ │ │ ├── frequency.py │ │ │ ├── generate_word_frequency.py │ │ │ ├── kwic.py │ │ │ ├── landing_page.py │ │ │ ├── navigation.py │ │ │ ├── table_of_contents.py │ │ │ └── time_series.py │ │ └── web_config.py │ ├── shlax.py │ ├── shlaxtree.py │ └── utils │ │ ├── __init__.py │ │ ├── convert_entities.py │ │ ├── load_module.py │ │ ├── metadata_type_handler.py │ │ ├── pretty_print.py │ │ └── sort.py └── pyproject.toml └── www ├── .htaccess ├── app ├── .env ├── .gitignore ├── index.html ├── misconfiguration.html ├── package-lock.json ├── package.json ├── public │ └── favicon.ico ├── src │ ├── App.vue │ ├── assets │ │ ├── language.png │ │ ├── logo.png │ │ ├── philo.png │ │ └── styles │ │ │ └── theme.module.scss │ ├── components │ │ ├── AccessControl.vue │ │ ├── Aggregation.vue │ │ ├── Bibliography.vue │ │ ├── Citations.vue │ │ ├── Collocation.vue │ │ ├── Concordance.vue │ │ ├── ExportResults.vue │ │ ├── Facets.vue │ │ ├── Header.vue │ │ ├── Kwic.vue │ │ ├── LandingPage.vue │ │ ├── LocaleChanger.vue │ │ ├── Pages.vue │ │ ├── ResultsBibliography.vue │ │ ├── ResultsSummary.vue │ │ ├── SearchArguments.vue │ │ ├── SearchForm.vue │ │ ├── SearchTips.vue │ │ ├── TableOfContents.vue │ │ ├── TextNavigation.vue │ │ └── TimeSeries.vue │ ├── i18n.js │ ├── locales │ │ ├── en.json │ │ └── fr.json │ ├── main.js │ ├── mixins.js │ ├── router │ │ └── index.js │ └── store │ │ └── index.js └── vite.config.js ├── dispatcher.py ├── favicon.ico ├── reports ├── __init__.py ├── aggregation.py ├── bibliography.py ├── collocation.py ├── concordance.py ├── kwic.py ├── navigation.py ├── table_of_contents.py ├── time_series.py └── word_property_filter.py ├── scripts ├── __init__.py ├── access_request.py ├── alignment_to_text.py ├── autocomplete_metadata.py ├── autocomplete_term.py ├── export_results.py ├── get_academic_citation.py ├── get_bibliography.py ├── get_filter_list.py ├── get_frequency.py ├── get_header.py ├── get_hitlist_stats.py ├── get_landing_page_content.py ├── get_more_context.py ├── get_neighboring_words.py ├── get_notes.py ├── get_query_terms.py ├── get_sorted_frequency.py ├── get_sorted_kwic.py ├── get_table_of_contents.py ├── get_term_groups.py ├── get_text_object.py ├── get_total_results.py ├── get_web_config.py ├── get_word_frequency.py ├── lookup_word.py └── resolve_cite.py └── webApp.py /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 4.7 ### 2 | - New aggregation report 3 | - New metadata stats in search results 4 | - Results bibliography in concordance and KWIC results. 5 | - Database size should be between 50% and 80% (or more) smaller 6 | - Significant speed-ups for: 7 | * Collocations: in some cases 3-4X 8 | * Sorted KWICs: between 6X and 25X (or more) depending on use case, with no more limits on the size of the sort as a result. 9 | * Faceted browsing (frequencies): anywhere from 3X to 100X (or more) 10 | * Landing page browsing: 10X faster or more on large corpora 11 | - Export results to CSV 12 | - Web config has been simplified with the use of global variables for citations 13 | - Some breaking changes to web config: you should not use a 4.6 config 14 | - Revamped Web UI: move to VueJS and Bootstrap 5. 15 | - Cleaner URLS for queries 16 | - Faster database loads 17 | - New generic dictionary lookup code 18 | - Support for date and integer types for metadata fields. 19 | 20 | ### 4.6 ### 21 | - Port PhiloLogic4 codebase to Python3 22 | - Switch load time compression from Gzip to LZ4: big speed-up in loading large databases 23 | - Lib reorganization 24 | 25 | #### 4.0 => 4.5 #### 26 | - Completely rewritten parser: can now parse broken XML 27 | - Massive lib reorg 28 | - A new system wide config 29 | - Loading process completely revamped: use philoload4 command 30 | - Completely rewritten collocations: faster and accurate 31 | - Added relative frequencies to frequencies in facets 32 | - Added sorted KWIC 33 | - Added support for regexes in quoted term searches (aka exact matches) 34 | - Added ability to filter out words in query expansion through a popup using the NOT syntax 35 | - Added configurable citations for all reports 36 | - Added concordance results sorting by metadata 37 | - Added approximate word searches using Levenshtein distance 38 | - Redesign facets and time series 39 | - Bug fixes and optimizations everywhere... 40 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install dependencies 6 | RUN apt update && apt install -y curl && curl -fsSL https://deb.nodesource.com/setup_22.x | bash - 7 | 8 | RUN apt-get update && apt-get upgrade -y && \ 9 | apt-get install -y --no-install-recommends libxml2-dev libxslt-dev zlib1g-dev apache2 libgdbm-dev liblz4-tool brotli ripgrep gcc make python3-dev wget sudo nodejs python3.10-venv && \ 10 | apt-get clean && rm -rf /var/lib/apt 11 | 12 | # Install pip 13 | RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py 14 | 15 | # Install PhiloLogic 16 | COPY . /PhiloLogic4 17 | WORKDIR /PhiloLogic4 18 | RUN sh install.sh && mkdir /var/www/html/philologic 19 | 20 | RUN a2enmod rewrite && a2enmod cgi && a2enmod brotli 21 | 22 | 23 | # Configure global variables 24 | RUN sed -i 's/database_root = None/database_root = "\/var\/www\/html\/philologic\/"/' /etc/philologic/philologic4.cfg && \ 25 | sed -i 's/url_root = None/url_root = "http:\/\/localhost\/philologic\/"/' /etc/philologic/philologic4.cfg 26 | 27 | RUN echo "#!/bin/bash\nservice apache2 stop\nrm /var/run/apache2/*\napachectl -D FOREGROUND" > /autostart.sh && chmod +x /autostart.sh 28 | 29 | # Set up Apache 30 | RUN perl -i -p0e 's/\n\tOptions Indexes FollowSymLinks\n\tAllowOverride None/\n\tOptions Indexes FollowSymLinks\n\tAllowOverride all/smg' /etc/apache2/apache2.conf 31 | EXPOSE 80 32 | CMD ["/autostart.sh"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![alt text](www/app/src/assets/philo.png) 4.7 2 | =========== 3 | 4 | PhiloLogic is an XML database/search engine/web app that is designed for the particular difficulties of TEI XML. For a more theoretical 5 | description, you can refer to [our research publications](http://jtei.revues.org/817) or [our blog](http://artfl.blogspot.com). 6 | 7 | Note that as of version 4.7.3, PhiloLogic can now parse plain text files. See documentation for more details. 8 | 9 | ### See [documentation](https://artfl-project.github.io/PhiloLogic4/) 10 | 11 | ### IMPORTANT ### 12 | * PhiloLogic 4.7 will only work on Unix-based systems (Linux, *BSD) though MacOS is not supported and guaranteed to work. 13 | * PhiloLogic 4.7 will only run on the Apache Webserver 14 | * PhiloLogic 4.7 has only been tested on Python 3.8 and up. For a Python 2 version, use the [latest PhiloLogic 4.5 release](https://github.com/ARTFL-Project/PhiloLogic4/releases/tag/v4.5.9). 15 | * The PhiloLogic 4.7 Web App will only work on recent versions of web browsers: Chrome, Firefox, Safari, Opera, Edge. No support for Internet Explorer. 16 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | For PhiloLogic documentation, please visit https://artfl-project.github.io/PhiloLogic4/ 2 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: PhiloLogic4 • Fulltext Search Engine for TEI-XML 2 | description: The BEST 3 | theme: jekyll-theme-slate 4 | highlighter: rouge 5 | url: https://artfl-project.github.io/PhiloLogic4/ 6 | -------------------------------------------------------------------------------- /docs/_includes/foot.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /docs/_includes/nav.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | {% include head.html %} 2 |

3 |

{{ page.title }}

4 |

5 | 6 | {{ content }} 7 | 8 | {% include foot.html %} -------------------------------------------------------------------------------- /docs/_layouts/default_exp.html: -------------------------------------------------------------------------------- 1 | {% include head.html %} 2 | 3 |

{{ page.title }}

4 | 5 | {{ content }} 6 | 7 | {% include nav.html %} 8 | 9 | {% include foot.html %} -------------------------------------------------------------------------------- /docs/access_control.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: How to set-up access-control 3 | --- 4 | 5 | There are two ways to control access, user/password authentication, and ip/domain checks. You can use either separately, or both. 6 | 7 | ### Turn on access control 8 | 9 | The first thing you need to do to turn on access control is to set 10 | the variable `access_control` in `your_db_dir/data/web_config.cfg` to `True`, such as: 11 | 12 | ```Python 13 | access_control = True 14 | ``` 15 | 16 | While this option turns on the ability to control access, you still need to configure authentication or ip check, 17 | otherwise access control will be turned off. 18 | 19 | ### User authentication 20 | 21 | To use user authentication, you need to create a logins.txt file inside your `your_db_dir/data/` directory. This can be a symlink. 22 | If no file is found, access will be granted. 23 | The logins.txt should one user/pass per line, separated by a tab, such as 24 | 25 | ``` 26 | username password 27 | another_user another_password 28 | ``` 29 | 30 | ### Domain and IP range check 31 | 32 | To use this feature, you need to specify the location of the file in `web_config.cfg` in the `access_file` variable. 33 | 34 | This file should contain 3 Python variables: `domain_list`, `allowed_ips`, 35 | `blocked_ips`. Each variable should be a list containing the salient info. 36 | 37 | The `domain_list` variable should be a list of domains allowed to access you database. 38 | 39 | ```Python 40 | domain_list = [ 41 | "uchicago.edu", 42 | "indiana.edu", 43 | "louisiana.edu", 44 | "northwestern.edu" 45 | ] 46 | ``` 47 | 48 | The `allowed_ips` variable is a list of ips which are given access to the DB. Note that these are 49 | matched using a regular expression, so you can express the whole ip, or just a part of it. 50 | 51 | ```Python 52 | allowed_ips = [ 53 | "128.135.", 54 | "128.32", 55 | "136.152", 56 | "136.153.1.1-255" 57 | ] 58 | ``` 59 | 60 | Note that the last IP notation expresses an IP range. 61 | 62 | The `blocked_ips` variable is a list of IPs (exact matches needed) to deny access to: 63 | 64 | ```Python 65 | blocked_ips = [ 66 | "1.1.1.4" 67 | ] 68 | ``` 69 | 70 | ### What happens when you're granted access 71 | 72 | A cookie is saved to your browser, so that subsequent visits no longer require access check. 73 | -------------------------------------------------------------------------------- /docs/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/docs/assets/favicon.ico -------------------------------------------------------------------------------- /docs/assets/philo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/docs/assets/philo.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: What is Philologic? 3 | --- 4 | 5 | PhiloLogic is an XML database/search engine/web app developped at the [ARTFL Project](https://artfl-project.uchicago.edu) and designed 6 | for the particular difficulties of TEI XML. For a more theoretical 7 | description, you can refer to [our blog](). 8 | 9 | ### Documentation 10 | 11 | - [**Installation**](installation.md) 12 | - [**Database Loading**](database_loading.md) 13 | - [**Configuring the Web Application**](configure_web_app.md) 14 | - [**Query Syntax**](query_syntax.md) 15 | - [**Text Encoding Spec**](encoding_spec.md) 16 | - [**Access Control**](access_control.md) 17 | 18 | ### IMPORTANT 19 | 20 | - PhiloLogic4.7 will only work on Unix-based systems (Linux, \*BSD) though MacOS is not supported and guaranteed to work. 21 | - PhiloLogic4.7 will only run on the Apache Webserver 22 | - PhiloLogic4.7 has only been tested on Python 3.8 and up 23 | - The PhiloLogic4.7 Web App will only work on recent versions of web browsers: Chrome, Firefox, Safari, Opera, Edge. No support for Internet Explorer. 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installation 3 | --- 4 | 5 | Installing PhiloLogic consists of two steps: 6 | 7 | 1. Install the C and Python libraries system-wide 8 | 2. Set up a directory in your web server to serve databases from 9 | 10 | You can find more detailed installation instructions for specific OSes here: 11 | 12 | - [RedHat (and CentOS)](specific_installations/redhat_installation.md) 13 | - [Ubuntu](specific_installations/ubuntu_installation.md) 14 | 15 | ### Downloading 16 | 17 | IMPORTANT: Do not install from the master branch on github: this is the development branch and is in no way garanteed to be stable 18 | 19 | You can find a copy of the latest version of PhiloLogic4 [here](../../../releases/). 20 | 21 | ### Prerequisites 22 | 23 | - Apache Webserver 24 | - Python 3.8 and up 25 | - GCC 26 | - Make 27 | - [gdbm](http://www.gnu.org.ua/software/gdbm/) 28 | - LZ4 29 | - Brotli (for Apache compression) 30 | 31 | ### Installing 32 | 33 | Installing PhiloLogic's libraries requires administrator privileges. 34 | The C library depends on `gdbm`, which _must_ be installed first, to compile correctly. 35 | 36 | Just run the install.sh in the top level directory of the PhiloLogic4 you downloaded to install PhiloLogic and its dependencies: 37 | 38 | `./install.sh` 39 | 40 | ### Global Configuration 41 | 42 | The installer creates a file in `/etc/philologic/philologic4.cfg` which contains several important global variables: 43 | 44 | - `database_root` defines the filesytem path to the root web directory for your PhiloLogic install such as `/var/www/html/philologic`. Make sure your user or group has full write permissions to that directory. 45 | - `url_root` defines the URL path to the same root directory for your philologic install, such as http://localhost/philologic/ 46 | - `web_app_dir` defines the location of the PhiloLogic4 www directory. By default, the installer will copy the contents of the PhiloLogic www directory (which contains the web app) to /etc/philologic/web_app/. 47 | 48 | ### Setting up PhiloLogic Web Application 49 | 50 | Each new PhiloLogic database you load, containing one or more TEI-XML files, will be served 51 | by a its own dedicated copy of PhiloLogic web application. 52 | By convention, this database and web app reside together in a directory 53 | accessible via an HTTP server configured to run Python CGI scripts. 54 | 55 | Make sure you configure the `/etc/philologic/philologic4.cfg` appropriately. 56 | 57 | Configuring your web server is outside of the scope of this document; but the web install 58 | does come with a preconfigured .htaccess file that allows you to run the Web App. 59 | Therefore, you need to make sure your server is configured to allow htaccess files. 60 | -------------------------------------------------------------------------------- /docs/specific_installations/redhat_installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installing PhiloLogic on RedHat (and CentOS) 3 | --- 4 | 5 | * Install gdbm 6 | 7 | `sudo yum install gdbm gbdm-devel` 8 | 9 | 10 | * Run install script 11 | 12 | `./install.sh` 13 | 14 | * Configure Apache 15 | * Make sure your prefered webspace allows full override for htaccess files: `AllowOverride All` 16 | * Make sure the correct permissions are set on the folder dedicated to PhiloLogic databases, 17 | i.e. write access for the user/group that will be building databases. 18 | -------------------------------------------------------------------------------- /docs/specific_installations/ubuntu_installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installing PhiloLogic4 on Ubuntu 3 | --- 4 | 5 | - The following dependencies need to be installed: 6 | 7 | - libxml2-dev 8 | - libxslt-dev 9 | - zlib1g-dev 10 | - apache2 11 | - libgdbm-dev 12 | - libgdbm-dev 13 | - liblz4-tool 14 | - brotli 15 | - ripgrep 16 | 17 | Run the following command: 18 | 19 | `sudo apt-get install libxml2-dev libxslt-dev zlib1g-dev apache2 libgdbm-dev liblz4-tool brotli ripgrep` 20 | 21 | - Install pip3 (not the version from Ubuntu repos since it breaks pyproject.toml builds). First delete the python3-setuptools Ubuntu package if present: `sudo apt purge python3-setuptools`, then run: 22 | `wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py` 23 | 24 | - Run install script inside the PhiloLogic4 directory 25 | 26 | `./install.sh` 27 | 28 | - Set-up Apache: 29 | - enable mod_rewrite: `sudo a2enmod rewrite` 30 | - enable mod_cgi: `sudo a2enmod cgi` 31 | - enable brotli: `sudo a2enmod brotli` 32 | - Make sure to set `AllowOverride` to `all` for the directory containined your philologic databases in your Apache config 33 | -------------------------------------------------------------------------------- /extras/artfl_theme.scss: -------------------------------------------------------------------------------- 1 | // Custom Bootstrap changes: don't edit 2 | $popover-max-width: 50%; 3 | .custom-popover { 4 | overflow: auto; 5 | text-align: justify !important; 6 | max-height: 60%; 7 | } 8 | 9 | // Theme colors 10 | $header-color: rgb(245, 219, 157); 11 | $button-color: rgba(143, 57, 49, .8); 12 | $button-color-active: rgb(143, 57, 49); 13 | $link-color: #8f3931; 14 | $passage-color: rgb(180, 106, 85); 15 | // Themed elements 16 | nav.navbar { 17 | background-color: $header-color !important; 18 | } 19 | 20 | $secondary: $button-color; 21 | .btn-secondary.active { 22 | background-color: $button-color-active !important; 23 | } 24 | 25 | .btn-outline-secondary.active { 26 | color: #fff !important; 27 | } 28 | 29 | $info: $button-color; 30 | .btn-light { 31 | border: solid 1px rgb(206, 212, 218) !important; 32 | } 33 | 34 | .btn-light.active { 35 | background-color: #eee !important; 36 | } 37 | 38 | .link-color { 39 | color: $link-color 40 | } 41 | 42 | a { 43 | color: $link-color !important 44 | } 45 | 46 | a.btn-secondary { 47 | color: #fff !important; 48 | } 49 | 50 | .number, 51 | .card-header { 52 | background-color: $header-color !important; 53 | color: $link-color !important; 54 | } 55 | 56 | .input-group-text, 57 | .custom-control-input:checked~.custom-control-label::before, 58 | .custom-control-input:focus~.custom-control-label::before { 59 | color: $link-color !important; 60 | background-color: #fff !important; 61 | border-color: $link-color !important; 62 | } 63 | 64 | .metadata-args, 65 | .remove-metadata, 66 | .term-groups, 67 | .close-pill { 68 | border-color: $link-color !important; 69 | } 70 | 71 | .metadata-label, 72 | .remove-metadata:hover, 73 | .term-group-word:hover, 74 | .close-pill:hover { 75 | background-color: $button-color !important; 76 | color: #fff !important; 77 | } 78 | 79 | .custom-control-label::after { 80 | background-color: $button-color !important; 81 | } 82 | 83 | .letter { 84 | color: $link-color !important; 85 | } 86 | 87 | .letter:hover, 88 | #dico-landing-volume .list-group-item:hover { 89 | background-color: $button-color !important; 90 | color: #fff !important; 91 | } 92 | 93 | #dico-landing-volume a:hover { 94 | color: #fff !important; 95 | } 96 | 97 | #report-error { 98 | color: #fff !important; 99 | } 100 | 101 | .custom-select:focus, 102 | .custom-control-input:checked~.custom-control-label::before, 103 | .custom-control-input:focus~.custom-control-label::before, 104 | input[type="text"]:focus { 105 | box-shadow: 0 0 0 0.05rem $button-color !important; 106 | border-color: $button-color !important; 107 | opacity: .5 108 | } 109 | 110 | .landing-page-btn:focus { 111 | border-color: $link-color !important; 112 | } 113 | 114 | #report button:focus { 115 | mix-blend-mode: hard-light; 116 | } 117 | 118 | :export { 119 | color: $link-color 120 | } -------------------------------------------------------------------------------- /extras/metadata_extractor.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import json 4 | import os 5 | import sqlite3 6 | import sys 7 | 8 | from philologic.runtime.DB import DB 9 | 10 | 11 | object_levels = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5} 12 | 13 | 14 | def main(object_level, db_path): 15 | metadata_fields = {} 16 | doc_filenames = {} 17 | database = DB(os.path.join(db_path, "data")) 18 | cursor = database.dbh.cursor() 19 | cursor.execute("SELECT philo_id, filename FROM toms WHERE philo_type='doc'") 20 | for philo_id, filename in cursor: 21 | doc_id = philo_id.split()[0] 22 | doc_filenames[doc_id] = filename 23 | cursor.execute("SELECT * FROM toms WHERE philo_type=?", (object_level,)) 24 | for result in cursor: 25 | fields = result 26 | philo_id = "_".join(fields["philo_id"].split()[: object_levels[object_level]]) 27 | metadata_fields[philo_id] = {} 28 | for field in database.locals["metadata_fields"]: 29 | metadata_fields[philo_id][field] = result[field] or "" 30 | doc_id = result["philo_id"].split()[0] 31 | metadata_fields[philo_id]["filename"] = doc_filenames[doc_id] 32 | with open("metadata.json", "w") as metadata_file: 33 | json.dump(metadata_fields, metadata_file) 34 | 35 | 36 | if __name__ == "__main__": 37 | object_level = sys.argv[1] 38 | db_path = sys.argv[2] 39 | main(object_level, db_path) 40 | -------------------------------------------------------------------------------- /extras/python2_to_python3_port.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """This script should be used to convert a PhiloLogic 4.5 loaded database to 3 | a PhiloLogic 4.5 database. Essentially makes it run under Python3. 4 | This code assums all the code in the PhiloLogic database cirectory is unmodified. 5 | All custom code should be handled separately""" 6 | 7 | import sys 8 | import os 9 | 10 | PHILOLOGIC_INSTALL = "/var/lib/philologic4/web_app/" 11 | TWO_TO_THREE_EXEC = "2to3-3.6" 12 | FORMAT_CODE = False 13 | UPDATE_WEB_APP = False 14 | 15 | 16 | def convert_config(database_to_convert, config_file): 17 | """Convert config files""" 18 | os.system(f"{TWO_TO_THREE_EXEC} --no-diffs -w {database_to_convert}/data/{config_file} > /dev/null 2>&1") 19 | if FORMAT_CODE is True: 20 | os.system(f"black -q -l 120 {database_to_convert}/data/{config_file} > /dev/null 2>&1") 21 | 22 | 23 | def main(): 24 | """Main Loop""" 25 | database_to_convert = sys.argv[1] 26 | convert_config(database_to_convert, "web_config.cfg") 27 | convert_config(database_to_convert, "db.locals.py") 28 | with open(os.path.join(database_to_convert, "data/db.locals.py")) as file: 29 | whole_file = file.read() 30 | whole_file = whole_file.replace( 31 | r'''token_regex = "[\\&A-Za-z0-9\x7f-\xff][\\&A-Za-z0-9\x7f-\xff\\_';]*"''', r'''token_regex = "\w+|[&\w;]+"''' 32 | ) 33 | with open(os.path.join(database_to_convert, "data/db.locals.py"), "w") as output: 34 | output.write(whole_file) 35 | # convert_config(database_to_convert, "load_config.py") 36 | 37 | os.system(f"cp -f {PHILOLOGIC_INSTALL}/*py {database_to_convert}") 38 | os.system(f"cp -f {PHILOLOGIC_INSTALL}/reports/*py {database_to_convert}/reports/") 39 | os.system(f"cp -f {PHILOLOGIC_INSTALL}/scripts/*py {database_to_convert}/scripts/") 40 | if UPDATE_WEB_APP is True: 41 | os.system(f"cp -Rf {PHILOLOGIC_INSTALL}/app/* {database_to_convert}/app/") 42 | 43 | print(database_to_convert, "converted...") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /extras/rebuild_app.py: -------------------------------------------------------------------------------- 1 | """Rebuild web app after PhiloLogic database copied to new server / VM / docker env""" 2 | import sys 3 | import os 4 | 5 | 6 | if __name__ == "__main__": 7 | philo_db = sys.argv[1] 8 | app_path = f"{philo_db}/app" 9 | os.system(f"rm -rf {app_path}") 10 | os.system(f"cp -R /var/lib/philologic4/web_app/app {philo_db}/") 11 | os.system(f"chown -R $(whoami) ${app_path}") # Make sure we have the correct permissions for npm to run 12 | os.system(f"cd {app_path}; npm run build;") 13 | print(f"{philo_db} done") 14 | -------------------------------------------------------------------------------- /extras/tol_theme.scss: -------------------------------------------------------------------------------- 1 | // Custom Bootstrap changes: don't edit 2 | $popover-max-width: 50%; 3 | .custom-popover { 4 | overflow: auto; 5 | text-align: justify !important; 6 | max-height: 60%; 7 | } 8 | 9 | // Theme colors 10 | $header-color: rgb(103, 10, 10); 11 | $button-color: rgb(103, 10, 10); 12 | $button-color-active: rgb(256, 256, 256); 13 | $link-color: rgb(103, 10, 10); 14 | $passage-color: rgb(180, 106, 85); 15 | // Themed elements 16 | nav.navbar { 17 | background-color: #fff !important; 18 | } 19 | 20 | $secondary: $button-color; 21 | .btn-secondary.active { 22 | background-color: $button-color-active !important; 23 | color: $link-color !important; 24 | border-color: $button-color !important; 25 | } 26 | 27 | .btn-outline-secondary.active { 28 | color: #fff !important; 29 | } 30 | 31 | $info: $button-color; 32 | .btn-light.active { 33 | background-color: #eee !important; 34 | } 35 | 36 | .link-color { 37 | color: $link-color 38 | } 39 | 40 | a { 41 | color: $link-color !important 42 | } 43 | 44 | a.btn-secondary { 45 | color: #fff !important; 46 | } 47 | 48 | .number, 49 | .card-header { 50 | background-color: $header-color !important; 51 | color: #fff !important; 52 | } 53 | 54 | .input-group-text, 55 | .custom-control-input:checked~.custom-control-label::before, 56 | .custom-control-input:focus~.custom-control-label::before { 57 | color: $link-color !important; 58 | background-color: #fff !important; 59 | border-color: $link-color !important; 60 | } 61 | 62 | .metadata-args, 63 | .remove-metadata, 64 | .term-groups, 65 | .close-pill { 66 | border-color: $link-color !important; 67 | } 68 | 69 | .metadata-label, 70 | .remove-metadata:hover, 71 | .term-group-word:hover, 72 | .close-pill:hover { 73 | background-color: $button-color !important; 74 | color: #fff !important; 75 | } 76 | 77 | .custom-control-label::after { 78 | background-color: $button-color !important; 79 | } 80 | 81 | .letter { 82 | color: $link-color !important; 83 | } 84 | 85 | .letter:hover { 86 | background-color: $button-color !important; 87 | color: #fff !important; 88 | } 89 | 90 | #report-error { 91 | color: #fff !important; 92 | } 93 | 94 | .custom-select:focus, 95 | .custom-control-input:checked~.custom-control-label::before, 96 | .custom-control-input:focus~.custom-control-label::before, 97 | input[type="text"]:focus { 98 | box-shadow: 0 0 0 0.05rem $button-color !important; 99 | border-color: $button-color !important; 100 | opacity: .5 101 | } 102 | 103 | .landing-page-btn { 104 | border-bottom-width: 1px !important; 105 | } 106 | 107 | .note-ref { 108 | vertical-align: 0.3em !important; 109 | font-size: .7em !important; 110 | font-weight: 700 !important; 111 | color: #670a0a !important; 112 | padding: 0 0.2rem !important; 113 | background-color: #fff !important; 114 | } 115 | 116 | :export { 117 | color: $link-color 118 | } -------------------------------------------------------------------------------- /extras/utilities/extract_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import regex as re 6 | from lxml import etree 7 | from philologic.Loader import Loader 8 | from philologic.Parser import DefaultMetadataXPaths 9 | 10 | 11 | ### USAGE ### 12 | # python extract_metadata.py files 13 | 14 | 15 | def pre_parse_whole_file(fn): 16 | fh = open(fn) 17 | tree = etree.fromstring(fh.read()) 18 | # Remove namespace 19 | for el in tree.iter(): 20 | try: 21 | if el.tag.startswith("{"): 22 | el.tag = el.tag.rsplit("}", 1)[-1] 23 | except AttributeError: ## el.tag is not a string for some reason 24 | pass 25 | return tree 26 | 27 | 28 | def sort_by_metadata(filelist, metadata_xpaths, *fields, **options): 29 | load_metadata = [] 30 | if "reverse" in options: 31 | reverse = options["reverse"] 32 | else: 33 | reverse = False 34 | 35 | for fn in filelist: 36 | data = {"filename": fn} 37 | tree = pre_parse_whole_file(fn) 38 | 39 | for type, xpath, field in metadata_xpaths: 40 | if type == "doc": 41 | if field not in data: 42 | attr_pattern_match = re.search(r"@([^\/\[\]]+)$", xpath) 43 | if attr_pattern_match: 44 | xp_prefix = xpath[: attr_pattern_match.start(0)] 45 | attr_name = attr_pattern_match.group(1) 46 | elements = tree.findall(xp_prefix) 47 | for el in elements: 48 | if el is not None and el.get(attr_name, ""): 49 | data[field] = el.get(attr_name, "").encode("utf-8") 50 | break 51 | else: 52 | el = tree.find(xpath) 53 | if el is not None and el.text is not None: 54 | data[field] = el.text.encode("utf-8") 55 | load_metadata.append(data) 56 | 57 | def make_sort_key(d): 58 | key = [d.get(f, "") for f in fields] 59 | return key 60 | 61 | load_metadata.sort(key=make_sort_key, reverse=reverse) 62 | return load_metadata 63 | 64 | 65 | if __name__ == "__main__": 66 | try: 67 | from artfl_xpaths import metadata_xpaths 68 | except: 69 | metadata_xpaths = DefaultMetadataXPaths 70 | load_metadata = sort_by_metadata(sys.argv[1:], metadata_xpaths) 71 | 72 | for file in load_metadata: 73 | print("## Metadata found for %s ##" % file["filename"]) 74 | for metadata in file: 75 | if metadata != "filename": 76 | print("%s: %s" % (metadata, file[metadata])) 77 | print() 78 | -------------------------------------------------------------------------------- /extras/utilities/fix_notes.py: -------------------------------------------------------------------------------- 1 | """Move notes to end of TEI file according to PhiloLogic's spec.""" 2 | 3 | from copy import deepcopy 4 | import sys 5 | from lxml import etree 6 | 7 | 8 | def update_notes(filename): 9 | """Add inline notes at the end of the file""" 10 | with open(filename, "rb") as input_file: 11 | text = input_file.read() 12 | parser = etree.XMLParser(remove_blank_text=True) 13 | root = etree.fromstring(text, parser) 14 | for el in root.getiterator(): 15 | try: 16 | if el.tag.startswith("{"): 17 | el.tag = el.tag.rsplit("}", 1)[-1] 18 | except AttributeError: 19 | pass 20 | note_div = etree.Element("div", type="notes") 21 | head = etree.Element("head") 22 | head.text = "Notes" 23 | head.tail = "\n" 24 | note_div.insert(0, head) 25 | note_div.text = "\n" 26 | note_count = 1 27 | notes_skipped = 0 28 | for el in root.iter("note"): 29 | inHeader = False 30 | for ancestor in el.iterancestors(): 31 | if ancestor.tag == "teiHeader": 32 | inHeader = True 33 | notes_skipped += 1 34 | break 35 | if inHeader: 36 | continue 37 | new_note = deepcopy(el) 38 | for attr in new_note.attrib: 39 | del new_note.attrib[attr] 40 | new_note.attrib["id"] = f"{note_count}" 41 | new_note.tail = "\n" 42 | note_div.append(new_note) 43 | el.tag = "ref" 44 | el.attrib["type"] = "note" 45 | el.attrib["target"] = f"{note_count}" 46 | for child in el: 47 | el.remove(child) 48 | el.text = "" 49 | note_count += 1 50 | if note_count > 1: 51 | root[-1].append(note_div) 52 | extension = filename.split(".")[-1] 53 | new_file = f'{filename.replace(f".{extension}", "")}_fixed_notes.{extension}' 54 | with open(new_file, "w", encoding="utf8") as output: 55 | tree = etree.ElementTree(root) 56 | output.write(etree.tostring(tree, encoding="unicode", pretty_print=True)) 57 | 58 | 59 | if __name__ == "__main__": 60 | update_notes(sys.argv[1]) 61 | -------------------------------------------------------------------------------- /extras/utilities/list_xpath_in_header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import regex as re 5 | from lxml import etree 6 | 7 | 8 | ### USAGE ### 9 | # python list_xpaths_in_header.py files 10 | 11 | 12 | def pre_parse_header(fn): 13 | """Parse header""" 14 | fh = open(fn) 15 | header = "" 16 | while True: 17 | line = fh.readline() 18 | scan = re.search(r"|", line, re.IGNORECASE) 19 | if scan: 20 | header = line[scan.start() :] 21 | break 22 | while True: 23 | line = fh.readline() 24 | scan = re.search(r"|<\/?temphead>", line, re.IGNORECASE) 25 | if scan: 26 | header = header + line[: scan.end()] 27 | break 28 | else: 29 | header = header + line 30 | tree = etree.fromstring(header) 31 | for el in tree.iter(): 32 | try: 33 | if el.tag.startswith("{"): 34 | el.tag = el.tag.rsplit("}", 1)[-1] 35 | except AttributeError: ## el.tag is not a string for some reason 36 | pass 37 | return tree 38 | 39 | 40 | def retrieve_xpaths(filelist): 41 | """Retrieve XPATHS""" 42 | for fn in filelist: 43 | print("## XPATHS for %s" % fn) 44 | tree = pre_parse_header(fn) 45 | root = tree.getroottree() 46 | for el in tree.iter(): 47 | if el.getchildren() == [] and el.text != None: 48 | print(root.getpath(el)) 49 | print() 50 | 51 | 52 | if __name__ == "__main__": 53 | xpaths = retrieve_xpaths(sys.argv[1:]) 54 | -------------------------------------------------------------------------------- /extras/utilities/update_toms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sqlite3 4 | import sys 5 | import regex as re 6 | from philologic.PostFilters import metadata_frequencies, normalized_metadata_frequencies 7 | 8 | 9 | def change_metadata(metadata_field): 10 | """This is an example of a modification you could make to your metadata field 11 | Modify at will""" 12 | updated_metadata = re.sub(".*(\d{4}).*", "\\1", metadata_field) 13 | return updated_metadata 14 | 15 | 16 | def update_function(c, field, db_location): 17 | query = 'select philo_id, %s from toms where philo_type="doc"' % field 18 | c.execute(query) 19 | updated_value = {} 20 | for i in c: 21 | philo_id, metadata_field = i 22 | updated_value[philo_id] = change_metadata(metadata_field) 23 | 24 | ## Update SQL table 25 | for id, new_value in updated_value.items(): 26 | update_query = 'update toms set %s="%s" where philo_id="%s"' % (field, new_value, id) 27 | c.execute(update_query) 28 | conn.commit() 29 | conn.close() 30 | 31 | ## Update frequency file 32 | loader_obj = LoaderObj(db_location, field) 33 | print(loader_obj.destination, loader_obj.metadata_fields) 34 | metadata_frequencies(loader_obj) 35 | normalized_metadata_frequencies(loader_obj) 36 | 37 | 38 | def parse_command_line(args): 39 | if len(args) == 1 or len(args) == 2: 40 | print("You need two arguments to execute this script") 41 | print("python update_toms.py db_location field_to_update") 42 | sys.exit() 43 | db_location = sys.argv[1] 44 | field = sys.argv[2] 45 | return db_location, field 46 | 47 | 48 | def connect_to_db(db_location): 49 | conn = sqlite3.connect(db_location + "/data/toms.db") 50 | cursor = conn.cursor() 51 | return conn, cursor 52 | 53 | 54 | ## Build a loader class with the attributes needed to update the frequency files 55 | class LoaderObj(object): 56 | def __init__(self, db_location, field): 57 | self.destination = db_location + "/data" 58 | self.metadata_fields = [field] 59 | 60 | 61 | if __name__ == "__main__": 62 | db_location, field = parse_command_line(sys.argv) 63 | conn, c = connect_to_db(db_location) 64 | update_function(c, field, db_location) 65 | -------------------------------------------------------------------------------- /extras/vf_theme.scss: -------------------------------------------------------------------------------- 1 | // Custom Bootstrap changes: don't edit 2 | $popover-max-width: 50%; 3 | .custom-popover { 4 | overflow: auto; 5 | text-align: justify !important; 6 | max-height: 60%; 7 | } 8 | 9 | // Theme colors 10 | $header-color: #fff; 11 | $button-color: rgba(18, 47, 83, .9); 12 | $button-color-active: rgb(0, 33, 71); 13 | $link-color: rgb(0, 33, 71); 14 | $passage-color: rgba(18, 47, 83, .9); 15 | // Themed elements 16 | nav.navbar { 17 | background-color: $header-color !important; 18 | } 19 | 20 | $secondary: $button-color; 21 | .btn-secondary.active { 22 | background-color: $button-color-active !important; 23 | } 24 | 25 | .btn-outline-secondary.active { 26 | color: #fff !important; 27 | } 28 | 29 | $info: $button-color; 30 | .btn-light { 31 | border: solid 1px rgb(206, 212, 218) !important; 32 | } 33 | 34 | .btn-light.active { 35 | background-color: #eee !important; 36 | } 37 | 38 | .link-color { 39 | color: $link-color 40 | } 41 | 42 | a { 43 | color: $link-color !important 44 | } 45 | 46 | a.btn-secondary { 47 | color: #fff !important; 48 | } 49 | 50 | .number, 51 | .card-header { 52 | background-color: $link-color !important; 53 | color: #fff !important; 54 | } 55 | 56 | .input-group-text, 57 | .custom-control-input:checked~.custom-control-label::before, 58 | .custom-control-input:focus~.custom-control-label::before { 59 | color: $link-color !important; 60 | background-color: #fff !important; 61 | border-color: $link-color !important; 62 | } 63 | 64 | .metadata-args, 65 | .remove-metadata, 66 | .term-groups, 67 | .close-pill { 68 | border-color: $link-color !important; 69 | } 70 | 71 | .metadata-label, 72 | .remove-metadata:hover, 73 | .term-group-word:hover, 74 | .close-pill:hover { 75 | background-color: $button-color !important; 76 | color: #fff !important; 77 | } 78 | 79 | .custom-control-label::after { 80 | background-color: $button-color !important; 81 | } 82 | 83 | .letter { 84 | color: $link-color !important; 85 | } 86 | 87 | .letter:hover { 88 | background-color: $button-color !important; 89 | color: #fff !important; 90 | } 91 | 92 | #report-error { 93 | color: #fff !important; 94 | } 95 | 96 | .custom-select:focus, 97 | .custom-control-input:checked~.custom-control-label::before, 98 | .custom-control-input:focus~.custom-control-label::before, 99 | input[type="text"]:focus { 100 | box-shadow: 0 0 0 0.05rem $button-color !important; 101 | border-color: $button-color !important; 102 | opacity: .5 103 | } 104 | 105 | .landing-page-btn:focus { 106 | border-color: $link-color !important; 107 | } 108 | 109 | #report button:focus { 110 | mix-blend-mode: hard-light; 111 | } 112 | 113 | :export { 114 | color: $link-color 115 | } -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CORE_INSTALL="\n## INSTALLING PHILOLOGIC C CORE ##" 4 | echo "$CORE_INSTALL" 5 | cd libphilo/ 6 | make clean 7 | make 8 | if [[ $OSTYPE == 'darwin'* ]]; 9 | then 10 | sudo /usr/bin/install -c db/corpus_search /usr/local/bin/ 11 | sudo /usr/bin/install -c db/pack4 /usr/local/bin/ 12 | else 13 | sudo /usr/bin/install -c db/corpus_search /bin/ 14 | sudo /usr/bin/install -c db/pack4 /bin/ 15 | fi 16 | 17 | cd ..; 18 | PYTHON_INSTALL="\n## INSTALLING PYTHON LIBRARY ##" 19 | echo "$PYTHON_INSTALL" 20 | sudo pip3 install build 21 | cd python; 22 | rm -rf dist/ 23 | python3 -m build --sdist 24 | sudo -H pip3 install dist/*gz 25 | sudo mkdir -p /etc/philologic/ 26 | 27 | cd ..; 28 | sudo mkdir -p /var/lib/philologic4/web_app/ 29 | sudo rm -rf /var/lib/philologic4/web_app/* 30 | if [ -d www/app/node_modules ] 31 | then 32 | sudo rm -rf www/app/node_modules 33 | fi 34 | sudo cp -R www/* /var/lib/philologic4/web_app/ 35 | sudo cp www/.htaccess /var/lib/philologic4/web_app/ 36 | 37 | if [ ! -f /etc/philologic/philologic4.cfg ] 38 | then 39 | db_url="# Set the filesytem path to the root web directory for your PhiloLogic install. 40 | database_root = None 41 | # /var/www/html/philologic/ is conventional for linux, 42 | # /Library/WebServer/Documents/philologic for Mac OS.\n" 43 | echo "$db_url" | sed "s/^ *//g" | sudo tee /etc/philologic/philologic4.cfg > /dev/null 44 | url_root="# Set the URL path to the same root directory for your philologic install. 45 | url_root = None 46 | # http://localhost/philologic/ is appropriate if you don't have a DNS hostname.\n" 47 | echo "$url_root" | sed "s/^ *//g" | sudo tee -a /etc/philologic/philologic4.cfg > /dev/null 48 | web_app_dir="## This should be set to the location of the PhiloLogic4 www directory 49 | web_app_dir = '/var/lib/philologic4/web_app/'" 50 | echo "$web_app_dir" | sed "s/^ *//g" | sudo tee -a /etc/philologic/philologic4.cfg > /dev/null 51 | else 52 | echo "\n## WARNING ##" 53 | echo "/etc/philologic/philologic4.cfg already exists" 54 | echo "Please delete and rerun the install script to avoid incompatibilities\n" 55 | fi 56 | -------------------------------------------------------------------------------- /libphilo/README: -------------------------------------------------------------------------------- 1 | As of June 2012, PhiloLogic 4 has now entered alpha testing. 2 | It should compile and install reasonably well, 3 | but should not be expected to be feature or documentation complete. 4 | Note that this repository only contains the library code. 5 | See the related PhiloLogic4-Templates repo for a "soup to nuts" web app framework. 6 | 7 | C Dependencies: 8 | gdbm - installed to your standard INCLUDE path [Mac users see :http://macosx.com/forums/unix-x11/257664-compile-gdbm.html] 9 | [Also note that the /usr/local/ directory is often not included in default compiler lookups. 10 | If your gdbm lives there, either add it to your search path in the environment or as a compiler flag, as described below.] 11 | 12 | Python Dependences: 13 | 2.6 and below: ElementTree 1.3alpha: http://effbot.org/zone/elementtree-13-intro.htm 14 | 2.7 and greater: None 15 | 16 | Installation instructions: 17 | make 18 | sudo make install 19 | 20 | Due to an absence of a configuration script, 21 | you can supply non-standard compiler arguments to the make commands. For example, 22 | 'make LDFLAGS=-L/usr/local/lib/'. 23 | 24 | After installation, philologic will have installed: 25 | low-level search executables to /bin/ 26 | PhiloLogic python library to your standard python path, according to distutils. 27 | 28 | To get started loading documents, please install the PhiloLogic4-Templates package. 29 | 30 | -- 31 | Richard Whaling 32 | ARTFL Project 33 | July 2012 -------------------------------------------------------------------------------- /libphilo/args.h: -------------------------------------------------------------------------------- 1 | // $Id: args.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #ifndef C_H 21 | #include "c.h" 22 | #endif 23 | 24 | #define BAD_ARGZ 1 25 | 26 | #define BAD_ENGINE_ARGZ "badly defined output (-E:) arguments" 27 | #define BAD_SEARCH_ARGZ "badly defined search (-S:) arguments" 28 | #define BAD_CORPUS_ARGZ "badly defined corpus (-C:) arguments" 29 | #define BAD_OUTPUT_ARGZ "badly defined output (-P:) arguments" 30 | #define BAD_PLUGIN_ARGZ "badly defined plugin (-D:) argument" 31 | 32 | 33 | extern Z32 process_command_argz(); 34 | extern Z32 process_command_argz_backwardcompat(); 35 | 36 | 37 | -------------------------------------------------------------------------------- /libphilo/blockmap.h: -------------------------------------------------------------------------------- 1 | // $Id: blockmap.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #ifdef BLOCKMAP_H 21 | #error "blockmap.h multiply included" 22 | #else 23 | 24 | 25 | /* 26 | block map is an object that contains a pointer to a Word 27 | object and a counter pointing to the current position on 28 | the object map there; by going along this list of blockmap 29 | objects ("Blockmap") and re-sorting it in the process, we 30 | conduct the search. 31 | */ 32 | 33 | #define BLOCKMAP_H 34 | 35 | #ifndef C_H 36 | #include "c.h" 37 | #endif 38 | 39 | #ifndef WORD_H 40 | #include "word.h" 41 | #endif 42 | 43 | 44 | #define BLOCKMAP_BUILT 0 45 | #define BLOCKMAP_MALLOC_ERROR 1 46 | #define BLOCKMAP_BUILD_ERROR 2 47 | 48 | 49 | typedef struct blockMap_st *blockMap, blockMap_; 50 | 51 | struct blockMap_st 52 | { 53 | Word w; /* pointer to word object */ 54 | N32 n; /* map counter in the object above */ 55 | N32 bn; /* batch (or "level") number */ 56 | }; 57 | 58 | /* 59 | extern Z32 build_blockMap( Search, Z32 ); 60 | */ 61 | 62 | extern void blockmap_sort (); 63 | 64 | #endif /* #ifdef BLOCKMAP_H */ 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /libphilo/c.h: -------------------------------------------------------------------------------- 1 | // $Id: c.h,v 2.12 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef C_H 20 | #error "c.h multiply included" 21 | #else 22 | #define C_H 23 | #include 24 | 25 | typedef uint8_t N1, N2, N3, N4, N5, N6, N7, N8; 26 | typedef char Z8; 27 | typedef uint16_t N16; 28 | typedef int16_t Z16; 29 | typedef uint32_t N24, N32, N; 30 | typedef int32_t Z24, Z32; 31 | typedef int Z; 32 | typedef uint8_t *String; 33 | typedef uint64_t N64; 34 | 35 | /* Make sure we can define a function that is shadowed by a macro: 36 | we use this by defining "foo FUNCTION(args)", thereby suppressing 37 | the macro call interpretation of foo(args). 38 | */ 39 | #define FUNCTION 40 | 41 | #endif 42 | 43 | #define HITLIST_LIMIT_EXCEEDED 111 44 | 45 | 46 | -------------------------------------------------------------------------------- /libphilo/db/Makefile: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.in,v 1.2 2004/05/28 19:22:11 o Exp $ 2 | CC= gcc 3 | PH_CFLAGS= -I.. -I../../search-engine 4 | CFLAGS= 5 | CPPFLAGS= 6 | LDFLAGS= 7 | PH_LDMODULEFLAGS=-shared 8 | PH_FPIC=-fPIC 9 | all: unpack.o db.o bitsvector.o pack4 parsedb mergewords corpus_search 10 | 11 | corpus_search: corpus_search.c db.o unpack.o bitsvector.o 12 | $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o corpus_search corpus_search.c db.o unpack.o bitsvector.o -lgdbm 13 | 14 | mergewords: mergewords.c db.o unpack.o bitsvector.o 15 | $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o mergewords mergewords.c db.o unpack.o bitsvector.o -lgdbm 16 | 17 | parsedb: parsedb.c 18 | $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o parsedb parsedb.c db.c unpack.c bitsvector.c -lgdbm 19 | 20 | pack4: pack.c db.c 21 | $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o pack4 pack.c db.c -lgdbm 22 | 23 | validate-index: validate-index.c unpack.o bitsvector.o getresource.o 24 | $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o validate-index validate-index.c unpack.o bitsvector.o getresource.o -lgdbm 25 | 26 | clean: 27 | rm -f *.o *.lo libunpack_e.bundle *~ validate-index pack pack4 mergewords parsedb -------------------------------------------------------------------------------- /libphilo/db/bitsvector.c: -------------------------------------------------------------------------------- 1 | // $Id: bitsvector.c,v 2.10 2004/05/28 19:22:04 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #include 20 | #include 21 | #include "bitsvector.h" 22 | 23 | bitsvector *bitsvectorNew(N8 *v) 24 | { bitsvector *r = malloc(sizeof(bitsvector)); 25 | 26 | r->v = v; 27 | r->o = 0; 28 | r->s = 0; 29 | r->b = 0; 30 | return r; 31 | } 32 | 33 | void bitsvectorOld(bitsvector *f) 34 | { free(f->v); 35 | free(f); 36 | } 37 | 38 | 39 | 40 | /* 41 | N24 bitsvectorGet FUNCTION(Bitsvector f, N5 n) {return bitsvectorGet(f, n);} 42 | */ 43 | 44 | N64 bitsvectorGet (bitsvector *f, N8 n) 45 | { 46 | N64 ret = 0; 47 | 48 | N64 buffer = 0; 49 | N64 mask = 1; 50 | 51 | N32 i; 52 | 53 | N32 o_shift = 0; 54 | 55 | if ( n > 64 ) 56 | { 57 | fprintf (stderr, "attempted bitsvectorGet on >64 bit integer!\n"); 58 | fprintf (stderr, "whoa! that's a big-ass integer!\n"); 59 | 60 | exit (1); 61 | } 62 | 63 | o_shift = ( n + f->s ) / 8; 64 | 65 | ret = f->v[f->o]; 66 | 67 | for ( i = 0; i < o_shift; i++ ) 68 | { 69 | buffer = f->v[f->o + i + 1]; 70 | ret |= (buffer << ( 8 * (i + 1))); 71 | } 72 | 73 | ret >>= f->s; 74 | 75 | mask <<= n; 76 | mask--; 77 | 78 | ret &= mask; 79 | 80 | f->o += o_shift; 81 | f->s = ( f->s + n ) % 8; 82 | 83 | return ret; 84 | 85 | } 86 | -------------------------------------------------------------------------------- /libphilo/db/bitsvector.h: -------------------------------------------------------------------------------- 1 | // $Id: bitsvector.h,v 2.10 2004/05/28 19:22:04 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef BITSVECTOR_H 20 | #error "bitsvector.h multiply included" 21 | #else 22 | #define BITSFILE_H 23 | 24 | #ifndef C_H 25 | #include "../c.h" 26 | #endif 27 | 28 | 29 | 30 | struct Bitsvector 31 | { N8 *v; 32 | N32 o; 33 | N16 s; 34 | N32 b; 35 | }; 36 | 37 | typedef struct Bitsvector bitsvector; 38 | 39 | bitsvector *bitsvectorNew(N8 *v); 40 | 41 | void bitsvectorOld(bitsvector *f); 42 | 43 | N64 bitsvectorGet (bitsvector *f, N8 n); 44 | 45 | #define bitsvectorTell(x) ((((x)->o) << 3) + (x)->s) 46 | 47 | #define bitsfileSeek(x, n) \ 48 | begin (x)->o = ((n) + (x)->o) >> 3; \ 49 | (x)->s = 0, (Void)bitsvectorGet(x, n & 7); end 50 | 51 | 52 | #define bitsvectorGet24(x, n) \ 53 | ( (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8, \ 54 | (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8, \ 55 | (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8) ) ), \ 56 | (x)->s -= (n), ((x)->b >> (32 - (x)->s - (n))) & (1 << (n)) - 1 ) 57 | 58 | #define bitsvectorGetBoolean(f) bitsvectorGet((f), 1) 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /libphilo/db/db.h: -------------------------------------------------------------------------------- 1 | #ifndef _INC_DB_H 2 | #define _INC_DB_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | struct philo_dbspec 9 | { 10 | int fields; 11 | int type_length; 12 | int block_size; 13 | int freq1_length; 14 | int freq2_length; 15 | int offset_length; 16 | int *negatives; 17 | int *dependencies; 18 | int *bitlengths; 19 | int bitwidth; 20 | int hits_per_block; 21 | int uncompressed_hit_size; 22 | }; 23 | 24 | typedef struct philo_dbspec dbspec; 25 | 26 | dbspec *new_dbspec(int fields, 27 | int type_length, 28 | int block_size, 29 | int freq1_length, 30 | int freq2_length, 31 | int offset_length, 32 | int *negatives, 33 | int *dependencies, 34 | int *bitlengths); 35 | 36 | int delete_dbspec(dbspec* dbs_ptr); 37 | 38 | dbspec *init_dbspec_file(FILE *dbspec); 39 | 40 | struct philo_dbh 41 | { 42 | GDBM_FILE hash_file; 43 | FILE *block_file; 44 | dbspec *dbspec; 45 | }; 46 | 47 | typedef struct philo_dbh dbh; 48 | 49 | dbh *new_dbh(char *gdbm_f, char *index_f, dbspec *dbs); 50 | dbh *init_dbh_folder(char *db_path); 51 | int delete_dbh(dbh *dbh_ptr); 52 | int dbh_info(dbh *db); 53 | #endif 54 | -------------------------------------------------------------------------------- /libphilo/db/dbspecs.h: -------------------------------------------------------------------------------- 1 | // $Id: dbspecs.H,v 2.10 2004/05/28 19:22:02 o Exp $ 2 | /* 3 | * Database-specific constants 4 | */ 5 | 6 | #define FIELDS 9 7 | 8 | #define BLK_SIZE 2048 9 | #define TYPE_LENGTH 1 10 | #define FREQ1_LENGTH 4 11 | 12 | #define NEGATIVES {0,1,1,1,1,1,0,0,0} 13 | #define DEPENDENCIES {-1,0,1,2,3,4,5,0,7} 14 | 15 | 16 | #define BITLENGTHS {1,6,1,1,7,5,8,20,10} 17 | #define FREQ2_LENGTH 13 18 | #define OFFST_LENGTH 20 19 | -------------------------------------------------------------------------------- /libphilo/db/dbspecs2.h: -------------------------------------------------------------------------------- 1 | // $Id: dbspecs.H,v 2.10 2004/05/28 19:22:02 o Exp $ 2 | /* 3 | * Database-specific constants 4 | */ 5 | 6 | #define FIELDS 9 7 | 8 | #define BLK_SIZE 2048 9 | #define TYPE_LENGTH 1 10 | #define FREQ1_LENGTH 4 11 | 12 | #define NEGATIVES {0,1,1,1,1,1,0,0,0} 13 | #define DEPENDENCIES {-1,0,1,2,3,4,5,0,7} 14 | 15 | 16 | #define BITLENGTHS {13,12,11,9,13,12,14,24,14} 17 | #define FREQ2_LENGTH 25 18 | #define OFFST_LENGTH 33 19 | 20 | -------------------------------------------------------------------------------- /libphilo/db/dbspecs4.h: -------------------------------------------------------------------------------- 1 | #define FIELDS 9 2 | #define TYPE_LENGTH 1 3 | #define BLK_SIZE 2048 4 | #define FREQ1_LENGTH 4 5 | #define FREQ2_LENGTH 25 6 | #define OFFST_LENGTH 33 7 | #define NEGATIVES {0,1,1,1,1,1,0,0,0} 8 | #define DEPENDENCIES {-1,0,1,2,3,4,5,0,7} 9 | #define BITLENGTHS {13,12,11,9,13,12,14,24,14} 10 | 11 | 12 | -------------------------------------------------------------------------------- /libphilo/db/pack.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.pack 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /libphilo/db/pack.dSYM/Contents/Resources/DWARF/pack: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/libphilo/db/pack.dSYM/Contents/Resources/DWARF/pack -------------------------------------------------------------------------------- /libphilo/db/pack.h: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | #include "gdbm.h" 3 | #include "../c.h" 4 | 5 | #define PHILO_INDEX_CUTOFF 10 6 | #define PHILO_BLOCK_FULL 1 7 | 8 | struct hitbuffer { 9 | dbh *db; 10 | Z32 *dir; 11 | Z32 *blk; 12 | Z8 type; 13 | N64 freq; 14 | N64 offset; 15 | Z8 in_block; 16 | Z8 word[512]; 17 | N64 dir_length; 18 | N64 dir_malloced; 19 | N64 blk_length; 20 | N64 blk_malloced; 21 | }; 22 | 23 | typedef struct hitbuffer hitbuffer; 24 | 25 | hitbuffer *new_hb(dbspec *dbs); 26 | int delete_hb(hitbuffer *hb); 27 | int hitbuffer_init(hitbuffer *hb, Z8 *word); 28 | int hitbuffer_inc(hitbuffer *hb, Z32 *hit); 29 | int hitbuffer_finish(hitbuffer *hb); 30 | -------------------------------------------------------------------------------- /libphilo/db/parsedb.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../plugin/hitcon.h" 5 | #include "db.h" 6 | #include "unpack.h" 7 | 8 | int parsedbspecs(FILE *f) { 9 | int fields; 10 | int res; 11 | if (res = fscanf(f,"#define FIELDS %d", &fields)) { 12 | printf("%d fields.\n", fields); 13 | } 14 | else { 15 | printf("Couldn't get fields.\n"); 16 | return 1; 17 | } 18 | return 0; 19 | } 20 | 21 | int main(int argc, char **argv) { 22 | 23 | char buffer[256]; 24 | int form_ptr = 0; 25 | dbspec *dbs; 26 | dbh *db; 27 | char word[256]; 28 | 29 | db = init_dbh_folder(argv[1]); 30 | 31 | int lu_type; 32 | int lu_freq; 33 | uint64_t lu_offset; 34 | int lu_blocks; 35 | int32_t *hits; 36 | 37 | while (fgets(buffer,256,stdin)) { 38 | int i = 0; 39 | int j = 0; 40 | lu_type = 0; 41 | lu_freq = 0; 42 | lu_offset = 0; 43 | lu_blocks = 0; 44 | sscanf(buffer,"%s256",word); 45 | fprintf(stderr,"looking up %s : ",word); 46 | word_lookup(db,word); 47 | hits = hit_lookup(db,word,&lu_type,&lu_freq,&lu_blocks,&lu_offset); 48 | fprintf(stderr,"%d\n", lu_freq); 49 | if (lu_type == 0) { 50 | for (i = 0; i < (db->dbspec->fields * lu_freq); i++) { 51 | fprintf(stdout,"%d ",hits[i]); 52 | } 53 | fprintf(stdout,"\n"); 54 | } 55 | else { 56 | fprintf(stderr, "%d blocks:\n", lu_blocks); 57 | int hit_offset = 0; 58 | int hit_offset_2; 59 | int32_t *temp_hit = malloc(sizeof(int32_t) * db->dbspec->fields); 60 | int32_t *block_hits; 61 | int block_count; 62 | int block_number = 0; 63 | for (i = 0; i < (db->dbspec->fields * lu_blocks); i++) { 64 | hit_offset = (i % db->dbspec->fields); 65 | temp_hit[hit_offset] = hits[i]; 66 | if ((hit_offset == 8)) { 67 | fprintf(stdout,"\n"); 68 | block_number++; 69 | block_hits = hit_gethits(db,lu_type,temp_hit,lu_offset,&block_count); 70 | hit_offset_2 = 0; 71 | for (j = 0; j < (db->dbspec->fields) * block_count; j++) { 72 | hit_offset_2 = (j % db->dbspec->fields); 73 | if (hit_offset_2 == 0) { 74 | fprintf(stdout,"\n"); 75 | } 76 | fprintf(stdout,"%d ",block_hits[j]); 77 | } 78 | fprintf(stdout,"\n[%d hits in block %d]\n",block_count, block_number); 79 | lu_offset += db->dbspec->block_size; 80 | } 81 | // fprintf(stdout,"%d ",hits[i]); 82 | } 83 | fprintf(stdout,"\n"); 84 | 85 | 86 | } 87 | } 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /libphilo/db/unpack.h: -------------------------------------------------------------------------------- 1 | #ifndef _INC_UNPACK_H 2 | #define _INC_UNPACK_H 3 | 4 | #include "db.h" 5 | #include "bitsvector.h" 6 | #include 7 | 8 | int word_lookup(dbh *db, Z8 *keyword); 9 | 10 | Z32 *hit_lookup(dbh *db, Z8 *keyword, N32 *type_num, N32 *freq, N32 *blkcount, N64 *offset); 11 | Z32 *unpack(dbh *db, bitsvector *v, N32 count); 12 | Z32 *hit_gethits(dbh *db, N32 type, Z32 *first, N64 offset, N32 *blockcount); 13 | #endif 14 | -------------------------------------------------------------------------------- /libphilo/gmap.c: -------------------------------------------------------------------------------- 1 | // $Id: gmap.c,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #include 20 | #include 21 | #include "gmap.h" 22 | 23 | Z32 *gm_get_eod ( Gmap gm ) 24 | { 25 | 26 | if ( gm->gm_eod > gm->gm_l - 1 ) 27 | { 28 | gm->gm_h = (Z32 *)realloc(gm->gm_h, 2 * gm->gm_l*sizeof(Z32)*gm->gm_f); 29 | 30 | if ( gm->gm_h == NULL ) 31 | { 32 | gm->gm_e = GMAP_MALLOCFAIL; 33 | gm->gm_l = 0; 34 | return NULL; 35 | } 36 | 37 | gm->gm_l *= 2; 38 | 39 | } 40 | 41 | return ( gm->gm_h + gm->gm_eod * gm->gm_f ); 42 | } 43 | 44 | Z32 gm_set_eod ( Gmap gm, Z32 eod ) 45 | { 46 | return gm->gm_eod = eod; 47 | } 48 | 49 | Z32 gm_inc_eod ( Gmap gm ) 50 | { 51 | if ( gm->gm_eod > gm->gm_l - 1 ) 52 | return 0; 53 | 54 | gm->gm_eod++; 55 | return gm->gm_eod; 56 | } 57 | 58 | Z32 *gm_get_cur_pos ( Gmap gm ) 59 | { 60 | return ( gm->gm_h + gm->gm_c * gm->gm_f ); 61 | } 62 | 63 | Z32 *gm_get_pos ( Gmap gm, N pos ) 64 | { 65 | return ( gm->gm_h + pos * gm->gm_f ); 66 | } 67 | 68 | Z32 gm_inc_pos ( Gmap gm ) 69 | { 70 | if ( gm->gm_c >= gm->gm_eod - 1 ) 71 | return 0; 72 | 73 | gm->gm_c++; return 1; 74 | } 75 | 76 | Z32 gm_set_pos ( Gmap gm, N32 pos ) 77 | { 78 | return gm->gm_c = pos; 79 | } 80 | 81 | Gmap new_Gmap ( N32 initlen, N32 factor ) 82 | { 83 | Gmap g = (Gmap) malloc (sizeof(gmap)); 84 | 85 | if ( g == NULL ) 86 | return NULL; 87 | 88 | g->gm_f = factor; 89 | g->gm_c = g->gm_eod = 0; 90 | g->gm_e = GMAP_OK; 91 | 92 | if ( initlen ) 93 | { 94 | g->gm_h = (Z32 *) malloc (initlen * sizeof(Z32) * factor); 95 | 96 | if ( g->gm_h == NULL ) 97 | { 98 | g->gm_e = GMAP_MALLOCFAIL; 99 | g->gm_l = 0; 100 | return g; 101 | } 102 | } 103 | 104 | g->gm_l = initlen; 105 | return g; 106 | } 107 | 108 | 109 | void old_Gmap ( Gmap m ) 110 | { 111 | /* "old" is the opposite of "new" */ 112 | 113 | free (m->gm_h); 114 | free (m); 115 | 116 | } 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /libphilo/gmap.h: -------------------------------------------------------------------------------- 1 | // $Id: gmap.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifndef C_H 20 | #include "c.h" 21 | #endif 22 | 23 | #ifdef GMAP_H 24 | #error "gmap.h multiply included" 25 | #else 26 | #define GMAP_H 27 | 28 | #define GMAP_OK 0 29 | #define GMAP_TOOMANY 1 30 | #define GMAP_MALLOCFAIL 2 31 | 32 | typedef struct gmap *Gmap, gmap; 33 | 34 | struct gmap 35 | { 36 | N32 gm_f; /* factor -- i.e., how many integers/hit */ 37 | Z32 *gm_h; /* hits */ 38 | N32 gm_c; /* counter, i.e., the current location */ 39 | N32 gm_l; /* limit, i.e. malloc-ed size of gm_h */ 40 | N32 gm_eod;/* "end of data", i.e., the current length */ 41 | Z32 gm_e; /* error condition */ 42 | }; 43 | 44 | extern Gmap new_Gmap ( N32,N32 ); 45 | extern void old_Gmap ( Gmap ); 46 | extern Z32 *gm_get_eod ( Gmap ); 47 | extern Z32 gm_set_eod ( Gmap, Z32 ); 48 | extern Z32 gm_inc_eod ( Gmap ); 49 | extern Z32 *gm_get_cur_pos ( Gmap ); 50 | extern Z32 *gm_get_pos ( Gmap, N ); 51 | extern Z32 gm_inc_pos ( Gmap ); 52 | extern Z32 gm_set_pos ( Gmap, N32 ); 53 | 54 | #endif 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /libphilo/level.h: -------------------------------------------------------------------------------- 1 | // $Id: level.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef BATCH_H 20 | #error "level.h multiply included" 21 | #else 22 | 23 | #define BATCH_H 24 | 25 | #ifndef C_H 26 | #include "c.h" 27 | #endif 28 | 29 | #ifndef WORD_H 30 | #include "word.h" 31 | #endif 32 | 33 | #ifndef BLOCKMAP_H 34 | #include "blockmap.h" 35 | #endif 36 | 37 | #ifndef GMAP_H 38 | #include "gmap.h" 39 | #endif 40 | 41 | #define MAXBATCHES 10 42 | 43 | #define BATCH_PROCESSED 0 44 | #define BATCH_PROCESSED_LAST 1 45 | #define BATCH_EMPTY 2 46 | #define BATCH_PROCESSING_ERROR 4 47 | 48 | 49 | typedef struct Batch *Batch, Batch_; 50 | 51 | struct Batch 52 | { 53 | N32 howmany; /* number of words in the batch */ 54 | N32 total; /* total frequency of the words in the batch */ 55 | N32 number; /* "real" number of the batch, used in phrase searches */ 56 | N32 malloced; /* currently malloc-ed word list */ 57 | 58 | N32 not_op; /* boolean 'NOT' operator; */ 59 | 60 | Word w_list; /* list of Word objects */ 61 | 62 | hitcmp* hit_cmp; 63 | 64 | blockMap blockmap; /* Block Map */ 65 | N32 blockmap_l; 66 | N32 blkmapctr; 67 | 68 | Gmap map; 69 | Gmap res; 70 | Gmap stored; 71 | }; 72 | 73 | #ifndef SEARCH_H 74 | #include "search.h" 75 | #endif 76 | 77 | void init_batchObject ( Batch b, N32 n ); 78 | Z32 process_input ( Search s, FILE *f ); 79 | Z32 create_batches ( Search s, FILE *f ); 80 | Z32 process_batch ( Search s, FILE *f, N32 bn ); 81 | void sort_batches ( Batch b, N32 n ); 82 | int batch_sort_function ( const void *v0, const void *v1 ); 83 | void rearrange_batches ( Search s ); 84 | int delete_batch(Batch b); 85 | 86 | #endif /* #ifdef BATCH_H */ 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /libphilo/log.h: -------------------------------------------------------------------------------- 1 | // $Id: log.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef LOG_H 20 | #error "log.h multiply included" 21 | #else 22 | #define LOG_H 23 | 24 | #define L_QUIET 0 25 | #define L_ERROR 1 26 | #define L_INFO 2 27 | 28 | extern void s_log (Z32, Z32, char *, Z8 *); 29 | 30 | #define s_log(state,level,format,message) { \ 31 | if (state == level) \ 32 | fprintf (stderr, "%s\n", (char *)message); \ 33 | } 34 | 35 | #define s_logf(state,level,format,message) { \ 36 | if (state == level) \ 37 | fprintf (stderr, format,message); \ 38 | } 39 | #endif 40 | -------------------------------------------------------------------------------- /libphilo/out.c: -------------------------------------------------------------------------------- 1 | // $Id: out.c,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #include 21 | #include "c.h" 22 | #include "search.h" 23 | #include "out.h" 24 | 25 | Z32 dump_hits_out ( Search s, N32 level, Gmap m ) 26 | { 27 | N32 n = m->gm_eod; 28 | N32 i; 29 | s_logf ( s->debug, L_INFO, "dumping out results; (%d hits on map)", n ); 30 | s_logf ( s->debug, L_INFO, "map position set to %d;", m->gm_c ); 31 | if ( s->hit_def->output == HIT_OUT_ASCII ) { 32 | s_log ( s->debug, L_INFO, NULL, (Z8 *)"(output set to ASCII)" ); 33 | } 34 | for ( i = 0; i < n; i++ ) { 35 | if ( s->depth_r ) { 36 | (void) hit_out ( gm_get_pos(m, i), s->hit_def, level, s->depth_r ); 37 | } 38 | else { 39 | (void) hit_out ( gm_get_pos(m, i), s->hit_def, level, s->depth ); 40 | } 41 | s->n_printed++; 42 | if ( ! ( s->n_printed % 100 ) ) { 43 | fflush( stdout ); 44 | } 45 | else if ( s->n_printed == s->soft_limit ) { 46 | s->batch_limit = DEFAULT_BATCH_LIMIT; 47 | fflush( stdout ); 48 | } 49 | if ( s->n_printed == s->print_limit ) { 50 | fflush( stdout ); 51 | s->exitcode = 111; 52 | return SEARCH_PRINT_LIMIT_REACHED; 53 | } 54 | } 55 | fflush( stdout ); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /libphilo/out.h: -------------------------------------------------------------------------------- 1 | // $Id: out.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #ifndef C_H 21 | #include "c.h" 22 | #endif 23 | 24 | extern Z32 dump_hits_out ( Search, N32, Gmap ); 25 | -------------------------------------------------------------------------------- /libphilo/plugin/Makefile: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.in,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | CC= gcc 3 | PH_CFLAGS = -O3 -I.. 4 | CFLAGS= $(PH_CFLAGS) 5 | 6 | all: libindex.a 7 | 8 | libindex.a: hitdef.o hitcmp_sent.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o 9 | ar ru $@ hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o hitcmp_sent.o 10 | ranlib $@ 11 | 12 | hitdef.o: hitdef.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 13 | # $(CC) -c -o hitdef.o hitdef.c 14 | 15 | hitcmp_cooc.o: hitcmp_cooc.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 16 | # $(CC) -c -o hitcmp_cooc.o hitcmp_cooc.c 17 | 18 | hitcmp_phrase.o: hitcmp_phrase.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 19 | 20 | hitcmp_proxy.o: hitcmp_proxy.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 21 | 22 | hitcmp_sent.o: hitcmp_sent.c hitcmp_sent.h hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 23 | 24 | hitout.o: hitout.c hitcon.h hitout.h hitdef.h hitman.h hitcmp.h 25 | # $(CC) -c -o hitout.o hitout.c 26 | 27 | hitman.o: hitman.c hitcon.h hitman.h hitdef.h hitcmp.h 28 | # $(CC) -c -o hitman.o hitman.c 29 | 30 | hitcrp.o: hitcrp.c hitcon.h hitdef.h hitcmp.h 31 | # $(CC) -c -o hitcrp.o hitcrp.c 32 | 33 | clean: 34 | rm -f *.o *~ *.a core 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /libphilo/plugin/Makefile.in: -------------------------------------------------------------------------------- 1 | # $Id: Makefile.in,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | CC= @CC@ 3 | PH_CFLAGS = -I.. 4 | CFLAGS= @CFLAGS@ $(PH_CFLAGS) 5 | 6 | all: libindex.a 7 | 8 | libindex.a: hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o 9 | ar ru $@ hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o 10 | ranlib $@ 11 | 12 | hitdef.o: hitdef.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 13 | # $(CC) -c -o hitdef.o hitdef.c 14 | 15 | hitcmp_cooc.o: hitcmp_cooc.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 16 | # $(CC) -c -o hitcmp_cooc.o hitcmp_cooc.c 17 | 18 | hitcmp_phrase.o: hitcmp_phrase.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 19 | 20 | hitcmp_proxy.o: hitcmp_proxy.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h 21 | 22 | hitout.o: hitout.c hitcon.h hitout.h hitdef.h hitman.h hitcmp.h 23 | # $(CC) -c -o hitout.o hitout.c 24 | 25 | hitman.o: hitman.c hitcon.h hitman.h hitdef.h hitcmp.h 26 | # $(CC) -c -o hitman.o hitman.c 27 | 28 | hitcrp.o: hitcrp.c hitcon.h hitdef.h hitcmp.h 29 | # $(CC) -c -o hitcrp.o hitcrp.c 30 | 31 | clean: 32 | rm -f *.o *~ *.a core 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /libphilo/plugin/dbplugins.h: -------------------------------------------------------------------------------- 1 | // $Id: dbplugins.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #include "plugin.h" 20 | 21 | extern struct 22 | { 23 | Z8 *dbp_tag; 24 | dbPlugin *dbp; 25 | } 26 | dbPlugins[]; 27 | -------------------------------------------------------------------------------- /libphilo/plugin/hit.h: -------------------------------------------------------------------------------- 1 | // $Id: hit.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HIT_H 20 | #error "hit.h multiply included" 21 | #else 22 | 23 | #define HIT_H 24 | 25 | 26 | #ifndef HITCON_H 27 | #include "hitcon.h" 28 | #endif 29 | 30 | #ifndef HITCMP_H 31 | #include "hitcmp.h" 32 | 33 | #endif 34 | 35 | #ifndef HITDEF_H 36 | #include "hitdef.h" 37 | #endif 38 | 39 | #ifndef HITMAN_H 40 | #include "hitman.h" 41 | #endif 42 | 43 | #ifndef HITOUT_H 44 | #include "hitout.h" 45 | #endif 46 | 47 | #ifndef METHOD_H 48 | #include "method.h" 49 | #endif 50 | 51 | #ifndef PLUGIN_H 52 | #include "plugin.h" 53 | #endif 54 | 55 | extern struct 56 | { 57 | Z8 *dbp_tag; 58 | dbPlugin *dbp; 59 | } 60 | dbPlugins[]; 61 | 62 | #endif 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp.c: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp.c,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HITCMP_H 20 | #error "hitcmp.h multiply included" 21 | #else 22 | #define HITCMP_H 23 | 24 | #ifndef STDIO_H 25 | #include 26 | #include 27 | #endif 28 | 29 | #ifndef C_H 30 | #include "../c.h" 31 | #endif 32 | 33 | #ifndef HITCON_H 34 | #include "hitcon.h" 35 | #endif 36 | 37 | typedef struct hitcmp_st hitcmp; 38 | 39 | #ifndef HITDEF_H 40 | #include "hitdef.h" 41 | #endif 42 | 43 | struct hitcmp_st 44 | { 45 | Z32 (*h2h_cmp_func) (Z32 *, Z32 *, hitdef *, Z32); 46 | Z32 (*h2m_cmp_func) (Z32 *, Z32 *, hitdef *, Z32); 47 | Z32 (*m2m_cmp_func) (Z32 *, Z32 *, hitdef *, Z32); 48 | 49 | Z32 (*h2h_sort_func) (Z32 *, Z32 *, hitdef *, Z32); 50 | Z32 (*h2m_sort_func) (Z32 *, Z32 *, hitdef *, Z32); 51 | 52 | Z32 (*cntxt_cmp_func)(Z32 *, Z32 *, hitdef *, Z32); 53 | Z32 (*h2m_cntxt_cmp_func)(Z32 *, Z32 *, hitdef *, Z32); 54 | 55 | Z32 (*h2m_put_func) (Z32 *, Z32 *, Z32 *, hitdef *, Z32); 56 | Z32 (*hitsize_func) (hitdef *, N8); 57 | 58 | void *config; 59 | void *opt; 60 | 61 | N8 type; 62 | 63 | N8 context; 64 | N8 s_context; 65 | N8 r_context; 66 | 67 | N8 merge; 68 | 69 | N8 distance; 70 | 71 | N8 n_level; 72 | N8 n_real; 73 | 74 | N8 boolean_op; 75 | 76 | }; 77 | 78 | #define HIT_CMP_COOC 1 79 | #define HIT_CMP_PHRASE 2 80 | #define HIT_CMP_PROXY 3 81 | #define HIT_CMP_SENTENCE 4 82 | 83 | #include "hitcmp_cooc.h" 84 | #include "hitcmp_phrase.h" 85 | #include "hitcmp_proxy.h" 86 | #include "hitcmp_sent.h" 87 | 88 | #define HIT_CMP_ARGZ_USAGE "{SEARCH OPTIONS} are: \ 89 | (cooc[:context]|phrase[:distance]|proxy[:distance])" 90 | 91 | #endif 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp_cooc.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp_cooc.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | extern Z8 *get_method_info_cooc (void); 20 | extern Z32 build_search_level_cooc (hitcmp *, Z8 *, Z32); 21 | 22 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp_phrase.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp_phrase.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | extern Z8 *get_method_info_phrase (void); 20 | extern Z32 build_search_level_phrase (hitcmp *, Z8 *, Z32); 21 | 22 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp_proxy.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp_proxy.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | extern Z8 *get_method_info_proxy (void); 21 | extern Z32 build_search_level_proxy (hitcmp *, Z8 *, Z32); 22 | 23 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcmp_sent.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcmp_proxy.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | extern Z8 *get_method_info_sentence (void); 21 | extern Z32 build_search_level_sentence (hitcmp *, Z8 *, Z32); 22 | 23 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcon.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcon.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | /* 21 | this file provides the constants that define the structure of the 22 | TLF v.2 occurence index ("hit") 23 | */ 24 | 25 | 26 | #ifdef HITCON_H 27 | #error "hitcon.h multiply included" 28 | #else 29 | #define HITCON_H 30 | 31 | /* 32 | TLF v.2 occurence indices have a fixed-field structure; 33 | each occurence index has 9 fields; 34 | */ 35 | 36 | #define INDEX_DEF_FIELDS 9 37 | #define FIELDS 9 38 | 39 | /* 40 | The following fields are stored for each occurence: 41 | */ 42 | 43 | #define INDEX_DEF_DOCUMENT 1 /* document number */ 44 | #define INDEX_DEF_P1 2 /* level 1 part number */ 45 | #define INDEX_DEF_P2 3 /* level 2 part number */ 46 | #define INDEX_DEF_P3 4 /* level 3 part number */ 47 | #define INDEX_DEF_PARAGRAPH 5 /* paragraph number */ 48 | #define INDEX_DEF_SENTENCE 6 /* sentence number */ 49 | #define INDEX_DEF_WORD 7 /* word number */ 50 | 51 | #define INDEX_DEF_OFFSET 8 /* byte offset */ 52 | #define INDEX_DEF_PAGE 9 /* page number */ 53 | 54 | #endif 55 | 56 | -------------------------------------------------------------------------------- /libphilo/plugin/hitcrp.h: -------------------------------------------------------------------------------- 1 | // $Id: hitcrp.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HITCRP_H 20 | #error "hitcrp.h multiply included" 21 | #else 22 | #define HITCRP_H 23 | 24 | #ifndef STDIO_H 25 | #include 26 | #endif 27 | 28 | #define HIT_CRP_BINARY 0 29 | #define HIT_BIN_ASCII 1 30 | 31 | 32 | extern Z32 hit_crp_args(hitdef *, Z32 *, Z32 *, Z32, Z8 *); 33 | Z32 h2h_cmp_crp ( Z32 *a, Z32 *b, hitdef *hit_def, Z32 level ); 34 | #endif 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /libphilo/plugin/hitdef.c: -------------------------------------------------------------------------------- 1 | // $Id: hitdef.c,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifndef C_H 20 | #include "../c.h" 21 | #endif 22 | 23 | #ifndef HITDEF_H 24 | #include "hitdef.h" 25 | #endif 26 | 27 | #include "searchmethods.h" 28 | 29 | hitdef *new_hitdef ( N32 nlevels ) 30 | { 31 | hitdef *ret; 32 | Z32 i; 33 | 34 | 35 | ret = (hitdef *) malloc (sizeof (hitdef)); 36 | 37 | ret->depth = nlevels; 38 | ret->depth_r = 0; 39 | 40 | ret->fields = INDEX_DEF_FIELDS; 41 | 42 | ret->levels = (hitcmp *) malloc ( nlevels * sizeof(hitcmp) ); 43 | 44 | ret->searchmethods = SearchMethods; 45 | 46 | return ret; 47 | } 48 | 49 | void old_hitdef ( hitdef *hc ) 50 | { 51 | free (hc->levels); 52 | free (hc); 53 | } 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /libphilo/plugin/hitdef.h: -------------------------------------------------------------------------------- 1 | // $Id: hitdef.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HITDEF_H 20 | #error "hitdef.h multiply included" 21 | #else 22 | #define HITDEF_H 23 | 24 | #ifndef C_H 25 | #include "../c.h" 26 | #endif 27 | 28 | #ifndef HITCON_H 29 | #include "hitcon.h" 30 | #endif 31 | 32 | typedef struct hitdef_st hitdef; 33 | 34 | #ifndef HITCMP_H 35 | #include "hitcmp.h" 36 | #endif 37 | 38 | #ifndef METHOD_H 39 | #include "method.h" 40 | #endif 41 | 42 | struct hitdef_st 43 | { 44 | N32 depth; 45 | N32 depth_r; 46 | 47 | N32 fields; 48 | 49 | hitcmp *levels; 50 | N8 output; 51 | 52 | SearchMethodEntry *searchmethods; /* search methods */ 53 | }; 54 | 55 | extern hitdef *new_hitdef ( N32 ); 56 | extern void old_hitdef ( hitdef * ); 57 | 58 | #endif 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /libphilo/plugin/hitman.h: -------------------------------------------------------------------------------- 1 | // $Id: hitman.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HITMAN_H 20 | #error "hitman.h multiply included" 21 | #else 22 | #define HITMAN_H 23 | 24 | extern Z32 hit_put (); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /libphilo/plugin/hitout.h: -------------------------------------------------------------------------------- 1 | // $Id: hitout.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef HITOUT_H 20 | #error "hitout.h multiply included" 21 | #else 22 | #define HITOUT_H 23 | 24 | #ifndef STDIO_H 25 | #include 26 | #endif 27 | 28 | #define HIT_OUT_BINARY 0 29 | #define HIT_OUT_ASCII 1 30 | 31 | #define HIT_OUT_ARGZ_USAGE "{PRINT OPTIONS} are: \ 32 | (a[scii*]|b[inary*])" 33 | 34 | #define hitout_size(c,n) (sizeof(Z16)*(c 21 | dbPlugin artfl = 22 | { 23 | new_hitdef, 24 | get_plugin_info_artfl 25 | }; 26 | 27 | struct 28 | { 29 | Z8 *dbp_tag; 30 | dbPlugin *dbp; 31 | } 32 | dbPlugins[] = 33 | { 34 | { (Z8 *)"artfl2t", &artfl }, 35 | { 0, 0 } 36 | }; 37 | 38 | Z8 *get_plugin_info_artfl () 39 | { 40 | Z8 *ret; 41 | 42 | ret = malloc ( 256* sizeof(Z8) ); 43 | sprintf ( (char *)ret, "This is the builtin (default) plugin for ARTFL textual database v.2t" ); 44 | 45 | return ret; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /libphilo/plugin/plugin.h: -------------------------------------------------------------------------------- 1 | // $Id: plugin.h,v 2.12 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifdef PLUGIN_H 20 | #error "plugin.h multiply included" 21 | #else 22 | #define PLUGIN_H 23 | #ifndef C_H 24 | #include "../c.h" 25 | #endif 26 | 27 | #ifndef HITDEF_H 28 | #include "hitdef.h" 29 | #endif 30 | 31 | typedef struct 32 | { 33 | hitdef *(*create_hitdef)(N32); 34 | Z8 *(*get_plugin_info)(void); 35 | } 36 | dbPlugin; 37 | 38 | extern Z8 *get_plugin_info_artfl(); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /libphilo/plugin/searchmethods.h: -------------------------------------------------------------------------------- 1 | // $Id: searchmethods.h,v 2.11 2004/05/28 19:22:08 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #ifndef METHOD_H 20 | #include "method.h" 21 | #endif 22 | 23 | extern SearchMethodEntry SearchMethods[]; 24 | -------------------------------------------------------------------------------- /libphilo/retreive.h: -------------------------------------------------------------------------------- 1 | // $Id: retreive.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | /* retreive.h: header file for the hit retreival subsystem */ 21 | 22 | #ifdef RETREIVE_H 23 | #error "retreive.h multiply included" 24 | #else 25 | 26 | #define RETREIVE_H 27 | 28 | #ifndef C_H 29 | #include "c.h" 30 | #endif 31 | 32 | #ifndef SEARCH_H 33 | #include "search.h" 34 | #endif 35 | 36 | #define RETR_BLK_OK 0 37 | #define RETR_BLK_CLEAN 1 38 | #define RETR_HITS_CACHED 2 39 | #define RETR_END_OF_MAP 4 40 | #define RETR_RESMAP_FULL 8 41 | #define RETR_BUMMER 16 42 | #define RETR_REACHED_NEXT_BLOCK_BOUNDARY 32 43 | 44 | 45 | extern N32 retreive_hits ( Search s, N level, Gmap, Gmap ); 46 | N32 retreive_hits ( Search s, N level, Gmap map, Gmap res ); 47 | Z32 process_block_unit ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 48 | Z32 process_hit_block ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 49 | Gmap retreive_cached_hits ( Search s, N8 bn, Word w, N32 n, Gmap map, N32 *howmany ); 50 | Gmap retreive_hit_block ( Search s, N8 bn, Word w, N32 n, Gmap map, N32 *howmany ); 51 | Z32 filternload_hits ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap hits, N32 howmany, Gmap res ) ; 52 | Z32 load_hits ( Search s, Gmap hits, N32 howmany, Gmap res ); 53 | Z32 process_single_entry ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 54 | Z32 filter_single_entry ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 55 | Z32 process_hit_block_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 56 | Gmap retreive_hit_block_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res, N32 *howmany ); 57 | Gmap retreive_cached_hits_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res, N32 *howmany ); 58 | Z32 process_single_entry_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res ); 59 | Z32 filternload_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap hits, Z32 howmany, Gmap res ); 60 | Z32 chkstatus_EOM ( Z32 status ); 61 | Z32 chkstatus_CACH ( Z32 status ); 62 | Z32 chkstatus_ERR ( Z32 status ); 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /libphilo/search.h: -------------------------------------------------------------------------------- 1 | // $Id: search.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #ifdef SEARCH_H 21 | #error "search.h multiply included" 22 | #else 23 | #define SEARCH_H 24 | #include "db/db.h" 25 | #ifndef C_H 26 | #include "c.h" 27 | #endif 28 | 29 | #define MAP_INIT_LEN 8192 30 | 31 | #define DEFAULT_BATCH_LIMIT 8192 32 | #define DEFAULT_PRINT_LIMIT 3000 33 | 34 | #define SEARCH_PASS_OK 0 35 | #define SEARCH_BUMMER_OCCURED 1 36 | #define SEARCH_PASS_FINISHED 2 37 | #define SEARCH_BATCH_FINISHED 3 38 | #define SEARCH_BATCH_LIMIT_REACHED 4 39 | #define SEARCH_PRINT_LIMIT_REACHED 5 40 | 41 | #define BAD_ARGZ 1 42 | 43 | #define BAD_ENGINE_ARGZ "badly defined output (-E:) arguments" 44 | #define BAD_SEARCH_ARGZ "badly defined search (-S:) arguments" 45 | #define BAD_CORPUS_ARGZ "badly defined corpus (-C:) arguments" 46 | #define BAD_OUTPUT_ARGZ "badly defined output (-P:) arguments" 47 | #define BAD_PLUGIN_ARGZ "badly defined plugin (-D:) argument" 48 | typedef struct Search *Search, Search_; 49 | 50 | #ifndef BATCH_H 51 | #include "level.h" 52 | #endif 53 | 54 | #ifndef HIT_H 55 | #include "plugin/hit.h" 56 | #endif 57 | 58 | #ifndef LOG_H 59 | #include "log.h" 60 | #endif 61 | 62 | struct Search 63 | { 64 | dbh *db; 65 | N32 depth; 66 | N32 depth_r; 67 | Batch batches; 68 | 69 | hitdef *hit_def; 70 | 71 | 72 | Z32 bn; 73 | 74 | Z32 bincorpus; 75 | Z8 *corpus; 76 | N8 cfactor; 77 | 78 | Gmap map; 79 | 80 | Z32 debug; 81 | 82 | Z32 print_limit; 83 | Z32 n_printed; 84 | Z32 batch_limit; 85 | /* Z32 offset; */ 86 | Z32 soft_limit; 87 | 88 | Z32 exitcode; 89 | Z8 errstr[1024]; 90 | 91 | }; 92 | 93 | extern Search new_searchObject (); 94 | Search new_search( dbh *db, Z8 *method, Z8 *arg, int ascii, int limit, int corpussize, char * corpusfile); 95 | int set_search_method( Search s, Z8 *methodstring, Z8 *argstring); 96 | #endif 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /libphilo/search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from __future__ import absolute_import 3 | import sys,os 4 | from ctypes import * 5 | stdlib=cdll.LoadLibrary("libc.dylib") 6 | 7 | stdin = stdlib.fdopen(sys.stdin.fileno(),"r") 8 | 9 | libphilo = cdll.LoadLibrary("./libphilo.dylib") 10 | 11 | db = libphilo.init_dbh_folder("/var/lib/philologic/databases/PerseusGreekDev/") 12 | 13 | s = libphilo.new_search(db,"phrase",None,1,100000,0,None) 14 | 15 | libphilo.process_input(s,stdin) 16 | 17 | 18 | libphilo.search_pass(s,0) 19 | -------------------------------------------------------------------------------- /libphilo/search4.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "c.h" 7 | #include "search.h" 8 | 9 | int main(int argc, char **argv) { 10 | 11 | int c; 12 | int option_index; 13 | int remaining_argument = 0; 14 | int argcounter = 1; 15 | 16 | char method[256]; 17 | int method_set = 0; 18 | char dbname[256]; 19 | int dbname_set = 0; 20 | char search_arg[256]; 21 | int arg_set = 0; 22 | char *temp_search_arg = NULL; 23 | 24 | int ascii_set = 0; 25 | int corpussize = 1; 26 | char corpusfile[256]; 27 | int corpusfile_set = 0; 28 | int debug = 0; 29 | int limit = 0; 30 | 31 | Search s; 32 | dbh *db; 33 | int status; 34 | 35 | char *usage = "search4 [--ascii --corpussize c --corpusfile f --debug d --limit l] dbname [search method]\n"; 36 | 37 | static struct option long_options[] = 38 | { 39 | {"ascii", no_argument, 0, 'a'}, 40 | {"corpussize", required_argument, 0, 'c'}, 41 | {"corpusfile", required_argument, 0, 'f'}, 42 | {"debug", required_argument, 0, 'd'}, 43 | {"limit", required_argument, 0, 'l'}, 44 | {0,0,0,0} 45 | }; 46 | 47 | while (0 < (c = getopt_long(argc, argv, "ac:d:f:l:", long_options, &option_index) ) ) { 48 | //while we step through all options in argv: 49 | fprintf(stderr,"%s is set. ", long_options[option_index].name); 50 | if (optarg) { 51 | fprintf (stderr," with arg %s", optarg); 52 | } 53 | switch(c) { 54 | case 'a': 55 | ascii_set = 1; 56 | break; 57 | case 'l': 58 | limit = atol(optarg); 59 | break; 60 | case 'c': 61 | corpussize = atoi(optarg); 62 | break; 63 | case 'f': 64 | strncpy(corpusfile,optarg,255); 65 | corpusfile_set = 1; 66 | default: 67 | break; 68 | } 69 | fprintf(stderr,"\n"); 70 | } 71 | 72 | if (!corpusfile_set) { 73 | corpussize = 0; 74 | } 75 | 76 | while (optind < argc) { 77 | if (argcounter == 1) { 78 | strncpy(dbname, argv[optind],256); 79 | fprintf(stderr,"database name is %s\n",dbname); 80 | dbname_set = 1; 81 | } 82 | if (argcounter == 2) { 83 | strncpy(method, argv[optind],256); 84 | // printf("search method is %s\n",method); 85 | method_set = 1; 86 | } 87 | if (argcounter == 3) { 88 | strncpy(search_arg, argv[optind], 256); 89 | // printf("search arg is %s\n",search_arg); 90 | arg_set = 1; 91 | } 92 | optind += 1; 93 | argcounter += 1; 94 | } 95 | 96 | if (!dbname_set) { 97 | printf("%s", usage); 98 | return 1; 99 | } 100 | 101 | db = init_dbh_folder(dbname); 102 | if (!method_set) { 103 | strncpy(method,"phrase",256); 104 | } 105 | if (arg_set) { 106 | temp_search_arg = malloc(sizeof(Z8 *) * 256); 107 | strncpy(temp_search_arg,search_arg,256); 108 | } 109 | s = new_search(db, method, temp_search_arg, ascii_set,limit,corpussize,corpusfile); 110 | status = process_input ( s, stdin ); 111 | if ( status == BATCH_EMPTY ) { 112 | fprintf(stderr,"no hits found.\n"); 113 | return 0; 114 | } 115 | // s->batches->map = s->map; 116 | while ( status = search_pass ( s, 0 ) == SEARCH_PASS_OK ) { 117 | continue; 118 | } 119 | delete_search(s); 120 | delete_dbh(db); 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /libphilo/word.c: -------------------------------------------------------------------------------- 1 | // $Id: word.c,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | #include 20 | 21 | #include "word.h" 22 | #include "db/unpack.h" 23 | #ifndef SEARC_H 24 | #include "search.h" 25 | #endif 26 | 27 | Z32 init_wordObject ( Search s, Z8 *word, Word w, N32 *block_n ) 28 | { 29 | /* 30 | it's already malloc-ed somewhere else; we get long 31 | word lists occasionally and they are malloc-ed in bulk; 32 | */ 33 | 34 | Z32 *tmp; 35 | w->type=0; 36 | w->freq=0; 37 | w->blkcount=0; 38 | w->offset=0; 39 | if (( tmp = hit_lookup (s->db, word, 40 | &(w->type), 41 | &(w->freq), 42 | &(w->blkcount), 43 | &(w->offset))) 44 | 45 | == NULL) 46 | { 47 | /* not found */ 48 | return 0; 49 | } 50 | 51 | w->dir = new_Gmap ( 0, s->hit_def->fields ); 52 | 53 | w->dir->gm_l = w->blkcount; 54 | w->dir->gm_h = tmp; 55 | 56 | w->mapctr = 0; 57 | w->blkproc = 0; 58 | 59 | w->blk_cached = -1; 60 | w->n_cached = 0; 61 | w->cached = NULL; 62 | 63 | *block_n = w->type ? w->blkcount : w->freq; 64 | 65 | return w->freq; 66 | 67 | } 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /libphilo/word.h: -------------------------------------------------------------------------------- 1 | // $Id: word.h,v 2.11 2004/05/28 19:22:06 o Exp $ 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine 3 | // Copyright (C) 2004 University of Chicago 4 | // 5 | // This program is free software; you can redistribute it and/or modify 6 | // it under the terms of the Affero General Public License as published by 7 | // Affero, Inc.; either version 1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // Affero General Public License for more details. 14 | // 15 | // You should have received a copy of the Affero General Public License 16 | // along with this program; if not, write to Affero, Inc., 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA. 18 | 19 | 20 | #ifdef WORD_H 21 | #error "word.h multiply included" 22 | #else 23 | 24 | #define WORD_H 25 | 26 | #ifndef C_H 27 | #include "c.h" 28 | #endif 29 | 30 | #ifndef HIT_H 31 | #include "plugin/hit.h" 32 | #endif 33 | 34 | #ifndef GMAP_H 35 | #include "gmap.h" 36 | #endif 37 | 38 | #define W_LENGTH_MAX 1024 39 | #define INITWORDS 1024 40 | 41 | typedef struct Word *Word, Word_; 42 | 43 | struct Word 44 | { 45 | /*hit *dir;*/ 46 | Gmap dir; 47 | 48 | N32 type; 49 | N32 freq; 50 | 51 | N32 blkcount; 52 | N64 offset; 53 | 54 | N32 blkproc; 55 | /* hit hitproc;*/ 56 | 57 | N32 mapctr; 58 | 59 | Z32 blk_cached; /* block from which hits have been cached */ 60 | N32 n_cached; /* number of hits cached */ 61 | Z32 *cached; /* hits cached */ 62 | 63 | }; 64 | 65 | #endif /* #ifdef WORD_H */ 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /python/README: -------------------------------------------------------------------------------- 1 | These are the Python bindings for the PhiloLogic search engine. 2 | 3 | This package is only useful for the TEI Parser which you can import for use 4 | outside of PhiloLogic. 5 | 6 | To get the full functionality of PhiloLogic, you need to install the search core 7 | as well as the web components. See https://github.com/ARTFL-Project/PhiloLogic4 8 | for instructions and further documentation. -------------------------------------------------------------------------------- /python/philologic/__init__.py: -------------------------------------------------------------------------------- 1 | from . import shlax 2 | from . import shlaxtree 3 | from .TagCensus import TagCensus 4 | from .runtime import DB 5 | -------------------------------------------------------------------------------- /python/philologic/loadtime/PhiloLoad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | 6 | from philologic.loadtime.Loader import Loader, setup_db_dir 7 | from philologic.loadtime.LoadOptions import CONFIG_FILE, LoadOptions 8 | 9 | os.environ["LC_ALL"] = "C" # Exceedingly important to get uniform sort order. 10 | os.environ["PYTHONIOENCODING"] = "utf-8" 11 | 12 | 13 | def philoload(): 14 | load_options = LoadOptions() 15 | load_options.parse(sys.argv) 16 | setup_db_dir(load_options["db_destination"], load_options["web_app_dir"], force_delete=load_options.force_delete) 17 | 18 | # Database load 19 | l = Loader.set_class_attributes(load_options.values) 20 | l.add_files(load_options.files) 21 | if load_options.bibliography: 22 | load_metadata = l.parse_bibliography_file(load_options.bibliography, load_options.sort_order) 23 | else: 24 | load_metadata = l.parse_metadata(load_options.sort_order, header=load_options.header) 25 | l.set_file_data(load_metadata, l.textdir, l.workdir) 26 | l.parse_files(load_options.cores) 27 | l.merge_objects() 28 | l.analyze() 29 | l.setup_sql_load() 30 | l.post_processing() 31 | l.finish() 32 | if l.deleted_files: 33 | print( 34 | "The following files where not loaded due to invalid data in the header:\n{}".format( 35 | "\n".join(l.deleted_files) 36 | ) 37 | ) 38 | 39 | print(f"Application viewable at {os.path.join(CONFIG_FILE.url_root, load_options.dbname)}\n") 40 | -------------------------------------------------------------------------------- /python/philologic/loadtime/__init__.py: -------------------------------------------------------------------------------- 1 | from philologic.loadtime.Parser import XMLParser 2 | from philologic.loadtime.PhiloLoad import philoload 3 | -------------------------------------------------------------------------------- /python/philologic/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | """Runtime exports""" 2 | 3 | from philologic.runtime.access_control import check_access, login_access 4 | from philologic.runtime.find_similar_words import find_similar_words 5 | from philologic.runtime.FragmentParser import FragmentParser 6 | from philologic.runtime.get_text import get_concordance_text, get_tei_header 7 | from philologic.runtime.pages import page_interval 8 | from philologic.runtime.Query import parse_query 9 | from philologic.runtime.reports import ( 10 | bibliography_results, 11 | collocation_results, 12 | concordance_results, 13 | filter_words_by_property, 14 | frequency_results, 15 | generate_text_object, 16 | generate_time_series, 17 | generate_toc_object, 18 | generate_word_frequency, 19 | get_start_end_date, 20 | kwic_hit_object, 21 | kwic_results, 22 | group_by_metadata, 23 | group_by_range, 24 | landing_page_bibliography, 25 | aggregation_by_field, 26 | ) 27 | from philologic.runtime.web_config import WebConfig 28 | from philologic.runtime.WSGIHandler import WSGIHandler 29 | -------------------------------------------------------------------------------- /python/philologic/runtime/collocation_scores.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Compute collocation scores""" 3 | 4 | 5 | from math import log 6 | 7 | 8 | def pointwise_mutual_information(total_word_count, collocate_count, collocate, cursor): 9 | """Calculate Pointwise Mutual Information.""" 10 | if collocate_count < 5: 11 | return 0 12 | query = """select count(*) from words where philo_name='%s'""" % collocate 13 | cursor.execute(query) 14 | total_collocate_count = cursor.fetchone()[0] 15 | score = log(collocate_count / total_word_count * total_collocate_count) 16 | return score 17 | -------------------------------------------------------------------------------- /python/philologic/runtime/find_similar_words.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Find similar words to query term.""" 3 | 4 | 5 | import hashlib 6 | import os 7 | 8 | from Levenshtein import ratio 9 | from philologic.runtime.Query import get_expanded_query 10 | from unidecode import unidecode 11 | 12 | 13 | def get_all_words(db, request): 14 | """Expand query to all search terms.""" 15 | words = request["q"].replace('"', "") 16 | hits = db.query(words) 17 | hits.finish() 18 | expanded_terms = get_expanded_query(hits) 19 | if expanded_terms: 20 | word_groups = [] 21 | for word_group in expanded_terms: 22 | normalized_group = [] 23 | for word in word_group: 24 | word = word.replace('"', "") 25 | if db.locals.ascii_conversion is True: 26 | word = unidecode(word) 27 | normalized_group.append(word) 28 | word_groups.append(normalized_group) 29 | return word_groups 30 | return [words.split()] 31 | 32 | 33 | def find_similar_words(db, config, request): 34 | """Edit distance function.""" 35 | # Check if lookup is cached 36 | hashed_query = hashlib.sha256() 37 | hashed_query.update(request["q"].encode("utf8")) 38 | hashed_query.update(str(request.approximate_ratio).encode("utf8")) 39 | approximate_filename = os.path.join(config.db_path, f"data/hitlists/{hashed_query.hexdigest()}.approximate_terms") 40 | if os.path.isfile(approximate_filename): 41 | with open(approximate_filename, encoding="utf8") as fh: 42 | approximate_terms = fh.read().strip() 43 | return approximate_terms 44 | query_groups = get_all_words(db, request) 45 | file_path = os.path.join(config.db_path, "data/frequencies/normalized_word_frequencies") 46 | new_query_groups = [set([]) for i in query_groups] 47 | with open(file_path, encoding="utf8") as fh: 48 | for line in fh: 49 | line = line.strip() 50 | try: 51 | normalized_word, regular_word = line.split("\t") 52 | for pos, query_group in enumerate(query_groups): 53 | for query_word in query_group: 54 | if ratio(query_word, normalized_word) >= float(request.approximate_ratio): 55 | new_query_groups[pos].add(f'"{regular_word}"') 56 | except ValueError: 57 | pass 58 | new_query_groups = " ".join([" | ".join(group) for group in new_query_groups]) 59 | with open(approximate_filename, "w", encoding="utf8") as cached_file: 60 | cached_file.write(new_query_groups) 61 | return new_query_groups 62 | -------------------------------------------------------------------------------- /python/philologic/runtime/link.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Build PhiloLogic links""" 3 | 4 | from urllib.parse import quote_plus 5 | 6 | 7 | def url_encode(q_params): 8 | """URL encode.""" 9 | encoded_str = [] 10 | for k, v in q_params: 11 | if v: 12 | if isinstance(v, list): 13 | for s in v: 14 | encoded_str.append(f'{quote_plus(k, safe="/")}={quote_plus(s, safe="/")}') 15 | else: 16 | encoded_str.append(f'{quote_plus(k, safe="/")}={quote_plus(v, safe="/")}') 17 | else: # Value is None 18 | encoded_str.append(f'{quote_plus(k, safe="/")}=') 19 | return "&".join(encoded_str) 20 | 21 | 22 | def make_object_link(philo_id, hit_bytes): 23 | """Takes a valid PhiloLogic object, and returns a relative URL representation of such.""" 24 | href = f'./{"/".join(map(str, philo_id))}{byte_query(hit_bytes)}' 25 | return href 26 | 27 | 28 | def make_absolute_object_link(config, philo_id, byte_offsets=None): 29 | """Takes a valid PhiloLogic object, and returns an absolute URL representation of such.""" 30 | href = f"/navigate/{'/'.join(map(str, philo_id))}" 31 | if byte_offsets is not None: 32 | href += byte_query(byte_offsets) 33 | return href 34 | 35 | 36 | def make_absolute_query_link(config, params, script_name="/query", **extra_params): 37 | """Takes a dictionary of query parameters as produced by WSGIHandler, 38 | and returns an absolute URL representation of such.""" 39 | params = dict([i for i in params]) 40 | for k, v in extra_params.items(): 41 | params[k] = v 42 | query_string = url_encode(list(params.items())) 43 | if script_name: 44 | return f"{script_name}?{query_string}" 45 | return query_string 46 | 47 | 48 | def byte_query(hit_bytes): 49 | """This is used for navigating concordance results and highlighting hits""" 50 | return f'?{"&".join([f"byte={byte}" for byte in hit_bytes])}' 51 | 52 | 53 | def make_byte_range_link(config, philo_id, start_byte, end_byte): 54 | """Return an absolute link with byte range to highlight""" 55 | href = make_absolute_object_link(config, philo_id.split()) 56 | href += f"?start_byte={start_byte}&end_byte={end_byte}" 57 | return href 58 | 59 | 60 | def byte_range_to_link(db, config, request, obj_level="div1"): 61 | """Find container objects for given byte range and doc id and return links""" 62 | cursor = db.dbh.cursor() 63 | cursor.execute("SELECT philo_id FROM toms WHERE filename=?", (request.filename,)) 64 | doc_id = cursor.fetchone()[0].split()[0] 65 | next_doc_id = str(int(doc_id) + 1) 66 | cursor.execute("SELECT rowid FROM toms WHERE philo_doc_id=?", (next_doc_id,)) 67 | rowid = cursor.fetchone()[0] 68 | cursor.execute( 69 | f"SELECT philo_id FROM toms WHERE rowid < {rowid} and philo_type='{obj_level}' AND philo_id like '{doc_id} %' AND cast(start_byte as decimal) <= {request.start_byte} ORDER BY rowid desc" 70 | ) 71 | philo_id = cursor.fetchone()[0] 72 | philo_id = philo_id.split() 73 | while int(philo_id[-1]) == 0: 74 | philo_id.pop() 75 | link = make_byte_range_link(config, " ".join(philo_id), request.start_byte, request.end_byte) 76 | return link 77 | -------------------------------------------------------------------------------- /python/philologic/runtime/pages.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Page intervals""" 3 | 4 | 5 | def page_interval(num, results, start, end): 6 | """Return page intervals""" 7 | start = int(start) 8 | end = int(end) 9 | num = int(num) 10 | if start <= 0: 11 | start = 1 12 | if end <= 0: 13 | end = start + (num - 1) 14 | results_len = len(results) 15 | if end > results_len and results.done: 16 | end = results_len 17 | n = start - 1 18 | return start, end, n 19 | -------------------------------------------------------------------------------- /python/philologic/runtime/reports/__init__.py: -------------------------------------------------------------------------------- 1 | """Report exports""" 2 | from philologic.runtime.reports.concordance import concordance_results 3 | from philologic.runtime.reports.bibliography import bibliography_results 4 | from philologic.runtime.reports.time_series import generate_time_series, get_start_end_date 5 | from philologic.runtime.reports.navigation import generate_text_object 6 | from philologic.runtime.reports.table_of_contents import generate_toc_object 7 | from philologic.runtime.reports.kwic import kwic_results, kwic_hit_object 8 | from philologic.runtime.reports.generate_word_frequency import generate_word_frequency 9 | from philologic.runtime.reports.frequency import frequency_results 10 | from philologic.runtime.reports.collocation import collocation_results 11 | from philologic.runtime.reports.filter_word_by_property import filter_words_by_property 12 | from philologic.runtime.reports.landing_page import landing_page_bibliography, group_by_range, group_by_metadata 13 | from philologic.runtime.reports.aggregation import aggregation_by_field 14 | -------------------------------------------------------------------------------- /python/philologic/runtime/reports/concordance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Concordance report""" 3 | 4 | import regex as re 5 | from philologic.runtime.pages import page_interval 6 | from philologic.runtime.citations import citations, citation_links 7 | from philologic.runtime.get_text import get_concordance_text 8 | from philologic.runtime.DB import DB 9 | from philologic.runtime.HitList import CombinedHitlist 10 | 11 | 12 | def concordance_results(request, config): 13 | """Fetch concordances results.""" 14 | db = DB(config.db_path + "/data/") 15 | if request.collocation_type: 16 | first_hits = db.query( 17 | request["q"], 18 | request["method"], 19 | request["arg"], 20 | **request.metadata, 21 | ) 22 | second_hits = db.query( 23 | request["left"], 24 | request["method"], 25 | request["arg"], 26 | **request.metadata, 27 | ) 28 | hits = CombinedHitlist(first_hits, second_hits) 29 | else: 30 | hits = db.query( 31 | request["q"], 32 | request["method"], 33 | request["arg"], 34 | sort_order=request["sort_order"], 35 | **request.metadata, 36 | ) 37 | start, end, _ = page_interval(request["results_per_page"], hits, request.start, request.end) 38 | 39 | concordance_object = { 40 | "description": {"start": start, "end": end, "results_per_page": request.results_per_page}, 41 | "query": dict([i for i in request]), 42 | "default_object": db.locals["default_object_level"], 43 | } 44 | 45 | formatting_regexes = [] 46 | if config.concordance_formatting_regex: 47 | for pattern, replacement in config.concordance_formatting_regex: 48 | compiled_regex = re.compile(rf"{pattern}") 49 | formatting_regexes.append((compiled_regex, replacement)) 50 | results = [] 51 | for hit in hits[start - 1 : end]: 52 | citation_hrefs = citation_links(db, config, hit) 53 | metadata_fields = {metadata: hit[metadata] for metadata in db.locals["metadata_fields"]} 54 | citation = citations(hit, citation_hrefs, config, report="concordance") 55 | context = get_concordance_text(db, hit, config.db_path, config.concordance_length) 56 | if formatting_regexes: 57 | for formatting_regex, replacement in formatting_regexes: 58 | context = formatting_regex.sub(rf"{replacement}", context) 59 | result_obj = { 60 | "philo_id": hit.philo_id, 61 | "citation": citation, 62 | "citation_links": citation_hrefs, 63 | "context": context, 64 | "metadata_fields": metadata_fields, 65 | "bytes": hit.bytes, 66 | } 67 | results.append(result_obj) 68 | concordance_object["results"] = results 69 | concordance_object["results_length"] = len(hits) 70 | concordance_object["query_done"] = hits.done 71 | return concordance_object 72 | -------------------------------------------------------------------------------- /python/philologic/runtime/reports/filter_word_by_property.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Filter word by property 3 | Currently unmaintained""" 4 | 5 | from philologic.runtime.citations import citation_links, citations 6 | from philologic.runtime.get_text import get_concordance_text 7 | from philologic.runtime.reports.generate_word_frequency import get_word_attrib 8 | from philologic.runtime.DB import DB 9 | 10 | 11 | def filter_words_by_property(request, config): 12 | """Filter words by property""" 13 | db = DB(config.db_path + "/data/") 14 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 15 | concordance_object = {"query": dict([i for i in request])} 16 | 17 | # Do these need to be captured in wsgi_handler? 18 | word_property = request["word_property"] 19 | word_property_value = request["word_property_value"] 20 | word_property_total = request["word_property_total"] 21 | 22 | new_hitlist = [] 23 | results = [] 24 | position = 0 25 | more_pages = False 26 | 27 | if request.start == 0: 28 | start = 1 29 | else: 30 | start = request.start 31 | 32 | for hit in hits: 33 | # get my chunk of text 34 | hit_val = get_word_attrib(hit, word_property, db) 35 | 36 | if hit_val == word_property_value: 37 | position += 1 38 | if position < start: 39 | continue 40 | new_hitlist.append(hit) 41 | citation_hrefs = citation_links(db, config, hit) 42 | metadata_fields = {} 43 | for metadata in db.locals["metadata_fields"]: 44 | metadata_fields[metadata] = hit[metadata] 45 | citation = citations(hit, citation_hrefs, config) 46 | context = get_concordance_text(db, hit, config.db_path, config.concordance_length) 47 | result_obj = { 48 | "philo_id": hit.philo_id, 49 | "citation": citation, 50 | "citation_links": citation_hrefs, 51 | "context": context, 52 | "metadata_fields": metadata_fields, 53 | "bytes": hit.bytes, 54 | "collocate_count": 1, 55 | } 56 | results.append(result_obj) 57 | 58 | if len(new_hitlist) == (request.results_per_page): 59 | more_pages = True 60 | break 61 | 62 | end = start + len(results) - 1 63 | if len(results) < request.results_per_page: 64 | word_property_total = end 65 | else: 66 | word_property_total = end + 1 67 | concordance_object["results"] = results 68 | concordance_object["query_done"] = hits.done 69 | concordance_object["results_length"] = word_property_total 70 | concordance_object["description"] = { 71 | "start": start, 72 | "end": end, 73 | "results_per_page": request.results_per_page, 74 | "more_pages": more_pages, 75 | } 76 | return concordance_object 77 | -------------------------------------------------------------------------------- /python/philologic/runtime/reports/generate_word_frequency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Generate word frequency 3 | Currently unmaintained.""" 4 | 5 | import timeit 6 | 7 | from philologic.runtime.link import make_absolute_query_link 8 | from philologic.runtime.DB import DB 9 | 10 | 11 | def generate_word_frequency(request, config): 12 | """reads through a hitlist. looks up request["field"] in each hit, and builds up a list of 13 | unique values and their frequencies.""" 14 | db = DB(config.db_path + "/data/") 15 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 16 | field = request["field"] 17 | counts = {} 18 | frequency_object = {} 19 | start_time = timeit.default_timer() 20 | last_hit_done = request.start 21 | try: 22 | for n in hits[request.start :]: 23 | key = get_word_attrib(n, field, db) 24 | if not key: 25 | # NULL is a magic value for queries, don't change it 26 | # recklessly. 27 | key = "NULL" 28 | if key not in counts: 29 | counts[key] = 0 30 | counts[key] += 1 31 | elapsed = timeit.default_timer() - start_time 32 | last_hit_done += 1 33 | if elapsed > 5: 34 | break 35 | 36 | table = {} 37 | for k, v in counts.items(): 38 | url = make_absolute_query_link( 39 | config, 40 | request, 41 | start="0", 42 | end="0", 43 | report="word_property_filter", 44 | word_property=field, 45 | word_property_value=k, 46 | ) 47 | table[k] = {"count": v, "url": url} 48 | 49 | frequency_object["results"] = table 50 | frequency_object["hits_done"] = last_hit_done 51 | if last_hit_done == len(hits): 52 | frequency_object["more_results"] = False 53 | else: 54 | frequency_object["more_results"] = True 55 | 56 | except IndexError: 57 | frequency_object["results"] = {} 58 | frequency_object["more_results"] = False 59 | 60 | frequency_object["results_length"] = len(hits) 61 | frequency_object["query"] = dict([i for i in request]) 62 | 63 | return frequency_object 64 | 65 | 66 | def get_word_attrib(n, field, db): 67 | """Get word attribute""" 68 | words = n.words 69 | key = field 70 | if key == "token": 71 | key = "philo_name" 72 | if key == "morph": 73 | key = "pos" 74 | val = "" 75 | for word in words: 76 | word_obj = word 77 | if val: 78 | val += "_" 79 | if word_obj[key]: 80 | val += word_obj[key] 81 | else: 82 | val += "NULL" 83 | 84 | if isinstance(val, str): 85 | return val.encode("utf-8") 86 | return val 87 | -------------------------------------------------------------------------------- /python/philologic/runtime/reports/navigation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Navigate inside objects""" 3 | 4 | import regex as re 5 | from philologic.runtime.citations import citation_links, citations 6 | from philologic.runtime.get_text import get_text_obj 7 | from philologic.runtime.DB import DB 8 | 9 | 10 | def generate_text_object(request, config, note=False): 11 | """Return text object given an philo_id""" 12 | # verify this isn't a page ID or if this is a note 13 | if len(request.philo_id.split()) == 9 and note is not True: 14 | width = 9 15 | else: 16 | width = 7 17 | db = DB(config.db_path + "/data/", width=width) 18 | if note: 19 | target = request.target.replace("#", "") 20 | doc_id = request.philo_id.split()[0] + " %" 21 | cursor = db.dbh.cursor() 22 | cursor.execute("select philo_id from toms where id=? and philo_id like ? limit 1", (target, doc_id)) 23 | philo_id = cursor.fetchone()["philo_id"].split()[:7] 24 | obj = db[philo_id] 25 | else: 26 | try: 27 | obj = db[request.philo_id] 28 | except ValueError: 29 | obj = db[" ".join(request.path_components)] 30 | philo_id = obj.philo_id 31 | if width != 9: 32 | while obj["philo_name"] == "__philo_virtual" and obj["philo_type"] != "div1": 33 | philo_id.pop() 34 | obj = db[philo_id] 35 | philo_id = list(obj.philo_id) 36 | while int(philo_id[-1]) == 0: 37 | philo_id.pop() 38 | text_object = {"query": dict([i for i in request]), "philo_id": " ".join([str(i) for i in philo_id])} 39 | text_object["prev"] = neighboring_object_id(db, obj.prev, width) 40 | text_object["next"] = neighboring_object_id(db, obj.next, width) 41 | metadata_fields = {} 42 | for metadata in db.locals["metadata_fields"]: 43 | metadata_fields[metadata] = obj[metadata] 44 | text_object["metadata_fields"] = metadata_fields 45 | if width != 9: 46 | citation_hrefs = citation_links(db, config, obj) 47 | citation = citations(obj, citation_hrefs, config, report="navigation") 48 | else: 49 | db = DB(config.db_path + "/data/", width=7) 50 | doc_obj = db[obj.philo_id[0]] 51 | citation_hrefs = citation_links(db, config, doc_obj) 52 | citation = citations(doc_obj, citation_hrefs, config, report="navigation") 53 | text_object["citation"] = citation 54 | text, imgs = get_text_obj(obj, config, request, db.locals["token_regex"], note=note) 55 | if config.navigation_formatting_regex: 56 | for pattern, replacement in config.navigation_formatting_regex: 57 | text = re.sub(r"%s" % pattern, "%s" % replacement, text) 58 | text_object["text"] = text 59 | text_object["imgs"] = imgs 60 | return text_object 61 | 62 | 63 | def neighboring_object_id(db, philo_id, width): 64 | """Get neighboring object ID""" 65 | if not philo_id: 66 | return "" 67 | philo_id = philo_id.split()[:width] 68 | while philo_id[-1] == "0": 69 | philo_id.pop() 70 | philo_id = str(" ".join(philo_id)) 71 | obj = db[philo_id] 72 | if obj["philo_name"] == "__philo_virtual" and obj["philo_type"] != "div1": 73 | # Remove the last number (1) in the philo_id and point to one object 74 | # level lower 75 | philo_id = " ".join(philo_id.split()[:-1]) 76 | return philo_id 77 | -------------------------------------------------------------------------------- /python/philologic/runtime/web_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | from philologic.Config import MakeWebConfig 6 | from orjson import dumps 7 | 8 | 9 | class brokenConfig(object): 10 | """Broken config returned with some default values""" 11 | 12 | def __init__(self, db_path, traceback): 13 | self.web_config_path = db_path + "/data/web_config.cfg" 14 | self.valid_config = False 15 | self.traceback = traceback 16 | self.db_path = db_path 17 | 18 | def __getitem__(self, _): 19 | return "" 20 | 21 | def to_json(self): 22 | """Return JSON representation of config""" 23 | return dumps({"valid_config": False, "traceback": self.traceback, "web_config_path": self.web_config_path}) 24 | 25 | 26 | def WebConfig(db_path): 27 | """Build runtime web config object""" 28 | try: 29 | return MakeWebConfig(db_path + "/data/web_config.cfg") 30 | except Exception as err: 31 | print(err, file=sys.stderr) 32 | return brokenConfig(db_path, str(err)) 33 | -------------------------------------------------------------------------------- /python/philologic/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .pretty_print import pretty_print 2 | from .sort import sort_list 3 | from .convert_entities import convert_entities 4 | from .load_module import load_module 5 | from .metadata_type_handler import extract_full_date, extract_integer 6 | -------------------------------------------------------------------------------- /python/philologic/utils/convert_entities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | from html.entities import name2codepoint 5 | import regex as re 6 | 7 | entities_match = re.compile(r"&#?\w+;") 8 | 9 | 10 | def convert_entities(text): 11 | """Convert entities""" 12 | 13 | def fixup(m): 14 | text = m.group(0) 15 | if text[:2] == "&#": 16 | # character reference 17 | try: 18 | if text[:3] == "&#x": 19 | return chr(int(text[3:-1], 16)) 20 | else: 21 | return chr(int(text[2:-1])) 22 | except ValueError: 23 | pass 24 | else: 25 | # named entity 26 | try: 27 | text = chr(name2codepoint[text[1:-1]]) 28 | except KeyError: 29 | pass 30 | return text # leave as is 31 | 32 | return entities_match.sub(fixup, text) 33 | -------------------------------------------------------------------------------- /python/philologic/utils/load_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Load Python source file""" 3 | 4 | from importlib.machinery import SourceFileLoader 5 | from importlib.util import spec_from_loader, module_from_spec 6 | 7 | 8 | def load_module(module_name, path): 9 | """Load arbitrary Python source file""" 10 | loader = SourceFileLoader(module_name, path) 11 | spec = spec_from_loader(loader.name, loader) 12 | module = module_from_spec(spec) 13 | loader.exec_module(module) 14 | return module 15 | -------------------------------------------------------------------------------- /python/philologic/utils/metadata_type_handler.py: -------------------------------------------------------------------------------- 1 | """Series of functions to extract and/or convert metadata field values to the right SQL type""" 2 | 3 | import datetime 4 | import regex as re 5 | 6 | INTEGER = re.compile(r"^(-?\d{1,})") 7 | YEAR_MONTH_DAY = re.compile(r"(\d+)-(\d+)-(\d+)") 8 | YEAR_MONTH = re.compile(r"^(\d+)-(\d+)\Z") 9 | YEAR = re.compile(r"^(\d+)\Z") 10 | MONTH_MAX_DAY = {1: 31, 2: 29, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31} 11 | 12 | 13 | def day_fail_safe(day, month=None): 14 | """Make sure we have a valid day""" 15 | if month is not None and month != 0: 16 | if day > MONTH_MAX_DAY[month]: 17 | day = 1 18 | if day > 31 or day <= 0: 19 | day = 1 20 | return day 21 | 22 | 23 | def month_fail_safe(month): 24 | """Make sure we have a valid month""" 25 | if month > 12 or month < 1: 26 | month = 1 27 | return month 28 | 29 | 30 | def extract_full_date(date): 31 | """Extract full dates and format as year-month-day""" 32 | full_date_match = re.search(r"^(\d+)-(\d+)-(\d+)", date) 33 | if full_date_match: # e.g. 1987-10-23 34 | year, month, day = map(int, full_date_match.groups()) 35 | month = month_fail_safe(month) 36 | day = day_fail_safe(day, month) 37 | return datetime.date(year, month, day) 38 | month_year_match = re.search(r"^(\d+)-(\d+)$", date) 39 | if month_year_match: # e.g. 1987-10 40 | year, month = map(int, month_year_match.groups()) 41 | month = month_fail_safe(month) 42 | return datetime.date(year, month, 1) 43 | year_match = re.search(r"^(\d+)$", date) 44 | if year_match: # e.g. 1987 45 | year_str = year_match.groups()[0] 46 | if len(year_str) > 4: 47 | year_str = year_str[:4] 48 | year = int(year_str) 49 | return datetime.date(year, 1, 1) 50 | return datetime.date(9999, 12, 31) 51 | 52 | 53 | def extract_integer(field_value): 54 | """Extract integer from field value and return a Python int""" 55 | integer = INTEGER.search(field_value) 56 | if integer is not None: 57 | return int(integer.group()) 58 | else: 59 | return None 60 | -------------------------------------------------------------------------------- /python/philologic/utils/pretty_print.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | def pretty_print(value, htchar="\t", lfchar="\n", indent=0): 5 | """Pretty printing from a Stack Overflow answer: 6 | http://stackoverflow.com/questions/3229419/pretty-printing-nested-dictionaries-in-python#answer-26209900.""" 7 | nlch = lfchar + htchar * (indent + 1) 8 | if type(value) is dict: 9 | if value: 10 | items = [nlch + repr(key) + ": " + pretty_print(value[key], htchar, lfchar, indent + 1) for key in value] 11 | return "{%s}" % (",".join(items) + lfchar + htchar * indent) 12 | else: 13 | return "{}" 14 | elif type(value) is list: 15 | if value: 16 | items = [nlch + pretty_print(item, htchar, lfchar, indent + 1) for item in value] 17 | return "[%s]" % (",".join(items) + lfchar + htchar * indent) 18 | else: 19 | return "[]" 20 | elif type(value) is tuple: 21 | if value: 22 | items = [nlch + pretty_print(item, htchar, lfchar, indent + 1) for item in value] 23 | return "(%s)" % (",".join(items) + lfchar + htchar * indent) 24 | else: 25 | return "()" 26 | else: 27 | return repr(value) 28 | -------------------------------------------------------------------------------- /python/philologic/utils/sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import datetime 4 | 5 | from natsort import natsorted 6 | from unidecode import unidecode 7 | 8 | 9 | def get_key(d, f): 10 | key = d.get(f, "ZZZZZ") 11 | if isinstance(key, datetime.date): 12 | return f"{key.year}-{key.month}-{key.day}" 13 | elif isinstance(key, int): 14 | return key 15 | else: 16 | return unidecode(key) 17 | 18 | 19 | def sort_list(list_to_sort, sort_keys): 20 | """Sort strings converted to ascii""" 21 | 22 | def make_sort_key(d): 23 | key = [get_key(d, f) for f in sort_keys] 24 | return key 25 | 26 | return natsorted(list_to_sort, key=make_sort_key, reverse=False) 27 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | [build-system] 3 | requires = ["setuptools>=61.0", "wheel", "build"] 4 | build-backend = "setuptools.build_meta" 5 | 6 | [project] 7 | name = "philologic" 8 | version = "4.7.5.0" 9 | authors = [ 10 | { name = "Clovis Gladstone", email = "clovisgladstone@artfl.uchicago.edu" }, 11 | ] 12 | license = { file = "LICENSE" } 13 | description = "A concordance search engine for TEI-XML" 14 | readme = "README" 15 | urls = { Homepage = "https://github.com/ARTFL-Project/PhiloLogic4" } 16 | 17 | requires-python = ">=3.10" 18 | dependencies = [ 19 | "regex", 20 | "lxml", 21 | "python-levenshtein", 22 | "natsort", 23 | "multiprocess", 24 | "tqdm", 25 | "orjson", 26 | "black", 27 | "msgpack", 28 | "unidecode", 29 | "lz4", 30 | ] 31 | 32 | [project.scripts] 33 | philoload4 = "philologic.loadtime:philoload" 34 | 35 | 36 | [tool.setuptools] 37 | packages = [ 38 | "philologic", 39 | "philologic.runtime", 40 | "philologic.utils", 41 | "philologic.runtime.reports", 42 | "philologic.loadtime", 43 | ] 44 | -------------------------------------------------------------------------------- /www/.htaccess: -------------------------------------------------------------------------------- 1 | Options +ExecCGI 2 | Options -Indexes 3 | AddHandler cgi-script py 4 | DirectoryIndex dispatcher.py 5 | 6 | RewriteEngine on 7 | RewriteRule ^assets/(.*) app/dist/assets/$1 [L] 8 | RewriteRule ^img/(.*) app/dist/img/$1 [L] 9 | RewriteRule ^concordance(.*) dispatcher.py$1 [PT,QSA] 10 | RewriteRule ^kwic(.*) dispatcher.py$1 [PT,QSA] 11 | RewriteRule ^collocation(.*) dispatcher.py$1 [PT,QSA] 12 | RewriteRule ^time_series(.*) dispatcher.py$1 [PT,QSA] 13 | RewriteRule ^query?report=(.*) dispatcher.py$1 [PT,QSA] 14 | RewriteRule ^navigate(.*) dispatcher.py$1 [PT,QSA] 15 | RewriteRule ^aggregation(.*) dispatcher.py$1 [PT,QSA] 16 | RewriteRule ^bibliography(.*) dispatcher.py$1 [PT,QSA] 17 | 18 | 19 | 20 | AddOutputFilterByType BROTLI_COMPRESS text/html text/plain text/xml text/css text/javascript application/javascript application/xhtml+xml 21 | -------------------------------------------------------------------------------- /www/app/.env: -------------------------------------------------------------------------------- 1 | VUE_APP_I18N_LOCALE=en 2 | VUE_APP_I18N_FALLBACK_LOCALE=en 3 | -------------------------------------------------------------------------------- /www/app/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /dist 4 | 5 | # local env files 6 | .env.local 7 | .env.*.local 8 | 9 | # Log files 10 | npm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | 14 | # Editor directories and files 15 | .idea 16 | .vscode 17 | *.suo 18 | *.ntvs* 19 | *.njsproj 20 | *.sln 21 | *.sw* 22 | -------------------------------------------------------------------------------- /www/app/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 14 | 15 | PhiloLogic4 16 | 17 | 18 | 19 | 25 |
26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /www/app/misconfiguration.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | PhiloLogic4 misconfiguration 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | $CSS 21 | 42 | 43 | 44 | 45 | 46 |
47 |

PhiloLogic configuration error!

48 |
49 | A syntax error was detected in the $CONFIG_FILE configuration file. The following traceback should help you identify the issue: 50 |
51 |
$TRACEBACK
52 |
53 | 54 | 55 | -------------------------------------------------------------------------------- /www/app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "philologic", 3 | "version": "4.7", 4 | "private": true, 5 | "scripts": { 6 | "serve": "npm i && vite preview", 7 | "build": "npm i && vite build && rm -rf node_modules", 8 | "host": "npm i && vite --host" 9 | }, 10 | "dependencies": { 11 | "@intlify/unplugin-vue-i18n": "^0.8.2", 12 | "@popperjs/core": "^2.9.2", 13 | "axios": "^0.21.0", 14 | "bootstrap": "^5.0.1", 15 | "chart.js": "^2.9.3", 16 | "core-js": "^3.13.1", 17 | "glightbox": "^3.2.0", 18 | "gsap": "^3.9.1", 19 | "vue": "^3.2.0", 20 | "vue-i18n": "^9.2.2", 21 | "vue-router": "^4.0.0", 22 | "vue-scrollto": "^2.17.1", 23 | "vuex": "^4.0.0", 24 | "vuex-map-fields": "^1.4.0" 25 | }, 26 | "devDependencies": { 27 | "@originjs/vite-plugin-commonjs": "^1.0.3", 28 | "@vitejs/plugin-vue": "^4.0.0", 29 | "@vue/compiler-sfc": "^3.1.0", 30 | "eslint": "^7.5.0", 31 | "eslint-plugin-vue": "^8.5.0", 32 | "sass": "^1.49.9", 33 | "vite": "^4.1.1" 34 | }, 35 | "eslintConfig": { 36 | "root": true, 37 | "env": { 38 | "node": true 39 | }, 40 | "extends": [ 41 | "plugin:vue/essential", 42 | "eslint:recommended" 43 | ], 44 | "rules": { 45 | "no-console": "off" 46 | }, 47 | "parserOptions": { 48 | "ecmaVersion": 12, 49 | "sourceType": "module" 50 | } 51 | }, 52 | "browserslist": [ 53 | "> 1%", 54 | "last 2 versions" 55 | ] 56 | } -------------------------------------------------------------------------------- /www/app/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/public/favicon.ico -------------------------------------------------------------------------------- /www/app/src/assets/language.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/language.png -------------------------------------------------------------------------------- /www/app/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/logo.png -------------------------------------------------------------------------------- /www/app/src/assets/philo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/philo.png -------------------------------------------------------------------------------- /www/app/src/assets/styles/theme.module.scss: -------------------------------------------------------------------------------- 1 | // Custom Bootstrap changes: don't edit 2 | $popover-max-width: 50%; 3 | .custom-popover { 4 | overflow: auto; 5 | text-align: justify !important; 6 | max-height: 60%; 7 | } 8 | 9 | // Theme colors 10 | $header-color: rgb(245, 219, 157); 11 | $button-color: rgba(143, 57, 49, .8); 12 | $button-color-active: rgb(143, 57, 49); 13 | $link-color: #8f3931; 14 | $passage-color: rgb(180, 106, 85); 15 | // Themed elements 16 | nav.navbar { 17 | background-color: $header-color !important; 18 | } 19 | 20 | $secondary: $button-color; 21 | .btn-secondary.active { 22 | background-color: $button-color-active !important; 23 | } 24 | 25 | .btn-outline-secondary.active { 26 | color: #fff !important; 27 | } 28 | 29 | $info: $button-color; 30 | .btn-light { 31 | border: solid 1px rgb(206, 212, 218) !important; 32 | } 33 | 34 | .btn-light.active { 35 | background-color: #eee !important; 36 | } 37 | 38 | .link-color { 39 | color: $link-color 40 | } 41 | 42 | a { 43 | color: $link-color !important 44 | } 45 | 46 | a.btn-secondary { 47 | color: #fff !important; 48 | } 49 | 50 | .number, 51 | .card-header { 52 | background-color: $header-color !important; 53 | color: $link-color !important; 54 | } 55 | 56 | .input-group-text, 57 | .custom-control-input:checked~.custom-control-label::before, 58 | .custom-control-input:focus~.custom-control-label::before { 59 | color: $link-color !important; 60 | background-color: #fff !important; 61 | border-color: $link-color !important; 62 | } 63 | 64 | .metadata-args, 65 | .remove-metadata, 66 | .term-groups, 67 | .close-pill { 68 | border-color: $link-color !important; 69 | } 70 | 71 | .metadata-label, 72 | .remove-metadata:hover, 73 | .term-group-word:hover, 74 | .close-pill:hover { 75 | background-color: $button-color !important; 76 | color: #fff !important; 77 | } 78 | 79 | .custom-control-label::after { 80 | background-color: $button-color !important; 81 | } 82 | 83 | .letter { 84 | color: $link-color !important; 85 | } 86 | 87 | .letter:hover, 88 | #dico-landing-volume .list-group-item:hover { 89 | background-color: $button-color !important; 90 | color: #fff !important; 91 | } 92 | 93 | #dico-landing-volume a:hover { 94 | color: #fff !important; 95 | } 96 | 97 | #report-error { 98 | color: #fff !important; 99 | } 100 | 101 | .custom-select:focus, 102 | .custom-control-input:checked~.custom-control-label::before, 103 | .custom-control-input:focus~.custom-control-label::before, 104 | input[type="text"]:focus { 105 | box-shadow: 0 0 0 0.05rem $button-color !important; 106 | border-color: $button-color !important; 107 | opacity: .5 108 | } 109 | 110 | .landing-page-btn:focus { 111 | border-color: $link-color !important; 112 | } 113 | 114 | #report button:focus { 115 | mix-blend-mode: hard-light; 116 | } 117 | 118 | :export { 119 | color: $link-color 120 | } -------------------------------------------------------------------------------- /www/app/src/components/Citations.vue: -------------------------------------------------------------------------------- 1 | 14 | 20 | 29 | -------------------------------------------------------------------------------- /www/app/src/components/ExportResults.vue: -------------------------------------------------------------------------------- 1 | 30 | -------------------------------------------------------------------------------- /www/app/src/components/LocaleChanger.vue: -------------------------------------------------------------------------------- 1 | 16 | 35 | -------------------------------------------------------------------------------- /www/app/src/i18n.js: -------------------------------------------------------------------------------- 1 | import { createI18n } from 'vue-i18n' 2 | import messages from "@intlify/unplugin-vue-i18n/messages"; 3 | 4 | 5 | export default createI18n({ 6 | legacy: false, 7 | locale: 'en', 8 | fallbackLocale: 'en', 9 | availableLocales: ["en", "fr"], 10 | messages: messages, 11 | }) -------------------------------------------------------------------------------- /www/app/src/main.js: -------------------------------------------------------------------------------- 1 | import { createApp } from "vue"; 2 | import vueScrollTo from "vue-scrollto"; 3 | import App from "./App.vue"; 4 | import router from "./router"; 5 | import store from "./store"; 6 | import { 7 | paramsFilter, 8 | paramsToRoute, 9 | paramsToUrlString, 10 | copyObject, 11 | saveToLocalStorage, 12 | mergeResults, 13 | sortResults, 14 | deepEqual, 15 | dictionaryLookup, 16 | debug, 17 | } from "./mixins.js"; 18 | import axios from "axios"; 19 | import "bootstrap"; 20 | 21 | import appConfig from "../appConfig.json"; 22 | import i18n from "./i18n"; 23 | 24 | axios 25 | .get(`${appConfig.dbUrl}/scripts/get_web_config.py`, {}) 26 | .then((response) => { 27 | const app = createApp(App).use(i18n); 28 | app.config.globalProperties.$philoConfig = response.data; 29 | app.config.globalProperties.$scrollTo = vueScrollTo.scrollTo; 30 | app.config.globalProperties.$dbUrl = appConfig.dbUrl; 31 | app.config.unwrapInjectedRef = true; 32 | app.provide("$http", axios); 33 | app.provide("$dbUrl", appConfig.dbUrl); 34 | app.provide("$philoConfig", response.data); 35 | app.use(router); 36 | app.use(store); 37 | app.mixin({ 38 | methods: { 39 | paramsFilter, 40 | paramsToRoute, 41 | paramsToUrlString, 42 | copyObject, 43 | saveToLocalStorage, 44 | mergeResults, 45 | sortResults, 46 | deepEqual, 47 | dictionaryLookup, 48 | debug, 49 | }, 50 | }); 51 | app.directive("scroll", { 52 | mounted: function (el, binding) { 53 | el.scrollHandler = function (evt) { 54 | if (binding.value(evt, el)) { 55 | window.removeEventListener("scroll", el.scrollHandler); 56 | } 57 | }; 58 | window.addEventListener("scroll", el.scrollHandler); 59 | }, 60 | unmounted: function (el) { 61 | window.removeEventListener("scroll", el.scrollHandler); 62 | }, 63 | }); 64 | 65 | router.isReady().then(() => app.mount("#app")); 66 | }) 67 | .catch((error) => { 68 | // this.loading = false 69 | console.log(error.toString()); 70 | }); 71 | -------------------------------------------------------------------------------- /www/app/src/router/index.js: -------------------------------------------------------------------------------- 1 | // import Vue from 'vue' 2 | import { createRouter, createWebHistory } from 'vue-router' 3 | 4 | const concordance = () => import('../components/Concordance'); 5 | const kwic = () => import('../components/Kwic'); 6 | const bibliography = () => import('../components/Bibliography'); 7 | const collocation = () => import('../components/Collocation'); 8 | const timeSeries = () => import('../components/TimeSeries'); 9 | const textNavigation = () => import('../components/TextNavigation'); 10 | const tableOfContents = () => import('../components/TableOfContents'); 11 | const landingPage = () => import('../components/LandingPage'); 12 | const aggregation = () => import("../components/Aggregation"); 13 | import appConfig from '../../appConfig.json' 14 | 15 | 16 | const router = createRouter({ 17 | history: createWebHistory(appConfig.dbUrl.replace(/https?:\/\/[^/]+\//, "")), 18 | routes: [{ 19 | path: '/', 20 | name: 'home', 21 | component: landingPage 22 | }, 23 | { 24 | path: '/concordance', 25 | name: 'concordance', 26 | component: concordance 27 | }, 28 | { 29 | path: '/kwic', 30 | name: 'kwic', 31 | component: kwic 32 | }, 33 | { 34 | path: '/bibliography', 35 | name: 'bibliography', 36 | component: bibliography 37 | }, 38 | { 39 | path: "/collocation", 40 | name: "collocation", 41 | component: collocation 42 | }, 43 | { 44 | path: "/time_series", 45 | name: "time_series", 46 | component: timeSeries 47 | }, 48 | { 49 | path: "/navigate/:pathInfo([\\d/]+)", 50 | name: "textNavigation", 51 | component: textNavigation 52 | }, 53 | { 54 | path: "/navigate/:pathInfo(\\d+)/table-of-contents", 55 | name: "tableOfContents", 56 | component: tableOfContents 57 | }, 58 | { 59 | path: "/aggregation", 60 | name: 'aggregation', 61 | component: aggregation 62 | }, 63 | // for compatibility with old Philo links: still used in landing page and TOC 64 | { 65 | path: "/query", 66 | redirect: to => { 67 | return { 68 | name: to.query.report, 69 | params: to.params 70 | } 71 | } 72 | } 73 | ], 74 | scrollBehavior(to, from, savedPosition) { 75 | if (savedPosition) { 76 | return savedPosition 77 | } else { 78 | return { 79 | left: 0, 80 | top: 0 81 | } 82 | } 83 | } 84 | }) 85 | export default router -------------------------------------------------------------------------------- /www/app/src/store/index.js: -------------------------------------------------------------------------------- 1 | import Vuex from 'vuex' 2 | 3 | import { 4 | getField, 5 | updateField 6 | } from 'vuex-map-fields' 7 | 8 | 9 | export default new Vuex.Store({ 10 | strict: true, 11 | state: { 12 | formData: {}, 13 | reportValues: {}, 14 | resultsLength: 0, 15 | textNavigationCitation: {}, 16 | textObject: '', 17 | navBar: '', 18 | tocElements: {}, 19 | byte: '', 20 | searching: false, 21 | currentReport: "concordance", 22 | description: { 23 | start: 0, 24 | end: 0, 25 | results_per_page: 25, 26 | termGroups: [], 27 | }, 28 | aggregationCache: { 29 | results: [], 30 | query: {} 31 | }, 32 | sortedKwicCache: { 33 | queryParams: {}, 34 | results: [], 35 | totalResults: 0 36 | }, 37 | totalResultsDone: false, 38 | showFacets: true, 39 | urlUpdate: "", 40 | metadataUpdate: {}, 41 | }, 42 | getters: { 43 | getField 44 | }, 45 | mutations: { 46 | updateField, 47 | updateFormData(state, payload) { 48 | state.formData = payload 49 | }, 50 | setDefaultFields(state, payload) { 51 | for (let field in payload) { 52 | state.formData[field] = payload[field] 53 | } 54 | }, 55 | updateFormDataField(state, payload) { 56 | state.formData[payload.key] = payload.value 57 | }, 58 | updateAllMetadata(state, payload) { 59 | state.formData = { ...state.formData, ...payload } 60 | }, 61 | setReportValues(state, payload) { 62 | state.reportValues = payload 63 | }, 64 | updateCitation(state, payload) { 65 | state.textNavigationCitation = payload 66 | }, 67 | updateDescription(state, payload) { 68 | state.description = payload 69 | }, 70 | updateResultsLength(state, payload) { 71 | state.resultsLength = payload 72 | } 73 | }, 74 | actions: { 75 | updateStartEndDate(context, payload) { 76 | context.commit("updateFormData", { 77 | ...context.state.formData, 78 | start_date: payload.startDate, 79 | end_date: payload.endDate 80 | }) 81 | }, 82 | } 83 | }) -------------------------------------------------------------------------------- /www/app/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import vue from "@vitejs/plugin-vue"; 3 | import VueI18nPlugin from "@intlify/unplugin-vue-i18n/vite"; 4 | import { fileURLToPath, URL } from "node:url"; 5 | import { resolve, dirname } from "node:path"; 6 | 7 | export default defineConfig({ 8 | plugins: [ 9 | vue(), 10 | VueI18nPlugin({ 11 | include: resolve( 12 | dirname(fileURLToPath(import.meta.url)), 13 | "./src/locales/**" 14 | ), 15 | }), 16 | ], 17 | base: process.env.NODE_ENV === "production" ? getBaseUrl() : "/", 18 | resolve: { 19 | alias: { 20 | "@": fileURLToPath(new URL("./src", import.meta.url)), 21 | }, 22 | // TODO: Remove by explicitely adding extension in imports 23 | extensions: [".js", ".json", ".vue"], 24 | }, 25 | server: { 26 | hmr: { 27 | overlay: false, 28 | }, 29 | }, 30 | }); 31 | 32 | function getBaseUrl() { 33 | const fs = require("fs"); 34 | let appConfig = fs.readFileSync("appConfig.json"); 35 | let dbUrl = JSON.parse(appConfig).dbUrl; 36 | if (dbUrl == "") { 37 | let dbPath = __dirname.replace(/app$/, ""); 38 | let dbname = dbPath.split("/").reverse()[1]; 39 | let config = fs.readFileSync("/etc/philologic/philologic4.cfg", "utf8"); 40 | let re = /url_root = ["']([^"]+)["']/gm; 41 | let match = re.exec(config); 42 | let rootPath = match[1]; 43 | if (rootPath.endsWith("/")) { 44 | rootPath = rootPath.slice(0, -1); 45 | } 46 | dbUrl = rootPath + "/" + dbname + "/"; 47 | let jsonString = JSON.stringify({ dbUrl: dbUrl }); 48 | fs.writeFileSync("./appConfig.json", jsonString); 49 | return dbUrl; 50 | } 51 | return dbUrl; 52 | } 53 | -------------------------------------------------------------------------------- /www/dispatcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Routing for PhiloLogic4.""" 3 | 4 | 5 | import datetime 6 | import os 7 | from random import randint 8 | from typing import Callable 9 | from urllib.parse import parse_qs, urlparse 10 | from wsgiref.handlers import CGIHandler 11 | 12 | import reports 13 | from philologic.runtime import WebConfig, WSGIHandler 14 | from webApp import start_web_app 15 | 16 | path = os.path.abspath(os.path.dirname(__file__)) 17 | 18 | 19 | def philo_dispatcher(environ, start_response): 20 | """Dispatcher function.""" 21 | config = WebConfig(path) 22 | request = WSGIHandler(environ, config) 23 | if request.content_type == "application/json" or request.format == "json": 24 | try: 25 | path_components = [c for c in environ["PATH_INFO"].split("/") if c] 26 | except Exception: 27 | path_components = [] 28 | if path_components: 29 | if path_components[-1] == "table-of-contents": 30 | yield b"".join(reports.table_of_contents(environ, start_response)) 31 | else: 32 | yield b"".join(reports.navigation(environ, start_response)) 33 | else: 34 | try: 35 | report_name: str = parse_qs(environ["QUERY_STRING"])["report"][0] 36 | except KeyError: 37 | report_name = urlparse(environ["REQUEST_URI"]).path.split("/")[-1] 38 | report: Callable = getattr(reports, report_name) 39 | yield b"".join(report(environ, start_response)) 40 | elif request.full_bibliography is True: 41 | yield b"".join(reports.bibliography(environ, start_response)) 42 | else: 43 | yield start_web_app(environ, start_response).encode("utf8") 44 | 45 | # clean-up hitlist every now and then 46 | if randint(0, 10) == 1: 47 | for file in os.scandir(os.path.join(path, "data/hitlists/*")): 48 | file_modified = datetime.datetime.fromtimestamp(os.path.getmtime(file.path)) 49 | if datetime.datetime.now() - file_modified > datetime.timedelta(minutes=10): 50 | os.remove(file.path) 51 | 52 | 53 | if __name__ == "__main__": 54 | CGIHandler().run(philo_dispatcher) 55 | -------------------------------------------------------------------------------- /www/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/favicon.ico -------------------------------------------------------------------------------- /www/reports/__init__.py: -------------------------------------------------------------------------------- 1 | from .concordance import concordance 2 | from .kwic import kwic 3 | from .bibliography import bibliography 4 | from .collocation import collocation 5 | from .time_series import time_series 6 | from .navigation import navigation 7 | from .table_of_contents import table_of_contents 8 | from .word_property_filter import word_property_filter 9 | from .aggregation import aggregation 10 | -------------------------------------------------------------------------------- /www/reports/aggregation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import aggregation_by_field 15 | except ImportError: 16 | from philologic.runtime import aggregation_by_field 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def aggregation(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | aggregation_object = aggregation_by_field(request, config) 31 | headers = [ 32 | ("Content-type", "application/json; charset=UTF-8"), 33 | ("Access-Control-Allow-Origin", "*"), 34 | ] 35 | start_response("200 OK", headers) 36 | yield orjson.dumps(aggregation_object) 37 | 38 | 39 | if __name__ == "__main__": 40 | CGIHandler().run(aggregation) 41 | -------------------------------------------------------------------------------- /www/reports/bibliography.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | import sys 6 | from wsgiref.handlers import CGIHandler 7 | 8 | sys.path.append("..") 9 | import custom_functions 10 | 11 | try: 12 | from custom_functions import bibliography_results 13 | except ImportError: 14 | from philologic.runtime import bibliography_results 15 | try: 16 | from custom_functions import WebConfig 17 | except ImportError: 18 | from philologic.runtime import WebConfig 19 | try: 20 | from custom_functions import WSGIHandler 21 | except ImportError: 22 | from philologic.runtime import WSGIHandler 23 | 24 | 25 | def bibliography(environ, start_response): 26 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 27 | request = WSGIHandler(environ, config) 28 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 29 | start_response("200 OK", headers) 30 | bibliography_object, _ = bibliography_results(request, config) 31 | yield orjson.dumps(bibliography_object) 32 | 33 | 34 | if __name__ == "__main__": 35 | CGIHandler().run(bibliography) 36 | -------------------------------------------------------------------------------- /www/reports/collocation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import collocation_results 15 | except ImportError: 16 | from philologic.runtime import collocation_results 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def collocation(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 31 | start_response("200 OK", headers) 32 | collocation_object = collocation_results(request, config) 33 | yield orjson.dumps(collocation_object) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(collocation) 38 | -------------------------------------------------------------------------------- /www/reports/concordance.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import concordance_results 15 | except ImportError: 16 | from philologic.runtime import concordance_results 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def concordance(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | concordance_object = concordance_results(request, config) 31 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 32 | start_response("200 OK", headers) 33 | yield orjson.dumps(concordance_object) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(concordance) 38 | -------------------------------------------------------------------------------- /www/reports/kwic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import kwic_results 15 | except ImportError: 16 | from philologic.runtime import kwic_results 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def kwic(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | kwic_object = kwic_results(request, config) 31 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 32 | start_response("200 OK", headers) 33 | yield orjson.dumps(kwic_object) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(kwic) 38 | -------------------------------------------------------------------------------- /www/reports/navigation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import generate_text_object 15 | except ImportError: 16 | from philologic.runtime import generate_text_object 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def navigation(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 31 | start_response("200 OK", headers) 32 | text_object = generate_text_object(request, config) 33 | yield orjson.dumps(text_object) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(navigation) 38 | -------------------------------------------------------------------------------- /www/reports/table_of_contents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import generate_toc_object 15 | except ImportError: 16 | from philologic.runtime import generate_toc_object 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def table_of_contents(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | 31 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 32 | start_response("200 OK", headers) 33 | toc_object = generate_toc_object(request, config) 34 | yield orjson.dumps(toc_object) 35 | 36 | 37 | if __name__ == "__main__": 38 | CGIHandler().run(table_of_contents) 39 | -------------------------------------------------------------------------------- /www/reports/time_series.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import generate_time_series 15 | except ImportError: 16 | from philologic.runtime import generate_time_series 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def time_series(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | time_series_object = generate_time_series(request, config) 31 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 32 | start_response("200 OK", headers) 33 | yield orjson.dumps(time_series_object) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(time_series) 38 | -------------------------------------------------------------------------------- /www/reports/word_property_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import filter_words_by_property 15 | except ImportError: 16 | from philologic.runtime import filter_words_by_property 17 | try: 18 | from custom_functions import WebConfig 19 | except ImportError: 20 | from philologic.runtime import WebConfig 21 | try: 22 | from custom_functions import WSGIHandler 23 | except ImportError: 24 | from philologic.runtime import WSGIHandler 25 | 26 | 27 | def word_property_filter(environ, start_response): 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", "")) 29 | request = WSGIHandler(environ, config) 30 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 31 | start_response("200 OK", headers) 32 | filter_results = filter_words_by_property(hits, config.db_path, request, db, config) 33 | yield orjson.dumps(filter_results) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(word_property_filter) 38 | -------------------------------------------------------------------------------- /www/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /www/scripts/access_request.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | import orjson 7 | from philologic.runtime import access_control, login_access 8 | 9 | import sys 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | default_reports = ["concordance", "kwic", "collocation", "time_series", "navigation"] 25 | 26 | 27 | def access_request(environ, start_response): 28 | status = "200 OK" 29 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 30 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 31 | request = WSGIHandler(environ, config) 32 | access, headers = login_access(environ, request, config, headers) 33 | start_response(status, headers) 34 | if access: 35 | yield orjson.dumps({"access": True}) 36 | else: 37 | incoming_address, domain_name = access_control.get_client_info(environ) 38 | yield orjson.dumps({"access": False, "incoming_address": incoming_address, "domain_name": domain_name}) 39 | 40 | 41 | if __name__ == "__main__": 42 | CGIHandler().run(access_request) 43 | -------------------------------------------------------------------------------- /www/scripts/alignment_to_text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | from json import dumps 6 | from wsgiref.handlers import CGIHandler 7 | 8 | from philologic.runtime.DB import DB 9 | from philologic.runtime.link import byte_range_to_link 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | def alignment_to_text(environ, start_response): 25 | status = "200 OK" 26 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 27 | start_response(status, headers) 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 29 | db = DB(config.db_path + "/data/") 30 | request = WSGIHandler(environ, config) 31 | link = byte_range_to_link(db, config, request) 32 | yield dumps({"link": link}).encode("utf-8") 33 | 34 | 35 | if __name__ == "__main__": 36 | CGIHandler().run(alignment_to_text) 37 | -------------------------------------------------------------------------------- /www/scripts/get_academic_citation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import orjson 5 | from wsgiref.handlers import CGIHandler 6 | from philologic.runtime.DB import DB 7 | from philologic.runtime.citations import citations, citation_links 8 | 9 | 10 | import sys 11 | 12 | sys.path.append("..") 13 | import custom_functions 14 | 15 | try: 16 | from custom_functions import WebConfig 17 | except ImportError: 18 | from philologic.runtime import WebConfig 19 | try: 20 | from custom_functions import WSGIHandler 21 | except ImportError: 22 | from philologic.runtime import WSGIHandler 23 | 24 | 25 | def get_academic_citation(environ, start_response): 26 | status = "200 OK" 27 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 28 | start_response(status, headers) 29 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 30 | request = WSGIHandler(environ, config) 31 | db = DB(config.db_path + "/data/") 32 | text_obj = db[request.philo_id] 33 | citation_hrefs = citation_links(db, config, text_obj) 34 | citation = citations(text_obj, citation_hrefs, config, citation_type=config.academic_citation["citation"]) 35 | yield orjson.dumps({"citation": citation}) 36 | 37 | 38 | if __name__ == "__main__": 39 | CGIHandler().run(get_academic_citation) 40 | -------------------------------------------------------------------------------- /www/scripts/get_bibliography.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | from philologic.runtime import landing_page_bibliography 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import WebConfig 15 | except ImportError: 16 | from philologic.runtime import WebConfig 17 | try: 18 | from custom_functions import WSGIHandler 19 | except ImportError: 20 | from philologic.runtime import WSGIHandler 21 | 22 | 23 | def get_bibliography(environ, start_response): 24 | status = "200 OK" 25 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 26 | start_response(status, headers) 27 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 28 | request = WSGIHandler(environ, config) 29 | yield landing_page_bibliography(request, config) 30 | 31 | 32 | if __name__ == "__main__": 33 | CGIHandler().run(get_bibliography) 34 | -------------------------------------------------------------------------------- /www/scripts/get_filter_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime import build_filter_list 8 | 9 | import sys 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | def get_filter_list(environ, start_response): 25 | status = "200 OK" 26 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 27 | start_response(status, headers) 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 29 | request = WSGIHandler(environ, config) 30 | filter_list = build_filter_list(request, config) 31 | yield orjson.dumps(filter_list) 32 | 33 | 34 | if __name__ == "__main__": 35 | CGIHandler().run(get_filter_list) 36 | -------------------------------------------------------------------------------- /www/scripts/get_frequency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | import sys 8 | 9 | sys.path.append("..") 10 | import custom_functions 11 | 12 | try: 13 | from custom_functions import WebConfig 14 | except ImportError: 15 | from philologic.runtime import WebConfig 16 | try: 17 | from custom_functions import WSGIHandler 18 | except ImportError: 19 | from philologic.runtime import WSGIHandler 20 | try: 21 | from custom_functions import frequency_results 22 | except ImportError: 23 | from philologic.runtime import frequency_results 24 | 25 | 26 | def get_frequency(environ, start_response): 27 | """reads through a hitlist. looks up q.frequency_field in each hit, and builds up a list of 28 | unique values and their frequencies.""" 29 | status = "200 OK" 30 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 31 | start_response(status, headers) 32 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 33 | request = WSGIHandler(environ, config) 34 | results = frequency_results(request, config) 35 | yield orjson.dumps(results) 36 | 37 | 38 | if __name__ == "__main__": 39 | CGIHandler().run(get_frequency) 40 | -------------------------------------------------------------------------------- /www/scripts/get_header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | from philologic.runtime import get_tei_header 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import WebConfig 15 | except ImportError: 16 | from philologic.runtime import WebConfig 17 | try: 18 | from custom_functions import WSGIHandler 19 | except ImportError: 20 | from philologic.runtime import WSGIHandler 21 | 22 | 23 | def get_header(environ, start_response): 24 | status = "200 OK" 25 | headers = [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 26 | start_response(status, headers) 27 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 28 | request = WSGIHandler(environ, config) 29 | header = get_tei_header(request, config) 30 | yield header.encode("utf8") 31 | 32 | 33 | if __name__ == "__main__": 34 | CGIHandler().run(get_header) 35 | -------------------------------------------------------------------------------- /www/scripts/get_landing_page_content.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | from wsgiref.handlers import CGIHandler 5 | 6 | from philologic.runtime import group_by_metadata, group_by_range 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import WebConfig 15 | except ImportError: 16 | from philologic.runtime import WebConfig 17 | try: 18 | from custom_functions import WSGIHandler 19 | except ImportError: 20 | from philologic.runtime import WSGIHandler 21 | 22 | 23 | def landing_page_content(environ, start_response): 24 | """Get landing page content""" 25 | status = "200 OK" 26 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 27 | start_response(status, headers) 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 29 | request = WSGIHandler(environ, config) 30 | if request.is_range == "true": 31 | if isinstance(request.query, bytes): 32 | request_range = request.query.decode("utf8") 33 | request_range = [item.strip() for item in request.query.lower().split("-")] 34 | if len(request_range) == 1: 35 | request_range.append(request_range[0]) 36 | results = group_by_range(request_range, request, config) 37 | else: 38 | results = group_by_metadata(request, config) 39 | yield results 40 | 41 | 42 | if __name__ == "__main__": 43 | CGIHandler().run(landing_page_content) 44 | -------------------------------------------------------------------------------- /www/scripts/get_more_context.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime import get_concordance_text 9 | 10 | import sys 11 | 12 | sys.path.append("..") 13 | import custom_functions 14 | 15 | try: 16 | from custom_functions import WebConfig 17 | except ImportError: 18 | from philologic.runtime import WebConfig 19 | try: 20 | from custom_functions import WSGIHandler 21 | except ImportError: 22 | from philologic.runtime import WSGIHandler 23 | 24 | 25 | def get_more_context(environ, start_response): 26 | status = "200 OK" 27 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 28 | start_response(status, headers) 29 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 30 | db = DB(config.db_path + "/data/") 31 | request = WSGIHandler(environ, config) 32 | hit_num = int(request.hit_num) 33 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 34 | context_size = config["concordance_length"] * 3 35 | hit_context = get_concordance_text(db, hits[hit_num], config.db_path, context_size) 36 | yield orjson.dumps(hit_context) 37 | 38 | 39 | if __name__ == "__main__": 40 | CGIHandler().run(get_more_context) 41 | -------------------------------------------------------------------------------- /www/scripts/get_notes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime import generate_text_object 9 | 10 | import sys 11 | 12 | sys.path.append("..") 13 | import custom_functions 14 | 15 | try: 16 | from custom_functions import WebConfig 17 | except ImportError: 18 | from philologic.runtime import WebConfig 19 | try: 20 | from custom_functions import WSGIHandler 21 | except ImportError: 22 | from philologic.runtime import WSGIHandler 23 | 24 | 25 | def get_notes(environ, start_response): 26 | status = "200 OK" 27 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 28 | start_response(status, headers) 29 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 30 | request = WSGIHandler(environ, config) 31 | text_object = generate_text_object(request, config, note=True) 32 | yield orjson.dumps(text_object) 33 | 34 | 35 | if __name__ == "__main__": 36 | CGIHandler().run(get_notes) 37 | -------------------------------------------------------------------------------- /www/scripts/get_query_terms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime.Query import get_expanded_query 9 | 10 | import sys 11 | 12 | sys.path.append("..") 13 | import custom_functions 14 | 15 | try: 16 | from custom_functions import WebConfig 17 | except ImportError: 18 | from philologic.runtime import WebConfig 19 | try: 20 | from custom_functions import WSGIHandler 21 | except ImportError: 22 | from philologic.runtime import WSGIHandler 23 | 24 | 25 | def term_list(environ, start_response): 26 | status = "200 OK" 27 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 28 | start_response(status, headers) 29 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 30 | db = DB(config.db_path + "/data/") 31 | request = WSGIHandler(environ, config) 32 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 33 | hits.finish() 34 | expanded_terms = get_expanded_query(hits) 35 | yield orjson.dumps(expanded_terms[0]) 36 | 37 | 38 | if __name__ == "__main__": 39 | CGIHandler().run(term_list) 40 | -------------------------------------------------------------------------------- /www/scripts/get_sorted_frequency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime import frequency_results 8 | 9 | import sys 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | def get_frequency(environ, start_response): 25 | """reads through a hitlist. looks up q.frequency_field in each hit, and builds up a list of 26 | unique values and their frequencies.""" 27 | status = "200 OK" 28 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 29 | start_response(status, headers) 30 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 31 | request = WSGIHandler(environ, config) 32 | results = frequency_results(request, config, sorted_results=True) 33 | yield orjson.dumps(results) 34 | 35 | 36 | if __name__ == "__main__": 37 | CGIHandler().run(get_frequency) 38 | -------------------------------------------------------------------------------- /www/scripts/get_sorted_kwic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime import kwic_hit_object, page_interval 9 | 10 | import sys 11 | 12 | 13 | sys.path.append("..") 14 | import custom_functions 15 | 16 | try: 17 | from custom_functions import WebConfig 18 | except ImportError: 19 | from philologic.runtime import WebConfig 20 | try: 21 | from custom_functions import WSGIHandler 22 | except ImportError: 23 | from philologic.runtime import WSGIHandler 24 | 25 | 26 | def get_sorted_kwic(environ, start_response): 27 | """Get sorted KWIC""" 28 | status = "200 OK" 29 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 30 | start_response(status, headers) 31 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 32 | db = DB(config.db_path + "/data/") 33 | request = WSGIHandler(environ, config) 34 | sorted_hits = get_sorted_hits(request, config, db) 35 | yield orjson.dumps(sorted_hits) 36 | 37 | 38 | def get_sorted_hits(request, config, db): 39 | """Get sorted hits""" 40 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 41 | start, end, _ = page_interval(request.results_per_page, hits, request.start, request.end) 42 | kwic_object = { 43 | "description": {"start": start, "end": end, "results_per_page": request.results_per_page}, 44 | "query": dict([i for i in request]), 45 | } 46 | if not os.path.exists(f"{request.cache_path}.sorted"): 47 | with open(request.cache_path) as cache: 48 | fields = cache.readline().strip().split("\t") 49 | sort_order = [] 50 | if request.first_kwic_sorting_option: 51 | key = fields.index(request.first_kwic_sorting_option) + 1 52 | sort_order.append(f"-k {key},{key}") 53 | if request.second_kwic_sorting_option: 54 | key = fields.index(request.second_kwic_sorting_option) + 1 55 | sort_order.append(f"-k {key},{key}") 56 | if request.third_kwic_sorting_option: 57 | key = fields.index(request.third_kwic_sorting_option) + 1 58 | sort_order.append(f"-k {key},{key}") 59 | sort_order = " ".join(sort_order) 60 | os.system( 61 | f"tail -n +2 {request.cache_path} | sort {sort_order} > {request.cache_path}.sorted && rm {request.cache_path}" 62 | ) # no numeric sort since we would have to know the type of the field being sorted on: e.g. -k 2,2n 63 | kwic_results = [] 64 | with open(f"{request.cache_path}.sorted") as sorted_results: 65 | for line_number, line in enumerate(sorted_results, 1): 66 | if line_number < start: 67 | continue 68 | if line_number > end: 69 | break 70 | index = int(line.split("\t")[0]) 71 | hit = hits[index] 72 | kwic_result = kwic_hit_object(hit, config, db) 73 | kwic_results.append(kwic_result) 74 | 75 | kwic_object["results"] = kwic_results 76 | kwic_object["results_length"] = len(hits) 77 | kwic_object["query_done"] = hits.done 78 | 79 | return kwic_object 80 | 81 | 82 | if __name__ == "__main__": 83 | CGIHandler().run(get_sorted_kwic) 84 | -------------------------------------------------------------------------------- /www/scripts/get_table_of_contents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | 8 | import sys 9 | 10 | sys.path.append("..") 11 | import custom_functions 12 | 13 | try: 14 | from custom_functions import WebConfig 15 | except ImportError: 16 | from philologic.runtime import WebConfig 17 | try: 18 | from custom_functions import WSGIHandler 19 | except ImportError: 20 | from philologic.runtime import WSGIHandler 21 | try: 22 | from custom_functions import generate_toc_object 23 | except ImportError: 24 | from philologic.runtime import generate_toc_object 25 | 26 | 27 | def get_table_of_contents(environ, start_response): 28 | status = "200 OK" 29 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 30 | start_response(status, headers) 31 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 32 | request = WSGIHandler(environ, config) 33 | philo_id = request["philo_id"].split() 34 | toc_object = generate_toc_object(request, config) 35 | current_obj_position = 0 36 | philo_id = " ".join(philo_id) 37 | for pos, toc_element in enumerate(toc_object["toc"]): 38 | if toc_element["philo_id"] == philo_id: 39 | current_obj_position = pos 40 | break 41 | toc_object["current_obj_position"] = current_obj_position 42 | yield orjson.dumps(toc_object) 43 | 44 | 45 | if __name__ == "__main__": 46 | CGIHandler().run(get_table_of_contents) 47 | -------------------------------------------------------------------------------- /www/scripts/get_term_groups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime.Query import split_terms 9 | from philologic.runtime.QuerySyntax import group_terms, parse_query 10 | 11 | import sys 12 | 13 | sys.path.append("..") 14 | import custom_functions 15 | 16 | try: 17 | from custom_functions import WebConfig 18 | except ImportError: 19 | from philologic.runtime import WebConfig 20 | try: 21 | from custom_functions import WSGIHandler 22 | except ImportError: 23 | from philologic.runtime import WSGIHandler 24 | 25 | 26 | def term_group(environ, start_response): 27 | status = "200 OK" 28 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 29 | start_response(status, headers) 30 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 31 | db = DB(config.db_path + "/data/") 32 | request = WSGIHandler(environ, config) 33 | if not request["q"]: 34 | dump = orjson.dumps({"original_query": "", "term_groups": []}) 35 | else: 36 | hits = db.query( 37 | request["q"], request["method"], request["arg"], sort_order=request["sort_order"], **request.metadata 38 | ) 39 | parsed = parse_query(request.q) 40 | group = group_terms(parsed) 41 | all_groups = split_terms(group) 42 | term_groups = [] 43 | for g in all_groups: 44 | term_group = "" 45 | not_started = False 46 | for kind, term in g: 47 | if kind == "NOT": 48 | if not_started is False: 49 | not_started = True 50 | term_group += " NOT " 51 | elif kind == "OR": 52 | term_group += "|" 53 | elif kind == "TERM": 54 | term_group += " %s " % term 55 | elif kind == "QUOTE": 56 | term_group += " %s " % term 57 | term_group = term_group.strip() 58 | term_groups.append(term_group) 59 | dump = orjson.dumps({"term_groups": term_groups, "original_query": request.original_q}) 60 | yield dump 61 | 62 | 63 | if __name__ == "__main__": 64 | CGIHandler().run(term_group) 65 | -------------------------------------------------------------------------------- /www/scripts/get_text_object.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | from philologic.runtime.HitWrapper import ObjectWrapper 9 | from philologic.runtime import generate_text_object 10 | 11 | import sys 12 | 13 | sys.path.append("..") 14 | import custom_functions 15 | 16 | try: 17 | from custom_functions import WebConfig 18 | except ImportError: 19 | from philologic.runtime import WebConfig 20 | try: 21 | from custom_functions import WSGIHandler 22 | except ImportError: 23 | from philologic.runtime import WSGIHandler 24 | 25 | 26 | def get_text_object(environ, start_response): 27 | status = "200 OK" 28 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 29 | start_response(status, headers) 30 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 31 | db = DB(config.db_path + "/data/") 32 | request = WSGIHandler(environ, config) 33 | path = config.db_path 34 | zeros = 7 - len(request.philo_id) 35 | if zeros: 36 | request.philo_id += zeros * " 0" 37 | obj = ObjectWrapper(request["philo_id"].split(), db) 38 | text_object = generate_text_object(request, config) 39 | yield orjson.dumps(text_object) 40 | 41 | 42 | if __name__ == "__main__": 43 | CGIHandler().run(get_text_object) 44 | -------------------------------------------------------------------------------- /www/scripts/get_total_results.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime.DB import DB 8 | 9 | import sys 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | def get_total_results(environ, start_response): 25 | status = "200 OK" 26 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 27 | start_response(status, headers) 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 29 | db = DB(config.db_path + "/data/") 30 | request = WSGIHandler(environ, config) 31 | if request.no_q: 32 | if request.no_metadata: 33 | hits = db.get_all(db.locals["default_object_level"], request["sort_order"]) 34 | else: 35 | hits = db.query(sort_order=request["sort_order"], **request.metadata) 36 | else: 37 | hits = db.query(request["q"], request["method"], request["arg"], **request.metadata) 38 | total_results = 0 39 | hits.finish() 40 | total_results = len(hits) 41 | yield orjson.dumps(total_results) 42 | 43 | 44 | if __name__ == "__main__": 45 | CGIHandler().run(get_total_results) 46 | -------------------------------------------------------------------------------- /www/scripts/get_web_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sqlite3 5 | from wsgiref.handlers import CGIHandler 6 | 7 | import sys 8 | 9 | sys.path.append("..") 10 | import custom_functions 11 | 12 | try: 13 | from custom_functions import WebConfig 14 | except ImportError: 15 | from philologic.runtime import WebConfig 16 | 17 | from philologic.Config import MakeDBConfig 18 | from philologic.runtime.DB import DB 19 | 20 | 21 | def get_web_config(_, start_response): 22 | """Retrieve Web Config data""" 23 | status = "200 OK" 24 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 25 | start_response(status, headers) 26 | db_path = os.path.abspath(os.path.dirname(__file__)).replace("scripts", "") 27 | config = WebConfig(db_path) 28 | if config.valid_config is False: 29 | yield config.to_json() 30 | else: 31 | config.time_series_status = time_series_tester(config) 32 | db_locals = MakeDBConfig(os.path.join(db_path, "data/db.locals.py")) 33 | config.data["available_metadata"] = db_locals.metadata_fields 34 | yield config.to_json() 35 | 36 | 37 | def time_series_tester(config): 38 | """Test if we have at least two distinct values for time series""" 39 | frequencies_file = os.path.join(config.db_path, f"data/frequencies/{config.time_series_year_field}_frequencies") 40 | if os.path.exists(frequencies_file): 41 | with open(frequencies_file) as input_file: 42 | line_count = sum(1 for _ in input_file) 43 | if line_count > 1: 44 | return True 45 | return False 46 | 47 | 48 | if __name__ == "__main__": 49 | CGIHandler().run(get_web_config) 50 | -------------------------------------------------------------------------------- /www/scripts/get_word_frequency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import orjson 4 | import os 5 | from wsgiref.handlers import CGIHandler 6 | 7 | from philologic.runtime import concordance_results 8 | 9 | import sys 10 | 11 | sys.path.append("..") 12 | import custom_functions 13 | 14 | try: 15 | from custom_functions import WebConfig 16 | except ImportError: 17 | from philologic.runtime import WebConfig 18 | try: 19 | from custom_functions import WSGIHandler 20 | except ImportError: 21 | from philologic.runtime import WSGIHandler 22 | 23 | 24 | def get_frequency(environ, start_response): 25 | status = "200 OK" 26 | headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 27 | start_response(status, headers) 28 | config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")) 29 | request = WSGIHandler(environ, config) 30 | word_frequency_object = generate_word_frequency(request, config) 31 | yield orjson.dumps(word_frequency_object) 32 | 33 | 34 | if __name__ == "__main__": 35 | CGIHandler().run(get_frequency) 36 | -------------------------------------------------------------------------------- /www/webApp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Bootstrap Web app""" 3 | 4 | 5 | import os.path 6 | 7 | from philologic.runtime import WebConfig 8 | from philologic.runtime import WSGIHandler 9 | from philologic.runtime import access_control 10 | 11 | PATH = os.path.abspath(os.path.dirname(__file__)) 12 | 13 | 14 | def start_web_app(environ, start_response): 15 | """Return index.html to start web app""" 16 | config = WebConfig(os.path.abspath(PATH)) 17 | headers = [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")] 18 | if not config.valid_config: # This means we have an error in the webconfig file 19 | html_page = build_misconfig_page(config.traceback, "webconfig.cfg") 20 | else: 21 | request = WSGIHandler(environ, config) 22 | if config.access_control: 23 | if not request.authenticated: 24 | token = access_control.check_access(environ, config) 25 | if token: 26 | h, ts = token 27 | headers.append(("Set-Cookie", "hash=%s" % h)) 28 | headers.append(("Set-Cookie", "timestamp=%s" % ts)) 29 | with open(f"{config.db_path}/app/dist/index.html") as index_page: 30 | html_page = index_page.read() 31 | start_response("200 OK", headers) 32 | return html_page 33 | 34 | 35 | def build_misconfig_page(traceback, config_file): 36 | """Return bad config HTML page""" 37 | with open("%s/app/misconfiguration.html" % PATH) as input: 38 | html_page = input.read() 39 | html_page = html_page.replace("$TRACEBACK", traceback) 40 | html_page = html_page.replace("$config_FILE", config_file) 41 | return html_page 42 | --------------------------------------------------------------------------------