├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── docs
    ├── README.md
    ├── _config.yml
    ├── _includes
    │   ├── foot.html
    │   ├── head.html
    │   └── nav.html
    ├── _layouts
    │   ├── default.html
    │   └── default_exp.html
    ├── access_control.md
    ├── assets
    │   ├── css
    │   │   └── style.css
    │   ├── favicon.ico
    │   └── philo.png
    ├── configure_web_app.md
    ├── database_loading.md
    ├── encoding_spec.md
    ├── index.md
    ├── installation.md
    ├── query_syntax.md
    └── specific_installations
    │   ├── redhat_installation.md
    │   └── ubuntu_installation.md
├── extras
    ├── FrenchStopwords.txt
    ├── artfl_theme.scss
    ├── exportResults.py
    ├── load_config.py
    ├── metadata_extractor.py
    ├── philodb_convert-4.6_to_4.7.py
    ├── plain_text_load_script.py
    ├── python2_to_python3_port.py
    ├── rebuild_app.py
    ├── tol_theme.scss
    ├── utilities
    │   ├── extract_metadata.py
    │   ├── fix_drama.py
    │   ├── fix_notes.py
    │   ├── list_xpath_in_header.py
    │   ├── tei_cleanup.py
    │   ├── update_toms.py
    │   └── xml_cleanup.py
    ├── vf_theme.scss
    └── web_config_convert_4_6_to_4_7.py
├── install.sh
├── libphilo
    ├── Makefile
    ├── README
    ├── args.c
    ├── args.h
    ├── blockmap.c
    ├── blockmap.h
    ├── c.h
    ├── db
    │   ├── Makefile
    │   ├── bitsvector.c
    │   ├── bitsvector.h
    │   ├── corpus_search.c
    │   ├── db.c
    │   ├── db.h
    │   ├── dbspecs.h
    │   ├── dbspecs2.h
    │   ├── dbspecs4.h
    │   ├── mergewords.c
    │   ├── pack.c
    │   ├── pack.dSYM
    │   │   └── Contents
    │   │   │   ├── Info.plist
    │   │   │   └── Resources
    │   │   │       └── DWARF
    │   │   │           └── pack
    │   ├── pack.h
    │   ├── parsedb.c
    │   ├── test_search.py
    │   ├── unpack.c
    │   ├── unpack.h
    │   └── validate-index.c
    ├── gmap.c
    ├── gmap.h
    ├── level.c
    ├── level.h
    ├── log.h
    ├── out.c
    ├── out.h
    ├── plugin
    │   ├── Makefile
    │   ├── Makefile.in
    │   ├── dbplugins.h
    │   ├── hit.h
    │   ├── hitcmp.c
    │   ├── hitcmp.h
    │   ├── hitcmp_cooc.c
    │   ├── hitcmp_cooc.h
    │   ├── hitcmp_phrase.c
    │   ├── hitcmp_phrase.h
    │   ├── hitcmp_proxy.c
    │   ├── hitcmp_proxy.h
    │   ├── hitcmp_sent.c
    │   ├── hitcmp_sent.h
    │   ├── hitcon.h
    │   ├── hitcrp.c
    │   ├── hitcrp.h
    │   ├── hitdef.c
    │   ├── hitdef.h
    │   ├── hitman.c
    │   ├── hitman.h
    │   ├── hitout.c
    │   ├── hitout.h
    │   ├── method.c
    │   ├── method.h
    │   ├── plugin.c
    │   ├── plugin.h
    │   └── searchmethods.h
    ├── retreive.c
    ├── retreive.h
    ├── search.c
    ├── search.h
    ├── search.py
    ├── search4.c
    ├── word.c
    └── word.h
├── python
    ├── LICENSE
    ├── README
    ├── philologic
    │   ├── Config.py
    │   ├── TagCensus.py
    │   ├── __init__.py
    │   ├── loadtime
    │   │   ├── LoadFilters.py
    │   │   ├── LoadOptions.py
    │   │   ├── Loader.py
    │   │   ├── OHCOVector.py
    │   │   ├── Parser.py
    │   │   ├── PhiloLoad.py
    │   │   ├── PlainTextParser.py
    │   │   ├── PostFilters.py
    │   │   └── __init__.py
    │   ├── runtime
    │   │   ├── DB.py
    │   │   ├── FragmentParser.py
    │   │   ├── HitList.py
    │   │   ├── HitWrapper.py
    │   │   ├── MetadataQuery.py
    │   │   ├── ObjectFormatter.py
    │   │   ├── Query.py
    │   │   ├── QuerySyntax.py
    │   │   ├── WSGIHandler.py
    │   │   ├── __init__.py
    │   │   ├── access_control.py
    │   │   ├── citations.py
    │   │   ├── collocation_scores.py
    │   │   ├── find_similar_words.py
    │   │   ├── get_text.py
    │   │   ├── link.py
    │   │   ├── pages.py
    │   │   ├── reports
    │   │   │   ├── __init__.py
    │   │   │   ├── aggregation.py
    │   │   │   ├── bibliography.py
    │   │   │   ├── collocation.py
    │   │   │   ├── concordance.py
    │   │   │   ├── filter_word_by_property.py
    │   │   │   ├── frequency.py
    │   │   │   ├── generate_word_frequency.py
    │   │   │   ├── kwic.py
    │   │   │   ├── landing_page.py
    │   │   │   ├── navigation.py
    │   │   │   ├── table_of_contents.py
    │   │   │   └── time_series.py
    │   │   └── web_config.py
    │   ├── shlax.py
    │   ├── shlaxtree.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── convert_entities.py
    │   │   ├── load_module.py
    │   │   ├── metadata_type_handler.py
    │   │   ├── pretty_print.py
    │   │   └── sort.py
    └── pyproject.toml
└── www
    ├── .htaccess
    ├── app
        ├── .env
        ├── .gitignore
        ├── index.html
        ├── misconfiguration.html
        ├── package-lock.json
        ├── package.json
        ├── public
        │   └── favicon.ico
        ├── src
        │   ├── App.vue
        │   ├── assets
        │   │   ├── language.png
        │   │   ├── logo.png
        │   │   ├── philo.png
        │   │   └── styles
        │   │   │   └── theme.module.scss
        │   ├── components
        │   │   ├── AccessControl.vue
        │   │   ├── Aggregation.vue
        │   │   ├── Bibliography.vue
        │   │   ├── Citations.vue
        │   │   ├── Collocation.vue
        │   │   ├── Concordance.vue
        │   │   ├── ExportResults.vue
        │   │   ├── Facets.vue
        │   │   ├── Header.vue
        │   │   ├── Kwic.vue
        │   │   ├── LandingPage.vue
        │   │   ├── LocaleChanger.vue
        │   │   ├── Pages.vue
        │   │   ├── ResultsBibliography.vue
        │   │   ├── ResultsSummary.vue
        │   │   ├── SearchArguments.vue
        │   │   ├── SearchForm.vue
        │   │   ├── SearchTips.vue
        │   │   ├── TableOfContents.vue
        │   │   ├── TextNavigation.vue
        │   │   └── TimeSeries.vue
        │   ├── i18n.js
        │   ├── locales
        │   │   ├── en.json
        │   │   └── fr.json
        │   ├── main.js
        │   ├── mixins.js
        │   ├── router
        │   │   └── index.js
        │   └── store
        │   │   └── index.js
        └── vite.config.js
    ├── dispatcher.py
    ├── favicon.ico
    ├── reports
        ├── __init__.py
        ├── aggregation.py
        ├── bibliography.py
        ├── collocation.py
        ├── concordance.py
        ├── kwic.py
        ├── navigation.py
        ├── table_of_contents.py
        ├── time_series.py
        └── word_property_filter.py
    ├── scripts
        ├── __init__.py
        ├── access_request.py
        ├── alignment_to_text.py
        ├── autocomplete_metadata.py
        ├── autocomplete_term.py
        ├── export_results.py
        ├── get_academic_citation.py
        ├── get_bibliography.py
        ├── get_filter_list.py
        ├── get_frequency.py
        ├── get_header.py
        ├── get_hitlist_stats.py
        ├── get_landing_page_content.py
        ├── get_more_context.py
        ├── get_neighboring_words.py
        ├── get_notes.py
        ├── get_query_terms.py
        ├── get_sorted_frequency.py
        ├── get_sorted_kwic.py
        ├── get_table_of_contents.py
        ├── get_term_groups.py
        ├── get_text_object.py
        ├── get_total_results.py
        ├── get_web_config.py
        ├── get_word_frequency.py
        ├── lookup_word.py
        └── resolve_cite.py
    └── webApp.py


/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ### 4.7 ###
 2 | - New aggregation report
 3 | - New metadata stats in search results
 4 | - Results bibliography in concordance and KWIC results.
 5 | - Database size should be between 50% and 80% (or more) smaller
 6 | - Significant speed-ups for:
 7 |     * Collocations: in some cases 3-4X
 8 |     * Sorted KWICs: between 6X and 25X (or more) depending on use case, with no more limits on the size of the sort as a result.
 9 |     * Faceted browsing (frequencies): anywhere from 3X to 100X (or more)
10 |     * Landing page browsing: 10X faster or more on large corpora
11 | - Export results to CSV
12 | - Web config has been simplified with the use of global variables for citations
13 | - Some breaking changes to web config: you should not use a 4.6 config
14 | - Revamped Web UI: move to VueJS and Bootstrap 5.
15 | - Cleaner URLs for queries
16 | - Faster database loads
17 | - New generic dictionary lookup code
18 | - Support for date and integer types for metadata fields.
19 | 
20 | ### 4.6 ###
21 | - Port PhiloLogic4 codebase to Python3
22 | - Switch load time compression from Gzip to LZ4: big speed-up in loading large databases
23 | - Lib reorganization
24 | 
25 | #### 4.0 => 4.5 ####
26 | - Completely rewritten parser: can now parse broken XML
27 | - Massive lib reorg
28 | - A new system wide config
29 | - Loading process completely revamped: use philoload4 command
30 | - Completely rewritten collocations: faster and accurate
31 | - Added relative frequencies to frequencies in facets
32 | - Added sorted KWIC
33 | - Added support for regexes in quoted term searches (aka exact matches)
34 | - Added ability to filter out words in query expansion through a popup using the NOT syntax
35 | - Added configurable citations for all reports
36 | - Added concordance results sorting by metadata
37 | - Added approximate word searches using Levenshtein distance
38 | - Redesign facets and time series
39 | - Bug fixes and optimizations everywhere...
40 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Builds an Ubuntu 22.04 image that installs PhiloLogic4 from this source tree
# and serves it through Apache on port 80.
FROM ubuntu:22.04

# Keep apt from prompting for input during the build
ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies
# Fetch and run the NodeSource setup_22.x script before the main package install
RUN apt update && apt install -y curl && curl -fsSL https://deb.nodesource.com/setup_22.x | bash -

# System libraries, Apache, build tools, Node.js and the Python venv module;
# the apt cache and /var/lib/apt are removed in the same layer.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends libxml2-dev libxslt-dev zlib1g-dev apache2 libgdbm-dev liblz4-tool brotli ripgrep gcc make python3-dev wget sudo nodejs python3.10-venv && \
    apt-get clean && rm -rf /var/lib/apt

# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py

# Install PhiloLogic
COPY . /PhiloLogic4
WORKDIR /PhiloLogic4
# Run the installer, then create the directory used as database_root below
RUN sh install.sh && mkdir /var/www/html/philologic

# Enable the Apache modules rewrite, cgi and brotli
RUN a2enmod rewrite && a2enmod cgi && a2enmod brotli


# Configure global variables
# Replace the "None" placeholders for database_root and url_root in the global config
RUN sed -i 's/database_root = None/database_root = "\/var\/www\/html\/philologic\/"/' /etc/philologic/philologic4.cfg && \
    sed -i 's/url_root = None/url_root = "http:\/\/localhost\/philologic\/"/' /etc/philologic/philologic4.cfg

# Generate the container start script: stop any running Apache service,
# clear /var/run/apache2, then run Apache in the foreground.
RUN echo "#!/bin/bash\nservice apache2 stop\nrm /var/run/apache2/*\napachectl -D FOREGROUND" > /autostart.sh && chmod +x /autostart.sh

# Set up Apache
# Change "AllowOverride None" to "AllowOverride all" inside <Directory /var/www/>
RUN perl -i -p0e 's/<Directory \/var\/www\/>\n\tOptions Indexes FollowSymLinks\n\tAllowOverride None/<Directory \/var\/www\/>\n\tOptions Indexes FollowSymLinks\n\tAllowOverride all/smg' /etc/apache2/apache2.conf
EXPOSE 80
CMD ["/autostart.sh"]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![alt text](www/app/src/assets/philo.png) 4.7
 2 | ===========
 3 | 
 4 | PhiloLogic is an XML database/search engine/web app that is designed for the particular difficulties of TEI XML.  For a more theoretical
 5 | description, you can refer to [our research publications](http://jtei.revues.org/817) or [our blog](http://artfl.blogspot.com).
 6 | 
 7 | Note that as of version 4.7.3, PhiloLogic can now parse plain text files. See documentation for more details.
 8 | 
 9 | ### See [documentation](https://artfl-project.github.io/PhiloLogic4/)
10 | 
11 | ### IMPORTANT ###
12 | * PhiloLogic 4.7 will only work on Unix-based systems (Linux, *BSD); macOS is not supported and is not guaranteed to work.
13 | * PhiloLogic 4.7 will only run on the Apache Webserver
14 | * PhiloLogic 4.7 has only been tested on Python 3.8 and up. For a Python 2 version, use the [latest PhiloLogic 4.5 release](https://github.com/ARTFL-Project/PhiloLogic4/releases/tag/v4.5.9).
15 | * The PhiloLogic 4.7 Web App will only work on recent versions of web browsers: Chrome, Firefox, Safari, Opera, Edge. No support for Internet Explorer.
16 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | For PhiloLogic documentation, please visit https://artfl-project.github.io/PhiloLogic4/
2 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | title: PhiloLogic4 • Fulltext Search Engine for TEI-XML
2 | description: The BEST
3 | theme: jekyll-theme-slate
4 | highlighter: rouge
5 | url: https://artfl-project.github.io/PhiloLogic4/
6 | 


--------------------------------------------------------------------------------
/docs/_includes/foot.html:
--------------------------------------------------------------------------------
 1 | </div>
 2 | </div>
 3 | </div>
 4 | </div>
 5 | <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
 6 | <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
 7 | <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
 8 | </body>
 9 | 
10 | </html>


--------------------------------------------------------------------------------
/docs/_includes/nav.html:
--------------------------------------------------------------------------------
1 | <nav class="nav-primary" role="navigation">
2 |     <ul>
3 |         {% for p in site.pages %}
4 |         <li>
5 |             <a {% if p.url == page.url %}class="active" {% endif %} href="{{ site.baseurl }}{{ p.url }}">{{ p.title }}</a>
6 |         </li>
7 |         {% endfor %}
8 |     </ul>
9 | </nav>


--------------------------------------------------------------------------------
/docs/_layouts/default.html:
--------------------------------------------------------------------------------
1 | {% include head.html %}
2 | <h1>
3 |   <p class="title text-center">{{ page.title }}</p>
4 | </h1>
5 | 
6 | {{ content }}
7 | 
8 | {% include foot.html %}


--------------------------------------------------------------------------------
/docs/_layouts/default_exp.html:
--------------------------------------------------------------------------------
1 | {% include head.html %}
2 | 
3 | <h1>{{ page.title }}</h1>
4 | 
5 | {{ content }}
6 | 
7 | {% include nav.html %}
8 | 
9 | {% include foot.html %}


--------------------------------------------------------------------------------
/docs/access_control.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: How to set-up access-control
 3 | ---
 4 | 
 5 | There are two ways to control access: user/password authentication and IP/domain checks. You can use either separately, or both.
 6 | 
 7 | ### Turn on access control
 8 | 
 9 | The first thing you need to do to turn on access control is to set
10 | the variable `access_control` in `your_db_dir/data/web_config.cfg` to `True`, such as:
11 | 
12 | ```Python
13 | access_control = True
14 | ```
15 | 
16 | While this option turns on the ability to control access, you still need to configure authentication or ip check,
17 | otherwise access control will be turned off.
18 | 
19 | ### User authentication
20 | 
21 | To use user authentication, you need to create a logins.txt file inside your `your_db_dir/data/` directory. This can be a symlink.
22 | If no file is found, access will be granted.
23 | The logins.txt file should contain one user/password pair per line, separated by a tab, such as:
24 | 
25 | ```
26 | username  password
27 | another_user  another_password
28 | ```
29 | 
30 | ### Domain and IP range check
31 | 
32 | To use this feature, you need to specify the location of the file in `web_config.cfg` in the `access_file` variable.
33 | 
34 | This file should contain 3 Python variables: `domain_list`, `allowed_ips`,
35 | `blocked_ips`. Each variable should be a list containing the salient info.
36 | 
37 | The `domain_list` variable should be a list of domains allowed to access your database.
38 | 
39 | ```Python
40 | domain_list = [
41 |   "uchicago.edu",
42 |   "indiana.edu",
43 |   "louisiana.edu",
44 |   "northwestern.edu"
45 | ]
46 | ```
47 | 
48 | The `allowed_ips` variable is a list of ips which are given access to the DB. Note that these are
49 | matched using a regular expression, so you can express the whole ip, or just a part of it.
50 | 
51 | ```Python
52 | allowed_ips = [
53 |   "128.135.",
54 |   "128.32",
55 |   "136.152",
56 |   "136.153.1.1-255"
57 | ]
58 | ```
59 | 
60 | Note that the last IP notation expresses an IP range.
61 | 
62 | The `blocked_ips` variable is a list of IPs (exact matches needed) to deny access to:
63 | 
64 | ```Python
65 | blocked_ips = [
66 |   "1.1.1.4"
67 | ]
68 | ```
69 | 
70 | ### What happens when you're granted access
71 | 
72 | A cookie is saved to your browser, so that subsequent visits no longer require access check.
73 | 


--------------------------------------------------------------------------------
/docs/assets/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/docs/assets/favicon.ico


--------------------------------------------------------------------------------
/docs/assets/philo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/docs/assets/philo.png


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: What is Philologic?
 3 | ---
 4 | 
 5 | PhiloLogic is an XML database/search engine/web app developed at the [ARTFL Project](https://artfl-project.uchicago.edu) and designed
 6 | for the particular difficulties of TEI XML. For a more theoretical
 7 | description, you can refer to [our blog](<http://artfl.blogspot.com>).
 8 | 
 9 | ### Documentation
10 | 
11 | -   [**Installation**](installation.md)
12 | -   [**Database Loading**](database_loading.md)
13 | -   [**Configuring the Web Application**](configure_web_app.md)
14 | -   [**Query Syntax**](query_syntax.md)
15 | -   [**Text Encoding Spec**](encoding_spec.md)
16 | -   [**Access Control**](access_control.md)
17 | 
18 | ### IMPORTANT
19 | 
20 | -   PhiloLogic4.7 will only work on Unix-based systems (Linux, \*BSD); macOS is not supported and is not guaranteed to work.
21 | -   PhiloLogic4.7 will only run on the Apache Webserver
22 | -   PhiloLogic4.7 has only been tested on Python 3.8 and up
23 | -   The PhiloLogic4.7 Web App will only work on recent versions of web browsers: Chrome, Firefox, Safari, Opera, Edge. No support for Internet Explorer.
24 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Installation
 3 | ---
 4 | 
 5 | Installing PhiloLogic consists of two steps:
 6 | 
 7 | 1. Install the C and Python libraries system-wide
 8 | 2. Set up a directory in your web server to serve databases from
 9 | 
10 | You can find more detailed installation instructions for specific OSes here:
11 | 
12 | -   [RedHat (and CentOS)](specific_installations/redhat_installation.md)
13 | -   [Ubuntu](specific_installations/ubuntu_installation.md)
14 | 
15 | ### Downloading
16 | 
17 | IMPORTANT: Do not install from the master branch on GitHub: this is the development branch and is in no way guaranteed to be stable.
18 | 
19 | You can find a copy of the latest version of PhiloLogic4 [here](../../../releases/).
20 | 
21 | ### Prerequisites
22 | 
23 | -   Apache Webserver
24 | -   Python 3.8 and up
25 | -   GCC
26 | -   Make
27 | -   [gdbm](http://www.gnu.org.ua/software/gdbm/)
28 | -   LZ4
29 | -   Brotli (for Apache compression)
30 | 
31 | ### Installing
32 | 
33 | Installing PhiloLogic's libraries requires administrator privileges.
34 | The C library depends on `gdbm`, which _must_ be installed first, to compile correctly.
35 | 
36 | Just run the install.sh in the top level directory of the PhiloLogic4 you downloaded to install PhiloLogic and its dependencies:
37 | 
38 | `./install.sh`
39 | 
40 | ### <a name="global-config"></a>Global Configuration
41 | 
42 | The installer creates a file in `/etc/philologic/philologic4.cfg` which contains several important global variables:
43 | 
44 | -   `database_root` defines the filesystem path to the root web directory for your PhiloLogic install, such as `/var/www/html/philologic`. Make sure your user or group has full write permissions to that directory.
45 | -   `url_root` defines the URL path to the same root directory for your philologic install, such as http://localhost/philologic/
46 | -   `web_app_dir` defines the location of the PhiloLogic4 www directory. By default, the installer will copy the contents of the PhiloLogic www directory (which contains the web app) to /etc/philologic/web_app/.
47 | 
48 | ### Setting up PhiloLogic Web Application
49 | 
50 | Each new PhiloLogic database you load, containing one or more TEI-XML files, will be served
51 | by its own dedicated copy of the PhiloLogic web application.
52 | By convention, this database and web app reside together in a directory
53 | accessible via an HTTP server configured to run Python CGI scripts.
54 | 
55 | Make sure you configure the `/etc/philologic/philologic4.cfg` appropriately.
56 | 
57 | Configuring your web server is outside of the scope of this document; but the web install
58 | does come with a preconfigured .htaccess file that allows you to run the Web App.
59 | Therefore, you need to make sure your server is configured to allow htaccess files.
60 | 


--------------------------------------------------------------------------------
/docs/specific_installations/redhat_installation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Installing PhiloLogic on RedHat (and CentOS)
 3 | ---
 4 | 
 5 | * Install gdbm
 6 | 
 7 |   `sudo yum install gdbm gdbm-devel`
 8 | 
 9 | 
10 | * Run install script
11 | 
12 |   `./install.sh`
13 | 
14 | * Configure Apache
15 |   * Make sure your preferred webspace allows full override for htaccess files: `AllowOverride All`
16 |   * Make sure the correct permissions are set on the folder dedicated to PhiloLogic databases, 
17 |     i.e. write access for the user/group that will be building databases.
18 | 


--------------------------------------------------------------------------------
/docs/specific_installations/ubuntu_installation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Installing PhiloLogic4 on Ubuntu
 3 | ---
 4 | 
 5 | -   The following dependencies need to be installed:
 6 | 
 7 |     -   libxml2-dev
 8 |     -   libxslt-dev
 9 |     -   zlib1g-dev
10 |     -   apache2
11 |     -   libgdbm-dev
12 |     -   libgdbm-dev
13 |     -   liblz4-tool
14 |     -   brotli
15 |     -   ripgrep
16 | 
17 |     Run the following command:
18 | 
19 |     `sudo apt-get install libxml2-dev libxslt-dev zlib1g-dev apache2 libgdbm-dev liblz4-tool brotli ripgrep`
20 | 
21 | - Install pip3 (not the version from Ubuntu repos since it breaks pyproject.toml builds). First delete the python3-setuptools Ubuntu package if present: `sudo apt purge python3-setuptools`, then run:
22 |     `wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py`
23 | 
24 | -   Run install script inside the PhiloLogic4 directory
25 | 
26 |     `./install.sh`
27 | 
28 | -   Set-up Apache:
29 |     -   enable mod_rewrite: `sudo a2enmod rewrite`
30 |     -   enable mod_cgi: `sudo a2enmod cgi`
31 |     -   enable brotli: `sudo a2enmod brotli`
32 |     -   Make sure to set `AllowOverride` to `all` for the directory containing your philologic databases in your Apache config
33 | 


--------------------------------------------------------------------------------
/extras/artfl_theme.scss:
--------------------------------------------------------------------------------
// ARTFL theme: declares a small colour palette and applies it to the web app's
// navbar, buttons, links, form controls and landing-page elements.

// Custom Bootstrap changes: don't edit
$popover-max-width: 50%;
.custom-popover {
    overflow: auto;
    text-align: justify !important;
    max-height: 60%;
}

// Theme colors
$header-color: rgb(245, 219, 157);
// Same RGB triple as $button-color-active, at 80% opacity
$button-color: rgba(143, 57, 49, .8);
$button-color-active: rgb(143, 57, 49);
$link-color: #8f3931;
// Not referenced by any rule in this file
$passage-color: rgb(180, 106, 85);
// Themed elements
nav.navbar {
    background-color: $header-color !important;
}

// NOTE(review): $secondary (and $info below) look like Bootstrap Sass variable
// overrides -- confirm against the Bootstrap build this file is imported into.
$secondary: $button-color;
.btn-secondary.active {
    background-color: $button-color-active !important;
}

.btn-outline-secondary.active {
    color: #fff !important;
}

$info: $button-color;
.btn-light {
    border: solid 1px rgb(206, 212, 218) !important;
}

.btn-light.active {
    background-color: #eee !important;
}

// Utility class exposing the link colour (no !important, unlike the `a` rule below)
.link-color {
    color: $link-color
}

a {
    color: $link-color !important
}

// Anchors styled as secondary buttons keep white text despite the `a` rule above
a.btn-secondary {
    color: #fff !important;
}

.number,
.card-header {
    background-color: $header-color !important;
    color: $link-color !important;
}

// Input add-ons and checked/focused custom controls: link-coloured text and
// border on a white background
.input-group-text,
.custom-control-input:checked~.custom-control-label::before,
.custom-control-input:focus~.custom-control-label::before {
    color: $link-color !important;
    background-color: #fff !important;
    border-color: $link-color !important;
}

.metadata-args,
.remove-metadata,
.term-groups,
.close-pill {
    border-color: $link-color !important;
}

// Label and hover states: button-colour fill with white text
.metadata-label,
.remove-metadata:hover,
.term-group-word:hover,
.close-pill:hover {
    background-color: $button-color !important;
    color: #fff !important;
}

.custom-control-label::after {
    background-color: $button-color !important;
}

.letter {
    color: $link-color !important;
}

.letter:hover,
#dico-landing-volume .list-group-item:hover {
    background-color: $button-color !important;
    color: #fff !important;
}

#dico-landing-volume a:hover {
    color: #fff !important;
}

#report-error {
    color: #fff !important;
}

// Focus styling. The two .custom-control-input selectors also appear in the
// .input-group-text rule earlier in this file, which sets border-color to
// $link-color; this rule sets it to $button-color and comes later.
.custom-select:focus,
.custom-control-input:checked~.custom-control-label::before,
.custom-control-input:focus~.custom-control-label::before,
input[type="text"]:focus {
    box-shadow: 0 0 0 0.05rem $button-color !important;
    border-color: $button-color !important;
    opacity: .5
}

.landing-page-btn:focus {
    border-color: $link-color !important;
}

#report button:focus {
    mix-blend-mode: hard-light;
}

// Exposes $link-color under the key `color`.
// NOTE(review): presumably a CSS-modules style export read from JavaScript -- confirm.
:export {
    color: $link-color
}


--------------------------------------------------------------------------------
/extras/metadata_extractor.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | 
 3 | import json
 4 | import os
 5 | import sqlite3
 6 | import sys
 7 | 
 8 | from philologic.runtime.DB import DB
 9 | 
10 | 
# For each object type, the number of leading philo_id components that main()
# keeps when building the key of an extracted object.
object_levels = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
12 | 
13 | 
def main(object_level, db_path):
    """Dump the metadata of every object of one type to ``metadata.json``.

    Opens the database under ``<db_path>/data``, reads all rows of the ``toms``
    table whose ``philo_type`` equals ``object_level``, and writes a JSON object
    to ``metadata.json`` in the current working directory. Each key is the
    object's philo_id truncated to ``object_levels[object_level]`` components
    and joined with underscores; each value maps every field listed in
    ``database.locals["metadata_fields"]`` to its value ("" when empty), plus
    the ``filename`` of the document the object belongs to.

    Args:
        object_level: a key of ``object_levels`` (e.g. "doc", "div1", "para").
        db_path: path to the database directory containing ``data``.
    """
    database = DB(os.path.join(db_path, "data"))
    cursor = database.dbh.cursor()

    # Map each document id (first philo_id component) to its filename.
    cursor.execute("SELECT philo_id, filename FROM toms WHERE philo_type='doc'")
    doc_filenames = {doc_philo_id.split()[0]: filename for doc_philo_id, filename in cursor}

    metadata_fields = {}
    cursor.execute("SELECT * FROM toms WHERE philo_type=?", (object_level,))
    for row in cursor:
        id_parts = row["philo_id"].split()
        object_key = "_".join(id_parts[: object_levels[object_level]])
        record = {}
        metadata_fields[object_key] = record
        for field in database.locals["metadata_fields"]:
            record[field] = row[field] or ""
        # The filename always comes from the owning document's row.
        record["filename"] = doc_filenames[id_parts[0]]

    with open("metadata.json", "w") as metadata_file:
        json.dump(metadata_fields, metadata_file)
34 | 
35 | 
if __name__ == "__main__":
    # Usage: metadata_extractor.py OBJECT_LEVEL DB_PATH
    # Validate up front so a missing argument or an unknown object level gives
    # a usage message instead of an IndexError/KeyError traceback.
    if len(sys.argv) < 3 or sys.argv[1] not in object_levels:
        valid_levels = "|".join(object_levels)
        sys.exit(f"Usage: {sys.argv[0]} <{valid_levels}> <path_to_database>")
    object_level = sys.argv[1]
    db_path = sys.argv[2]
    main(object_level, db_path)
40 | 


--------------------------------------------------------------------------------
/extras/python2_to_python3_port.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """This script should be used to convert a PhiloLogic 4.5 loaded database to
 3 | a PhiloLogic 4.6 database. Essentially, it makes the database run under Python 3.
 4 | This code assumes all the code in the PhiloLogic database directory is unmodified.
 5 | All custom code should be handled separately."""
 6 | 
 7 | import sys
 8 | import os
 9 | 
# Directory whose *.py files, reports/ and scripts/ (and optionally app/) are
# copied over the database's own copies by main().
PHILOLOGIC_INSTALL = "/var/lib/philologic4/web_app/"
# Name of the 2to3 executable run by convert_config().
TWO_TO_THREE_EXEC = "2to3-3.6"
# When True, convert_config() also runs black on each converted file.
FORMAT_CODE = False
# When True, main() also copies the app/ directory into the database.
UPDATE_WEB_APP = False
14 | 
15 | 
def convert_config(database_to_convert, config_file):
    """Rewrite one file of the database's data directory in place with 2to3.

    Runs ``TWO_TO_THREE_EXEC --no-diffs -w`` on
    ``<database_to_convert>/data/<config_file>`` and, when ``FORMAT_CODE`` is
    True, formats the result with black (line length 120). The output of both
    commands is discarded and their exit status is not checked.
    """
    target = f"{database_to_convert}/data/{config_file}"
    discard_output = "> /dev/null 2>&1"
    os.system(f"{TWO_TO_THREE_EXEC} --no-diffs -w {target} {discard_output}")
    if FORMAT_CODE is True:
        os.system(f"black -q -l 120 {target} {discard_output}")
21 | 
22 | 
def main():
    """Convert the database named by the first command-line argument.

    Steps, in order:
      1. run convert_config() on data/web_config.cfg and data/db.locals.py;
      2. replace the old byte-range ``token_regex`` line in data/db.locals.py
         with the ``\\w``-based one;
      3. copy the *.py files of PHILOLOGIC_INSTALL, its reports/ and its
         scripts/ into the database (plus app/ when UPDATE_WEB_APP is True).
    """
    db_dir = sys.argv[1]
    for config_name in ("web_config.cfg", "db.locals.py"):
        convert_config(db_dir, config_name)

    # Swap the old token regex for the new one, leaving the rest of the file as is.
    locals_path = os.path.join(db_dir, "data/db.locals.py")
    old_token_regex = r'''token_regex = "[\\&A-Za-z0-9\x7f-\xff][\\&A-Za-z0-9\x7f-\xff\\_';]*"'''
    new_token_regex = r'''token_regex = "\w+|[&\w;]+"'''
    with open(locals_path) as locals_file:
        contents = locals_file.read()
    contents = contents.replace(old_token_regex, new_token_regex)
    with open(locals_path, "w") as locals_file:
        locals_file.write(contents)
    # convert_config(db_dir, "load_config.py")

    copy_commands = [
        f"cp -f {PHILOLOGIC_INSTALL}/*py {db_dir}",
        f"cp -f {PHILOLOGIC_INSTALL}/reports/*py {db_dir}/reports/",
        f"cp -f {PHILOLOGIC_INSTALL}/scripts/*py {db_dir}/scripts/",
    ]
    if UPDATE_WEB_APP is True:
        copy_commands.append(f"cp -Rf {PHILOLOGIC_INSTALL}/app/* {db_dir}/app/")
    for command in copy_commands:
        os.system(command)

    print(db_dir, "converted...")
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     main()
48 | 


--------------------------------------------------------------------------------
/extras/rebuild_app.py:
--------------------------------------------------------------------------------
 1 | """Rebuild web app after PhiloLogic database copied to new server / VM / docker env"""
 2 | import sys
 3 | import os
 4 | 
 5 | 
 6 | if __name__ == "__main__":
 7 |     philo_db = sys.argv[1]
 8 |     app_path = f"{philo_db}/app"
 9 |     os.system(f"rm -rf {app_path}")
10 |     os.system(f"cp -R /var/lib/philologic4/web_app/app {philo_db}/")
11 |     os.system(f"chown -R $(whoami) ${app_path}")  # Make sure we have the correct permissions for npm to run
12 |     os.system(f"cd {app_path}; npm run build;")
13 |     print(f"{philo_db} done")
14 | 


--------------------------------------------------------------------------------
/extras/tol_theme.scss:
--------------------------------------------------------------------------------
  1 | // Custom Bootstrap changes: don't edit
  2 | $popover-max-width: 50%;
  3 | .custom-popover {
  4 |     overflow: auto;
  5 |     text-align: justify !important;
  6 |     max-height: 60%;
  7 | }
  8 | 
  9 | // Theme colors
 10 | $header-color: rgb(103, 10, 10);
 11 | $button-color: rgb(103, 10, 10);
 12 | $button-color-active: rgb(256, 256, 256);
 13 | $link-color: rgb(103, 10, 10);
 14 | $passage-color: rgb(180, 106, 85);
 15 | // Themed elements
 16 | nav.navbar {
 17 |     background-color: #fff !important;
 18 | }
 19 | 
 20 | $secondary: $button-color;
 21 | .btn-secondary.active {
 22 |     background-color: $button-color-active !important;
 23 |     color: $link-color !important;
 24 |     border-color: $button-color !important;
 25 | }
 26 | 
 27 | .btn-outline-secondary.active {
 28 |     color: #fff !important;
 29 | }
 30 | 
 31 | $info: $button-color;
 32 | .btn-light.active {
 33 |     background-color: #eee !important;
 34 | }
 35 | 
 36 | .link-color {
 37 |     color: $link-color
 38 | }
 39 | 
 40 | a {
 41 |     color: $link-color !important
 42 | }
 43 | 
 44 | a.btn-secondary {
 45 |     color: #fff !important;
 46 | }
 47 | 
 48 | .number,
 49 | .card-header {
 50 |     background-color: $header-color !important;
 51 |     color: #fff !important;
 52 | }
 53 | 
 54 | .input-group-text,
 55 | .custom-control-input:checked~.custom-control-label::before,
 56 | .custom-control-input:focus~.custom-control-label::before {
 57 |     color: $link-color !important;
 58 |     background-color: #fff !important;
 59 |     border-color: $link-color !important;
 60 | }
 61 | 
 62 | .metadata-args,
 63 | .remove-metadata,
 64 | .term-groups,
 65 | .close-pill {
 66 |     border-color: $link-color !important;
 67 | }
 68 | 
 69 | .metadata-label,
 70 | .remove-metadata:hover,
 71 | .term-group-word:hover,
 72 | .close-pill:hover {
 73 |     background-color: $button-color !important;
 74 |     color: #fff !important;
 75 | }
 76 | 
 77 | .custom-control-label::after {
 78 |     background-color: $button-color !important;
 79 | }
 80 | 
 81 | .letter {
 82 |     color: $link-color !important;
 83 | }
 84 | 
 85 | .letter:hover {
 86 |     background-color: $button-color !important;
 87 |     color: #fff !important;
 88 | }
 89 | 
 90 | #report-error {
 91 |     color: #fff !important;
 92 | }
 93 | 
 94 | .custom-select:focus,
 95 | .custom-control-input:checked~.custom-control-label::before,
 96 | .custom-control-input:focus~.custom-control-label::before,
 97 | input[type="text"]:focus {
 98 |     box-shadow: 0 0 0 0.05rem $button-color !important;
 99 |     border-color: $button-color !important;
100 |     opacity: .5
101 | }
102 | 
103 | .landing-page-btn {
104 |     border-bottom-width: 1px !important;
105 | }
106 | 
107 | .note-ref {
108 |     vertical-align: 0.3em !important;
109 |     font-size: .7em !important;
110 |     font-weight: 700 !important;
111 |     color: #670a0a !important;
112 |     padding: 0 0.2rem !important;
113 |     background-color: #fff !important;
114 | }
115 | 
116 | :export {
117 |     color: $link-color
118 | }


--------------------------------------------------------------------------------
/extras/utilities/extract_metadata.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | import regex as re
 6 | from lxml import etree
 7 | from philologic.Loader import Loader
 8 | from philologic.Parser import DefaultMetadataXPaths
 9 | 
10 | 
11 | ### USAGE ###
12 | # python extract_metadata.py files
13 | 
14 | 
def pre_parse_whole_file(fn):
    """Parse an XML file and strip namespaces from every element tag.

    Args:
        fn: Path of the XML file to parse.

    Returns:
        The root element of the parsed document. Tags of the form
        ``{namespace}name`` are rewritten in place to ``name``.
    """
    # Read bytes rather than text: lxml refuses str input that carries an XML
    # encoding declaration, and bytes let it honour the declared encoding.
    # The context manager also guarantees the handle is closed.
    with open(fn, "rb") as fh:
        tree = etree.fromstring(fh.read())
    # Remove namespace
    for el in tree.iter():
        try:
            if el.tag.startswith("{"):
                el.tag = el.tag.rsplit("}", 1)[-1]
        except AttributeError:  # el.tag is not a string (e.g. lxml comments / processing instructions)
            pass
    return tree
26 | 
27 | 
def sort_by_metadata(filelist, metadata_xpaths, *fields, **options):
    """Extract document-level metadata from each file and return it sorted.

    Args:
        filelist: Paths of the XML files to read.
        metadata_xpaths: Iterable of ``(object_type, xpath, field)`` triples.
            Only triples whose object type is ``"doc"`` are used, and for each
            field the first xpath that yields a value wins.
        *fields: Metadata field names forming the sort key, in priority order.
        **options: ``reverse`` (bool, default False) reverses the sort order.

    Returns:
        A list with one dict per file. Each dict holds ``"filename"`` (str)
        plus one UTF-8 encoded ``bytes`` value per metadata field found.
    """
    reverse = options.get("reverse", False)
    load_metadata = []

    for fn in filelist:
        data = {"filename": fn}
        tree = pre_parse_whole_file(fn)

        for object_type, xpath, field in metadata_xpaths:
            if object_type != "doc" or field in data:
                continue
            # An xpath ending in "@name" addresses an attribute: look up the
            # element part with findall and read the attribute from the match.
            attr_pattern_match = re.search(r"@([^\/\[\]]+)$", xpath)
            if attr_pattern_match:
                xp_prefix = xpath[: attr_pattern_match.start(0)]
                attr_name = attr_pattern_match.group(1)
                for el in tree.findall(xp_prefix):
                    if el is not None and el.get(attr_name, ""):
                        data[field] = el.get(attr_name, "").encode("utf-8")
                        break
            else:
                el = tree.find(xpath)
                if el is not None and el.text is not None:
                    data[field] = el.text.encode("utf-8")
        load_metadata.append(data)

    def make_sort_key(d):
        # Extracted values are bytes, so the placeholder for a missing field
        # must be bytes too; a str default cannot be compared with bytes and
        # made the sort raise TypeError.
        return [d.get(f, b"") for f in fields]

    load_metadata.sort(key=make_sort_key, reverse=reverse)
    return load_metadata
63 | 
64 | 
if __name__ == "__main__":
    # Prefer project-specific xpaths when an artfl_xpaths module is importable;
    # otherwise fall back to PhiloLogic's defaults. Only a failed import
    # triggers the fallback, so unrelated errors are no longer swallowed.
    try:
        from artfl_xpaths import metadata_xpaths
    except ImportError:
        metadata_xpaths = DefaultMetadataXPaths
    load_metadata = sort_by_metadata(sys.argv[1:], metadata_xpaths)

    for file_metadata in load_metadata:
        print("## Metadata found for %s ##" % file_metadata["filename"])
        for name, value in file_metadata.items():
            if name == "filename":
                continue
            # Values are stored UTF-8 encoded; decode so the text is printed
            # instead of a b'...' representation.
            if isinstance(value, bytes):
                value = value.decode("utf-8")
            print("%s: %s" % (name, value))
        print()
78 | 


--------------------------------------------------------------------------------
/extras/utilities/fix_notes.py:
--------------------------------------------------------------------------------
 1 | """Move notes to end of TEI file according to PhiloLogic's spec."""
 2 | 
 3 | from copy import deepcopy
 4 | import sys
 5 | from lxml import etree
 6 | 
 7 | 
 8 | def update_notes(filename):
 9 |     """Add inline notes at the end of the file"""
10 |     with open(filename, "rb") as input_file:
11 |         text = input_file.read()
12 |     parser = etree.XMLParser(remove_blank_text=True)
13 |     root = etree.fromstring(text, parser)
14 |     for el in root.getiterator():
15 |         try:
16 |             if el.tag.startswith("{"):
17 |                 el.tag = el.tag.rsplit("}", 1)[-1]
18 |         except AttributeError:
19 |             pass
20 |     note_div = etree.Element("div", type="notes")
21 |     head = etree.Element("head")
22 |     head.text = "Notes"
23 |     head.tail = "\n"
24 |     note_div.insert(0, head)
25 |     note_div.text = "\n"
26 |     note_count = 1
27 |     notes_skipped = 0
28 |     for el in root.iter("note"):
29 |         inHeader = False
30 |         for ancestor in el.iterancestors():
31 |             if ancestor.tag == "teiHeader":
32 |                 inHeader = True
33 |                 notes_skipped += 1
34 |                 break
35 |         if inHeader:
36 |             continue
37 |         new_note = deepcopy(el)
38 |         for attr in new_note.attrib:
39 |             del new_note.attrib[attr]
40 |         new_note.attrib["id"] = f"{note_count}"
41 |         new_note.tail = "\n"
42 |         note_div.append(new_note)
43 |         el.tag = "ref"
44 |         el.attrib["type"] = "note"
45 |         el.attrib["target"] = f"{note_count}"
46 |         for child in el:
47 |             el.remove(child)
48 |         el.text = ""
49 |         note_count += 1
50 |     if note_count > 1:
51 |         root[-1].append(note_div)
52 |     extension = filename.split(".")[-1]
53 |     new_file = f'{filename.replace(f".{extension}", "")}_fixed_notes.{extension}'
54 |     with open(new_file, "w", encoding="utf8") as output:
55 |         tree = etree.ElementTree(root)
56 |         output.write(etree.tostring(tree, encoding="unicode", pretty_print=True))
57 | 
58 | 
59 | if __name__ == "__main__":
60 |     update_notes(sys.argv[1])
61 | 


--------------------------------------------------------------------------------
/extras/utilities/list_xpath_in_header.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | import regex as re
 5 | from lxml import etree
 6 | 
 7 | 
 8 | ### USAGE ###
 9 | # python list_xpath_in_header.py files
10 | 
11 | 
def pre_parse_header(fn):
    """Extract and parse the TEI header of a file.

    The header runs from the first ``<teiheader>`` or ``<temphead>`` tag
    (case-insensitive, without attributes) to the next closing tag matched by
    the end pattern. Only that slice of the file is parsed.

    Args:
        fn: Path of the file to read.

    Returns:
        The root element of the parsed header, with namespaces stripped from
        every element tag.

    Raises:
        ValueError: If no complete header is found before the end of the file.
            Previously this case looped forever.
    """
    header_parts = []
    header_closed = False
    with open(fn) as fh:
        for line in fh:
            if not header_parts:
                opening = re.search(r"<teiheader>|<temphead>", line, re.IGNORECASE)
                if opening is None:
                    continue
                header_parts.append(line[opening.start() : opening.end()])
                # Keep scanning the rest of this line: the header may be
                # closed on the same line it was opened on.
                line = line[opening.end() :]
            closing = re.search(r"</teiheader>|<\/?temphead>", line, re.IGNORECASE)
            if closing:
                header_parts.append(line[: closing.end()])
                header_closed = True
                break
            header_parts.append(line)
    if not header_closed:
        raise ValueError("No complete TEI header found in %s" % fn)

    tree = etree.fromstring("".join(header_parts))
    for el in tree.iter():
        try:
            if el.tag.startswith("{"):
                el.tag = el.tag.rsplit("}", 1)[-1]
        except AttributeError:  # el.tag is not a string for some reason
            pass
    return tree
38 | 
39 | 
def retrieve_xpaths(filelist):
    """Print the xpath of every leaf element with text in each file's header.

    For each file a ``## XPATHS for <file>`` banner is printed, followed by one
    xpath per childless header element whose text is not None, then a blank
    line. Nothing is returned.

    Args:
        filelist: Paths of the files to inspect.
    """
    for fn in filelist:
        print("## XPATHS for %s" % fn)
        tree = pre_parse_header(fn)
        root = tree.getroottree()
        for el in tree.iter():
            # len(el) counts children; it replaces the deprecated getchildren().
            if len(el) == 0 and el.text is not None:
                print(root.getpath(el))
        print()
50 | 
51 | 
52 | if __name__ == "__main__":
53 |     xpaths = retrieve_xpaths(sys.argv[1:])
54 | 


--------------------------------------------------------------------------------
/extras/utilities/update_toms.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sqlite3
 4 | import sys
 5 | import regex as re
 6 | from philologic.PostFilters import metadata_frequencies, normalized_metadata_frequencies
 7 | 
 8 | 
 9 | def change_metadata(metadata_field):
10 |     """This is an example of a modification you could make to your metadata field
11 |     Modify at will"""
12 |     updated_metadata = re.sub(".*(\d{4}).*", "\\1", metadata_field)
13 |     return updated_metadata
14 | 
15 | 
def update_function(c, field, db_location):
    """Rewrite one metadata field of every document row and refresh its frequencies.

    Args:
        c: sqlite3 cursor on the database's ``toms.db``. Its connection is
            committed and closed by this function.
        field: Name of the ``toms`` column to update.
        db_location: Root directory of the database, used to locate the
            frequency files.
    """
    # Column names cannot be bound as parameters, so ``field`` is still
    # interpolated; every value goes through "?" placeholders so quotes in the
    # metadata can no longer break the statement.
    c.execute(f"select philo_id, {field} from toms where philo_type=?", ("doc",))
    updated_value = {philo_id: change_metadata(metadata_field) for philo_id, metadata_field in c.fetchall()}

    ## Update SQL table
    # Use the cursor's own connection instead of relying on a module-level
    # ``conn`` that only exists when the file is run as a script.
    connection = c.connection
    c.executemany(
        f"update toms set {field}=? where philo_id=?",
        [(new_value, philo_id) for philo_id, new_value in updated_value.items()],
    )
    connection.commit()
    connection.close()

    ## Update frequency file
    loader_obj = LoaderObj(db_location, field)
    print(loader_obj.destination, loader_obj.metadata_fields)
    metadata_frequencies(loader_obj)
    normalized_metadata_frequencies(loader_obj)
36 | 
37 | 
def parse_command_line(args):
    """Return ``(db_location, field)`` taken from an argv-style list.

    Args:
        args: Argument list whose first item is the program name, followed by
            the database location and the field to update.

    Returns:
        Tuple ``(db_location, field)``.

    Raises:
        SystemExit: With status 1, after printing usage, when fewer than two
            arguments follow the program name.
    """
    if len(args) < 3:
        print("You need two arguments to execute this script")
        print("python update_toms.py db_location field_to_update")
        sys.exit(1)
    # Read from ``args`` rather than sys.argv so the parameter is honoured.
    return args[1], args[2]
46 | 
47 | 
def connect_to_db(db_location):
    """Open ``<db_location>/data/toms.db`` and return ``(connection, cursor)``."""
    connection = sqlite3.connect(f"{db_location}/data/toms.db")
    return connection, connection.cursor()
52 | 
53 | 
class LoaderObj:
    """Loader stand-in carrying only the attributes needed to update the frequency files."""

    def __init__(self, db_location, field):
        # Single-field list: only the updated field is exposed.
        self.metadata_fields = [field]
        # Data directory of the database.
        self.destination = f"{db_location}/data"
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     db_location, field = parse_command_line(sys.argv)
63 |     conn, c = connect_to_db(db_location)
64 |     update_function(c, field, db_location)
65 | 


--------------------------------------------------------------------------------
/extras/vf_theme.scss:
--------------------------------------------------------------------------------
  1 | // Custom Bootstrap changes: don't edit
  2 | $popover-max-width: 50%;
  3 | .custom-popover {
  4 |     overflow: auto;
  5 |     text-align: justify !important;
  6 |     max-height: 60%;
  7 | }
  8 | 
// Theme colors
// Navbar background.
$header-color: #fff;
// Base for $secondary and $info; also hover backgrounds and focus outlines.
$button-color: rgba(18, 47, 83, .9);
// Background of the active .btn-secondary.
$button-color-active: rgb(0, 33, 71);
// Links, card headers, borders of inputs and pills; also the :export value.
$link-color: rgb(0, 33, 71);
// Not referenced in this file; presumably consumed by the app that imports the theme -- confirm.
$passage-color: rgba(18, 47, 83, .9);
 15 | // Themed elements
 16 | nav.navbar {
 17 |     background-color: $header-color !important;
 18 | }
 19 | 
 20 | $secondary: $button-color;
 21 | .btn-secondary.active {
 22 |     background-color: $button-color-active !important;
 23 | }
 24 | 
 25 | .btn-outline-secondary.active {
 26 |     color: #fff !important;
 27 | }
 28 | 
 29 | $info: $button-color;
 30 | .btn-light {
 31 |     border: solid 1px rgb(206, 212, 218) !important;
 32 | }
 33 | 
 34 | .btn-light.active {
 35 |     background-color: #eee !important;
 36 | }
 37 | 
 38 | .link-color {
 39 |     color: $link-color
 40 | }
 41 | 
 42 | a {
 43 |     color: $link-color !important
 44 | }
 45 | 
 46 | a.btn-secondary {
 47 |     color: #fff !important;
 48 | }
 49 | 
 50 | .number,
 51 | .card-header {
 52 |     background-color: $link-color !important;
 53 |     color: #fff !important;
 54 | }
 55 | 
 56 | .input-group-text,
 57 | .custom-control-input:checked~.custom-control-label::before,
 58 | .custom-control-input:focus~.custom-control-label::before {
 59 |     color: $link-color !important;
 60 |     background-color: #fff !important;
 61 |     border-color: $link-color !important;
 62 | }
 63 | 
 64 | .metadata-args,
 65 | .remove-metadata,
 66 | .term-groups,
 67 | .close-pill {
 68 |     border-color: $link-color !important;
 69 | }
 70 | 
 71 | .metadata-label,
 72 | .remove-metadata:hover,
 73 | .term-group-word:hover,
 74 | .close-pill:hover {
 75 |     background-color: $button-color !important;
 76 |     color: #fff !important;
 77 | }
 78 | 
 79 | .custom-control-label::after {
 80 |     background-color: $button-color !important;
 81 | }
 82 | 
 83 | .letter {
 84 |     color: $link-color !important;
 85 | }
 86 | 
 87 | .letter:hover {
 88 |     background-color: $button-color !important;
 89 |     color: #fff !important;
 90 | }
 91 | 
 92 | #report-error {
 93 |     color: #fff !important;
 94 | }
 95 | 
 96 | .custom-select:focus,
 97 | .custom-control-input:checked~.custom-control-label::before,
 98 | .custom-control-input:focus~.custom-control-label::before,
 99 | input[type="text"]:focus {
100 |     box-shadow: 0 0 0 0.05rem $button-color !important;
101 |     border-color: $button-color !important;
102 |     opacity: .5
103 | }
104 | 
105 | .landing-page-btn:focus {
106 |     border-color: $link-color !important;
107 | }
108 | 
109 | #report button:focus {
110 |     mix-blend-mode: hard-light;
111 | }
112 | 
113 | :export {
114 |     color: $link-color
115 | }


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | CORE_INSTALL="\n## INSTALLING PHILOLOGIC C CORE ##"
 4 | echo "$CORE_INSTALL"
 5 | cd libphilo/
 6 | make clean
 7 | make
 8 | if [[ $OSTYPE == 'darwin'* ]];
 9 | then
10 |   sudo /usr/bin/install -c db/corpus_search /usr/local/bin/
11 |   sudo /usr/bin/install -c db/pack4 /usr/local/bin/
12 | else
13 |   sudo /usr/bin/install -c db/corpus_search /bin/
14 |   sudo /usr/bin/install -c db/pack4 /bin/
15 | fi
16 | 
17 | cd ..;
18 | PYTHON_INSTALL="\n## INSTALLING PYTHON LIBRARY ##"
19 | echo "$PYTHON_INSTALL"
20 | sudo pip3 install build
21 | cd python;
22 | rm -rf dist/
23 | python3 -m build --sdist
24 | sudo -H pip3 install dist/*gz
25 | sudo mkdir -p /etc/philologic/
26 | 
27 | cd ..;
28 | sudo mkdir -p /var/lib/philologic4/web_app/
29 | sudo rm -rf /var/lib/philologic4/web_app/*
30 | if [ -d www/app/node_modules ]
31 |     then
32 |         sudo rm -rf www/app/node_modules
33 | fi
34 | sudo cp -R www/* /var/lib/philologic4/web_app/
35 | sudo cp www/.htaccess  /var/lib/philologic4/web_app/
36 | 
37 | if [ ! -f /etc/philologic/philologic4.cfg ]
38 |     then
39 |         db_url="# Set the filesytem path to the root web directory for your PhiloLogic install.
40 |         database_root = None
41 |         # /var/www/html/philologic/ is conventional for linux,
42 |         # /Library/WebServer/Documents/philologic for Mac OS.\n"
43 |         echo "$db_url" | sed "s/^ *//g" | sudo tee /etc/philologic/philologic4.cfg > /dev/null
44 |         url_root="# Set the URL path to the same root directory for your philologic install.
45 |         url_root = None
46 |         # http://localhost/philologic/ is appropriate if you don't have a DNS hostname.\n"
47 |         echo "$url_root" | sed "s/^ *//g" | sudo tee -a /etc/philologic/philologic4.cfg > /dev/null
48 |         web_app_dir="## This should be set to the location of the PhiloLogic4 www directory
49 |         web_app_dir = '/var/lib/philologic4/web_app/'"
50 |         echo "$web_app_dir" | sed "s/^ *//g" | sudo tee -a /etc/philologic/philologic4.cfg > /dev/null
51 | else
52 |     echo "\n## WARNING ##"
53 |     echo "/etc/philologic/philologic4.cfg already exists"
54 |     echo "Please delete and rerun the install script to avoid incompatibilities\n"
55 | fi
56 | 


--------------------------------------------------------------------------------
/libphilo/README:
--------------------------------------------------------------------------------
 1 | As of June 2012, PhiloLogic 4 has now entered alpha testing. 
 2 | It should compile and install reasonably well,
 3 | but should not be expected to be feature or documentation complete.
 4 | Note that this repository only contains the library code.
 5 | See the related PhiloLogic4-Templates repo for a "soup to nuts" web app framework.
 6 | 
 7 | C Dependencies: 
 8 | gdbm - installed to your standard INCLUDE path [Mac users see :http://macosx.com/forums/unix-x11/257664-compile-gdbm.html]
 9 |   [Also note that the /usr/local/ directory is often not included in default compiler lookups.  
10 |    If your gdbm lives there, either add it to your search path in the environment or as a compiler flag, as described below.]
11 | 
12 | Python Dependencies:
13 | 2.6 and below: ElementTree 1.3alpha: http://effbot.org/zone/elementtree-13-intro.htm
14 | 2.7 and greater: None
15 | 
16 | Installation instructions:
17 | make
18 | sudo make install
19 | 
20 | Due to an absence of a configuration script, 
21 | you can supply non-standard compiler arguments to the make commands.  For example, 
22 | 'make LDFLAGS=-L/usr/local/lib/'.
23 | 
24 | After installation, philologic will have installed:
25 | low-level search executables to /bin/
26 | PhiloLogic python library to your standard python path, according to distutils.
27 | 
28 | To get started loading documents, please install the PhiloLogic4-Templates package.
29 | 
30 | --
31 | Richard Whaling
32 | ARTFL Project
33 | July 2012


--------------------------------------------------------------------------------
/libphilo/args.h:
--------------------------------------------------------------------------------
 1 | // $Id: args.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 | #ifndef C_H
21 |   #include "c.h"
22 | #endif
23 | 
// NOTE(review): presumably the status reported when command-line parsing
// fails -- confirm against args.c.
#define BAD_ARGZ                1

// Diagnostic texts, one per option group; each names the option prefix it is about.
// NOTE(review): BAD_ENGINE_ARGZ says "output" although it names -E:, the same
// wording as BAD_OUTPUT_ARGZ -- confirm whether "engine" was intended.
#define BAD_ENGINE_ARGZ         "badly defined output (-E:) arguments"
#define BAD_SEARCH_ARGZ         "badly defined search (-S:) arguments"
#define BAD_CORPUS_ARGZ         "badly defined corpus (-C:) arguments"
#define BAD_OUTPUT_ARGZ         "badly defined output (-P:) arguments"
#define BAD_PLUGIN_ARGZ         "badly defined plugin (-D:) argument"


// Declared with empty parentheses (no prototype), so the compiler does not
// check the arguments passed at call sites.
extern Z32 process_command_argz(); 
extern Z32 process_command_argz_backwardcompat(); 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/libphilo/blockmap.h:
--------------------------------------------------------------------------------
 1 | // $Id: blockmap.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
/* Strict include guard: a second inclusion is a compile error instead of
   being skipped silently. */
#ifdef BLOCKMAP_H
  #error "blockmap.h multiply included"
#else


  /* 
     block map is an object that contains a pointer to a Word
     object and a counter pointing to the current position on 
     the object map there; by going along this list of blockmap
     objects ("Blockmap") and re-sorting it in the process, we
     conduct the search. 
   */

  #define BLOCKMAP_H

  /* Pull in prerequisites only when not already included: c.h (and
     presumably word.h) raise an error on double inclusion. */
  #ifndef C_H
    #include "c.h"
  #endif

  #ifndef WORD_H
    #include "word.h"
  #endif


  /* NOTE(review): presumably the result codes of the blockmap build step
     (see the commented-out build_blockMap below) -- confirm. */
  #define BLOCKMAP_BUILT            0
  #define BLOCKMAP_MALLOC_ERROR     1
  #define BLOCKMAP_BUILD_ERROR      2


  /* blockMap is a POINTER to the struct; blockMap_ is the struct itself. */
  typedef struct blockMap_st *blockMap, blockMap_;

  struct blockMap_st
  {
    Word    w;             /* pointer to word object */
    N32     n;             /* map counter in the object above */
    N32     bn;            /* batch (or "level") number */
  };

/*
  extern Z32 build_blockMap( Search, Z32 );
 */

  /* Declared without a prototype: arguments are not checked at call sites. */
  extern void blockmap_sort (); 

#endif /* #ifdef BLOCKMAP_H */
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/libphilo/c.h:
--------------------------------------------------------------------------------
 1 | // $Id: c.h,v 2.12 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
/* Strict include guard: including c.h twice is a compile error, so other
   headers wrap their include of it in "#ifndef C_H". */
#ifdef C_H
  #error "c.h multiply included"
#else
  #define C_H
#include <stdint.h>

/* Fixed-width integer aliases. The N* names are unsigned and the Z* names
   signed (Z8 is plain char, whose signedness is implementation-defined).
   The number is a nominal bit width, not the storage size: N1..N8 all alias
   uint8_t, and the 24-bit names alias 32-bit types. */
typedef uint8_t                 N1, N2, N3, N4, N5, N6, N7, N8;
typedef char                   Z8;
typedef uint16_t                N16;
  typedef int16_t                  Z16;
  typedef uint32_t                 N24, N32, N;
  typedef int32_t                   Z24, Z32;
  typedef int                           Z;
  /* String is a pointer to unsigned bytes, not to char. */
  typedef uint8_t                  *String;
  typedef uint64_t            N64;

  /*  Make sure we can define a function that is shadowed by a macro:
      we use this by defining "foo FUNCTION(args)", thereby suppressing
      the macro call interpretation of foo(args).
  */
  #define FUNCTION
  
#endif

/* Defined outside the guard above. NOTE(review): presumably a status code
   for searches that hit a hit-list limit -- confirm against its users. */
#define HITLIST_LIMIT_EXCEEDED          111
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/libphilo/db/Makefile:
--------------------------------------------------------------------------------
 1 | # $Id: Makefile.in,v 1.2 2004/05/28 19:22:11 o Exp $
 2 | CC= gcc
 3 | PH_CFLAGS= -I.. -I../../search-engine 
 4 | CFLAGS=
 5 | CPPFLAGS=
 6 | LDFLAGS= 
 7 | PH_LDMODULEFLAGS=-shared
 8 | PH_FPIC=-fPIC
 9 | all: unpack.o db.o bitsvector.o pack4 parsedb mergewords corpus_search
10 | 
11 | corpus_search: corpus_search.c db.o unpack.o bitsvector.o
12 | 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o corpus_search corpus_search.c db.o unpack.o bitsvector.o -lgdbm
13 | 
14 | mergewords: mergewords.c db.o unpack.o bitsvector.o
15 | 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o mergewords mergewords.c db.o unpack.o bitsvector.o -lgdbm
16 | 
17 | parsedb: parsedb.c
18 | 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o parsedb parsedb.c db.c unpack.c bitsvector.c -lgdbm
19 | 
20 | pack4: pack.c db.c 
21 | 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o pack4 pack.c db.c -lgdbm
22 | 
23 | validate-index: validate-index.c unpack.o bitsvector.o getresource.o
24 | 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o validate-index validate-index.c unpack.o bitsvector.o getresource.o -lgdbm
25 | 
26 | clean: 
27 | 	rm -f *.o *.lo libunpack_e.bundle *~ validate-index pack pack4 mergewords parsedb


--------------------------------------------------------------------------------
/libphilo/db/bitsvector.c:
--------------------------------------------------------------------------------
 1 | // $Id: bitsvector.c,v 2.10 2004/05/28 19:22:04 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #include <stdio.h>
20 | #include <stdlib.h>
21 | #include "bitsvector.h"
22 | 
23 | bitsvector *bitsvectorNew(N8 *v)
24 |   { bitsvector *r = malloc(sizeof(bitsvector));
25 | 
26 |     r->v = v;
27 |     r->o = 0;
28 |     r->s = 0;
29 |     r->b = 0;
30 |     return r;
31 |   }
32 | 
33 | void bitsvectorOld(bitsvector *f)
34 |   { free(f->v);
35 |     free(f);
36 |   }
37 | 
38 | 
39 | 
40 | /*
41 |    N24 bitsvectorGet FUNCTION(Bitsvector f, N5 n) {return bitsvectorGet(f, n);}
42 | */
43 | 
44 | N64 bitsvectorGet (bitsvector *f, N8 n)
45 |   {
46 |     N64 ret = 0; 
47 | 
48 |     N64 buffer = 0; 
49 |     N64 mask = 1; 
50 | 
51 |     N32 i; 
52 | 
53 |     N32 o_shift = 0; 
54 | 
55 |     if ( n > 64 ) 
56 |       {
57 | 	fprintf (stderr, "attempted bitsvectorGet on >64 bit integer!\n");
58 | 	fprintf (stderr, "whoa! that's a big-ass integer!\n");
59 | 
60 | 	exit (1);
61 |       }
62 | 
63 |     o_shift = ( n + f->s ) / 8; 
64 | 
65 |     ret = f->v[f->o]; 
66 | 
67 |     for ( i = 0; i < o_shift; i++ )
68 |       {
69 | 	buffer = f->v[f->o + i + 1]; 
70 | 	ret |= (buffer << ( 8 * (i + 1))); 
71 |       }
72 | 
73 |     ret >>= f->s; 
74 | 
75 |     mask <<= n; 
76 |     mask--;
77 | 
78 |     ret &= mask;
79 | 
80 |     f->o += o_shift; 
81 |     f->s = ( f->s + n ) % 8; 
82 | 
83 |     return ret; 
84 | 
85 |   }
86 | 


--------------------------------------------------------------------------------
/libphilo/db/bitsvector.h:
--------------------------------------------------------------------------------
 1 | // $Id: bitsvector.h,v 2.10 2004/05/28 19:22:04 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef BITSVECTOR_H
20 |   #error "bitsvector.h multiply included"
21 | #else
22 |   #define BITSFILE_H
23 | 
24 |   #ifndef C_H
25 |     #include "../c.h"
26 |   #endif
27 | 
28 | 
29 | 
30 |   struct Bitsvector
31 |     { N8 *v;
32 |       N32 o;
33 |       N16 s;
34 |       N32 b;
35 |     };
36 | 
37 | typedef struct Bitsvector bitsvector;
38 | 
39 | bitsvector *bitsvectorNew(N8 *v);
40 | 
41 | void bitsvectorOld(bitsvector *f);
42 | 
43 | N64 bitsvectorGet (bitsvector *f, N8 n);
44 | 
45 |   #define bitsvectorTell(x)	((((x)->o) << 3) + (x)->s)
46 | 
47 |   #define bitsfileSeek(x, n)	\
48 |     begin (x)->o = ((n) + (x)->o) >> 3; \
49 |       (x)->s = 0, (Void)bitsvectorGet(x, n & 7); end
50 | 
51 | 
52 |   #define bitsvectorGet24(x, n) \
53 |    ( (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8, \
54 |       (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8, \
55 |        (x)->s < (n) && ( (x)->b >>= 8, (x)->b |= ((x)->v)[((x)->o)++] << 24, (x)->s += 8) ) ), \
56 |        (x)->s -= (n), ((x)->b >> (32 - (x)->s - (n))) & (1 << (n)) - 1 )
57 | 
58 |   #define bitsvectorGetBoolean(f)	bitsvectorGet((f), 1)
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/libphilo/db/db.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INC_DB_H
 2 | #define _INC_DB_H
 3 | 
 4 | #include <gdbm.h>
 5 | #include <stdio.h>
 6 | #include <stdint.h>
 7 | 
 8 | struct philo_dbspec
 9 |        {
10 |            int fields;
11 |            int type_length;
12 |            int block_size;
13 |            int freq1_length;
14 |            int freq2_length;
15 |            int offset_length;
16 |            int *negatives;
17 |            int *dependencies;
18 |            int *bitlengths;
19 |            int bitwidth;
20 |            int hits_per_block;
21 |            int uncompressed_hit_size;
22 |        };
23 | 
24 | typedef struct philo_dbspec dbspec;
25 | 
26 | dbspec *new_dbspec(int fields, 
27 |            		   int type_length,
28 |            	       int block_size,
29 |                    int freq1_length,
30 |                    int freq2_length,
31 |                    int offset_length,
32 |                    int *negatives,
33 |                    int *dependencies,
34 |                    int *bitlengths);
35 | 
36 | int delete_dbspec(dbspec* dbs_ptr);
37 | 
38 | dbspec *init_dbspec_file(FILE *dbspec);
39 | 
40 | struct philo_dbh
41 | 	{
42 | 		   GDBM_FILE hash_file;
43 | 		   FILE *block_file;
44 | 		   dbspec *dbspec;
45 | 	};
46 | 	
47 | typedef struct philo_dbh dbh;
48 | 
49 | dbh *new_dbh(char *gdbm_f, char *index_f, dbspec *dbs);
50 | dbh *init_dbh_folder(char *db_path);
51 | int delete_dbh(dbh *dbh_ptr);
52 | int dbh_info(dbh *db);
53 | #endif 
54 | 


--------------------------------------------------------------------------------
/libphilo/db/dbspecs.h:
--------------------------------------------------------------------------------
 1 | // $Id: dbspecs.H,v 2.10 2004/05/28 19:22:02 o Exp $
 2 | /*
 3 |  * Database-specific constants
 4 |  */
 5 | 
 6 | #define  FIELDS 9
 7 | 
 8 | #define  BLK_SIZE       2048
 9 | #define  TYPE_LENGTH    1
10 | #define  FREQ1_LENGTH   4
11 | 
12 | #define  NEGATIVES	{0,1,1,1,1,1,0,0,0}
13 | #define  DEPENDENCIES	{-1,0,1,2,3,4,5,0,7}
14 | 
15 | 
16 | #define BITLENGTHS      {1,6,1,1,7,5,8,20,10}
17 | #define  FREQ2_LENGTH    13
18 | #define  OFFST_LENGTH    20
19 | 


--------------------------------------------------------------------------------
/libphilo/db/dbspecs2.h:
--------------------------------------------------------------------------------
 1 | // $Id: dbspecs.H,v 2.10 2004/05/28 19:22:02 o Exp $
 2 | /*
 3 |  * Database-specific constants
 4 |  */
 5 | 
 6 | #define  FIELDS 9
 7 | 
 8 | #define  BLK_SIZE       2048
 9 | #define  TYPE_LENGTH    1
10 | #define  FREQ1_LENGTH   4
11 | 
12 | #define  NEGATIVES      {0,1,1,1,1,1,0,0,0}
13 | #define  DEPENDENCIES   {-1,0,1,2,3,4,5,0,7}
14 | 
15 | 
16 | #define BITLENGTHS      {13,12,11,9,13,12,14,24,14}
17 | #define  FREQ2_LENGTH    25
18 | #define  OFFST_LENGTH    33
19 | 
20 | 


--------------------------------------------------------------------------------
/libphilo/db/dbspecs4.h:
--------------------------------------------------------------------------------
 1 | #define FIELDS 9
 2 | #define TYPE_LENGTH 1
 3 | #define BLK_SIZE 2048
 4 | #define FREQ1_LENGTH 4
 5 | #define FREQ2_LENGTH 25
 6 | #define OFFST_LENGTH 33
 7 | #define NEGATIVES {0,1,1,1,1,1,0,0,0}
 8 | #define DEPENDENCIES {-1,0,1,2,3,4,5,0,7}
 9 | #define BITLENGTHS {13,12,11,9,13,12,14,24,14}
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/libphilo/db/pack.dSYM/Contents/Info.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | 	<dict>
 5 | 		<key>CFBundleDevelopmentRegion</key>
 6 | 		<string>English</string>
 7 | 		<key>CFBundleIdentifier</key>
 8 | 		<string>com.apple.xcode.dsym.pack</string>
 9 | 		<key>CFBundleInfoDictionaryVersion</key>
10 | 		<string>6.0</string>
11 | 		<key>CFBundlePackageType</key>
12 | 		<string>dSYM</string>
13 | 		<key>CFBundleSignature</key>
14 | 		<string>????</string>
15 | 		<key>CFBundleShortVersionString</key>
16 | 		<string>1.0</string>
17 | 		<key>CFBundleVersion</key>
18 | 		<string>1</string>
19 | 	</dict>
20 | </plist>
21 | 


--------------------------------------------------------------------------------
/libphilo/db/pack.dSYM/Contents/Resources/DWARF/pack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/libphilo/db/pack.dSYM/Contents/Resources/DWARF/pack


--------------------------------------------------------------------------------
/libphilo/db/pack.h:
--------------------------------------------------------------------------------
 1 | #include "db.h"
 2 | #include "gdbm.h"
 3 | #include "../c.h"
 4 | 
 5 | #define PHILO_INDEX_CUTOFF 10
 6 | #define PHILO_BLOCK_FULL 1
 7 | 
 8 | struct hitbuffer {
 9 |   dbh *db;
10 |   Z32 *dir;
11 |   Z32 *blk;
12 |   Z8 type;
13 |   N64 freq;
14 |   N64 offset;
15 |   Z8 in_block;
16 |   Z8 word[512];
17 |   N64 dir_length;
18 |   N64 dir_malloced;
19 |   N64 blk_length;
20 |   N64 blk_malloced;
21 | };
22 | 
23 | typedef struct hitbuffer hitbuffer;
24 | 
25 | hitbuffer *new_hb(dbspec *dbs);
26 | int delete_hb(hitbuffer *hb);
27 | int hitbuffer_init(hitbuffer *hb, Z8 *word);
28 | int hitbuffer_inc(hitbuffer *hb, Z32 *hit);
29 | int hitbuffer_finish(hitbuffer *hb);
30 | 


--------------------------------------------------------------------------------
/libphilo/db/parsedb.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include "../plugin/hitcon.h"
 5 | #include "db.h"
 6 | #include "unpack.h"
 7 | 
 8 | int parsedbspecs(FILE *f) {
 9 |     int fields;
10 |     int res;
11 |     if (res = fscanf(f,"#define FIELDS %d", &fields)) {
12 |         printf("%d fields.\n", fields);
13 |     }
14 |     else {
15 |         printf("Couldn't get fields.\n");
16 |         return 1;
17 |     }
18 |     return 0;
19 | }
20 | 
21 | int main(int argc, char **argv) {
22 | 
23 |     char buffer[256];
24 |     int form_ptr = 0;
25 |     dbspec *dbs;
26 |     dbh *db;
27 |     char word[256];
28 | 
29 | 	db = init_dbh_folder(argv[1]);
30 | 
31 |     int lu_type;
32 |     int lu_freq;
33 |     uint64_t lu_offset;
34 |     int lu_blocks;
35 |     int32_t *hits;
36 | 
37 |     while (fgets(buffer,256,stdin)) {
38 |    		int i = 0;
39 |    		int j = 0;
40 |    		lu_type = 0;
41 |    		lu_freq = 0;
42 |    		lu_offset = 0;
43 |    		lu_blocks = 0;
44 |    		sscanf(buffer,"%s256",word);
45 |    		fprintf(stderr,"looking up %s : ",word);
46 |  		word_lookup(db,word);
47 | 		hits = hit_lookup(db,word,&lu_type,&lu_freq,&lu_blocks,&lu_offset);
48 | 		fprintf(stderr,"%d\n", lu_freq);
49 | 		if (lu_type == 0) {
50 | 			for (i = 0; i < (db->dbspec->fields * lu_freq); i++) {
51 | 				fprintf(stdout,"%d ",hits[i]);
52 | 			}
53 | 			fprintf(stdout,"\n");
54 | 	   	}
55 | 	   	else {
56 | 	   		fprintf(stderr, "%d blocks:\n", lu_blocks);
57 | 			int hit_offset = 0;
58 | 			int hit_offset_2;
59 | 			int32_t *temp_hit = malloc(sizeof(int32_t) * db->dbspec->fields);
60 | 			int32_t *block_hits;
61 | 			int block_count;
62 | 			int block_number = 0;
63 | 			for (i = 0; i < (db->dbspec->fields * lu_blocks); i++) {
64 | 				hit_offset = (i % db->dbspec->fields);
65 | 				temp_hit[hit_offset] = hits[i];
66 | 				if ((hit_offset == 8)) {
67 | 					fprintf(stdout,"\n");
68 | 					block_number++;
69 | 					block_hits = hit_gethits(db,lu_type,temp_hit,lu_offset,&block_count);
70 | 					hit_offset_2 = 0;
71 | 					for (j = 0; j < (db->dbspec->fields) * block_count; j++) {
72 | 						hit_offset_2 = (j % db->dbspec->fields);
73 | 						if (hit_offset_2 == 0) {
74 | 							fprintf(stdout,"\n");
75 | 						}
76 | 						fprintf(stdout,"%d ",block_hits[j]);
77 | 					}
78 | 					fprintf(stdout,"\n[%d hits in block %d]\n",block_count, block_number);
79 | 					lu_offset += db->dbspec->block_size;
80 | 				}
81 | //				fprintf(stdout,"%d ",hits[i]);
82 | 			}
83 | 			fprintf(stdout,"\n");	   	
84 | 	   	
85 | 	   	
86 | 	   	}
87 |     }
88 |     return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/libphilo/db/unpack.h:
--------------------------------------------------------------------------------
#ifndef _INC_UNPACK_H
#define _INC_UNPACK_H

#include "db.h"
#include "bitsvector.h"
#include <stdint.h>

/* Look keyword up in db. */
int word_lookup(dbh *db, Z8 *keyword);

/*
 * Look keyword up and return an array of hit integers, filling in
 * *type_num, *freq, *blkcount and *offset.  As used by parsedb.c: when
 * *type_num is 0 the array holds fields * *freq integers; otherwise it
 * holds fields * *blkcount integers, each group of `fields` being passed
 * as `first` to hit_gethits().
 */
Z32 *hit_lookup(dbh *db, Z8 *keyword, N32 *type_num, N32 *freq, N32 *blkcount, N64 *offset);
/* Unpack hits from the bit vector v (presumably `count` of them -- verify). */
Z32 *unpack(dbh *db, bitsvector *v, N32 count);
/*
 * Return the hits of one block; parsedb.c treats the result as
 * fields * *blockcount integers and advances `offset` by the database
 * block size between calls.
 */
Z32 *hit_gethits(dbh *db, N32 type, Z32 *first, N64 offset, N32 *blockcount);
#endif
14 | 


--------------------------------------------------------------------------------
/libphilo/gmap.c:
--------------------------------------------------------------------------------
  1 | // $Id: gmap.c,v 2.11 2004/05/28 19:22:06 o Exp $
  2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
  3 | // Copyright (C) 2004 University of Chicago
  4 | // 
  5 | // This program is free software; you can redistribute it and/or modify
  6 | // it under the terms of the Affero General Public License as published by
  7 | // Affero, Inc.; either version 1 of the License, or (at your option)
  8 | // any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // Affero General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the Affero General Public License
 16 | // along with this program; if not, write to Affero, Inc.,
 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
 18 | 
 19 | #include <stdio.h>
 20 | #include <stdlib.h>
 21 | #include "gmap.h"
 22 | 
 23 | Z32 *gm_get_eod ( Gmap gm )
 24 | {
 25 | 
 26 |   if ( gm->gm_eod > gm->gm_l - 1 )
 27 |     {
 28 |       gm->gm_h = (Z32 *)realloc(gm->gm_h, 2 * gm->gm_l*sizeof(Z32)*gm->gm_f); 
 29 | 
 30 |       if ( gm->gm_h == NULL )
 31 | 	{
 32 | 	  gm->gm_e = GMAP_MALLOCFAIL;
 33 | 	  gm->gm_l = 0;
 34 | 	  return NULL; 
 35 | 	}
 36 | 
 37 |       gm->gm_l *= 2; 
 38 | 
 39 |     }
 40 | 
 41 |     return ( gm->gm_h + gm->gm_eod * gm->gm_f );
 42 | }
 43 | 
 44 | Z32 gm_set_eod ( Gmap gm, Z32 eod )
 45 | {
 46 |   return gm->gm_eod = eod; 
 47 | }
 48 | 
 49 | Z32 gm_inc_eod ( Gmap gm )
 50 | {
 51 |   if ( gm->gm_eod > gm->gm_l - 1 )
 52 |     return 0; 
 53 | 
 54 |   gm->gm_eod++;
 55 |   return gm->gm_eod; 
 56 | }
 57 | 
 58 | Z32 *gm_get_cur_pos ( Gmap gm )
 59 | {
 60 |   return ( gm->gm_h + gm->gm_c * gm->gm_f );
 61 | }
 62 | 
 63 | Z32 *gm_get_pos ( Gmap gm, N pos )
 64 | {
 65 |   return ( gm->gm_h + pos * gm->gm_f );
 66 | }
 67 | 
 68 | Z32 gm_inc_pos ( Gmap gm )
 69 | {
 70 |   if ( gm->gm_c >= gm->gm_eod - 1 )
 71 |     return 0; 
 72 | 
 73 |   gm->gm_c++; return 1; 
 74 | }
 75 | 
 76 | Z32 gm_set_pos ( Gmap gm, N32 pos )
 77 | {
 78 |   return gm->gm_c = pos; 
 79 | }
 80 | 
 81 | Gmap new_Gmap ( N32 initlen, N32 factor )
 82 | {
 83 |   Gmap g = (Gmap) malloc (sizeof(gmap));
 84 | 
 85 |   if ( g == NULL )
 86 |     return NULL;
 87 | 
 88 |   g->gm_f = factor;
 89 |   g->gm_c = g->gm_eod = 0;
 90 |   g->gm_e = GMAP_OK;
 91 | 
 92 |   if ( initlen ) 
 93 |     {
 94 |       g->gm_h = (Z32 *) malloc (initlen * sizeof(Z32) * factor); 
 95 | 
 96 |       if ( g->gm_h == NULL )
 97 | 	{
 98 | 	  g->gm_e = GMAP_MALLOCFAIL;
 99 | 	  g->gm_l = 0;
100 | 	  return g;
101 | 	}
102 |     }
103 | 
104 |   g->gm_l = initlen;
105 |   return g; 
106 | }
107 | 
108 | 
109 | void old_Gmap ( Gmap m )
110 | {
111 |   /* "old" is the opposite of "new" */
112 | 
113 |   free (m->gm_h); 
114 |   free (m); 
115 | 
116 | }
117 | 
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/libphilo/gmap.h:
--------------------------------------------------------------------------------
 1 | // $Id: gmap.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifndef C_H
20 |   #include "c.h"
21 | #endif
22 | 
23 | #ifdef GMAP_H
24 |   #error "gmap.h multiply included"
25 | #else
26 |   #define GMAP_H
27 | 
28 |   #define GMAP_OK         0
29 |   #define GMAP_TOOMANY    1
30 |   #define GMAP_MALLOCFAIL 2
31 | 
32 |   typedef struct gmap  *Gmap, gmap; 
33 | 
34 |   struct gmap
35 |   {
36 |     N32  gm_f;  /* factor -- i.e., how many integers/hit */
37 |     Z32 *gm_h;  /* hits */
38 |     N32  gm_c;  /* counter, i.e., the current location */
39 |     N32  gm_l;  /* limit, i.e. malloc-ed size of gm_h */
40 |     N32  gm_eod;/* "end of data", i.e., the current length */
41 |     Z32  gm_e;  /* error condition */
42 |   };
43 |  
44 |   extern Gmap new_Gmap ( N32,N32 );
45 |   extern void old_Gmap ( Gmap );
46 |   extern Z32 *gm_get_eod ( Gmap );
47 |   extern Z32  gm_set_eod ( Gmap, Z32 ); 
48 |   extern Z32  gm_inc_eod ( Gmap );
49 |   extern Z32 *gm_get_cur_pos ( Gmap );
50 |   extern Z32 *gm_get_pos ( Gmap, N ); 
51 |   extern Z32  gm_inc_pos ( Gmap ); 
52 |   extern Z32  gm_set_pos ( Gmap, N32 );
53 | 
54 | #endif
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/libphilo/level.h:
--------------------------------------------------------------------------------
 1 | // $Id: level.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
/* Multiple-inclusion check; note that the guard macro of level.h is
   named BATCH_H. */
#ifdef BATCH_H
  #error "level.h multiply included"
#else

  #define BATCH_H

#ifndef C_H
  #include "c.h"
#endif

#ifndef WORD_H
  #include "word.h"
#endif

#ifndef BLOCKMAP_H
  #include "blockmap.h"
#endif

#ifndef GMAP_H
  #include "gmap.h"
#endif

#define MAXBATCHES  10

/* Batch processing status codes (presumably the results of
   process_batch() -- verify against level.c). */
#define BATCH_PROCESSED           0
#define BATCH_PROCESSED_LAST      1
#define BATCH_EMPTY               2
#define BATCH_PROCESSING_ERROR    4


/* Batch is the pointer type the functions below take, Batch_ the
   structure itself. */
typedef struct Batch *Batch, Batch_;

struct Batch
{
  N32      howmany;  /* number of words in the batch */
  N32      total;    /* total frequency of the words in the batch */
  N32      number;   /* "real" number of the batch, used in phrase searches */
  N32      malloced; /* currently malloc-ed word list */

  N32      not_op;   /* boolean 'NOT' operator; */

  Word     w_list;   /* list of Word objects */

  hitcmp*  hit_cmp;

  blockMap blockmap; /* Block Map */
  N32      blockmap_l;
  N32      blkmapctr; 

  Gmap     map; 
  Gmap     res;
  Gmap     stored;
};

/* search.h is included only here, after struct Batch has been defined;
   keep this order. */
#ifndef SEARCH_H
  #include "search.h"
#endif

void init_batchObject ( Batch b, N32 n );
Z32 process_input ( Search s, FILE *f );
Z32 create_batches ( Search s, FILE *f );
Z32 process_batch ( Search s, FILE *f, N32 bn );
void sort_batches ( Batch b, N32 n );
int batch_sort_function ( const void *v0, const void *v1 );
void rearrange_batches ( Search s );
int delete_batch(Batch b);

#endif /* #ifdef BATCH_H */
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/libphilo/log.h:
--------------------------------------------------------------------------------
 1 | // $Id: log.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef LOG_H
20 |   #error "log.h multiply included"
21 | #else
22 |   #define LOG_H
23 | 
24 |   #define L_QUIET   0
25 |   #define L_ERROR   1
26 |   #define L_INFO    2
27 | 
28 |   extern void s_log (Z32, Z32, char *, Z8 *);
29 | 
30 |   #define s_log(state,level,format,message) { \
31 |   if (state == level) \
32 |       fprintf (stderr, "%s\n", (char *)message); \
33 |   }
34 |   
35 |   #define s_logf(state,level,format,message) {  \
36 |   if (state == level) \
37 |   	fprintf (stderr, format,message); \
38 |   }
39 | #endif
40 | 


--------------------------------------------------------------------------------
/libphilo/out.c:
--------------------------------------------------------------------------------
 1 | // $Id: out.c,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 | #include <stdio.h>
21 | #include "c.h"
22 | #include "search.h"
23 | #include "out.h"
24 | 
25 | Z32 dump_hits_out ( Search s, N32 level, Gmap m )
26 | {
27 |   N32 n = m->gm_eod;
28 |   N32 i; 
29 |   s_logf ( s->debug, L_INFO, "dumping out results; (%d hits on map)", n );
30 |   s_logf ( s->debug, L_INFO, "map position set to %d;", m->gm_c ); 
31 |   if ( s->hit_def->output == HIT_OUT_ASCII ) {
32 |     s_log ( s->debug, L_INFO, NULL, (Z8 *)"(output set to ASCII)" );
33 |   }
34 |   for ( i = 0; i < n; i++ ) {
35 |     if ( s->depth_r ) {
36 | 	(void) hit_out ( gm_get_pos(m, i), s->hit_def, level, s->depth_r ); 
37 |     }
38 |     else {
39 | 	(void) hit_out ( gm_get_pos(m, i), s->hit_def, level, s->depth ); 
40 |     }
41 |     s->n_printed++; 
42 |     if ( ! ( s->n_printed % 100 ) ) {
43 | 	fflush( stdout );
44 |     }
45 |     else if ( s->n_printed == s->soft_limit ) {
46 | 	  s->batch_limit = DEFAULT_BATCH_LIMIT;
47 | 	  fflush( stdout );
48 |     }                                                                             
49 |     if ( s->n_printed == s->print_limit ) {
50 |       fflush( stdout );
51 |       s->exitcode = 111; 
52 |       return SEARCH_PRINT_LIMIT_REACHED;
53 |     }
54 |   }
55 |   fflush( stdout );
56 |   return 0;
57 | }
58 | 


--------------------------------------------------------------------------------
/libphilo/out.h:
--------------------------------------------------------------------------------
 1 | // $Id: out.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
#ifndef C_H
  #include "c.h"
#endif

/* Write all hits of a map to the output; defined in out.c.
   NOTE(review): Search and Gmap are not declared by anything this header
   includes itself -- out.c includes search.h before out.h, and other
   includers presumably have to do the same. */
extern Z32 dump_hits_out ( Search, N32, Gmap ); 
25 | 


--------------------------------------------------------------------------------
/libphilo/plugin/Makefile:
--------------------------------------------------------------------------------
 1 | # $Id: Makefile.in,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | CC= gcc
 3 | PH_CFLAGS = -O3 -I..
 4 | CFLAGS= $(PH_CFLAGS)
 5 | 
 6 | all: libindex.a 
 7 | 
 8 | libindex.a: hitdef.o hitcmp_sent.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o
 9 | 	ar ru $@ hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o hitcmp_sent.o
10 | 	ranlib $@
11 | 
12 | hitdef.o: 	hitdef.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
13 | #	$(CC)    -c -o hitdef.o hitdef.c
14 | 
15 | hitcmp_cooc.o: 	hitcmp_cooc.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
16 | #	$(CC)    -c -o hitcmp_cooc.o hitcmp_cooc.c
17 | 
18 | hitcmp_phrase.o: 	hitcmp_phrase.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
19 | 
20 | hitcmp_proxy.o: 	hitcmp_proxy.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
21 | 
22 | hitcmp_sent.o: hitcmp_sent.c hitcmp_sent.h hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
23 | 
24 | hitout.o: 	hitout.c hitcon.h hitout.h hitdef.h hitman.h hitcmp.h
25 | #	$(CC)    -c -o hitout.o hitout.c
26 | 
27 | hitman.o: 	hitman.c hitcon.h hitman.h hitdef.h hitcmp.h
28 | #	$(CC)    -c -o hitman.o hitman.c
29 | 
30 | hitcrp.o: 	hitcrp.c hitcon.h hitdef.h hitcmp.h
31 | #	$(CC)    -c -o hitcrp.o hitcrp.c
32 | 
33 | clean:
34 | 	rm -f *.o *~ *.a core
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/libphilo/plugin/Makefile.in:
--------------------------------------------------------------------------------
 1 | # $Id: Makefile.in,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | CC= @CC@
 3 | PH_CFLAGS = -I..
 4 | CFLAGS= @CFLAGS@ $(PH_CFLAGS)
 5 | 
 6 | all: libindex.a 
 7 | 
 8 | libindex.a: hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o
 9 | 	ar ru $@ hitdef.o hitcmp_cooc.o hitcmp_phrase.o hitcmp_proxy.o hitout.o hitman.o hitcrp.o method.o plugin.o
10 | 	ranlib $@
11 | 
12 | hitdef.o: 	hitdef.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
13 | #	$(CC)    -c -o hitdef.o hitdef.c
14 | 
15 | hitcmp_cooc.o: 	hitcmp_cooc.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
16 | #	$(CC)    -c -o hitcmp_cooc.o hitcmp_cooc.c
17 | 
18 | hitcmp_phrase.o: 	hitcmp_phrase.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
19 | 
20 | hitcmp_proxy.o: 	hitcmp_proxy.c hitcon.h hitcmp.h hitdef.h hitman.h hitout.h
21 | 
22 | hitout.o: 	hitout.c hitcon.h hitout.h hitdef.h hitman.h hitcmp.h
23 | #	$(CC)    -c -o hitout.o hitout.c
24 | 
25 | hitman.o: 	hitman.c hitcon.h hitman.h hitdef.h hitcmp.h
26 | #	$(CC)    -c -o hitman.o hitman.c
27 | 
28 | hitcrp.o: 	hitcrp.c hitcon.h hitdef.h hitcmp.h
29 | #	$(CC)    -c -o hitcrp.o hitcrp.c
30 | 
31 | clean:
32 | 	rm -f *.o *~ *.a core
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/libphilo/plugin/dbplugins.h:
--------------------------------------------------------------------------------
 1 | // $Id: dbplugins.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
#include "plugin.h"

/* Table of database plugins defined elsewhere: each entry pairs a tag
   string with a pointer to a dbPlugin.  The array length is not given
   here.  hit.h carries an identical declaration. */
extern struct
{
  Z8       *dbp_tag;
  dbPlugin *dbp;
}
dbPlugins[];
27 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hit.h:
--------------------------------------------------------------------------------
 1 | // $Id: hit.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
/* Umbrella header: pulls in every hit-related plugin header that has not
   been included yet, each one skipped when its guard macro is already
   defined. */
#ifdef HIT_H
  #error "hit.h multiply included"
#else

  #define HIT_H


  #ifndef HITCON_H
    #include "hitcon.h"
  #endif

  #ifndef HITCMP_H
    #include "hitcmp.h"

  #endif

  #ifndef HITDEF_H
    #include "hitdef.h"
  #endif

  #ifndef HITMAN_H
    #include "hitman.h"
  #endif

  #ifndef HITOUT_H
    #include "hitout.h"
  #endif

  #ifndef METHOD_H
    #include "method.h"
  #endif

  #ifndef PLUGIN_H
    #include "plugin.h"
  #endif

/* Table of database plugins defined elsewhere: each entry pairs a tag
   string with a pointer to a dbPlugin.  dbplugins.h carries an identical
   declaration. */
extern struct
{
  Z8       *dbp_tag;
  dbPlugin *dbp;
}
dbPlugins[];

#endif
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp.c:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp.c,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef HITCMP_H
20 |   #error "hitcmp.h multiply included"
21 | #else
22 |   #define HITCMP_H
23 | 
24 |   #ifndef STDIO_H
25 |     #include <stdio.h>
26 |     #include <stdlib.h>
27 |   #endif
28 | 
29 |   #ifndef C_H
30 |     #include "../c.h"
31 |   #endif
32 | 
33 |   #ifndef HITCON_H
34 |     #include "hitcon.h"
35 |   #endif
36 | 
37 |   typedef struct hitcmp_st hitcmp; 
38 | 
39 |   #ifndef HITDEF_H
40 |     #include "hitdef.h"
41 |   #endif
42 | 
43 |   struct   hitcmp_st   /* per-level record of callbacks and settings; hitdef_st.levels points to an array of these */
44 |   {
45 |     Z32   (*h2h_cmp_func)  (Z32 *, Z32 *, hitdef *, Z32);   /* comparison hooks; presumably h = hit, m = map entry — TODO confirm */
46 |     Z32   (*h2m_cmp_func)  (Z32 *, Z32 *, hitdef *, Z32);
47 |     Z32   (*m2m_cmp_func)  (Z32 *, Z32 *, hitdef *, Z32);
48 | 
49 |     Z32   (*h2h_sort_func)  (Z32 *, Z32 *, hitdef *, Z32);   /* ordering hooks, same signature as the comparison hooks */
50 |     Z32   (*h2m_sort_func)  (Z32 *, Z32 *, hitdef *, Z32);
51 | 
52 |     Z32   (*cntxt_cmp_func)(Z32 *, Z32 *, hitdef *, Z32);   /* context comparison hooks */
53 |     Z32   (*h2m_cntxt_cmp_func)(Z32 *, Z32 *, hitdef *, Z32);
54 | 
55 |     Z32   (*h2m_put_func) (Z32 *, Z32 *, Z32 *, hitdef *, Z32);   /* takes three Z32 buffers, unlike the hooks above */
56 |     Z32   (*hitsize_func)  (hitdef *, N8);
57 | 
58 |     void   *config;   /* untyped pointers; their contents are not defined in this header */
59 |     void   *opt;
60 | 
61 |     N8      type;   /* presumably one of the HIT_CMP_* codes defined below — TODO confirm */
62 | 
63 |     N8      context;
64 |     N8      s_context;
65 |     N8      r_context;
66 | 
67 |     N8      merge;
68 | 
69 |     N8      distance;
70 | 
71 |     N8      n_level; 
72 |     N8      n_real;
73 | 
74 |     N8      boolean_op;
75 |  
76 |   };
77 | 
78 |   #define  HIT_CMP_COOC   1   /* search-method codes; presumably the values stored in hitcmp_st.type — TODO confirm */
79 |   #define  HIT_CMP_PHRASE 2
80 |   #define  HIT_CMP_PROXY  3
81 |   #define  HIT_CMP_SENTENCE 4
82 | 
83 |   #include "hitcmp_cooc.h"
84 |   #include "hitcmp_phrase.h"
85 |   #include "hitcmp_proxy.h"
86 |   #include "hitcmp_sent.h"
87 |   /* NOTE(review): the usage text below lists cooc, phrase and proxy but no "sentence" option, although HIT_CMP_SENTENCE exists — confirm */
88 |   #define  HIT_CMP_ARGZ_USAGE "{SEARCH OPTIONS} are: \
89 |            (cooc[:context]|phrase[:distance]|proxy[:distance])"
90 | 
91 | #endif
92 | 
93 | 
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp_cooc.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp_cooc.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 |   extern Z8 *get_method_info_cooc    (void);                 /* get_method_info hook of the `cooc` SearchMethod in method.c */
20 |   extern Z32 build_search_level_cooc (hitcmp *, Z8 *, Z32);  /* build_search_level hook of the `cooc` SearchMethod in method.c */
21 | 
22 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp_phrase.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp_phrase.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 |   extern Z8 *get_method_info_phrase    (void);                 /* get_method_info hook of the `phrase` SearchMethod in method.c */
20 |   extern Z32 build_search_level_phrase (hitcmp *, Z8 *, Z32);  /* build_search_level hook of the `phrase` SearchMethod in method.c */
21 | 
22 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp_proxy.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp_proxy.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 |   extern Z8 *get_method_info_proxy    (void);                 /* get_method_info hook of the `proxy` SearchMethod in method.c */
21 |   extern Z32 build_search_level_proxy (hitcmp *, Z8 *, Z32);  /* build_search_level hook of the `proxy` SearchMethod in method.c */
22 | 
23 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcmp_sent.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcmp_proxy.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 |   extern Z8 *get_method_info_sentence    (void);                 /* get_method_info hook of the `sentence` SearchMethod in method.c */
21 |   extern Z32 build_search_level_sentence (hitcmp *, Z8 *, Z32);  /* build_search_level hook of the `sentence` SearchMethod in method.c */
22 | 
23 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcon.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcon.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 | /*
21 |   this file provides the constants that define the structure of the
22 |   TLF v.2 occurrence index ("hit")
23 |  */
24 | 
25 | 
26 | #ifdef HITCON_H
27 |   #error "hitcon.h multiply included"
28 | #else
29 |   #define HITCON_H
30 | 
31 | /*
32 |   TLF v.2 occurrence indices have a fixed-field structure; 
33 |   each occurrence index has 9 fields;
34 |  */
35 | 
36 |   #define INDEX_DEF_FIELDS     9  /* number of fields per index entry; matches the highest INDEX_DEF_* field number below */
37 |   #define FIELDS               9  /* same value as INDEX_DEF_FIELDS */
38 | 
39 | /* 
40 |    The following fields are stored for each occurrence: 
41 |  */
42 | 
43 |   #define INDEX_DEF_DOCUMENT   1  /* document number */
44 |   #define INDEX_DEF_P1         2  /* level 1 part number */
45 |   #define INDEX_DEF_P2         3  /* level 2 part number */
46 |   #define INDEX_DEF_P3         4  /* level 3 part number */
47 |   #define INDEX_DEF_PARAGRAPH  5  /* paragraph number */
48 |   #define INDEX_DEF_SENTENCE   6  /* sentence number */
49 |   #define INDEX_DEF_WORD       7  /* word number */
50 | 
51 |   #define INDEX_DEF_OFFSET     8  /* byte offset */
52 |   #define INDEX_DEF_PAGE       9  /* page number */
53 | 
54 | #endif
55 | 
56 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitcrp.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitcrp.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef HITCRP_H
20 |   #error "hitcrp.h multiply included"
21 | #else
22 |   #define HITCRP_H
23 | 
24 |   #ifndef STDIO_H
25 |     #include <stdio.h>
26 |   #endif
27 | 
28 |   #define  HIT_CRP_BINARY   0
29 |   #define  HIT_BIN_ASCII    1   /* NOTE(review): does not follow the HIT_CRP_ prefix of the line above — confirm the name is intended */
30 | 
31 | 
32 |   extern Z32 hit_crp_args(hitdef *, Z32 *, Z32 *, Z32, Z8 *);   /* this header only includes <stdio.h>: Z32, Z8 and hitdef must already be declared by the includer */
33 |   Z32 h2h_cmp_crp ( Z32 *a, Z32 *b, hitdef *hit_def, Z32 level );   /* same parameter types as hitcmp_st.h2h_cmp_func */
34 | #endif
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitdef.c:
--------------------------------------------------------------------------------
 1 | // $Id: hitdef.c,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifndef C_H
20 |   #include "../c.h"
21 | #endif
22 | 
23 | #ifndef HITDEF_H
24 |   #include "hitdef.h"
25 | #endif
26 | 
27 | #include "searchmethods.h"
28 | 
29 | /* Allocate a hitdef for `nlevels` levels (zero-filled); returns NULL if allocation fails. */
30 | hitdef *new_hitdef ( N32 nlevels )
31 | {
32 |   /* calloc zero-fills, so fields not assigned below (e.g. output) start at 0 */
33 |   hitdef *ret = (hitdef *) calloc ( 1, sizeof (hitdef) );
34 |   if ( ret == NULL )
35 |     return NULL;
36 | 
37 |   ret->depth         = nlevels;
38 |   ret->depth_r       = 0;
39 |   ret->fields        = INDEX_DEF_FIELDS;
40 |   ret->searchmethods = SearchMethods;
41 | 
42 |   /* calloc(n, size) leaves the n * size product to the allocator instead of multiplying here */
43 |   ret->levels = (hitcmp *) calloc ( nlevels, sizeof(hitcmp) );
44 |   if ( ret->levels == NULL && nlevels != 0 )
45 |     { free (ret); return NULL; }   /* do not leak the header when the levels array cannot be allocated */
46 |   return ret;
47 | }
48 | 
49 | /* Release a hitdef created by new_hitdef(), including its levels array. */
50 | /* Passing NULL is a no-op, mirroring free(). */
51 | void old_hitdef ( hitdef *hc )
52 | {
53 |   if ( hc == NULL )
54 |     return;
55 |   free (hc->levels);
56 |   free (hc);
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitdef.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitdef.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef HITDEF_H
20 |   #error "hitdef.h multiply included"
21 | #else
22 |   #define HITDEF_H
23 |   
24 |   #ifndef C_H
25 |     #include "../c.h"
26 |   #endif
27 | 
28 |   #ifndef HITCON_H
29 |     #include "hitcon.h"
30 |   #endif
31 | 
32 |   typedef struct hitdef_st hitdef;
33 | 
34 |   #ifndef HITCMP_H
35 |     #include "hitcmp.h"
36 |   #endif
37 | 
38 |   #ifndef METHOD_H
39 |     #include "method.h"
40 |   #endif
41 | 
42 |   struct   hitdef_st   /* hit definition; created by new_hitdef() and released by old_hitdef() */
43 |   {
44 |     N32     depth;     /* number of levels; new_hitdef() sets it to its argument */
45 |     N32     depth_r;   /* new_hitdef() sets this to 0 */
46 | 
47 |     N32     fields;    /* new_hitdef() sets this to INDEX_DEF_FIELDS */
48 | 
49 |     hitcmp *levels;    /* array allocated by new_hitdef() with one hitcmp per level */
50 |     N8      output;    /* not explicitly assigned by new_hitdef(); presumably a HIT_OUT_* code — TODO confirm */
51 | 
52 |     SearchMethodEntry    *searchmethods; /* search methods */
53 |   };
54 |  
55 |   extern hitdef *new_hitdef ( N32 );
56 |   extern void    old_hitdef ( hitdef * );
57 | 
58 | #endif
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitman.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitman.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef HITMAN_H
20 |   #error "hitman.h multiply included"
21 | #else
22 |   #define HITMAN_H
23 | 
24 |   extern Z32 hit_put  ();   /* empty parentheses: the parameter types are not declared in this header */
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/libphilo/plugin/hitout.h:
--------------------------------------------------------------------------------
 1 | // $Id: hitout.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef HITOUT_H
20 |   #error "hitout.h multiply included"
21 | #else
22 |   #define HITOUT_H
23 | 
24 |   #ifndef STDIO_H
25 |     #include <stdio.h>
26 |   #endif
27 | 
28 |   #define  HIT_OUT_BINARY   0
29 |   #define  HIT_OUT_ASCII    1
30 | 
31 |   #define  HIT_OUT_ARGZ_USAGE  "{PRINT OPTIONS} are: \
32 |            (a[scii*]|b[inary*])"
33 | 
34 |   #define  hitout_size(c,n) (sizeof(Z16)*((c)<INDEX_DEF_SENTENCE?(c):(c)-1)+(n)*sizeof(Z32))  /* byte size for c and n; arguments are parenthesized so expression arguments expand correctly */
35 | 
36 |   extern Z32 hit_out();
37 |   extern Z32 hit_out_args(hitdef *, Z8 *);
38 | 
39 | #endif
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/libphilo/plugin/method.c:
--------------------------------------------------------------------------------
 1 | // $Id: method.c,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #include "method.h"
20 | 
21 | /* Hooks for the "cooc" search method. */
22 | SearchMethod cooc = {
23 |   .build_search_level = build_search_level_cooc,
24 |   .get_method_info    = get_method_info_cooc,
25 | };
26 | 
27 | /* Hooks for the "phrase" search method. */
28 | SearchMethod phrase = {
29 |   .build_search_level = build_search_level_phrase,
30 |   .get_method_info    = get_method_info_phrase,
31 | };
32 | 
33 | /* Hooks for the "proxy" search method. */
34 | SearchMethod proxy = {
35 |   .build_search_level = build_search_level_proxy,
36 |   .get_method_info    = get_method_info_proxy,
37 | };
38 | 
39 | /* Hooks for the "sentence" search method. */
40 | SearchMethod sentence = {
41 |   .build_search_level = build_search_level_sentence,
42 |   .get_method_info    = get_method_info_sentence,
43 | };
44 | 
45 | /* Table pairing each method tag with its hooks; */
46 | /* the final all-zero row closes the table. */
47 | SearchMethodEntry SearchMethods[] =
48 | {
49 |   { .sp_tag = (Z8 *)"cooc",     .sp = &cooc     },
50 |   { .sp_tag = (Z8 *)"phrase",   .sp = &phrase   },
51 |   { .sp_tag = (Z8 *)"proxy",    .sp = &proxy    },
52 |   { .sp_tag = (Z8 *)"sentence", .sp = &sentence },
53 |   { .sp_tag = 0,                .sp = 0         }
54 | };
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/libphilo/plugin/method.h:
--------------------------------------------------------------------------------
 1 | // $Id: method.h,v 2.12 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef METHOD_H
20 |   #error "method.h multiply included"
21 | #else
22 |   #define METHOD_H
23 |   #ifndef C_H
24 |     #include "../c.h"
25 |   #endif
26 | 
27 |   typedef struct SearchMethod_st SearchMethod; 
28 |   typedef struct SearchMethodEntry_st SearchMethodEntry; 
29 | 
30 | 
31 |   #ifndef HITCMP_H
32 |     #include "hitcmp.h"
33 |   #endif 
34 | 
35 |   struct SearchMethod_st   /* the two hooks that implement one search method */
36 |   {
37 |      Z32 (*build_search_level)(hitcmp *, Z8 *, Z32);   /* e.g. build_search_level_cooc */
38 |      Z8 *(*get_method_info)(void);                     /* e.g. get_method_info_cooc */
39 |   }; 
40 | 
41 |   struct SearchMethodEntry_st   /* one row of the SearchMethods table */
42 |   {
43 |      Z8           *sp_tag;   /* method tag, e.g. "cooc"; 0 in the table's last row */
44 |      SearchMethod *sp;       /* hooks for that tag; 0 in the table's last row */
45 |   };
46 | 
47 | #endif
48 | 
49 | 


--------------------------------------------------------------------------------
/libphilo/plugin/plugin.c:
--------------------------------------------------------------------------------
 1 | // $Id: plugin.c,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #include "plugin.h"
20 | #include <stdlib.h>
21 | /* Builtin plugin descriptor: hitdef constructor plus description hook. */
22 | dbPlugin artfl = {
23 |   .create_hitdef   = new_hitdef,
24 |   .get_plugin_info = get_plugin_info_artfl,
25 | };
26 | 
27 | struct   /* plugin table row: tag string plus plugin descriptor */
28 | {
29 |   Z8      *dbp_tag;
30 |   dbPlugin  *dbp;
31 | } 
32 | dbPlugins[] = 
33 | {
34 |   { (Z8 *)"artfl2t", &artfl },
35 |   { 0, 0 }   /* all-zero last row; presumably the end-of-table sentinel — TODO confirm against the lookup code */
36 | };                                                                             
37 | 
38 | /* Return a malloc'd description of the builtin plugin, or NULL if malloc fails; presumably the caller frees it. */
39 | Z8 *get_plugin_info_artfl ()
40 | {
41 |   const size_t len = 256;                   /* buffer length, unchanged from the original allocation */
42 |   Z8 *ret = malloc ( len * sizeof(Z8) );
43 | 
44 |   if ( ret != NULL )                        /* never write through a failed allocation */
45 |     snprintf ( (char *)ret, len, "%s", "This is the builtin (default) plugin for ARTFL textual database v.2t" );
46 |   return ret;
47 | }
48 | 


--------------------------------------------------------------------------------
/libphilo/plugin/plugin.h:
--------------------------------------------------------------------------------
 1 | // $Id: plugin.h,v 2.12 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifdef PLUGIN_H
20 |   #error "plugin.h multiply included"
21 | #else
22 |   #define PLUGIN_H
23 |   #ifndef C_H
24 |     #include "../c.h"
25 |   #endif
26 | 
27 |   #ifndef HITDEF_H
28 |     #include "hitdef.h"
29 |   #endif 
30 | 
31 |   typedef struct   /* database plugin descriptor */
32 |   {
33 |      hitdef *(*create_hitdef)(N32);      /* hitdef constructor; plugin.c installs new_hitdef here */
34 |      Z8     *(*get_plugin_info)(void);   /* description hook; plugin.c installs get_plugin_info_artfl here */
35 |   }  
36 |   dbPlugin; 
37 | 
38 |  extern Z8 *get_plugin_info_artfl();
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/libphilo/plugin/searchmethods.h:
--------------------------------------------------------------------------------
 1 | // $Id: searchmethods.h,v 2.11 2004/05/28 19:22:08 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #ifndef METHOD_H
20 |   #include "method.h"
21 | #endif
22 | 
23 | extern SearchMethodEntry SearchMethods[];   /* table defined in method.c; its last row is { 0, 0 } */
24 | 


--------------------------------------------------------------------------------
/libphilo/retreive.h:
--------------------------------------------------------------------------------
 1 | // $Id: retreive.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 | /* retreive.h: header file for the hit retrieval subsystem */
21 | 
22 | #ifdef RETREIVE_H
23 |   #error "retreive.h multiply included"
24 | #else
25 |   
26 |   #define RETREIVE_H
27 | 
28 |   #ifndef C_H
29 |     #include "c.h"
30 |   #endif
31 | 
32 |   #ifndef SEARCH_H
33 |     #include "search.h"
34 |   #endif
35 | 
36 |   #define RETR_BLK_OK                       0   /* status codes; the nonzero values below are distinct powers of two */
37 |   #define RETR_BLK_CLEAN                    1
38 |   #define RETR_HITS_CACHED                  2
39 |   #define RETR_END_OF_MAP                   4
40 |   #define RETR_RESMAP_FULL                  8
41 |   #define RETR_BUMMER                      16
42 |   #define RETR_REACHED_NEXT_BLOCK_BOUNDARY 32
43 | 
44 | 
45 |   extern N32 retreive_hits ( Search s, N level, Gmap, Gmap ); 
46 | N32 retreive_hits ( Search s, N level, Gmap map, Gmap res );   /* repeats the declaration on the previous line, this time naming the two Gmap parameters */
47 | Z32 process_block_unit ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
48 | Z32 process_hit_block ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
49 | Gmap retreive_cached_hits ( Search s, N8 bn, Word w, N32 n, Gmap map, N32 *howmany );
50 | Gmap retreive_hit_block ( Search s, N8 bn, Word w, N32 n, Gmap map, N32 *howmany );
51 | Z32 filternload_hits ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap hits, N32 howmany, Gmap res ) ;
52 | Z32 load_hits ( Search s, Gmap hits, N32 howmany, Gmap res );
53 | Z32 process_single_entry ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
54 | Z32 filter_single_entry ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
55 | Z32 process_hit_block_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
56 | Gmap retreive_hit_block_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res, N32 *howmany );
57 | Gmap retreive_cached_hits_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res, N32 *howmany );
58 | Z32 process_single_entry_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap res );
59 | Z32 filternload_booleannot ( Search s, N8 bn, Word w, N32 n, Gmap map, Gmap hits, Z32 howmany, Gmap res );
60 | Z32 chkstatus_EOM ( Z32 status );
61 | Z32 chkstatus_CACH ( Z32 status );
62 | Z32 chkstatus_ERR ( Z32 status );
63 | 
64 | #endif
65 | 


--------------------------------------------------------------------------------
/libphilo/search.h:
--------------------------------------------------------------------------------
  1 | // $Id: search.h,v 2.11 2004/05/28 19:22:06 o Exp $
  2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
  3 | // Copyright (C) 2004 University of Chicago
  4 | // 
  5 | // This program is free software; you can redistribute it and/or modify
  6 | // it under the terms of the Affero General Public License as published by
  7 | // Affero, Inc.; either version 1 of the License, or (at your option)
  8 | // any later version.
  9 | // 
 10 | // This program is distributed in the hope that it will be useful,
 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | // Affero General Public License for more details.
 14 | // 
 15 | // You should have received a copy of the Affero General Public License
 16 | // along with this program; if not, write to Affero, Inc.,
 17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
 18 | 
 19 | 
 20 | #ifdef SEARCH_H
 21 |   #error "search.h multiply included"
 22 | #else
 23 |   #define SEARCH_H
 24 | 	#include "db/db.h"
 25 |   #ifndef C_H
 26 |     #include "c.h"
 27 |   #endif 
 28 | 
 29 |   #define  MAP_INIT_LEN  8192
 30 | 
 31 |   #define DEFAULT_BATCH_LIMIT   8192
 32 |   #define DEFAULT_PRINT_LIMIT   3000
 33 | 
 34 |   #define SEARCH_PASS_OK               0
 35 |   #define SEARCH_BUMMER_OCCURED        1
 36 |   #define SEARCH_PASS_FINISHED         2
 37 |   #define SEARCH_BATCH_FINISHED        3
 38 |   #define SEARCH_BATCH_LIMIT_REACHED   4
 39 |   #define SEARCH_PRINT_LIMIT_REACHED   5
 40 | 
 41 | #define BAD_ARGZ                1
 42 | 
 43 | #define BAD_ENGINE_ARGZ         "badly defined engine (-E:) arguments"   /* message for bad -E: arguments; it previously said "output", duplicating the -P: text */
 44 | #define BAD_SEARCH_ARGZ         "badly defined search (-S:) arguments"
 45 | #define BAD_CORPUS_ARGZ         "badly defined corpus (-C:) arguments"
 46 | #define BAD_OUTPUT_ARGZ         "badly defined output (-P:) arguments"
 47 | #define BAD_PLUGIN_ARGZ         "badly defined plugin (-D:) argument"
 48 |   typedef struct Search *Search, Search_; 
 49 | 
 50 |   #ifndef BATCH_H
 51 |     #include "level.h"
 52 |   #endif
 53 | 
 54 |   #ifndef HIT_H
 55 |     #include "plugin/hit.h"
 56 |   #endif
 57 | 
 58 |   #ifndef LOG_H
 59 |     #include "log.h"
 60 |   #endif
 61 | 
  // State of one search run. Callers hold it through the pointer typedef
  // `Search` declared above; new_search() (prototype below) returns one.
  struct Search
  {
  	dbh *db;              // database handle; word.c passes it to hit_lookup()
    N32   depth;
    N32   depth_r;
    Batch batches;        // Batch type comes from level.h

    hitdef *hit_def;      // hit layout; word.c passes hit_def->fields to new_Gmap()


    Z32    bn;
    
    Z32    bincorpus;     // NOTE(review): presumably flags a binary corpus file -- confirm
    Z8     *corpus;
    N8     cfactor;

    Gmap   map;

    Z32    debug; 

    // NOTE(review): the limits below presumably default to DEFAULT_PRINT_LIMIT /
    // DEFAULT_BATCH_LIMIT defined above -- confirm in search.c
    Z32    print_limit; 
    Z32    n_printed;
    Z32    batch_limit; 
/*    Z32    offset; */
    Z32    soft_limit; 

    Z32    exitcode;
    Z8     errstr[1024];  // fixed-size buffer; NOTE(review): presumably the last error message -- confirm

  };
 92 | 
 93 |   extern Search new_searchObject ();
 94 | Search new_search( dbh *db, Z8 *method, Z8 *arg, int ascii, int limit, int corpussize, char * corpusfile);
 95 | int set_search_method( Search s, Z8 *methodstring, Z8 *argstring);
 96 | #endif
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------
/libphilo/search.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | from __future__ import absolute_import
 3 | import sys,os
 4 | from ctypes import *
 5 | stdlib=cdll.LoadLibrary("libc.dylib")
 6 | 
 7 | stdin = stdlib.fdopen(sys.stdin.fileno(),"r")
 8 | 
 9 | libphilo = cdll.LoadLibrary("./libphilo.dylib")
10 | 
11 | db = libphilo.init_dbh_folder("/var/lib/philologic/databases/PerseusGreekDev/")
12 | 
13 | s = libphilo.new_search(db,"phrase",None,1,100000,0,None)
14 | 
15 | libphilo.process_input(s,stdin)
16 | 
17 | 
18 | libphilo.search_pass(s,0)
19 | 


--------------------------------------------------------------------------------
/libphilo/search4.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <unistd.h>
  5 | #include <getopt.h>
  6 | #include "c.h"
  7 | #include "search.h"
  8 | 
  9 | int main(int argc, char **argv) {
 10 | 
 11 | 	int c;
 12 | 	int option_index;
 13 | 	int remaining_argument = 0;
 14 | 	int argcounter = 1;
 15 | 
 16 | 	char method[256];
 17 | 	int method_set = 0;
 18 | 	char dbname[256];
 19 | 	int dbname_set = 0;
 20 | 	char search_arg[256];
 21 | 	int arg_set = 0;
 22 | 	char *temp_search_arg = NULL;
 23 | 
 24 | 	int ascii_set = 0;
 25 | 	int corpussize = 1;
 26 | 	char corpusfile[256];
 27 | 	int corpusfile_set = 0;
 28 | 	int debug = 0;
 29 | 	int limit = 0;
 30 | 
 31 | 	Search s;
 32 | 	dbh *db;
 33 | 	int status;
 34 | 
 35 | 	char *usage = "search4 [--ascii --corpussize c --corpusfile f --debug d --limit l] dbname [search method]\n";
 36 | 
 37 | 	static struct option long_options[] =
 38 | 		{
 39 | 			{"ascii", no_argument, 0, 'a'},
 40 | 			{"corpussize", required_argument, 0, 'c'},
 41 | 			{"corpusfile", required_argument, 0, 'f'},
 42 | 			{"debug", required_argument, 0, 'd'},
 43 | 			{"limit", required_argument, 0, 'l'},
 44 | 			{0,0,0,0}
 45 | 		};
 46 | 		
 47 | 	while (0 < (c = getopt_long(argc, argv, "ac:d:f:l:", long_options, &option_index) ) ) { 
 48 | 		//while we step through all options in argv:
 49 | 		fprintf(stderr,"%s is set. ", long_options[option_index].name);
 50 | 		if (optarg) {
 51 | 			fprintf (stderr," with arg %s", optarg);
 52 | 		}
 53 | 		switch(c) {
 54 | 			case 'a':
 55 | 				ascii_set = 1;
 56 | 				break;
 57 | 			case 'l':
 58 | 				limit = atol(optarg);
 59 | 				break;
 60 | 			case 'c':
 61 | 				corpussize = atoi(optarg);
 62 | 				break;
 63 | 			case 'f':
 64 | 				strncpy(corpusfile,optarg,255);
 65 | 				corpusfile_set = 1;				
 66 | 			default:
 67 | 				break;
 68 | 		}
 69 | 		fprintf(stderr,"\n");
 70 | 	}
 71 | 	
 72 | 	if (!corpusfile_set) {
 73 | 		corpussize = 0;
 74 | 	}
 75 | 	
 76 | 	while (optind < argc) {
 77 | 		if (argcounter == 1) {
 78 | 			strncpy(dbname, argv[optind],256);
 79 | 			fprintf(stderr,"database name is %s\n",dbname);
 80 | 			dbname_set = 1;
 81 | 		}
 82 | 		if (argcounter == 2) {
 83 | 			strncpy(method, argv[optind],256);
 84 | //			printf("search method is %s\n",method);
 85 | 			method_set = 1;
 86 | 		}
 87 | 		if (argcounter == 3) {
 88 | 		  strncpy(search_arg, argv[optind], 256);
 89 | 		  //		  printf("search arg is %s\n",search_arg);
 90 | 		  arg_set = 1;
 91 | 		}
 92 | 		optind += 1;
 93 | 		argcounter += 1;
 94 | 	}
 95 | 
 96 | 	if (!dbname_set) {
 97 | 		printf("%s", usage);
 98 | 		return 1;
 99 | 	}
100 | 
101 | 	db = init_dbh_folder(dbname);
102 | 	if (!method_set) {
103 | 		strncpy(method,"phrase",256);
104 | 	}
105 | 	if (arg_set) {
106 | 	  temp_search_arg = malloc(sizeof(Z8 *) * 256);
107 | 	  strncpy(temp_search_arg,search_arg,256);
108 | 	}
109 | 	s = new_search(db, method, temp_search_arg, ascii_set,limit,corpussize,corpusfile);
110 | 	status = process_input ( s, stdin );
111 | 	if ( status == BATCH_EMPTY ) {
112 | 		fprintf(stderr,"no hits found.\n");
113 | 		return 0;
114 | 	}
115 | 	//	s->batches->map = s->map; 
116 | 	while ( status = search_pass ( s, 0 ) == SEARCH_PASS_OK ) {
117 | 		continue;
118 | 	}
119 | 	delete_search(s);
120 | 	delete_dbh(db);
121 | 	return 0;
122 | }
123 | 


--------------------------------------------------------------------------------
/libphilo/word.c:
--------------------------------------------------------------------------------
 1 | // $Id: word.c,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | #include <stdio.h>
20 | 
21 | #include "word.h"
22 | #include "db/unpack.h"
#ifndef SEARCH_H  /* search.h defines SEARCH_H and #errors when included twice */
  #include "search.h"
#endif
26 | 
27 | Z32 init_wordObject ( Search s, Z8 *word, Word w, N32 *block_n ) 
28 | {
29 |   /*
30 |     it's already malloc-ed somewhere else; we get long
31 |     word lists occasionally and they are malloc-ed in bulk;
32 |    */
33 | 
34 |   Z32 *tmp; 
35 |   w->type=0;
36 |   w->freq=0;
37 |   w->blkcount=0;
38 |   w->offset=0;
39 |   if (( tmp = hit_lookup (s->db, word, 
40 | 		        &(w->type), 
41 | 		        &(w->freq), 
42 | 		        &(w->blkcount), 
43 | 		        &(w->offset)))
44 |       
45 |       == NULL)
46 |     {
47 |       /* not found */
48 |       return 0;
49 |     }
50 | 
51 |   w->dir        = new_Gmap ( 0, s->hit_def->fields ); 
52 | 
53 |   w->dir->gm_l  = w->blkcount;
54 |   w->dir->gm_h  = tmp;
55 | 
56 |   w->mapctr     = 0;
57 |   w->blkproc    = 0; 
58 | 
59 |   w->blk_cached = -1; 
60 |   w->n_cached   = 0; 
61 |   w->cached     = NULL;
62 | 
63 |   *block_n = w->type ? w->blkcount : w->freq; 
64 | 
65 |   return w->freq; 
66 |       
67 | }
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/libphilo/word.h:
--------------------------------------------------------------------------------
 1 | // $Id: word.h,v 2.11 2004/05/28 19:22:06 o Exp $
 2 | // philologic 2.8 -- TEI XML/SGML Full-text database engine
 3 | // Copyright (C) 2004 University of Chicago
 4 | // 
 5 | // This program is free software; you can redistribute it and/or modify
 6 | // it under the terms of the Affero General Public License as published by
 7 | // Affero, Inc.; either version 1 of the License, or (at your option)
 8 | // any later version.
 9 | // 
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | // Affero General Public License for more details.
14 | // 
15 | // You should have received a copy of the Affero General Public License
16 | // along with this program; if not, write to Affero, Inc.,
17 | // 510 Third Street, Suite 225, San Francisco, CA 94107 USA.
18 | 
19 | 
20 | #ifdef WORD_H
21 |   #error "word.h multiply included"
22 | #else
23 | 
24 | #define WORD_H
25 | 
26 | #ifndef C_H
27 |   #include "c.h"
28 | #endif
29 | 
30 | #ifndef HIT_H
31 |   #include "plugin/hit.h"
32 | #endif
33 | 
34 | #ifndef GMAP_H
35 |   #include "gmap.h"
36 | #endif
37 | 
38 | #define W_LENGTH_MAX 1024 
39 | #define INITWORDS    1024
40 | 
41 | typedef struct Word *Word, Word_; 
42 | 
/*
 * Per-term search state.  init_wordObject() in word.c fills one of these in
 * for each search term; callers hold it through the pointer typedef `Word`.
 */
struct Word
{ 
  /*hit *dir;*/
  Gmap dir;        /* map built by init_wordObject(): gm_h is the hit_lookup() result, gm_l is blkcount */
 
  N32 type;        /* set by hit_lookup(); when non-zero, init_wordObject() reports blkcount as the block count, otherwise freq */
  N32 freq;        /* set by hit_lookup(); returned by init_wordObject() */

  N32 blkcount;    /* set by hit_lookup() */
  N64 offset;      /* set by hit_lookup(); NOTE(review): presumably an offset into the index file -- confirm */
 
  N32 blkproc;     /* reset to 0 by init_wordObject(); NOTE(review): presumably blocks processed so far -- confirm */
  /*  hit hitproc;*/
 
  N32 mapctr;      /* reset to 0 by init_wordObject() */

  Z32 blk_cached;  /* block from which hits have been cached; -1 after init_wordObject() */
  N32 n_cached;    /* number of hits cached */
  Z32 *cached;     /* hits cached; NULL after init_wordObject() */
 
};
64 | 
65 | #endif /* #ifdef WORD_H */
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/python/README:
--------------------------------------------------------------------------------
1 | These are the Python bindings for the PhiloLogic search engine.
2 | 
On its own, this package is only useful for its TEI parser, which you can import
for use outside of PhiloLogic.
5 | 
To get the full functionality of PhiloLogic, you need to install the search core
as well as the web components. See https://github.com/ARTFL-Project/PhiloLogic4
for instructions and further documentation.


--------------------------------------------------------------------------------
/python/philologic/__init__.py:
--------------------------------------------------------------------------------
1 | from . import shlax
2 | from . import shlaxtree
3 | from .TagCensus import TagCensus
4 | from .runtime import DB
5 | 


--------------------------------------------------------------------------------
/python/philologic/loadtime/PhiloLoad.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | 
 6 | from philologic.loadtime.Loader import Loader, setup_db_dir
 7 | from philologic.loadtime.LoadOptions import CONFIG_FILE, LoadOptions
 8 | 
 9 | os.environ["LC_ALL"] = "C"  # Exceedingly important to get uniform sort order.
10 | os.environ["PYTHONIOENCODING"] = "utf-8"
11 | 
12 | 
def philoload():
    """Run a complete database load driven by the command-line arguments.

    Parses ``sys.argv`` into load options, prepares the destination
    directories, runs every stage of the Loader pipeline in order, then
    reports any files the loader dropped and prints the URL of the new
    web application.
    """
    load_options = LoadOptions()
    load_options.parse(sys.argv)
    setup_db_dir(load_options["db_destination"], load_options["web_app_dir"], force_delete=load_options.force_delete)

    # Database load
    loader = Loader.set_class_attributes(load_options.values)
    loader.add_files(load_options.files)
    # Metadata comes from a bibliography file when one is given, otherwise
    # from the documents themselves.
    if load_options.bibliography:
        load_metadata = loader.parse_bibliography_file(load_options.bibliography, load_options.sort_order)
    else:
        load_metadata = loader.parse_metadata(load_options.sort_order, header=load_options.header)
    loader.set_file_data(load_metadata, loader.textdir, loader.workdir)
    loader.parse_files(load_options.cores)
    loader.merge_objects()
    loader.analyze()
    loader.setup_sql_load()
    loader.post_processing()
    loader.finish()
    if loader.deleted_files:
        skipped_files = "\n".join(loader.deleted_files)
        print(f"The following files were not loaded due to invalid data in the header:\n{skipped_files}")

    print(f"Application viewable at {os.path.join(CONFIG_FILE.url_root, load_options.dbname)}\n")
40 | 


--------------------------------------------------------------------------------
/python/philologic/loadtime/__init__.py:
--------------------------------------------------------------------------------
1 | from philologic.loadtime.Parser import XMLParser
2 | from philologic.loadtime.PhiloLoad import philoload
3 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/__init__.py:
--------------------------------------------------------------------------------
 1 | """Runtime exports"""
 2 | 
 3 | from philologic.runtime.access_control import check_access, login_access
 4 | from philologic.runtime.find_similar_words import find_similar_words
 5 | from philologic.runtime.FragmentParser import FragmentParser
 6 | from philologic.runtime.get_text import get_concordance_text, get_tei_header
 7 | from philologic.runtime.pages import page_interval
 8 | from philologic.runtime.Query import parse_query
 9 | from philologic.runtime.reports import (
10 |     bibliography_results,
11 |     collocation_results,
12 |     concordance_results,
13 |     filter_words_by_property,
14 |     frequency_results,
15 |     generate_text_object,
16 |     generate_time_series,
17 |     generate_toc_object,
18 |     generate_word_frequency,
19 |     get_start_end_date,
20 |     kwic_hit_object,
21 |     kwic_results,
22 |     group_by_metadata,
23 |     group_by_range,
24 |     landing_page_bibliography,
25 |     aggregation_by_field,
26 | )
27 | from philologic.runtime.web_config import WebConfig
28 | from philologic.runtime.WSGIHandler import WSGIHandler
29 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/collocation_scores.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Compute collocation scores"""
 3 | 
 4 | 
 5 | from math import log
 6 | 
 7 | 
 8 | def pointwise_mutual_information(total_word_count, collocate_count, collocate, cursor):
 9 |     """Calculate Pointwise Mutual Information."""
10 |     if collocate_count < 5:
11 |         return 0
12 |     query = """select count(*) from words where philo_name='%s'""" % collocate
13 |     cursor.execute(query)
14 |     total_collocate_count = cursor.fetchone()[0]
15 |     score = log(collocate_count / total_word_count * total_collocate_count)
16 |     return score
17 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/find_similar_words.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Find similar words to query term."""
 3 | 
 4 | 
 5 | import hashlib
 6 | import os
 7 | 
 8 | from Levenshtein import ratio
 9 | from philologic.runtime.Query import get_expanded_query
10 | from unidecode import unidecode
11 | 
12 | 
def get_all_words(db, request):
    """Return the query terms as groups of words, expanded when possible.

    The query (double quotes removed) is run to completion and expanded with
    get_expanded_query(). Each expanded group is returned with its words
    unquoted and, when the database has ASCII conversion enabled,
    transliterated with unidecode. When there is no expansion, a single
    group holding the whitespace-split query is returned.
    """
    query_terms = request["q"].replace('"', "")
    hitlist = db.query(query_terms)
    hitlist.finish()
    expansion = get_expanded_query(hitlist)
    if not expansion:
        return [query_terms.split()]
    groups = []
    for expanded_group in expansion:
        cleaned = []
        for term in expanded_group:
            term = term.replace('"', "")
            cleaned.append(unidecode(term) if db.locals.ascii_conversion is True else term)
        groups.append(cleaned)
    return groups
31 | 
32 | 
def find_similar_words(db, config, request):
    """Return an OR-query of the corpus words that approximately match the query.

    The answer is cached on disk under a SHA-256 of the query and the
    requested ratio; a cached answer is returned as is. Otherwise every
    entry of the normalized word frequency file is compared (Levenshtein
    ratio) with each query word, and the regular forms whose normalized form
    reaches ``request.approximate_ratio`` are collected per query group.
    Groups are joined with spaces and the words inside a group with " | ".
    """
    digest = hashlib.sha256()
    for component in (request["q"], str(request.approximate_ratio)):
        digest.update(component.encode("utf8"))
    cache_path = os.path.join(config.db_path, f"data/hitlists/{digest.hexdigest()}.approximate_terms")
    if os.path.isfile(cache_path):
        with open(cache_path, encoding="utf8") as cache:
            return cache.read().strip()

    query_groups = get_all_words(db, request)
    frequencies_path = os.path.join(config.db_path, "data/frequencies/normalized_word_frequencies")
    matches = [set() for _ in query_groups]
    with open(frequencies_path, encoding="utf8") as frequencies:
        for raw_line in frequencies:
            try:
                normalized_word, regular_word = raw_line.strip().split("\t")
                for group_matches, query_group in zip(matches, query_groups):
                    for query_word in query_group:
                        if ratio(query_word, normalized_word) >= float(request.approximate_ratio):
                            group_matches.add(f'"{regular_word}"')
            except ValueError:
                # e.g. a line that does not hold exactly two tab-separated fields
                continue

    expanded_query = " ".join(" | ".join(group_matches) for group_matches in matches)
    with open(cache_path, "w", encoding="utf8") as cache:
        cache.write(expanded_query)
    return expanded_query
62 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/link.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Build PhiloLogic links"""
 3 | 
 4 | from urllib.parse import quote_plus
 5 | 
 6 | 
 7 | def url_encode(q_params):
 8 |     """URL encode."""
 9 |     encoded_str = []
10 |     for k, v in q_params:
11 |         if v:
12 |             if isinstance(v, list):
13 |                 for s in v:
14 |                     encoded_str.append(f'{quote_plus(k, safe="/")}={quote_plus(s, safe="/")}')
15 |             else:
16 |                 encoded_str.append(f'{quote_plus(k, safe="/")}={quote_plus(v, safe="/")}')
17 |         else:  # Value is None
18 |             encoded_str.append(f'{quote_plus(k, safe="/")}=')
19 |     return "&".join(encoded_str)
20 | 
21 | 
def make_object_link(philo_id, hit_bytes):
    """Return the relative URL of a PhiloLogic object, with the hit byte offsets as its query string."""
    object_path = "/".join(str(part) for part in philo_id)
    return f"./{object_path}{byte_query(hit_bytes)}"
26 | 
27 | 
def make_absolute_object_link(config, philo_id, byte_offsets=None):
    """Return the absolute ``/navigate/...`` URL of a PhiloLogic object.

    When ``byte_offsets`` is given, the offsets are appended as a byte query
    string. ``config`` is not used.
    """
    link = "/navigate/" + "/".join(str(part) for part in philo_id)
    if byte_offsets is None:
        return link
    return link + byte_query(byte_offsets)
34 | 
35 | 
def make_absolute_query_link(config, params, script_name="/query", **extra_params):
    """Return the URL of a query built from an iterable of (key, value) pairs.

    ``params`` is iterated as pairs, such as those produced by WSGIHandler.
    ``extra_params`` override or extend those pairs. The result is
    ``script_name?query_string``, or the bare query string when
    ``script_name`` is empty. ``config`` is not used.
    """
    merged = {key: value for key, value in params}
    merged.update(extra_params)
    query_string = url_encode(list(merged.items()))
    if not script_name:
        return query_string
    return f"{script_name}?{query_string}"
46 | 
47 | 
def byte_query(hit_bytes):
    """Return a ``?byte=..&byte=..`` query string, used to navigate to and highlight hits."""
    byte_params = (f"byte={offset}" for offset in hit_bytes)
    return "?" + "&".join(byte_params)
51 | 
52 | 
def make_byte_range_link(config, philo_id, start_byte, end_byte):
    """Return the absolute link of the object with a space-separated ``philo_id``,
    carrying the byte range to highlight."""
    object_link = make_absolute_object_link(config, philo_id.split())
    return f"{object_link}?start_byte={start_byte}&end_byte={end_byte}"
58 | 
59 | 
def byte_range_to_link(db, config, request, obj_level="div1"):
    """Return a link to the object of type ``obj_level`` containing a byte range.

    The document is identified by ``request.filename``. Among its objects of
    type ``obj_level``, the one with the highest rowid whose start byte is
    not after ``request.start_byte`` is selected. The link highlights
    ``request.start_byte``..``request.end_byte``.

    Raises:
        TypeError: if the filename is unknown or no such object exists
            (``fetchone()`` returns None).
        ValueError: if ``request.start_byte`` is not an integer.
    """
    cursor = db.dbh.cursor()
    cursor.execute("SELECT philo_id FROM toms WHERE filename=?", (request.filename,))
    doc_id = cursor.fetchone()[0].split()[0]
    next_doc_id = str(int(doc_id) + 1)
    cursor.execute("SELECT rowid FROM toms WHERE philo_doc_id=?", (next_doc_id,))
    next_doc_row = cursor.fetchone()

    # All values are bound as parameters rather than interpolated into the SQL.
    query = "SELECT philo_id FROM toms WHERE philo_type=? AND philo_id like ? AND cast(start_byte as decimal) <= ?"
    bindings = [obj_level, f"{doc_id} %", int(request.start_byte)]
    if next_doc_row is not None:
        # Only an upper bound on the scan: the LIKE clause already restricts
        # the rows to this document. The last document has no successor, so
        # the bound is simply left out instead of crashing on None.
        query += " AND rowid < ?"
        bindings.append(next_doc_row[0])
    query += " ORDER BY rowid desc"
    cursor.execute(query, bindings)

    philo_id = cursor.fetchone()[0].split()
    # Drop the trailing zero components of the id
    while int(philo_id[-1]) == 0:
        philo_id.pop()
    link = make_byte_range_link(config, " ".join(philo_id), request.start_byte, request.end_byte)
    return link
77 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/pages.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Page intervals"""
 3 | 
 4 | 
 5 | def page_interval(num, results, start, end):
 6 |     """Return page intervals"""
 7 |     start = int(start)
 8 |     end = int(end)
 9 |     num = int(num)
10 |     if start <= 0:
11 |         start = 1
12 |     if end <= 0:
13 |         end = start + (num - 1)
14 |     results_len = len(results)
15 |     if end > results_len and results.done:
16 |         end = results_len
17 |     n = start - 1
18 |     return start, end, n
19 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/reports/__init__.py:
--------------------------------------------------------------------------------
 1 | """Report exports"""
 2 | from philologic.runtime.reports.concordance import concordance_results
 3 | from philologic.runtime.reports.bibliography import bibliography_results
 4 | from philologic.runtime.reports.time_series import generate_time_series, get_start_end_date
 5 | from philologic.runtime.reports.navigation import generate_text_object
 6 | from philologic.runtime.reports.table_of_contents import generate_toc_object
 7 | from philologic.runtime.reports.kwic import kwic_results, kwic_hit_object
 8 | from philologic.runtime.reports.generate_word_frequency import generate_word_frequency
 9 | from philologic.runtime.reports.frequency import frequency_results
10 | from philologic.runtime.reports.collocation import collocation_results
11 | from philologic.runtime.reports.filter_word_by_property import filter_words_by_property
12 | from philologic.runtime.reports.landing_page import landing_page_bibliography, group_by_range, group_by_metadata
13 | from philologic.runtime.reports.aggregation import aggregation_by_field
14 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/reports/concordance.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Concordance report"""
 3 | 
 4 | import regex as re
 5 | from philologic.runtime.pages import page_interval
 6 | from philologic.runtime.citations import citations, citation_links
 7 | from philologic.runtime.get_text import get_concordance_text
 8 | from philologic.runtime.DB import DB
 9 | from philologic.runtime.HitList import CombinedHitlist
10 | 
11 | 
def concordance_results(request, config):
    """Build the concordance report for a request.

    Runs the query (or, for a collocation request, the query and the
    ``left`` query combined into one hit list), pages through the hits and
    returns a dict holding the page description, the query parameters, one
    result object per hit (citation, links, context, metadata, bytes), the
    current number of hits and whether the query is done.
    """
    db = DB(config.db_path + "/data/")
    if not request.collocation_type:
        hits = db.query(
            request["q"],
            request["method"],
            request["arg"],
            sort_order=request["sort_order"],
            **request.metadata,
        )
    else:
        query_hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
        collocate_hits = db.query(request["left"], request["method"], request["arg"], **request.metadata)
        hits = CombinedHitlist(query_hits, collocate_hits)
    start, end, _ = page_interval(request["results_per_page"], hits, request.start, request.end)

    description = {"start": start, "end": end, "results_per_page": request.results_per_page}
    concordance_object = {
        "description": description,
        "query": {key: value for key, value in request},
        "default_object": db.locals["default_object_level"],
    }

    # Compile the configured (pattern, replacement) pairs once, not once per hit.
    formatting_regexes = []
    if config.concordance_formatting_regex:
        formatting_regexes = [
            (re.compile(rf"{pattern}"), replacement) for pattern, replacement in config.concordance_formatting_regex
        ]

    results = []
    for hit in hits[start - 1 : end]:
        citation_hrefs = citation_links(db, config, hit)
        metadata_fields = {field: hit[field] for field in db.locals["metadata_fields"]}
        citation = citations(hit, citation_hrefs, config, report="concordance")
        context = get_concordance_text(db, hit, config.db_path, config.concordance_length)
        for formatting_regex, replacement in formatting_regexes:
            context = formatting_regex.sub(rf"{replacement}", context)
        results.append(
            {
                "philo_id": hit.philo_id,
                "citation": citation,
                "citation_links": citation_hrefs,
                "context": context,
                "metadata_fields": metadata_fields,
                "bytes": hit.bytes,
            }
        )
    concordance_object.update(results=results, results_length=len(hits), query_done=hits.done)
    return concordance_object
72 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/reports/filter_word_by_property.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Filter word by property
 3 | Currently unmaintained"""
 4 | 
 5 | from philologic.runtime.citations import citation_links, citations
 6 | from philologic.runtime.get_text import get_concordance_text
 7 | from philologic.runtime.reports.generate_word_frequency import get_word_attrib
 8 | from philologic.runtime.DB import DB
 9 | 
10 | 
def filter_words_by_property(request, config):
    """Return one page of concordance results restricted to a word property value.

    Hits are kept when get_word_attrib() for ``request["word_property"]``
    equals ``request["word_property_value"]``. Matching hits before
    ``request.start`` are skipped, and collection stops once
    ``request.results_per_page`` results have been gathered.
    """
    db = DB(config.db_path + "/data/")
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    concordance_object = {"query": {key: value for key, value in request}}

    # Do these need to be captured in wsgi_handler?
    word_property = request["word_property"]
    word_property_value = request["word_property_value"]
    word_property_total = request["word_property_total"]  # recomputed below

    start = 1 if request.start == 0 else request.start

    results = []
    matches_seen = 0
    more_pages = False
    for hit in hits:
        if get_word_attrib(hit, word_property, db) == word_property_value:
            matches_seen += 1
            if matches_seen < start:
                continue
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {field: hit[field] for field in db.locals["metadata_fields"]}
            results.append(
                {
                    "philo_id": hit.philo_id,
                    "citation": citations(hit, citation_hrefs, config),
                    "citation_links": citation_hrefs,
                    "context": get_concordance_text(db, hit, config.db_path, config.concordance_length),
                    "metadata_fields": metadata_fields,
                    "bytes": hit.bytes,
                    "collocate_count": 1,
                }
            )

        if len(results) == (request.results_per_page):
            more_pages = True
            break

    end = start + len(results) - 1
    # A full page means at least one more result may exist
    word_property_total = end if len(results) < request.results_per_page else end + 1
    concordance_object["results"] = results
    concordance_object["query_done"] = hits.done
    concordance_object["results_length"] = word_property_total
    concordance_object["description"] = {
        "start": start,
        "end": end,
        "results_per_page": request.results_per_page,
        "more_pages": more_pages,
    }
    return concordance_object
77 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/reports/generate_word_frequency.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Generate word frequency
 3 | Currently unmaintained."""
 4 | 
 5 | import timeit
 6 | 
 7 | from philologic.runtime.link import make_absolute_query_link
 8 | from philologic.runtime.DB import DB
 9 | 
10 | 
def generate_word_frequency(request, config):
    """Count the values of the word attribute request["field"] over a hit list.

    Starting at ``request.start``, hits are tallied by attribute value for
    about 5 seconds at most. The returned dict maps each value to its count
    and to a ``word_property_filter`` query URL, and reports how many hits
    were processed and whether more remain.
    """
    db = DB(config.db_path + "/data/")
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    field = request["field"]
    counts = {}
    frequency_object = {}
    start_time = timeit.default_timer()
    last_hit_done = request.start
    try:
        for hit in hits[request.start :]:
            # NULL is a magic value for queries, don't change it
            # recklessly.
            key = get_word_attrib(hit, field, db) or "NULL"
            counts[key] = counts.get(key, 0) + 1
            last_hit_done += 1
            if timeit.default_timer() - start_time > 5:
                break

        table = {
            value: {
                "count": count,
                "url": make_absolute_query_link(
                    config,
                    request,
                    start="0",
                    end="0",
                    report="word_property_filter",
                    word_property=field,
                    word_property_value=value,
                ),
            }
            for value, count in counts.items()
        }

        frequency_object["results"] = table
        frequency_object["hits_done"] = last_hit_done
        frequency_object["more_results"] = last_hit_done != len(hits)

    except IndexError:
        frequency_object["results"] = {}
        frequency_object["more_results"] = False

    frequency_object["results_length"] = len(hits)
    frequency_object["query"] = {key: value for key, value in request}

    return frequency_object
64 | 
65 | 
def get_word_attrib(n, field, db):
    """Build the attribute key for the words of hit ``n``.

    ``field`` selects the attribute read from every word; ``"token"`` is an
    alias for ``"philo_name"`` and ``"morph"`` for ``"pos"``. The per-word
    values are joined with ``"_"``, with ``"NULL"`` standing in for any empty
    value, and the result is returned as UTF-8 encoded bytes.

    ``db`` is accepted but not used.
    """
    if field == "token":
        column = "philo_name"
    elif field == "morph":
        column = "pos"
    else:
        column = field

    # Every piece is non-empty (a real value or "NULL"), so a plain join
    # yields exactly one separator between consecutive words.
    pieces = [word[column] or "NULL" for word in n.words]
    return "_".join(pieces).encode("utf-8")
87 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/reports/navigation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Navigate inside objects"""
 3 | 
 4 | import regex as re
 5 | from philologic.runtime.citations import citation_links, citations
 6 | from philologic.runtime.get_text import get_text_obj
 7 | from philologic.runtime.DB import DB
 8 | 
 9 | 
def generate_text_object(request, config, note=False):
    """Build the navigation payload for the object named by ``request.philo_id``.

    The returned dict holds the request parameters (``query``), the trimmed
    ``philo_id``, the ``prev``/``next`` neighbor ids, the object's
    ``metadata_fields``, its ``citation``, and the rendered ``text`` with its
    ``imgs``. When ``note`` is truthy the object is instead resolved from
    ``request.target`` through the ``toms`` table of the same document.
    """
    # A nine-part philo_id is looked up with width 9, unless a note was
    # explicitly requested; everything else uses width 7.
    has_nine_parts = len(request.philo_id.split()) == 9
    width = 9 if has_nine_parts and note is not True else 7
    db = DB(config.db_path + "/data/", width=width)

    if note:
        target = request.target.replace("#", "")
        doc_id = request.philo_id.split()[0] + " %"
        cursor = db.dbh.cursor()
        cursor.execute("select philo_id from toms where id=? and philo_id like ? limit 1", (target, doc_id))
        philo_id = cursor.fetchone()["philo_id"].split()[:7]
        obj = db[philo_id]
    else:
        try:
            obj = db[request.philo_id]
        except ValueError:
            # Fall back to the id carried by the URL path components.
            obj = db[" ".join(request.path_components)]
        philo_id = obj.philo_id

    if width != 9:
        # Climb out of virtual objects until a real object or a div1 is reached.
        while obj["philo_name"] == "__philo_virtual" and obj["philo_type"] != "div1":
            philo_id.pop()
            obj = db[philo_id]

    # Drop trailing zero components from the id that is reported back.
    id_parts = list(obj.philo_id)
    while int(id_parts[-1]) == 0:
        id_parts.pop()

    text_object = {
        "query": dict(pair for pair in request),
        "philo_id": " ".join(str(part) for part in id_parts),
        "prev": neighboring_object_id(db, obj.prev, width),
        "next": neighboring_object_id(db, obj.next, width),
    }
    text_object["metadata_fields"] = {name: obj[name] for name in db.locals["metadata_fields"]}

    if width == 9:
        # For width-9 ids the citation is built from the document-level object
        # of a width-7 handle; that handle stays bound to ``db`` for the
        # token regex lookup below.
        db = DB(config.db_path + "/data/", width=7)
        cited = db[obj.philo_id[0]]
    else:
        cited = obj
    citation_hrefs = citation_links(db, config, cited)
    text_object["citation"] = citations(cited, citation_hrefs, config, report="navigation")

    text, imgs = get_text_obj(obj, config, request, db.locals["token_regex"], note=note)
    if config.navigation_formatting_regex:
        # Apply the configured (pattern, replacement) pairs in order.
        for pattern, replacement in config.navigation_formatting_regex:
            text = re.sub(r"%s" % pattern, "%s" % replacement, text)
    text_object["text"] = text
    text_object["imgs"] = imgs
    return text_object
61 | 
62 | 
def neighboring_object_id(db, philo_id, width):
    """Return the trimmed id string of a neighboring object.

    ``philo_id`` is a space-separated id; only its first ``width`` components
    are kept and trailing ``"0"`` components are removed. An empty or missing
    id yields ``""``. If the object found under the trimmed id is a virtual
    one (and not a div1), the id of the level above it is returned instead.
    """
    if not philo_id:
        return ""

    parts = philo_id.split()[:width]
    while parts[-1] == "0":
        parts.pop()
    trimmed = " ".join(parts)

    neighbor = db[trimmed]
    if neighbor["philo_name"] == "__philo_virtual" and neighbor["philo_type"] != "div1":
        # Drop the last component so the id no longer points at the
        # virtual object itself.
        return " ".join(parts[:-1])
    return trimmed
77 | 


--------------------------------------------------------------------------------
/python/philologic/runtime/web_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | 
 5 | from philologic.Config import MakeWebConfig
 6 | from orjson import dumps
 7 | 
 8 | 
 9 | class brokenConfig(object):
10 |     """Broken config returned with some default values"""
11 | 
12 |     def __init__(self, db_path, traceback):
13 |         self.web_config_path = db_path + "/data/web_config.cfg"
14 |         self.valid_config = False
15 |         self.traceback = traceback
16 |         self.db_path = db_path
17 | 
18 |     def __getitem__(self, _):
19 |         return ""
20 | 
21 |     def to_json(self):
22 |         """Return JSON representation of config"""
23 |         return dumps({"valid_config": False, "traceback": self.traceback, "web_config_path": self.web_config_path})
24 | 
25 | 
def WebConfig(db_path):
    """Load the runtime web config stored under ``db_path``.

    Any exception raised while building the config is printed to stderr and a
    ``brokenConfig`` carrying the error text is returned in its place.
    """
    try:
        web_config = MakeWebConfig(db_path + "/data/web_config.cfg")
    except Exception as err:
        print(err, file=sys.stderr)
        web_config = brokenConfig(db_path, str(err))
    return web_config
33 | 


--------------------------------------------------------------------------------
/python/philologic/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .pretty_print import pretty_print
2 | from .sort import sort_list
3 | from .convert_entities import convert_entities
4 | from .load_module import load_module
5 | from .metadata_type_handler import extract_full_date, extract_integer
6 | 


--------------------------------------------------------------------------------
/python/philologic/utils/convert_entities.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | 
 4 | from html.entities import name2codepoint
 5 | import regex as re
 6 | 
 7 | entities_match = re.compile(r"&#?\w+;")
 8 | 
 9 | 
10 | def convert_entities(text):
11 |     """Convert entities"""
12 | 
13 |     def fixup(m):
14 |         text = m.group(0)
15 |         if text[:2] == "&#":
16 |             # character reference
17 |             try:
18 |                 if text[:3] == "&#x":
19 |                     return chr(int(text[3:-1], 16))
20 |                 else:
21 |                     return chr(int(text[2:-1]))
22 |             except ValueError:
23 |                 pass
24 |         else:
25 |             # named entity
26 |             try:
27 |                 text = chr(name2codepoint[text[1:-1]])
28 |             except KeyError:
29 |                 pass
30 |         return text  # leave as is
31 | 
32 |     return entities_match.sub(fixup, text)
33 | 


--------------------------------------------------------------------------------
/python/philologic/utils/load_module.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Load Python source file"""
 3 | 
 4 | from importlib.machinery import SourceFileLoader
 5 | from importlib.util import spec_from_loader, module_from_spec
 6 | 
 7 | 
 8 | def load_module(module_name, path):
 9 |     """Load arbitrary Python source file"""
10 |     loader = SourceFileLoader(module_name, path)
11 |     spec = spec_from_loader(loader.name, loader)
12 |     module = module_from_spec(spec)
13 |     loader.exec_module(module)
14 |     return module
15 | 


--------------------------------------------------------------------------------
/python/philologic/utils/metadata_type_handler.py:
--------------------------------------------------------------------------------
 1 | """Series of functions to extract and/or convert metadata field values to the right SQL type"""
 2 | 
 3 | import datetime
 4 | import regex as re
 5 | 
 6 | INTEGER = re.compile(r"^(-?\d{1,})")
 7 | YEAR_MONTH_DAY = re.compile(r"(\d+)-(\d+)-(\d+)")
 8 | YEAR_MONTH = re.compile(r"^(\d+)-(\d+)\Z")
 9 | YEAR = re.compile(r"^(\d+)\Z")
10 | MONTH_MAX_DAY = {1: 31, 2: 29, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
11 | 
12 | 
13 | def day_fail_safe(day, month=None):
14 |     """Make sure we have a valid day"""
15 |     if month is not None and month != 0:
16 |         if day > MONTH_MAX_DAY[month]:
17 |             day = 1
18 |     if day > 31 or day <= 0:
19 |         day = 1
20 |     return day
21 | 
22 | 
23 | def month_fail_safe(month):
24 |     """Make sure we have a valid month"""
25 |     if month > 12 or month < 1:
26 |         month = 1
27 |     return month
28 | 
29 | 
30 | def extract_full_date(date):
31 |     """Extract full dates and format as year-month-day"""
32 |     full_date_match = re.search(r"^(\d+)-(\d+)-(\d+)", date)
33 |     if full_date_match:  # e.g. 1987-10-23
34 |         year, month, day = map(int, full_date_match.groups())
35 |         month = month_fail_safe(month)
36 |         day = day_fail_safe(day, month)
37 |         return datetime.date(year, month, day)
38 |     month_year_match = re.search(r"^(\d+)-(\d+)$", date)
39 |     if month_year_match:  # e.g. 1987-10
40 |         year, month = map(int, month_year_match.groups())
41 |         month = month_fail_safe(month)
42 |         return datetime.date(year, month, 1)
43 |     year_match = re.search(r"^(\d+)$", date)
44 |     if year_match:  # e.g. 1987
45 |         year_str = year_match.groups()[0]
46 |         if len(year_str) > 4:
47 |             year_str = year_str[:4]
48 |         year = int(year_str)
49 |         return datetime.date(year, 1, 1)
50 |     return datetime.date(9999, 12, 31)
51 | 
52 | 
53 | def extract_integer(field_value):
54 |     """Extract integer from field value and return a Python int"""
55 |     integer = INTEGER.search(field_value)
56 |     if integer is not None:
57 |         return int(integer.group())
58 |     else:
59 |         return None
60 | 


--------------------------------------------------------------------------------
/python/philologic/utils/pretty_print.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | 
 4 | def pretty_print(value, htchar="\t", lfchar="\n", indent=0):
 5 |     """Pretty printing from a Stack Overflow answer:
 6 |     http://stackoverflow.com/questions/3229419/pretty-printing-nested-dictionaries-in-python#answer-26209900."""
 7 |     nlch = lfchar + htchar * (indent + 1)
 8 |     if type(value) is dict:
 9 |         if value:
10 |             items = [nlch + repr(key) + ": " + pretty_print(value[key], htchar, lfchar, indent + 1) for key in value]
11 |             return "{%s}" % (",".join(items) + lfchar + htchar * indent)
12 |         else:
13 |             return "{}"
14 |     elif type(value) is list:
15 |         if value:
16 |             items = [nlch + pretty_print(item, htchar, lfchar, indent + 1) for item in value]
17 |             return "[%s]" % (",".join(items) + lfchar + htchar * indent)
18 |         else:
19 |             return "[]"
20 |     elif type(value) is tuple:
21 |         if value:
22 |             items = [nlch + pretty_print(item, htchar, lfchar, indent + 1) for item in value]
23 |             return "(%s)" % (",".join(items) + lfchar + htchar * indent)
24 |         else:
25 |             return "()"
26 |     else:
27 |         return repr(value)
28 | 


--------------------------------------------------------------------------------
/python/philologic/utils/sort.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import datetime
 4 | 
 5 | from natsort import natsorted
 6 | from unidecode import unidecode
 7 | 
 8 | 
 9 | def get_key(d, f):
10 |     key = d.get(f, "ZZZZZ")
11 |     if isinstance(key, datetime.date):
12 |         return f"{key.year}-{key.month}-{key.day}"
13 |     elif isinstance(key, int):
14 |         return key
15 |     else:
16 |         return unidecode(key)
17 | 
18 | 
def sort_list(list_to_sort, sort_keys):
    """Naturally sort a list of mappings by the fields named in ``sort_keys``.

    Each element is keyed by the list of its ``get_key`` values, one per
    field and in the order given, and the list is sorted in ascending order
    with ``natsorted``.
    """
    return natsorted(
        list_to_sort,
        key=lambda entry: [get_key(entry, field) for field in sort_keys],
        reverse=False,
    )
27 | 


--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | [build-system]
 3 | requires = ["setuptools>=61.0", "wheel", "build"]
 4 | build-backend = "setuptools.build_meta"
 5 | 
 6 | [project]
 7 | name = "philologic"
 8 | version = "4.7.5.0"
 9 | authors = [
10 |     { name = "Clovis Gladstone", email = "clovisgladstone@artfl.uchicago.edu" },
11 | ]
12 | license = { file = "LICENSE" }
13 | description = "A concordance search engine for TEI-XML"
14 | readme = "README"
15 | urls = { Homepage = "https://github.com/ARTFL-Project/PhiloLogic4" }
16 | 
17 | requires-python = ">=3.10"
18 | dependencies = [
19 |     "regex",
20 |     "lxml",
21 |     "python-levenshtein",
22 |     "natsort",
23 |     "multiprocess",
24 |     "tqdm",
25 |     "orjson",
26 |     "black",
27 |     "msgpack",
28 |     "unidecode",
29 |     "lz4",
30 | ]
31 | 
32 | [project.scripts]
33 | philoload4 = "philologic.loadtime:philoload"
34 | 
35 | 
36 | [tool.setuptools]
37 | packages = [
38 |     "philologic",
39 |     "philologic.runtime",
40 |     "philologic.utils",
41 |     "philologic.runtime.reports",
42 |     "philologic.loadtime",
43 | ]
44 | 


--------------------------------------------------------------------------------
/www/.htaccess:
--------------------------------------------------------------------------------
 1 | Options +ExecCGI
 2 | Options -Indexes
 3 | AddHandler cgi-script py
 4 | DirectoryIndex dispatcher.py
 5 | 
 6 | RewriteEngine on
 7 | RewriteRule ^assets/(.*) app/dist/assets/$1 [L]
 8 | RewriteRule ^img/(.*) app/dist/img/$1 [L]
 9 | RewriteRule ^concordance(.*) dispatcher.py$1 [PT,QSA]
10 | RewriteRule ^kwic(.*) dispatcher.py$1 [PT,QSA]
11 | RewriteRule ^collocation(.*) dispatcher.py$1 [PT,QSA]
12 | RewriteRule ^time_series(.*) dispatcher.py$1 [PT,QSA]
13 | RewriteRule ^query?report=(.*) dispatcher.py$1 [PT,QSA]
14 | RewriteRule ^navigate(.*) dispatcher.py$1 [PT,QSA]
15 | RewriteRule ^aggregation(.*) dispatcher.py$1 [PT,QSA]
16 | RewriteRule ^bibliography(.*) dispatcher.py$1 [PT,QSA]
17 | 
18 | 
19 | <IfModule mod_brotli.c>
20 |     AddOutputFilterByType BROTLI_COMPRESS text/html text/plain text/xml text/css text/javascript application/javascript application/xhtml+xml
21 | </IfModule>


--------------------------------------------------------------------------------
/www/app/.env:
--------------------------------------------------------------------------------
1 | VUE_APP_I18N_LOCALE=en
2 | VUE_APP_I18N_FALLBACK_LOCALE=en
3 | 


--------------------------------------------------------------------------------
/www/app/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | node_modules
 3 | /dist
 4 | 
 5 | # local env files
 6 | .env.local
 7 | .env.*.local
 8 | 
 9 | # Log files
10 | npm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | 
14 | # Editor directories and files
15 | .idea
16 | .vscode
17 | *.suo
18 | *.ntvs*
19 | *.njsproj
20 | *.sln
21 | *.sw*
22 | 


--------------------------------------------------------------------------------
/www/app/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <base href="/" />
 5 |     <meta charset="utf-8" />
 6 |     <meta name="viewport" content="width=device-width,initial-scale=1.0" />
 7 |     <link rel="icon" href="/favicon.ico" />
 8 |     <link rel="preconnect" href="https://fonts.googleapis.com" />
 9 |     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10 |     <link
11 |       href="https://fonts.googleapis.com/css2?family=Open+Sans:ital@0;1&family=Source+Serif+Pro:ital,wght@0,400;0,600;1,400;1,600&display=swap"
12 |       rel="stylesheet"
13 |     />
14 |     <script type="module" src="/src/main.js"></script>
15 |     <title>PhiloLogic4</title>
16 |   </head>
17 | 
18 |   <body>
19 |     <noscript>
20 |       <strong
21 |         >We're sorry but PhiloLogic4 doesn't work without JavaScript enabled.
22 |         Please enable it to continue.</strong
23 |       >
24 |     </noscript>
25 |     <div id="app"></div>
26 |     <!-- built files will be auto injected -->
27 |   </body>
28 | </html>
29 | 


--------------------------------------------------------------------------------
/www/app/misconfiguration.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 
 4 | <head>
 5 |     <title>PhiloLogic4 misconfiguration</title>
 6 |     <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 8 | 
 9 |     <link href='http://fonts.googleapis.com/css?family=Roboto:400,300,300italic,400italic,700,700italic&subset=latin,cyrillic-ext,greek-ext,greek,latin-ext,cyrillic' rel='stylesheet' type='text/css'>
10 | 
11 |     <!-- This is an optional font to use for Greek -->
12 |     <!--<link href='http://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,700italic,400,300,700&subset=latin,cyrillic-ext,greek-ext,greek,latin-ext,cyrillic' rel='stylesheet' type='text/css'>-->
13 | 
14 |     <!--Load all required CSS-->
15 |     <link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
16 |     <link rel="icon" href="favicon.ico" type="image/x-icon">
17 |     <!-- Bootstrap core CSS -->
18 |     <link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/bootstrap/3.3.2/css/bootstrap.min.css">
19 |     <!-- PhiloLogic4 CSS -->
20 |     $CSS
21 |     <style>
22 |         #main-body {
23 |             font-size: 130%;
24 |         }
25 |         
26 |         h1 {
27 |             margin-top: 20px;
28 |             text-align: center;
29 |         }
30 |         
31 |         #main-body .bg-danger {
32 |             margin: 50px 50px 10px 50px;
33 |             padding: 15px;
34 |         }
35 |         
36 |         .code-block {
37 |             margin: 0px 50px;
38 |             padding: 15px;
39 |             font-size: 14px;
40 |         }
41 |     </style>
42 | 
43 | </head>
44 | 
45 | <body>
46 |     <div class="container" id="main-body">
47 |         <h1>PhiloLogic configuration error!</h1>
48 |         <div class="bg-danger">
49 |             A syntax error was detected in the $CONFIG_FILE configuration file. The following traceback should help you identify the issue:
50 |         </div>
51 |         <div class="code-block">$TRACEBACK</div>
52 |     </div>
53 | </body>
54 | 
55 | </html>


--------------------------------------------------------------------------------
/www/app/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "philologic",
 3 |     "version": "4.7",
 4 |     "private": true,
 5 |     "scripts": {
 6 |         "serve": "npm i && vite preview",
 7 |         "build": "npm i && vite build && rm -rf node_modules",
 8 |         "host": "npm i && vite --host"
 9 |     },
10 |     "dependencies": {
11 |         "@intlify/unplugin-vue-i18n": "^0.8.2",
12 |         "@popperjs/core": "^2.9.2",
13 |         "axios": "^0.21.0",
14 |         "bootstrap": "^5.0.1",
15 |         "chart.js": "^2.9.3",
16 |         "core-js": "^3.13.1",
17 |         "glightbox": "^3.2.0",
18 |         "gsap": "^3.9.1",
19 |         "vue": "^3.2.0",
20 |         "vue-i18n": "^9.2.2",
21 |         "vue-router": "^4.0.0",
22 |         "vue-scrollto": "^2.17.1",
23 |         "vuex": "^4.0.0",
24 |         "vuex-map-fields": "^1.4.0"
25 |     },
26 |     "devDependencies": {
27 |         "@originjs/vite-plugin-commonjs": "^1.0.3",
28 |         "@vitejs/plugin-vue": "^4.0.0",
29 |         "@vue/compiler-sfc": "^3.1.0",
30 |         "eslint": "^7.5.0",
31 |         "eslint-plugin-vue": "^8.5.0",
32 |         "sass": "^1.49.9",
33 |         "vite": "^4.1.1"
34 |     },
35 |     "eslintConfig": {
36 |         "root": true,
37 |         "env": {
38 |             "node": true
39 |         },
40 |         "extends": [
41 |             "plugin:vue/essential",
42 |             "eslint:recommended"
43 |         ],
44 |         "rules": {
45 |             "no-console": "off"
46 |         },
47 |         "parserOptions": {
48 |             "ecmaVersion": 12,
49 |             "sourceType": "module"
50 |         }
51 |     },
52 |     "browserslist": [
53 |         "> 1%",
54 |         "last 2 versions"
55 |     ]
56 | }


--------------------------------------------------------------------------------
/www/app/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/public/favicon.ico


--------------------------------------------------------------------------------
/www/app/src/assets/language.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/language.png


--------------------------------------------------------------------------------
/www/app/src/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/logo.png


--------------------------------------------------------------------------------
/www/app/src/assets/philo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/app/src/assets/philo.png


--------------------------------------------------------------------------------
/www/app/src/assets/styles/theme.module.scss:
--------------------------------------------------------------------------------
  1 | // Custom Bootstrap changes: don't edit
  2 | $popover-max-width: 50%;
  3 | .custom-popover {
  4 |     overflow: auto;
  5 |     text-align: justify !important;
  6 |     max-height: 60%;
  7 | }
  8 | 
  9 | // Theme colors
 10 | $header-color: rgb(245, 219, 157);
 11 | $button-color: rgba(143, 57, 49, .8);
 12 | $button-color-active: rgb(143, 57, 49);
 13 | $link-color: #8f3931;
 14 | $passage-color: rgb(180, 106, 85);
 15 | // Themed elements
 16 | nav.navbar {
 17 |     background-color: $header-color !important;
 18 | }
 19 | 
 20 | $secondary: $button-color;
 21 | .btn-secondary.active {
 22 |     background-color: $button-color-active !important;
 23 | }
 24 | 
 25 | .btn-outline-secondary.active {
 26 |     color: #fff !important;
 27 | }
 28 | 
 29 | $info: $button-color;
 30 | .btn-light {
 31 |     border: solid 1px rgb(206, 212, 218) !important;
 32 | }
 33 | 
 34 | .btn-light.active {
 35 |     background-color: #eee !important;
 36 | }
 37 | 
 38 | .link-color {
 39 |     color: $link-color
 40 | }
 41 | 
 42 | a {
 43 |     color: $link-color !important
 44 | }
 45 | 
 46 | a.btn-secondary {
 47 |     color: #fff !important;
 48 | }
 49 | 
 50 | .number,
 51 | .card-header {
 52 |     background-color: $header-color !important;
 53 |     color: $link-color !important;
 54 | }
 55 | 
 56 | .input-group-text,
 57 | .custom-control-input:checked~.custom-control-label::before,
 58 | .custom-control-input:focus~.custom-control-label::before {
 59 |     color: $link-color !important;
 60 |     background-color: #fff !important;
 61 |     border-color: $link-color !important;
 62 | }
 63 | 
 64 | .metadata-args,
 65 | .remove-metadata,
 66 | .term-groups,
 67 | .close-pill {
 68 |     border-color: $link-color !important;
 69 | }
 70 | 
 71 | .metadata-label,
 72 | .remove-metadata:hover,
 73 | .term-group-word:hover,
 74 | .close-pill:hover {
 75 |     background-color: $button-color !important;
 76 |     color: #fff !important;
 77 | }
 78 | 
 79 | .custom-control-label::after {
 80 |     background-color: $button-color !important;
 81 | }
 82 | 
 83 | .letter {
 84 |     color: $link-color !important;
 85 | }
 86 | 
 87 | .letter:hover,
 88 | #dico-landing-volume .list-group-item:hover {
 89 |     background-color: $button-color !important;
 90 |     color: #fff !important;
 91 | }
 92 | 
 93 | #dico-landing-volume a:hover {
 94 |     color: #fff !important;
 95 | }
 96 | 
 97 | #report-error {
 98 |     color: #fff !important;
 99 | }
100 | 
101 | .custom-select:focus,
102 | .custom-control-input:checked~.custom-control-label::before,
103 | .custom-control-input:focus~.custom-control-label::before,
104 | input[type="text"]:focus {
105 |     box-shadow: 0 0 0 0.05rem $button-color !important;
106 |     border-color: $button-color !important;
107 |     opacity: .5
108 | }
109 | 
110 | .landing-page-btn:focus {
111 |     border-color: $link-color !important;
112 | }
113 | 
114 | #report button:focus {
115 |     mix-blend-mode: hard-light;
116 | }
117 | 
118 | :export {
119 |     color: $link-color
120 | }


--------------------------------------------------------------------------------
/www/app/src/components/Citations.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |     <span class="philologic-cite ps-2">
 3 |         <span class="citation text-view" v-for="(cite, citeIndex) in citation" :key="citeIndex">
 4 |             <span v-html="cite.prefix" v-if="cite.prefix"></span>
 5 |             <router-link :to="cite.href" :style="cite.style" v-if="cite.href">{{ cite.label }}</router-link>
 6 |             <span :style="cite.style" v-else>{{ cite.label }}</span>
 7 |             <span v-html="cite.suffix" v-if="cite.suffix"></span>
 8 |             <span v-if="citeIndex != citation.length - 1"
 9 |                 ><span class="separator px-2" v-if="!separator">&#9679;</span><span v-else>{{ separator }}</span></span
10 |             >
11 |         </span>
12 |     </span>
13 | </template>
14 | <script>
15 | export default {
16 |     name: "citations-generator",
17 |     props: ["citation", "separator"],
18 | };
19 | </script>
20 | <style scoped>
21 | .separator {
22 |     font-size: 0.75rem;
23 |     vertical-align: 0.05rem;
24 | }
25 | .citation {
26 |     font-weight: 600;
27 | }
28 | </style>
29 | 


--------------------------------------------------------------------------------
/www/app/src/components/ExportResults.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |     <div class="modal-dialog">
 3 |         <div class="modal-content">
 4 |             <div class="modal-header">
 5 |                 <h5 class="modal-title">{{ $t("resultsSummary.exportResults") }}</h5>
 6 |                 <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
 7 |             </div>
 8 |             <div class="modal-body">
 9 |                 <p v-if="report == 'concordance' || report == 'kwic' || report == 'bibliography'">
10 |                     {{ $t("exportResults.currentPage") }}.
11 |                 </p>
12 |                 <div v-if="report == 'concordance' || report == 'kwic'">
13 |                     <h6>{{ $t("exportResults.html") }}:</h6>
14 |                     <button type="button" class="btn btn-secondary"
15 |                         @click="getResults('json', false)">JSON</button>&nbsp;
16 |                     <button type="button" class="btn btn-secondary" aria-label="CSV"
17 |                         @click="getResults('csv', false)">CSV</button>
18 |                 </div>
19 |                 <h6 class="mt-2" v-if="report == 'concordance' || report == 'kwic'">
20 |                     {{ $t("exportResults.plain") }}:
21 |                 </h6>
22 |                 <button type="button" class="btn btn-secondary" aria-label="JSON"
23 |                     @click="getResults('json', true)">JSON</button>&nbsp;
24 |                 <button type="button" class="btn btn-secondary" aria-label="CSV"
25 |                     @click="getResults('csv', true)">CSV</button>
26 |             </div>
27 |         </div>
28 |     </div>
29 | </template>
30 | <script>
31 | import { mapFields } from "vuex-map-fields";
32 | 
export default {
    name: "ExportResults",
    inject: ["$http"],
    computed: {
        ...mapFields(["formData.report"]),
    },
    methods: {
        // Fetch the current results from export_results.py in the requested
        // format ("json" or "csv") and hand them to the browser as a download.
        // `html` is forwarded as the filter_html parameter.
        getResults(format, html) {
            // The report is blanked in the serialized form data and appended
            // explicitly at the end of the URL.
            const queryString = this.paramsToUrlString({
                ...this.$store.state.formData,
                filter_html: html.toString(),
                output_format: format,
                report: "",
            });
            const exportUrl = `${this.$dbUrl}/scripts/export_results.py?${queryString}&report=${this.report}`;

            this.$http.get(exportUrl).then((response) => {
                // The download is named after the serialized search parameters.
                const filename = `${this.paramsToUrlString({ ...this.$store.state.formData })}.${format}`;
                const text =
                    format == "json" ? JSON.stringify(response.data) : format == "csv" ? response.data : "";

                // Trigger the download through a temporary, hidden link.
                const link = document.createElement("a");
                link.setAttribute("href", "data:text/plain;charset=utf-8," + encodeURIComponent(text));
                link.setAttribute("download", filename);
                link.style.display = "none";
                document.body.appendChild(link);
                link.click();
                document.body.removeChild(link);
            });
        },
    },
};
69 | </script>


--------------------------------------------------------------------------------
/www/app/src/components/LocaleChanger.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |     <div style="margin-top: 0.2rem">
 3 |         <div class="dropdown">
 4 |             <button class="btn btn-sm btn-outline-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"
 5 |                 aria-expanded="false" aria-label="Change language">
 6 |                 <img src="../assets/language.png" aria-label="language picker" height="18" width="18" />
 7 |             </button>
 8 |             <ul class="dropdown-menu">
 9 |                 <li v-for="locale in $i18n.availableLocales" :key="`locale-${locale}`" @click="changeLocale(locale)">
10 |                     {{ localNames[locale] }}
11 |                 </li>
12 |             </ul>
13 |         </div>
14 |     </div>
15 | </template>
16 | <script>
17 | export default { // options for the language-picker dropdown component
18 |     name: "locale-changer",
19 |     data() {
20 |         return {
21 |             localNames: { // label rendered in the dropdown for each locale code
22 |                 en: "English",
23 |                 fr: "Français",
24 |             },
25 |         };
26 |     },
27 |     methods: {
28 |         changeLocale(locale) { // click handler of each dropdown entry
29 |             localStorage.setItem("lang", locale); // stored under the "lang" key
30 |             this.$i18n.locale = locale; // switch the active vue-i18n locale
31 |         },
32 |     },
33 | };
34 | </script>
35 | <style lang="scss" scoped>
36 | @import "../assets/styles/theme.module.scss";
37 | 
38 | button {
39 |     font-size: inherit !important;
40 |     border-width: 0 !important;
41 | }
42 | 
43 | li {
44 |     font-size: 80% !important;
45 | }
46 | 
47 | button,
48 | li {
49 |     font-variant: small-caps;
50 |     padding-left: 0.5rem;
51 |     padding-right: 0.5rem;
52 | }
53 | 
54 | li:hover {
55 |     color: #fff;
56 |     background: $link-color;
57 |     cursor: pointer;
58 | }
59 | 
60 | .dropdown-menu {
61 |     left: initial !important;
62 |     right: 0;
63 |     min-width: initial;
64 |     padding: 0;
65 | }
66 | </style>


--------------------------------------------------------------------------------
/www/app/src/i18n.js:
--------------------------------------------------------------------------------
 1 | import { createI18n } from 'vue-i18n'
 2 | import messages from "@intlify/unplugin-vue-i18n/messages";
 3 | 
 4 | 
 5 | export default createI18n({ // vue-i18n instance; main.js installs it with app.use(i18n)
 6 |     legacy: false, // NOTE(review): per vue-i18n docs this selects Composition API mode — confirm components rely on it
 7 |     locale: 'en', // locale active at start-up
 8 |     fallbackLocale: 'en', // locale used when a message is missing in the active one
 9 |     availableLocales: ["en", "fr"],
10 |     messages: messages, // messages supplied by @intlify/unplugin-vue-i18n
11 | })


--------------------------------------------------------------------------------
/www/app/src/main.js:
--------------------------------------------------------------------------------
 1 | import { createApp } from "vue";
 2 | import vueScrollTo from "vue-scrollto";
 3 | import App from "./App.vue";
 4 | import router from "./router";
 5 | import store from "./store";
 6 | import {
 7 |     paramsFilter,
 8 |     paramsToRoute,
 9 |     paramsToUrlString,
10 |     copyObject,
11 |     saveToLocalStorage,
12 |     mergeResults,
13 |     sortResults,
14 |     deepEqual,
15 |     dictionaryLookup,
16 |     debug,
17 | } from "./mixins.js";
18 | import axios from "axios";
19 | import "bootstrap";
20 | 
21 | import appConfig from "../appConfig.json";
22 | import i18n from "./i18n";
23 | 
24 | axios // the web config is fetched first; the app is created only once it has arrived
25 |     .get(`${appConfig.dbUrl}/scripts/get_web_config.py`, {})
26 |     .then((response) => {
27 |         const app = createApp(App).use(i18n);
28 |         app.config.globalProperties.$philoConfig = response.data; // reachable as this.$philoConfig
29 |         app.config.globalProperties.$scrollTo = vueScrollTo.scrollTo; // reachable as this.$scrollTo
30 |         app.config.globalProperties.$dbUrl = appConfig.dbUrl; // reachable as this.$dbUrl
31 |         app.config.unwrapInjectedRef = true;
32 |         app.provide("$http", axios); // the three provide() calls expose the same values to inject()
33 |         app.provide("$dbUrl", appConfig.dbUrl);
34 |         app.provide("$philoConfig", response.data);
35 |         app.use(router);
36 |         app.use(store);
37 |         app.mixin({ // global mixin: the helpers imported from mixins.js become methods of every component
38 |             methods: {
39 |                 paramsFilter,
40 |                 paramsToRoute,
41 |                 paramsToUrlString,
42 |                 copyObject,
43 |                 saveToLocalStorage,
44 |                 mergeResults,
45 |                 sortResults,
46 |                 deepEqual,
47 |                 dictionaryLookup,
48 |                 debug,
49 |             },
50 |         });
51 |         app.directive("scroll", { // v-scroll: run the bound function on every window scroll event
52 |             mounted: function (el, binding) {
53 |                 el.scrollHandler = function (evt) {
54 |                     if (binding.value(evt, el)) { // a truthy return value unregisters the listener
55 |                         window.removeEventListener("scroll", el.scrollHandler);
56 |                     }
57 |                 };
58 |                 window.addEventListener("scroll", el.scrollHandler);
59 |             },
60 |             unmounted: function (el) {
61 |                 window.removeEventListener("scroll", el.scrollHandler);
62 |             },
63 |         });
64 | 
65 |         router.isReady().then(() => app.mount("#app")); // mount only after the router reports ready
66 |     })
67 |     .catch((error) => {
68 |         // reached when the config request fails or the setup callback above throws; the error is only logged
69 |         console.log(error.toString());
70 |     });
71 | 


--------------------------------------------------------------------------------
/www/app/src/router/index.js:
--------------------------------------------------------------------------------
 1 | // import Vue from 'vue'
 2 | import { createRouter, createWebHistory } from 'vue-router'
 3 | 
 4 | const concordance = () => import('../components/Concordance');
 5 | const kwic = () => import('../components/Kwic');
 6 | const bibliography = () => import('../components/Bibliography');
 7 | const collocation = () => import('../components/Collocation');
 8 | const timeSeries = () => import('../components/TimeSeries');
 9 | const textNavigation = () => import('../components/TextNavigation');
10 | const tableOfContents = () => import('../components/TableOfContents');
11 | const landingPage = () => import('../components/LandingPage');
12 | const aggregation = () => import("../components/Aggregation");
13 | import appConfig from '../../appConfig.json'
14 | 
15 | 
16 | const router = createRouter({ // application routes; every component is loaded through a dynamic import
17 |     history: createWebHistory(appConfig.dbUrl.replace(/https?:\/\/[^/]+\//, "")), // base = dbUrl without "scheme://host/"
18 |     routes: [{
19 |         path: '/',
20 |         name: 'home',
21 |         component: landingPage
22 |     },
23 |     {
24 |         path: '/concordance',
25 |         name: 'concordance',
26 |         component: concordance
27 |     },
28 |     {
29 |         path: '/kwic',
30 |         name: 'kwic',
31 |         component: kwic
32 |     },
33 |     {
34 |         path: '/bibliography',
35 |         name: 'bibliography',
36 |         component: bibliography
37 |     },
38 |     {
39 |         path: "/collocation",
40 |         name: "collocation",
41 |         component: collocation
42 |     },
43 |     {
44 |         path: "/time_series",
45 |         name: "time_series",
46 |         component: timeSeries
47 |     },
48 |     {
49 |         path: "/navigate/:pathInfo([\\d/]+)", // pathInfo accepts digits and slashes
50 |         name: "textNavigation",
51 |         component: textNavigation
52 |     },
53 |     {
54 |         path: "/navigate/:pathInfo(\\d+)/table-of-contents", // pathInfo accepts digits only
55 |         name: "tableOfContents",
56 |         component: tableOfContents
57 |     },
58 |     {
59 |         path: "/aggregation",
60 |         name: 'aggregation',
61 |         component: aggregation
62 |     },
63 |     // for compatibility with old Philo links: still used in landing page and TOC
64 |     {
65 |         path: "/query",
66 |         redirect: to => { // target route is the one named by the `report` query parameter
67 |             return {
68 |                 name: to.query.report,
69 |                 params: to.params
70 |             }
71 |         }
72 |     }
73 |     ],
74 |     scrollBehavior(to, from, savedPosition) { // reuse savedPosition when one is given, else go to the top-left
75 |         if (savedPosition) {
76 |             return savedPosition
77 |         } else {
78 |             return {
79 |                 left: 0,
80 |                 top: 0
81 |             }
82 |         }
83 |     }
84 | })
85 | export default router


--------------------------------------------------------------------------------
/www/app/src/store/index.js:
--------------------------------------------------------------------------------
 1 | import Vuex from 'vuex'
 2 | 
 3 | import {
 4 |     getField,
 5 |     updateField
 6 | } from 'vuex-map-fields'
 7 | 
 8 | 
 9 | export default new Vuex.Store({ // application-wide Vuex store
10 |     strict: true, // NOTE(review): Vuex strict mode throws on state changes made outside mutations — confirm this is wanted in production
11 |     state: {
12 |         formData: {},
13 |         reportValues: {},
14 |         resultsLength: 0,
15 |         textNavigationCitation: {},
16 |         textObject: '',
17 |         navBar: '',
18 |         tocElements: {},
19 |         byte: '',
20 |         searching: false,
21 |         currentReport: "concordance",
22 |         description: {
23 |             start: 0,
24 |             end: 0,
25 |             results_per_page: 25,
26 |             termGroups: [],
27 |         },
28 |         aggregationCache: {
29 |             results: [],
30 |             query: {}
31 |         },
32 |         sortedKwicCache: {
33 |             queryParams: {},
34 |             results: [],
35 |             totalResults: 0
36 |         },
37 |         totalResultsDone: false,
38 |         showFacets: true,
39 |         urlUpdate: "",
40 |         metadataUpdate: {},
41 |     },
42 |     getters: {
43 |         getField // generic getter supplied by vuex-map-fields
44 |     },
45 |     mutations: {
46 |         updateField, // generic mutation supplied by vuex-map-fields
47 |         updateFormData(state, payload) { // replace formData wholesale
48 |             state.formData = payload
49 |         },
50 |         setDefaultFields(state, payload) { // copy each enumerable field of payload onto the existing formData object
51 |             for (let field in payload) {
52 |                 state.formData[field] = payload[field]
53 |             }
54 |         },
55 |         updateFormDataField(state, payload) { // set a single field; payload is { key, value }
56 |             state.formData[payload.key] = payload.value
57 |         },
58 |         updateAllMetadata(state, payload) { // shallow-merge payload over formData into a new object
59 |             state.formData = { ...state.formData, ...payload }
60 |         },
61 |         setReportValues(state, payload) {
62 |             state.reportValues = payload
63 |         },
64 |         updateCitation(state, payload) { // stored as textNavigationCitation
65 |             state.textNavigationCitation = payload
66 |         },
67 |         updateDescription(state, payload) {
68 |             state.description = payload
69 |         },
70 |         updateResultsLength(state, payload) {
71 |             state.resultsLength = payload
72 |         }
73 |     },
74 |     actions: {
75 |         updateStartEndDate(context, payload) { // commits formData with start_date/end_date taken from payload.startDate/endDate
76 |             context.commit("updateFormData", {
77 |                 ...context.state.formData,
78 |                 start_date: payload.startDate,
79 |                 end_date: payload.endDate
80 |             })
81 |         },
82 |     }
83 | })


--------------------------------------------------------------------------------
/www/app/vite.config.js:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "vite";
 2 | import vue from "@vitejs/plugin-vue";
 3 | import VueI18nPlugin from "@intlify/unplugin-vue-i18n/vite";
 4 | import { fileURLToPath, URL } from "node:url";
 5 | import { resolve, dirname } from "node:path";
 6 | 
 7 | export default defineConfig({ // Vite build and dev-server configuration for the web app
 8 |     plugins: [
 9 |         vue(),
10 |         VueI18nPlugin({ // i18n plugin reads its resources from src/locales next to this file
11 |             include: resolve(
12 |                 dirname(fileURLToPath(import.meta.url)),
13 |                 "./src/locales/**"
14 |             ),
15 |         }),
16 |     ],
17 |     base: process.env.NODE_ENV === "production" ? getBaseUrl() : "/", // production uses the database URL, otherwise "/"
18 |     resolve: {
19 |         alias: {
20 |             "@": fileURLToPath(new URL("./src", import.meta.url)), // "@" maps to the src directory
21 |         },
22 |         // TODO: Remove by explicitly adding extension in imports
23 |         extensions: [".js", ".json", ".vue"],
24 |     },
25 |     server: {
26 |         hmr: {
27 |             overlay: false,
28 |         },
29 |     },
30 | });
31 | 
32 | function getBaseUrl() {
33 |     const fs = require("fs");
34 |     let appConfig = fs.readFileSync("appConfig.json");
35 |     let dbUrl = JSON.parse(appConfig).dbUrl;
36 |     if (dbUrl == "") {
37 |         let dbPath = __dirname.replace(/app$/, "");
38 |         let dbname = dbPath.split("/").reverse()[1];
39 |         let config = fs.readFileSync("/etc/philologic/philologic4.cfg", "utf8");
40 |         let re = /url_root = ["']([^"]+)["']/gm;
41 |         let match = re.exec(config);
42 |         let rootPath = match[1];
43 |         if (rootPath.endsWith("/")) {
44 |             rootPath = rootPath.slice(0, -1);
45 |         }
46 |         dbUrl = rootPath + "/" + dbname + "/";
47 |         let jsonString = JSON.stringify({ dbUrl: dbUrl });
48 |         fs.writeFileSync("./appConfig.json", jsonString);
49 |         return dbUrl;
50 |     }
51 |     return dbUrl;
52 | }
53 | 


--------------------------------------------------------------------------------
/www/dispatcher.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Routing for PhiloLogic4."""
 3 | 
 4 | 
 5 | import datetime
 6 | import os
 7 | from random import randint
 8 | from typing import Callable
 9 | from urllib.parse import parse_qs, urlparse
10 | from wsgiref.handlers import CGIHandler
11 | 
12 | import reports
13 | from philologic.runtime import WebConfig, WSGIHandler
14 | from webApp import start_web_app
15 | 
16 | path = os.path.abspath(os.path.dirname(__file__))
17 | 
18 | 
19 | def philo_dispatcher(environ, start_response):
20 |     """Dispatcher function."""
21 |     config = WebConfig(path)
22 |     request = WSGIHandler(environ, config)
23 |     if request.content_type == "application/json" or request.format == "json":
24 |         try:
25 |             path_components = [c for c in environ["PATH_INFO"].split("/") if c]
26 |         except Exception:
27 |             path_components = []
28 |         if path_components:
29 |             if path_components[-1] == "table-of-contents":
30 |                 yield b"".join(reports.table_of_contents(environ, start_response))
31 |             else:
32 |                 yield b"".join(reports.navigation(environ, start_response))
33 |         else:
34 |             try:
35 |                 report_name: str = parse_qs(environ["QUERY_STRING"])["report"][0]
36 |             except KeyError:
37 |                 report_name = urlparse(environ["REQUEST_URI"]).path.split("/")[-1]
38 |             report: Callable = getattr(reports, report_name)
39 |             yield b"".join(report(environ, start_response))
40 |     elif request.full_bibliography is True:
41 |         yield b"".join(reports.bibliography(environ, start_response))
42 |     else:
43 |         yield start_web_app(environ, start_response).encode("utf8")
44 | 
45 |     # clean-up hitlist every now and then
46 |     if randint(0, 10) == 1:
47 |         for file in os.scandir(os.path.join(path, "data/hitlists/*")):
48 |             file_modified = datetime.datetime.fromtimestamp(os.path.getmtime(file.path))
49 |             if datetime.datetime.now() - file_modified > datetime.timedelta(minutes=10):
50 |                 os.remove(file.path)
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     CGIHandler().run(philo_dispatcher)
55 | 


--------------------------------------------------------------------------------
/www/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARTFL-Project/PhiloLogic4/bf3bae45bd0d481dcb5722b2ce10c9632897d827/www/favicon.ico


--------------------------------------------------------------------------------
/www/reports/__init__.py:
--------------------------------------------------------------------------------
 1 | from .concordance import concordance
 2 | from .kwic import kwic
 3 | from .bibliography import bibliography
 4 | from .collocation import collocation
 5 | from .time_series import time_series
 6 | from .navigation import navigation
 7 | from .table_of_contents import table_of_contents
 8 | from .word_property_filter import word_property_filter
 9 | from .aggregation import aggregation
10 | 


--------------------------------------------------------------------------------
/www/reports/aggregation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import aggregation_by_field
15 | except ImportError:
16 |     from philologic.runtime import aggregation_by_field
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def aggregation(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     aggregation_object = aggregation_by_field(request, config)
31 |     headers = [
32 |         ("Content-type", "application/json; charset=UTF-8"),
33 |         ("Access-Control-Allow-Origin", "*"),
34 |     ]
35 |     start_response("200 OK", headers)
36 |     yield orjson.dumps(aggregation_object)
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     CGIHandler().run(aggregation)
41 | 


--------------------------------------------------------------------------------
/www/reports/bibliography.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | import sys
 6 | from wsgiref.handlers import CGIHandler
 7 | 
 8 | sys.path.append("..")
 9 | import custom_functions
10 | 
11 | try:
12 |     from custom_functions import bibliography_results
13 | except ImportError:
14 |     from philologic.runtime import bibliography_results
15 | try:
16 |     from custom_functions import WebConfig
17 | except ImportError:
18 |     from philologic.runtime import WebConfig
19 | try:
20 |     from custom_functions import WSGIHandler
21 | except ImportError:
22 |     from philologic.runtime import WSGIHandler
23 | 
24 | 
25 | def bibliography(environ, start_response):
26 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
27 |     request = WSGIHandler(environ, config)
28 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
29 |     start_response("200 OK", headers)
30 |     bibliography_object, _ = bibliography_results(request, config)
31 |     yield orjson.dumps(bibliography_object)
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     CGIHandler().run(bibliography)
36 | 


--------------------------------------------------------------------------------
/www/reports/collocation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import collocation_results
15 | except ImportError:
16 |     from philologic.runtime import collocation_results
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def collocation(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
31 |     start_response("200 OK", headers)
32 |     collocation_object = collocation_results(request, config)
33 |     yield orjson.dumps(collocation_object)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(collocation)
38 | 


--------------------------------------------------------------------------------
/www/reports/concordance.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import concordance_results
15 | except ImportError:
16 |     from philologic.runtime import concordance_results
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def concordance(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     concordance_object = concordance_results(request, config)
31 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
32 |     start_response("200 OK", headers)
33 |     yield orjson.dumps(concordance_object)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(concordance)
38 | 


--------------------------------------------------------------------------------
/www/reports/kwic.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import kwic_results
15 | except ImportError:
16 |     from philologic.runtime import kwic_results
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def kwic(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     kwic_object = kwic_results(request, config)
31 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
32 |     start_response("200 OK", headers)
33 |     yield orjson.dumps(kwic_object)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(kwic)
38 | 


--------------------------------------------------------------------------------
/www/reports/navigation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import generate_text_object
15 | except ImportError:
16 |     from philologic.runtime import generate_text_object
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def navigation(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
31 |     start_response("200 OK", headers)
32 |     text_object = generate_text_object(request, config)
33 |     yield orjson.dumps(text_object)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(navigation)
38 | 


--------------------------------------------------------------------------------
/www/reports/table_of_contents.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import generate_toc_object
15 | except ImportError:
16 |     from philologic.runtime import generate_toc_object
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def table_of_contents(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 | 
31 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
32 |     start_response("200 OK", headers)
33 |     toc_object = generate_toc_object(request, config)
34 |     yield orjson.dumps(toc_object)
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     CGIHandler().run(table_of_contents)
39 | 


--------------------------------------------------------------------------------
/www/reports/time_series.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import generate_time_series
15 | except ImportError:
16 |     from philologic.runtime import generate_time_series
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def time_series(environ, start_response):
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     time_series_object = generate_time_series(request, config)
31 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
32 |     start_response("200 OK", headers)
33 |     yield orjson.dumps(time_series_object)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(time_series)
38 | 


--------------------------------------------------------------------------------
/www/reports/word_property_filter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import filter_words_by_property
15 | except ImportError:
16 |     from philologic.runtime import filter_words_by_property
17 | try:
18 |     from custom_functions import WebConfig
19 | except ImportError:
20 |     from philologic.runtime import WebConfig
21 | try:
22 |     from custom_functions import WSGIHandler
23 | except ImportError:
24 |     from philologic.runtime import WSGIHandler
25 | 
26 | 
27 | def word_property_filter(environ, start_response):  # WSGI application: JSON-encodes the result of filter_words_by_property
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("reports", ""))
29 |     request = WSGIHandler(environ, config)
30 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
31 |     start_response("200 OK", headers)  # NOTE(review): the next line reads `hits` and `db`, which this module never defines
32 |     filter_results = filter_words_by_property(hits, config.db_path, request, db, config)  # -> NameError as written; confirm the intended arguments
33 |     yield orjson.dumps(filter_results)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     CGIHandler().run(word_property_filter)
38 | 


--------------------------------------------------------------------------------
/www/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/www/scripts/access_request.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | import orjson
 7 | from philologic.runtime import access_control, login_access
 8 | 
 9 | import sys
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
24 | default_reports = ["concordance", "kwic", "collocation", "time_series", "navigation"]
25 | 
26 | 
27 | def access_request(environ, start_response):
28 |     status = "200 OK"
29 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
30 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
31 |     request = WSGIHandler(environ, config)
32 |     access, headers = login_access(environ, request, config, headers)
33 |     start_response(status, headers)
34 |     if access:
35 |         yield orjson.dumps({"access": True})
36 |     else:
37 |         incoming_address, domain_name = access_control.get_client_info(environ)
38 |         yield orjson.dumps({"access": False, "incoming_address": incoming_address, "domain_name": domain_name})
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     CGIHandler().run(access_request)
43 | 


--------------------------------------------------------------------------------
/www/scripts/alignment_to_text.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sys
 5 | from json import dumps
 6 | from wsgiref.handlers import CGIHandler
 7 | 
 8 | from philologic.runtime.DB import DB
 9 | from philologic.runtime.link import byte_range_to_link
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
24 | def alignment_to_text(environ, start_response):
25 |     status = "200 OK"
26 |     headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
27 |     start_response(status, headers)
28 |     config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
29 |     db = DB(config.db_path + "/data/")
30 |     request = WSGIHandler(environ, config)
31 |     link = byte_range_to_link(db, config, request)
32 |     yield dumps({"link": link}).encode("utf-8")
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     CGIHandler().run(alignment_to_text)
37 | 


--------------------------------------------------------------------------------
/www/scripts/get_academic_citation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import orjson
 5 | from wsgiref.handlers import CGIHandler
 6 | from philologic.runtime.DB import DB
 7 | from philologic.runtime.citations import citations, citation_links
 8 | 
 9 | 
10 | import sys
11 | 
12 | sys.path.append("..")
13 | import custom_functions
14 | 
15 | try:
16 |     from custom_functions import WebConfig
17 | except ImportError:
18 |     from philologic.runtime import WebConfig
19 | try:
20 |     from custom_functions import WSGIHandler
21 | except ImportError:
22 |     from philologic.runtime import WSGIHandler
23 | 
24 | 
def get_academic_citation(environ, start_response):
    """WSGI endpoint returning the academic citation for ``request.philo_id`` as JSON.

    The text object is looked up in the database with ``db[request.philo_id]``
    and formatted by ``citations`` using the citation type configured under
    ``config.academic_citation["citation"]``. Yields ``{"citation": ...}``
    serialized with orjson.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    db = DB(config.db_path + "/data/")
    text_obj = db[request.philo_id]
    citation_hrefs = citation_links(db, config, text_obj)
    citation = citations(text_obj, citation_hrefs, config, citation_type=config.academic_citation["citation"])
    yield orjson.dumps({"citation": citation})


if __name__ == "__main__":
    CGIHandler().run(get_academic_citation)
40 | 


--------------------------------------------------------------------------------
/www/scripts/get_bibliography.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | from philologic.runtime import landing_page_bibliography
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import WebConfig
15 | except ImportError:
16 |     from philologic.runtime import WebConfig
17 | try:
18 |     from custom_functions import WSGIHandler
19 | except ImportError:
20 |     from philologic.runtime import WSGIHandler
21 | 
22 | 
def get_bibliography(environ, start_response):
    """WSGI endpoint for the landing-page bibliography.

    Sends a ``200 OK`` JSON response and yields, unmodified, whatever
    ``landing_page_bibliography(request, config)`` returns.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield landing_page_bibliography(request, config)


if __name__ == "__main__":
    CGIHandler().run(get_bibliography)
34 | 


--------------------------------------------------------------------------------
/www/scripts/get_filter_list.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime import build_filter_list
 8 | 
 9 | import sys
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
def get_filter_list(environ, start_response):
    """WSGI endpoint returning the result of ``build_filter_list`` as JSON.

    Sends a ``200 OK`` JSON response and yields the orjson serialization of
    ``build_filter_list(request, config)``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield orjson.dumps(build_filter_list(request, config))


if __name__ == "__main__":
    CGIHandler().run(get_filter_list)
36 | 


--------------------------------------------------------------------------------
/www/scripts/get_frequency.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | import sys
 8 | 
 9 | sys.path.append("..")
10 | import custom_functions
11 | 
12 | try:
13 |     from custom_functions import WebConfig
14 | except ImportError:
15 |     from philologic.runtime import WebConfig
16 | try:
17 |     from custom_functions import WSGIHandler
18 | except ImportError:
19 |     from philologic.runtime import WSGIHandler
20 | try:
21 |     from custom_functions import frequency_results
22 | except ImportError:
23 |     from philologic.runtime import frequency_results
24 | 
25 | 
def get_frequency(environ, start_response):
    """WSGI endpoint returning frequency results as JSON.

    Reads through a hitlist, looks up q.frequency_field in each hit, and
    builds up a list of unique values and their frequencies (the work itself
    is delegated to ``frequency_results``). Yields the orjson serialization
    of that result.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield orjson.dumps(frequency_results(request, config))


if __name__ == "__main__":
    CGIHandler().run(get_frequency)
40 | 


--------------------------------------------------------------------------------
/www/scripts/get_header.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | from philologic.runtime import get_tei_header
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import WebConfig
15 | except ImportError:
16 |     from philologic.runtime import WebConfig
17 | try:
18 |     from custom_functions import WSGIHandler
19 | except ImportError:
20 |     from philologic.runtime import WSGIHandler
21 | 
22 | 
def get_header(environ, start_response):
    """WSGI endpoint returning the TEI header as an HTML response.

    Sends a ``200 OK`` ``text/html`` response and yields the string returned
    by ``get_tei_header(request, config)`` encoded as UTF-8.
    """
    start_response(
        "200 OK",
        [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield get_tei_header(request, config).encode("utf8")


if __name__ == "__main__":
    CGIHandler().run(get_header)
35 | 


--------------------------------------------------------------------------------
/www/scripts/get_landing_page_content.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | from wsgiref.handlers import CGIHandler
 5 | 
 6 | from philologic.runtime import group_by_metadata, group_by_range
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import WebConfig
15 | except ImportError:
16 |     from philologic.runtime import WebConfig
17 | try:
18 |     from custom_functions import WSGIHandler
19 | except ImportError:
20 |     from philologic.runtime import WSGIHandler
21 | 
22 | 
def landing_page_content(environ, start_response):
    """WSGI endpoint returning landing page content.

    When ``request.is_range`` equals the string ``"true"``, ``request.query``
    is interpreted as a ``low-high`` range (a single value is used for both
    bounds) and passed to ``group_by_range``; otherwise the content comes from
    ``group_by_metadata``. The helper's return value is yielded unmodified.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    if request.is_range == "true":
        query = request.query
        # Decode first and split the *decoded* value: splitting a bytes query
        # on a str separator would raise TypeError.
        if isinstance(query, bytes):
            query = query.decode("utf8")
        request_range = [item.strip() for item in query.lower().split("-")]
        if len(request_range) == 1:
            # A single value means a degenerate range: low == high.
            request_range.append(request_range[0])
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results


if __name__ == "__main__":
    CGIHandler().run(landing_page_content)
44 | 


--------------------------------------------------------------------------------
/www/scripts/get_more_context.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime import get_concordance_text
 9 | 
10 | import sys
11 | 
12 | sys.path.append("..")
13 | import custom_functions
14 | 
15 | try:
16 |     from custom_functions import WebConfig
17 | except ImportError:
18 |     from philologic.runtime import WebConfig
19 | try:
20 |     from custom_functions import WSGIHandler
21 | except ImportError:
22 |     from philologic.runtime import WSGIHandler
23 | 
24 | 
def get_more_context(environ, start_response):
    """WSGI endpoint returning an enlarged context for one search hit as JSON.

    Re-runs the query described by the request, picks the hit at index
    ``int(request.hit_num)`` and asks ``get_concordance_text`` for a context
    three times ``config["concordance_length"]``. Yields the orjson
    serialization of that context.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    hit_num = int(request.hit_num)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    context_size = config["concordance_length"] * 3
    hit_context = get_concordance_text(db, hits[hit_num], config.db_path, context_size)
    yield orjson.dumps(hit_context)


if __name__ == "__main__":
    CGIHandler().run(get_more_context)
41 | 


--------------------------------------------------------------------------------
/www/scripts/get_notes.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime import generate_text_object
 9 | 
10 | import sys
11 | 
12 | sys.path.append("..")
13 | import custom_functions
14 | 
15 | try:
16 |     from custom_functions import WebConfig
17 | except ImportError:
18 |     from philologic.runtime import WebConfig
19 | try:
20 |     from custom_functions import WSGIHandler
21 | except ImportError:
22 |     from philologic.runtime import WSGIHandler
23 | 
24 | 
def get_notes(environ, start_response):
    """WSGI endpoint returning a note text object as JSON.

    Sends a ``200 OK`` JSON response and yields the orjson serialization of
    ``generate_text_object(request, config, note=True)``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield orjson.dumps(generate_text_object(request, config, note=True))


if __name__ == "__main__":
    CGIHandler().run(get_notes)
37 | 


--------------------------------------------------------------------------------
/www/scripts/get_query_terms.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime.Query import get_expanded_query
 9 | 
10 | import sys
11 | 
12 | sys.path.append("..")
13 | import custom_functions
14 | 
15 | try:
16 |     from custom_functions import WebConfig
17 | except ImportError:
18 |     from philologic.runtime import WebConfig
19 | try:
20 |     from custom_functions import WSGIHandler
21 | except ImportError:
22 |     from philologic.runtime import WSGIHandler
23 | 
24 | 
def term_list(environ, start_response):
    """WSGI endpoint returning the expanded query terms as JSON.

    Runs the query described by the request, calls ``hits.finish()`` and
    yields the orjson serialization of the first element returned by
    ``get_expanded_query(hits)``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    # NOTE(review): finish() presumably blocks until the search has completed
    # so the expansion below sees the full query -- confirm against HitList.
    hits.finish()
    expanded_terms = get_expanded_query(hits)
    yield orjson.dumps(expanded_terms[0])


if __name__ == "__main__":
    CGIHandler().run(term_list)
40 | 


--------------------------------------------------------------------------------
/www/scripts/get_sorted_frequency.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime import frequency_results
 8 | 
 9 | import sys
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
def get_frequency(environ, start_response):
    """WSGI endpoint returning sorted frequency results as JSON.

    Reads through a hitlist, looks up q.frequency_field in each hit, and
    builds up a list of unique values and their frequencies (delegated to
    ``frequency_results`` with ``sorted_results=True``). Yields the orjson
    serialization of that result.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    yield orjson.dumps(frequency_results(request, config, sorted_results=True))


if __name__ == "__main__":
    CGIHandler().run(get_frequency)
38 | 


--------------------------------------------------------------------------------
/www/scripts/get_sorted_kwic.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime import kwic_hit_object, page_interval
 9 | 
10 | import sys
11 | 
12 | 
13 | sys.path.append("..")
14 | import custom_functions
15 | 
16 | try:
17 |     from custom_functions import WebConfig
18 | except ImportError:
19 |     from philologic.runtime import WebConfig
20 | try:
21 |     from custom_functions import WSGIHandler
22 | except ImportError:
23 |     from philologic.runtime import WSGIHandler
24 | 
25 | 
def get_sorted_kwic(environ, start_response):
    """WSGI endpoint returning one page of sorted KWIC results as JSON.

    Sends a ``200 OK`` JSON response and yields the orjson serialization of
    the object built by ``get_sorted_hits``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    yield orjson.dumps(get_sorted_hits(request, config, db))
36 | 
37 | 
def get_sorted_hits(request, config, db):
    """Build the KWIC result object for the requested page of sorted hits.

    The tab-separated cache at ``request.cache_path`` has a header line naming
    its fields. On first use it is sorted with the system ``sort`` on the
    requested KWIC sorting options and stored as ``<cache_path>.sorted`` (the
    unsorted cache is removed). The sorted file is then scanned for lines
    ``start``..``end`` (1-based, inclusive); the first column of each line is
    the index of the hit in the hitlist.

    Returns a dict with keys ``description``, ``query``, ``results``,
    ``results_length`` and ``query_done``.

    Raises:
        ValueError: if a requested sorting option is not a field of the cache.
    """
    import shlex  # local import: only this function builds a shell command

    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    start, end, _ = page_interval(request.results_per_page, hits, request.start, request.end)
    kwic_object = {
        "description": {"start": start, "end": end, "results_per_page": request.results_per_page},
        "query": {key: value for key, value in request},
    }
    sorted_path = f"{request.cache_path}.sorted"
    if not os.path.exists(sorted_path):
        with open(request.cache_path) as cache:
            fields = cache.readline().strip().split("\t")
        sort_keys = []
        for option_name in (
            "first_kwic_sorting_option",
            "second_kwic_sorting_option",
            "third_kwic_sorting_option",
        ):
            option = getattr(request, option_name)
            if option:
                key = fields.index(option) + 1  # sort(1) field numbers are 1-based
                sort_keys.append(f"-k {key},{key}")
        sort_order = " ".join(sort_keys)
        # Quote the paths so spaces or shell metacharacters in the cache
        # location cannot break (or inject into) the command line.
        source = shlex.quote(str(request.cache_path))
        target = shlex.quote(sorted_path)
        os.system(
            f"tail -n +2 {source} | sort {sort_order} > {target} && rm {source}"
        )  # no numeric sort since we would have to know the type of the field being sorted on: e.g. -k 2,2n
    kwic_results = []
    with open(sorted_path) as sorted_results:
        for line_number, line in enumerate(sorted_results, 1):
            if line_number < start:
                continue
            if line_number > end:
                break
            index = int(line.split("\t")[0])
            kwic_results.append(kwic_hit_object(hits[index], config, db))

    kwic_object["results"] = kwic_results
    kwic_object["results_length"] = len(hits)
    kwic_object["query_done"] = hits.done

    return kwic_object


if __name__ == "__main__":
    CGIHandler().run(get_sorted_kwic)
84 | 


--------------------------------------------------------------------------------
/www/scripts/get_table_of_contents.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | 
 8 | import sys
 9 | 
10 | sys.path.append("..")
11 | import custom_functions
12 | 
13 | try:
14 |     from custom_functions import WebConfig
15 | except ImportError:
16 |     from philologic.runtime import WebConfig
17 | try:
18 |     from custom_functions import WSGIHandler
19 | except ImportError:
20 |     from philologic.runtime import WSGIHandler
21 | try:
22 |     from custom_functions import generate_toc_object
23 | except ImportError:
24 |     from philologic.runtime import generate_toc_object
25 | 
26 | 
def get_table_of_contents(environ, start_response):
    """WSGI endpoint returning the table of contents as JSON.

    The object returned by ``generate_toc_object`` is augmented with
    ``current_obj_position``: the index of the first entry of
    ``toc_object["toc"]`` whose ``philo_id`` equals the requested philo_id
    (whitespace-normalized), or ``0`` when no entry matches.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    # split()/join collapses any run of whitespace to a single space so the
    # comparison below is not defeated by stray spacing in the request.
    philo_id = " ".join(request["philo_id"].split())
    toc_object = generate_toc_object(request, config)
    toc_object["current_obj_position"] = next(
        (pos for pos, toc_element in enumerate(toc_object["toc"]) if toc_element["philo_id"] == philo_id),
        0,
    )
    yield orjson.dumps(toc_object)


if __name__ == "__main__":
    CGIHandler().run(get_table_of_contents)
47 | 


--------------------------------------------------------------------------------
/www/scripts/get_term_groups.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime.Query import split_terms
 9 | from philologic.runtime.QuerySyntax import group_terms, parse_query
10 | 
11 | import sys
12 | 
13 | sys.path.append("..")
14 | import custom_functions
15 | 
16 | try:
17 |     from custom_functions import WebConfig
18 | except ImportError:
19 |     from philologic.runtime import WebConfig
20 | try:
21 |     from custom_functions import WSGIHandler
22 | except ImportError:
23 |     from philologic.runtime import WSGIHandler
24 | 
25 | 
def term_group(environ, start_response):
    """WSGI endpoint returning the query split into display-ready term groups.

    For an empty ``q`` the response is ``{"original_query": "", "term_groups": []}``.
    Otherwise the query is parsed, grouped and split with ``parse_query``,
    ``group_terms`` and ``split_terms``; every resulting group is rendered as
    one string in which OR becomes ``|``, the first NOT of the group becomes
    `` NOT `` and terms/quotes are space-separated.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    if not request["q"]:
        yield orjson.dumps({"original_query": "", "term_groups": []})
        return

    # NOTE(review): the hitlist is never read here; the call is kept in case
    # issuing the query has side effects other endpoints rely on -- confirm
    # and drop it if not.
    db.query(request["q"], request["method"], request["arg"], sort_order=request["sort_order"], **request.metadata)
    all_groups = split_terms(group_terms(parse_query(request.q)))
    term_groups = []
    for group in all_groups:
        pieces = []
        not_started = False
        for kind, term in group:
            if kind == "NOT":
                # Only the first NOT of a group is rendered.
                if not_started is False:
                    not_started = True
                    pieces.append(" NOT ")
            elif kind == "OR":
                pieces.append("|")
            elif kind in ("TERM", "QUOTE"):
                pieces.append(" %s " % term)
        term_groups.append("".join(pieces).strip())
    yield orjson.dumps({"term_groups": term_groups, "original_query": request.original_q})


if __name__ == "__main__":
    CGIHandler().run(term_group)
65 | 


--------------------------------------------------------------------------------
/www/scripts/get_text_object.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | from philologic.runtime.HitWrapper import ObjectWrapper
 9 | from philologic.runtime import generate_text_object
10 | 
11 | import sys
12 | 
13 | sys.path.append("..")
14 | import custom_functions
15 | 
16 | try:
17 |     from custom_functions import WebConfig
18 | except ImportError:
19 |     from philologic.runtime import WebConfig
20 | try:
21 |     from custom_functions import WSGIHandler
22 | except ImportError:
23 |     from philologic.runtime import WSGIHandler
24 | 
25 | 
def get_text_object(environ, start_response):
    """WSGI endpoint returning a text object as JSON.

    ``request.philo_id`` (a space-separated id string) is right-padded with
    ``" 0"`` components up to seven components before the text object is
    generated. Yields the orjson serialization of
    ``generate_text_object(request, config)``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    # Count id *components*, not characters: len() of the raw string counts
    # the separating spaces and every digit, so e.g. "1 2 3" (5 characters)
    # would only receive two of the four missing components.
    missing = 7 - len(request.philo_id.split())
    if missing > 0:
        request.philo_id += missing * " 0"
    # NOTE(review): the wrapper's result is unused; the construction is kept
    # in case it validates the id -- confirm and remove if it has no effect.
    ObjectWrapper(request["philo_id"].split(), db)
    text_object = generate_text_object(request, config)
    yield orjson.dumps(text_object)


if __name__ == "__main__":
    CGIHandler().run(get_text_object)
44 | 


--------------------------------------------------------------------------------
/www/scripts/get_total_results.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime.DB import DB
 8 | 
 9 | import sys
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
def get_total_results(environ, start_response):
    """WSGI endpoint returning the total number of hits as a JSON number.

    The hitlist is chosen from the request: with no query and no metadata,
    all objects at ``db.locals["default_object_level"]``; with no query but
    metadata, a metadata-only query; otherwise a regular word query.
    ``hits.finish()`` is called before the hits are counted.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    if not request.no_q:
        hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    elif request.no_metadata:
        hits = db.get_all(db.locals["default_object_level"], request["sort_order"])
    else:
        hits = db.query(sort_order=request["sort_order"], **request.metadata)
    # NOTE(review): finish() presumably waits for the search to complete so
    # len() reflects the final count -- confirm against HitList.
    hits.finish()
    yield orjson.dumps(len(hits))


if __name__ == "__main__":
    CGIHandler().run(get_total_results)
46 | 


--------------------------------------------------------------------------------
/www/scripts/get_web_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import sqlite3
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | import sys
 8 | 
 9 | sys.path.append("..")
10 | import custom_functions
11 | 
12 | try:
13 |     from custom_functions import WebConfig
14 | except ImportError:
15 |     from philologic.runtime import WebConfig
16 | 
17 | from philologic.Config import MakeDBConfig
18 | from philologic.runtime.DB import DB
19 | 
20 | 
def get_web_config(_, start_response):
    """Retrieve Web Config data.

    Yields ``config.to_json()``. When the web config is valid it is first
    augmented with ``time_series_status`` (see ``time_series_tester``) and
    with ``available_metadata`` taken from the ``metadata_fields`` of
    ``data/db.locals.py``; an invalid config is returned as-is.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    db_path = os.path.join(os.path.dirname(script_dir), "")
    config = WebConfig(db_path)
    if config.valid_config is not False:
        config.time_series_status = time_series_tester(config)
        db_locals = MakeDBConfig(os.path.join(db_path, "data/db.locals.py"))
        config.data["available_metadata"] = db_locals.metadata_fields
    yield config.to_json()
35 | 
36 | 
def time_series_tester(config):
    """Tell whether the time series year field has at least two distinct values.

    Looks at ``data/frequencies/<time_series_year_field>_frequencies`` under
    ``config.db_path`` and returns ``True`` only when that file exists and
    contains more than one line; ``False`` otherwise.
    """
    frequencies_file = os.path.join(config.db_path, f"data/frequencies/{config.time_series_year_field}_frequencies")
    if not os.path.exists(frequencies_file):
        return False
    line_count = 0
    with open(frequencies_file) as input_file:
        for _ in input_file:
            line_count += 1
    return line_count > 1


if __name__ == "__main__":
    CGIHandler().run(get_web_config)
50 | 


--------------------------------------------------------------------------------
/www/scripts/get_word_frequency.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import orjson
 4 | import os
 5 | from wsgiref.handlers import CGIHandler
 6 | 
 7 | from philologic.runtime import concordance_results
 8 | 
 9 | import sys
10 | 
11 | sys.path.append("..")
12 | import custom_functions
13 | 
14 | try:
15 |     from custom_functions import WebConfig
16 | except ImportError:
17 |     from philologic.runtime import WebConfig
18 | try:
19 |     from custom_functions import WSGIHandler
20 | except ImportError:
21 |     from philologic.runtime import WSGIHandler
22 | 
23 | 
def get_frequency(environ, start_response):
    """WSGI endpoint returning the word frequency object as JSON.

    Sends a ``200 OK`` JSON response and yields the orjson serialization of
    ``generate_word_frequency(request, config)``.
    """
    # The module only imports ``concordance_results``; without this import the
    # call below raises NameError on every request.
    # NOTE(review): assumes philologic.runtime exports generate_word_frequency
    # alongside the other report helpers -- confirm.
    from philologic.runtime import generate_word_frequency

    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )
    # The database root is the parent of this script's directory. Take the
    # parent explicitly rather than stripping the substring "scripts", which
    # would also mangle unrelated components (e.g. ".../manuscripts/scripts").
    # The trailing separator keeps the string identical for normal installs.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(os.path.join(os.path.dirname(script_dir), ""))
    request = WSGIHandler(environ, config)
    word_frequency_object = generate_word_frequency(request, config)
    yield orjson.dumps(word_frequency_object)


if __name__ == "__main__":
    CGIHandler().run(get_frequency)
36 | 


--------------------------------------------------------------------------------
/www/webApp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Bootstrap Web app"""
 3 | 
 4 | 
 5 | import os.path
 6 | 
 7 | from philologic.runtime import WebConfig
 8 | from philologic.runtime import WSGIHandler
 9 | from philologic.runtime import access_control
10 | 
11 | PATH = os.path.abspath(os.path.dirname(__file__))
12 | 
13 | 
def start_web_app(environ, start_response):
    """Return index.html to start web app.

    When the web config is invalid, the misconfiguration page (with the
    config traceback) is returned instead. When access control is enabled and
    the request is not yet authenticated, a successful
    ``access_control.check_access`` adds ``hash`` and ``timestamp`` cookies to
    the response headers.

    Returns the page as a ``str``; ``start_response`` is always called with
    ``200 OK``.
    """
    config = WebConfig(os.path.abspath(PATH))
    headers = [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
    if not config.valid_config:  # This means we have an error in the webconfig file
        html_page = build_misconfig_page(config.traceback, "webconfig.cfg")
    else:
        request = WSGIHandler(environ, config)
        if config.access_control and not request.authenticated:
            token = access_control.check_access(environ, config)
            if token:
                h, ts = token
                headers.append(("Set-Cookie", f"hash={h}"))
                headers.append(("Set-Cookie", f"timestamp={ts}"))
        # The page is served as UTF-8 (see Content-type), so read it as UTF-8
        # instead of relying on the process locale, which may be ASCII.
        with open(f"{config.db_path}/app/dist/index.html", encoding="utf-8") as index_page:
            html_page = index_page.read()
    start_response("200 OK", headers)
    return html_page
33 | 
34 | 
def build_misconfig_page(traceback, config_file):
    """Return bad config HTML page.

    Reads ``app/misconfiguration.html`` under the web app directory and
    substitutes the ``$TRACEBACK`` and ``$config_FILE`` placeholders with
    ``traceback`` and ``config_file`` respectively.
    """
    # Read as UTF-8 explicitly (the page is served with charset=UTF-8); the
    # handle is not named ``input`` so the builtin is not shadowed.
    with open(f"{PATH}/app/misconfiguration.html", encoding="utf-8") as template:
        html_page = template.read()
    html_page = html_page.replace("$TRACEBACK", traceback)
    html_page = html_page.replace("$config_FILE", config_file)
    return html_page
42 | 


--------------------------------------------------------------------------------