├── .envrc ├── .github └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.org ├── build.clj ├── deps.edn ├── docs └── resources │ ├── plauna1.png │ ├── plauna2.png │ └── plauna3.png ├── package-lock.json ├── package.json ├── resources ├── db │ └── migration │ │ ├── B1__Initialize_db.sql │ │ └── V2__only_original_data_in_bodies.sql ├── public │ ├── android-chrome-192x192.png │ ├── css │ │ └── custom.css │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── plauna-banner.png │ └── site.webmanifest ├── templates │ ├── admin-categories.html │ ├── admin-languages.html │ ├── admin-preferences.html │ ├── admin.html │ ├── base.html │ ├── email.html │ ├── emails.html │ ├── statistics.html │ ├── training-review.html │ ├── watcher.html │ └── watchers.html └── test │ ├── email_corpus │ ├── greek-text.mbox │ ├── multipart-with-text-attachment.eml │ ├── simple-lorem-ipsum.eml │ ├── test-email-1.mbox │ └── weird-mbox.mbox │ ├── normalization │ ├── normalized-text-1.txt │ └── original-text-1.txt │ └── test.edn ├── shell.nix ├── src └── plauna │ ├── analysis.clj │ ├── client.clj │ ├── core │ ├── email.clj │ └── events.clj │ ├── database.clj │ ├── entry.clj │ ├── files.clj │ ├── markup.clj │ ├── messaging.clj │ ├── parser.clj │ ├── preferences.clj │ ├── server.clj │ ├── specs.clj │ └── util │ ├── async.clj │ ├── errors.clj │ ├── page.clj │ └── text_transform.clj └── test └── plauna ├── analysis_test.clj ├── client_test.clj ├── core ├── email_test.clj └── events_test.clj ├── database_test.clj ├── files_test.clj ├── functions_test.clj ├── parser_test.clj └── preferences_test.clj /.envrc: -------------------------------------------------------------------------------- 1 | use nix 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | tags: 7 | - '*' 8 | workflow_dispatch: 9 | 10 | jobs: 11 | docker: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - 15 | name: Set up QEMU 16 | uses: docker/setup-qemu-action@4574d27a4764455b42196d70a065bc6853246a25 # v3.4.0 17 | - 18 | name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0 20 | - 21 | name: Docker meta 22 | id: meta 23 | uses: docker/metadata-action@369eb591f429131d6889c46b94e711f089e6ca96 #v 5.6.1 24 | with: 25 | images: ozangulle/plauna 26 | tags: | 27 | # set dev tag for master branch 28 | type=raw,value=dev,enable=${{ github.ref == format('refs/heads/{0}', 'master') }} 29 | # do not make a tag for 'master' branch 30 | type=ref,event=branch,enable=${{ github.ref != format('refs/heads/{0}', 'master') }} 31 | type=ref,event=tag 32 | - 33 | name: Login to DockerHub 34 | uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 #v 3.3.0 35 | with: 36 | username: ${{ secrets.DOCKERHUB_USERNAME }} 37 | password: ${{ secrets.DOCKERHUB_TOKEN }} 38 | - 39 | name: Build and push 40 | uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 # v6.14.0 41 | with: 42 | push: true 43 | platforms: linux/amd64,linux/arm64 44 | file: Dockerfile 45 | tags: ${{ steps.meta.outputs.tags }} 46 | labels: ${{ steps.meta.outputs.labels }} 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | .idea/ 15 | .lsp/ 16 | .clh-kondo/ 17 | .clerk/ 18 | .clj-kondo/ 19 | .cpcache/ 20 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [2025-05-26] - 2025-05-26 6 | 7 | ### 🚀 Features 8 | 9 | - Use IMAP copy and delete when move is not available 10 | - Move e-mail when its category is changed by user 11 | - *(ui)* Make ui better and mobile friendly 12 | - *(ui)* Add the new plauna logo 13 | 14 | ### 🐛 Bug Fixes 15 | 16 | - Change text sanitization for cleaner training texts 17 | - Adjust text sanitization for cleaner training texts 18 | - Moving emails no longer triggers a search through the whole folder 19 | - *(imap client)* Wrong method call during reconnection 20 | - Faulty partial update of connection data on reconnect 21 | - Setting category to n/a now moves messages back to Inbox 22 | - *(ui)* Toast messages cannot be closed anymore 23 | 24 | ### ⚙️ Miscellaneous Tasks 25 | 26 | - Add flow-storm for better debugging experience 27 | - Update ring dependencies 28 | 29 | ## [2025-03-22.0] - 2025-03-22 30 | 31 | ### 🚀 Features 32 | 33 | - Health check interval for IMAP client watcher is configurable 34 | - Show sanitized text next to the original on the email details page 35 | - *(ui)* Add pie charts to statistics pages for better data overview 36 | - Add optional config parameters for the email client 37 | 38 | ### 🐛 Bug Fixes 39 | 40 | - Choose correct text content to train on when text attachments present 41 | - Evict preferences cache after updating a value 42 | - Throw an exception if no config file can be found during startup 43 | 44 | ### 📚 Documentation 45 | 46 | - Fix the link to the Docker image in README 47 | - Add 'features' and 'screenshots' subsections 48 | 49 | ### 🎨 Styling 50 | 51 | - Remove the delete buttons from admin ui 52 | 53 | ### ⚙️ Miscellaneous Tasks 54 | 55 | - Update JRE 23 Docker image 56 | 57 | ## [2025-02-23.0] - 2025-02-23 58 | 59 | ### 🚀 Features 60 | 61 | - *(ui)* Remove links to half-baked features 62 | 63 | ## [2025-02-21.0] - 2025-02-21 64 | 65 | ### 🚀 Features 66 | 67 | - *(ui)* Reorganize e-mail lists and data training 68 | - *(ui)* Clean up and visually improve /emails 69 | - *(ui)* Unify input styles on different pages 70 | - *(ui)* Email details page is styled in the new fashion 71 | - [**breaking**] Remove check for training binaries before starting imap client 72 | - Enrich e-mails parsed from an mbox 73 | - Rename the page "watchers" to "connections" 74 | - Create directories on imap servers upon category creation 75 | - Set log level from ui 76 | - *(ui)* Async operations and errors are shown to the user as toast messages 77 | 78 | ### 🐛 Bug Fixes 79 | 80 | - Message ids are now url encoded in the email list 81 | - Exception when moving emails if they could not be categorized 82 | - Save language metadata in detail view and preferences 83 | - Display category as n/a in email views if note set 84 | - N/a no longer listed as a language in admin 85 | - Event loops restart when they fail 86 | - Change wrong order of functions on main 87 | - Compare categorization threshold with the probability correctly 88 | - Restart all event loops after a failure or restart in messaging 89 | - Add new languages to language preferences with the value false 90 | - Confidence is set properly after categorization 91 | 92 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clojure:temurin-23-tools-deps-bookworm-slim as build 2 | RUN apt update && apt install -y nodejs npm 3 | COPY . /usr/src/app/ 4 | WORKDIR /usr/src/app 5 | RUN npm install 6 | RUN npm run build 7 | RUN clojure -T:build uber 8 | 9 | FROM eclipse-temurin:23.0.2_7-jre-ubi9-minimal 10 | COPY --from=build /usr/src/app/target/plauna-standalone.jar /app/ 11 | EXPOSE 8080 12 | WORKDIR /app 13 | CMD ["sh", "-c", "java -jar plauna-standalone.jar $PLAUNA_ARGS"] 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | EUROPEAN UNION PUBLIC LICENCE v. 1.2 2 | EUPL © the European Union 2007, 2016 3 | 4 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined 5 | below) which is provided under the terms of this Licence. Any use of the Work, 6 | other than as authorised under this Licence is prohibited (to the extent such 7 | use is covered by a right of the copyright holder of the Work). 8 | 9 | The Work is provided under the terms of this Licence when the Licensor (as 10 | defined below) has placed the following notice immediately following the 11 | copyright notice for the Work: 12 | 13 | Licensed under the EUPL 14 | 15 | or has expressed by any other means his willingness to license under the EUPL. 16 | 17 | 1. Definitions 18 | 19 | In this Licence, the following terms have the following meaning: 20 | 21 | - ‘The Licence’: this Licence. 22 | 23 | - ‘The Original Work’: the work or software distributed or communicated by the 24 | Licensor under this Licence, available as Source Code and also as Executable 25 | Code as the case may be. 26 | 27 | - ‘Derivative Works’: the works or software that could be created by the 28 | Licensee, based upon the Original Work or modifications thereof. This Licence 29 | does not define the extent of modification or dependence on the Original Work 30 | required in order to classify a work as a Derivative Work; this extent is 31 | determined by copyright law applicable in the country mentioned in Article 15. 32 | 33 | - ‘The Work’: the Original Work or its Derivative Works. 34 | 35 | - ‘The Source Code’: the human-readable form of the Work which is the most 36 | convenient for people to study and modify. 37 | 38 | - ‘The Executable Code’: any code which has generally been compiled and which is 39 | meant to be interpreted by a computer as a program. 40 | 41 | - ‘The Licensor’: the natural or legal person that distributes or communicates 42 | the Work under the Licence. 43 | 44 | - ‘Contributor(s)’: any natural or legal person who modifies the Work under the 45 | Licence, or otherwise contributes to the creation of a Derivative Work. 46 | 47 | - ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of 48 | the Work under the terms of the Licence. 49 | 50 | - ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, 51 | renting, distributing, communicating, transmitting, or otherwise making 52 | available, online or offline, copies of the Work or providing access to its 53 | essential functionalities at the disposal of any other natural or legal 54 | person. 55 | 56 | 2. Scope of the rights granted by the Licence 57 | 58 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 59 | sublicensable licence to do the following, for the duration of copyright vested 60 | in the Original Work: 61 | 62 | - use the Work in any circumstance and for all usage, 63 | - reproduce the Work, 64 | - modify the Work, and make Derivative Works based upon the Work, 65 | - communicate to the public, including the right to make available or display 66 | the Work or copies thereof to the public and perform publicly, as the case may 67 | be, the Work, 68 | - distribute the Work or copies thereof, 69 | - lend and rent the Work or copies thereof, 70 | - sublicense rights in the Work or copies thereof. 71 | 72 | Those rights can be exercised on any media, supports and formats, whether now 73 | known or later invented, as far as the applicable law permits so. 74 | 75 | In the countries where moral rights apply, the Licensor waives his right to 76 | exercise his moral right to the extent allowed by law in order to make effective 77 | the licence of the economic rights here above listed. 78 | 79 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to 80 | any patents held by the Licensor, to the extent necessary to make use of the 81 | rights granted on the Work under this Licence. 82 | 83 | 3. Communication of the Source Code 84 | 85 | The Licensor may provide the Work either in its Source Code form, or as 86 | Executable Code. If the Work is provided as Executable Code, the Licensor 87 | provides in addition a machine-readable copy of the Source Code of the Work 88 | along with each copy of the Work that the Licensor distributes or indicates, in 89 | a notice following the copyright notice attached to the Work, a repository where 90 | the Source Code is easily and freely accessible for as long as the Licensor 91 | continues to distribute or communicate the Work. 92 | 93 | 4. Limitations on copyright 94 | 95 | Nothing in this Licence is intended to deprive the Licensee of the benefits from 96 | any exception or limitation to the exclusive rights of the rights owners in the 97 | Work, of the exhaustion of those rights or of other applicable limitations 98 | thereto. 99 | 100 | 5. Obligations of the Licensee 101 | 102 | The grant of the rights mentioned above is subject to some restrictions and 103 | obligations imposed on the Licensee. Those obligations are the following: 104 | 105 | Attribution right: The Licensee shall keep intact all copyright, patent or 106 | trademarks notices and all notices that refer to the Licence and to the 107 | disclaimer of warranties. The Licensee must include a copy of such notices and a 108 | copy of the Licence with every copy of the Work he/she distributes or 109 | communicates. The Licensee must cause any Derivative Work to carry prominent 110 | notices stating that the Work has been modified and the date of modification. 111 | 112 | Copyleft clause: If the Licensee distributes or communicates copies of the 113 | Original Works or Derivative Works, this Distribution or Communication will be 114 | done under the terms of this Licence or of a later version of this Licence 115 | unless the Original Work is expressly distributed only under this version of the 116 | Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee 117 | (becoming Licensor) cannot offer or impose any additional terms or conditions on 118 | the Work or Derivative Work that alter or restrict the terms of the Licence. 119 | 120 | Compatibility clause: If the Licensee Distributes or Communicates Derivative 121 | Works or copies thereof based upon both the Work and another work licensed under 122 | a Compatible Licence, this Distribution or Communication can be done under the 123 | terms of this Compatible Licence. For the sake of this clause, ‘Compatible 124 | Licence’ refers to the licences listed in the appendix attached to this Licence. 125 | Should the Licensee's obligations under the Compatible Licence conflict with 126 | his/her obligations under this Licence, the obligations of the Compatible 127 | Licence shall prevail. 128 | 129 | Provision of Source Code: When distributing or communicating copies of the Work, 130 | the Licensee will provide a machine-readable copy of the Source Code or indicate 131 | a repository where this Source will be easily and freely available for as long 132 | as the Licensee continues to distribute or communicate the Work. 133 | 134 | Legal Protection: This Licence does not grant permission to use the trade names, 135 | trademarks, service marks, or names of the Licensor, except as required for 136 | reasonable and customary use in describing the origin of the Work and 137 | reproducing the content of the copyright notice. 138 | 139 | 6. Chain of Authorship 140 | 141 | The original Licensor warrants that the copyright in the Original Work granted 142 | hereunder is owned by him/her or licensed to him/her and that he/she has the 143 | power and authority to grant the Licence. 144 | 145 | Each Contributor warrants that the copyright in the modifications he/she brings 146 | to the Work are owned by him/her or licensed to him/her and that he/she has the 147 | power and authority to grant the Licence. 148 | 149 | Each time You accept the Licence, the original Licensor and subsequent 150 | Contributors grant You a licence to their contributions to the Work, under the 151 | terms of this Licence. 152 | 153 | 7. Disclaimer of Warranty 154 | 155 | The Work is a work in progress, which is continuously improved by numerous 156 | Contributors. It is not a finished work and may therefore contain defects or 157 | ‘bugs’ inherent to this type of development. 158 | 159 | For the above reason, the Work is provided under the Licence on an ‘as is’ basis 160 | and without warranties of any kind concerning the Work, including without 161 | limitation merchantability, fitness for a particular purpose, absence of defects 162 | or errors, accuracy, non-infringement of intellectual property rights other than 163 | copyright as stated in Article 6 of this Licence. 164 | 165 | This disclaimer of warranty is an essential part of the Licence and a condition 166 | for the grant of any rights to the Work. 167 | 168 | 8. Disclaimer of Liability 169 | 170 | Except in the cases of wilful misconduct or damages directly caused to natural 171 | persons, the Licensor will in no event be liable for any direct or indirect, 172 | material or moral, damages of any kind, arising out of the Licence or of the use 173 | of the Work, including without limitation, damages for loss of goodwill, work 174 | stoppage, computer failure or malfunction, loss of data or any commercial 175 | damage, even if the Licensor has been advised of the possibility of such damage. 176 | However, the Licensor will be liable under statutory product liability laws as 177 | far such laws apply to the Work. 178 | 179 | 9. Additional agreements 180 | 181 | While distributing the Work, You may choose to conclude an additional agreement, 182 | defining obligations or services consistent with this Licence. However, if 183 | accepting obligations, You may act only on your own behalf and on your sole 184 | responsibility, not on behalf of the original Licensor or any other Contributor, 185 | and only if You agree to indemnify, defend, and hold each Contributor harmless 186 | for any liability incurred by, or claims asserted against such Contributor by 187 | the fact You have accepted any warranty or additional liability. 188 | 189 | 10. Acceptance of the Licence 190 | 191 | The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ 192 | placed under the bottom of a window displaying the text of this Licence or by 193 | affirming consent in any other similar way, in accordance with the rules of 194 | applicable law. Clicking on that icon indicates your clear and irrevocable 195 | acceptance of this Licence and all of its terms and conditions. 196 | 197 | Similarly, you irrevocably accept this Licence and all of its terms and 198 | conditions by exercising any rights granted to You by Article 2 of this Licence, 199 | such as the use of the Work, the creation by You of a Derivative Work or the 200 | Distribution or Communication by You of the Work or copies thereof. 201 | 202 | 11. Information to the public 203 | 204 | In case of any Distribution or Communication of the Work by means of electronic 205 | communication by You (for example, by offering to download the Work from a 206 | remote location) the distribution channel or media (for example, a website) must 207 | at least provide to the public the information requested by the applicable law 208 | regarding the Licensor, the Licence and the way it may be accessible, concluded, 209 | stored and reproduced by the Licensee. 210 | 211 | 12. Termination of the Licence 212 | 213 | The Licence and the rights granted hereunder will terminate automatically upon 214 | any breach by the Licensee of the terms of the Licence. 215 | 216 | Such a termination will not terminate the licences of any person who has 217 | received the Work from the Licensee under the Licence, provided such persons 218 | remain in full compliance with the Licence. 219 | 220 | 13. Miscellaneous 221 | 222 | Without prejudice of Article 9 above, the Licence represents the complete 223 | agreement between the Parties as to the Work. 224 | 225 | If any provision of the Licence is invalid or unenforceable under applicable 226 | law, this will not affect the validity or enforceability of the Licence as a 227 | whole. Such provision will be construed or reformed so as necessary to make it 228 | valid and enforceable. 229 | 230 | The European Commission may publish other linguistic versions or new versions of 231 | this Licence or updated versions of the Appendix, so far this is required and 232 | reasonable, without reducing the scope of the rights granted by the Licence. New 233 | versions of the Licence will be published with a unique version number. 234 | 235 | All linguistic versions of this Licence, approved by the European Commission, 236 | have identical value. Parties can take advantage of the linguistic version of 237 | their choice. 238 | 239 | 14. Jurisdiction 240 | 241 | Without prejudice to specific agreement between parties, 242 | 243 | - any litigation resulting from the interpretation of this License, arising 244 | between the European Union institutions, bodies, offices or agencies, as a 245 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court 246 | of Justice of the European Union, as laid down in article 272 of the Treaty on 247 | the Functioning of the European Union, 248 | 249 | - any litigation arising between other parties and resulting from the 250 | interpretation of this License, will be subject to the exclusive jurisdiction 251 | of the competent court where the Licensor resides or conducts its primary 252 | business. 253 | 254 | 15. Applicable Law 255 | 256 | Without prejudice to specific agreement between parties, 257 | 258 | - this Licence shall be governed by the law of the European Union Member State 259 | where the Licensor has his seat, resides or has his registered office, 260 | 261 | - this licence shall be governed by Belgian law if the Licensor has no seat, 262 | residence or registered office inside a European Union Member State. 263 | 264 | Appendix 265 | 266 | ‘Compatible Licences’ according to Article 5 EUPL are: 267 | 268 | - GNU General Public License (GPL) v. 2, v. 3 269 | - GNU Affero General Public License (AGPL) v. 3 270 | - Open Software License (OSL) v. 2.1, v. 3.0 271 | - Eclipse Public License (EPL) v. 1.0 272 | - CeCILL v. 2.0, v. 2.1 273 | - Mozilla Public Licence (MPL) v. 2 274 | - GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 275 | - Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for 276 | works other than software 277 | - European Union Public Licence (EUPL) v. 1.1, v. 1.2 278 | - Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong 279 | Reciprocity (LiLiQ-R+). 280 | 281 | The European Commission may update this Appendix to later versions of the above 282 | licences without producing a new version of the EUPL, as long as they provide 283 | the rights granted in Article 2 of this Licence and protect the covered Source 284 | Code from exclusive appropriation. 285 | 286 | All other changes or additions to this Appendix require the production of a new 287 | EUPL version. 288 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | #+OPTIONS: ^:nil 2 | 3 | * Plauna 4 | 5 | Plauna is a privacy focused service that helps you categorize your e-mails. All of its data is stored only on your computer. 6 | 7 | You can parse your e-mails from mbox files or directly by connecting to your IMAP servers. Plauna helps you categorize your e-mails and automates the process so that incoming e-mails are moved to their respective folders on your IMAP server. 8 | 9 | *IMPORTANT:* Plauna is still under heavy development at the moment. The features and the DB structure are subject to change. 10 | 11 | ** Features 12 | 13 | - You own 100% of your data. Everything is on your machine. 14 | 15 | - You define your categories: Use as many or few as you like 16 | 17 | - Multi-language support. 18 | 19 | - Statistics about your emails and metadata (very basic, still under development). 20 | 21 | - Upload data from mbox archives. 22 | 23 | - Read emails directly from IMAP folders. 24 | 25 | ** Screenshots 26 | 27 | #+CAPTION: List emails and edit metadata 28 | [[./docs/resources/plauna1.png]] 29 | List emails and edit metadata 30 | 31 | #+CAPTION: See details of an email and edit metadata 32 | [[./docs/resources/plauna3.png]] 33 | See details of an email and edit metadata 34 | 35 | #+CAPTION: Admin panel 36 | [[./docs/resources/plauna2.png]] 37 | Admin panel 38 | 39 | * Installation 40 | 41 | The easiest way to get Plauna is using the [[https://hub.docker.com/r/ozangulle/plauna][Docker image]]. 42 | 43 | The second way to get Plauna is fetching it from the git repository and compiling using Clojure CLI with the command: 44 | 45 | #+begin_src 46 | clj -T:build uber 47 | #+end_src 48 | 49 | This will produce a Plauna uberjar in the ./target directory which you can run using a Java Runtime Environment. 50 | 51 | * How to use 52 | 53 | ** How to run 54 | 55 | You must provide a configuration file to start Plauna. By default, the configuration is expected to be at ~/.config/plauna.edn. You can modify the location of the configuration file by passing the parameter --config-file. This method is useful when running Plauna from the command line, e.g., java -jar plauna.jar --config-file=/opt/plauna/plauna.edn. 56 | 57 | If you are using Docker, you can put the parameters in the environment variable PLAUNA_ARGS. A docker-compose example would look like this: 58 | 59 | #+begin_src docker-compose 60 | services: 61 | plauna: 62 | container_name: plauna 63 | image: ozangulle/plauna:dev 64 | environment: 65 | - 'PLAUNA_ARGS=--config-file=/opt/plauna/plauna.edn' 66 | restart: unless-stopped 67 | #+end_src 68 | 69 | ** Configuration 70 | 71 | The following is a configuration file with all of the possible settings. The default value for data-folder is "~/.local/state/plauna" and the default value for the server port is 8080. The e-mail configurations have no default settings. Plauna can be run without any e-mail credentials, even though the key point of Plauna is having it automatically categorize your e-mails. 72 | 73 | #+begin_src clojure 74 | {:data-folder "/home/myhome/example-directory" ; The location for the db, training files and models. Refers to the path in the container 75 | :server {:port 80} ; The port Plauna's web server listens to in the container. Defaults to 8080. 76 | :email { 77 | :clients [{:host "imap.example.com" :user "me@example.com" :secret "mysecret" :folder "Inbox"}] 78 | }} 79 | #+end_src 80 | 81 | Additionally, you can adjust the IMAP connection security and port by using the keys :security and :port. Possible values for :security are :ssl (default), :starttls, and :plain. The default values for these keys are :ssl and 993, respectively. The IMAP client configuration with the optional keys would look like this: 82 | 83 | #+begin_src clojure 84 | ;; Specify connection security only 85 | {:host "imap.example.com" :user "me@example.com" :secret "mysecret" :folder "Inbox" :security :starttls} 86 | 87 | ;; Port 143 is default for starttls so you only need to set port explicitely if you are using a non-standard port. 88 | {:host "imap.example.com" :user "me@example.com" :secret "mysecret" :folder "Inbox" :security :starttls :port 155} 89 | #+end_src 90 | 91 | Currently, Plauna only supports the AUTHENTICATION PLAIN method, i.e. authentication using username and password. Other authentication mechanisms like OAUTH are not supported yet. 92 | 93 | Other optional keys are: 94 | - :debug = Set to true to see the IMAP communication with the server in debug mode. Usage: ~{... :debug true}~ Defaults to false. 95 | 96 | - :check-ssl-certs = Set to false if you are using a self signed certificate on a server. Usage ~{... :check-ssl-certs false}~ Defaults to true. 97 | 98 | ** Getting Started 99 | 100 | When you start Plauna, it starts a web server on the port which you specified (defaults to 8080) and connects to the IMAP servers that you configured. You can use Plauna without configuring any IMAP servers but you would be missing out on the "move e-mail" functionality. On your very first start, the database is empty. You need to fill it with e-mails. There are three non-exclusive ways of doing this: 101 | 102 | 1. Go to "Admin". Under the header "Parse E-mails" select an mbox file and click on the button "Parse Mbox". You can see your e-mails under the "E-mails" tab shortly thereafter. 103 | 104 | 2. Go to "Watchers", click on any IMAP account name, select a folder you want to parse the e-mails in, make sure "Move e-mails after categorization" is unchecked and click on the button "Parse E-mails". This will read all the e-mails in the selected folder. 105 | 106 | 3. Just leave Plauna running and watch it save your e-mails. Beware: Depending on how many e-mails you receive on any given day, this method may be very slow. 107 | 108 | 109 | ** Language Detection 110 | 111 | Plauna automatically detects the language of an e-mail upon parsing it. Here, you must also select the languages that you want to use in training - therefore the languages you want to use in categorization. 112 | 113 | ** Categorization 114 | 115 | *** Create Categories 116 | 117 | Go to "Admin" -> "Manage Categories" in order to create and delete categories. If you set up connections to your IMAP servers in your settings file, anytime you create a category Plauna will try to create a folder on the servers. If you however delete a category, Plauna will not delete it on the server. As a general rule, Plauna reads and moves e-mails as well as it creates folders but it never deletes anything. 118 | 119 | *** Categorize E-mails 120 | 121 | Under the tab "E-mails" you can edit the language and category of every e-mail; either as a batch operation or by clicking on an e-mail and changing it at the details page. 122 | 123 | *** Data Training 124 | 125 | Under the tab "E-mails" there is an expandable section called "Data Training". Click on the button "Train with Existing Data" to train the model(s) using the existing categorized data. *Important:* You must have more than one category saved for each language you want to train in. 126 | 127 | You must have selected at least one language in "Admin" -> "Manage Languages" in order to start training on your e-mails. 128 | 129 | *** Automatic Categorization 130 | 131 | After training your models on the categories you created, Plauna will categorize each e-mail you receive and moved it automatically to its corresponding folder. 132 | -------------------------------------------------------------------------------- /build.clj: -------------------------------------------------------------------------------- 1 | (ns build 2 | (:require [clojure.tools.build.api :as b])) 3 | 4 | (def build-folder "target") 5 | (def class-dir (str build-folder "/classes")) 6 | 7 | (def basis (delay (b/create-basis {:project "deps.edn"}))) 8 | (def version "0.0.1") 9 | (def app-name "plauna") 10 | (def uber-file-name (format "%s/%s-standalone.jar" build-folder app-name)) ; path for result uber file 11 | 12 | (defn clean [_] 13 | (b/delete {:path "target"}) 14 | (println (format "Build folder \"%s\" removed" build-folder))) 15 | 16 | (defn uber [_] 17 | (clean nil) 18 | 19 | (b/copy-dir {:src-dirs ["src" "resources"] 20 | :ignores [".*mbox" ".*eml" ".*txt"] ; ignore test files 21 | :target-dir class-dir}) 22 | 23 | (b/compile-clj {:basis @basis 24 | :ns-compile '[plauna.entry] 25 | :java-opts ["-Dclojure.compiler.direct-linking=true" ] 26 | :class-dir class-dir}) 27 | 28 | (b/uber {:class-dir class-dir 29 | :uber-file uber-file-name 30 | :basis @basis 31 | :java-opts ["-Dclojure.compiler.direct-linking=true" ] 32 | :main 'plauna.entry}) ; here we specify the entry point for uberjar 33 | 34 | (println (format "Uber file created: \"%s\"" uber-file-name))) 35 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src" "classes" "resources"] 2 | :aliases {:build {:deps {io.github.clojure/tools.build {:git/tag "v0.10.3" :git/sha "15ead66"}} 3 | :ns-default build 4 | :jvm-opts ["-Dclojure.compiler.direct-linking=true" "-Xmx4000m"]} 5 | :cider-clj {:extra-deps {cider/cider-nrepl {:mvn/version "0.52.1"}} 6 | :classpath-overrides {org.clojure/clojure nil} 7 | :main-opts ["-m" "nrepl.cmdline" "--middleware" "[cider.nrepl/cider-middleware]"]} 8 | :1.12-storm 9 | {;; for disabling the official compiler 10 | :classpath-overrides {org.clojure/clojure nil} 11 | :extra-deps {com.github.flow-storm/clojure {:mvn/version "1.12.0-9"} 12 | com.github.flow-storm/flow-storm-dbg {:mvn/version "4.2.2"} 13 | cider/cider-nrepl {:mvn/version "0.52.1"}} 14 | :main-opts ["-m" "nrepl.cmdline" "--middleware" "[cider.nrepl/cider-middleware]"]} 15 | :test {:extra-paths ["test"] 16 | :extra-deps {io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "dfb30dd"}} 17 | :main-opts ["-m" "cognitect.test-runner"] 18 | :exec-fn cognitect.test-runner.api/test}} 19 | :deps {commons-codec/commons-codec {:mvn/version "1.17.1"} 20 | commons-io/commons-io {:mvn/version "2.18.0"} 21 | commons-net/commons-net {:mvn/version "3.11.1"} 22 | compojure/compojure {:mvn/version "1.7.1"} 23 | com.github.seancorfield/next.jdbc {:mvn/version "1.3.967"} 24 | com.github.seancorfield/honeysql {:mvn/version "2.6.1230"} 25 | com.taoensso/telemere-slf4j {:mvn/version "1.0.0-RC5"} 26 | com.taoensso/telemere {:mvn/version "1.0.0-RC5"} 27 | io.github.emidln/cld {:mvn/version "0.2.0"} 28 | jakarta.mail/jakarta.mail-api {:mvn/version "2.1.3"} 29 | org.apache.james/apache-mime4j {:mvn/version "0.8.11" :extension "pom"} 30 | org.apache.opennlp/opennlp-tools {:mvn/version "2.5.0"} 31 | org.clj-commons/claypoole {:mvn/version "1.2.2"} 32 | org.clojure/clojure {:mvn/version "1.12.0"} 33 | org.clojure/core.async {:mvn/version "1.6.681"} 34 | org.clojure/core.cache {:mvn/version "1.1.234"} 35 | org.clojure/data.json {:mvn/version "2.5.0"} 36 | org.clojure/java.data {:mvn/version "1.2.107"} 37 | org.eclipse.angus/angus-mail {:mvn/version "2.0.3"} 38 | org.flywaydb/flyway-core {:mvn/version "11.0.0"} 39 | org.jsoup/jsoup {:mvn/version "1.18.2"} 40 | org.xerial/sqlite-jdbc {:mvn/version "3.47.1.0"} 41 | ring/ring-core {:mvn/version "1.14.1"} 42 | ring/ring-jetty-adapter {:mvn/version "1.14.1"} 43 | selmer/selmer {:mvn/version "1.12.61"}}} 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/resources/plauna1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/docs/resources/plauna1.png -------------------------------------------------------------------------------- /docs/resources/plauna2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/docs/resources/plauna2.png -------------------------------------------------------------------------------- /docs/resources/plauna3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/docs/resources/plauna3.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "build": "npx @tailwindcss/cli -i resources/public/css/custom.css -c resources/public/js/tailwind.config.js -o resources/public/css/tailwind.css", 4 | "dev": "npx @tailwindcss/cli -i resources/public/css/custom.css -c resources/public/js/tailwind.config.js -o resources/public/css/tailwind.css -w" 5 | }, 6 | "dependencies": { 7 | }, 8 | "devDependencies": { 9 | "daisyui": "^5.0.23", 10 | "tailwindcss": "^4.1.4", 11 | "@tailwindcss/cli": "^4.1.4" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /resources/db/migration/B1__Initialize_db.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE headers( 2 | id INTEGER PRIMARY KEY autoincrement, 3 | message_id TEXT UNIQUE NOT NULL, 4 | in_reply_to TEXT, 5 | mime_type TEXT, 6 | subject TEXT, 7 | date DATE); 8 | 9 | CREATE INDEX idx_headers_message_id ON headers(message_id); 10 | 11 | CREATE TABLE bodies( 12 | id INTEGER PRIMARY KEY autoincrement, 13 | original_content TEXT, 14 | sanitized_content TEXT, 15 | mime_type TEXT NOT NULL, 16 | charset TEXT, 17 | transfer_encoding TEXT, 18 | name TEXT, 19 | message_id TEXT NOT NULL, 20 | UNIQUE(mime_type, message_id)); 21 | 22 | CREATE INDEX idx_bodies_message_id ON bodies(message_id); 23 | 24 | CREATE TABLE categories( 25 | id INTEGER PRIMARY KEY AUTOINCREMENT, 26 | name TEXT UNIQUE); 27 | 28 | CREATE TABLE category_training_preferences( 29 | id INTEGER PRIMARY KEY AUTOINCREMENT, 30 | language TEXT NOT NULL UNIQUE, 31 | use_in_training BOOLEAN); 32 | 33 | CREATE TABLE communications( 34 | id INTEGER PRIMARY KEY autoincrement, 35 | message_id TEXT REFERENCES headers(message_id), 36 | contact_key TEXT REFERENCES contacts(contact_key), 37 | type TEXT, 38 | UNIQUE(message_id, contact_key, type)); 39 | 40 | CREATE TABLE contacts( 41 | contact_key TEXT PRIMARY KEY, 42 | name TEXT, 43 | address TEXT); 44 | 45 | CREATE TABLE metadata( 46 | id INTEGER PRIMARY KEY AUTOINCREMENT, 47 | message_id UNIQUE REFERENCES headers(message_id), 48 | language TEXT, 49 | language_modified DATE, 50 | language_confidence REAL, 51 | category REFERENCES categories(id), 52 | category_modified DATE, 53 | category_confidence REAL); 54 | 55 | CREATE INDEX idx_metadata_message_id ON metadata(message_id); 56 | 57 | 58 | CREATE TABLE preferences( 59 | preference TEXT UNIQUE, 60 | value TEXT); 61 | -------------------------------------------------------------------------------- /resources/db/migration/V2__only_original_data_in_bodies.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE bodies 2 | DROP COLUMN sanitized_content; 3 | 4 | ALTER TABLE bodies 5 | RENAME COLUMN original_content TO content; 6 | 7 | ALTER TABLE bodies 8 | RENAME COLUMN name TO filename; 9 | 10 | ALTER TABLE bodies 11 | ADD content_disposition TEXT; 12 | 13 | 14 | CREATE TABLE new_bodies( 15 | id INTEGER PRIMARY KEY autoincrement, 16 | content TEXT, 17 | mime_type TEXT NOT NULL, 18 | charset TEXT, 19 | transfer_encoding TEXT, 20 | filename TEXT, 21 | content_disposition TEXT, 22 | message_id TEXT NOT NULL, 23 | UNIQUE(mime_type, message_id, content)); 24 | 25 | INSERT INTO new_bodies SELECT id, content, mime_type, charset, transfer_encoding, filename, content_disposition, message_id FROM bodies; 26 | 27 | DROP TABLE bodies; 28 | 29 | ALTER TABLE new_bodies RENAME TO bodies; 30 | 31 | CREATE INDEX idx_bodies_message_id ON bodies(message_id); 32 | -------------------------------------------------------------------------------- /resources/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/resources/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /resources/public/css/custom.css: -------------------------------------------------------------------------------- 1 | /*@config "../js/tailwind.config.js";*/ 2 | 3 | @import "tailwindcss"; 4 | @plugin "daisyui" { 5 | themes: autumn --default; 6 | } 7 | -------------------------------------------------------------------------------- /resources/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/resources/public/favicon-16x16.png -------------------------------------------------------------------------------- /resources/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/resources/public/favicon-32x32.png -------------------------------------------------------------------------------- /resources/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/resources/public/favicon.ico -------------------------------------------------------------------------------- /resources/public/plauna-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozangulle/plauna/f8542754b50096bafd8b4c56664db8f03e701cbb/resources/public/plauna-banner.png -------------------------------------------------------------------------------- /resources/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} -------------------------------------------------------------------------------- /resources/templates/admin-categories.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 | 7 | 8 | 9 | {% for category in categories %} 10 | 11 | 12 | 32 | 33 | {% endfor %} 34 | 35 |
Category NameDelete
{{category.name}} 13 | 14 | 15 | 31 |
36 |
37 |

New Category

38 |
39 | 40 |
41 | 42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /resources/templates/admin-languages.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 | {% for preference in language-preferences %} 7 | 8 | 9 | {% endfor %} 10 | 11 | 12 | 13 | 14 | 15 | 16 | {% for preference in language-preferences %} 17 | 18 | {% endfor %} 19 |
LanguageUse in Training?
{{preference.language}}
20 | 21 | 22 |
23 | 24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /resources/templates/admin-preferences.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 |
7 | 8 |
9 | 10 | 11 |
12 |
13 | 14 | 15 |
16 |
17 | 18 | 23 |
24 |
25 | 26 | 27 |
28 |
29 | 30 |
31 | {% endblock %} 32 | -------------------------------------------------------------------------------- /resources/templates/admin.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 |
7 |
8 |

Management

9 | 14 |
15 |
16 | 17 |
18 |
19 |

Parse E-mails from Mbox

20 |
21 | 22 | 23 | 24 |
25 |
26 |
27 |
28 | 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /resources/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Plauna 13 | 14 | 15 | 16 | {% if messages %} 17 |
18 | {% for message in messages %} 19 | 34 | {% endfor %} 35 |
36 | {% endif %} 37 | 53 | 66 |
67 | {% block content %} 68 | {% endblock %} 69 |
70 | 71 | 72 | -------------------------------------------------------------------------------- /resources/templates/email.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |

{{email.header.subject}}

6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 27 | 28 | 34 | 35 | 36 |
Message ID: {{email.header.message-id}}
Date: {{email.header.date|date:"dd.MM.yyyy HH:mm:ss"}}
Senders: {{email.participants|concat-senders}}
Recipients: {{email.participants|concat-receivers}}
Cc: {{email.participants|concat-cc}}
Bcc: {{email.participants|concat-bcc}}

Metadata

Language:

Language Confidence:

{{email.metadata.language-confidence}}
Category:
Category Confidence: {{email.metadata.category-confidence}}
29 | 30 |
37 | 38 |

Contents

39 | 40 | {% for body-part in email.body %} 41 | 42 | 43 | 44 | 45 | {% if body-part.filename %} {% endif %} 46 | {% if body-part.content-disposition %} {% endif %} 47 |
Mime Type: {{body-part.mime-type}}
Charset: {{body-part.charset}}
Transfer Encoding: {{body-part.transfer-encoding}}
File Name: {{body-part.filename}}
Content Disposition: {{body-part.content-disposition}}
48 | {% if body-part.mime-type = "text/plain" %} 49 |
50 | 51 |
52 | {{body-part.sanitized-content}} 53 |
54 | 55 |
56 | {{body-part.content}} 57 |
58 |
59 | {% elif body-part.mime-type = "text/html" %} 60 |
61 | 62 |
63 | {{body-part.sanitized-content}} 64 |
65 | 66 |
67 | {{body-part.content}} 68 |
69 |
70 | {% else %} 71 | Non-text content 72 | {% endif %} 73 | {% endfor %} 74 | 75 | {% endblock %} 76 | -------------------------------------------------------------------------------- /resources/templates/emails.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if emails|not-empty %} 6 | 7 |
8 | 9 |
Filter Options
10 |
11 |
12 | 23 |
24 |
25 |
26 | 27 |
Data Training
28 |
29 |
30 |
31 | 32 | 33 |
34 |
35 |
36 |
37 |
38 | 39 | 40 |
41 | 42 |
43 | 64 |
65 | 66 | 67 |
68 | 83 |
84 |
85 | 86 |
87 | 88 |
89 | {% for email in emails %} 90 | 91 | 92 | 93 | {% endfor %} 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | {% for email in emails %} 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 122 | 123 | 124 | {% endfor %} 125 | 126 |
DateSubject
{{email.header.date|date:"dd.MM.yyyy"}}{{email.header.subject}}
127 | 128 |
129 | 130 | {% endif %} 131 | 132 | {% if emails|empty? %} 133 | 134 | 135 | 141 | 142 | {% endif %} 143 | 144 | {% endblock %} 145 | -------------------------------------------------------------------------------- /resources/templates/statistics.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 |
Overall
7 |
Types
8 |
Languages
9 |
Categories
10 |
11 | 12 | {% if interval-filter %} 13 |

Filter

14 |
15 |
16 |
17 | 21 |
22 |
23 | 28 |
29 | 30 |
31 |
32 | 33 | {% endif %} 34 | 35 | 36 |
37 | {% for stats in statistics %} 38 | 39 |
40 |

{{stats.header}}

41 | 42 | {% if stats.type = :bar-chart %} 43 | 44 | {% if no-data %} 45 | 46 | 52 | 53 | {% else %} 54 |
55 | 56 | {% endif %} 57 | {% elif stats.type = :table %} 58 | {% if stats.data.values|empty? %} 59 | 60 | 66 | 67 | {% else %} 68 |
69 | 70 | 71 | {% for header in stats.data.headers %}{% endfor %} 72 | 73 | 74 | {% for values in stats.data.values %} 75 | 76 | {% for val in values %} 77 | 78 | {% endfor %} 79 | 80 | {% endfor %} 81 | 82 |
{{header}}
{{val}}
83 |
84 | {% endif %} 85 | {% endif %} 86 |
87 | {% endfor %} 88 |
89 | 90 | 91 | {% endblock %} 92 | -------------------------------------------------------------------------------- /resources/templates/training-review.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |
6 | 7 |

Category Training

8 |
9 |
10 | 11 |
12 |
13 | 14 | 15 |
16 | Categorize Fresh Data 17 |
18 | 19 |
20 | 25 | 26 |
27 | 28 | 29 |
30 |

Total e-mail count: {{page.total}}

31 |
32 | {% if page.page > 1 %} 33 | Prev 34 | {% else %} 35 | Prev 36 | {% endif %} 37 | {{page.page}} 38 | {% if page.page <= page.last-page %} 39 | Next 40 | {% else %} 41 | Next 42 | {% endif %} 43 |
44 |
45 | 46 | {% for email in emails %} 47 | 48 | 49 | 50 | {% endfor %} 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | {% for email in emails %} 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 72 | 73 | 74 | 75 | {% endfor %} 76 | 78 | 79 |
DateSenderSubjectLanguageLanguage ConfidenceCategoryCategory Confidence
{{email.header.date|date:"dd.MM.yyyy"}}{{email.participants|concat-senders}}{{email.header.subject}}{{email.metadata.language-confidence}}{{email.metadata.category-confidence}}
77 |
80 | 81 | {% endblock %} 82 | -------------------------------------------------------------------------------- /resources/templates/watcher.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 |

IMAP Connection for {{host}} - {{user}}

6 |
7 |
8 |
9 | 10 |
11 | 16 |
17 |
18 |
19 | 20 | 21 |
22 |
23 | 24 |
25 |
26 |
27 | 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /resources/templates/watchers.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if watchers|empty? %} 6 | 7 | 13 | 14 | {% endif %} 15 | 16 | {% if watchers|not-empty %} 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | {% for watcher in watchers %} 29 | 30 | 33 | 34 | 35 | 38 | 39 | {% endfor %} 40 | 41 |
AccountLogged-inFolder OpenReconnect
31 | {{watcher.string}} 32 | {{watcher.logged-in | iconize}}{{watcher.folder-open | iconize}} 36 | 37 |
42 | 43 | {% endif %} 44 | 45 | {% endblock %} 46 | -------------------------------------------------------------------------------- /resources/test/email_corpus/greek-text.mbox: -------------------------------------------------------------------------------- 1 | From 1212321321315@xxx Tue Feb 20 10:10:10 +0000 2020 2 | Message-ID: 3 | Date: Tue, 20 Feb 2020 10:10:10 +0000 4 | From: "Test Guy" 5 | To: another-test-person@test.com 6 | Subject: =?ISO-8859-7?B?0OHx3OTv8+c=?= 7 | MIME-Version: 1.0 8 | Content-Type: text/plain 9 | Delivered-To: test@test.com 10 | 11 | This is irrelevant -------------------------------------------------------------------------------- /resources/test/email_corpus/multipart-with-text-attachment.eml: -------------------------------------------------------------------------------- 1 | Date: Fri, 31 Jan 2025 08:00:00 +0500 2 | To: recipient@test.com 3 | From: sender@test.com 4 | Subject: Multipart With Text Attachment 5 | Message-ID: <12341234123412341234@test.com> 6 | MIME-Version: 1.0 7 | Content-Type: multipart/mixed; 8 | boundary="b1_123412341234123412341234" 9 | Content-Transfer-Encoding: 8bit 10 | 11 | --b1_123412341234123412341234 12 | Content-Type: text/plain; charset=iso-8859-1 13 | Content-Transfer-Encoding: 8bit 14 | 15 | Hello, 16 | 17 | this is an email body in text/plain. 18 | 19 | --b1_123412341234123412341234 20 | Content-Type: text/plain; charset=iso-8859-1; name="test-attach.txt" 21 | Content-Transfer-Encoding: base64 22 | Content-Disposition: attachment; filename=test-attach.txt 23 | 24 | VGhpcyBpcyBhIHRleHQgZmlsZSBpbnRlbmRlZCB0byBiZSBzZW50IGFzIGFuIGF0dGFjaG1lbnQu 25 | 26 | --b1_123412341234123412341234-- 27 | 28 | -------------------------------------------------------------------------------- /resources/test/email_corpus/simple-lorem-ipsum.eml: -------------------------------------------------------------------------------- 1 | Date: Thu, 10 Jan 2022 12:00:00 +0000 2 | From: tester@example.com 3 | To: test@example.com 4 | Subject: Lorem Ipsum Sample 5 | MIME-Version: 1.0 6 | Message-ID: 7 | Content-Type: text/plain; charset="UTF-8" 8 | Content-Transfer-Encoding: 7bit 9 | 10 | Dear Test, 11 | 12 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ac justo vel odio efficitur consectetur. Integer nec urna vitae elit imperdiet ultrices. Fusce vel neque vel justo dapibus luctus a eget quam. 13 | 14 | Sincerely, 15 | Tester 16 | -------------------------------------------------------------------------------- /resources/test/email_corpus/test-email-1.mbox: -------------------------------------------------------------------------------- 1 | From 12879452123124@xxx Wed Dec 10 10:10:10 +0000 2000 2 | Date: Wed, 10 Dec 2000 10:10:10 -0800 (PST) 3 | From: Test Guy 4 | Subject: Re: Testing 5 | To: Test Person 6 | Cc: cctest1 , cctest2 , cctest3 , cctest4 , cctest5 7 | MIME-Version: 1.0 8 | Content-Type: multipart/alternative; boundary="0-1813390255-1228280316=:99832" 9 | Message-ID: <485551.99832.qm@web55106.mail.test.com> 10 | 11 | --0-1813390255-1228280316=:99832 12 | Content-Type: text/plain; charset=utf-8 13 | Content-Transfer-Encoding: quoted-printable 14 | 15 | This is a test message. 16 | 17 | --0-1813390255-1228280316=:99832 18 | Content-Type: text/html; charset=utf-8 19 | Content-Transfer-Encoding: quoted-printable 20 | 21 | Test, but html. 22 | 23 | --0-1813390255-1228280316=:99832-- 24 | 25 | -------------------------------------------------------------------------------- /resources/test/email_corpus/weird-mbox.mbox: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | From nobody Mon Sep 17 00:00:00 2001 5 | From: A (zzz) 6 | U 7 | Thor 8 | (Comment) 9 | Date: Fri, 9 Jun 2006 00:44:16 -0700 10 | Subject: [PATCH] a commit. 11 | 12 | Here is a patch from A U Thor. 13 | 14 | --- 15 | foo | 2 +- 16 | 1 files changed, 1 insertions(+), 1 deletions(-) 17 | 18 | diff --git a/foo b/foo 19 | index 9123cdc..918dcf8 100644 20 | --- a/foo 21 | +++ b/foo 22 | @@ -1 +1 @@ 23 | -Fri Jun 9 00:44:04 PDT 2006 24 | +Fri Jun 9 00:44:13 PDT 2006 25 | -- 26 | 1.4.0.g6f2b 27 | 28 | From nobody Mon Sep 17 00:00:00 2001 29 | From: A U Thor 30 | Date: Fri, 9 Jun 2006 00:44:16 -0700 31 | Subject: [PATCH] another patch 32 | 33 | Here is a patch from A U Thor. This addresses the issue raised in the 34 | message: 35 | 36 | From: Nit Picker 37 | Subject: foo is too old 38 | Message-Id: 39 | 40 | Hopefully this would fix the problem stated there. 41 | 42 | 43 | I have included an extra blank line above, but it does not have to be 44 | stripped away here, along with the 45 | whitespaces at the end of the above line. They are expected to be squashed 46 | when the message is made into a commit log by stripspace, 47 | Also, there are three blank lines after this paragraph, 48 | two truly blank and another full of spaces in between. 49 | 50 | 51 | 52 | Hope this helps. 53 | 54 | --- 55 | foo | 2 +- 56 | 1 files changed, 1 insertions(+), 1 deletions(-) 57 | 58 | diff --git a/foo b/foo 59 | index 9123cdc..918dcf8 100644 60 | --- a/foo 61 | +++ b/foo 62 | @@ -1 +1 @@ 63 | -Fri Jun 9 00:44:04 PDT 2006 64 | +Fri Jun 9 00:44:13 PDT 2006 65 | -- 66 | 1.4.0.g6f2b 67 | 68 | From nobody Mon Sep 17 00:00:00 2001 69 | From: Junio C Hamano 70 | Date: Fri, 9 Jun 2006 00:44:16 -0700 71 | Subject: re: [PATCH] another patch 72 | 73 | From: A U Thor 74 | Subject: [PATCH] third patch 75 | 76 | Here is a patch from A U Thor. This addresses the issue raised in the 77 | message: 78 | 79 | From: Nit Picker 80 | Subject: foo is too old 81 | Message-Id: 82 | 83 | Hopefully this would fix the problem stated there. 84 | 85 | --- 86 | foo | 2 +- 87 | 1 files changed, 1 insertions(+), 1 deletions(-) 88 | 89 | diff --git a/foo b/foo 90 | index 9123cdc..918dcf8 100644 91 | --- a/foo 92 | +++ b/foo 93 | @@ -1 +1 @@ 94 | -Fri Jun 9 00:44:04 PDT 2006 95 | +Fri Jun 9 00:44:13 PDT 2006 96 | -- 97 | 1.4.0.g6f2b 98 | 99 | From nobody Sat Aug 27 23:07:49 2005 100 | Path: news.gmane.org!not-for-mail 101 | Message-ID: <20050721.091036.01119516.yoshfuji@linux-ipv6.org> 102 | From: YOSHIFUJI Hideaki / =?ISO-2022-JP?B?GyRCNUhGIzFRTEAbKEI=?= 103 | 104 | Newsgroups: gmane.comp.version-control.git 105 | Subject: [PATCH 1/2] GIT: Try all addresses for given remote name 106 | Date: Thu, 21 Jul 2005 09:10:36 -0400 (EDT) 107 | Lines: 99 108 | Organization: USAGI/WIDE Project 109 | Approved: news@gmane.org 110 | NNTP-Posting-Host: main.gmane.org 111 | Mime-Version: 1.0 112 | Content-Type: Text/Plain; charset=us-ascii 113 | Content-Transfer-Encoding: 7bit 114 | X-Trace: sea.gmane.org 1121951434 29350 80.91.229.2 (21 Jul 2005 13:10:34 GMT) 115 | X-Complaints-To: usenet@sea.gmane.org 116 | NNTP-Posting-Date: Thu, 21 Jul 2005 13:10:34 +0000 (UTC) 117 | 118 | Hello. 119 | 120 | Try all addresses for given remote name until it succeeds. 121 | Also supports IPv6. 122 | 123 | Signed-of-by: Hideaki YOSHIFUJI 124 | 125 | diff --git a/connect.c b/connect.c 126 | --- a/connect.c 127 | +++ b/connect.c 128 | @@ -96,42 +96,57 @@ static enum protocol get_protocol(const 129 | die("I don't handle protocol '%s'", name); 130 | } 131 | 132 | -static void lookup_host(const char *host, struct sockaddr *in) 133 | -{ 134 | - struct addrinfo *res; 135 | - int ret; 136 | - 137 | - ret = getaddrinfo(host, NULL, NULL, &res); 138 | - if (ret) 139 | - die("Unable to look up %s (%s)", host, gai_strerror(ret)); 140 | - *in = *res->ai_addr; 141 | - freeaddrinfo(res); 142 | -} 143 | +#define STR_(s) # s 144 | +#define STR(s) STR_(s) 145 | 146 | static int git_tcp_connect(int fd[2], const char *prog, char *host, char *path) 147 | { 148 | - struct sockaddr addr; 149 | - int port = DEFAULT_GIT_PORT, sockfd; 150 | - char *colon; 151 | - 152 | - colon = strchr(host, ':'); 153 | - if (colon) { 154 | - char *end; 155 | - unsigned long n = strtoul(colon+1, &end, 0); 156 | - if (colon[1] && !*end) { 157 | - *colon = 0; 158 | - port = n; 159 | + int sockfd = -1; 160 | + char *colon, *end; 161 | + char *port = STR(DEFAULT_GIT_PORT); 162 | + struct addrinfo hints, *ai0, *ai; 163 | + int gai; 164 | + 165 | + if (host[0] == '[') { 166 | + end = strchr(host + 1, ']'); 167 | + if (end) { 168 | + *end = 0; 169 | + end++; 170 | + host++; 171 | + } else 172 | + end = host; 173 | + } else 174 | + end = host; 175 | + colon = strchr(end, ':'); 176 | + 177 | + if (colon) 178 | + port = colon + 1; 179 | + 180 | + memset(&hints, 0, sizeof(hints)); 181 | + hints.ai_socktype = SOCK_STREAM; 182 | + hints.ai_protocol = IPPROTO_TCP; 183 | + 184 | + gai = getaddrinfo(host, port, &hints, &ai); 185 | + if (gai) 186 | + die("Unable to look up %s (%s)", host, gai_strerror(gai)); 187 | + 188 | + for (ai0 = ai; ai; ai = ai->ai_next) { 189 | + sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 190 | + if (sockfd < 0) 191 | + continue; 192 | + if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) { 193 | + close(sockfd); 194 | + sockfd = -1; 195 | + continue; 196 | } 197 | + break; 198 | } 199 | 200 | - lookup_host(host, &addr); 201 | - ((struct sockaddr_in *)&addr)->sin_port = htons(port); 202 | + freeaddrinfo(ai0); 203 | 204 | - sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP); 205 | if (sockfd < 0) 206 | die("unable to create socket (%s)", strerror(errno)); 207 | - if (connect(sockfd, (void *)&addr, sizeof(addr)) < 0) 208 | - die("unable to connect (%s)", strerror(errno)); 209 | + 210 | fd[0] = sockfd; 211 | fd[1] = sockfd; 212 | packet_write(sockfd, "%s %s\n", prog, path); 213 | 214 | -- 215 | YOSHIFUJI Hideaki @ USAGI Project 216 | GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA 217 | 218 | From nobody Sat Aug 27 23:07:49 2005 219 | Path: news.gmane.org!not-for-mail 220 | Message-ID: 221 | From: =?ISO8859-1?Q?David_K=E5gedal?= 222 | Newsgroups: gmane.comp.version-control.git 223 | Subject: [PATCH] Fixed two bugs in git-cvsimport-script. 224 | Date: Mon, 15 Aug 2005 20:18:25 +0200 225 | Lines: 83 226 | Approved: news@gmane.org 227 | NNTP-Posting-Host: main.gmane.org 228 | Mime-Version: 1.0 229 | Content-Type: text/plain; charset=ISO8859-1 230 | Content-Transfer-Encoding: QUOTED-PRINTABLE 231 | X-Trace: sea.gmane.org 1124130247 31839 80.91.229.2 (15 Aug 2005 18:24:07 GMT) 232 | X-Complaints-To: usenet@sea.gmane.org 233 | NNTP-Posting-Date: Mon, 15 Aug 2005 18:24:07 +0000 (UTC) 234 | Cc: "Junio C. Hamano" 235 | Original-X-From: git-owner@vger.kernel.org Mon Aug 15 20:24:05 2005 236 | 237 | The git-cvsimport-script had a copule of small bugs that prevented me 238 | from importing a big CVS repository. 239 | 240 | The first was that it didn't handle removed files with a multi-digit 241 | primary revision number. 242 | 243 | The second was that it was asking the CVS server for "F" messages, 244 | although they were not handled. 245 | 246 | I also updated the documentation for that script to correspond to 247 | actual flags. 248 | 249 | Signed-off-by: David K=E5gedal 250 | --- 251 | 252 | Documentation/git-cvsimport-script.txt | 9 ++++++++- 253 | git-cvsimport-script | 4 ++-- 254 | 2 files changed, 10 insertions(+), 3 deletions(-) 255 | 256 | 50452f9c0c2df1f04d83a26266ba704b13861632 257 | diff --git a/Documentation/git-cvsimport-script.txt b/Documentation/git= 258 | -cvsimport-script.txt 259 | --- a/Documentation/git-cvsimport-script.txt 260 | +++ b/Documentation/git-cvsimport-script.txt 261 | @@ -29,6 +29,10 @@ OPTIONS 262 | currently, only the :local:, :ext: and :pserver: access methods=20 263 | are supported. 264 | =20 265 | +-C :: 266 | + The GIT repository to import to. If the directory doesn't 267 | + exist, it will be created. Default is the current directory. 268 | + 269 | -i:: 270 | Import-only: don't perform a checkout after importing. This option 271 | ensures the working directory and cache remain untouched and will 272 | @@ -44,7 +48,7 @@ OPTIONS 273 | =20 274 | -p :: 275 | Additional options for cvsps. 276 | - The options '-x' and '-A' are implicit and should not be used here. 277 | + The options '-u' and '-A' are implicit and should not be used here. 278 | =20 279 | If you need to pass multiple options, separate them with a comma. 280 | =20 281 | @@ -57,6 +61,9 @@ OPTIONS 282 | -h:: 283 | Print a short usage message and exit. 284 | =20 285 | +-z :: 286 | + Pass the timestamp fuzz factor to cvsps. 287 | + 288 | OUTPUT 289 | ------ 290 | If '-v' is specified, the script reports what it is doing. 291 | diff --git a/git-cvsimport-script b/git-cvsimport-script 292 | --- a/git-cvsimport-script 293 | +++ b/git-cvsimport-script 294 | @@ -190,7 +190,7 @@ sub conn { 295 | $self->{'socketo'}->write("Root $repo\n"); 296 | =20 297 | # Trial and error says that this probably is the minimum set 298 | - $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mo= 299 | de M Mbinary E F Checked-in Created Updated Merged Removed\n"); 300 | + $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mo= 301 | de M Mbinary E Checked-in Created Updated Merged Removed\n"); 302 | =20 303 | $self->{'socketo'}->write("valid-requests\n"); 304 | $self->{'socketo'}->flush(); 305 | @@ -691,7 +691,7 @@ while() { 306 | unlink($tmpname); 307 | my $mode =3D pmode($cvs->{'mode'}); 308 | push(@new,[$mode, $sha, $fn]); # may be resurrected! 309 | - } elsif($state =3D=3D 9 and /^\s+(\S+):\d(?:\.\d+)+->(\d(?:\.\d+)+)\(= 310 | DEAD\)\s*$/) { 311 | + } elsif($state =3D=3D 9 and /^\s+(\S+):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)= 312 | \(DEAD\)\s*$/) { 313 | my $fn =3D $1; 314 | $fn =3D~ s#^/+##; 315 | push(@old,$fn); 316 | 317 | --=20 318 | David K=E5gedal 319 | - 320 | To unsubscribe from this list: send the line "unsubscribe git" in 321 | the body of a message to majordomo@vger.kernel.org 322 | More majordomo info at http://vger.kernel.org/majordomo-info.html 323 | 324 | From nobody Mon Sep 17 00:00:00 2001 325 | From: A U Thor 326 | References: 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | Date: Fri, 9 Jun 2006 00:44:16 -0700 377 | Subject: [PATCH] a commit. 378 | 379 | Here is a patch from A U Thor. 380 | 381 | --- 382 | foo | 2 +- 383 | 1 files changed, 1 insertions(+), 1 deletions(-) 384 | 385 | diff --git a/foo b/foo 386 | index 9123cdc..918dcf8 100644 387 | --- a/foo 388 | +++ b/foo 389 | @@ -1 +1 @@ 390 | -Fri Jun 9 00:44:04 PDT 2006 391 | +Fri Jun 9 00:44:13 PDT 2006 392 | -- 393 | 1.4.0.g6f2b 394 | 395 | From nobody Mon Sep 17 00:00:00 2001 396 | From: A U Thor 397 | Date: Fri, 9 Jun 2006 00:44:16 -0700 398 | Subject: [PATCH] another patch 399 | 400 | Here is an empty patch from A U Thor. 401 | 402 | From nobody Mon Sep 17 00:00:00 2001 403 | From: Junio C Hamano 404 | Date: Fri, 9 Jun 2006 00:44:16 -0700 405 | Subject: re: [PATCH] another patch 406 | 407 | From: A U Thor 408 | Subject: [PATCH] another patch 409 | >Here is an empty patch from A U Thor. 410 | 411 | Hey you forgot the patch! 412 | 413 | From nobody Mon Sep 17 00:00:00 2001 414 | From: A U Thor 415 | Date: Mon, 17 Sep 2001 00:00:00 +0900 416 | Mime-Version: 1.0 417 | Content-Type: Text/Plain; charset=us-ascii 418 | Content-Transfer-Encoding: Quoted-Printable 419 | 420 | =0A=0AFrom: F U Bar 421 | Subject: [PATCH] updates=0A=0AThis is to fix diff-format documentation. 422 | 423 | diff --git a/Documentation/diff-format.txt b/Documentation/diff-format.txt 424 | index b426a14..97756ec 100644 425 | --- a/Documentation/diff-format.txt 426 | +++ b/Documentation/diff-format.txt 427 | @@ -81,7 +81,7 @@ The "diff" formatting options can be customized via the 428 | environment variable 'GIT_DIFF_OPTS'. For example, if you 429 | prefer context diff: 430 | =20 431 | - GIT_DIFF_OPTS=3D-c git-diff-index -p $(cat .git/HEAD) 432 | + GIT_DIFF_OPTS=3D-c git-diff-index -p HEAD 433 | =20 434 | =20 435 | 2. When the environment variable 'GIT_EXTERNAL_DIFF' is set, the 436 | From b9704a518e21158433baa2cc2d591fea687967f6 Mon Sep 17 00:00:00 2001 437 | From: =?UTF-8?q?Lukas=20Sandstr=C3=B6m?= 438 | Date: Thu, 10 Jul 2008 23:41:33 +0200 439 | Subject: Re: discussion that lead to this patch 440 | MIME-Version: 1.0 441 | Content-Type: text/plain; charset=UTF-8 442 | Content-Transfer-Encoding: 8bit 443 | 444 | [PATCH] git-mailinfo: Fix getting the subject from the body 445 | 446 | "Subject: " isn't in the static array "header", and thus 447 | memcmp("Subject: ", header[i], 7) will never match. 448 | 449 | Signed-off-by: Lukas Sandström 450 | Signed-off-by: Junio C Hamano 451 | --- 452 | builtin-mailinfo.c | 2 +- 453 | 1 files changed, 1 insertions(+), 1 deletions(-) 454 | 455 | diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c 456 | index 962aa34..2d1520f 100644 457 | --- a/builtin-mailinfo.c 458 | +++ b/builtin-mailinfo.c 459 | @@ -334,7 +334,7 @@ static int check_header(char *line, unsigned linesize, char **hdr_data, int over 460 | return 1; 461 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { 462 | for (i = 0; header[i]; i++) { 463 | - if (!memcmp("Subject: ", header[i], 9)) { 464 | + if (!memcmp("Subject", header[i], 7)) { 465 | if (! handle_header(line, hdr_data[i], 0)) { 466 | return 1; 467 | } 468 | -- 469 | 1.5.6.2.455.g1efb2 470 | 471 | From nobody Fri Aug 8 22:24:03 2008 472 | Date: Fri, 8 Aug 2008 13:08:37 +0200 (CEST) 473 | From: A U Thor 474 | Subject: [PATCH 3/3 v2] Xyzzy 475 | MIME-Version: 1.0 476 | Content-Type: multipart/mixed; boundary="=-=-=" 477 | 478 | --=-=-= 479 | Content-Type: text/plain; charset=ISO8859-15 480 | Content-Transfer-Encoding: quoted-printable 481 | 482 | Here comes a commit log message, and 483 | its second line is here. 484 | --- 485 | builtin-mailinfo.c | 4 ++-- 486 | 487 | diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c 488 | index 3e5fe51..aabfe5c 100644 489 | --- a/builtin-mailinfo.c 490 | +++ b/builtin-mailinfo.c 491 | @@ -758,8 +758,8 @@ static void handle_body(void) 492 | /* process any boundary lines */ 493 | if (*content_top && is_multipart_boundary(&line)) { 494 | /* flush any leftover */ 495 | - if (line.len) 496 | - handle_filter(&line); 497 | + if (prev.len) 498 | + handle_filter(&prev); 499 | =20 500 | if (!handle_boundary()) 501 | goto handle_body_out; 502 | --=20 503 | 1.6.0.rc2 504 | 505 | --=-=-=-- 506 | 507 | From bda@mnsspb.ru Wed Nov 12 17:54:41 2008 508 | From: Dmitriy Blinov 509 | To: navy-patches@dinar.mns.mnsspb.ru 510 | Date: Wed, 12 Nov 2008 17:54:41 +0300 511 | Message-Id: <1226501681-24923-1-git-send-email-bda@mnsspb.ru> 512 | X-Mailer: git-send-email 1.5.6.5 513 | MIME-Version: 1.0 514 | Content-Type: text/plain; 515 | charset=utf-8 516 | Content-Transfer-Encoding: 8bit 517 | Subject: [Navy-patches] [PATCH] 518 | =?utf-8?b?0JjQt9C80LXQvdGR0L0g0YHQv9C40YHQvtC6INC/0LA=?= 519 | =?utf-8?b?0LrQtdGC0L7QsiDQvdC10L7QsdGF0L7QtNC40LzRi9GFINC00LvRjyA=?= 520 | =?utf-8?b?0YHQsdC+0YDQutC4?= 521 | 522 | textlive-* исправлены на texlive-* 523 | docutils заменён на python-docutils 524 | 525 | Действительно, оказалось, что rest2web вытягивает за собой 526 | python-docutils. В то время как сам rest2web не нужен. 527 | 528 | Signed-off-by: Dmitriy Blinov 529 | --- 530 | howto/build_navy.txt | 6 +++--- 531 | 1 files changed, 3 insertions(+), 3 deletions(-) 532 | 533 | diff --git a/howto/build_navy.txt b/howto/build_navy.txt 534 | index 3fd3afb..0ee807e 100644 535 | --- a/howto/build_navy.txt 536 | +++ b/howto/build_navy.txt 537 | @@ -119,8 +119,8 @@ 538 | - libxv-dev 539 | - libusplash-dev 540 | - latex-make 541 | - - textlive-lang-cyrillic 542 | - - textlive-latex-extra 543 | + - texlive-lang-cyrillic 544 | + - texlive-latex-extra 545 | - dia 546 | - python-pyrex 547 | - libtool 548 | @@ -128,7 +128,7 @@ 549 | - sox 550 | - cython 551 | - imagemagick 552 | - - docutils 553 | + - python-docutils 554 | 555 | #. на машине dinar: добавить свой открытый ssh-ключ в authorized_keys2 пользователя ddev 556 | #. на своей машине: отредактировать /etc/sudoers (команда ``visudo``) примерно следующим образом:: 557 | -- 558 | 1.5.6.5 559 | From nobody Mon Sep 17 00:00:00 2001 560 | From: (A U Thor) 561 | Date: Fri, 9 Jun 2006 00:44:16 -0700 562 | Subject: [PATCH] a patch 563 | 564 | From nobody Mon Sep 17 00:00:00 2001 565 | From: Junio Hamano 566 | Date: Thu, 20 Aug 2009 17:18:22 -0700 567 | Subject: Why doesn't git-am does not like >8 scissors mark? 568 | 569 | Subject: [PATCH] BLAH ONE 570 | 571 | In real life, we will see a discussion that inspired this patch 572 | discussing related and unrelated things around >8 scissors mark 573 | in this part of the message. 574 | 575 | Subject: [PATCH] BLAH TWO 576 | 577 | And then we will see the scissors. 578 | 579 | This line is not a scissors mark -- >8 -- but talks about it. 580 | - - >8 - - please remove everything above this line - - >8 - - 581 | 582 | Subject: [PATCH] Teach mailinfo to ignore everything before -- >8 -- mark 583 | From: Junio C Hamano 584 | 585 | This teaches mailinfo the scissors -- >8 -- mark; the command ignores 586 | everything before it in the message body. 587 | 588 | Signed-off-by: Junio C Hamano 589 | --- 590 | builtin-mailinfo.c | 37 ++++++++++++++++++++++++++++++++++++- 591 | 1 files changed, 36 insertions(+), 1 deletions(-) 592 | 593 | diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c 594 | index b0b5d8f..461c47e 100644 595 | --- a/builtin-mailinfo.c 596 | +++ b/builtin-mailinfo.c 597 | @@ -712,6 +712,34 @@ static inline int patchbreak(const struct strbuf *line) 598 | return 0; 599 | } 600 | 601 | +static int scissors(const struct strbuf *line) 602 | +{ 603 | + size_t i, len = line->len; 604 | + int scissors_dashes_seen = 0; 605 | + const char *buf = line->buf; 606 | + 607 | + for (i = 0; i < len; i++) { 608 | + if (isspace(buf[i])) 609 | + continue; 610 | + if (buf[i] == '-') { 611 | + scissors_dashes_seen |= 02; 612 | + continue; 613 | + } 614 | + if (i + 1 < len && !memcmp(buf + i, ">8", 2)) { 615 | + scissors_dashes_seen |= 01; 616 | + i++; 617 | + continue; 618 | + } 619 | + if (i + 7 < len && !memcmp(buf + i, "cut here", 8)) { 620 | + i += 7; 621 | + continue; 622 | + } 623 | + /* everything else --- not scissors */ 624 | + break; 625 | + } 626 | + return scissors_dashes_seen == 03; 627 | +} 628 | + 629 | static int handle_commit_msg(struct strbuf *line) 630 | { 631 | static int still_looking = 1; 632 | @@ -723,10 +751,17 @@ static int handle_commit_msg(struct strbuf *line) 633 | strbuf_ltrim(line); 634 | if (!line->len) 635 | return 0; 636 | - if ((still_looking = check_header(line, s_hdr_data, 0)) != 0) 637 | + still_looking = check_header(line, s_hdr_data, 0); 638 | + if (still_looking) 639 | return 0; 640 | } 641 | 642 | + if (scissors(line)) { 643 | + fseek(cmitmsg, 0L, SEEK_SET); 644 | + still_looking = 1; 645 | + return 0; 646 | + } 647 | + 648 | /* normalize the log message to UTF-8. */ 649 | if (metainfo_charset) 650 | convert_to_utf8(line, charset.buf); 651 | -- 652 | 1.6.4.1 653 | From nobody Mon Sep 17 00:00:00 2001 654 | From: A U Thor 655 | Subject: check bogus body header (from) 656 | Date: Fri, 9 Jun 2006 00:44:16 -0700 657 | 658 | From: bogosity 659 | - a list 660 | - of stuff 661 | --- 662 | diff --git a/foo b/foo 663 | index e69de29..d95f3ad 100644 664 | --- a/foo 665 | +++ b/foo 666 | @@ -0,0 +1 @@ 667 | +content 668 | 669 | From nobody Mon Sep 17 00:00:00 2001 670 | From: A U Thor 671 | Subject: check bogus body header (date) 672 | Date: Fri, 9 Jun 2006 00:44:16 -0700 673 | 674 | Date: bogus 675 | 676 | and some content 677 | 678 | --- 679 | diff --git a/foo b/foo 680 | index e69de29..d95f3ad 100644 681 | --- a/foo 682 | +++ b/foo 683 | @@ -0,0 +1 @@ 684 | +content 685 | 686 | -------------------------------------------------------------------------------- /resources/test/normalization/normalized-text-1.txt: -------------------------------------------------------------------------------- 1 | Newsletter May Plex Web now with global search Perhaps the biggest recent update with the most noticeable changes our Web player upgrade really blew some socks off with powerful new search capabilities and some really nice UI upgrades Check the blog for a complete rundown of all the awesome new stuff you get to enjoy in our best web app yet LEARN MORE Samsung TVs can t get enough of Plex One other super exciting development in our world this month is that we will be shipping our brand new official smart TV app on all Samsung smart TVs Plex is one of the top apps on Samsung TVs which are great for getting that movie theater experience with movie trailers and extras one of the perks you get with a Plex Pass Roll out has begun with their higher end models but Plex will be included on all models that support Tizen apps Roku keeps getting better Earlier this year we completely revamped our Roku app and now it s even better We ve added Rotten Tomatoes theme music for your collection of TV shows and handy shortcuts to mark entire seasons or series as watched or unwatched within the app We also added options to play a movie trailer or browse movie details from the playlist screen as well as navigate to the show season or episode details available for TV shows LEARN MORE Treats for our iOS users iOS got some groovy new fixings as well With the new post play screen binge watching shows is as easy as getting sucked into the blackhole that is baby goat memes Now you can begin watching the next episode or figure out what to watch after you finish a movie Or use offline syncing one of the most awesome Plex Pass features to have pre converted files accessible always In addition you can enjoy direct playback of Apple Lossless tracks All that and all the Rotten Tomatoes reviews one could ask for LEARN MORE Android gets subtitles and more Android both mobile and TV got several improvements in appearance and lots of bug fixes On mobile we added support for embedded PGS VobSub subtitles for Direct Played MKVs meaning you can always read along even when you re watching in bed with the volume way down We also updated the look of the friends screen and improved Camera Upload for our Plex Pass users For Android TV you can now get some additional video info during playback including how much of your video has loaded LEARN MORE Why Get a Plex Pass In addition to giving you access to awesome features and benefits Plex Pass subscriptions support our development of new features and platforms allowing us to bring the best experience to every Plex user Below are a few highlights of premium features For a complete list check out our Plex Pass overview page Mobile Sync Bring your media with you on your Android iOS Windows or Windows Phone device and enjoy it offline anywhere Cloud Sync Sync specific content from your Plex Media Server to supported cloud storage providers to enjoy it anywhere you have an internet connection Camera Upload Wirelessly sync your phone or tablet photos automatically to your Plex Media Server Parental Controls Enable parental controls create customized managed accounts and restrict content users can access Movie Trailers Extras Automatically see high quality movie trailers cast interviews and other extras for movies in your library Early Access Be the first to get new apps as soon as we release them free of charge Premium Music Enjoy lyrics from LyricFind automatic Plex Mix playlists based on mood or similar tracks and premium metadata matching for high quality art album reviews artist bios and more Stay updated win stuff Join us on Facebook Twitter and Google for up to date Plex information and the occasional chance to win something awesome Like a gadget not Barkley Nobody wins Barkley We want you We re always on the hunt for awesome and talented people to join our merry band of Plex makers While the search for amazing people never ends we re specifically looking for hardcore devops folks with scalable cross cloud orchestration tools experience as well as mobile engineers with experience in Android and iOS If you love Plex and are interested in joining the team we would love to talk to you As long as you are passionate talented and connected to the Internet it doesn t matter where you live our employees hail from all over this beautiful planet of ours SEND RESUME People love Plex Austin Keeley The best streaming music service is just setting up your own plex server It s obviously not for everyone but it s worked well for me Andy spinner Smith plex LOVE LOVE LOVE your software Server on Windows file server machine and AndroidTV app on my Sony Bravia Perfection Copyright Plex Inc All rights reserved 2 | -------------------------------------------------------------------------------- /resources/test/normalization/original-text-1.txt: -------------------------------------------------------------------------------- 1 | ( https://plex.tv?utm_source=Plex&utm_medium=email&utm_content=plex_logo_header&utm_campaign=May+Newsletter+2016 ) 2 | 3 | Newsletter May 2016 4 | 5 | Plex Web now with global search 6 | 7 | Perhaps the biggest recent update with the most noticeable changes, our Web player upgrade really blew some socks off, with powerful new search capabilities and some really nice UI upgrades. Check the blog for a complete rundown of all the awesome new stuff you get to enjoy in our best web app yet. 8 | 9 | LEARN MORE 10 | ( https://blog.plex.tv/2016/04/07/seek-plex-shall-find-leveling-web-app/?utm_source=Plex&utm_medium=email&utm_content=web_app_button&utm_campaign=May+Newsletter+2016 ) 11 | 12 | Samsung TVs can’t get enough of Plex 13 | 14 | One other super exciting development in our world this month is that we will be shipping our brand new official smart TV app on all 2016 Samsung smart TVs! Plex is one of the top apps on Samsung TVs, which are great for getting that movie theater experience with movie trailers and extras, one of the perks you get with a Plex Pass ( https://plex.tv/subscription/about?utm_source=Plex&utm_medium=email&utm_content=samsung_pp_inline&utm_campaign=May+Newsletter+2016 ). Roll-out has begun with their higher end models, but Plex will be included on all 2016 models that support Tizen apps. 15 | 16 | Roku keeps getting better 17 | 18 | Earlier this year, we completely revamped our Roku app, and now it’s even better. We’ve added Rotten Tomatoes, theme music for your collection of TV shows, and handy shortcuts to mark entire seasons or series as watched or unwatched within the app. We also added options to play a movie trailer or browse movie details from the playlist screen, as well as navigate to the show, season, or episode details available for TV shows. 19 | 20 | LEARN MORE 21 | ( https://forums.plex.tv/discussion/comment/1181617/?utm_source=Plex&utm_medium=email&utm_content=roku_button&utm_campaign=May+Newsletter+2016#Comment_1181617 ) 22 | 23 | Treats for our iOS users 24 | 25 | iOS got some groovy new fixings as well. With the new post-play screen, binge watching shows is as easy as getting sucked into the blackhole that is baby goat memes. Now you can begin watching the next episode or figure out what to watch after you finish a movie. Or use offline syncing, one of the most awesome Plex Pass ( https://plex.tv/subscription/about?utm_source=Plex&utm_medium=email&utm_content=ios_pp_inline&utm_campaign=May+Newsletter+2016 ) features, to have pre-converted files accessible always. In addition, you can enjoy direct playback of Apple Lossless tracks. All that and all the Rotten Tomatoes reviews one could ask for. 26 | 27 | LEARN MORE 28 | ( https://forums.plex.tv/discussion/comment/1173867/?utm_source=Plex&utm_medium=email&utm_content=ios_button&utm_campaign=May+Newsletter+2016#Comment_1173867 ) 29 | 30 | Android gets subtitles and more 31 | 32 | Android—both mobile and TV—got several improvements in appearance and lots of bug fixes. On mobile, we added support for embedded PGS/VobSub subtitles for Direct Played MKVs, meaning you can always read along, even when you’re watching in bed with the volume way down. We also updated the look of the friends screen and improved Camera Upload for our Plex Pass ( https://plex.tv/subscription/about?utm_source=Plex&utm_medium=email&utm_content=android_pp_inline&utm_campaign=May+Newsletter+2016 ) users. For Android TV, you can now get some additional video info during playback, including how much of your video has loaded. 33 | 34 | LEARN MORE 35 | ( https://forums.plex.tv/discussion/comment/1176384/?utm_source=Plex&utm_medium=email&utm_content=android_androidtv_button&utm_campaign=May+Newsletter+2016#Comment_1176384 ) 36 | 37 | Why Get a Plex Pass? 38 | 39 | In addition to giving you access to awesome features and benefits, Plex Pass subscriptions support our development of new features and platforms, allowing us to bring the best experience to every Plex user. Below are a few highlights of premium features. For a complete list, check out our Plex Pass ( https://plex.tv/subscription/about?utm_source=Plex&utm_medium=email&utm_content=pp_plex_pass&utm_campaign=May+Newsletter+2016 ) overview page. 40 | 41 | Mobile Sync 42 | 43 | Bring your media with you on your Android ( https://play.google.com/store/apps/details?id=com.plexapp.android&hl=en?utm_source=Plex&utm_medium=email&utm_content=pp_android&utm_campaign=May+Newsletter+2016 ), iOS ( https://itunes.apple.com/us/app/plex/id383457673?mt=8?utm_source=Plex&utm_medium=email&utm_content=pp_ios&utm_campaign=May+Newsletter+2016 ), Windows ( https://www.microsoft.com/en-us/store/apps/plex/9wzdncrfj3q8?utm_source=Plex&utm_medium=email&utm_content=pp_windows&utm_campaign=May+Newsletter+2016 ) or Windows Phone ( https://www.microsoft.com/en-us/store/apps/plex/9wzdncrfj3q8?utm_source=Plex&utm_medium=email&utm_content=pp_windows_phone&utm_campaign=May+Newsletter+2016 ) device and enjoy it offline anywhere. 44 | 45 | Cloud Sync 46 | 47 | Sync specific content from your Plex Media Server to supported cloud storage providers to enjoy it anywhere you have an internet connection. 48 | 49 | Camera Upload 50 | 51 | Wirelessly sync your phone or tablet photos automatically to your Plex Media Server. 52 | 53 | Parental Controls 54 | 55 | Enable parental controls, create customized, managed accounts, and restrict content users can access. 56 | 57 | Movie Trailers & Extras 58 | 59 | Automatically see high quality movie trailers, cast interviews, and other extras for movies in your library. 60 | 61 | Early Access 62 | 63 | Be the first to get new apps as soon as we release them, free of charge. 64 | 65 | Premium Music 66 | 67 | Enjoy lyrics from LyricFind ( https://blog.plex.tv/2015/12/23/let-it-snow-lyrics-for-all-your-music/?utm_source=Plex&utm_medium=email&utm_content=pp_lyricfind&utm_campaign=May+Newsletter+2016 ), automatic Plex Mix playlists based on mood or similar tracks, and premium metadata matching for high quality art, album reviews, artist bios and more! 68 | 69 | Stay updated & win stuff 70 | 71 | Join us on Facebook, Twitter, and Google+ for up-to-date Plex information and the occasional chance to win something awesome. (Like a gadget, not Barkley. Nobody wins Barkley.) 72 | 73 | ( https://twitter.com/plex?utm_source=Plex&utm_medium=email&utm_content=win_stuff_twitter&utm_campaign=May+Newsletter+2016 ) 74 | 75 | ( https://www.facebook.com/plexapp?utm_source=Plex&utm_medium=email&utm_content=win_stuff_facebook&utm_campaign=May+Newsletter+2016 ) 76 | 77 | ( https://plus.google.com/+plex?utm_source=Plex&utm_medium=email&utm_content=win_stuff_goggle&utm_campaign=May+Newsletter+2016 ) 78 | 79 | We want you! 80 | 81 | We're always on the hunt for awesome and talented people to join our merry band of Plex makers. While the search for amazing people never ends, we’re specifically looking for hardcore devops folks with scalable cross-cloud orchestration tools experience, as well as mobile engineers with experience in Android and iOS. If you love Plex and are interested in joining the team, we would love to talk to you! As long as you are passionate, talented, and connected to the Internet, it doesn’t matter where you live—our employees hail from all over this beautiful planet of ours. 82 | 83 | SEND RESUME 84 | ( mailto:careers@plex.tv ) 85 | 86 | People love Plex 87 | 88 | Austin Keeley 89 | ( https://twitter.com/austinkeeley/status/732547851893538816?ref_src=twsrc%5Etfw?utm_source=Plex&utm_medium=email&utm_content=love_plex_austin&utm_campaign=May+Newsletter+2016 ) 90 | 91 | The best streaming music service is just setting up your own @plex ( https://twitter.com/plex?utm_source=Plex&utm_medium=email&utm_content=love_plex_plex_1&utm_campaign=May+Newsletter+2016 ) server. It's obviously not for everyone, but it's worked well for me. 92 | 93 | Andy spinner Smith 94 | ( https://twitter.com/andycs169/status/732855102428581889?ref_src=twsrc%5Etfw?utm_source=Plex&utm_medium=email&utm_content=love_plex_plex_andy&utm_campaign=May+Newsletter+2016 ) 95 | 96 | @plex ( https://twitter.com/plex?utm_source=Plex&utm_medium=email&utm_content=love_plex_plex_2&utm_campaign=May+Newsletter+2016 ) LOVE LOVE LOVE your software. Server on Windows file server machine and AndroidTV app on my Sony Bravia. #Perfection ( https://twitter.com/hashtag/Perfection?src=hash&ref_src=twsrc%5Etfw?utm_source=Plex&utm_medium=email&utm_content=love_plex_perfection&utm_campaign=May+Newsletter+2016 ) 97 | 98 | ( https://plex.tv?utm_source=Plex&utm_medium=email&utm_content=plex_logo_footer&utm_campaign=May+Newsletter+2016 ) 99 | 100 | ( https://twitter.com/plex?utm_source=Plex&utm_medium=email&utm_content=social_footer_twitter&utm_campaign=May+Newsletter+2016 ) 101 | 102 | ( https://www.facebook.com/plexapp?utm_source=Plex&utm_medium=email&utm_content=social_footer_facebook&utm_campaign=May+Newsletter+2016 ) 103 | 104 | ( https://plus.google.com/+plex?utm_source=Plex&utm_medium=email&utm_content=social_footer_google&utm_campaign=May+Newsletter+2016 ) 105 | 106 | ( https://www.linkedin.com/company/plex-inc?utm_source=Plex&utm_medium=email&utm_content=social_footer_linkdin&utm_campaign=May+Newsletter+2016 ) 107 | 108 | ( https://blog.plex.tv/feed/?utm_source=Plex&utm_medium=email&utm_content=social_footer_rss&utm_campaign=May+Newsletter+2016 ) 109 | 110 | Copyright © 2016 Plex, Inc. All rights reserved. 111 | -------------------------------------------------------------------------------- /resources/test/test.edn: -------------------------------------------------------------------------------- 1 | {:data-folder "./tmp/"} 2 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | # save this as shell.nix 2 | { pkgs ? import {}}: 3 | 4 | pkgs.mkShell { 5 | buildInputs = [ 6 | pkgs.clojure 7 | pkgs.clojure-lsp 8 | pkgs.clj-kondo 9 | pkgs.cljfmt 10 | (pkgs.jdk23.override { enableJavaFX = true; }) 11 | pkgs.javaPackages.openjfx23 12 | pkgs.xorg.libXtst 13 | pkgs.xorg.libXxf86vm 14 | pkgs.libGL 15 | pkgs.glib.out 16 | pkgs.gtk3 17 | pkgs.git-cliff 18 | pkgs.nodejs_22 19 | pkgs.tailwindcss_4]; 20 | 21 | shellHook = '' 22 | export JAVA_HOME="${pkgs.jdk23}/lib/openjdk" 23 | export JAVAFX_PATH="${pkgs.javaPackages.openjfx23}/lib" 24 | export LD_LIBRARY_PATH="${pkgs.libGL}/lib:${pkgs.gtk3}/lib:${pkgs.glib.out}/lib:${pkgs.xorg.libXtst}/lib:${pkgs.xorg.libXxf86vm.out}/lib"; 25 | ''; 26 | } 27 | -------------------------------------------------------------------------------- /src/plauna/analysis.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.analysis 2 | (:require [clojure.string :as st] 3 | [plauna.database :as db] 4 | [plauna.core.events :as events] 5 | [plauna.core.email :as core-email] 6 | [plauna.preferences :as p] 7 | [plauna.util.text-transform :as tt] 8 | [clojure.core.async :as async] 9 | [taoensso.telemere :as t] 10 | [cld.core :as lang] 11 | [plauna.files :as files]) 12 | (:import 13 | (opennlp.tools.util.normalizer AggregateCharSequenceNormalizer NumberCharSequenceNormalizer ShrinkCharSequenceNormalizer CharSequenceNormalizer) 14 | (opennlp.tools.util MarkableFileInputStreamFactory PlainTextByLineStream TrainingParameters) 15 | (opennlp.tools.doccat DocumentSampleStream DocumentCategorizerME DoccatFactory DoccatModel) 16 | (opennlp.tools.ml.naivebayes NaiveBayesTrainer) 17 | (opennlp.tools.ml.maxent GISTrainer) 18 | (java.util Locale) 19 | (java.util.regex Pattern) 20 | (java.io File OutputStream))) 21 | 22 | (set! *warn-on-reflection* true) 23 | 24 | (def BracketsNormalizer (reify CharSequenceNormalizer 25 | (normalize [_ text] ((comp st/trim #(st/replace % #"\( \)" "")) text)))) 26 | 27 | (def MailtoNormalizer (reify CharSequenceNormalizer 28 | (normalize [_ text] ((comp st/trim #(st/replace % #"(mailto:)?(? (count text) 3) 62 | (let [result (second (lang/detect text)) 63 | confidence (Double/parseDouble (first (vals result))) 64 | lang-code (lang-code-set3 (first (keys result)))] 65 | {:code (if (< confidence (p/language-detection-threshold)) "n/a" lang-code) 66 | :confidence confidence}) 67 | {:code "n/a" :confidence 0.0}))) 68 | 69 | (defn training-data-stream [file] 70 | (-> (MarkableFileInputStreamFactory. file) 71 | (PlainTextByLineStream. "UTF-8") 72 | (DocumentSampleStream.))) 73 | 74 | (defn training-body-part [email] (core-email/body-part-for-mime-type "text/html" email)) 75 | 76 | (defn format-training-data [data] 77 | (transduce 78 | (comp (map (fn [email] [(get-in email [:metadata :category]) (training-body-part email)])) 79 | (map (fn [[category body-part]] [category 80 | (if (some? (:subject body-part)) (st/trim (:subject body-part)) "") 81 | (tt/clean-text-content (:content body-part) (core-email/text-content-type body-part))])) 82 | (map (fn [[category subject text-content]] [category subject (normalize text-content)])) 83 | (map (fn [[category subject normalized-content]] (str category " " subject " " normalized-content "\n")))) 84 | str 85 | "" 86 | data)) 87 | 88 | (defn training-parameters [] 89 | (doto (new TrainingParameters) 90 | (.put TrainingParameters/ITERATIONS_PARAM 1000) 91 | (.put TrainingParameters/CUTOFF_PARAM 0) 92 | (.put TrainingParameters/ALGORITHM_PARAM (categorization-algorithm)))) 93 | 94 | (comment NaiveBayesTrainer/NAIVE_BAYES_VALUE 95 | GISTrainer/MAXENT_VALUE 96 | "") 97 | 98 | (defn serialize-and-write-model! [^DoccatModel model ^OutputStream os] 99 | (when (some? model) (.serialize model os))) 100 | 101 | (defn train-data [training-files] 102 | (for [tf training-files] 103 | (try 104 | {:model 105 | (DocumentCategorizerME/train (:language tf) (training-data-stream (:file tf)) (training-parameters) (DoccatFactory.)) 106 | :language (:language tf)} 107 | (catch Exception e (t/log! {:level :error :error e} (.getMessage e)))))) 108 | 109 | (defn categorize [text ^File model-file] 110 | (if (.exists model-file) 111 | (let [doccat (DocumentCategorizerME. (DoccatModel. model-file)) 112 | cat-results (.categorize doccat (into-array String (st/split text #" "))) 113 | best-category (.getBestCategory doccat cat-results) 114 | best-probability (get cat-results (.getIndex doccat best-category))] 115 | (if (> best-probability (p/categorization-threshold)) 116 | {:name best-category :confidence best-probability} 117 | {:name nil :confidence 0})) 118 | {:name nil :confidence 0})) 119 | 120 | (defn normalize-body-part [body-part] 121 | (when (some? body-part) 122 | (normalize (tt/clean-text-content (:content body-part) (core-email/text-content-type body-part))))) 123 | 124 | (defn category-for-text [text language-code] 125 | (when (and (some? text) (some? language-code)) 126 | (let [allowed-languages (mapv :language (filter #(= 1 (:use_in_training %)) (db/get-language-preferences)))] 127 | (when (some #(= language-code %) allowed-languages) (categorize text (files/model-file language-code)))))) 128 | 129 | (defn detect-language-and-categorize-event [event] 130 | (let [email (:payload event) 131 | body-part-to-train-on (core-email/body-part-for-mime-type "text/html" email) 132 | training-content (normalize-body-part body-part-to-train-on) 133 | language-result (detect-language training-content) 134 | category-result (category-for-text training-content (:code language-result)) 135 | category-id (if (nil? (:name category-result)) nil (:id (db/category-by-name (:name category-result))))] 136 | (core-email/construct-enriched-email email {:language (:code language-result) :language-confidence (:confidence language-result)} {:category (:name category-result) :category-confidence (:confidence category-result) :category-id category-id}))) 137 | 138 | (defn detect-language-event [event] 139 | (let [email (:payload event) 140 | body-part-to-train-on (core-email/body-part-for-mime-type "text/html" email) 141 | training-content (normalize-body-part body-part-to-train-on) 142 | language-result (try (detect-language training-content) (catch Exception e (t/log! {:level :error :error e} [(.getMessage e) "\nText causing the exception:" training-content])))] 143 | (core-email/construct-enriched-email email {:language (:code language-result) :language-confidence (:confidence language-result)} {:category (-> email :metadata :category) :category-confidence (-> email :metadata :category-confidence) :category-id (-> email :metadata :category-id)}))) 144 | 145 | (defmulti handle-enrichment :type) 146 | 147 | (defmethod handle-enrichment :parsed-enrichable-email [event] 148 | (events/create-event :enriched-email (detect-language-and-categorize-event event) nil event)) 149 | 150 | (defmethod handle-enrichment :language-detection-request [event] 151 | (events/create-event :enriched-email (detect-language-event event) nil event)) 152 | 153 | (defn enrichment-event-loop 154 | "Enriches the e-mails. Listens to two events: 155 | 156 | :parsed-enrichable-email - Detects both the language and the category 157 | :language-detection-request - Only detects the language" 158 | [publisher events-channel] 159 | (let [parsed-enrichable-email-chan (async/chan) 160 | language-detection-request-chan (async/chan) 161 | local-chan (async/merge [parsed-enrichable-email-chan language-detection-request-chan])] 162 | (async/sub publisher :parsed-enrichable-email parsed-enrichable-email-chan) 163 | (async/sub publisher :language-detection-request language-detection-request-chan) 164 | (async/pipeline 4 165 | events-channel 166 | (map handle-enrichment) 167 | local-chan 168 | true 169 | (fn [^Throwable th] (t/log! {:level :error :error th} (.getMessage th)))))) 170 | -------------------------------------------------------------------------------- /src/plauna/client.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.client 2 | (:require 3 | [clojure.core.async :as async] 4 | [plauna.database :as db] 5 | [plauna.core.events :as events] 6 | [plauna.preferences :as p] 7 | [clojure.string :as s] 8 | [taoensso.telemere :as t] 9 | [plauna.messaging :as messaging]) 10 | (:import 11 | (clojure.lang PersistentVector) 12 | (jakarta.mail Store Session Folder Message Flags$Flag) 13 | (org.eclipse.angus.mail.imap IMAPFolder IMAPMessage) 14 | (jakarta.mail.event MessageCountAdapter MessageCountEvent) 15 | (jakarta.mail.search MessageIDTerm) 16 | (java.io ByteArrayOutputStream) 17 | (java.lang AutoCloseable) 18 | (java.util Properties) 19 | (java.util.concurrent Executors) 20 | (org.eclipse.angus.mail.imap IdleManager IMAPStore) 21 | (java.util.concurrent Executors TimeUnit ScheduledExecutorService))) 22 | 23 | ; Names 24 | ; Config without secret -> identifier 25 | ; Watching folder -> FolderMonitor 26 | ; Periodic checks: Health checks 27 | ; idling -> monitoring 28 | 29 | (set! *warn-on-reflection* true) 30 | 31 | (defonce executor-service (Executors/newSingleThreadScheduledExecutor)) 32 | 33 | (defonce idle-manager (atom nil)) 34 | 35 | (defonce parent-folder-name "Categories") 36 | 37 | (defonce connections (atom {})) 38 | 39 | (defonce health-checks (atom {})) 40 | 41 | ;; id -> listener 42 | (defonce message-count-listeners (atom {})) 43 | 44 | (defn default-port-for-security [security] 45 | (if (= security :ssl) 993 143)) 46 | 47 | (defn security [connection-config] 48 | (let [security (get connection-config :security :ssl)] 49 | (if (some #(= security %) [:ssl :starttls :plain]) 50 | security 51 | :ssl))) 52 | 53 | (defn port [connection-config] 54 | (str (get connection-config :port (default-port-for-security (security connection-config))))) 55 | 56 | (defn check-ssl-certs? [connection-config] (get connection-config :check-ssl-certs true)) 57 | 58 | (defn default-imap-properties ^Properties [connection-config] 59 | (doto (new Properties) 60 | (.setProperty "mail.imap.port" (port connection-config)) 61 | (.setProperty "mail.imap.usesocketchannels" "true") 62 | (.setProperty "mail.imap.timeout" "5000") 63 | (.setProperty "mail.imap.partialfetch" "false") 64 | (.setProperty "mail.imap.fetchsize" "1048576"))) 65 | 66 | (defn security-properties [connection-config] 67 | (let [security-key (security connection-config)] 68 | (fn [^Properties properties] 69 | (cond (= security-key :ssl) (doto properties (.setProperty "mail.imap.ssl.enable", "true")) 70 | (= security-key :starttls) (doto properties (.setProperty "mail.imap.starttls.enable", "true")) 71 | (= security-key :plain) properties 72 | :else (doto properties (.setProperty "mail.imap.ssl.enable", "true")))))) 73 | 74 | (defn certification-check-properties [connection-config] 75 | (if (not (check-ssl-certs? connection-config)) 76 | (fn [^Properties properties] (doto properties (.setProperty "mail.imap.ssl.trust", "*"))) 77 | (fn [^Properties properties] properties))) 78 | 79 | (defn find-by-id-in-watchers [id] 80 | (get @connections id)) 81 | 82 | (defn set-debug-mode [connection-config] 83 | (let [debug? (get connection-config :debug false)] 84 | (fn [^Session session] 85 | (if debug? (doto session (.setDebug true)) session)))) 86 | 87 | (defn config->session [connection-config] 88 | (-> (default-imap-properties connection-config) 89 | ((security-properties connection-config)) 90 | ((certification-check-properties connection-config)) 91 | Session/getInstance 92 | ((set-debug-mode connection-config)))) 93 | 94 | (defn connection-config->store [connection-config] 95 | (let [session ^Session (config->session connection-config)] 96 | (if (= security :ssl) 97 | (.getStore session "imaps") 98 | (.getStore session "imap")))) 99 | 100 | (defn login [connection-config] 101 | (let [store ^Store (connection-config->store connection-config)] 102 | (.connect store (:host connection-config) (:user connection-config) (:secret connection-config)) 103 | store)) 104 | 105 | (defn folder-separator [^Store store] (.getSeparator (.getDefaultFolder store))) 106 | 107 | (defn read-all-emails [id ^String folder-name options] 108 | (t/log! :info ["Read all e-mails from:" folder-name "with options:" options]) 109 | (async/go (let [store (:store (:monitor (find-by-id-in-watchers id))) 110 | folder ^Folder (doto (.getFolder ^Store store folder-name) (.open Folder/READ_WRITE))] 111 | (vec (doseq [message-num (range 1 (inc (.getMessageCount ^Folder folder))) 112 | :let [message ^Message (.getMessage ^Folder folder message-num)]] 113 | (with-open [os (ByteArrayOutputStream.)] 114 | (.writeTo message os) 115 | (async/>!! @messaging/main-chan (events/create-event :received-email 116 | (.toByteArray os) 117 | {:enrich true :move (:move options) :id id :folder folder :original-folder folder-name :message message})))))))) 118 | 119 | (defn create-folder [^Store store ^String folder-name result-map] 120 | (let [folder ^IMAPFolder (.getFolder store folder-name)] 121 | (if (not (.exists folder)) 122 | (do (.create folder Folder/HOLDS_MESSAGES) 123 | (conj result-map {folder-name :created})) 124 | (conj result-map {folder-name :already-exists})))) 125 | 126 | (defn structured-folder-name [store lower-case-folder-name] 127 | (str parent-folder-name (folder-separator store) (s/capitalize lower-case-folder-name))) 128 | 129 | (defn copy-message [^Message message ^Folder source-folder ^Folder target-folder] 130 | (try 131 | (.setPeek ^IMAPMessage message true) 132 | (.copyMessages source-folder (into-array Message [message]) target-folder) 133 | (t/log! :debug ["Copied" message]) 134 | (.setFlag message Flags$Flag/DELETED true) 135 | (t/log! :debug ["Set DELETED flag for" message]) 136 | (.expunge source-folder) 137 | (t/log! :debug ["Expunged source folder"]) 138 | (catch Exception e (t/log! {:level :error :error e} ["There was an error copying and deleting the message" message])))) 139 | 140 | (defn move-message [id ^Message message ^Folder source-folder ^String target-name] 141 | (let [connection (get @connections id) 142 | store (:store (:monitor connection)) 143 | capabilities ^PersistentVector (:capabilities connection) 144 | target-folder ^IMAPFolder (.getFolder ^Store store ^String (structured-folder-name store target-name))] 145 | (if (.contains capabilities :move) 146 | (do (.setPeek ^IMAPMessage message true) 147 | (.moveMessages ^IMAPFolder source-folder (into-array Message [message]) target-folder)) 148 | (do (t/log! :debug "Server does not support the IMAP MOVE command. Using copy and delete as fallback.") 149 | (copy-message message source-folder target-folder))))) 150 | 151 | (defn client-event-loop 152 | "Listens to :enriched-email 153 | 154 | Options: 155 | :move - boolean 156 | 157 | If :move is true, move the e-mail to the corresponding category folder." 158 | [publisher] 159 | (let [local-chan (async/chan)] 160 | (async/sub publisher :enriched-email local-chan) 161 | (async/go-loop [event (async/FolderMonitor (:store (:monitor connection-data)) folder (:listen-channel (:monitor connection-data)))) 206 | 207 | (defn capability-name [^IMAPStore store ^String cap-name] 208 | (when (.hasCapability store cap-name) 209 | (keyword (clojure.string/lower-case cap-name)))) 210 | 211 | (defn connection-object [^FolderMonitor monitor config capabilities] 212 | {:monitor monitor 213 | :config config 214 | :capabilities capabilities}) 215 | 216 | (defn swap-connection-with-new-monitor [identifier new-monitor] 217 | (let [old-connection (get @connections identifier) 218 | old-config (:config old-connection) 219 | old-capabilities (:capabilities old-connection)] ; assuming the capabilities won't change on reconnect 220 | (swap! connections assoc identifier (connection-object new-monitor old-config old-capabilities)))) 221 | 222 | (defn connection-object-with-capabilities [^FolderMonitor monitor config] 223 | (let [store (:store monitor)] 224 | (connection-object monitor config (filterv some? (mapv #(capability-name store %) ["MOVE"]))))) 225 | 226 | (defn message-count-adapter [id folder folder-name] 227 | (proxy [MessageCountAdapter] [] 228 | (messagesAdded [^MessageCountEvent event] 229 | (t/log! :info "Received new message event.") 230 | (doseq [message ^IMAPMessage (.getMessages event)] 231 | (t/log! :debug ["Processing message:" message]) 232 | (.setPeek ^IMAPMessage message true) 233 | (with-open [os ^OutputStream (ByteArrayOutputStream.)] 234 | (.writeTo ^IMAPMessage message os) 235 | (async/>!! @messaging/main-chan (events/create-event :received-email (.toByteArray os) {:enrich true :move true :id id :folder folder :original-folder folder-name :message message})))) 236 | (try 237 | (t/log! :debug ["Handled messagesAdded event. Resuming to watch the folder" folder-name]) 238 | (.watch ^IdleManager @idle-manager folder) 239 | (catch Exception e 240 | (t/log! {:level :error :error e} (.getMessage e))))))) 241 | 242 | (defn start-monitoring [id ^Store store ^String folder-name] 243 | (let [folder ^IMAPFolder (.getFolder store folder-name)] 244 | (when (not (.isOpen folder)) 245 | (.open folder Folder/READ_WRITE)) 246 | (let [listener (message-count-adapter id folder folder-name)] 247 | (swap! message-count-listeners conj {id listener}) 248 | (.addMessageCountListener folder listener)) 249 | (try 250 | (.watch ^IdleManager @idle-manager folder) 251 | (t/log! :info ["Started monitoring for" folder-name "in" (.getURLName store)]) 252 | (catch Exception e 253 | (t/log! {:level :error :error e} (.getMessage e)))) 254 | folder)) 255 | 256 | (defn stop-monitoring [id] 257 | (let [{monitor :monitor} (get @connections id) 258 | listener (get @message-count-listeners id)] 259 | (t/log! :debug ["Removing message count listener from folder" (:folder monitor)]) 260 | (.removeMessageCountListener ^IMAPFolder (:folder monitor) listener))) 261 | 262 | (defn swap-new-period-check [identifier future] 263 | (swap! health-checks (fn [futures new-future] (conj futures {identifier new-future})) future)) 264 | 265 | (defn start-monitoring-and-change-state [identifier connection-data] 266 | (let [folder (start-monitoring identifier (:store (:monitor connection-data)) (:folder (:config connection-data))) 267 | new-monitor (monitor-with-new-folder connection-data folder)] 268 | (swap-connection-with-new-monitor identifier new-monitor))) 269 | 270 | (defn reconnect-to-store [identifier] 271 | (let [connection-data (get @connections identifier) 272 | store ^Store (:store (:monitor connection-data))] 273 | (t/log! :debug "Closing store.") 274 | (.close store) 275 | (try 276 | (t/log! :debug "Connecting to store.") 277 | (.connect store) 278 | (t/log! :debug "Starting to idle.") 279 | (start-monitoring-and-change-state identifier connection-data) 280 | (catch Exception e (t/log! {:level :error :error e} (.getMessage e)))))) 281 | 282 | (defn check-connection [identifier] 283 | (let [{monitor :monitor} (get @connections identifier) 284 | store (:store monitor)] 285 | (if (.isConnected ^Store store) 286 | (t/log! :debug "Store is still connected.") 287 | (do (t/log! :warn "Connection lost. Reconnecting to email server...") 288 | (reconnect-to-store identifier))))) 289 | 290 | (defn check-folder [identifier] 291 | (let [{monitor :monitor} (get @connections identifier) 292 | folder (:folder monitor)] 293 | (if (.isOpen ^Folder folder) 294 | (t/log! :debug "Folder is still open.") 295 | (do (t/log! :info "Folder is closed. Reconnecting.") 296 | (reconnect-to-store identifier))))) 297 | 298 | (defn create-idle-manager [session] 299 | (when (nil? @idle-manager) 300 | (reset! idle-manager (IdleManager. session (Executors/newCachedThreadPool))))) 301 | 302 | (defn health-check-for-identifier [identifier] 303 | (let [scheduled-future (.scheduleAtFixedRate ^ScheduledExecutorService executor-service 304 | #(do 305 | (try 306 | (t/log! :debug ["Checking if the connection for id" identifier "is open"]) 307 | (check-connection identifier) 308 | (t/log! :debug ["Checking if the folder for id" identifier "is open"]) 309 | (check-folder identifier) 310 | (let [{monitor :monitor} (get @connections identifier) 311 | ^Folder folder (:folder monitor) 312 | ^Store store (:store monitor) 313 | ^IdleManager im @idle-manager] 314 | (t/log! :debug ["Resuming to watch folder:" (.getURLName store) "-" (.getFullName folder)]) 315 | (.watch im (:folder monitor))) 316 | (catch Exception e (do (t/log! {:level :error :error e} "There was a problem during health check.") 317 | (reconnect-to-store identifier))))) 318 | 120 (p/client-health-check-interval) TimeUnit/SECONDS)] 319 | (swap-new-period-check identifier scheduled-future))) 320 | 321 | (defn config-id [something] 322 | (str (hash something))) 323 | 324 | (defn connection-config->identifier [connection-config] 325 | (let [cleaned-config (dissoc connection-config :id)] 326 | (config-id cleaned-config))) 327 | 328 | (defn create-folder-monitor [connection-config channel] 329 | (create-idle-manager (config->session connection-config)) 330 | (let [store (login connection-config) 331 | identifier (connection-config->identifier connection-config) 332 | folder (start-monitoring identifier store (:folder connection-config))] 333 | (swap! connections (fn [subs new-data] 334 | (conj subs {identifier new-data})) 335 | (connection-object-with-capabilities (->FolderMonitor store folder channel) connection-config)) 336 | (health-check-for-identifier identifier) 337 | store)) 338 | 339 | (defn connect-using-id [id] 340 | (let [connection (get @connections id) 341 | listen-channel (:listen-channel (get @connections (:config connection)))] 342 | (create-folder-monitor (:config connection) listen-channel))) 343 | 344 | (defn monitor->map [monitor] 345 | (let [store ^Store (-> monitor :store) 346 | folder ^IMAPFolder (-> monitor :folder)] 347 | {:connected (.isConnected ^Store store) 348 | :folder (.isOpen ^IMAPFolder folder)})) 349 | 350 | (defn folders-in-store [^Store store] 351 | (.list (.getDefaultFolder store) "*")) 352 | 353 | (defn inbox-or-category-folder-name [^Store store ^String folder-name] 354 | (if (nil? folder-name) "INBOX" (structured-folder-name store folder-name))) 355 | 356 | (defn move-messages-by-id-between-category-folders [^String id ^Store store message-id ^String source-name ^String target-name] 357 | (let [{config :config} (get @connections id) 358 | source-folder-name (inbox-or-category-folder-name store source-name) 359 | target-folder-name (inbox-or-category-folder-name store target-name)] 360 | (with-open [target-folder ^IMAPFolder (doto (.getFolder ^Store store ^String target-folder-name) (.open Folder/READ_WRITE)) 361 | source-folder ^AutoCloseable (doto (.getFolder ^Store store ^String source-folder-name) (.open Folder/READ_WRITE))] 362 | (let [found-messages (.search source-folder (MessageIDTerm. message-id))] 363 | (t/log! :debug ["Found" (count found-messages) "messages when searched for the message-id:" message-id]) 364 | (when (seq found-messages) 365 | (stop-monitoring id) 366 | (t/log! :debug ["Moving e-mail from" source-folder-name "to" target-folder-name]) 367 | (.moveMessages ^IMAPFolder source-folder (into-array Message found-messages) target-folder)))) 368 | (start-monitoring id store (:folder config)))) 369 | 370 | -------------------------------------------------------------------------------- /src/plauna/core/email.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.core.email 2 | (:require [clojure.math :as math] 3 | [clojure.string :as s])) 4 | 5 | (set! *warn-on-reflection* true) 6 | 7 | (defrecord Header [message-id in-reply-to subject mime-type date]) 8 | 9 | (defrecord Body-Part [message-id charset mime-type transfer-encoding content filename content-disposition]) 10 | 11 | (defrecord Participant [address name contact-key type message-id]) 12 | 13 | (defrecord Email [^Header header body participants]) 14 | 15 | (defrecord Metadata [message-id language language-modified language-confidence category category-id category-modified category-confidence]) 16 | 17 | (defrecord EnrichedEmail [^Header header body participants ^Metadata metadata]) 18 | 19 | (defrecord EnrichedBodyPart [^Body-Part body-part ^Metadata metadata]) 20 | 21 | (defn construct-body-part [body-part] (map->Body-Part body-part)) 22 | 23 | (defn construct-participants [participant] 24 | (let [args ((juxt :address :name :contact-key :type :message-id) participant)] 25 | (if (keyword? (get args 3)) 26 | (apply ->Participant args) 27 | (apply ->Participant (assoc args 3 (keyword (s/replace-first (get args 3) ":" ""))))))) 28 | 29 | (defn construct-header [header] (map->Header header)) 30 | 31 | (defn construct-email [raw-header raw-body raw-participants] 32 | ;(sp/conform ::email-specs/body raw-body) 33 | (let [body-parts (map construct-body-part raw-body) 34 | participants (map construct-participants raw-participants) 35 | header (construct-header raw-header)] 36 | (->Email header body-parts participants))) 37 | 38 | (defn construct-enriched-email [email language-metadata category-metadata] 39 | (->EnrichedEmail (:header email) 40 | (:body email) 41 | (:participants email) 42 | (->Metadata (-> email :header :message-id) 43 | (:language language-metadata) 44 | (get language-metadata :language-modified nil) 45 | (:language-confidence language-metadata) 46 | (:category category-metadata) 47 | (:category-id category-metadata) 48 | (get category-metadata :category-modified nil) 49 | (:category-confidence category-metadata)))) 50 | 51 | (defn iterate-over-all-pages [call-with-pagination fun query sql-query mutates?] 52 | (let [data-with-current-page (call-with-pagination query sql-query) 53 | remaining-pages (-> (/ (:total data-with-current-page) (:size (:page query))) 54 | math/ceil 55 | (- (:page data-with-current-page)))] 56 | (fun (:data data-with-current-page)) 57 | (if (> remaining-pages 0) 58 | (recur call-with-pagination fun (if mutates? query (update-in query [:page :page] inc)) sql-query mutates?) 59 | nil))) 60 | 61 | (defn attachment? [body-part] (or (= "attachment" (:content-disposition body-part)) (some? (:filename body-part)))) 62 | 63 | (defn text-content? [mime-type] (.startsWith ^String mime-type "text")) 64 | 65 | (defn body-text-content? [body-part] (text-content? (:mime-type body-part))) 66 | 67 | (defn text-content-type [body-part] 68 | (let [mime-type (:mime-type body-part)] 69 | (cond (.endsWith ^String mime-type "html") :html 70 | (.endsWith ^String mime-type "rtf") :rtf 71 | :else :plain))) 72 | 73 | (defn body-part-for-mime-type 74 | "When supplied with an e-mail, select a mime-type and extract its contents for training purposes. 75 | If the selected mime-type does not exist, it returns the text content of the first mime-type available. 76 | If the e-mail has no body, returns nil." 77 | [mime-type email] 78 | (let [body-parts (filter #(and (not (attachment? %)) (body-text-content? %)) (:body email))] 79 | (cond (empty? (:body email)) ;; is this possible? 80 | nil 81 | (= 1 (count body-parts)) 82 | (first body-parts) 83 | :else 84 | (let [first-match (first (filter #(.equals ^String (:mime-type %) mime-type) body-parts))] 85 | (if (some? first-match) first-match (first body-parts)))))) 86 | -------------------------------------------------------------------------------- /src/plauna/core/events.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.core.events 2 | (:require 3 | [clojure.core.async :refer [chan go insert-ignore [insert-query] 48 | (let [insert-part (first insert-query)] 49 | (conj (rest insert-query) (string/replace insert-part #"INSERT" "INSERT OR IGNORE")))) 50 | 51 | (defn insert->insert-update [insert-query] 52 | (let [insert-part (first insert-query)] 53 | (conj (rest insert-query) (string/replace insert-part #"INSERT" "INSERT OR REPLACE")))) 54 | 55 | (defn save-headers [headers] 56 | (jdbc/execute! (jdbc/get-connection (db)) 57 | (->> 58 | (builder/for-insert-multi 59 | :headers 60 | [:mime_type :subject :message_id :date :in_reply_to] 61 | (mapv (juxt :mime-type :subject :message-id :date :in-reply-to) headers) {}) 62 | (insert->insert-ignore)) 63 | {:batch true})) 64 | 65 | (defn save-bodies [bodies] 66 | (jdbc/execute! (jdbc/get-connection (db)) 67 | (->> 68 | (builder/for-insert-multi 69 | :bodies 70 | [:content :mime_type :charset :transfer_encoding :message_id :filename :content_disposition] 71 | (mapv (juxt :content :mime-type :charset :transfer-encoding :message-id :filename :content-disposition) bodies) {}) 72 | (insert->insert-ignore)) 73 | {:batch true})) 74 | 75 | (defn save-contacts [contacts] 76 | (jdbc/execute! (jdbc/get-connection (db)) 77 | (->> 78 | (builder/for-insert-multi 79 | :contacts 80 | [:contact_key :name :address] 81 | (mapv (juxt :contact-key :name :address) contacts) {}) 82 | (insert->insert-ignore)) 83 | {:batch true})) 84 | 85 | (defn save-communications [contacts] 86 | (jdbc/execute! (jdbc/get-connection (db)) 87 | (->> (builder/for-insert-multi 88 | :communications 89 | [:message_id :contact_key :type] 90 | (mapv (juxt :message-id :contact-key :type) contacts) {}) 91 | (insert->insert-ignore)) 92 | {:batch true})) 93 | 94 | (defn update-metadata-batch [metadata] 95 | (when (seq metadata) 96 | (jdbc/execute! (jdbc/get-connection (db)) 97 | (->> (builder/for-insert-multi 98 | :metadata 99 | [:message_id :language :language_confidence :category :category_confidence] 100 | (mapv (juxt :message-id :language :language-confidence :category-id :category-confidence) metadata) {}) 101 | (insert->insert-update)) 102 | {:batch true}))) 103 | 104 | (def batch-size 500) 105 | 106 | (defn empty-buffer [] {:headers [] :bodies [] :participants [] :metadata []}) 107 | 108 | (defn add-to-buffer [e-mail buffer] 109 | (let [updated-buffer 110 | (-> (update buffer :headers conj (:header e-mail)) 111 | (update :bodies concat (:body e-mail)) 112 | (update :participants concat (:participants e-mail)))] 113 | (if (some? (:metadata e-mail)) 114 | (update updated-buffer :metadata conj (:metadata e-mail)) 115 | updated-buffer))) 116 | 117 | (defn save-emails-in-buffer [buffer] 118 | (try 119 | (save-headers (:headers buffer)) 120 | (save-bodies (:bodies buffer)) 121 | (save-contacts (:participants buffer)) 122 | (save-communications (:participants buffer)) 123 | (when (seq (:metadata buffer)) (update-metadata-batch (:metadata buffer))) 124 | (catch Exception e (t/log! {:level :error :error e} (.getMessage e))))) 125 | 126 | (defn database-event-loop [publisher] 127 | (let [parsed-chan (async/chan) 128 | enriched-chan (async/chan) 129 | local-chan (async/merge [parsed-chan enriched-chan] batch-size)] 130 | (async/sub publisher :parsed-email local-chan) 131 | (async/sub publisher :enriched-email local-chan) 132 | (async/go-loop [event (async/ (count (:headers buffer)) batch-size) 142 | (do (t/log! :debug ["DB buffer full. Emptying"]) 143 | (let [updated-buffer (add-to-buffer (:payload event) buffer)] 144 | (save-emails-in-buffer updated-buffer)) 145 | (recur (async/ (insert-into :metadata) 164 | (values [{:message_id message_id 165 | :category category 166 | :category_modified [:strftime "%s" "now"] 167 | :category_confidence cat-confidence 168 | :language language 169 | :language_modified [:strftime "%s" "now"] 170 | :language_confidence lang-confidence}]) 171 | (upsert (-> (on-conflict :message_id) 172 | (do-update-set :category 173 | :category_modified 174 | :category_confidence 175 | :language 176 | :language_modified 177 | :language_confidence))) 178 | (honey/format)))) 179 | 180 | (defn update-metadata-category [message_id category confidence] 181 | (jdbc/execute! (ds) (-> (insert-into :metadata) 182 | (values [{:message_id message_id :category category :category_modified [:strftime "%s" "now"] :category_confidence confidence}]) 183 | (upsert (-> (on-conflict :message_id) 184 | (do-update-set :category :category_modified :category_confidence))) 185 | (honey/format)))) 186 | 187 | (defn get-categories [] 188 | (jdbc/execute! (ds) (honey/format {:select [:*] :from :categories}) builder-function)) 189 | 190 | (defn create-category [category] 191 | (jdbc/execute! (ds) (honey/format {:insert-into :categories :columns [:name] :values [[category]]}))) 192 | 193 | (defn delete-category-by-id [id] 194 | (jdbc/execute! (ds) (honey/format {:delete-from :categories :where [:= :id id]}))) 195 | 196 | (defn category-by-name [category-name] 197 | (jdbc/execute-one! (ds) (honey/format {:select [:*] :from :categories :where [:= :name category-name]}) builder-function)) 198 | 199 | (defn get-languages [] 200 | (jdbc/execute! (ds) ["select language from metadata group by language"] builder-function)) 201 | 202 | (defn get-language-preferences [] 203 | (jdbc/execute! (ds) ["select * from category_training_preferences where language is not 'n/a'"] builder-function)) 204 | 205 | (defn get-activated-language-preferences [] 206 | (jdbc/execute! (ds) ["select * from category_training_preferences where use_in_training = 1"] builder-function)) 207 | 208 | (defn add-language-preferences [preferences] 209 | (jdbc/execute! (ds) (insert->insert-ignore (honey/format {:insert-into :category-training-preferences :columns [:language :use-in-training] :values preferences})))) 210 | 211 | (defn update-language-preference [preference] 212 | (jdbc/execute! (ds) ["UPDATE category_training_preferences SET use_in_training = ? WHERE id = ?" (:use preference) (:id preference)] builder-function)) 213 | 214 | (defn years-of-data [] 215 | (mapv :year (jdbc/execute! (ds) [(str "SELECT " year-format " as year FROM headers WHERE date IS NOT NULL GROUP BY year ORDER BY year;")] builder-function))) 216 | 217 | ;;;;;;;;;;;;;; Refactored call stuff 218 | 219 | (defn headers-for-strict-options [strict] 220 | (if strict 221 | "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers INNER JOIN metadata ON headers.message_id = metadata.message_id" 222 | "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers LEFT JOIN metadata ON headers.message_id = metadata.message_id")) 223 | 224 | (defn body-parts-for-options [] "SELECT * FROM bodies INNER JOIN metadata ON metadata.message_id = bodies.message_id") 225 | 226 | (defn participants [] "SELECT * FROM communications INNER JOIN contacts ON communications.contact_key = contacts.contact_key") 227 | 228 | (defn interval-for [key] (get intervals key :yearly)) 229 | 230 | (defn interval-for-honey [key] (get honey-intervals key :yearly)) 231 | 232 | (defn convert-to-count [sql-result entity] 233 | (let [sql (first sql-result) 234 | to-format (string/replace (string/replace sql #"SELECT .* FROM" "SELECT COUNT(%s) as count FROM") #"ORDER.*$" "")] 235 | (cond (= entity :enriched-email) (flatten [(format to-format "headers.message_id") (rest sql-result)]) 236 | (= entity :body-part) (flatten [(format to-format "bodies.message_id") (rest sql-result)])))) 237 | 238 | (def key-lookup {:message-id :headers.message_id 239 | :date :headers.date 240 | :category :metadata.category 241 | :language :metadata.language 242 | :category-modified :metadata.category_modified}) 243 | 244 | (defn change-important-keys [key] 245 | (let [lookup (get key-lookup key)] 246 | (if (nil? lookup) 247 | key 248 | lookup))) 249 | 250 | (defmulti data->sql :entity) 251 | 252 | (defmethod data->sql :body-part [_ sql-clause] 253 | (let [jdbc-sql (honey/format (postwalk change-important-keys sql-clause))] 254 | (flatten [(str (body-parts-for-options) " " (first jdbc-sql)) (rest jdbc-sql)]))) 255 | 256 | (defmethod data->sql :enriched-email 257 | ([entity-clause sql-clause] 258 | (let [strict (:strict entity-clause) 259 | jdbc-sql (honey/format (postwalk change-important-keys sql-clause))] 260 | (flatten [(str (headers-for-strict-options strict) " " (first jdbc-sql)) (rest jdbc-sql)]))) 261 | ([entity-clause] 262 | (let [strict (:strict entity-clause)] 263 | [(headers-for-strict-options strict)]))) 264 | 265 | (defmethod data->sql :participant [_ sql-clause] 266 | (let [first-part {:select [:communications.contact-key :message-id :type :name :address] :from [:communications] :join [:contacts [:= :contacts.contact-key :communications.contact-key]]}] 267 | (->> (conj first-part sql-clause) 268 | honey/format))) 269 | 270 | (defn fetch-headers [entity-clause sql-clause] (jdbc/execute! (ds) (data->sql entity-clause sql-clause) builder-function-kebab)) 271 | 272 | (defn fetch-metadata [message-id] (jdbc/execute-one! (ds) ["SELECT message_id, language, language_modified, language_confidence, metadata.category AS category_id, category_modified, category_confidence, categories.name AS category FROM metadata LEFT JOIN categories ON metadata.category = categories.id WHERE metadata.message_id = ?" message-id] builder-function-kebab)) 273 | 274 | (defn fetch-bodies [message-id] (jdbc/execute! (ds) ["SELECT * FROM bodies WHERE message_id = ?" message-id] builder-function-kebab)) 275 | 276 | (defn fetch-participants [message-id] (jdbc/execute! (ds) ["SELECT * FROM communications LEFT JOIN contacts ON contacts.contact_key = communications.contact_key WHERE message_id = ? " message-id] builder-function-kebab)) 277 | 278 | (defn db->metadata [db-metadata] (apply core.email/->Metadata ((juxt :message-id :language :language-modified :language-confidence :category :category-id :category-modified :category-confidence) db-metadata))) 279 | 280 | (defn related-data-to-header [header] 281 | (let [message-id (:message-id header) 282 | metadata (db->metadata (fetch-metadata message-id)) 283 | bodies (map core.email/construct-body-part (fetch-bodies message-id)) 284 | participants (map core.email/construct-participants (fetch-participants message-id))] 285 | (core.email/->EnrichedEmail header bodies participants metadata))) 286 | 287 | (defmulti fetch-data (fn [options _] (:entity options))) 288 | 289 | (defmethod fetch-data :body-part [entity-clause sql-clause] 290 | (if (nil? (:page entity-clause)) 291 | (let [result (jdbc/execute! (ds) (data->sql entity-clause sql-clause) builder-function-kebab)] 292 | (map (fn [el] (core.email/->EnrichedBodyPart (core.email/construct-body-part el) (core.email/map->Metadata el))) result)) 293 | (let [limit-offset (page/page-request->limit-offset (:page entity-clause)) 294 | sql-clause-with-limit-offset (conj sql-clause limit-offset) 295 | result (jdbc/execute! (ds) (data->sql entity-clause sql-clause-with-limit-offset) builder-function-kebab) 296 | data (map (fn [el] (core.email/->EnrichedBodyPart (core.email/construct-body-part el) (core.email/map->Metadata el))) result)] 297 | {:data data 298 | :size (count data) 299 | :page (inc (quot (:offset limit-offset) (:limit limit-offset))) 300 | :total (:count (jdbc/execute-one! (ds) (convert-to-count (data->sql entity-clause sql-clause) (:entity entity-clause)) builder-function-kebab))}))) 301 | 302 | (defmethod fetch-data :enriched-email [entity-clause sql-clause] 303 | (if (nil? (:page entity-clause)) 304 | (map related-data-to-header (map core.email/construct-header (fetch-headers entity-clause sql-clause))) 305 | (let [limit-offset (page/page-request->limit-offset (:page entity-clause)) 306 | sql-clause-with-limit-offset (conj sql-clause limit-offset) 307 | data (map related-data-to-header (map core.email/construct-header (fetch-headers entity-clause sql-clause-with-limit-offset)))] 308 | {:data data 309 | :size (count data) 310 | :page (inc (quot (:offset limit-offset) (:limit limit-offset))) 311 | :total (:count (jdbc/execute-one! (ds) (convert-to-count (data->sql entity-clause sql-clause) (:entity entity-clause)) builder-function-kebab))}))) 312 | 313 | (defmethod fetch-data :participant [entity-clause sql-clause] 314 | (map core.email/map->Participant (jdbc/execute! (ds) (data->sql entity-clause sql-clause) builder-function-kebab))) 315 | 316 | (defn yearly-email-stats [] 317 | (jdbc/execute! (ds) [(str "SELECT COUNT(message_id) AS count, " (interval-for :yearly) " AS date FROM headers WHERE date IS NOT NULL GROUP BY " (interval-for :yearly))])) 318 | 319 | (defn query-db [honeysql-query] 320 | (jdbc/execute! (ds) (honey/format honeysql-query) builder-function-kebab)) 321 | 322 | (defn update-preference [preference value] 323 | (jdbc/execute! (ds) 324 | (-> {:insert-into [:preferences] 325 | :columns [:preference :value] 326 | :values [[(name preference) value]]} 327 | (honey/format) 328 | (insert->insert-update)) builder-function-kebab)) 329 | 330 | (defn fetch-preference [preference] 331 | (let [result (jdbc/execute-one! (ds) 332 | (honey/format {:select [:value] 333 | :from [:preferences] 334 | :where [:= :preference (name preference)]}) builder-function-kebab)] 335 | (when (some? result) (:value result)))) 336 | -------------------------------------------------------------------------------- /src/plauna/entry.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.entry 2 | (:require 3 | [clojure.core.async :refer [chan] :as async] 4 | [plauna.files :as files] 5 | [plauna.server :as server] 6 | [plauna.client :as client] 7 | [plauna.database :as database] 8 | [plauna.messaging :as messaging] 9 | [plauna.analysis :as analysis] 10 | [taoensso.telemere :as t] 11 | [plauna.parser :as parser] 12 | [plauna.core.events :as events] 13 | [plauna.preferences :as preferences]) 14 | (:gen-class)) 15 | 16 | (defn setup-logging [] 17 | (t/set-min-level! :info) 18 | ;; jetty is very noisy. Disable all jetty logs. 19 | (t/set-ns-filter! {:disallow "org.eclipse.jetty.*"})) 20 | 21 | (set! *warn-on-reflection* true) 22 | 23 | (def event-register {:enrichment-event-loop (fn [] (analysis/enrichment-event-loop @messaging/main-publisher @messaging/main-chan)) 24 | :client-event-loop (fn [] (client/client-event-loop @messaging/main-publisher)) 25 | :database-event-loop (fn [] (database/database-event-loop @messaging/main-publisher)) 26 | :parser-event-loop (fn [] (parser/parser-event-loop @messaging/main-publisher @messaging/main-chan))}) 27 | 28 | (defn start-imap-client 29 | [config] 30 | (let [listen-channel (chan 10)] 31 | (doseq [client-config (:clients (:email config))] 32 | (let [store (client/create-folder-monitor client-config listen-channel)] 33 | (client/create-imap-directories! store) 34 | (client/check-necessary-capabilities store))) 35 | (t/log! :debug "Listening to new emails from listen-channel"))) 36 | 37 | (defn -main 38 | [& args] 39 | (setup-logging) 40 | (let [application-config (files/parse-config-from-cli-arguments args)] 41 | (files/check-and-create-database-file) 42 | (database/create-db) 43 | (t/log! :info "Setting log level according to preferences.") 44 | (t/set-min-level! (preferences/log-level)) 45 | (start-imap-client application-config) 46 | (events/start-event-loops event-register) 47 | (server/start-server application-config))) 48 | -------------------------------------------------------------------------------- /src/plauna/files.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.files 2 | (:require [clojure.java.io :as io] 3 | [clojure.tools.reader.edn :as edn] 4 | [clojure.core.async :as async] 5 | [clojure.string :as string] 6 | [taoensso.telemere :as t] 7 | [plauna.messaging :as messaging] 8 | [plauna.core.events :as events] 9 | [plauna.files :as files]) 10 | (:import [java.io File])) 11 | 12 | (set! *warn-on-reflection* true) 13 | 14 | (def custom-config-location (ref nil)) 15 | 16 | (def database-file "email.db") 17 | 18 | (defn default-config-location [] "~/.config/plauna.edn") 19 | 20 | (defn set-custom-config-location! [location] 21 | (if (some? location) 22 | (do (t/log! :info ["Setting custom config location to:" location]) 23 | (dosync (alter custom-config-location (fn [_ l] l) location))) 24 | (t/log! :info ["No config file was supplied. Using the default config location:" (default-config-location)]))) 25 | 26 | (defn expand-home [^String s] 27 | (if (.startsWith s "~") 28 | (clojure.string/replace-first s "~" (System/getProperty "user.home")) 29 | s)) 30 | 31 | (defn config-location [] (if (some? @custom-config-location) 32 | (expand-home @custom-config-location) 33 | (expand-home (default-config-location)))) 34 | 35 | (defn config [] (edn/read-string (slurp (config-location)))) 36 | 37 | (defn file-dir [] 38 | (let [configured-location (expand-home (:data-folder (config)))] 39 | (if (some? configured-location) 40 | configured-location 41 | (expand-home "~/.local/state/plauna")))) 42 | 43 | (defn check-and-create-database-file [] 44 | (let [db-file (io/file (file-dir) database-file)] 45 | (if (.exists db-file) 46 | nil 47 | (do (.mkdirs (io/file (file-dir))) 48 | (.createNewFile db-file))))) 49 | 50 | (defn delete-database-file [] 51 | (let [db-file (io/file (file-dir) database-file)] 52 | (if (.exists db-file) 53 | (io/delete-file db-file) 54 | nil))) 55 | 56 | (defn path-to-db-file [] 57 | (str (io/file (file-dir) database-file))) 58 | 59 | (defn training-file [language] 60 | (let [file (io/file (file-dir) (str "train-" language ".train"))] 61 | (if (.exists file) 62 | file 63 | (do (.createNewFile file) 64 | file)))) 65 | 66 | (defn files-with-type [type] 67 | (let [type-string (type {:model ".bin" :train ".train"})] 68 | (->> (filter #(and (.isFile ^File %) 69 | (.endsWith (.getName ^File %) type-string) 70 | (.startsWith (.getName ^File %) "train")) 71 | (file-seq (clojure.java.io/file (file-dir)))) 72 | (map (fn [f] (when (.isFile ^File f) 73 | {:file f 74 | :language (subs (. ^File f getName) 6 9)})))))) 75 | 76 | (defn training-files [] (files-with-type :train)) 77 | 78 | (defn model-files [] (files-with-type :model)) 79 | 80 | (defn model-file "Returns the model file for the language specified." 81 | [^String language] ^File (io/file (file-dir) (str "train-" language ".bin"))) 82 | 83 | (defn delete-files-with-type [type] 84 | (case type 85 | :model (doseq [file (model-files)] (io/delete-file (:file file))) 86 | :train (doseq [file (training-files)] (io/delete-file (:file file))))) 87 | 88 | (defn write-to-training-file 89 | [language data] 90 | (spit (training-file language) data :append true)) 91 | 92 | (defn email-start? [line] 93 | (and 94 | (not (nil? line)) 95 | (string/starts-with? line "From "))) 96 | 97 | (defn read-mail-lines 98 | [fn sq acc] 99 | (loop [fn fn sq sq acc acc] 100 | (let [line (first sq)] 101 | (if (and (email-start? line) (not (nil? (peek acc)))) 102 | (do 103 | (fn acc) 104 | (recur fn (rest sq) [line "\r\n"])) 105 | (if (nil? line) 106 | (do 107 | (fn acc) 108 | nil) 109 | (recur fn (rest sq) (conj acc line "\r\n"))))))) 110 | 111 | (defn read-emails-from-mbox 112 | "Reads the e-mails from an mbox (as input channel) and puts them in a :received-email event as byte arrays. 113 | 114 | Currently always adds the option :enrich" 115 | [mbox-is channel] 116 | (t/log! :info ["Starting to read from mbox"]) 117 | (with-open [rdr (clojure.java.io/reader mbox-is)] 118 | (let [limiter (messaging/channel-limiter :parsed-enrichable-email)] 119 | (read-mail-lines 120 | (fn [email-string] 121 | (async/>!! limiter :token) 122 | (async/>!! channel 123 | ((comp 124 | (fn [mail-string] (events/create-event :received-email mail-string {:enrich true})) 125 | #(.getBytes ^String %) 126 | #(apply str %)) email-string))) 127 | (line-seq rdr) 128 | []))) 129 | (t/log! :info ["Finished reading mbox."])) 130 | 131 | (defn file-exists? [path] (.exists ^File (io/file path))) 132 | 133 | (defn config-from-file [path] (if (file-exists? path) 134 | (do (files/set-custom-config-location! path) 135 | (edn/read-string (slurp path))) 136 | (throw (t/error! (ex-info "Provided config file at does not exist. Exiting application." {:path path}))))) 137 | 138 | (defn config-from-default-location [] 139 | (if (file-exists? (config-location)) 140 | (config) 141 | (throw (t/error! (ex-info "Tried reading config from default location but result was nil." {:path (config-location)}))))) 142 | 143 | (defmulti parse-cli-arg (fn [arg] (first (string/split arg #"=")))) 144 | (defmethod parse-cli-arg "--config-file" [arg-string] {:config-file (second (string/split arg-string #"="))}) 145 | (defmethod parse-cli-arg :default [arg-string] 146 | (t/log! :info ["Received non Plauna specific argument" arg-string "- Doing nothing."]) 147 | nil) 148 | 149 | (defn parse-config-from-cli-arguments [cli-args] 150 | (let [arguments (reduce (fn [acc val] (conj acc (parse-cli-arg val))) {} cli-args)] 151 | (cond (some? (:config-file arguments)) (config-from-file (:config-file arguments)) 152 | (nil? (:config-file arguments)) (config-from-default-location)))) 153 | -------------------------------------------------------------------------------- /src/plauna/markup.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.markup 2 | (:require [clojure.data.json :as json] 3 | [selmer.parser :refer [render-file set-resource-path!]] 4 | [selmer.filters :refer [add-filter!]] 5 | [clojure.java.io :as io]) 6 | (:import 7 | (java.time LocalDateTime ZoneOffset))) 8 | 9 | (set! *warn-on-reflection* true) 10 | 11 | (set-resource-path! (io/resource "templates")) 12 | 13 | (comment 14 | (selmer.parser/cache-off!)) 15 | 16 | (defn timestamp->date [timestamp] 17 | (if (nil? timestamp) 18 | nil 19 | (. LocalDateTime ofEpochSecond timestamp 0 ZoneOffset/UTC))) 20 | 21 | (defn type->toast-role [message] 22 | (cond 23 | (= :alert (:type message)) (conj message {:path "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 11.793a1 1 0 1 1-1.414 1.414L10 11.414l-2.293 2.293a1 1 0 0 1-1.414-1.414L8.586 10 6.293 7.707a1 1 0 0 1 1.414-1.414L10 8.586l2.293-2.293a1 1 0 0 1 1.414 1.414L11.414 10l2.293 2.293Z" 24 | :color "text-red-500" 25 | :bg-color "bg-red-100" 26 | :id (str "toast-" (hash message))}) 27 | (= :success (:type message)) (conj message {:path "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5Zm3.707 8.207-4 4a1 1 0 0 1-1.414 0l-2-2a1 1 0 0 1 1.414-1.414L9 10.586l3.293-3.293a1 1 0 0 1 1.414 1.414Z" 28 | :color "text-green-500" 29 | :bg-color "bg-green-100" 30 | :id (str "toast-" (hash message))}) 31 | (= :info (:type message)) (conj message {:path "M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM10 15a1 1 0 1 1 0-2 1 1 0 0 1 0 2Zm1-4a1 1 0 0 1-2 0V6a1 1 0 0 1 2 0v5Z" 32 | :color "text-orange-500" 33 | :bg-color "bg-orange-100" 34 | :id (str "toast-" (hash message))}) 35 | :else message)) 36 | 37 | (defn administration 38 | ([] (render-file "admin.html" {:active-nav :admin})) 39 | ([messages] (render-file "admin.html" {:messages (mapv type->toast-role messages) :active-nav :admin}))) 40 | 41 | (defn concat-string [contact] 42 | (if (nil? (:name contact)) 43 | (:address contact) 44 | (str (:name contact) " - " (:address contact)))) 45 | 46 | (defn concat-contacts 47 | ([key contacts] 48 | (->> contacts 49 | (filter (fn [contact] (= (:type contact) key))) 50 | (reduce (fn [acc el] 51 | (if (empty? acc) 52 | (str acc (concat-string el)) 53 | (str acc ", " (concat-string el)))) 54 | ""))) 55 | ([contacts] 56 | (reduce (fn [acc el] (if (empty? acc) 57 | (str acc (:address el)) 58 | (str acc ", " (:address el)))) "" contacts))) 59 | 60 | (add-filter! :concat-senders (partial concat-contacts :sender)) 61 | 62 | (add-filter! :concat-receivers (partial concat-contacts :receiver)) 63 | 64 | (add-filter! :concat-cc (partial concat-contacts :cc)) 65 | 66 | (add-filter! :concat-bcc (partial concat-contacts :bcc)) 67 | 68 | (add-filter! :iconize (fn [pred] (if pred "✓" "⤫"))) 69 | 70 | (add-filter! :double-format-nillable (fn [n & [decimal-places]] 71 | (if (nil? n) 72 | 0 73 | (let [n (double n)] 74 | (format (str "%." (if decimal-places decimal-places "1") "f") 75 | n))))) 76 | 77 | (defn list-emails 78 | ([emails page-info categories] 79 | (let [last-page {:last-page (quot (:total page-info) (:size page-info))} 80 | emails-with-java-date (map #(update-in % [:header :date] timestamp->date) emails)] 81 | (render-file "emails.html" {:emails emails-with-java-date :page (conj page-info last-page) :header "Emails" :categories categories :active-nav :emails}))) 82 | ([emails page-info categories messages] 83 | (let [last-page {:last-page (quot (:total page-info) (:size page-info))} 84 | emails-with-java-date (map #(update-in % [:header :date] timestamp->date) emails)] 85 | (render-file "emails.html" {:emails emails-with-java-date :page (conj page-info last-page) :header "Emails" :categories categories :messages (mapv type->toast-role messages) :active-nav :emails})))) 86 | 87 | (defn list-email-contents [email-data categories] 88 | (render-file "email.html" {:email (update-in email-data [:header :date] timestamp->date) :categories categories :active-nav :emails})) 89 | 90 | (defn statistics-contacts [intervals top-from interval-from] 91 | (let [vega-interval-from {:data {:values interval-from} 92 | :mark "bar" 93 | :transform [{:filter "datum.interval != null"} 94 | {:aggregate [{:op "sum" :field :count :as :sum}] :groupby [:interval]}] 95 | :encoding {:y {:field :sum :type "quantitative"} 96 | :x {:field :interval :type "ordinal" :axis {:labelOverlap "parity" :labelSeparation 10}} 97 | :tooltip {:field :sum :type "nominal"}}}] 98 | (render-file "statistics.html" 99 | {:interval-filter (conj {:url "/statistics/contacts"} intervals) 100 | :statistics 101 | [{:type :table :header "Top 10 Senders of E-Mails" :data {:headers ["Count" "Address"] :values (map vals top-from)}} 102 | {:type :bar-chart :header "Number of Senders per Interval" :id "senders" :json-data (json/write-str vega-interval-from)}]}))) 103 | 104 | (defmacro pie-chart [data-values key key-label description] 105 | `{:data {:values ~data-values} 106 | :description ~description 107 | :width :container 108 | :transform [{:joinaggregate [{:op "sum" :field :count :as :total}]} 109 | {:calculate (str "datum.count / datum.total < 0.1 ? 'others' : datum['" ~key "']") :as (keyword ~key)} 110 | {:aggregate [{:op :sum :field :count :as :count}] :groupby [(keyword ~key)]}] 111 | :layer [{:mark {:type :arc :outerRadius 10 :stroke "#fff"}} 112 | {:mark {:type :text :radiusOffset 30} 113 | :encoding {:text {:field (keyword ~key) :type "nominal"}}}] 114 | :encoding {:theta {:field :count :type "quantitative" :stack true} 115 | :radius {:field :count :scale {:type :sqrt :zero true :rangeMin 15}} 116 | :color {:field (keyword ~key) :legend nil} 117 | :tooltip [{:field (keyword ~key) :type "nominal" :title ~key-label} 118 | {:field :count :type "nominal" :title "Count"}]} 119 | :config {:background nil}}) 120 | 121 | (defn statistics-types [overview-map yearly-mime-types] 122 | (let [overall-pie (pie-chart overview-map 'mime-type "MIME TYPE" "MIME Types Overview") 123 | 124 | vega-most-common {:data {:values yearly-mime-types} 125 | :description "Most common mime types" 126 | :width :container 127 | :mark "bar" 128 | :transform [{:aggregate [{:op "sum" :field :count :as :sum}] :groupby [:mime-type]} 129 | {:window [{:op "rank" :as :rank}] :sort [{:field :sum :order "descending"}]} 130 | {:filter "datum.rank <= 50"}] 131 | :encoding {:x {:field :mime-type :type "nominal" :title "Mime types" :sort "-y"} 132 | :y {:field :sum :type "quantitative" :title "Count"} 133 | :tooltip [{:field :sum :type "quantitative"} {:field :mime-type :type "nominal"}]} 134 | :config {:background nil}} 135 | vega-least-common {:data {:values yearly-mime-types} 136 | :description "Least common mime types" 137 | :width :container 138 | :mark "bar" 139 | :transform [{:aggregate [{:op "sum" :field :count :as :sum}] :groupby [:mime-type]} 140 | {:filter "datum.sum <= 50"}] 141 | :encoding {:x {:field :mime-type :type "nominal" :title "Mime types" :sort "-y"} 142 | :y {:field :sum :type "quantitative" :title "Count"} 143 | :tooltip [{:field :sum :type "quantitative"} {:field :mime-type :type "nominal"}]} 144 | :config {:background nil}}] 145 | (render-file "statistics.html" 146 | {:statistics 147 | [{:type :bar-chart :header "MIME Types Overview" :id "overview" :json-data (json/write-str overall-pie)} 148 | {:type :bar-chart :header "Most Common MIME Types" :id "most-common" :json-data (json/write-str vega-most-common)} 149 | {:type :bar-chart :header "Least Common MIME Types" :id "least-common" :json-data (json/write-str vega-least-common)}] 150 | :active-tab :types 151 | :active-nav :statistics 152 | :no-data (empty? overview-map)}))) 153 | 154 | (defn statistics-languages [languages-overall yearly-languages] 155 | (let [overall-languages (pie-chart languages-overall 'language "Language" "Languages Overview") 156 | yearly-data {:data {:values yearly-languages} 157 | :mark {:type "bar" :tooltip true} 158 | :width :container 159 | :encoding {:y {:field :count :type "quantitative"} 160 | :x {:field :interval :type "ordinal" :axis {:labelOverlap "parity" :labelSeparation 10}} 161 | :color {:field :language :type "nominal" :scale {:scheme "category20c"}} 162 | :text {:field :language :type "nominal" :scale {:scheme "category20c"}}} 163 | :config {:background nil}}] 164 | (render-file "statistics.html" 165 | {:statistics [{:type :bar-chart :header "Languages Overview" :id "languages-overview" :json-data (json/write-str overall-languages)} 166 | {:type :bar-chart :header "Yearly Languages" :id "languages" :json-data (json/write-str yearly-data)}] 167 | :active-tab :languages 168 | :active-nav :statistics 169 | :no-data (empty? languages-overall)}))) 170 | 171 | (defn statistics-categories [categories-overall yearly-categories] 172 | (let [overall-categories (pie-chart categories-overall 'category "Category" "Categories Overview") 173 | vega-data {:data {:values yearly-categories} 174 | :mark {:type "bar" :tooltip true} 175 | :width :container 176 | :transform [{:filter "datum.interval != null"}] 177 | :encoding {:y {:field :count :type "quantitative"} 178 | :x {:field :interval :type "ordinal" :axis {:labelOverlap "parity" :labelSeparation 10}} 179 | :color {:field :category :type "nominal"}} 180 | :config {:background nil}}] 181 | (render-file "statistics.html" {:statistics [{:type :bar-chart :header "Categories Overview" :id "cat-overview" :json-data (json/write-str overall-categories)} 182 | {:type :bar-chart :header "Yearly Categories" :id "categories" :json-data (json/write-str vega-data)}] 183 | :active-tab :categories 184 | :active-nav :statistics 185 | :no-data (empty? categories-overall)}))) 186 | 187 | (defn statistics-overall [yearly-emails] 188 | (let [vega-data {:data {:values yearly-emails} 189 | :mark "bar" 190 | :width :container 191 | :transform [{:filter "datum.date != null"}] 192 | :encoding {:y {:field :count :type "quantitative"} 193 | :x {:field :date :type "ordinal" :axis {:labelOverlap "parity" :labelSeparation 10}} 194 | :tooltip {:field :count :type "quantitative"}} 195 | :config {:background nil}}] 196 | (render-file "statistics.html" {:statistics [{:type :bar-chart :header "Yearly Emails" :id "emails" :json-data (json/write-str vega-data)}] 197 | :active-nav :statistics 198 | :no-data (empty? yearly-emails)}))) 199 | 200 | (defn categories-page [categories] (render-file "admin-categories.html" {:categories categories :active-nav :admin})) 201 | 202 | (defn languages-admin-page [language-preferences] 203 | (render-file "admin-languages.html" {:language-preferences language-preferences :active-nav :admin})) 204 | 205 | (defn watcher-list [clients] 206 | (let [watchers (mapv (fn [client] {:id (first client) :logged-in (-> client second :connected) :folder-open (-> client second :folder) :string (str (-> client (nth 2) :config :host) " - " (-> client (nth 2) :config :user))}) clients)] 207 | (render-file "watchers.html" {:watchers watchers :active-nav :connections}))) 208 | 209 | (defn watcher 210 | ([id config folders] (render-file "watcher.html" {:id id :host (:host config) :user (:user config) :folders folders :active-nav :connections})) 211 | ([id client folders messages] (render-file "watcher.html" {:id id :host (:host client) :user (:user client) :folders folders :messages (mapv type->toast-role messages) :active-nav :connections}))) 212 | 213 | (defn preferences-page [data] (let [log-levels {:log-level-options [{:key :error :name "Error"} {:key :info :name "Info"} {:key :debug :name "Debug"}] :active-nav :admin}] 214 | (render-file "admin-preferences.html" (conj data log-levels)))) 215 | -------------------------------------------------------------------------------- /src/plauna/messaging.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.messaging 2 | (:require [clojure.core.async :refer [chan pub sub bytes [is] 20 | (let [baos (java.io.ByteArrayOutputStream.)] 21 | (copy is baos) 22 | (.toByteArray baos))) 23 | 24 | (defn uuid [^String name] (str (java.util.UUID/nameUUIDFromBytes (.getBytes name)))) 25 | 26 | (defn parse-participants [message-id type mailbox] 27 | (cond (instance? Mailbox mailbox) 28 | {:name (.getName ^Mailbox mailbox) 29 | :address (.getAddress ^Mailbox mailbox) 30 | :contact-key (uuid (str (.getName ^Mailbox mailbox) (.getAddress ^Mailbox mailbox))) 31 | :message-id message-id 32 | :type type} 33 | (instance? Group mailbox) (map (fn [inner-mailbox] (parse-participants message-id type inner-mailbox)) (.getMailboxes ^Group mailbox)) 34 | :else (do (t/log! :error ["Wrong type of mailbox for participants" (class mailbox)]) 35 | {:name "n/a" :address "n/a" :contact-key nil :message-id message-id :type type}))) 36 | 37 | (defn message-id [^Message message] 38 | (let [message-id (.getMessageId message)] 39 | (if (some? message-id) 40 | (st/trim message-id) 41 | ""))) 42 | 43 | (defn detect-utf8 44 | "Some emails have the charset uft-8 in quotation marks or escaped like \\UTF-8 45 | which throws UnsupportedEncodingException. This function tries to 'sanitize' 46 | those poorly formatted utf-8 declarations." 47 | [^String charset-string] (if (.equalsIgnoreCase charset-string "utf-8") 48 | charset-string 49 | (if (some? (re-matches #"(?i).*utf-8.*" charset-string)) 50 | "utf-8" 51 | charset-string))) 52 | 53 | (defn decode-body [^BinaryBody body] 54 | (try (new String ^bytes (stream->bytes (.getInputStream body))) 55 | (catch java.io.UnsupportedEncodingException e (t/log! {:level :error :error e} (.getMessage e)) (new String ^bytes (stream->bytes (.getInputStream body)))))) 56 | 57 | (defn reader->string [^Reader reader] 58 | (with-open [r reader] (slurp r))) 59 | 60 | (defmulti parse-body-content (fn [body] (type body))) 61 | 62 | (defmethod parse-body-content TextBody [^TextBody text-body] (reader->string (.getReader text-body))) 63 | 64 | (defmethod parse-body-content BinaryBody [^BinaryBody binary-body] (decode-body binary-body)) 65 | 66 | (defmulti parse-body (fn [_ _ message] (type message))) 67 | 68 | (defmethod parse-body MessageImpl [message-id bodies ^Message message] 69 | (let [body (.getBody message)] 70 | (if (instance? MultipartImpl body) 71 | (parse-body message-id bodies body) 72 | (conj bodies 73 | {:mime-type (.getMimeType message) 74 | :charset (.getCharset message) 75 | :message-id message-id 76 | :transfer-encoding (.getContentTransferEncoding message) 77 | :content (when (core-email/text-content? (.getMimeType message)) (parse-body-content body)) 78 | :content-disposition (.getDispositionType message) 79 | :filename (.getFilename message)})))) 80 | 81 | (defmethod parse-body MultipartImpl [message-id bodies ^Multipart message] 82 | (let [body-parts (.getBodyParts message) 83 | results (conj bodies (mapv #(parse-body message-id bodies %) body-parts))] 84 | results)) 85 | 86 | (defmethod parse-body BodyPart [message-id bodies ^BodyPart body-part] 87 | (let [body (.getBody body-part)] 88 | (if (or (instance? MultipartImpl body) (instance? MessageImpl body)) 89 | (parse-body message-id bodies body) 90 | (conj bodies 91 | {:mime-type (.getMimeType body-part) 92 | :charset (.getCharset body-part) 93 | :message-id message-id 94 | :transfer-encoding (.getContentTransferEncoding body-part) 95 | :content (when (core-email/text-content? (.getMimeType body-part)) (parse-body-content (.getBody body-part))) 96 | :content-disposition (.getDispositionType body-part) 97 | :filename (.getFilename body-part)})))) 98 | 99 | (defn parse-date [^MessageImpl message] 100 | (let [date (.getDate message)] 101 | (if (nil? date) 102 | nil 103 | (quot (.getTime date) 1000)))) 104 | 105 | (defn parse-headers [^MessageImpl message] 106 | (t/log! :debug "Parsing headers.") 107 | (let [in-reply-to-field ^Field (.getField ^Header (.getHeader message) "In-Reply-To")] 108 | {:message-id (message-id message) 109 | :in-reply-to (if (some? in-reply-to-field) (.getBody in-reply-to-field) nil) 110 | :subject (.getSubject message) 111 | :date (parse-date message) 112 | :mime-type (.getMimeType message)})) 113 | 114 | (defn parse-email [^InputStream is] 115 | (t/log! :debug "Parsing new email from input stream") 116 | (let [mime-config (-> (MimeConfig$Builder.) 117 | (.setMaxLineLen -1) 118 | (.setMaxHeaderLen -1) 119 | (.setMaxHeaderCount -1) 120 | (.build)) 121 | message-builder (let [builder (DefaultMessageBuilder.)] 122 | (.setMimeEntityConfig builder mime-config) builder) 123 | message (.parseMessage message-builder is) 124 | headers (parse-headers message) 125 | senders (flatten (map (partial parse-participants (message-id message) :sender) (.getFrom message))) 126 | receivers (flatten (map (partial parse-participants (message-id message) :receiver) (.getTo message))) 127 | cc (flatten (map (partial parse-participants (message-id message) :cc) (.getCc message))) 128 | bcc (flatten (map (partial parse-participants (message-id message) :bcc) (.getBcc message))) 129 | participants (concat senders receivers cc bcc)] 130 | (construct-email headers (flatten (parse-body (:message-id headers) [] message)) participants))) 131 | 132 | (defn with-message-id? [parsed-email] 133 | (let [message-id (get-in parsed-email [:header :message-id])] 134 | (if (or (nil? message-id) (empty? message-id)) 135 | (do (t/log! {:level :error} ["Dropping parsed-email with headers" (into {} (:header parsed-email)) "Reason: message-id is empty"]) 136 | false) 137 | true))) 138 | 139 | (defn parsed-email-event [original-event parsed-email] 140 | (if (true? (:enrich (:options original-event))) 141 | (events/create-event :parsed-enrichable-email parsed-email nil original-event) 142 | (events/create-event :parsed-email parsed-email nil original-event))) 143 | 144 | (defn parser-event-loop 145 | "Listens to :received-email. 146 | 147 | Options: 148 | :enrich - boolean 149 | 150 | If :enrich is true, emits a :parsed-enrichable-email event. Otherwise emits a :parsed-email event." 151 | [publisher events-channel] 152 | (let [local-channel (chan 256)] 153 | (sub publisher :received-email local-channel) 154 | (async/pipeline 4 155 | events-channel 156 | (comp (map (fn [event] [event (:payload event)])) 157 | (map (fn [[original-event payload]] [original-event (parse-email (input-stream payload))])) 158 | (filter (fn [[_ parsed-email]] (with-message-id? parsed-email))) 159 | (map (fn [[original-event parsed-email]] (parsed-email-event original-event parsed-email)))) 160 | local-channel 161 | true 162 | (fn [^Throwable th] 163 | (t/log! {:level :error :error th} (.getMessage th)) 164 | (.printStackTrace th))))) 165 | -------------------------------------------------------------------------------- /src/plauna/preferences.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.preferences 2 | (:require 3 | [clojure.core.cache.wrapped :as w] 4 | [plauna.database :as db])) 5 | 6 | (def cache (w/ttl-cache-factory {} :ttl 6000)) 7 | 8 | (def fetch-fn (atom db/fetch-preference)) 9 | 10 | (def converters {clojure.lang.Keyword (fn [^String s] (keyword (.substring s 1))) 11 | java.lang.Double Double/parseDouble 12 | java.lang.Long Long/parseLong}) 13 | 14 | (defmacro preference-with-default [property pred default] 15 | `(let [value# (~pred (@fetch-fn ~property) ~default) 16 | default-type# (class ~default) 17 | type# (class value#)] 18 | (if (= default-type# type#) 19 | value# 20 | ((get converters default-type#) value#)))) 21 | 22 | (defn update-preference [key value] 23 | (db/update-preference key value) 24 | (w/evict cache key)) 25 | 26 | (defn log-level [] (w/lookup-or-miss cache 27 | :log-level 28 | (fn [key] (preference-with-default key or :info)))) 29 | 30 | (defn language-detection-threshold [] (w/lookup-or-miss cache 31 | :language-detection-threshold 32 | (fn [key] (preference-with-default key or 0.80)))) 33 | 34 | (defn categorization-threshold [] (w/lookup-or-miss cache 35 | :categorization-threshold 36 | (fn [key] (preference-with-default key or 0.65)))) 37 | 38 | (defn client-health-check-interval [] (w/lookup-or-miss cache 39 | :client-health-check-interval 40 | (fn [key] (preference-with-default key or 60)))) 41 | -------------------------------------------------------------------------------- /src/plauna/specs.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.specs 2 | (:require [clojure.spec.alpha :as s])) 3 | 4 | (s/check-asserts true) 5 | 6 | (s/def ::message-id string?) ; Delivery error emails don't have message-ids 7 | 8 | (s/def ::in-reply-to (s/or :string string? :nil nil?)) 9 | 10 | (s/def ::subject (s/or :string string? :nil nil?)) 11 | 12 | (s/def ::mime-type string?) 13 | 14 | ;(s/def ::address #(re-matches #".+\@.+\..+" %)) 15 | (s/def ::address string?) 16 | 17 | (s/def ::direction #(or (= :from %) (= :to %))) 18 | 19 | (s/def ::participant-type #(or (= :sender %) (= :receiver %))) 20 | 21 | (s/def ::date number?) ; Chat messages don't have date 22 | 23 | (s/def ::charset string?) 24 | 25 | (s/def ::transfer-encoding string?) 26 | 27 | (s/def ::content (s/or :string string? :nil nil?)) 28 | 29 | (s/def ::name (s/or :string string? :nil nil?)) 30 | 31 | (s/def ::contact-key string?) 32 | 33 | (s/def ::contact (s/keys :req-un [::name ::address ::direction ::contact-key])) 34 | 35 | (s/def ::participant (s/keys :req-un [::name ::address ::type ::message-id ::contact-key])) 36 | 37 | (s/def ::senders (s/or :s (s/coll-of ::contact) :n empty?)) 38 | 39 | (s/def ::recipients (s/or :s (s/coll-of ::contact) :n empty?)) 40 | 41 | (s/def ::participants (s/coll-of ::participant :min-count 2)) 42 | 43 | (s/def ::headers (s/keys :req-un [::message-id ::in-reply-to ::subject ::senders ::recipients ::mime-type ::date])) 44 | 45 | (s/def ::body-part (s/keys :req-un [::message-id ::charset ::mime-type ::transfer-encoding 46 | ::content ::filename])) 47 | 48 | (s/def ::body (s/coll-of ::body-part :min-count 1)) 49 | 50 | (s/def ::email (s/keys :req-un [::body ::headers])) 51 | -------------------------------------------------------------------------------- /src/plauna/util/async.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.util.async 2 | (:require [clojure.core.async :refer [timeout alts! alts!!]])) 3 | 4 | (defmacro fetch-or-timeout! 5 | "Non-blocking. Either fetch a value from the channel or timeout after x milliseconds." 6 | [chan milliseconds] 7 | `(let [timeout# (timeout ~milliseconds) 8 | [value# port#] (alts! [~chan timeout#])] 9 | (if (= ~chan port#) 10 | value# 11 | :timed-out))) 12 | 13 | (defmacro fetch-or-timeout!! 14 | "Blocking. Either fetch a value from the channel or timeout after x milliseconds." 15 | [chan milliseconds] 16 | `(let [timeout# (timeout ~milliseconds) 17 | [value# port#] (alts!! [~chan timeout#])] 18 | (if (= ~chan port#) 19 | value# 20 | :timed-out))) 21 | -------------------------------------------------------------------------------- /src/plauna/util/errors.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.util.errors) 2 | 3 | (defn return-error [error] 4 | {:error error}) 5 | 6 | (defn return-success [success] 7 | {:success success}) 8 | 9 | (defn nil-or-success [result] 10 | (if (nil? result) 11 | (return-error "The result was nil.") 12 | (return-success result))) 13 | 14 | (defn error-check [result success-fn error-fn] 15 | (if (some? (:error result)) 16 | (error-fn (:error result)) 17 | (success-fn (:success result)))) 18 | -------------------------------------------------------------------------------- /src/plauna/util/page.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.util.page) 2 | 3 | (defrecord PageRequest [page size]) 4 | 5 | (defn page-request [page size] 6 | (->PageRequest (or page 1) (or size 10))) 7 | 8 | (defn page-request->limit-offset [page-request] 9 | {:limit (:size page-request) :offset (* (:size page-request) (dec (:page page-request)))}) 10 | -------------------------------------------------------------------------------- /src/plauna/util/text_transform.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.util.text-transform 2 | (:require 3 | [clojure.java.io :refer [input-stream]]) 4 | (:import 5 | (org.jsoup Jsoup) 6 | (javax.swing.text.rtf RTFEditorKit))) 7 | 8 | (defn html->text [^String html] (.text (Jsoup/parse html "UTF-8"))) 9 | 10 | (defn rtf->string [^String rtf] 11 | (let [rtf-parser (new RTFEditorKit) 12 | document (.createDefaultDocument rtf-parser)] 13 | (.read rtf-parser (input-stream (.getBytes rtf)) document 0) 14 | (.getText document 0 (.getLength document)))) 15 | 16 | (defn clean-text-content [content content-type] 17 | (cond (= :html content-type) (html->text content) 18 | (= :rtf content-type) (rtf->string content) 19 | :else (html->text content))) 20 | -------------------------------------------------------------------------------- /test/plauna/analysis_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.analysis-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.string :as s] 4 | [clojure.java.io :as io] 5 | [plauna.analysis :as analysis])) 6 | 7 | ;; Normalization tests 8 | 9 | (deftest normalization-1 10 | (let [res (analysis/normalize (slurp (io/resource "test/normalization/original-text-1.txt")))] 11 | (is (= (s/trim (slurp (io/resource "test/normalization/normalized-text-1.txt"))) res)))) 12 | -------------------------------------------------------------------------------- /test/plauna/client_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.client-test 2 | (:require [clojure.test :refer :all] 3 | [plauna.client :as client]) 4 | (:import (java.util Properties) 5 | (jakarta.mail Session))) 6 | 7 | (deftest ssl-properties-set-correctly 8 | (let [session ^Session (client/config->session {:security :ssl :port 993}) 9 | expected-properties (doto (new Properties) 10 | (.setProperty "mail.imap.ssl.enable", "true") 11 | (.setProperty "mail.imap.port", "993") 12 | (.setProperty "mail.imap.usesocketchannels" "true") 13 | (.setProperty "mail.imap.timeout" "5000") 14 | (.setProperty "mail.imap.partialfetch" "false") 15 | (.setProperty "mail.imap.fetchsize" "1048576"))] 16 | (is (= expected-properties (.getProperties session))))) 17 | 18 | (deftest starttls-properties-set-correctly 19 | (let [session ^Session (client/config->session {:security :starttls :port 143}) 20 | expected-properties (doto (new Properties) 21 | (.setProperty "mail.imap.starttls.enable", "true") 22 | (.setProperty "mail.imap.port", "143") 23 | (.setProperty "mail.imap.usesocketchannels" "true") 24 | (.setProperty "mail.imap.timeout" "5000") 25 | (.setProperty "mail.imap.partialfetch" "false") 26 | (.setProperty "mail.imap.fetchsize" "1048576"))] 27 | (is (= expected-properties (.getProperties session))))) 28 | 29 | (deftest plain-text-properties-set-correctly 30 | (let [session ^Session (client/config->session {:security :plain :port 143}) 31 | expected-properties (doto (new Properties) 32 | (.setProperty "mail.imap.usesocketchannels" "true") 33 | (.setProperty "mail.imap.port", "143") 34 | (.setProperty "mail.imap.timeout" "5000") 35 | (.setProperty "mail.imap.partialfetch" "false") 36 | (.setProperty "mail.imap.fetchsize" "1048576"))] 37 | (is (= expected-properties (.getProperties session))))) 38 | 39 | (deftest empty-values-return-ssl 40 | (let [session ^Session (client/config->session {}) 41 | expected-properties (doto (new Properties) 42 | (.setProperty "mail.imap.ssl.enable", "true") 43 | (.setProperty "mail.imap.port", "993") 44 | (.setProperty "mail.imap.usesocketchannels" "true") 45 | (.setProperty "mail.imap.timeout" "5000") 46 | (.setProperty "mail.imap.partialfetch" "false") 47 | (.setProperty "mail.imap.fetchsize" "1048576"))] 48 | (is (= expected-properties (.getProperties session))))) 49 | 50 | (deftest non-compliant-security-values-return-ssl 51 | (let [session ^Session (client/config->session {:security :does-not-exist}) 52 | expected-properties (doto (new Properties) 53 | (.setProperty "mail.imap.ssl.enable", "true") 54 | (.setProperty "mail.imap.port" "993") 55 | (.setProperty "mail.imap.usesocketchannels" "true") 56 | (.setProperty "mail.imap.timeout" "5000") 57 | (.setProperty "mail.imap.partialfetch" "false") 58 | (.setProperty "mail.imap.fetchsize" "1048576"))] 59 | (is (= expected-properties (.getProperties session))))) 60 | 61 | (deftest debug-false-on-default 62 | (let [session ^Session (client/config->session {})] 63 | (is (= false (.getDebug session))))) 64 | 65 | (deftest debug-can-be-set-true 66 | (let [session ^Session (client/config->session {:debug true})] 67 | (is (= true (.getDebug session))))) 68 | 69 | (deftest set-cert-checks-to-false 70 | (let [session ^Session (client/config->session {:security :ssl :check-ssl-certs false})] 71 | (is (= "*" (.getProperty session "mail.imap.ssl.trust"))))) 72 | -------------------------------------------------------------------------------- /test/plauna/core/email_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.core.email-test 2 | (:require [clojure.test :refer :all] 3 | [plauna.core.email :as core.email])) 4 | 5 | (defn create-mock-header [] 6 | (core.email/->Header "test123" nil "test subject" "multipart" 1123)) 7 | 8 | (defn create-body-part [mime-type content] 9 | (core.email/->Body-Part "test123" "utf-8" mime-type "test" content nil nil)) 10 | 11 | (defn create-email-with-fake-body [mimetype-content-tuples] 12 | (core.email/->Email (create-mock-header) (map (fn [tuple] (create-body-part (first tuple) (second tuple))) mimetype-content-tuples) [])) 13 | 14 | (deftest training-content-single-body-part 15 | (let [email (core.email/->Email (create-mock-header) 16 | [(create-body-part "text/html" "Simple content")] 17 | []) 18 | training-data (core.email/body-part-for-mime-type "text/html" email)] 19 | (testing 20 | (is (= (:message-id training-data) "test123")) 21 | (is (= (:content training-data) "Simple content"))))) 22 | 23 | (deftest training-content-single-body-part-wrong-mime-type 24 | (let [email (core.email/->Email (create-mock-header) 25 | [(create-body-part "text/html" "Simple content")] 26 | []) 27 | training-data (core.email/body-part-for-mime-type "text/text" email)] 28 | (testing 29 | (is (= (:message-id training-data) "test123")) 30 | (is (= (:content training-data) "Simple content"))))) 31 | 32 | (deftest training-content-multiple-body-parts 33 | (let [email (core.email/->Email (create-mock-header) 34 | [(create-body-part "text/plain" "Simple content") 35 | (create-body-part "text/html" "Better content")] 36 | []) 37 | training-data (core.email/body-part-for-mime-type "text/html" email)] 38 | (testing 39 | (is (= "test123" (:message-id training-data))) 40 | (is (= "Better content" (:content training-data)))))) 41 | 42 | (deftest training-content-empty-text-contents 43 | (let [mock-data (core.email/->Email (create-mock-header) [] []) 44 | result (core.email/body-part-for-mime-type "text/plain" mock-data)] 45 | (is (= nil result)))) 46 | 47 | (deftest training-content-single-text-contents 48 | (let [mock-data (create-email-with-fake-body [["text/plain" "test"]]) 49 | result (core.email/body-part-for-mime-type "text/plain" mock-data)] 50 | (is (= "text/plain" (:mime-type result))) 51 | (is (= "test" (:content result))))) 52 | 53 | (deftest trained-emails-single-text-contents-different-mimetype 54 | (let [mock-data (create-email-with-fake-body [["text/html" "test"]]) 55 | result (core.email/body-part-for-mime-type "text/plain" mock-data)] 56 | (is (= "text/html" (:mime-type result))) 57 | (is (= "test" (:content result))))) 58 | 59 | (deftest trained-emails-multiple-text-contents 60 | (let [mock-data (create-email-with-fake-body [["text/html" "test"] ["text/plain" "test2"]]) 61 | result (core.email/body-part-for-mime-type "text/plain" mock-data)] 62 | (is (= "text/plain" (:mime-type result))) 63 | (is (= "test2" (:content result))))) 64 | 65 | (deftest trained-emails-without-any-text 66 | (let [mock-data (create-email-with-fake-body [["image/jpg" "binary-data"] ["image/pn" "binary-data"]]) 67 | result (core.email/body-part-for-mime-type "text/plain" mock-data)] 68 | (is (= nil result)))) 69 | -------------------------------------------------------------------------------- /test/plauna/core/events_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.core.events-test 2 | (:require [plauna.core.events :as events] 3 | [clojure.core.async :as async] 4 | [clojure.test :as test])) 5 | 6 | (test/deftest return-key-on-complete-works 7 | (let [test-chan (async/chan) 8 | test-fn (fn [] (async/go (async/!! test-channel test-event)) 25 | (Thread/sleep 1000) 26 | (async/close! test-channel) 27 | (println "Done"))) 28 | 29 | (deftest enriched-email-simple 30 | (let [sql (db/data->sql {:entity :enriched-email :strict false})] 31 | (is (= "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers LEFT JOIN metadata ON headers.message_id = metadata.message_id" 32 | (first sql))))) 33 | 34 | (deftest enriched-email-simple-2 35 | (let [sql (db/data->sql {:entity :enriched-email :strict true})] 36 | (is (= "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers INNER JOIN metadata ON headers.message_id = metadata.message_id" 37 | (first sql))))) 38 | 39 | (deftest enriched-email-simple-3 40 | (let [sql (db/data->sql {:entity :enriched-email :strict true} {:where [:= :message-id "123"]})] 41 | (is (= "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers INNER JOIN metadata ON headers.message_id = metadata.message_id WHERE headers.message_id = ?" 42 | (first sql))))) 43 | 44 | (deftest enriched-email-simple-4 45 | (let [sql (db/data->sql {:entity :enriched-email :strict true} {:where [:and [:= :message-id "123"] [:<> :language nil] [:<> :category nil]]})] 46 | (is (= "SELECT headers.message_id, in_reply_to, subject, mime_type, date FROM headers INNER JOIN metadata ON headers.message_id = metadata.message_id WHERE (headers.message_id = ?) AND (metadata.language IS NOT NULL) AND (metadata.category IS NOT NULL)" 47 | (first sql))))) 48 | 49 | -------------------------------------------------------------------------------- /test/plauna/files_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.files-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.java.io :as io] 4 | [plauna.files :as files] 5 | [clojure.core.async :refer [is [resource-path] 8 | (io/input-stream (io/resource resource-path))) 9 | 10 | (deftest read-single-item-mbox 11 | (let [test-chan (chan 20)] 12 | (files/read-emails-from-mbox (resource->is "test/email_corpus/test-email-1.mbox") test-chan) 13 | (close! test-chan) 14 | (loop [result (is "test/email_corpus/weird-mbox.mbox") test-chan) 23 | (close! test-chan) 24 | (loop [result (!!] :as async])) 10 | 11 | (defn resource->is [resource-path] 12 | (io/input-stream (io/resource resource-path))) 13 | 14 | ;; Testing email parsing 15 | 16 | (deftest basic-parse-test 17 | (let [test-chan (chan) 18 | test-pub (pub test-chan :type) 19 | email-bytes (.getBytes ^String (slurp (io/resource "test/email_corpus/simple-lorem-ipsum.eml")))] 20 | (parser/parser-event-loop test-pub test-chan) 21 | (>!! test-chan {:type :received-email :options {} :payload email-bytes}) 22 | (let [results-chan (chan) 23 | _ (sub test-pub :parsed-email results-chan) 24 | parsed-mail (:payload (async-utils/fetch-or-timeout!! results-chan 1000))] 25 | (is (= "" (:message-id (:header parsed-mail)))) 26 | (is (= "Lorem Ipsum Sample" (:subject (:header parsed-mail)))) 27 | (is (= "text/plain" (:mime-type (first (:body parsed-mail))))) 28 | (is (= "Dear Test,\r 29 | \r 30 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ac justo vel odio efficitur consectetur. Integer nec urna vitae elit imperdiet ultrices. Fusce vel neque vel justo dapibus luctus a eget quam.\r 31 | \r 32 | Sincerely,\r 33 | Tester\r 34 | " (:content (first (:body parsed-mail)))))))) 35 | 36 | (deftest parse-test-2 37 | (let [test-chan (chan) 38 | test-pub (pub test-chan :type) 39 | email-bytes (.getBytes ^String (slurp (io/resource "test/email_corpus/greek-text.mbox")))] 40 | (parser/parser-event-loop test-pub test-chan) 41 | (>!! test-chan {:type :received-email :options {} :payload email-bytes}) 42 | (let [results-chan (chan) 43 | _ (sub test-pub :parsed-email results-chan) 44 | parsed-mail (:payload (async-utils/fetch-or-timeout!! results-chan 1000))] 45 | (is (= "Παράδοση" (:subject (:header parsed-mail))))))) 46 | 47 | (deftest parse-test-3 48 | (let [test-chan (chan) 49 | test-pub (pub test-chan :type) 50 | email-bytes (.getBytes ^String (slurp (io/resource "test/email_corpus/multipart-with-text-attachment.eml")))] 51 | (parser/parser-event-loop test-pub test-chan) 52 | (>!! test-chan {:type :received-email :options {} :payload email-bytes}) 53 | (let [results-chan (chan) 54 | _ (sub test-pub :parsed-email results-chan) 55 | parsed-mail (:payload (async-utils/fetch-or-timeout!! results-chan 1000))] 56 | (is (= "Multipart With Text Attachment" (:subject (:header parsed-mail)))) 57 | (is (= 2 (count (:body parsed-mail)))) 58 | (is (false? (core-email/attachment? (first (:body parsed-mail))))) 59 | (is (true? (core-email/attachment? (second (:body parsed-mail)))))))) 60 | 61 | ;; Wrong data tests 62 | 63 | (deftest wrong-data-1 64 | ;; The mbox contains more than 3 e-mails. The expectation is that only the ones with proper message-id will get through. 65 | (let [inner-chan (chan 20) 66 | test-chan (pub inner-chan :type)] 67 | (parser/parser-event-loop test-chan inner-chan) 68 | (files/read-emails-from-mbox (resource->is "test/email_corpus/weird-mbox.mbox") inner-chan) 69 | (let [results-chan (chan)] 70 | (sub test-chan :parsed-enrichable-email results-chan) 71 | (loop [event (async-utils/fetch-or-timeout!! results-chan 200) results []] 72 | (if (or (nil? event) (= :timed-out event)) 73 | (is (= 3 (count results))) 74 | (recur (async-utils/fetch-or-timeout!! results-chan 200) (conj results event))))))) 75 | 76 | -------------------------------------------------------------------------------- /test/plauna/preferences_test.clj: -------------------------------------------------------------------------------- 1 | (ns plauna.preferences-test 2 | (:require [plauna.preferences :as sut] 3 | [clojure.core.cache :as c] 4 | [clojure.test :as t])) 5 | 6 | (defn return-fn-for-preferences [returns] (swap! sut/fetch-fn (fn [_] (fn [_] returns)))) 7 | 8 | (t/deftest fetch-returns-default 9 | (swap! sut/cache (fn [_] (c/ttl-cache-factory {} :ttl 1))) 10 | (return-fn-for-preferences nil) 11 | (t/is (= (sut/log-level) :info))) 12 | 13 | (t/deftest fetch-returns-string 14 | (swap! sut/cache (fn [_] (c/ttl-cache-factory {} :ttl 1))) 15 | (return-fn-for-preferences ":debug") 16 | (t/is (= (sut/log-level) :debug))) 17 | 18 | (t/deftest fetch-returns-double 19 | (swap! sut/cache (fn [_] (c/ttl-cache-factory {} :ttl 1))) 20 | (return-fn-for-preferences "0.01") 21 | (t/is (= (sut/categorization-threshold) 0.01))) 22 | --------------------------------------------------------------------------------