├── tests ├── testthat │ ├── data │ │ ├── empty_mailbox.mbox │ │ └── test_mailbox.mbox │ ├── test_read_messages.R │ └── test_get_messages.R └── testthat.R ├── .Rbuildignore ├── LICENSE ├── .gitignore ├── R ├── mailman.R ├── zzz.R ├── read_messages.R └── get_messages.R ├── NAMESPACE ├── man ├── get_mailbox.Rd ├── get_messages.Rd ├── mailman-package.Rd └── read_messages.Rd ├── mailman.Rproj └── DESCRIPTION /tests/testthat/data/empty_mailbox.mbox: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Adnan Fiaz 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(mailman) 3 | 4 | test_check("mailman") 5 | -------------------------------------------------------------------------------- /R/mailman.R: -------------------------------------------------------------------------------- 1 | #' mailman: a wrapper around the python mailbox module 2 | #' 3 | #' @importFrom reticulate import py_module_available 4 | "_PACKAGE" 5 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(get_messages) 4 | export(read_messages) 5 | importFrom(reticulate,import) 6 | importFrom(reticulate,py_module_available) 7 | importFrom(tibble,as_tibble) 8 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # global reference to scipy (will be initialized in .onLoad) 2 | mailbox <- NULL 3 | 4 | .onLoad <- function(libname, pkgname) { 5 | # use superassignment to update global reference to scipy 6 | mailbox <<- reticulate::import("mailbox", delay_load = TRUE) 7 | } 8 | -------------------------------------------------------------------------------- /man/get_mailbox.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_mail.R 3 | \name{get_mailbox} 4 | \alias{get_mailbox} 5 | \title{Retrieve the mailbox object} 6 | \usage{ 7 | get_mailbox(path, type) 8 | } 9 | \arguments{ 10 | \item{path}{The path to the mailbox} 11 | 12 | \item{type}{The format in which the mailbox is stored} 13 | } 14 | \value{ 15 | A mailbox object 16 | } 17 | \description{ 18 | Retrieve the mailbox object 19 | } 20 | -------------------------------------------------------------------------------- /mailman.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/get_messages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_messages.R 3 | \name{get_messages} 4 | \alias{get_messages} 5 | \title{Parse all messages from the given mail_object and return a data.frame} 6 | \usage{ 7 | get_messages(mail_object) 8 | } 9 | \arguments{ 10 | \item{mail_object}{A mail_object as read in by read_mail} 11 | } 12 | \value{ 13 | A data.frame containing all the fields from the message including the body 14 | } 15 | \description{ 16 | Parse all messages from the given mail_object and return a data.frame 17 | } 18 | -------------------------------------------------------------------------------- /man/mailman-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mailman.R 3 | \docType{package} 4 | \name{mailman-package} 5 | \alias{mailman} 6 | \alias{mailman-package} 7 | \title{mailman: a wrapper around the python mailbox module} 8 | \description{ 9 | The python mailbox module provides functionality to manipulate mailboxes in various formats. 10 | This package is a wrapper around the python module and exposes a number of functions from it. 11 | } 12 | \author{ 13 | \strong{Maintainer}: Adnan Fiaz \email{afiaz@mango-solutions.com} 14 | 15 | } 16 | -------------------------------------------------------------------------------- /man/read_messages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_mail.R 3 | \name{read_messages} 4 | \alias{read_messages} 5 | \title{Read e-mail messages from an on-disk mailbox} 6 | \usage{ 7 | read_messages(path, type = c("mbox", "MailDir", "MH", "Babyl", "MMDF")) 8 | } 9 | \arguments{ 10 | \item{path}{The path to the mailbox} 11 | 12 | \item{type}{The format in which the mailbox is stored} 13 | } 14 | \value{ 15 | a data.frame with the headers and body of the messages 16 | } 17 | \description{ 18 | Read e-mail messages from an on-disk mailbox 19 | } 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mailman 2 | Type: Package 3 | Title: A wrapper around the python mailbox module 4 | Version: 0.1.0 5 | Authors@R: person("Adnan", "Fiaz", role = c("aut", "cre"), email = "afiaz@mango-solutions.com") 6 | Description: The python mailbox module provides functionality to manipulate mailboxes in various formats. 7 | This package is a wrapper around the python module and exposes a number of functions from it. 8 | License: MIT + file LICENSE 9 | URL: https://github.com/MangoTheCat/mailman 10 | BugReports: https://github.com/MangoTheCat/mailman/issues 11 | Encoding: UTF-8 12 | LazyData: true 13 | Depends: 14 | R (>= 3.0) 15 | Imports: reticulate, 16 | tibble 17 | Suggests: testthat 18 | RoxygenNote: 6.0.1.9000 19 | -------------------------------------------------------------------------------- /tests/testthat/test_read_messages.R: -------------------------------------------------------------------------------- 1 | context("Reading messages") 2 | 3 | test_that("read_messages read and parses messages from a mailbox", { 4 | if (!reticulate::py_module_available("mailbox")) 5 | skip("mailbox not available for testing") 6 | 7 | messages <- read_messages("data/test_mailbox.mbox", type="mbox") 8 | result <- tibble::tibble(From=c("foo@bar.com", NA, "foo@bar.com"), 9 | To=c("spam@eggs.co.uk", "spam@eggs.co.uk", 10 | "spam@eggs.co.uk"), 11 | Date=c("2018-01-01 12:00", "2018-01-01 12:00", 12 | "2018-01-01 12:00"), 13 | Body=c("This is a test\n", "This is a second test\n", 14 | "This is a fourth test")) 15 | expect_equal(result, messages) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/data/test_mailbox.mbox: -------------------------------------------------------------------------------- 1 | From MAILER-DAEMON Thu Jun 21 09:15:12 2018 2 | From: foo@bar.com 3 | To: spam@eggs.co.uk 4 | Date: 2018-01-01 12:00 5 | 6 | This is a test 7 | 8 | From MAILER-DAEMON Thu Jun 21 09:15:13 2018 9 | To: spam@eggs.co.uk 10 | Date: 2018-01-01 12:00 11 | 12 | This is a second test 13 | 14 | From MAILER-DAEMON Thu Jun 21 09:15:13 2018 15 | Content-Type: multipart/alternative; boundary="===============1438705278==" 16 | From: foo@bar.com 17 | To: spam@eggs.co.uk 18 | Date: 2018-01-01 12:00 19 | 20 | --===============1438705278== 21 | From: foo@bar.com 22 | To: spam@eggs.co.uk 23 | Date: 2018-01-01 12:00 24 | 25 | This is a third test 26 | --===============1438705278== 27 | From: foo@bar.com 28 | To: spam@eggs.co.uk 29 | Date: 2018-01-01 12:00 30 | 31 | This is a fourth test 32 | --===============1438705278==-- 33 | 34 | -------------------------------------------------------------------------------- /R/read_messages.R: -------------------------------------------------------------------------------- 1 | #' Read e-mail messages from an on-disk mailbox 2 | #' 3 | #' @param path The path to the mailbox 4 | #' @param type The format in which the mailbox is stored 5 | #' 6 | #' @return a data.frame with the headers and body of the messages 7 | #' @export 8 | read_messages <- function(path, 9 | type=c('mbox', 'MailDir', 'MH', 'Babyl', 'MMDF')){ 10 | my_mailbox <- get_mailbox(path, type) 11 | 12 | return(get_messages(my_mailbox)) 13 | } 14 | 15 | 16 | #' Retrieve the mailbox object 17 | #' 18 | #' @param path The path to the mailbox 19 | #' @param type The format in which the mailbox is stored 20 | #' 21 | #' @return A mailbox object 22 | get_mailbox <- function(path, type){ 23 | switch(type, 24 | mbox = mailbox$mbox(path), 25 | MailDir = mailbox$MailDir(path), 26 | MH = mailbox$MH(path), 27 | Babyl = mailbox$Babyl(path), 28 | MMDF = mailbox$MMDF(path), 29 | stop("Unknown mailbox type")) 30 | } 31 | -------------------------------------------------------------------------------- /tests/testthat/test_get_messages.R: -------------------------------------------------------------------------------- 1 | context("Parsing messages") 2 | 3 | test_that("get_messages parses all messages from a mailbox object", { 4 | if (!reticulate::py_module_available("mailbox")) 5 | skip("mailbox not available for testing") 6 | 7 | mb <- mailman:::get_mailbox("data/test_mailbox.mbox", type="mbox") 8 | messages <- get_messages(mb) 9 | 10 | result <- tibble::tibble(From=c("foo@bar.com", NA, "foo@bar.com"), 11 | To=c("spam@eggs.co.uk", "spam@eggs.co.uk", 12 | "spam@eggs.co.uk"), 13 | Date=c("2018-01-01 12:00", "2018-01-01 12:00", 14 | "2018-01-01 12:00"), 15 | Body=c("This is a test\n", "This is a second test\n", 16 | "This is a fourth test")) 17 | expect_equal(result, messages) 18 | }) 19 | 20 | 21 | test_that("get_messages returns a warning for an empty mailbox object", { 22 | if (!reticulate::py_module_available("mailbox")) 23 | skip("mailbox not available for testing") 24 | 25 | mb <- mailman:::get_mailbox("data/empty_mailbox.mbox", type="mbox") 26 | expect_warning(get_messages(mb)) 27 | }) 28 | -------------------------------------------------------------------------------- /R/get_messages.R: -------------------------------------------------------------------------------- 1 | #' Parse all messages from the given mail_object and return a data.frame 2 | #' 3 | #' @param mail_object A mail_object as read in by read_mail 4 | #' 5 | #' @return A data.frame containing all the fields from the message including the body 6 | #' @importFrom tibble as_tibble 7 | #' @export 8 | get_messages <- function(mail_object){ 9 | keys <- mail_object$keys() 10 | number_of_messages <- length(keys) 11 | 12 | if(number_of_messages==0){ 13 | warning("No messages in mailbox") 14 | return(NULL) 15 | } 16 | 17 | pb <- utils::txtProgressBar(max=number_of_messages) 18 | 19 | first_message <- mail_object$get_message(keys[1]) 20 | message_fields <- first_message$keys() 21 | # add one for the body of the message 22 | number_of_columns <- length(message_fields) + 1 23 | 24 | # initialise result matrix 25 | result <- matrix(nrow=number_of_messages, ncol=number_of_columns) 26 | 27 | # loop over each key, retrieve message and fill matrix 28 | for(i in seq_along(keys)){ 29 | message <- mail_object$get_message(keys[i]) 30 | # initialise result vector 31 | fields <- character(number_of_columns) 32 | # loop over each field and retrieve its value from the message 33 | for(j in seq_along(message_fields)){ 34 | value <- message$get(message_fields[j]) 35 | if(is.null(value)){ 36 | value <- NA 37 | } 38 | fields[j] <- value 39 | } 40 | 41 | # now retrieve the body 42 | if(message$is_multipart()){ 43 | # sometimes a message is split into sub-messages 44 | # through inspection we see the body is stored in the second sub-message 45 | payload_with_body <- message$get_payload(1L) 46 | # we convert the sub-message to string 47 | fields[number_of_columns] <- payload_with_body$get_payload() 48 | }else{ 49 | # the documentation for email.message.is_multipart states that when a 50 | # False value is returned the payload should be a string 51 | # but just to be on the safe side we'll check for this 52 | body <- message$get_payload() 53 | if(any(class(body)=="email.message.Message")){ 54 | fields[number_of_columns] <- body$as_string() 55 | }else{ 56 | fields[number_of_columns] <- body 57 | } 58 | 59 | } 60 | 61 | result[i,] <- fields 62 | utils::setTxtProgressBar(pb, i) 63 | } 64 | 65 | result <- as_tibble(result) 66 | colnames(result) <- c(message_fields, "Body") 67 | return(result) 68 | } 69 | --------------------------------------------------------------------------------