├── .Rbuildignore ├── .codecov.yml ├── .gitignore ├── .travis.yml ├── CONDUCT.md ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── as-tibble.R ├── attach.r ├── msgxtractr-package.R ├── print.r ├── process.r ├── read-msg.r └── utils.r ├── README.Rmd ├── README.md ├── appveyor.yml ├── codecov.yml ├── inst ├── COPYRIGHTS └── extdata │ ├── TestMessage-ansi.msg │ ├── TestMessage-default.msg │ ├── TestMessage-unicode.msg │ └── unicode.msg ├── man ├── decode_rtf.Rd ├── is_rtf.Rd ├── msgxtractr.Rd ├── print.msg.Rd ├── read_msg.Rd ├── save_attachments.Rd └── tidy_msg.Rd ├── msgxtractr.Rproj ├── oldsrc ├── attr.c ├── attr.h ├── file.c ├── file.h ├── mapi_attr.c ├── mapi_attr.h ├── mapi_names.c ├── mapi_names.h ├── mapi_types.c ├── mapi_types.h ├── path.c ├── path.h ├── rtf.c ├── rtf.h ├── tnef.c ├── tnef.h ├── tnef │ ├── mapi-names.data │ ├── mapi-types.data │ ├── mkdata.awk │ ├── replace │ │ ├── .deps │ │ │ ├── basename.Po │ │ │ ├── dummy.Po │ │ │ ├── getopt_long.Po │ │ │ ├── malloc.Po │ │ │ └── strdup.Po │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── basename.c │ │ ├── dummy.c │ │ ├── getopt_long.c │ │ ├── getopt_long.h │ │ ├── malloc.c │ │ └── strdup.c │ ├── tnef-names.data │ └── tnef-types.data ├── tnef_names.c ├── tnef_names.h ├── tnef_types.c ├── tnef_types.h ├── write.c └── write.h ├── src ├── .gitignore ├── Makevars ├── RcppExports.cpp ├── alloc.c ├── alloc.h ├── common.h ├── config.h ├── date.c ├── date.h ├── debug.c ├── debug.h ├── options.c ├── options.h ├── pole.cpp ├── pole.h ├── r_pole.cpp ├── utf8.h ├── utf8 │ ├── checked.h │ ├── core.h │ └── unchecked.h ├── util.c ├── util.h └── xstrdup.c └── tests ├── test-all.R └── testthat └── test-msgxtractr.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^README\.*Rmd$ 5 | ^README\.*html$ 6 | ^NOTES\.*Rmd$ 7 | ^NOTES\.*html$ 8 | ^\.codecov\.yml$ 9 | 
^README_files$ 10 | ^doc$ 11 | ^img$ 12 | ^appveyor\.yml$ 13 | ^codecov\.yml$ 14 | ^CONDUCT\.md$ 15 | ^oldsrc$ -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Rproj 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | 3 | warnings_are_errors: true 4 | 5 | sudo: required 6 | 7 | cache: packages 8 | 9 | r: 10 | - oldrel 11 | - release 12 | - devel 13 | 14 | apt_packages: 15 | - libv8-dev 16 | - xclip 17 | 18 | env: 19 | global: 20 | - CRAN: http://cran.rstudio.com 21 | 22 | after_success: 23 | - Rscript -e 'covr::codecov()' 24 | 25 | notifications: 26 | email: 27 | - bob@rud.is 28 | irc: 29 | channels: 30 | - "104.236.112.222#builds" 31 | nick: travisci 32 | -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 
10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: msgxtractr 2 | Type: Package 3 | Title: Read Outlook '.msg' Files 4 | Version: 0.3.0 5 | Date: 2020-05-06 6 | Authors@R: c( 7 | person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")), 8 | person("Ariya", "Hidayat", email = "ariya@kde.org", role = c("ctb", "cph"), comment = "POLE library"), 9 | person("Nemanja", "Trifunovic", role = c("ctb", "cph"), comment = "UTF-8 helpers ") 10 | ) 11 | Maintainer: Bob Rudis <bob@rud.is> 12 | Description: 'Microsoft' 'Outlook' messages can be saved in '.msg' files. Tools are 13 | provided that enable extraction of metadata, envelope, headers, body and attachments 14 | from these files. 
15 | URL: https://github.com/hrbrmstr/msgxtractr 16 | BugReports: https://github.com/hrbrmstr/msgxtractr/issues 17 | SystemRequirements: C++11 18 | NeedsCompilation: yes 19 | Encoding: UTF-8 20 | Copyright: file inst/COPYRIGHTS 21 | License: AGPL 22 | Suggests: 23 | testthat, 24 | covr 25 | Depends: 26 | R (>= 3.5.0) 27 | Imports: 28 | Rcpp, 29 | scales, 30 | tibble 31 | RoxygenNote: 7.1.1 32 | LinkingTo: Rcpp 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as_tibble,msg) 4 | S3method(print,msg) 5 | export(decode_rtf) 6 | export(is_rtf) 7 | export(read_msg) 8 | export(save_attachments) 9 | export(tidy_msg) 10 | importFrom(Rcpp,sourceCpp) 11 | importFrom(scales,comma) 12 | importFrom(tibble,as_tibble) 13 | useDynLib(msgxtractr, .registration=TRUE) 14 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 0.3.0 2 | * Added `tidy_msg()` to turn a `msg` object into a `tibble` 3 | 4 | 0.2.1 5 | * Fixed issue #2 6 | 7 | 0.2.0 8 | * Switched to C library 9 | 10 | 0.1.0 11 | * Initial release 12 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Is a sequence of raw bytes an RTF document? 
# ---- R/RcppExports.R ----
# Generated by using Rcpp::compileAttributes() -> do not edit by hand.
# The three wrappers below are reproduced as generated.

#' Is a sequence of raw bytes an RTF document?
#'
#' @param v a raw vector
#' @export
is_rtf <- function(v) {
    .Call(`_msgxtractr_is_rtf`, v)
}

#' Decode RTF from a raw vector
#'
#' @param v a raw vector
#' @export
decode_rtf <- function(v) {
    .Call(`_msgxtractr_decode_rtf`, v)
}

int_read_msg <- function(path) {
    .Call(`_msgxtractr_int_read_msg`, path)
}

# ---- R/as-tibble.R ----

#' Turn a single `msg` object into a `tibble`
#'
#' The class attribute is removed, zero-length top-level elements are dropped,
#' and every remaining element whose `mode()` is `"list"` (plain lists and
#' data frames alike) is wrapped in an outer `list()` so that it is handed to
#' [tibble::as_tibble()] as a single list-column cell instead of being
#' recycled across rows.
#'
#' @param x a `msg` object
#' @param ... passed on to [tibble::as_tibble()]
#' @return whatever [tibble::as_tibble()] returns for the prepared list
#' @export
tidy_msg <- function(x, ...) {

  fields <- unclass(x)
  fields <- fields[lengths(fields) > 0]

  # vapply() keeps the mask logical even when `fields` is empty
  # (sapply() would hand back an empty list there).
  is_nested <- vapply(fields, mode, character(1)) == "list"
  fields[is_nested] <- lapply(fields[is_nested], list)

  tibble::as_tibble(fields, ...)

}

#' @rdname tidy_msg
#' @export
as_tibble.msg <- tidy_msg
# ---- R/attach.r ----

#' Save all attachments from a 'msg' object
#'
#' Attachment names come from the message itself, so they are treated as
#' untrusted: only the final path component is used (both `/` and `\` are
#' treated as separators), which keeps every file inside `path`. When the
#' preferred name is missing or blank the other name is tried, and if neither
#' is usable the file is written as `attachment_<n>`, where `<n>` is the
#' position of the attachment in the message.
#'
#' @md
#' @param msg_obj a message object read in with `read_msg()`
#' @param path directory path to save attachments in (defaults to current directory)
#' @param use_short if `TRUE` then use the "short" filename from the parsed message,
#'        otherwise use the "long" filename (if it exists)
#' @param quiet if `TRUE` then no informative messages will be displayed
#' @return a character vector of full path names of files written out (invisibly);
#'         zero-length when the message has no attachments
#' @export
#' @examples
#' x <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr"))
#' td <- tempdir()
#' res <- save_attachments(x, td)
#' for (f in res) unlink(f)
save_attachments <- function(msg_obj, path=getwd(), use_short=TRUE, quiet=FALSE) {

  if (!inherits(msg_obj, "msg")) stop("Can only save attachments from 'msg' objects.")

  attachments <- msg_obj$attachments
  if (length(attachments) == 0) return(invisible(character(0)))

  out_dir <- path.expand(path)

  # First candidate that is a non-NA, non-blank string (trimmed), else "".
  first_name <- function(...) {
    for (candidate in list(...)) {
      if (is.character(candidate) && length(candidate) > 0) {
        candidate <- trimws(candidate[[1]])
        if (!is.na(candidate) && nzchar(candidate)) return(candidate)
      }
    }
    ""
  }

  # Preallocated: one output path per attachment.
  out <- character(length(attachments))

  for (i in seq_along(attachments)) {

    a <- attachments[[i]]

    fn <- if (use_short) {
      first_name(a$filename, a$long_filename)
    } else {
      first_name(a$long_filename, a$filename)
    }

    # Drop any directory part so a crafted name such as "../../x" cannot
    # escape `out_dir`; backslashes are normalised first because basename()
    # only splits on them on Windows.
    fn <- basename(gsub("\\", "/", fn, fixed = TRUE))
    if (!nzchar(fn) || fn %in% c(".", "..")) fn <- sprintf("attachment_%d", i)

    out_path <- file.path(out_dir, fn)

    if (!quiet) message(sprintf("Saving %s (%s bytes)", out_path,
                                scales::comma(length(a$content))))

    writeBin(a$content, con = out_path)

    out[i] <- out_path

  }

  invisible(out)

}

# ---- R/msgxtractr-package.R ----

#' Read Outlook '.msg' Files
#'
#' 'Microsoft' 'Outlook' messages can be saved in '.msg' files. Tools are provided that
#' enable extraction of metadata, envelope, headers, body and attachments from these
#' files.
#'
#' @name msgxtractr
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom scales comma
#' @importFrom tibble as_tibble
#' @useDynLib msgxtractr, .registration=TRUE
#' @importFrom Rcpp sourceCpp
NULL
# ---- R/print.r ----

#' Pretty print a 'msg' object
#'
#' Writes an optional date line, then `From:`, `To:` and `Subject:` lines and,
#' when the message has attachments, an `Attachments:` count. `To` prefers the
#' display envelope and falls back to the transport headers; `Subject` prefers
#' the parsed subject and falls back to the headers. Fields that cannot be
#' found are shown as `[Unspecified]`. Only the first value of a multi-valued
#' field is shown.
#'
#' @param x a "`msg`" object
#' @param ... unused
#' @return `x`, invisibly
#' @export
print.msg <- function(x, ...) {

  # First non-empty candidate reduced to one string, else the placeholder.
  first_value <- function(...) {
    for (candidate in list(...)) {
      candidate <- unlist(candidate, use.names = FALSE)
      if (length(candidate) > 0) return(as.character(candidate[[1]]))
    }
    "[Unspecified]"
  }

  headers <- x$headers

  xfrom <- first_value(headers$From)
  xto <- first_value(x$display_envelope$display_to, headers$To)
  xsubj <- first_value(x$subject, headers$Subject)

  xdate <- ""
  if (length(headers$Date) > 0) xdate <- sprintf("%s\n", first_value(headers$Date))

  n_attach <- length(x$attachments)
  xattach <- ""
  if (n_attach > 0) xattach <- sprintf("Attachments: %s\n", n_attach)

  cat(sprintf("%sFrom: %s\nTo: %s\nSubject: %s\n%s", xdate, xfrom, xto, xsubj, xattach), sep="")

  # print methods hand their argument back so the call can be chained/piped
  invisible(x)

}

# ---- R/process.r ----

# Collect creation time, last-modification time and last-modifier name.
# `x` is the named list of stream values built in read_msg(); a field is found
# by searching the element names for its id from `msg_fields`. Fields with no
# matching element come back as NULL.
process_times <- function(x) {

  pick <- function(id) unlist(unname(x[grep(id, names(x), value=TRUE)]))

  list(
    creation_time = pick(msg_fields$CreationTime),
    last_mod_time = pick(msg_fields$LastModificationTime),
    last_mod_name = pick(msg_fields$LastModifierName)
  )

}

# Build one list(display_name, address_type, email_address) per top-level
# recipient storage ("/__recip_version1.0_#NNNNNNNN/...") found in the names
# of `x`. Returns an empty list when there are none.
process_recipients <- function(x) {

  nms <- names(x)
  if (is.null(nms)) nms <- character(0)

  # Anchored at the root on purpose: recipient storages that only occur
  # deeper in a path (e.g. under an attachment) are not recipients of this
  # message.
  recip_keys <- nms[startsWith(nms, "/__recip_version1.0_")]
  recip_dirs <- unique(vapply(strsplit(recip_keys, "/", fixed=TRUE), `[`, character(1), 2L))

  # `msg_fields` carries two entries named "DisplayName"; `$` returns the
  # first ("_0E01"), which never matched inside a recipient storage. The
  # recipient-level entry is the "_3001" one listed next to AddressType
  # ("_3002") and EmailAddress ("_3003").
  recipient_display_name <- "_3001"

  lapply(recip_dirs, function(d) {
    # literal prefix match; the storage name contains regex metacharacters
    keys <- nms[startsWith(nms, paste0("/", d, "/"))]
    field <- function(id) unlist(unname(x[grep(id, keys, value=TRUE, fixed=TRUE)]))
    list(
      display_name = field(recipient_display_name),
      address_type = field(msg_fields$AddressType),
      email_address = field(msg_fields$EmailAddress)
    )
  })

}
# ---- R/process.r (continued) ----

# Build one list(filename, long_filename, mime, content[, extension]) per
# top-level attachment storage ("/__attach_version1.0_#NNNNNNNN/...") found in
# the names of `x`. `extension` is only present when the message provides it.
# Returns an empty list when there are no attachments.
process_attachments <- function(x) {

  nms <- names(x)
  if (is.null(nms)) nms <- character(0)

  # Root-anchored literal match; vapply() keeps the result a character vector
  # even when nothing matches.
  attach_keys <- nms[startsWith(nms, "/__attach_version1.0_")]
  attach_dirs <- unique(vapply(strsplit(attach_keys, "/", fixed=TRUE), `[`, character(1), 2L))

  lapply(attach_dirs, function(d) {

    # every stream below this attachment storage (literal prefix: the
    # storage name contains "." and "#")
    keys <- nms[startsWith(nms, paste0("/", d, "/"))]
    field <- function(id) unlist(unname(x[grep(id, keys, value=TRUE, fixed=TRUE)]))

    res <- list(
      filename = field(msg_fields$AttachFilename),
      long_filename = field(msg_fields$AttachLongFilename),
      mime = field(msg_fields$AttachMIME),
      content = field(msg_fields$AttachContent)
    )

    extension <- field(msg_fields$AttachExtension)
    if (!is.null(extension)) res$extension <- extension

    res

  })

}

# Subject value(s) found in `x`, or NULL when absent.
process_subject <- function(x) {
  unlist(unname(x[grep(msg_fields$Subject, names(x), value=TRUE)]))
}

# Sender e-mail address and name; each element is present only when the
# corresponding field exists in `x`, so the result may be an empty list.
process_sender <- function(x) {

  pick <- function(id) unlist(unname(x[grep(id, names(x), value=TRUE)]))

  res <- list()

  sender_email <- pick(msg_fields$SenderEmailAddress)
  sender_name <- pick(msg_fields$SenderName)

  if (!is.null(sender_email)) res$sender_email <- sender_email
  if (!is.null(sender_name)) res$sender_name <- sender_name

  res

}

# Display-envelope fields (display_name, display_bcc, display_cc, display_to);
# each element is present only when the corresponding field exists in `x`.
process_envelope <- function(x) {

  pick <- function(id) unlist(unname(x[grep(id, names(x), value=TRUE)]))

  ids <- list(
    display_name = msg_fields$DisplayName,
    display_bcc = msg_fields$DisplayBcc,
    display_cc = msg_fields$DisplayCc,
    display_to = msg_fields$DisplayTo
  )

  res <- list()

  for (nm in names(ids)) {
    value <- pick(ids[[nm]])
    if (!is.null(value)) res[[nm]] <- value
  }

  res

}
# ---- R/process.r (continued) ----

# Parse the transport message headers into a one-row-per-message data frame
# (classed as a tibble for printing). Returns NULL when the message carries no
# header block. If the header text is not valid DCF a warning is raised and
# NULL is returned, so that one bad header line does not prevent the rest of
# the message from being read.
process_headers <- function(x) {

  raw_headers <- unlist(unname(x[grep(msg_fields$TransportMessageHeaders, names(x), value=TRUE)]))
  if (is.null(raw_headers)) return(NULL)

  txc <- textConnection(raw_headers)
  on.exit(close(txc), add=TRUE)

  tryCatch({
    hdr <- read.dcf(txc, all=TRUE)
    class(hdr) <- c("tbl_df", "tbl", "data.frame")
    hdr
  }, error = function(e) {
    warning("Could not parse message headers: ", conditionMessage(e), call.=FALSE)
    NULL
  })

}

# Plain-text and HTML bodies. The HTML body is decoded with readBin() when it
# arrives as a raw vector; a value that is already character is passed through
# untouched (readBin() would otherwise interpret it as a file name).
process_body <- function(x) {

  pick <- function(id) unlist(unname(x[grep(id, names(x), value=TRUE)]))

  res <- list(
    text = pick(msg_fields$MessageBody),
    html = pick(msg_fields$MessageBodyHtml)
  )

  if (is.raw(res$html)) res$html <- readBin(res$html, "character")

  res

}

# ---- R/read-msg.r ----

#' Read in an Outlook '.msg' file
#'
#' @md
#' @param path path to '.msg' file (a single string; `~` is expanded)
#' @return a `list` of extracted fields and metadata with class "`msg`". The
#'   elements are `headers`, `sender`, `recipients`, `subject`, `body`,
#'   `attachments`, `display_envelope` and `times`.
#' @export
#' @examples
#' read_msg(system.file("extdata/unicode.msg", package="msgxtractr"))
#' read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr"))
#' read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr"))
#' read_msg(system.file("extdata/TestMessage-unicode.msg", package="msgxtractr"))
read_msg <- function(path) {

  if (!is.character(path) || length(path) != 1L || is.na(path)) {
    stop("'path' must be a single file path.", call.=FALSE)
  }

  # Check before normalizing: normalizePath() warns on a missing file, which
  # would otherwise precede the intended error.
  path <- path.expand(path)
  if (!file.exists(path) || dir.exists(path)) stop("File not found.", call.=FALSE)
  path <- normalizePath(path)

  parsed <- int_read_msg(path)

  # one value per stream, named by its storage path
  x <- parsed$values
  names(x) <- parsed$keys

  res <- list(
    headers = process_headers(x),
    sender = process_sender(x),
    recipients = process_recipients(x),
    subject = process_subject(x),
    body = process_body(x),
    attachments = process_attachments(x),
    display_envelope = process_envelope(x),
    times = process_times(x)
  )

  class(res) <- "msg"

  res

}
# ---- R/utils.r ----

# Lookup table from a readable field name to the id fragment that the
# process_*() helpers search for in the stream names returned by
# int_read_msg() (e.g. "_0037" is looked up for the subject).
#
# Names must be unique: `msg_fields$Name` only ever returns the first entry
# with that name. The list previously held two "DisplayName" entries, which
# made the "_3001" one unreachable; it is now `RecipientDisplayName`, while
# `DisplayName` keeps resolving to "_0E01" exactly as before.
# NOTE(review): "_3001"/"_3002"/"_3003" are grouped as the per-recipient
# fields; confirm that "_0E01" is the intended id for the envelope-level
# display name.
msg_fields <- list(
  CreationTime = "_3007",
  LastModificationTime = "_3008",
  LastModifierName = "_3FFA",
  OriginalSenderEmailAddress = "_0067",
  MessageBodyHtml = "_1013",
  MessageClass = "_001A",
  Subject = "_0037",
  ReceivedByName = "_0040",
  ReceivedRepresentingName = "_0044",
  SentRepresentingName = "_0071",
  SentRepresentingEmailAddress = "_0065",
  SentRepresentingAddressType = "_0064",
  SenderAddressType = "_0C1E",
  SenderEmailAddress = "_0C1F",
  SenderName = "_0C1A",
  ConversationTopic = "_0070",
  ReceivedRepresentingEmailAddress = "_0078",
  ReceivedByEmailAddress = "_0076",
  ReceivedByAddressType = "_0075",
  ReceivedRepresentingAddressType = "_0077",
  TransportMessageHeaders = "_007D",
  DisplayName = "_0E01",
  DisplayBcc = "_0E02",
  DisplayCc = "_0E03",
  DisplayTo = "_0E04",
  NormalizedSubject = "_0E1D",
  OriginalMessageId = "_1046",
  InternetMessageId = "_1035",
  MessageBody = "_1000",
  AttachExtension = "_3703",
  AttachFilename = "_3704",
  AttachLongFilename = "_3707",
  AttachMIME = "_370E",
  AttachContentId = "_3712",
  AttachContent = "_3701",
  RecipientDisplayName = "_3001",
  AddressType = "_3002",
  EmailAddress = "_3003"
)
Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/msgxtractr?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/msgxtractr) 6 | [![codecov](https://codecov.io/gh/hrbrmstr/msgxtractr/branch/master/graph/badge.svg)](https://codecov.io/gh/hrbrmstr/msgxtractr) 7 | 8 | `msgxtractr` : Read Outlook '.msg' Files 9 | 10 | 'Microsoft' 'Outlook' messages can be saved in '.msg' files. Tools are provided that enable extraction of metadata, envelope, headers, body and attachments from these files. 11 | 12 | The following functions are implemented: 13 | 14 | - `read_msg`: Read in an Outlook '.msg' file 15 | - `save_attachments`: Save all attachments from a 'msg' object 16 | - `tidy_msg`: Turn a 'msg' object into a 'tibble' 17 | 18 | ### Installation 19 | 20 | ```{r eval=FALSE} 21 | devtools::install_github("hrbrmstr/msgxtractr") 22 | ``` 23 | 24 | ```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} 25 | options(width=120) 26 | ``` 27 | 28 | ### Usage 29 | 30 | ```{r message=FALSE, warning=FALSE, error=FALSE} 31 | library(msgxtractr) 32 | 33 | # current version 34 | packageVersion("msgxtractr") 35 | 36 | str(msg1 <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr"))) 37 | 38 | print(msg1) 39 | 40 | str(msg2 <- read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr"))) 41 | 42 | str(msg3 <- read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr"))) 43 | 44 | str(msg4 <- read_msg(system.file("extdata/TestMessage-unicode.msg", package="msgxtractr"))) 45 | 46 | str(tidy_msg(msg1), 2) 47 | str(tidy_msg(msg2), 2) 48 | str(tidy_msg(msg3), 2) 49 | str(tidy_msg(msg4), 2) 50 | ``` 51 | 52 | ### Code of Conduct 53 | 54 | Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. 
55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Build 3 | Status](https://travis-ci.org/hrbrmstr/msgxtractr.svg?branch=master)](https://travis-ci.org/hrbrmstr/msgxtractr) 4 | [![AppVeyor Build 5 | Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/msgxtractr?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/msgxtractr) 6 | [![codecov](https://codecov.io/gh/hrbrmstr/msgxtractr/branch/master/graph/badge.svg)](https://codecov.io/gh/hrbrmstr/msgxtractr) 7 | 8 | `msgxtractr` : Read Outlook ‘.msg’ Files 9 | 10 | ‘Microsoft’ ‘Outlook’ messages can be saved in ‘.msg’ files. Tools are 11 | provided that enable extraction of metadata, envelope, headers, body and 12 | attachments from these files. 13 | 14 | The following functions are implemented: 15 | 16 | - `read_msg`: Read in an Outlook ‘.msg’ file 17 | - `save_attachments`: Save all attachments from a ‘msg’ object 18 | - `tidy_msg`: Turn a ‘msg’ object into a ‘tibble’ 19 | 20 | ### Installation 21 | 22 | ``` r 23 | devtools::install_github("hrbrmstr/msgxtractr") 24 | ``` 25 | 26 | ### Usage 27 | 28 | ``` r 29 | library(msgxtractr) 30 | 31 | # current version 32 | packageVersion("msgxtractr") 33 | ``` 34 | 35 | ## [1] '0.3.0' 36 | 37 | ``` r 38 | str(msg1 <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr"))) 39 | ``` 40 | 41 | ## List of 8 42 | ## $ headers : tibble [1 × 18] (S3: tbl_df/tbl/data.frame) 43 | ## ..$ Return-path : chr "" 44 | ## ..$ Received :List of 1 45 | ## .. 
..$ : chr [1:4] "from st11p00mm-smtpin007.mac.com ([17.172.84.240])\nby ms06561.mac.com (Oracle Communications Messaging Server "| __truncated__ "from mail-vc0-f182.google.com ([209.85.220.182])\nby st11p00mm-smtpin007.mac.com\n(Oracle Communications Messag"| __truncated__ "by mail-vc0-f182.google.com with SMTP id ie18so3484487vcb.13 for\n; Mon, 18 Nov 2013 00:26:25 -0800 (PST)" "by 10.58.207.196 with HTTP; Mon, 18 Nov 2013 00:26:24 -0800 (PST)" 46 | ## ..$ Original-recipient : chr "rfc822;brianzhou@me.com" 47 | ## ..$ Received-SPF : chr "pass (st11p00mm-smtpin006.mac.com: domain of brizhou@gmail.com\ndesignates 209.85.220.182 as permitted sender)\"| __truncated__ 48 | ## ..$ DKIM-Signature : chr "v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com;\ns=20120113; h=mime-version:date:message-id:subject:f"| __truncated__ 49 | ## ..$ MIME-version : chr "1.0" 50 | ## ..$ X-Received : chr "by 10.221.47.193 with SMTP id ut1mr14470624vcb.8.1384763184960;\nMon, 18 Nov 2013 00:26:24 -0800 (PST)" 51 | ## ..$ Date : chr "Mon, 18 Nov 2013 10:26:24 +0200" 52 | ## ..$ Message-id : chr "" 53 | ## ..$ Subject : chr "Test for TIF files" 54 | ## ..$ From : chr "Brian Zhou " 55 | ## ..$ To : chr "brianzhou@me.com" 56 | ## ..$ Cc : chr "Brian Zhou " 57 | ## ..$ Content-type : chr "multipart/mixed; boundary=001a113392ecbd7a5404eb6f4d6a" 58 | ## ..$ Authentication-results : chr "st11p00mm-smtpin007.mac.com; dkim=pass\nreason=\"2048-bit key\" header.d=gmail.com header.i=@gmail.com\nheader."| __truncated__ 59 | ## ..$ x-icloud-spam-score : chr "33322\nf=gmail.com;e=gmail.com;pp=ham;spf=pass;dkim=pass;wl=absent;pwl=absent" 60 | ## ..$ X-Proofpoint-Virus-Version: chr "vendor=fsecure\nengine=2.50.10432:5.10.8794,1.0.14,0.0.0000\ndefinitions=2013-11-18_02:2013-11-18,2013-11-17,19"| __truncated__ 61 | ## ..$ X-Proofpoint-Spam-Details : chr "rule=notspam policy=default score=0 spamscore=0\nsuspectscore=0 phishscore=0 bulkscore=0 adultscore=0 classifie"| __truncated__ 62 | ## $ sender :List of 2 63 
| ## ..$ sender_email: chr "brizhou@gmail.com" 64 | ## ..$ sender_name : chr "Brian Zhou" 65 | ## $ recipients :List of 2 66 | ## ..$ :List of 3 67 | ## .. ..$ display_name : NULL 68 | ## .. ..$ address_type : chr "SMTP" 69 | ## .. ..$ email_address: chr "brianzhou@me.com" 70 | ## ..$ :List of 3 71 | ## .. ..$ display_name : NULL 72 | ## .. ..$ address_type : chr "SMTP" 73 | ## .. ..$ email_address: chr "brizhou@gmail.com" 74 | ## $ subject : chr "Test for TIF files" 75 | ## $ body :List of 2 76 | ## ..$ text: chr "This is a test email to experiment with the MS Outlook MSG Extractor\r\n\r\n\r\n-- \r\n\r\n\r\nKind regards\r\n"| __truncated__ 77 | ## ..$ html: NULL 78 | ## $ attachments :List of 2 79 | ## ..$ :List of 4 80 | ## .. ..$ filename : chr "importOl.tif" 81 | ## .. ..$ long_filename: chr "import OleFileIO.tif" 82 | ## .. ..$ mime : chr "image/tiff" 83 | ## .. ..$ content : raw [1:969674] 49 49 2a 00 ... 84 | ## ..$ :List of 4 85 | ## .. ..$ filename : chr "raisedva.tif" 86 | ## .. ..$ long_filename: chr "raised value error.tif" 87 | ## .. ..$ mime : chr "image/tiff" 88 | ## .. ..$ content : raw [1:1033142] 49 49 2a 00 ... 89 | ## $ display_envelope:List of 2 90 | ## ..$ display_cc: chr "Brian Zhou" 91 | ## ..$ display_to: chr "brianzhou@me.com" 92 | ## $ times :List of 3 93 | ## ..$ creation_time: NULL 94 | ## ..$ last_mod_time: NULL 95 | ## ..$ last_mod_name: NULL 96 | ## - attr(*, "class")= chr "msg" 97 | 98 | ``` r 99 | print(msg1) 100 | ``` 101 | 102 | ## Mon, 18 Nov 2013 10:26:24 +0200 103 | ## From: Brian Zhou 104 | ## To: brianzhou@me.com 105 | ## Subject: Test for TIF files 106 | ## Attachments: 2 107 | 108 | ``` r 109 | str(msg2 <- read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr"))) 110 | ``` 111 | 112 | ## List of 8 113 | ## $ headers : NULL 114 | ## $ sender : list() 115 | ## $ recipients :List of 3 116 | ## ..$ :List of 3 117 | ## .. ..$ display_name : NULL 118 | ## .. ..$ address_type : NULL 119 | ## .. 
..$ email_address: NULL 120 | ## ..$ :List of 3 121 | ## .. ..$ display_name : NULL 122 | ## .. ..$ address_type : NULL 123 | ## .. ..$ email_address: NULL 124 | ## ..$ :List of 3 125 | ## .. ..$ display_name : NULL 126 | ## .. ..$ address_type : NULL 127 | ## .. ..$ email_address: NULL 128 | ## $ subject : NULL 129 | ## $ body :List of 2 130 | ## ..$ text: NULL 131 | ## ..$ html: NULL 132 | ## $ attachments :List of 1 133 | ## ..$ :List of 4 134 | ## .. ..$ filename : NULL 135 | ## .. ..$ long_filename: NULL 136 | ## .. ..$ mime : NULL 137 | ## .. ..$ content : raw [1:10934] 50 4b 03 04 ... 138 | ## $ display_envelope: list() 139 | ## $ times :List of 3 140 | ## ..$ creation_time: NULL 141 | ## ..$ last_mod_time: NULL 142 | ## ..$ last_mod_name: NULL 143 | ## - attr(*, "class")= chr "msg" 144 | 145 | ``` r 146 | str(msg3 <- read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr"))) 147 | ``` 148 | 149 | ## List of 8 150 | ## $ headers : NULL 151 | ## $ sender :List of 2 152 | ## ..$ sender_email: chr "sender@example.com" 153 | ## ..$ sender_name : chr "Sender" 154 | ## $ recipients :List of 3 155 | ## ..$ :List of 3 156 | ## .. ..$ display_name : NULL 157 | ## .. ..$ address_type : chr "SMTP" 158 | ## .. ..$ email_address: chr "recipient1@example.com" 159 | ## ..$ :List of 3 160 | ## .. ..$ display_name : NULL 161 | ## .. ..$ address_type : chr "SMTP" 162 | ## .. ..$ email_address: chr "cc1@example.com" 163 | ## ..$ :List of 3 164 | ## .. ..$ display_name : NULL 165 | ## .. ..$ address_type : chr "SMTP" 166 | ## .. ..$ email_address: chr "recipient2@example.com" 167 | ## $ subject : chr "New Message!" 168 | ## $ body :List of 2 169 | ## ..$ text: chr "This is some bold html!" 
170 | ## ..$ html: chr "\r\n\r\n\r\n\r\n 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions 14 | are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | * Redistributions in binary form must reproduce the above copyright notice, 19 | this list of conditions and the following disclaimer in the documentation 20 | and/or other materials provided with the distribution. 21 | * Neither the name of the authors nor the names of its contributors may be 22 | used to endorse or promote products derived from this software without 23 | specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 35 | THE POSSIBILITY OF SUCH DAMAGE. 
36 | 37 | --- 38 | 39 | utf8 : 40 | 41 | Copyright 2006 Nemanja Trifunovic 42 | 43 | Permission is hereby granted, free of charge, to any person or organization 44 | obtaining a copy of the software and accompanying documentation covered by 45 | this license (the "Software") to use, reproduce, display, distribute, 46 | execute, and transmit the Software, and to prepare derivative works of the 47 | Software, and to permit third-parties to whom the Software is furnished to 48 | do so, all subject to the following: 49 | 50 | The copyright notices in the Software and this entire statement, including 51 | the above license grant, this restriction and the following disclaimer, 52 | must be included in all copies of the Software, in whole or in part, and 53 | all derivative works of the Software, unless such copies or derivative 54 | works are solely in the form of machine-executable object code generated by 55 | a source language processor. 56 | 57 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 58 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 59 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 60 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 61 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 62 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 63 | DEALINGS IN THE SOFTWARE. 
64 | -------------------------------------------------------------------------------- /inst/extdata/TestMessage-ansi.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/msgxtractr/40098e012f98e345848dd784eb41a27996e8ac57/inst/extdata/TestMessage-ansi.msg -------------------------------------------------------------------------------- /inst/extdata/TestMessage-default.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/msgxtractr/40098e012f98e345848dd784eb41a27996e8ac57/inst/extdata/TestMessage-default.msg -------------------------------------------------------------------------------- /inst/extdata/TestMessage-unicode.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/msgxtractr/40098e012f98e345848dd784eb41a27996e8ac57/inst/extdata/TestMessage-unicode.msg -------------------------------------------------------------------------------- /inst/extdata/unicode.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/msgxtractr/40098e012f98e345848dd784eb41a27996e8ac57/inst/extdata/unicode.msg -------------------------------------------------------------------------------- /man/decode_rtf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{decode_rtf} 4 | \alias{decode_rtf} 5 | \title{Decode RTF from a raw vector} 6 | \usage{ 7 | decode_rtf(v) 8 | } 9 | \arguments{ 10 | \item{v}{a raw vector} 11 | } 12 | \description{ 13 | Decode RTF from a raw vector 14 | } 15 | -------------------------------------------------------------------------------- /man/is_rtf.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{is_rtf} 4 | \alias{is_rtf} 5 | \title{Is a sequence of raw bytes an RTF document?} 6 | \usage{ 7 | is_rtf(v) 8 | } 9 | \arguments{ 10 | \item{v}{a raw vector} 11 | } 12 | \description{ 13 | Is a sequence of raw bytes an RTF document? 14 | } 15 | -------------------------------------------------------------------------------- /man/msgxtractr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/msgxtractr-package.R 3 | \docType{package} 4 | \name{msgxtractr} 5 | \alias{msgxtractr} 6 | \title{Read Outlook '.msg' Files} 7 | \description{ 8 | 'Microsoft' 'Outlook' messages can be saved in '.msg' files. Tools are provided that 9 | enable extraction of metadata, envelope, headers, body and attachments from these 10 | files. 11 | } 12 | \author{ 13 | Bob Rudis (bob@rud.is) 14 | } 15 | -------------------------------------------------------------------------------- /man/print.msg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.r 3 | \name{print.msg} 4 | \alias{print.msg} 5 | \title{Pretty print a 'msg' object} 6 | \usage{ 7 | \method{print}{msg}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{a "`msg`" object} 11 | 12 | \item{...}{unused} 13 | } 14 | \description{ 15 | Pretty print a 'msg' object 16 | } 17 | -------------------------------------------------------------------------------- /man/read_msg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-msg.r 3 | \name{read_msg} 4 | \alias{read_msg} 5 | \title{Read in an Outlook '.msg' file} 6 | \usage{ 7 | read_msg(path) 8 | } 9 | \arguments{ 10 | \item{path}{path to '.msg' file} 11 | } 12 | \value{ 13 | a \code{list} of extracted fields and metadata with class "\code{msg}" 14 | } 15 | \description{ 16 | Read in an Outlook '.msg' file 17 | } 18 | \examples{ 19 | read_msg(system.file("extdata/unicode.msg", package="msgxtractr")) 20 | read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr")) 21 | read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr")) 22 | read_msg(system.file("extdata/TestMessage-unicode.msg", package="msgxtractr")) 23 | } 24 | -------------------------------------------------------------------------------- /man/save_attachments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/attach.r 3 | \name{save_attachments} 4 | \alias{save_attachments} 5 | \title{Save all attachments from a 'msg' object} 6 | \usage{ 7 | save_attachments(msg_obj, path = getwd(), use_short = TRUE, quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{msg_obj}{a message object read in with \code{read_msg()}} 11 | 12 | \item{path}{directory path to save attachments in (defaults to current directory)} 13 | 14 | \item{use_short}{if \code{TRUE} then use the "short" filename from the parsed message, 15 | otherwise use the "long" filename (if it exists)} 16 | 17 | \item{quiet}{if \code{TRUE} then no informative
messages will be displayed} 18 | } 19 | \value{ 20 | a character vector of full path names of files written out (invisibly) 21 | } 22 | \description{ 23 | Save all attachments from a 'msg' object 24 | } 25 | \examples{ 26 | x <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr")) 27 | td <- tempdir() 28 | res <- save_attachments(x, td) 29 | for (f in res) unlink(f) 30 | } 31 | -------------------------------------------------------------------------------- /man/tidy_msg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as-tibble.R 3 | \name{tidy_msg} 4 | \alias{tidy_msg} 5 | \alias{as_tibble.msg} 6 | \title{Turn a single `msg` object into a `tibble`} 7 | \usage{ 8 | tidy_msg(x, ...) 9 | 10 | \method{as_tibble}{msg}(x, ...) 11 | } 12 | \arguments{ 13 | \item{x}{a `msg` object} 14 | 15 | \item{...}{passed on to [tibble::as_tibble()]} 16 | } 17 | \description{ 18 | Turn a single `msg` object into a `tibble` 19 | } 20 | -------------------------------------------------------------------------------- /msgxtractr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageBuildArgs: --resave-data 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /oldsrc/attr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * attr.c -- Functions for handling tnef attributes 3 | * 4 | *
Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | 32 | #include 33 | 34 | #include "alloc.h" 35 | #include "attr.h" 36 | #include "tnef_types.h" 37 | #include "tnef_names.h" 38 | #include "date.h" 39 | #include "options.h" 40 | #include "util.h" 41 | #include "write.h" 42 | #include "debug.h" 43 | 44 | /* Copy the date data from the attribute into a struct date */ 45 | void 46 | copy_date_from_attr (Attr* attr, struct date* dt) 47 | { 48 | assert (attr); 49 | assert (dt); 50 | assert (attr->type == szDATE); 51 | assert (attr->len >= 14); 52 | 53 | date_read (dt, attr->buf); 54 | } 55 | 56 | void 57 | copy_triple_from_attr (Attr* attr, TRIPLE *t) 58 | { 59 | assert (attr); 60 | assert (t); 61 | assert (attr->type == szTRIPLES); 62 | 63 | t->trp.id = GETINT16 (attr->buf); 64 | t->trp.chbgtrp = GETINT16 (attr->buf+2); 65 | t->trp.cch = GETINT16 (attr->buf+4); 66 | t->trp.cb = GETINT16 (attr->buf+6); 67 | t->sender_display_name = (char*)(attr->buf+8); 68 | t->sender_address = 
(char*)(attr->buf+8+t->trp.cch); 69 | } 70 | 71 | /* attr_dump 72 | print attr to stdout. Assumes that the Debug flag has been set and 73 | already checked */ 74 | void 75 | attr_dump (Attr* attr) 76 | { 77 | char *name = get_tnef_name_str (attr->name); 78 | char *type = get_tnef_type_str (attr->type); 79 | size_t i; 80 | 81 | fprintf (stdout, "(%s) %s [type: %s] [len: %lu] =", 82 | ((attr->lvl_type == LVL_MESSAGE) ? "MESS" : "ATTA"), 83 | name, type, (unsigned long)attr->len); 84 | 85 | switch (attr->type) 86 | { 87 | case szBYTE: 88 | for (i=0; i < attr->len; i++) 89 | { 90 | fputc (' ', stdout); 91 | write_byte(stdout, (uint8)attr->buf[i]); 92 | } 93 | break; 94 | 95 | case szSHORT: 96 | if (attr->len < sizeof(uint16)) 97 | { 98 | fprintf (stdout, "Not enough data for szSHORT"); 99 | abort(); 100 | } 101 | fputc (' ', stdout); 102 | write_uint16 (stdout, GETINT16(attr->buf)); 103 | if (attr->len > sizeof(uint16)) 104 | { 105 | fprintf (stdout, " [extra data:"); 106 | for (i = sizeof(uint16); i < attr->len; i++) 107 | { 108 | fputc (' ', stdout); 109 | write_uint8 (stdout, (uint8)attr->buf[i]); 110 | } 111 | fprintf (stdout, " ]"); 112 | } 113 | break; 114 | 115 | case szLONG: 116 | if (attr->len < sizeof(uint32)) 117 | { 118 | fprintf (stdout, "Not enough data for szLONG"); 119 | abort(); 120 | } 121 | fputc (' ', stdout); 122 | write_uint32 (stdout, GETINT32(attr->buf)); 123 | if (attr->len > sizeof(uint32)) 124 | { 125 | fprintf (stdout, " [extra data:"); 126 | for (i = sizeof(uint32); i < attr->len; i++) 127 | { 128 | fputc (' ', stdout); 129 | write_uint8 (stdout, (uint8)attr->buf[i]); 130 | } 131 | fprintf (stdout, " ]"); 132 | } 133 | break; 134 | 135 | 136 | case szWORD: 137 | for (i=0; i < attr->len; i+=2) 138 | { 139 | fputc (' ', stdout); 140 | write_word(stdout, GETINT16(attr->buf+i)); 141 | } 142 | break; 143 | 144 | case szDWORD: 145 | for (i=0; i < attr->len; i+=4) 146 | { 147 | fputc (' ', stdout); 148 | write_dword (stdout, 
GETINT32(attr->buf+i)); 149 | } 150 | break; 151 | 152 | case szDATE: 153 | { 154 | struct date dt; 155 | copy_date_from_attr (attr, &dt); 156 | fputc (' ', stdout); 157 | write_date (stdout, &dt); 158 | } 159 | break; 160 | 161 | case szTEXT: 162 | case szSTRING: 163 | { 164 | char* buf = CHECKED_XMALLOC (char, (attr->len + 1)); 165 | strncpy (buf, (char*)attr->buf, attr->len); 166 | buf[attr->len] = '\0'; 167 | write_string (stdout, buf); 168 | XFREE (buf); 169 | } 170 | break; 171 | 172 | case szTRIPLES: 173 | { 174 | TRIPLE triple; 175 | copy_triple_from_attr (attr, &triple); 176 | write_triple (stdout, &triple); 177 | } 178 | break; 179 | 180 | default: 181 | fprintf (stdout, ""); 182 | break; 183 | } 184 | fprintf (stdout, "\n"); 185 | fflush( NULL ); 186 | } 187 | 188 | void 189 | attr_free (Attr* attr) 190 | { 191 | if (attr) 192 | { 193 | XFREE (attr->buf); 194 | memset (attr, '\0', sizeof (Attr)); 195 | } 196 | } 197 | 198 | 199 | /* Validate the checksum against attr. The checksum is the sum of all the 200 | bytes in the attribute data modulo 65536 */ 201 | static int 202 | check_checksum (Attr* attr, uint16 checksum) 203 | { 204 | size_t i; 205 | uint32 sum = 0; 206 | 207 | for (i = 0; i < attr->len; i++) 208 | { 209 | sum = ( sum + (uint8)attr->buf[i] ) & 0xffff; 210 | } 211 | 212 | if (DEBUG_ON) 213 | { 214 | if ( sum != checksum ) 215 | { 216 | /* for grins, figure out if it *ever* matched */ 217 | 218 | int match = -1; 219 | uint32 mysum = 0; 220 | 221 | for ( i=0; i < attr->len; i++ ) 222 | { 223 | mysum = ( mysum + (uint8)attr->buf[i] ) & 0xffff; 224 | 225 | if ( mysum == checksum ) match = i; 226 | } 227 | 228 | debug_print( "!!checksum error: length=%d sum=%04x checksum=%04x match=%d\n", attr->len, mysum, checksum, match ); 229 | } 230 | } 231 | 232 | return (sum == checksum); 233 | } 234 | 235 | Attr* 236 | attr_read (FILE* in) 237 | { 238 | uint32 type_and_name; 239 | uint16 checksum; 240 | 241 | Attr *attr = CHECKED_XCALLOC (Attr, 1); 242 | 
243 | attr->lvl_type = geti8(in); 244 | 245 | assert ((attr->lvl_type == LVL_MESSAGE) 246 | || (attr->lvl_type == LVL_ATTACHMENT)); 247 | 248 | type_and_name = geti32(in); 249 | 250 | attr->type = (type_and_name >> 16); 251 | attr->name = ((type_and_name << 16) >> 16); 252 | attr->len = geti32(in); 253 | attr->buf = CHECKED_XCALLOC (unsigned char, attr->len); 254 | 255 | (void)getbuf(in, attr->buf, attr->len); 256 | 257 | checksum = geti16(in); 258 | if (!check_checksum(attr, checksum)) 259 | { 260 | if ( CHECKSUM_SKIP ) 261 | { 262 | fprintf (stderr, 263 | "WARNING: invalid checksum, input file may be corrupted\n"); 264 | } 265 | else 266 | { 267 | fprintf (stderr, 268 | "ERROR: invalid checksum, input file may be corrupted\n"); 269 | exit( 1 ); 270 | } 271 | } 272 | 273 | if (DEBUG_ON) attr_dump (attr); 274 | 275 | return attr; 276 | } 277 | 278 | #ifdef __cplusplus 279 | } 280 | #endif -------------------------------------------------------------------------------- /oldsrc/attr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * attr.h -- Functions for handling tnef attributes 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifndef ATTR_H 23 | #define ATTR_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if HAVE_CONFIG_H 30 | # include "config.h" 31 | #endif /* HAVE_CONFIG_H */ 32 | 33 | #include "common.h" 34 | #include "date.h" 35 | #include "tnef_types.h" 36 | #include "tnef_names.h" 37 | 38 | #define MINIMUM_ATTR_LENGTH 8 39 | 40 | /* Object types */ 41 | enum _lvl_type 42 | { 43 | LVL_MESSAGE = 0x1, 44 | LVL_ATTACHMENT = 0x2, 45 | }; 46 | typedef enum _lvl_type level_type; 47 | 48 | /* Attr -- storing a structure, formated according to file specification */ 49 | typedef struct 50 | { 51 | level_type lvl_type; 52 | tnef_type type; 53 | tnef_name name; 54 | size_t len; 55 | unsigned char* buf; 56 | } Attr; 57 | 58 | typedef struct 59 | { 60 | uint16 id; 61 | uint16 chbgtrp; 62 | uint16 cch; 63 | uint16 cb; 64 | } TRP; 65 | 66 | typedef struct 67 | { 68 | TRP trp; 69 | char* sender_display_name; 70 | char* sender_address; 71 | } TRIPLE; 72 | 73 | extern void attr_dump (Attr* attr); 74 | extern void attr_free (Attr* attr); 75 | extern void copy_date_from_attr (Attr* attr, struct date* dt); 76 | extern Attr* attr_read (); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | #endif /* ATTR_H */ 82 | -------------------------------------------------------------------------------- /oldsrc/file.c: -------------------------------------------------------------------------------- 1 | /* 2 | * file.c -- functions for dealing with file output 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free 
Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | #include 32 | #include "alloc.h" 33 | #include "date.h" 34 | #include "debug.h" 35 | #include "file.h" 36 | #include "mapi_attr.h" 37 | #include "options.h" 38 | #include "path.h" 39 | 40 | #define TNEF_DEFAULT_FILENAME "tnef-tmp" 41 | 42 | /* ask user for confirmation of the action */ 43 | static int 44 | confirm_action (const char *prompt, ...) 
45 | { 46 | if (INTERACTIVE) 47 | { 48 | int confirmed = 0; 49 | char buf[BUFSIZ + 1]; 50 | va_list args; 51 | va_start (args, prompt); 52 | 53 | fgets (buf, BUFSIZ, stdin); 54 | if (buf[0] == 'y' || buf[0] == 'Y') confirmed = 1; 55 | 56 | va_end (args); 57 | 58 | return confirmed; 59 | } 60 | return 1; 61 | } 62 | 63 | void 64 | file_write (File *file, const char* directory) 65 | { 66 | char *path = NULL; 67 | 68 | assert (file); 69 | if (!file) return; 70 | 71 | if (file->name == NULL) 72 | { 73 | file->name = strdup( TNEF_DEFAULT_FILENAME ); 74 | debug_print ("No file name specified, using default %s.\n", TNEF_DEFAULT_FILENAME); 75 | } 76 | 77 | if ( file->path == NULL ) 78 | { 79 | file->path = munge_fname( file->name ); 80 | 81 | if (file->path == NULL) 82 | { 83 | file->path = strdup( TNEF_DEFAULT_FILENAME ); 84 | debug_print ("No path name available, using default %s.\n", TNEF_DEFAULT_FILENAME); 85 | } 86 | } 87 | 88 | path = concat_fname( directory, file->path ); 89 | 90 | if (path == NULL) 91 | { 92 | path = strdup( TNEF_DEFAULT_FILENAME ); 93 | debug_print ("No path generated, using default %s.\n", TNEF_DEFAULT_FILENAME); 94 | } 95 | 96 | debug_print ("%sWRITING\t|\t%s\t|\t%s\n", 97 | ((LIST_ONLY==0)?"":"NOT "), file->name, path); 98 | 99 | if (!LIST_ONLY) 100 | { 101 | FILE *fp = NULL; 102 | 103 | if (!confirm_action ("extract %s?", file->name)) return; 104 | if (!OVERWRITE_FILES) 105 | { 106 | if (file_exists (path)) 107 | { 108 | if (!NUMBER_FILES) 109 | { 110 | fprintf (stderr, 111 | "tnef: %s: Could not create file: File exists\n", 112 | path); 113 | return; 114 | } 115 | else 116 | { 117 | char *tmp = find_free_number (path); 118 | debug_print ("Renaming %s to %s\n", path, tmp); 119 | XFREE (path); 120 | path = tmp; 121 | } 122 | } 123 | } 124 | 125 | fp = fopen (path, "wb"); 126 | if (fp == NULL) 127 | { 128 | perror (path); 129 | exit (1); 130 | } 131 | if (fwrite (file->data, 1, file->len, fp) != file->len) 132 | { 133 | perror (path); 134 | exit 
(1); 135 | } 136 | fclose (fp); 137 | } 138 | 139 | if (LIST_ONLY || VERBOSE_ON) 140 | { 141 | if (LIST_ONLY && VERBOSE_ON) 142 | { 143 | /* FIXME: print out date and stuff */ 144 | const char *date_str = date_to_str(&file->dt); 145 | fprintf (stdout, "%11lu\t|\t%s\t|\t%s\t|\t%s", 146 | (unsigned long)file->len, 147 | date_str+4, /* skip the day of week */ 148 | file->name, 149 | path); 150 | } 151 | else 152 | { 153 | fprintf (stdout, "%s\t|\t%s", file->name, path); 154 | } 155 | if ( SHOW_MIME ) 156 | { 157 | fprintf (stdout, "\t|\t%s", file->mime_type ? file->mime_type : "unknown"); 158 | fprintf (stdout, "\t|\t%s", file->content_id ? file->content_id : ""); 159 | } 160 | fprintf (stdout, "\n"); 161 | } 162 | XFREE(path); 163 | } 164 | 165 | static void 166 | file_add_mapi_attrs (File* file, MAPI_Attr** attrs) 167 | { 168 | int i; 169 | for (i = 0; attrs[i]; i++) 170 | { 171 | MAPI_Attr* a = attrs[i]; 172 | 173 | if (a->num_values) 174 | { 175 | 176 | switch (a->name) 177 | { 178 | case MAPI_ATTACH_LONG_FILENAME: 179 | if (file->name) XFREE(file->name); 180 | file->name = strdup( (char*)a->values[0].data.buf ); 181 | break; 182 | 183 | case MAPI_ATTACH_DATA_OBJ: 184 | file->len = a->values[0].len; 185 | if (file->data) XFREE (file->data); 186 | file->data = CHECKED_XMALLOC (unsigned char, file->len); 187 | memmove (file->data, a->values[0].data.buf, file->len); 188 | break; 189 | 190 | case MAPI_ATTACH_MIME_TAG: 191 | if (file->mime_type) XFREE (file->mime_type); 192 | file->mime_type = CHECKED_XMALLOC (char, a->values[0].len); 193 | memmove (file->mime_type, a->values[0].data.buf, a->values[0].len); 194 | break; 195 | 196 | case MAPI_ATTACH_CONTENT_ID: 197 | if (file->content_id) XFREE(file->content_id); 198 | file->content_id = CHECKED_XMALLOC (char, a->values[0].len); 199 | memmove (file->content_id, a->values[0].data.buf, a->values[0].len); 200 | break; 201 | 202 | default: 203 | break; 204 | } 205 | } 206 | } 207 | } 208 | 209 | void 210 | file_add_attr 
(File* file, Attr* attr) 211 | { 212 | assert (file && attr); 213 | if (!(file && attr)) return; 214 | 215 | /* we only care about some things... we will skip most attributes */ 216 | switch (attr->name) 217 | { 218 | case attATTACHMODIFYDATE: 219 | copy_date_from_attr (attr, &file->dt); 220 | break; 221 | 222 | case attATTACHMENT: 223 | { 224 | MAPI_Attr **mapi_attrs = mapi_attr_read (attr->len, attr->buf); 225 | if (mapi_attrs) 226 | { 227 | file_add_mapi_attrs (file, mapi_attrs); 228 | mapi_attr_free_list (mapi_attrs); 229 | XFREE (mapi_attrs); 230 | } 231 | } 232 | break; 233 | 234 | case attATTACHTITLE: 235 | file->name = strdup( (char*)attr->buf ); 236 | break; 237 | 238 | case attATTACHDATA: 239 | file->len = attr->len; 240 | file->data = CHECKED_XMALLOC(unsigned char, attr->len); 241 | memmove (file->data, attr->buf, attr->len); 242 | break; 243 | 244 | default: 245 | break; 246 | } 247 | } 248 | 249 | void 250 | file_free (File *file) 251 | { 252 | if (file) 253 | { 254 | XFREE (file->name); 255 | XFREE (file->data); 256 | XFREE (file->mime_type); 257 | XFREE (file->content_id); 258 | XFREE (file->path); 259 | memset (file, '\0', sizeof (File)); 260 | } 261 | } 262 | 263 | #ifdef __cplusplus 264 | } 265 | #endif 266 | -------------------------------------------------------------------------------- /oldsrc/file.h: -------------------------------------------------------------------------------- 1 | /* 2 | * file.h -- functions for dealing with file output 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifndef FILE_H 23 | #define FILE_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if HAVE_CONFIG_H 30 | # include "config.h" 31 | #endif /* HAVE_CONFIG_H */ 32 | 33 | #include "common.h" 34 | 35 | #include "date.h" 36 | #include "attr.h" 37 | 38 | typedef struct 39 | { 40 | char * name; 41 | char * path; /* filesystem "safe" version of name */ 42 | size_t len; 43 | unsigned char * data; 44 | struct date dt; 45 | char * mime_type; 46 | char * content_id; /* cid for inline HTML attachments */ 47 | } File; 48 | 49 | extern void file_write (File *file, const char* directory); 50 | extern void file_add_attr (File* file, Attr* attr); 51 | extern void file_free (File *file); 52 | 53 | 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | #endif /* FILE_H */ 58 | -------------------------------------------------------------------------------- /oldsrc/mapi_attr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * mapi_attr.c -- Functions for handling MAPI attributes 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | 32 | #include "mapi_attr.h" 33 | #include "alloc.h" 34 | #include "options.h" 35 | #include "util.h" 36 | #include "write.h" 37 | 38 | /* return the length padded to a 4 byte boundary */ 39 | static size_t 40 | pad_to_4byte (size_t length) 41 | { 42 | return (length+3) & ~3; 43 | } 44 | 45 | /* Copy the GUID data from a character buffer */ 46 | static void 47 | copy_guid_from_buf (GUID* guid, unsigned char *buf) 48 | { 49 | int i; 50 | int idx = 0; 51 | assert (guid); 52 | assert (buf); 53 | 54 | guid->data1 = GETINT32(buf + idx); idx += sizeof (uint32); 55 | guid->data2 = GETINT16(buf + idx); idx += sizeof (uint16); 56 | guid->data3 = GETINT16(buf + idx); idx += sizeof (uint16); 57 | for (i = 0; i < 8; i++, idx += sizeof (uint8)) 58 | guid->data4[i] = (uint8)(buf[idx]); 59 | } 60 | 61 | 62 | /* dumps info about MAPI attributes... 
useful for debugging */ 63 | static void 64 | mapi_attr_dump (MAPI_Attr* attr) 65 | { 66 | char *name = get_mapi_name_str (attr->name); 67 | char *type = get_mapi_type_str (attr->type); 68 | size_t i; 69 | 70 | fprintf (stdout, "(MAPI) %s [type: %s] [num_values = %lu] = \n", 71 | name, type, (unsigned long)attr->num_values); 72 | if (attr->guid) 73 | { 74 | fprintf (stdout, "\tGUID: "); 75 | write_guid (stdout, attr->guid); 76 | fputc ('\n', stdout); 77 | } 78 | 79 | for (i = 0; i < attr->num_names; i++) 80 | fprintf (stdout, "\tname #%d: '%s'\n", (int)i, attr->names[i].data); 81 | 82 | for (i = 0; i < attr->num_values; i++) 83 | { 84 | fprintf (stdout, "\t#%lu [len: %lu] = ", 85 | (unsigned long)i, 86 | (unsigned long)attr->values[i].len); 87 | 88 | switch (attr->type) 89 | { 90 | case szMAPI_NULL: 91 | fprintf (stdout, "NULL"); 92 | break; 93 | 94 | case szMAPI_SHORT: 95 | write_int16 (stdout, (int16)attr->values[i].data.bytes2); 96 | break; 97 | 98 | case szMAPI_INT: 99 | write_int32 (stdout, (int32)attr->values[i].data.bytes4); 100 | break; 101 | 102 | case szMAPI_FLOAT: 103 | case szMAPI_DOUBLE: 104 | write_float (stdout, (float)attr->values[i].data.bytes4); 105 | break; 106 | 107 | case szMAPI_BOOLEAN: 108 | write_boolean (stdout, attr->values[i].data.bytes4); 109 | break; 110 | 111 | case szMAPI_STRING: 112 | case szMAPI_UNICODE_STRING: 113 | write_string (stdout, (char*)attr->values[i].data.buf); 114 | break; 115 | 116 | case szMAPI_SYSTIME: 117 | case szMAPI_CURRENCY: 118 | case szMAPI_INT8BYTE: 119 | case szMAPI_APPTIME: 120 | write_uint64 (stdout, attr->values[i].data.bytes8); 121 | break; 122 | 123 | case szMAPI_ERROR: 124 | write_uint32 (stdout, attr->values[i].data.bytes4); 125 | break; 126 | 127 | case szMAPI_CLSID: 128 | write_guid (stdout, &attr->values[i].data.guid); 129 | break; 130 | 131 | case szMAPI_OBJECT: 132 | case szMAPI_BINARY: 133 | { 134 | size_t x; 135 | 136 | for (x = 0; x < attr->values[i].len; x++) 137 | { 138 | write_byte (stdout, 
(uint8)attr->values[i].data.buf[x]); 139 | fputc (' ', stdout); 140 | } 141 | } 142 | break; 143 | 144 | default: 145 | fprintf (stdout, ""); 146 | break; 147 | } 148 | fprintf (stdout, "\n"); 149 | } 150 | 151 | fflush( NULL ); 152 | } 153 | 154 | static MAPI_Value* 155 | alloc_mapi_values (MAPI_Attr* a) 156 | { 157 | if (a && a->num_values) 158 | { 159 | a->values = CHECKED_XCALLOC (MAPI_Value, a->num_values); 160 | return a->values; 161 | } 162 | return NULL; 163 | } 164 | 165 | /* 166 | 2009/07/07 167 | Microsoft documentation reference: [MS-OXPROPS] v 2.0, April 10, 2009 168 | 169 | only multivalue types appearing are: 170 | szMAPI_INT, szMAPI_SYSTIME, szMAPI_UNICODE_STRING, szMAPI_BINARY 171 | */ 172 | 173 | /* parses out the MAPI attibutes hidden in the character buffer */ 174 | MAPI_Attr** 175 | mapi_attr_read (size_t len, unsigned char *buf) 176 | { 177 | size_t idx = 0; 178 | uint32 i,j,mvf; 179 | uint32 num_properties = GETINT32(buf+idx); 180 | MAPI_Attr** attrs = CHECKED_XMALLOC (MAPI_Attr*, (num_properties + 1)); 181 | 182 | idx += 4; 183 | 184 | if (!attrs) return NULL; 185 | for (i = 0; i < num_properties; i++) 186 | { 187 | MAPI_Attr* a = attrs[i] = CHECKED_XCALLOC(MAPI_Attr, 1); 188 | MAPI_Value* v = NULL; 189 | 190 | a->type = GETINT16(buf+idx); idx += 2; 191 | a->name = GETINT16(buf+idx); idx += 2; 192 | 193 | 194 | /* Multi-valued attributes have their type modified by the MULTI_VALUE_FLAG value */ 195 | if (a->type & MULTI_VALUE_FLAG) 196 | { 197 | a->type -= MULTI_VALUE_FLAG; 198 | mvf = 1; 199 | 200 | if (DEBUG_ON) 201 | { 202 | fprintf( stdout, "!!MULTI_VALUE_FLAG seen (0x%02x 0x%02x)\n", a->name, a->type ); 203 | fflush( NULL ); 204 | } 205 | } 206 | else 207 | { 208 | mvf = 0; 209 | } 210 | 211 | /* handle special case of GUID prefixed properties */ 212 | if (a->name >= GUID_EXISTS_FLAG) 213 | { 214 | /* copy GUID */ 215 | a->guid = CHECKED_XMALLOC(GUID, 1); 216 | copy_guid_from_buf(a->guid, buf+idx); 217 | idx += sizeof (GUID); 218 | 219 
| a->num_names = GETINT32(buf+idx); idx += 4; 220 | if (a->num_names > 0) 221 | { 222 | /* FIXME: do something useful here! */ 223 | size_t i; 224 | 225 | a->names = CHECKED_XCALLOC(VarLenData, a->num_names); 226 | 227 | for (i = 0; i < a->num_names; i++) 228 | { 229 | size_t j; 230 | 231 | a->names[i].len = GETINT32(buf+idx); idx += 4; 232 | 233 | /* read the data into a buffer */ 234 | a->names[i].data 235 | = CHECKED_XMALLOC(unsigned char, a->names[i].len); 236 | for (j = 0; j < (a->names[i].len >> 1); j++) 237 | a->names[i].data[j] = (buf+idx)[j*2]; 238 | 239 | /* But what are we going to do with it? */ 240 | 241 | idx += pad_to_4byte(a->names[i].len); 242 | } 243 | } 244 | else 245 | { 246 | /* get the 'real' name */ 247 | a->name = GETINT32(buf+idx); idx+= 4; 248 | } 249 | } 250 | 251 | switch (a->type) 252 | { 253 | case szMAPI_SHORT: /* 2 bytes */ 254 | assert(!mvf); 255 | a->num_values = 1; 256 | v = alloc_mapi_values (a); 257 | v->len = 2; 258 | v->data.bytes2 = GETINT16(buf+idx); 259 | idx += 4; /* assume padding of 2, advance by 4! 
*/ 260 | break; 261 | 262 | case szMAPI_INT: /* 4 bytes, possible MV */ 263 | if ( mvf ) 264 | { 265 | a->num_values = GETINT32(buf+idx); 266 | idx += 4; 267 | } 268 | else 269 | { 270 | a->num_values = 1; 271 | } 272 | v = alloc_mapi_values (a); 273 | for ( j=0; j< a->num_values; j++ ) 274 | { 275 | v->len = 4; 276 | v->data.bytes4 = GETINT32(buf+idx); 277 | idx += 4; 278 | v++; 279 | } 280 | break; 281 | 282 | case szMAPI_FLOAT: /* 4 bytes */ 283 | case szMAPI_BOOLEAN: /* this should be 2 bytes + 2 padding */ 284 | assert(!mvf); 285 | a->num_values = 1; 286 | v = alloc_mapi_values (a); 287 | v->len = 4; 288 | v->data.bytes4 = GETINT32(buf+idx); 289 | idx += v->len; 290 | break; 291 | 292 | case szMAPI_SYSTIME: /* 8 bytes */ 293 | if ( mvf ) 294 | { 295 | a->num_values = GETINT32(buf+idx); 296 | idx += 4; 297 | } 298 | else 299 | { 300 | a->num_values = 1; 301 | } 302 | v = alloc_mapi_values (a); 303 | for ( j=0; j< a->num_values; j++ ) 304 | { 305 | v->len = 8; 306 | v->data.bytes8[0] = GETINT32(buf+idx); 307 | v->data.bytes8[1] = GETINT32(buf+idx+4); 308 | idx += 8; 309 | v++; 310 | } 311 | break; 312 | 313 | case szMAPI_DOUBLE: /* 8 bytes */ 314 | case szMAPI_APPTIME: 315 | case szMAPI_CURRENCY: 316 | case szMAPI_INT8BYTE: 317 | assert(!mvf); 318 | a->num_values = 1; 319 | v = alloc_mapi_values (a); 320 | v->len = 8; 321 | v->data.bytes8[0] = GETINT32(buf+idx); 322 | v->data.bytes8[1] = GETINT32(buf+idx+4); 323 | idx += v->len; 324 | break; 325 | 326 | case szMAPI_CLSID: 327 | assert(!mvf); 328 | a->num_values = 1; 329 | v = alloc_mapi_values (a); 330 | v->len = sizeof (GUID); 331 | copy_guid_from_buf(&v->data.guid, buf+idx); 332 | idx += v->len; 333 | break; 334 | 335 | case szMAPI_STRING: 336 | case szMAPI_UNICODE_STRING: 337 | case szMAPI_OBJECT: 338 | case szMAPI_BINARY: /* variable length */ 339 | a->num_values = GETINT32(buf+idx); idx += 4; 340 | v = alloc_mapi_values (a); 341 | for (j = 0; j < a->num_values; j++) 342 | { 343 | v->len = GETINT32(buf+idx); 
idx += 4; 344 | 345 | if (a->type == szMAPI_UNICODE_STRING) 346 | { 347 | v->data.buf = (unsigned char*)unicode_to_utf8(v->len, buf+idx); 348 | } 349 | else 350 | { 351 | v->data.buf = CHECKED_XMALLOC(unsigned char, v->len); 352 | memmove (v->data.buf, buf+idx, v->len); 353 | } 354 | 355 | idx += pad_to_4byte(v->len); 356 | v++; 357 | } 358 | break; 359 | 360 | case szMAPI_NULL: /* illegal in input tnef streams */ 361 | case szMAPI_ERROR: 362 | case szMAPI_UNSPECIFIED: 363 | 364 | fprintf (stderr, 365 | "Invalid attribute, input file may be corrupted\n"); 366 | if (!ENCODE_SKIP) exit (1); 367 | 368 | return NULL; 369 | 370 | default: /* should never get here */ 371 | fprintf (stderr, 372 | "Undefined attribute, input file may be corrupted\n"); 373 | if (!ENCODE_SKIP) exit (1); 374 | 375 | return NULL; 376 | 377 | } 378 | if (DEBUG_ON) mapi_attr_dump (attrs[i]); 379 | 380 | } 381 | attrs[i] = NULL; 382 | 383 | return attrs; 384 | } 385 | 386 | static void 387 | mapi_attr_free (MAPI_Attr* attr) 388 | { 389 | if (attr) 390 | { 391 | size_t i; 392 | for (i = 0; i < attr->num_values; i++) 393 | { 394 | if ((attr->type == szMAPI_STRING) 395 | || (attr->type == szMAPI_UNICODE_STRING) 396 | || (attr->type == szMAPI_BINARY)) 397 | { 398 | XFREE (attr->values[i].data.buf); 399 | } 400 | } 401 | if (attr->num_names > 0) { 402 | for (i = 0; i < attr->num_names; i++) 403 | { 404 | XFREE(attr->names[i].data); 405 | } 406 | XFREE(attr->names); 407 | } 408 | XFREE (attr->values); 409 | XFREE (attr->guid); 410 | memset (attr, '\0', sizeof (MAPI_Attr)); 411 | } 412 | } 413 | 414 | void 415 | mapi_attr_free_list (MAPI_Attr** attrs) 416 | { 417 | int i; 418 | for (i = 0; attrs && attrs[i]; i++) 419 | { 420 | mapi_attr_free (attrs[i]); 421 | XFREE (attrs[i]); 422 | } 423 | } 424 | 425 | #ifdef __cplusplus 426 | } 427 | #endif -------------------------------------------------------------------------------- /oldsrc/mapi_attr.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * mapi_attr.h -- Functions for handling MAPI attributes 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
20 | * 21 | */ 22 | #ifndef MAPI_ATTR_H 23 | #define MAPI_ATTR_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if HAVE_CONFIG_H 30 | # include "config.h" 31 | #endif /* HAVE_CONFIG_H */ 32 | 33 | #include "common.h" 34 | 35 | #include "mapi_types.h" 36 | #include "mapi_names.h" 37 | 38 | #define MULTI_VALUE_FLAG 0x1000 39 | #define GUID_EXISTS_FLAG 0x8000 40 | 41 | typedef struct 42 | { 43 | uint32 data1; 44 | uint16 data2; 45 | uint16 data3; 46 | uint8 data4[8]; 47 | } GUID; 48 | 49 | typedef struct 50 | { 51 | size_t len; 52 | union 53 | { 54 | unsigned char *buf; 55 | uint16 bytes2; 56 | uint32 bytes4; 57 | uint32 bytes8[2]; 58 | GUID guid; 59 | } data; 60 | } MAPI_Value; 61 | 62 | typedef struct 63 | { 64 | size_t len; 65 | unsigned char* data; 66 | } VarLenData; 67 | 68 | typedef struct 69 | { 70 | mapi_type type; 71 | mapi_name name; 72 | size_t num_values; 73 | MAPI_Value* values; 74 | GUID *guid; 75 | size_t num_names; 76 | VarLenData *names; 77 | } MAPI_Attr; 78 | 79 | extern MAPI_Attr** mapi_attr_read (size_t len, unsigned char *buf); 80 | extern void mapi_attr_free_list (MAPI_Attr** attrs); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | #endif /* MAPI_ATTR_H */ 86 | -------------------------------------------------------------------------------- /oldsrc/mapi_types.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! 
*/ 7 | #ifdef HAVE_CONFIG_H 8 | #include "config.h" 9 | #endif 10 | #include "common.h" 11 | #include "mapi_types.h" 12 | char* 13 | get_mapi_type_str(uint16 d) 14 | { 15 | static char buf[128]; 16 | static char* str; 17 | switch(d) { 18 | case szMAPI_UNSPECIFIED: 19 | str="MAPI Unspecified"; 20 | break; 21 | case szMAPI_NULL: 22 | str="MAPI null property"; 23 | break; 24 | case szMAPI_SHORT: 25 | str="MAPI short (signed 16 bits)"; 26 | break; 27 | case szMAPI_INT: 28 | str="MAPI integer (signed 32 bits)"; 29 | break; 30 | case szMAPI_FLOAT: 31 | str="MAPI float (4 bytes)"; 32 | break; 33 | case szMAPI_DOUBLE: 34 | str="MAPI double"; 35 | break; 36 | case szMAPI_CURRENCY: 37 | str="MAPI currency (64 bits)"; 38 | break; 39 | case szMAPI_APPTIME: 40 | str="MAPI application time"; 41 | break; 42 | case szMAPI_ERROR: 43 | str="MAPI error (32 bits)"; 44 | break; 45 | case szMAPI_BOOLEAN: 46 | str="MAPI boolean (16 bits)"; 47 | break; 48 | case szMAPI_OBJECT: 49 | str="MAPI embedded object"; 50 | break; 51 | case szMAPI_INT8BYTE: 52 | str="MAPI 8 byte signed int"; 53 | break; 54 | case szMAPI_STRING: 55 | str="MAPI string"; 56 | break; 57 | case szMAPI_UNICODE_STRING: 58 | str="MAPI unicode-string (null terminated)"; 59 | break; 60 | case szMAPI_SYSTIME: 61 | str="MAPI time (64 bits)"; 62 | break; 63 | case szMAPI_CLSID: 64 | str="MAPI OLE GUID"; 65 | break; 66 | case szMAPI_BINARY: 67 | str="MAPI binary"; 68 | break; 69 | default: 70 | str=NULL; 71 | break; 72 | } 73 | if ( str ) 74 | { 75 | sprintf(buf,"%s <%04x>",str,d); 76 | } 77 | else 78 | { 79 | sprintf(buf,"%04x",d); 80 | } 81 | return buf; 82 | } 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif -------------------------------------------------------------------------------- /oldsrc/mapi_types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! 
*/ 7 | #ifndef MAPI_TYPES_H 8 | #define MAPI_TYPES_H 9 | enum _mapi_type { 10 | szMAPI_UNSPECIFIED = 0x0000, 11 | szMAPI_NULL = 0x0001, 12 | szMAPI_SHORT = 0x0002, 13 | szMAPI_INT = 0x0003, 14 | szMAPI_FLOAT = 0x0004, 15 | szMAPI_DOUBLE = 0x0005, 16 | szMAPI_CURRENCY = 0x0006, 17 | szMAPI_APPTIME = 0x0007, 18 | szMAPI_ERROR = 0x000a, 19 | szMAPI_BOOLEAN = 0x000b, 20 | szMAPI_OBJECT = 0x000d, 21 | szMAPI_INT8BYTE = 0x0014, 22 | szMAPI_STRING = 0x001e, 23 | szMAPI_UNICODE_STRING = 0x001f, 24 | szMAPI_SYSTIME = 0x0040, 25 | szMAPI_CLSID = 0x0048, 26 | szMAPI_BINARY = 0x0102, 27 | }; 28 | typedef enum _mapi_type mapi_type; 29 | extern char* 30 | get_mapi_type_str(uint16 d); 31 | #endif /* MAPI_TYPES_H */ 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif -------------------------------------------------------------------------------- /oldsrc/path.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * path.c -- Utility functions for dealing with pathnames 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
25 | * 26 | */ 27 | #ifdef HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif /* HAVE_CONFIG_H */ 30 | 31 | #include "common.h" 32 | #include 33 | 34 | #include 35 | 36 | #include "alloc.h" 37 | #include "options.h" 38 | #include "path.h" 39 | #include "debug.h" 40 | 41 | /* concatenates fname1 with fname2 to make a pathname, adds '/' as needed */ 42 | /* strips trailing '/' */ 43 | 44 | char * 45 | concat_fname (const char *fname1, const char* fname2) 46 | { 47 | char *filename; 48 | int len; 49 | 50 | if ( ( fname1 == NULL ) && ( fname2 == NULL ) ) return NULL; 51 | 52 | if ( ( fname1 == NULL ) || ( *fname1 == '\0' ) ) 53 | { 54 | filename = xstrdup (fname2); 55 | } 56 | else 57 | { 58 | len = strlen (fname1); 59 | if (fname2) len += strlen (fname2); 60 | 61 | filename = CHECKED_XMALLOC (char, (len + 2)); 62 | strcpy (filename, fname1); 63 | 64 | if (fname2) 65 | { 66 | if ((filename[strlen(filename)-1] != '/') 67 | && (fname2[0] != '/')) 68 | { 69 | strcat (filename, "/"); 70 | } 71 | strcat (filename, fname2); 72 | } 73 | } 74 | 75 | /* strip trailing '/' */ 76 | 77 | while ((len=strlen(filename)) > 0) 78 | { 79 | if ( filename[len-1] == '/' ) 80 | { 81 | filename[len-1] = '\0'; 82 | } 83 | else 84 | { 85 | break; 86 | } 87 | } 88 | 89 | if ( *filename == '\0' ) filename = NULL; /* nothing left */ 90 | 91 | return filename; 92 | } 93 | 94 | int 95 | file_exists (const char *fname) 96 | { 97 | static struct stat buf; 98 | return (stat (fname, &buf) == 0); 99 | } 100 | 101 | /* finds a filename fname.N where N >= 1 and is not the name of an existing 102 | filename. Assumes that fname does not already have such an extension */ 103 | char * 104 | find_free_number (const char *fname) 105 | { 106 | size_t len = (strlen(fname) 107 | + 1 /* '.' 
*/ 108 | + 5 /* big enough for our purposes (i hope) */ 109 | + 1); /* NULL */ 110 | char *tmp = CHECKED_XMALLOC (char, len); 111 | int counter = 1; 112 | do 113 | { 114 | sprintf (tmp, "%s.%d", fname, counter++); 115 | } 116 | while (file_exists(tmp)); 117 | return tmp; 118 | } 119 | 120 | /* windows pathname manipulation routines */ 121 | 122 | /* per windows file manager, these aren't allowed in filenames */ 123 | 124 | static char unsanitary_windows_chars[] = { 125 | '\\', '/', ':', '*', '?', '"', '<', '>', '|', '\0' 126 | }; 127 | 128 | /* these aren't welcomed in unix filenames */ 129 | 130 | static char unsavory_unix_chars[] = { 131 | ' ', ';', '`', '\'', '[', ']', '{', '}', '(', ')', '\0' 132 | }; 133 | 134 | static int 135 | could_be_a_windows_path( const char *fname ) 136 | { 137 | const char *up; 138 | 139 | if ( ( fname == NULL ) || ( *fname == '\0' ) ) return 0; 140 | 141 | /* 142 | we might be a windows path if... 143 | we have at least one path separator and 144 | we have no unsanitary windows chars and 145 | we are reasonably printable 146 | */ 147 | 148 | up = rindex( fname, '\\' ); 149 | 150 | if ( up ) 151 | { 152 | up++; 153 | if ( *up == '\0' ) return 0; /* trailing backslash doesn't cut it */ 154 | } 155 | else 156 | { 157 | return 0; /* no backslash */ 158 | } 159 | 160 | for ( up=unsanitary_windows_chars; *up; up++ ) 161 | { 162 | if ( *up == '\\' ) continue; /* ignore backslashes */ 163 | 164 | if ( index( fname, (int)*up ) ) 165 | { 166 | return 0; /* found something we can't stomach */ 167 | } 168 | } 169 | 170 | for ( up=fname; *up; up++ ) 171 | { 172 | if ( iscntrl( (int)*up ) ) 173 | { 174 | return 0; /* found something we can't see */ 175 | } 176 | } 177 | 178 | /* found nothing to the contrary, so we might just be a path */ 179 | 180 | if (DEBUG_ON) debug_print( "!!windows path possible: %s\n", fname ); 181 | 182 | return 1; 183 | } 184 | 185 | static unsigned char hex_digits[16] = { 186 | '0', '1', '2', '3', '4', '5', '6', '7', 187 
| '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 188 | }; 189 | 190 | #define SLOP 4 /* a minimum rational buffer size */ 191 | 192 | static char * 193 | sanitize_filename( const char *fname ) 194 | { 195 | char *buf, *bp; 196 | const char *cp, *up; 197 | int flag, stet; 198 | 199 | if ( ( fname == NULL ) || ( *fname == '\0' ) ) 200 | { 201 | /* nothing to see here -- return small, zeroed buffer */ 202 | buf = CHECKED_XCALLOC( char, SLOP ); 203 | return buf; 204 | } 205 | 206 | /* 207 | sanitize the filename by modifying unsanitary or unsavory 208 | characters using the URL escape technique of c => %XX 209 | return a "fresh and freeable" buffer with the sanitary filename 210 | */ 211 | 212 | buf = CHECKED_XCALLOC( char, 3*strlen(fname)+SLOP ); 213 | bp = buf; 214 | 215 | for ( cp=fname; *cp; cp++ ) 216 | { 217 | flag = 0; 218 | 219 | while (1) 220 | { 221 | /* control chars */ 222 | 223 | if ( iscntrl( (int)*cp ) ) 224 | { 225 | stet = 0; 226 | break; 227 | } 228 | 229 | /* unsanitary windows chars */ 230 | 231 | for ( up=unsanitary_windows_chars; *up; up++ ) 232 | { 233 | if ( *cp == *up ) 234 | { 235 | flag = 1; 236 | stet = 0; 237 | break; /* for loop... */ 238 | } 239 | } 240 | 241 | if ( flag ) break; /* while loop... */ 242 | 243 | if ( UNIX_FS ) 244 | { 245 | /* non-ascii chars */ 246 | 247 | if ( !isascii( (int)*cp ) ) 248 | { 249 | stet = 0; 250 | break; 251 | } 252 | 253 | /* unsavory unix chars */ 254 | 255 | for ( up=unsavory_unix_chars; *up; up++ ) 256 | { 257 | if ( *cp == *up ) 258 | { 259 | flag = 1; 260 | stet = 0; 261 | break; /* for loop... */ 262 | } 263 | } 264 | 265 | if ( flag ) break; /* while loop... 
*/ 266 | } 267 | 268 | /* escape the escaper */ 269 | 270 | if ( *cp == '%' ) 271 | { 272 | stet = 0; 273 | break; 274 | } 275 | 276 | /* nothing obvious */ 277 | 278 | stet = 1; 279 | break; 280 | } 281 | 282 | /* handle the char */ 283 | 284 | if ( stet ) 285 | { 286 | *bp++ = *cp; /* keep it */ 287 | } 288 | else 289 | { 290 | *bp++ = '%'; /* escape it */ 291 | *bp++ = hex_digits[ ((*cp)>>4)&0xf ]; 292 | *bp++ = hex_digits[ (*cp) &0xf ]; 293 | } 294 | } 295 | 296 | return buf; 297 | } 298 | 299 | /* 300 | pathname generator 301 | 302 | takes input filename and "sanitizes" it for filesystem use 303 | understands windows paths and unixisms 304 | returns NULL if totally unsavory filename 305 | returns (freeable) pointer to sanitized filename if at all palatable 306 | */ 307 | 308 | char * 309 | munge_fname( const char *fname ) 310 | { 311 | char *dir, *base, *p, *fpd, *fpb; 312 | 313 | /* If we were not given a filename give up */ 314 | if (!fname || *fname == '\0') 315 | { 316 | return NULL; 317 | } 318 | 319 | if ( USE_PATHS ) 320 | { 321 | /* evaluate windows path potential */ 322 | 323 | if ( could_be_a_windows_path( (char *)fname ) ) 324 | { 325 | /* split fname after last path separator */ 326 | 327 | dir = strdup( fname ); fpd = dir; 328 | base = rindex( dir, (int)'\\' ); 329 | base++; 330 | *base = '\0'; 331 | 332 | base = strdup( fname ); fpb = base; 333 | base = rindex( base, (int)'\\' ); 334 | base++; 335 | 336 | /* flip path separators */ 337 | 338 | for ( p=dir; *p; p++ ) 339 | { 340 | if ( *p == '\\' ) *p = '/'; 341 | } 342 | 343 | /* handle absolute path separators */ 344 | 345 | if ( *dir == '/' ) 346 | { 347 | if ( ABSOLUTE_OK ) 348 | { 349 | if (VERBOSE_ON) debug_print( "WARNING: absolute path: %s", fname ); 350 | if (DEBUG_ON) debug_print( "!!absolute path: %s", fname ); 351 | } 352 | else 353 | { 354 | if (VERBOSE_ON) debug_print( "WARNING: absolute path stripped: %s", fname ); 355 | if (DEBUG_ON) debug_print( "!!absolute path stripped: %s", 
fname ); 356 | 357 | while ( *dir == '/' ) dir++; 358 | 359 | if ( *dir == '\0' ) dir = NULL; /* nothing left */ 360 | } 361 | } 362 | } 363 | else 364 | { 365 | /* not recognized as a windows path */ 366 | 367 | dir = NULL; fpd = NULL; 368 | base = strdup( fname ); fpb = base; 369 | } 370 | } 371 | else 372 | { 373 | /* no paths allowed */ 374 | 375 | dir = NULL; fpd = NULL; 376 | base = strdup( fname ); fpb = base; 377 | } 378 | 379 | /* cleanup the basename */ 380 | 381 | base = sanitize_filename( base ); 382 | 383 | /* build a pathname out of the pieces */ 384 | 385 | p = concat_fname( dir, base ); 386 | 387 | /* cleanup and return */ 388 | 389 | XFREE( fpd ); /* free what we may have allocated */ 390 | XFREE( fpb ); /* free what we may have allocated */ 391 | XFREE( base ); /* free what the sanitizer allocated */ 392 | 393 | if ( p && ( *p == '\0' ) ) 394 | { 395 | XFREE( p ); /* free what we allocated and is no longer required */ 396 | p = NULL; 397 | } 398 | 399 | return p; 400 | } 401 | 402 | #ifdef __cplusplus 403 | } 404 | #endif -------------------------------------------------------------------------------- /oldsrc/path.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * path.h -- Utility functions for dealing with pathnames 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 
20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 25 | * 26 | */ 27 | #ifndef PATH_H 28 | #define PATH_H 29 | 30 | #if HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif /* HAVE_CONFIG_H */ 33 | 34 | extern char * concat_fname (const char* fname1, const char* fname2); 35 | extern char * munge_fname (const char *fname); 36 | extern char * find_free_number (const char *fname); 37 | extern int file_exists (const char *fname); /* 1 = true, 0 = false */ 38 | 39 | #endif /* !PATH_H */ 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif -------------------------------------------------------------------------------- /oldsrc/rtf.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * rtf.c -- utility function for dealing with RTF content 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
25 | * 26 | * Commentary: 27 | * Entry point is get_rtf_data. All other functions are internal. 28 | */ 29 | 30 | #ifdef HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif /* HAVE_CONFIG_H */ 33 | 34 | #include "common.h" 35 | 36 | #include "alloc.h" 37 | #include "file.h" 38 | #include "mapi_attr.h" 39 | #include "path.h" 40 | #include "util.h" 41 | #include 42 | 43 | /* 44 | decompression algorithm 45 | [MS-OXRTFCP] v 2.0 april 10, 2009 46 | */ 47 | 48 | static const uint32 rtf_uncompressed_magic = 0x414c454d; 49 | static const uint32 rtf_compressed_magic = 0x75465a4c; 50 | 51 | static const char* rtf_prebuf = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"; 52 | 53 | /* 54 | crc table and crc generation algorithm 55 | [MS-OXRTFCP] v 2.0 april 10, 2009 56 | */ 57 | 58 | static uint32 crc_table[256] = 59 | { 60 | 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 61 | 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 62 | 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 63 | 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 64 | 65 | 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 66 | 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 67 | 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 68 | 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 69 | 70 | 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 71 | 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 72 | 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 73 | 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 74 | 75 | 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 76 | 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 77 | 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 78 | 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 79 | 80 | 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 81 | 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 82 | 
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 83 | 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 84 | 85 | 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 86 | 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 87 | 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 88 | 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 89 | 90 | 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 91 | 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 92 | 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 93 | 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 94 | 95 | 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 96 | 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 97 | 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 98 | 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 99 | 100 | 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 101 | 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 102 | 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 103 | 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 104 | 105 | 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 106 | 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 107 | 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 108 | 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 109 | 110 | 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 111 | 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 112 | 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 113 | 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 114 | 115 | 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 116 | 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 117 | 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 118 | 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 119 | 120 | 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 121 | 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 122 | 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 123 | 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 124 | 125 | 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 126 | 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 127 | 0x88085ae6, 
0xff0f6a70, 0x66063bca, 0x11010b5c, 128 | 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 129 | 130 | 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 131 | 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 132 | 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 133 | 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 134 | 135 | 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 136 | 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 137 | 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 138 | 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d 139 | }; 140 | 141 | static uint32 142 | generate_crc(unsigned char *data, size_t len) 143 | { 144 | uint32 crc = 0x00000000; 145 | uint32 i, j; 146 | unsigned char c; 147 | 148 | for ( i=0; i>8 ); 153 | } 154 | 155 | return crc; 156 | } 157 | 158 | static int 159 | is_rtf_data (unsigned char *data) 160 | { 161 | size_t compr_size = 0L; 162 | size_t uncompr_size = 0L; 163 | uint32 magic; 164 | size_t idx = 0; 165 | 166 | compr_size = GETINT32(data + idx); idx += 4; 167 | uncompr_size = GETINT32(data + idx); idx += 4; 168 | magic = GETINT32(data + idx); idx += 4; 169 | 170 | if ((magic == rtf_uncompressed_magic) 171 | || (magic == rtf_compressed_magic)) 172 | return 1; 173 | return 0; 174 | } 175 | 176 | unsigned char *decompress_rtf_data( unsigned char *src, size_t lenc, size_t lenu ) { 177 | const size_t rtf_prebuf_len = strlen( rtf_prebuf ); 178 | 179 | int woff, eoff, roff, rlen; 180 | int control, cin, cout, i, j, endflag; 181 | unsigned char *dest; 182 | unsigned char dict[4096]; 183 | 184 | /* setup dictionary */ 185 | 186 | memset( dict, 0x0, sizeof(dict) ); 187 | memmove( dict, rtf_prebuf, rtf_prebuf_len ); 188 | 189 | woff = rtf_prebuf_len; 190 | eoff = rtf_prebuf_len; 191 | 192 | /* setup destination */ 193 | 194 | assert( lenu>0 ); /* sanity check */ 195 | 196 | dest = CHECKED_XCALLOC(unsigned char, lenu); 197 | cout = 0; 198 | 199 | /* setup source */ 200 | 201 | assert( lenc>0 ); 202 | 203 | cin = 0; 204 | 205 | /* processing 
loop */ 206 | 207 | endflag = 0; 208 | 209 | while (1) 210 | { 211 | if ( endflag ) break; 212 | 213 | /* get control byte */ 214 | 215 | if ( cin+1 > lenc ) 216 | { 217 | endflag = -1; 218 | break; /* input overrun */ 219 | } 220 | 221 | control = (int)src[cin++]; 222 | 223 | /* handle control run */ 224 | 225 | for ( i=0; i<8; i++ ) 226 | { 227 | if ( endflag ) break; 228 | 229 | if ( control & (1< lenc ) 234 | { 235 | endflag = -1; 236 | break; /* input overrun */ 237 | } 238 | 239 | roff = (int)src[cin++]; 240 | rlen = (int)src[cin++]; 241 | 242 | roff = (roff<<4) + (rlen>>4); 243 | rlen = (rlen&0x0f) + 2; 244 | 245 | /* the one true exit test */ 246 | 247 | if ( roff == woff ) 248 | { 249 | endflag = 1; 250 | break; /* happy ending */ 251 | } 252 | 253 | /* handle reference */ 254 | 255 | if ( cout+rlen > lenu ) 256 | { 257 | endflag = -1; 258 | break; /* output overrun */ 259 | } 260 | 261 | for ( j=0; j lenc ) 276 | { 277 | endflag = -1; 278 | break; /* input overrun */ 279 | } 280 | 281 | if ( cout+1 > lenu ) 282 | { 283 | endflag = -1; 284 | break; /* output overrun */ 285 | } 286 | 287 | /* handle literal */ 288 | 289 | dest[cout++] = src[cin]; 290 | dict[woff++] = src[cin++]; 291 | 292 | woff &= 0xfff; 293 | if ( eoff < 4096 ) eoff++; 294 | } 295 | } 296 | } 297 | 298 | if ( endflag < 0 ) 299 | { 300 | fprintf( stderr, "RTF buffer overrun, input file may be corrupted\n" ); 301 | } 302 | 303 | return dest; 304 | } 305 | 306 | static void 307 | get_rtf_data_from_buf (size_t len, unsigned char *data, 308 | size_t *out_len, unsigned char **out_data) 309 | { 310 | size_t compr_size = 0L; 311 | size_t uncompr_size = 0L; 312 | uint32 magic; 313 | uint32 checksum; 314 | size_t idx = 0; 315 | 316 | compr_size = GETINT32(data + idx); idx += 4; 317 | uncompr_size = GETINT32(data + idx); idx += 4; 318 | magic = GETINT32(data + idx); idx += 4; 319 | checksum = GETINT32 (data + idx); idx += 4; 320 | 321 | /* sanity check */ 322 | assert (compr_size + 4 == len); 323 | 
324 | (*out_len) = uncompr_size; 325 | 326 | if (magic == rtf_uncompressed_magic) /* uncompressed rtf stream */ 327 | { 328 | (*out_data) = CHECKED_XCALLOC(unsigned char, (*out_len)); 329 | memmove ((*out_data), data+4, uncompr_size); 330 | } 331 | else if (magic == rtf_compressed_magic) /* compressed rtf stream */ 332 | { 333 | if ( checksum == generate_crc( data+idx, len-idx ) ) 334 | { 335 | (*out_data) = decompress_rtf_data (data+idx, len-idx, uncompr_size); 336 | } 337 | else 338 | { 339 | (*out_data) = CHECKED_XCALLOC(unsigned char, 4); 340 | (*out_len) = 0; 341 | fprintf( stderr, "Invalid RTF CRC, input file may be corrupted\n" ); 342 | } 343 | } 344 | } 345 | 346 | VarLenData** 347 | get_rtf_data (MAPI_Attr *a) 348 | { 349 | VarLenData** body 350 | = (VarLenData**)CHECKED_XCALLOC(VarLenData*, a->num_values + 1); 351 | 352 | int j; 353 | for (j = 0; j < a->num_values; j++) 354 | { 355 | if (is_rtf_data (a->values[j].data.buf)) 356 | { 357 | body[j] = (VarLenData*)XMALLOC(VarLenData, 1); 358 | 359 | get_rtf_data_from_buf (a->values[j].len, 360 | a->values[j].data.buf, 361 | &body[j]->len, &body[j]->data); 362 | } 363 | } 364 | return body; 365 | } 366 | 367 | 368 | #ifdef __cplusplus 369 | } 370 | #endif -------------------------------------------------------------------------------- /oldsrc/rtf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * rtf.h -- utility functions for dealing with RTF content 3 | * 4 | * Copyright (C) 1999-2006 by Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | * Commentary: 22 | * 23 | */ 24 | 25 | #ifndef RTF_H 26 | #define RTF_H 27 | 28 | #if HAVE_CONFIG_H 29 | # include "config.h" 30 | #endif /* HAVE_CONFIG_H */ 31 | 32 | #include "mapi_attr.h" 33 | 34 | extern VarLenData** get_rtf_data (MAPI_Attr *attr); 35 | 36 | #endif /* RTF_H */ 37 | -------------------------------------------------------------------------------- /oldsrc/tnef.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * tnef.c -- extract files from microsoft TNEF format 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * Copyright (C)1997 Thomas Boll [ORIGINAL AUTHOR] 11 | * 12 | * This program is free software; you can redistribute it and/or modify 13 | * it under the terms of the GNU General Public License as published by 14 | * the Free Software Foundation; either version 2, or (at your option) 15 | * any later version. 16 | * 17 | * This program is distributed in the hope that it will be useful, 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | * GNU General Public License for more details. 21 | * 22 | * You should have received a copy of the GNU General Public License 23 | * along with this program; if not, you can either send email to this 24 | * program's maintainer or write to: The Free Software Foundation, 25 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
26 | * 27 | * Commentary: 28 | * scans tnef file and extracts all attachments 29 | * attachments are written to their original file-names if possible 30 | */ 31 | #ifdef HAVE_CONFIG_H 32 | #include "config.h" 33 | #endif /* HAVE_CONFIG_H */ 34 | 35 | #include "common.h" 36 | 37 | #include "tnef.h" 38 | 39 | #include "alloc.h" 40 | #include "attr.h" 41 | #include "debug.h" 42 | #include "file.h" 43 | #include "mapi_attr.h" 44 | #include "options.h" 45 | #include "path.h" 46 | #include "rtf.h" 47 | #include "util.h" 48 | 49 | #include 50 | #include 51 | 52 | static size_t filesize; 53 | 54 | typedef struct 55 | { 56 | VarLenData **text_body; 57 | VarLenData **html_bodies; 58 | VarLenData **rtf_bodies; 59 | } MessageBody; 60 | 61 | typedef enum 62 | { 63 | TEXT = 't', 64 | HTML = 'h', 65 | RTF = 'r' 66 | } MessageBodyTypes; 67 | 68 | /* Reads and decodes a object from the stream */ 69 | 70 | static Attr* 71 | read_object (FILE *in) 72 | { 73 | Attr *attr = NULL; 74 | 75 | /* peek to see if there is more to read from this stream */ 76 | int tmp_char = fgetc(in); 77 | if (tmp_char == -1) return NULL; 78 | ungetc(tmp_char, in); 79 | 80 | attr = attr_read (in); 81 | 82 | return attr; 83 | } 84 | 85 | static void 86 | free_bodies(VarLenData **bodies, int len) 87 | { 88 | while (len--) 89 | { 90 | XFREE(bodies[len]->data); 91 | XFREE(bodies[len]); 92 | } 93 | } 94 | 95 | static File** 96 | get_body_files (const char* filename, 97 | const char pref, 98 | const MessageBody* body) 99 | { 100 | File **files = NULL; 101 | VarLenData **data; 102 | char *ext = ""; 103 | char *type = "unknown"; 104 | int i; 105 | 106 | switch (pref) 107 | { 108 | case 'r': 109 | data = body->rtf_bodies; 110 | ext = ".rtf"; 111 | type = "text/rtf"; 112 | break; 113 | case 'h': 114 | data = body->html_bodies; 115 | ext = ".html"; 116 | type = "text/html"; 117 | break; 118 | case 't': 119 | data = body->text_body; 120 | ext = ".txt"; 121 | type = "text/plain"; 122 | break; 123 | default: 124 | data = 
NULL; 125 | break; 126 | } 127 | 128 | if (data) 129 | { 130 | int count = 0; 131 | char *tmp 132 | = CHECKED_XCALLOC(char, 133 | strlen(filename) + strlen(ext) + 1); 134 | strcpy (tmp, filename); 135 | strcat (tmp, ext); 136 | 137 | char *mime = CHECKED_XCALLOC(char, strlen(type) + 1); 138 | strcpy (mime, type); 139 | 140 | /* first get a count */ 141 | while (data[count++]); 142 | 143 | files = (File**)XCALLOC(File*, count + 1); 144 | for (i = 0; data[i]; i++) 145 | { 146 | files[i] = (File*)XCALLOC(File, 1); 147 | files[i]->name = tmp; 148 | files[i]->mime_type = mime; 149 | files[i]->len = data[i]->len; 150 | files[i]->data 151 | = CHECKED_XMALLOC(unsigned char, data[i]->len); 152 | memmove (files[i]->data, data[i]->data, data[i]->len); 153 | } 154 | } 155 | return files; 156 | } 157 | 158 | static VarLenData** 159 | get_text_data (Attr *attr) 160 | { 161 | VarLenData **body = XCALLOC(VarLenData*, 2); 162 | 163 | body[0] = XCALLOC(VarLenData, 1); 164 | body[0]->len = attr->len; 165 | body[0]->data = CHECKED_XCALLOC(unsigned char, attr->len); 166 | memmove (body[0]->data, attr->buf, attr->len); 167 | return body; 168 | } 169 | 170 | static VarLenData** 171 | get_html_data (MAPI_Attr *a) 172 | { 173 | VarLenData **body = XCALLOC(VarLenData*, a->num_values + 1); 174 | 175 | int j; 176 | for (j = 0; j < a->num_values; j++) 177 | { 178 | body[j] = XMALLOC(VarLenData, 1); 179 | body[j]->len = a->values[j].len; 180 | body[j]->data = CHECKED_XCALLOC(unsigned char, a->values[j].len); 181 | memmove (body[j]->data, a->values[j].data.buf, body[j]->len); 182 | } 183 | return body; 184 | } 185 | 186 | int 187 | data_left (FILE* input_file) 188 | { 189 | int retval = 1; 190 | 191 | if (feof(input_file)) retval = 0; 192 | else if (input_file != stdin) 193 | { 194 | /* check if there is enough data left */ 195 | struct stat statbuf; 196 | size_t pos, data_left; 197 | fstat (fileno(input_file), &statbuf); 198 | pos = ftell(input_file); 199 | data_left = (statbuf.st_size - pos); 
200 | 201 | if (data_left > 0 && data_left < MINIMUM_ATTR_LENGTH) 202 | { 203 | if ( CRUFT_SKIP ) 204 | { 205 | /* look for specific flavor of cruft -- trailing "\r\n" */ 206 | 207 | if ( data_left == 2 ) 208 | { 209 | int c = fgetc( input_file ); 210 | 211 | if ( c < 0 ) /* this should never happen */ 212 | { 213 | fprintf( stderr, "ERROR: confused beyond all redemption.\n" ); 214 | exit (1); 215 | } 216 | 217 | ungetc( c, input_file ); 218 | 219 | if ( c == 0x0d ) /* test for "\r" part of "\r\n" */ 220 | { 221 | /* "trust" that next char is 0x0a and ignore this cruft */ 222 | 223 | if ( VERBOSE_ON ) 224 | fprintf( stderr, "WARNING: garbage at end of file (ignored)\n" ); 225 | 226 | if ( DEBUG_ON ) 227 | debug_print( "!!garbage at end of file (ignored)\n" ); 228 | } 229 | else 230 | { 231 | fprintf( stderr, "ERROR: garbage at end of file.\n" ); 232 | } 233 | } 234 | else 235 | { 236 | fprintf (stderr, "ERROR: garbage at end of file.\n"); 237 | } 238 | } 239 | else 240 | { 241 | fprintf (stderr, "ERROR: garbage at end of file.\n"); 242 | } 243 | 244 | retval = 0; 245 | } 246 | } 247 | return retval; 248 | } 249 | 250 | 251 | /* The entry point into this module. This parses an entire TNEF file. 
*/ 252 | int 253 | parse_file (FILE* input_file, char* directory, 254 | char *body_filename, char *body_pref, int flags) { 255 | 256 | uint32 d; 257 | uint16 key; 258 | Attr *attr = NULL; 259 | File *file = NULL; 260 | int rtf_size = 0, html_size = 0; 261 | MessageBody body; 262 | memset (&body, '\0', sizeof (MessageBody)); 263 | 264 | /* store the program options in our file global variables */ 265 | g_flags = flags; 266 | 267 | /* check that this is in fact a TNEF file */ 268 | d = geti32(input_file); 269 | if (d != TNEF_SIGNATURE) { 270 | fprintf (stdout, "Seems not to be a TNEF file\n"); 271 | return 1; 272 | } 273 | 274 | /* Get the key */ 275 | key = geti16(input_file); 276 | debug_print ("TNEF Key: %hx\n", key); 277 | 278 | /* The rest of the file is a series of 'messages' and 'attachments' */ 279 | while ( data_left( input_file ) ) { 280 | 281 | attr = read_object( input_file ); 282 | 283 | if ( attr == NULL ) break; 284 | 285 | /* This signals the beginning of a file */ 286 | if (attr->name == attATTACHRENDDATA) { 287 | if (file) { 288 | file_write (file, directory); 289 | file_free (file); 290 | } else { 291 | file = CHECKED_XCALLOC (File, 1); 292 | } 293 | } 294 | 295 | /* Add the data to our lists. 
*/ 296 | switch (attr->lvl_type) { 297 | case LVL_MESSAGE: 298 | if (attr->name == attBODY) { 299 | body.text_body = get_text_data (attr); 300 | } else if (attr->name == attMAPIPROPS) { 301 | MAPI_Attr **mapi_attrs 302 | = mapi_attr_read (attr->len, attr->buf); 303 | if (mapi_attrs) { 304 | int i; 305 | for (i = 0; mapi_attrs[i]; i++) { 306 | MAPI_Attr *a = mapi_attrs[i]; 307 | 308 | if (a->name == MAPI_BODY_HTML) { 309 | body.html_bodies = get_html_data (a); 310 | html_size = a->num_values; 311 | } else if (a->name == MAPI_RTF_COMPRESSED) { 312 | body.rtf_bodies = get_rtf_data (a); 313 | rtf_size = a->num_values; 314 | } 315 | } 316 | /* cannot save attributes to file, since they 317 | * are not attachment attributes */ 318 | /* file_add_mapi_attrs (file, mapi_attrs); */ 319 | mapi_attr_free_list (mapi_attrs); 320 | XFREE (mapi_attrs); 321 | } 322 | } 323 | break; 324 | case LVL_ATTACHMENT: 325 | file_add_attr (file, attr); 326 | break; 327 | default: 328 | fprintf (stderr, "Invalid lvl type on attribute: %d\n", 329 | attr->lvl_type); 330 | return 1; 331 | break; 332 | } 333 | attr_free (attr); 334 | XFREE (attr); 335 | } 336 | 337 | if (file) { 338 | file_write (file, directory); 339 | file_free (file); 340 | XFREE (file); 341 | } 342 | 343 | /* Write the message body */ 344 | if (flags & SAVEBODY) { 345 | int i = 0; 346 | int all_flag = 0; 347 | if (strcmp (body_pref, "all") == 0) { 348 | all_flag = 1; 349 | body_pref = (char *)"rht"; 350 | } 351 | 352 | for (; i < 3; i++) { 353 | File **files = get_body_files (body_filename, body_pref[i], &body); 354 | if (files) { 355 | int j = 0; 356 | for (; files[j]; j++) { 357 | file_write(files[j], directory); 358 | file_free (files[j]); 359 | XFREE(files[j]); 360 | } 361 | XFREE(files); 362 | if (!all_flag) break; 363 | } 364 | } 365 | } 366 | 367 | if (body.text_body) { 368 | free_bodies(body.text_body, 1); 369 | XFREE(body.text_body); 370 | } 371 | if (rtf_size > 0) { 372 | free_bodies(body.rtf_bodies, rtf_size); 373 | 
XFREE(body.rtf_bodies); 374 | } 375 | if (html_size > 0) { 376 | free_bodies(body.html_bodies, html_size); 377 | XFREE(body.html_bodies); 378 | } 379 | return 0; 380 | } 381 | 382 | #ifdef __cplusplus 383 | } 384 | #endif 385 | -------------------------------------------------------------------------------- /oldsrc/tnef.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * tnef.h -- extract files from Microsoft TNEF format. 8 | * 9 | * Copyright (C) 1999-2006 by Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 25 | * 26 | * Commentary: 27 | * Various defined values for decoding a TNEF file. 28 | */ 29 | #ifndef TNEF_H 30 | #define TNEF_H 31 | 32 | #if HAVE_CONFIG_H 33 | # include "config.h" 34 | #endif /* HAVE_CONFIG_H */ 35 | 36 | #include "common.h" 37 | 38 | /* TNEF signature. Equivalent to the magic cookie for a TNEF file. 
*/ 39 | #define TNEF_SIGNATURE 0x223e9f78 40 | 41 | /* Main entrance point to tnef processing */ 42 | extern int 43 | parse_file(FILE *input, char * output_dir, 44 | char *body_file, char *body_pref, 45 | int flags); 46 | 47 | #endif /* !TNEF_H */ 48 | 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /oldsrc/tnef/mapi-types.data: -------------------------------------------------------------------------------- 1 | # mapi-types.data -- data file defining all types found in MAPI TNEF attachment 2 | # sym val name 3 | szMAPI_UNSPECIFIED 0x0000 MAPI Unspecified 4 | szMAPI_NULL 0x0001 MAPI null property 5 | szMAPI_SHORT 0x0002 MAPI short (signed 16 bits) 6 | szMAPI_INT 0x0003 MAPI integer (signed 32 bits) 7 | szMAPI_FLOAT 0x0004 MAPI float (4 bytes) 8 | szMAPI_DOUBLE 0x0005 MAPI double 9 | szMAPI_CURRENCY 0x0006 MAPI currency (64 bits) 10 | szMAPI_APPTIME 0x0007 MAPI application time 11 | szMAPI_ERROR 0x000a MAPI error (32 bits) 12 | szMAPI_BOOLEAN 0x000b MAPI boolean (16 bits) 13 | szMAPI_OBJECT 0x000d MAPI embedded object 14 | szMAPI_INT8BYTE 0x0014 MAPI 8 byte signed int 15 | szMAPI_STRING 0x001e MAPI string 16 | szMAPI_UNICODE_STRING 0x001f MAPI unicode-string (null terminated) 17 | szMAPI_SYSTIME 0x0040 MAPI time (64 bits) 18 | szMAPI_CLSID 0x0048 MAPI OLE GUID 19 | szMAPI_BINARY 0x0102 MAPI binary 20 | -------------------------------------------------------------------------------- /oldsrc/tnef/mkdata.awk: -------------------------------------------------------------------------------- 1 | # mkdata.awk - used to generate blah.[ch] from blah.data 2 | # 3 | # Copyright (C)1999-2006 Mark Simpson 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2, or (at your option) 8 | # any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can either send email to this
# program's maintainer or write to: The Free Software Foundation,
# Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
#
# Commentary:
#
# the file x.data was given on the command line
# the output is written to <TAG>s.h and <TAG>s.c in the current directory
# the environment variable TAG is used for the function name and comments
#

#
# Print headers and such.
#
BEGIN {
  TAG = ENVIRON["TAG"];

  # <TAG>s.h gets the enum, <TAG>s.c the get_<TAG>_str() lookup function
  HFILE = sprintf ("%ss.h", TAG);
  HFILEDEF = toupper(HFILE);
  gsub("\\.","_",HFILEDEF);
  CFILE = sprintf ("%ss.c", TAG);

  printf "/* This file generated by names.awk do not edit! */\n" > HFILE;
  printf "#ifndef %s\n", HFILEDEF > HFILE;
  printf "#define %s\n", HFILEDEF > HFILE;
  printf "enum _%s {\n", TAG > HFILE;

  printf "/* This file generated by names.awk do not edit! */\n" > CFILE;
  printf "#ifdef HAVE_CONFIG_H\n" > CFILE;
  printf "#include \"config.h\"\n" > CFILE;
  printf "#endif\n" > CFILE;
  printf "#include \"common.h\"\n" > CFILE;
  printf "#include \"%s\"\n", HFILE > CFILE;
  printf "char*\n" > CFILE;
  printf "get_%s_str(uint16 d)\n", TAG > CFILE;
  printf "{\n" > CFILE;
  printf " static char buf[128];\n" > CFILE;
  printf " static char* str;\n" > CFILE;
  printf " switch(d) {\n" > CFILE;
}

#
# deal with each data line: "sym val name words...".
# Comment lines and lines without both a symbol and a value are skipped,
# so a blank-looking line cannot produce a broken enum entry.
#
NF >= 2 && $1 !~ /^#/ {
  # Join fields 3..NF with single spaces.  The join must not test NAME
  # for truth: a first word such as "0" is a numeric string, counts as
  # false, and would be silently dropped.
  NAME = ""
  for (i = 3; i <= NF; i++) {
    NAME = (i == 3) ? $(i) : (NAME " " $(i))
  }

  # printf "#define %-10s\t%s\t/* %s */\n", $1, $2, NAME > HFILE;
  printf "\t%-45s = %s,\n", $1, $2 > HFILE

  printf " case %s:\n", $1 > CFILE;
  printf " str=\"%s\";\n", NAME > CFILE;
  printf " break;\n" > CFILE;
}

#
# Finish off the files
#
END {
  printf "};\n" > HFILE;
  printf "typedef enum _%s %s;\n", TAG, TAG > HFILE;
  printf "extern char*\n" > HFILE;
  printf "get_%s_str(uint16 d);\n", TAG > HFILE;
  printf "#endif /* %s */\n", HFILEDEF > HFILE;

  printf " default:\n" > CFILE;
  printf " str=NULL;\n" > CFILE;
  printf " break;\n" > CFILE;
  printf " }\n" > CFILE;
  printf " if ( str )\n" > CFILE;
  printf " {\n" > CFILE;
  printf " sprintf(buf,\"%%s <%%04x>\",str,d);\n" > CFILE;
  printf " }\n" > CFILE;
  printf " else\n" > CFILE;
  printf " {\n" > CFILE;
  printf " sprintf(buf,\"%%04x\",d);\n" > CFILE;
  printf " }\n" > CFILE;
  printf " return buf;\n" > CFILE;
  printf "}\n" > CFILE;

  # flush both outputs explicitly
  close(HFILE);
  close(CFILE);
}

--------------------------------------------------------------------------------
/oldsrc/tnef/replace/.deps/basename.Po:
-------------------------------------------------------------------------------- 1 | # dummy 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/.deps/dummy.Po: -------------------------------------------------------------------------------- 1 | dummy.o: dummy.c 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/.deps/getopt_long.Po: -------------------------------------------------------------------------------- 1 | # dummy 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/.deps/malloc.Po: -------------------------------------------------------------------------------- 1 | # dummy 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/.deps/strdup.Po: -------------------------------------------------------------------------------- 1 | # dummy 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/.gitignore: -------------------------------------------------------------------------------- 1 | libreplace.a 2 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/Makefile.am: -------------------------------------------------------------------------------- 1 | noinst_LIBRARIES = libreplace.a 2 | 3 | libreplace_a_SOURCES = dummy.c 4 | 5 | libreplace_a_LIBADD = @LIBOBJS@ 6 | 7 | EXTRA_DIST = getopt_long.h 8 | 9 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/basename.c: -------------------------------------------------------------------------------- 1 | /* 2 | * basename.c -- basename function for platforms without 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as 
published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #if HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #if HAVE_STRING_H 27 | # include 28 | #elif HAVE_STRINGS_H 29 | # include 30 | #endif 31 | 32 | #if !HAVE_STRRCHR 33 | # ifdef strrchr 34 | # define strrchr rindex 35 | # endif 36 | #endif 37 | 38 | /* works like basename(1) (NOTE: the returned pointer must not be freed! */ 39 | char* 40 | basename (char* path) 41 | { 42 | char *ptr = strrchr (path, '/'); 43 | return ptr ? ++ptr : (char*)path; 44 | } 45 | 46 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/dummy.c: -------------------------------------------------------------------------------- 1 | /* 2 | * dummy function to ensure that libreplace.a is never empty since 3 | * that is non-portable. 4 | */ 5 | void 6 | ___dummy___() 7 | {} 8 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/getopt_long.h: -------------------------------------------------------------------------------- 1 | /* This code snithced from GNUEmacs 20.5.1 with only minor mods */ 2 | /* Declarations for getopt. 3 | Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc. 4 | 5 | NOTE: The canonical source of this file is maintained with the GNU C Library. 
6 | Bugs can be reported to bug-glibc@gnu.org. 7 | 8 | This program is free software; you can redistribute it and/or modify it 9 | under the terms of the GNU General Public License as published by the 10 | Free Software Foundation; either version 2, or (at your option) any 11 | later version. 12 | 13 | This program is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with this program; if not, write to the Free Software 20 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 21 | USA. */ 22 | 23 | #ifndef _GETOPT_H 24 | #define _GETOPT_H 1 25 | 26 | #ifdef HAVE_CONFIG_H 27 | #include "config.h" 28 | #endif /* HAVE_CONFIG_H */ 29 | #ifndef HAVE_GETOPT_LONG 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | /* For communication from `getopt' to the caller. 36 | When `getopt' finds an option that takes an argument, 37 | the argument value is returned here. 38 | Also, when `ordering' is RETURN_IN_ORDER, 39 | each non-option ARGV-element is returned here. */ 40 | 41 | extern char *optarg; 42 | 43 | /* Index in ARGV of the next element to be scanned. 44 | This is used for communication to and from the caller 45 | and for communication between successive calls to `getopt'. 46 | 47 | On entry to `getopt', zero means this is the first call; initialize. 48 | 49 | When `getopt' returns -1, this is the index of the first of the 50 | non-option elements that the caller should itself scan. 51 | 52 | Otherwise, `optind' communicates from one call to the next 53 | how much of ARGV has been scanned so far. */ 54 | 55 | extern int optind; 56 | 57 | /* Callers store zero here to inhibit the error message `getopt' prints 58 | for unrecognized options. 
*/ 59 | 60 | extern int opterr; 61 | 62 | /* Set to an option character which was unrecognized. */ 63 | 64 | extern int optopt; 65 | 66 | /* Describe the long-named options requested by the application. 67 | The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector 68 | of `struct option' terminated by an element containing a name which is 69 | zero. 70 | 71 | The field `has_arg' is: 72 | no_argument (or 0) if the option does not take an argument, 73 | required_argument (or 1) if the option requires an argument, 74 | optional_argument (or 2) if the option takes an optional argument. 75 | 76 | If the field `flag' is not NULL, it points to a variable that is set 77 | to the value given in the field `val' when the option is found, but 78 | left unchanged if the option is not found. 79 | 80 | To have a long-named option do something other than set an `int' to 81 | a compiled-in constant, such as set a value from `optarg', set the 82 | option's `flag' field to zero and its `val' field to a nonzero 83 | value (the equivalent single-letter option character, if there is 84 | one). For long options that have a zero `flag' field, `getopt' 85 | returns the contents of the `val' field. */ 86 | 87 | struct option 88 | { 89 | #if defined (__STDC__) && __STDC__ 90 | const char *name; 91 | #else 92 | char *name; 93 | #endif 94 | /* has_arg can't be an enum because some compilers complain about 95 | type mismatches in all the code that assumes it is an int. */ 96 | int has_arg; 97 | int *flag; 98 | int val; 99 | }; 100 | 101 | /* Names for the values of the `has_arg' field of `struct option'. */ 102 | 103 | #define no_argument 0 104 | #define required_argument 1 105 | #define optional_argument 2 106 | 107 | #if defined (__STDC__) && __STDC__ 108 | #ifdef __GNU_LIBRARY__ 109 | /* Many other libraries have conflicting prototypes for getopt, with 110 | differences in the consts, in stdlib.h. To avoid compilation 111 | errors, only prototype getopt for the GNU C library. 
*/ 112 | extern int getopt (int argc, char *const *argv, const char *shortopts); 113 | #else /* not __GNU_LIBRARY__ */ 114 | extern int getopt (); 115 | #endif /* __GNU_LIBRARY__ */ 116 | extern int getopt_long (int argc, char *const *argv, const char *shortopts, 117 | const struct option *longopts, int *longind); 118 | extern int getopt_long_only (int argc, char *const *argv, 119 | const char *shortopts, 120 | const struct option *longopts, int *longind); 121 | 122 | /* Internal only. Users should not call this directly. */ 123 | extern int _getopt_internal (int argc, char *const *argv, 124 | const char *shortopts, 125 | const struct option *longopts, int *longind, 126 | int long_only); 127 | #else /* not __STDC__ */ 128 | extern int getopt (); 129 | extern int getopt_long (); 130 | extern int getopt_long_only (); 131 | 132 | extern int _getopt_internal (); 133 | #endif /* __STDC__ */ 134 | 135 | #ifdef __cplusplus 136 | } 137 | #endif 138 | 139 | #endif /* !HAVE_GETOPT_LONG */ 140 | 141 | #endif /* getopt.h */ 142 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/malloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * malloc.c -- replacement malloc function if provided malloc does not 3 | * handle malloc(0) well 4 | * 5 | * Copyright (C)1999-2006 Mark Simpson 6 | * 7 | * This program is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2, or (at your option) 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program; if not, you can either send email to this 19 | * program's maintainer or write to: The Free Software Foundation, 20 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 21 | * 22 | */ 23 | #if HAVE_CONFIG_H 24 | # include "config.h" 25 | #endif 26 | #undef malloc 27 | 28 | #include 29 | 30 | char *malloc (); 31 | 32 | /* Allocate an N-byte block of memory from the heap. 33 | If N is zero, allocate a 1-byte block. */ 34 | 35 | char * 36 | rpl_malloc (size_t n) 37 | { 38 | if (n == 0) 39 | n = 1; 40 | return malloc (n); 41 | } 42 | -------------------------------------------------------------------------------- /oldsrc/tnef/replace/strdup.c: -------------------------------------------------------------------------------- 1 | /* 2 | * strdup.c -- version of strdup for systems without one 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #if !HAVE_STRDUP 27 | #include 28 | #include 29 | 30 | #if STDC_HEADERS 31 | # include 32 | #else 33 | extern size_t strlen (const char *); 34 | 35 | # if !HAVE_MEMMOVE 36 | # define memmove(d,s,n) bcopy((s),(d),(n)); 37 | # else 38 | extern void* memmove (void *, const void *, size_t); 39 | # endif 40 | #endif 41 | 42 | char * 43 | strdup (const char *str) 44 | { 45 | size_t len = strlen(str); 46 | char *out = malloc ((len+1) * sizeof (char)); 47 | memmove (out, str, (len + 1)); 48 | return out; 49 | } 50 | #endif /* !HAVE_STRDUP */ 51 | 52 | -------------------------------------------------------------------------------- /oldsrc/tnef/tnef-names.data: -------------------------------------------------------------------------------- 1 | # names.data -- data file defining names of all attributes found in TNEF file 2 | # sym val name 3 | attOWNER 0x0000 Owner 4 | attSENTFOR 0x0001 Sent For 5 | attDELEGATE 0x0002 Delegate 6 | attDATESTART 0x0006 Date Start 7 | attDATEEND 0x0007 Date End 8 | attAIDOWNER 0x0008 Owner Appointment ID 9 | attREQUESTRES 0x0009 Response Requested. 
10 | attFROM 0x8000 From 11 | attSUBJECT 0x8004 Subject 12 | attDATESENT 0x8005 Date Sent 13 | attDATERECD 0x8006 Date Received 14 | attMESSAGESTATUS 0x8007 Message Status 15 | attMESSAGECLASS 0x8008 Message Class 16 | attMESSAGEID 0x8009 Message ID 17 | attPARENTID 0x800a Parent ID 18 | attCONVERSATIONID 0x800b Conversation ID 19 | attBODY 0x800c Body 20 | attPRIORITY 0x800d Priority 21 | attATTACHDATA 0x800f Attachment Data 22 | attATTACHTITLE 0x8010 Attachment File Name 23 | attATTACHMETAFILE 0x8011 Attachment Meta File 24 | attATTACHCREATEDATE 0x8012 Attachment Creation Date 25 | attATTACHMODIFYDATE 0x8013 Attachment Modification Date 26 | attDATEMODIFY 0x8020 Date Modified 27 | attATTACHTRANSPORTFILENAME 0x9001 Attachment Transport Filename 28 | attATTACHRENDDATA 0x9002 Attachment Rendering Data 29 | attMAPIPROPS 0x9003 MAPI Properties 30 | attRECIPTABLE 0x9004 Recipients 31 | attATTACHMENT 0x9005 Attachment 32 | attTNEFVERSION 0x9006 TNEF Version 33 | attOEMCODEPAGE 0x9007 OEM Codepage 34 | attORIGNINALMESSAGECLASS 0x9008 Original Message Class 35 | -------------------------------------------------------------------------------- /oldsrc/tnef/tnef-types.data: -------------------------------------------------------------------------------- 1 | # types.data -- data file defining all types found in TNEF attachment 2 | # sym val name 3 | szTRIPLES 0x0000 triples 4 | szSTRING 0x0001 string 5 | szTEXT 0x0002 text 6 | szDATE 0x0003 date 7 | szSHORT 0x0004 short 8 | szLONG 0x0005 long 9 | szBYTE 0x0006 byte 10 | szWORD 0x0007 word 11 | szDWORD 0x0008 dword 12 | szMAX 0x0009 max 13 | -------------------------------------------------------------------------------- /oldsrc/tnef_names.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! 
*/ 7 | #ifdef HAVE_CONFIG_H 8 | #include "config.h" 9 | #endif 10 | #include "common.h" 11 | #include "tnef_names.h" 12 | char* 13 | get_tnef_name_str(uint16 d) 14 | { 15 | static char buf[128]; 16 | static char* str; 17 | switch(d) { 18 | case attOWNER: 19 | str="Owner"; 20 | break; 21 | case attSENTFOR: 22 | str="Sent For"; 23 | break; 24 | case attDELEGATE: 25 | str="Delegate"; 26 | break; 27 | case attDATESTART: 28 | str="Date Start"; 29 | break; 30 | case attDATEEND: 31 | str="Date End"; 32 | break; 33 | case attAIDOWNER: 34 | str="Owner Appointment ID"; 35 | break; 36 | case attREQUESTRES: 37 | str="Response Requested."; 38 | break; 39 | case attFROM: 40 | str="From"; 41 | break; 42 | case attSUBJECT: 43 | str="Subject"; 44 | break; 45 | case attDATESENT: 46 | str="Date Sent"; 47 | break; 48 | case attDATERECD: 49 | str="Date Received"; 50 | break; 51 | case attMESSAGESTATUS: 52 | str="Message Status"; 53 | break; 54 | case attMESSAGECLASS: 55 | str="Message Class"; 56 | break; 57 | case attMESSAGEID: 58 | str="Message ID"; 59 | break; 60 | case attPARENTID: 61 | str="Parent ID"; 62 | break; 63 | case attCONVERSATIONID: 64 | str="Conversation ID"; 65 | break; 66 | case attBODY: 67 | str="Body"; 68 | break; 69 | case attPRIORITY: 70 | str="Priority"; 71 | break; 72 | case attATTACHDATA: 73 | str="Attachment Data"; 74 | break; 75 | case attATTACHTITLE: 76 | str="Attachment File Name"; 77 | break; 78 | case attATTACHMETAFILE: 79 | str="Attachment Meta File"; 80 | break; 81 | case attATTACHCREATEDATE: 82 | str="Attachment Creation Date"; 83 | break; 84 | case attATTACHMODIFYDATE: 85 | str="Attachment Modification Date"; 86 | break; 87 | case attDATEMODIFY: 88 | str="Date Modified"; 89 | break; 90 | case attATTACHTRANSPORTFILENAME: 91 | str="Attachment Transport Filename"; 92 | break; 93 | case attATTACHRENDDATA: 94 | str="Attachment Rendering Data"; 95 | break; 96 | case attMAPIPROPS: 97 | str="MAPI Properties"; 98 | break; 99 | case attRECIPTABLE: 100 | 
str="Recipients"; 101 | break; 102 | case attATTACHMENT: 103 | str="Attachment"; 104 | break; 105 | case attTNEFVERSION: 106 | str="TNEF Version"; 107 | break; 108 | case attOEMCODEPAGE: 109 | str="OEM Codepage"; 110 | break; 111 | case attORIGNINALMESSAGECLASS: 112 | str="Original Message Class"; 113 | break; 114 | default: 115 | str=NULL; 116 | break; 117 | } 118 | if ( str ) 119 | { 120 | sprintf(buf,"%s <%04x>",str,d); 121 | } 122 | else 123 | { 124 | sprintf(buf,"%04x",d); 125 | } 126 | return buf; 127 | } 128 | 129 | #ifdef __cplusplus 130 | } 131 | #endif -------------------------------------------------------------------------------- /oldsrc/tnef_names.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! */ 7 | #ifndef TNEF_NAMES_H 8 | #define TNEF_NAMES_H 9 | enum _tnef_name { 10 | attOWNER = 0x0000, 11 | attSENTFOR = 0x0001, 12 | attDELEGATE = 0x0002, 13 | attDATESTART = 0x0006, 14 | attDATEEND = 0x0007, 15 | attAIDOWNER = 0x0008, 16 | attREQUESTRES = 0x0009, 17 | attFROM = 0x8000, 18 | attSUBJECT = 0x8004, 19 | attDATESENT = 0x8005, 20 | attDATERECD = 0x8006, 21 | attMESSAGESTATUS = 0x8007, 22 | attMESSAGECLASS = 0x8008, 23 | attMESSAGEID = 0x8009, 24 | attPARENTID = 0x800a, 25 | attCONVERSATIONID = 0x800b, 26 | attBODY = 0x800c, 27 | attPRIORITY = 0x800d, 28 | attATTACHDATA = 0x800f, 29 | attATTACHTITLE = 0x8010, 30 | attATTACHMETAFILE = 0x8011, 31 | attATTACHCREATEDATE = 0x8012, 32 | attATTACHMODIFYDATE = 0x8013, 33 | attDATEMODIFY = 0x8020, 34 | attATTACHTRANSPORTFILENAME = 0x9001, 35 | attATTACHRENDDATA = 0x9002, 36 | attMAPIPROPS = 0x9003, 37 | attRECIPTABLE = 0x9004, 38 | attATTACHMENT = 0x9005, 39 | attTNEFVERSION = 0x9006, 40 | attOEMCODEPAGE = 0x9007, 41 | attORIGNINALMESSAGECLASS = 0x9008, 42 | }; 43 | typedef enum _tnef_name tnef_name; 44 | extern char* 45 | get_tnef_name_str(uint16 d); 46 | 47 | 48 | 
#ifdef __cplusplus 49 | } 50 | #endif 51 | #endif /* TNEF_NAMES_H */ 52 | -------------------------------------------------------------------------------- /oldsrc/tnef_types.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! */ 7 | #ifdef HAVE_CONFIG_H 8 | #include "config.h" 9 | #endif 10 | #include "common.h" 11 | #include "tnef_types.h" 12 | char* 13 | get_tnef_type_str(uint16 d) 14 | { 15 | static char buf[128]; 16 | static char* str; 17 | switch(d) { 18 | case szTRIPLES: 19 | str="triples"; 20 | break; 21 | case szSTRING: 22 | str="string"; 23 | break; 24 | case szTEXT: 25 | str="text"; 26 | break; 27 | case szDATE: 28 | str="date"; 29 | break; 30 | case szSHORT: 31 | str="short"; 32 | break; 33 | case szLONG: 34 | str="long"; 35 | break; 36 | case szBYTE: 37 | str="byte"; 38 | break; 39 | case szWORD: 40 | str="word"; 41 | break; 42 | case szDWORD: 43 | str="dword"; 44 | break; 45 | case szMAX: 46 | str="max"; 47 | break; 48 | default: 49 | str=NULL; 50 | break; 51 | } 52 | if ( str ) 53 | { 54 | sprintf(buf,"%s <%04x>",str,d); 55 | } 56 | else 57 | { 58 | sprintf(buf,"%04x",d); 59 | } 60 | return buf; 61 | } 62 | 63 | #ifdef __cplusplus 64 | } 65 | #endif -------------------------------------------------------------------------------- /oldsrc/tnef_types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* This file generated by names.awk do not edit! 
*/ 7 | #ifndef TNEF_TYPES_H 8 | #define TNEF_TYPES_H 9 | enum _tnef_type { 10 | szTRIPLES = 0x0000, 11 | szSTRING = 0x0001, 12 | szTEXT = 0x0002, 13 | szDATE = 0x0003, 14 | szSHORT = 0x0004, 15 | szLONG = 0x0005, 16 | szBYTE = 0x0006, 17 | szWORD = 0x0007, 18 | szDWORD = 0x0008, 19 | szMAX = 0x0009, 20 | }; 21 | typedef enum _tnef_type tnef_type; 22 | extern char* 23 | get_tnef_type_str(uint16 d); 24 | 25 | 26 | #endif /* TNEF_TYPES_H */ 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif -------------------------------------------------------------------------------- /oldsrc/write.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | #if HAVE_CONFIG_H 7 | # include "config.h" 8 | #endif 9 | 10 | #include "common.h" 11 | #include "attr.h" 12 | #include "mapi_attr.h" 13 | 14 | /* Format Strings */ 15 | #define UINT8_FMT "%u" 16 | #define INT8_FMT "%d" 17 | #define UINT16_FMT "%hu" 18 | #define INT16_FMT "%hd" 19 | #if (SIZEOF_INT == 2) 20 | #define UINT32_FMT "%lu" 21 | #define INT32_FMT "%ld" 22 | #else 23 | #define UINT32_FMT "%u" 24 | #define INT32_FMT "%d" 25 | #endif /* (SIZEOF_INT == 2) */ 26 | 27 | void 28 | write_uint8 (FILE* fp, uint8 b) 29 | { 30 | fprintf (fp, UINT8_FMT, b); 31 | } 32 | 33 | void 34 | write_uint16 (FILE* fp, uint16 s) 35 | { 36 | fprintf (fp, UINT16_FMT, s); 37 | } 38 | 39 | void 40 | write_uint32 (FILE* fp, uint32 l) 41 | { 42 | fprintf (fp, UINT32_FMT, l); 43 | } 44 | 45 | void 46 | write_int8 (FILE* fp, int8 b) 47 | { 48 | fprintf (fp, INT8_FMT, b); 49 | } 50 | 51 | void 52 | write_int16 (FILE* fp, int16 s) 53 | { 54 | fprintf (fp, INT16_FMT, s); 55 | } 56 | 57 | void 58 | write_int32 (FILE* fp, int32 l) 59 | { 60 | fprintf (fp, INT32_FMT, l); 61 | } 62 | 63 | void 64 | write_float (FILE *fp, float f) 65 | { 66 | fprintf (fp, "%f", f); 67 | } 68 | 69 | void 70 | write_string (FILE *fp, const char *s) 71 | { 72 | fprintf (fp, "'%s'", s); 73 | } 74 | 
75 | void 76 | write_byte (FILE* fp, uint8 b) 77 | { 78 | fprintf (fp, "0x%02x", b); 79 | } 80 | 81 | void 82 | write_word (FILE *fp, uint16 w) 83 | { 84 | fprintf (fp, "0x%04x", w); 85 | } 86 | 87 | void 88 | write_dword (FILE *fp, uint32 dw) 89 | { 90 | fprintf (fp, "0x%08x", dw); 91 | } 92 | 93 | void 94 | write_date (FILE *fp, struct date* dt) 95 | { 96 | fprintf (fp, "%s", date_to_str (dt)); 97 | } 98 | 99 | void 100 | write_triple (FILE* fp, TRIPLE* triple) 101 | { 102 | fprintf (fp, 103 | "{id=" UINT16_FMT 104 | ",chgtrp=" UINT16_FMT 105 | ",cch=" UINT16_FMT 106 | ",cb=" UINT16_FMT "} " 107 | "sender_display_name='%s', " 108 | "sender_address='%s'", 109 | triple->trp.id, 110 | triple->trp.chbgtrp, 111 | triple->trp.cch, 112 | triple->trp.cb, 113 | triple->sender_display_name, 114 | triple->sender_address); 115 | } 116 | 117 | void 118 | write_boolean (FILE *fp, uint16 b) 119 | { 120 | fprintf (fp, "%s", ((b == 0) ? "false" : "true")); 121 | } 122 | 123 | void 124 | write_uint64 (FILE *fp, uint32 bytes[2]) 125 | { 126 | fprintf (fp, "0x%08x 0x%08x", bytes[0], bytes[1]); 127 | } 128 | 129 | void 130 | write_guid (FILE *fp, GUID *guid) 131 | { 132 | int j; 133 | fprintf (fp, "{ 0x%04x 0x%02x 0x%02x { ", 134 | guid->data1, guid->data2, guid->data3); 135 | for (j = 0; j < 8; j++) 136 | { 137 | write_byte (fp, guid->data4[j]); 138 | fprintf (fp, " "); 139 | } 140 | fprintf (fp, "}"); 141 | } 142 | 143 | #ifdef __cplusplus 144 | } 145 | #endif -------------------------------------------------------------------------------- /oldsrc/write.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | #ifndef WRITE_H 7 | #define WRITE_H 1 8 | 9 | #include "common.h" 10 | #include "attr.h" 11 | #include "mapi_attr.h" 12 | 13 | extern void write_uint8 (FILE* fp, uint8 b); 14 | extern void write_uint16 (FILE* fp, uint16 s); 15 | extern void write_uint32 (FILE* fp, uint32 l); 16 | extern 
void write_int8 (FILE* fp, int8 b); 17 | extern void write_int16 (FILE* fp, int16 s); 18 | extern void write_int32 (FILE* fp, int32 l); 19 | extern void write_float (FILE *fp, float f); 20 | extern void write_string(FILE* fp, const char *s); 21 | extern void write_byte (FILE* fp, uint8 b); 22 | extern void write_word (FILE *fp, uint16 w); 23 | extern void write_dword (FILE *fp, uint32 dw); 24 | extern void write_date (FILE *fp, struct date* dt); 25 | extern void write_triple (FILE* fp, TRIPLE *triple); 26 | extern void write_boolean (FILE *fp, uint16 b); 27 | extern void write_uint64 (FILE *fp, uint32 bytes[2]); 28 | extern void write_guid (FILE *fp, GUID *guid); 29 | 30 | #endif /* WRITE_H */ 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_CXXFLAGS = 3 | PKG_LIBS = -L. 
4 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // is_rtf 9 | bool is_rtf(RawVector v); 10 | RcppExport SEXP _msgxtractr_is_rtf(SEXP vSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< RawVector >::type v(vSEXP); 15 | rcpp_result_gen = Rcpp::wrap(is_rtf(v)); 16 | return rcpp_result_gen; 17 | END_RCPP 18 | } 19 | // decode_rtf 20 | RawVector decode_rtf(RawVector v); 21 | RcppExport SEXP _msgxtractr_decode_rtf(SEXP vSEXP) { 22 | BEGIN_RCPP 23 | Rcpp::RObject rcpp_result_gen; 24 | Rcpp::RNGScope rcpp_rngScope_gen; 25 | Rcpp::traits::input_parameter< RawVector >::type v(vSEXP); 26 | rcpp_result_gen = Rcpp::wrap(decode_rtf(v)); 27 | return rcpp_result_gen; 28 | END_RCPP 29 | } 30 | // int_read_msg 31 | List int_read_msg(std::string path); 32 | RcppExport SEXP _msgxtractr_int_read_msg(SEXP pathSEXP) { 33 | BEGIN_RCPP 34 | Rcpp::RObject rcpp_result_gen; 35 | Rcpp::RNGScope rcpp_rngScope_gen; 36 | Rcpp::traits::input_parameter< std::string >::type path(pathSEXP); 37 | rcpp_result_gen = Rcpp::wrap(int_read_msg(path)); 38 | return rcpp_result_gen; 39 | END_RCPP 40 | } 41 | 42 | static const R_CallMethodDef CallEntries[] = { 43 | {"_msgxtractr_is_rtf", (DL_FUNC) &_msgxtractr_is_rtf, 1}, 44 | {"_msgxtractr_decode_rtf", (DL_FUNC) &_msgxtractr_decode_rtf, 1}, 45 | {"_msgxtractr_int_read_msg", (DL_FUNC) &_msgxtractr_int_read_msg, 1}, 46 | {NULL, NULL, 0} 47 | }; 48 | 49 | RcppExport void R_init_msgxtractr(DllInfo *dll) { 50 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 51 | R_useDynamicSymbols(dll, FALSE); 52 | } 53 | 
-------------------------------------------------------------------------------- /src/alloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * alloc.c -- Useful allocation function/definitions 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | #include "alloc.h" 32 | 33 | static size_t alloc_limit = 0; 34 | 35 | void 36 | set_alloc_limit (size_t size) 37 | { 38 | alloc_limit = size; 39 | } 40 | 41 | size_t 42 | get_alloc_limit() 43 | { 44 | return alloc_limit; 45 | } 46 | 47 | static void 48 | alloc_limit_failure (char *fn_name, size_t size) 49 | { 50 | // fprintf (stderr, 51 | // "%s: Maximum allocation size exceeded " 52 | // "(maxsize = %lu; size = %lu).\n", 53 | // fn_name, 54 | // (unsigned long)alloc_limit, 55 | // (unsigned long)size); 56 | } 57 | 58 | void 59 | alloc_limit_assert (char *fn_name, size_t size) 60 | { 61 | if (alloc_limit && size > alloc_limit) 62 | { 63 | alloc_limit_failure (fn_name, size); 64 | //exit (-1); 65 | } 66 | } 67 | 68 | /* attempts to malloc memory, if fails print error and call abort */ 69 | void* 70 | xmalloc (size_t size) 71 | { 72 | void *ptr = malloc (size); 73 | if (!ptr 74 | && (size != 0)) /* some libc don't like size == 0 */ 75 | { 76 | perror ("xmalloc: Memory allocation failure"); 77 | //abort(); 78 | } 79 | return ptr; 80 | } 81 | 82 | /* Allocates memory but only up to a limit */ 83 | void* 84 | checked_xmalloc (size_t size) 85 | { 86 | alloc_limit_assert ("checked_xmalloc", size); 87 | return xmalloc (size); 88 | } 89 | 90 | /* xmallocs memory and clears it out */ 91 | void* 92 | xcalloc (size_t num, size_t size) 93 | { 94 | void *ptr = malloc(num * size); 95 | if (ptr) 96 | { 97 | memset (ptr, '\0', (num * size)); 98 | } 99 | return ptr; 100 | } 101 | 102 | /* xcallocs memory but only up to a limit */ 103 | void* 104 | checked_xcalloc (size_t num, size_t size) 105 | { 106 | alloc_limit_assert ("checked_xcalloc", (num *size)); 107 | return xcalloc (num, size); 108 | } 109 | 110 | 111 | #ifdef __cplusplus 112 | } 113 | #endif 114 | 115 | 
-------------------------------------------------------------------------------- /src/alloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * alloc.h -- Useful allocation function/definitions 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
20 | * 21 | */ 22 | #ifndef ALLOC_H 23 | #define ALLOC_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | 30 | #if HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif /* HAVE_CONFIG_H */ 33 | 34 | #include "common.h" 35 | 36 | #if !STDC_HEADERS 37 | extern void free (void*); 38 | #endif /* STDC_HEADERS */ 39 | 40 | extern void set_alloc_limit (size_t size); 41 | extern size_t get_alloc_limit(); 42 | extern void alloc_limit_assert (char *fn_name, size_t size); 43 | extern void* checked_xmalloc (size_t size); 44 | extern void* xmalloc (size_t size); 45 | extern void* checked_xcalloc (size_t num, size_t size); 46 | extern void* xcalloc (size_t num, size_t size); 47 | 48 | #define XMALLOC(_type,_num) \ 49 | ((_type*)xmalloc((_num)*sizeof(_type))) 50 | #define XCALLOC(_type,_num) \ 51 | ((_type*)xcalloc((_num), sizeof (_type))) 52 | #define CHECKED_XMALLOC(_type,_num) \ 53 | ((_type*)checked_xmalloc((_num)*sizeof(_type))) 54 | #define CHECKED_XCALLOC(_type,_num) \ 55 | ((_type*)checked_xcalloc((_num),sizeof(_type))) 56 | #define XFREE(_ptr) \ 57 | do { if (_ptr) { free (_ptr); _ptr = 0; } } while (0) 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | #endif /* ALLOC_H */ 63 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * common.h -- 'common' declarations etc. 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | #ifndef COMMON_H 27 | #define COMMON_H 1 28 | 29 | 30 | #if HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | #if STDC_HEADERS 39 | # include 40 | # include 41 | # include 42 | # if HAVE_STRING_H 43 | # include 44 | # endif 45 | # if HAVE_STRINGS_H 46 | # include 47 | # endif 48 | #else 49 | extern int strcmp (const char *, const char *); 50 | extern char *strcpy (char *, const char *); 51 | extern char *strcat (char *, const char *); 52 | // extern void abort (void); 53 | // extern void exit (int); 54 | extern void* memset (void* ptr, int c, size_t size); 55 | extern void* malloc (size_t size); 56 | extern void *memmove (void *, const void*, size_t); 57 | 58 | #ifdef HAVE_SYS_STAT_H 59 | # include 60 | #endif 61 | 62 | extern char* xstrdup (const char* str); 63 | 64 | /* ********** SIZES ********** */ 65 | 66 | /* 67 | * typedefs for the types specified in the grammar: 68 | * BYTE -- 1 byte -- char 69 | * WORD -- 2 bytes -- short/int 70 | * DWORD -- 4 bytes -- int/long 71 | */ 72 | typedef signed char int8; 73 | typedef unsigned char uint8; 74 | #if (SIZEOF_INT == 4) 75 | typedef short int16; 76 | typedef unsigned short uint16; 77 | typedef int int32; 78 | typedef unsigned int uint32; 79 | #else 80 | typedef int int16; 81 | typedef unsigned int uint16; 82 | typedef long int32; 83 | typedef unsigned long uint32; 84 | #endif /* SIZEOF_INT == 4 */ 85 | 86 | /* ********** SIZES ********** */ 87 | 88 | /* ********** REPLACED FUNCS ********** */ 89 | #if !HAVE_DECL_BASENAME 90 | extern 
char *basename (char *path); 91 | #endif 92 | 93 | /* ********** REPLACED FUNCS ********** */ 94 | #endif 95 | 96 | #endif /* COMMON_H */ 97 | 98 | #ifdef __cplusplus 99 | } 100 | #endif 101 | -------------------------------------------------------------------------------- /src/config.h: -------------------------------------------------------------------------------- 1 | /* src/config.h. Generated from config.h.in by configure. */ 2 | /* src/config.h.in. Generated from configure.in by autoheader. */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /* Define to 1 if you have the `basename' function. */ 9 | #define HAVE_BASENAME 1 10 | 11 | /* Define to 1 if you have the declaration of `basename', and to 0 if you 12 | don't. */ 13 | #define HAVE_DECL_BASENAME 0 14 | 15 | /* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ 16 | /* #undef HAVE_DOPRNT */ 17 | 18 | /* Define to 1 if you have the `getopt_long' function. */ 19 | #define HAVE_GETOPT_LONG 1 20 | 21 | /* Define to 1 if you have the header file. */ 22 | #define HAVE_INTTYPES_H 1 23 | 24 | /* Define to 1 if you have the `intl' library (-lintl). */ 25 | /* #undef HAVE_LIBINTL */ 26 | 27 | /* Define to 1 if you have the header file. */ 28 | /* #undef HAVE_LIBINTL_H */ 29 | 30 | /* Define to 1 if your system has a GNU libc compatible `malloc' function, and 31 | to 0 otherwise. */ 32 | #define HAVE_MALLOC 1 33 | 34 | /* Define to 1 if you have the `memmove' function. */ 35 | #define HAVE_MEMMOVE 1 36 | 37 | /* Define to 1 if you have the header file. */ 38 | #define HAVE_MEMORY_H 1 39 | 40 | /* Define to 1 if you have the `memset' function. */ 41 | #define HAVE_MEMSET 1 42 | 43 | /* Define to 1 if you have the header file. */ 44 | #define HAVE_STDINT_H 1 45 | 46 | /* Define to 1 if you have the header file. */ 47 | #define HAVE_STDLIB_H 1 48 | 49 | /* Define to 1 if you have the `strdup' function. */ 50 | #define HAVE_STRDUP 1 51 | 52 | /* Define to 1 if you have the header file. 
*/ 53 | #define HAVE_STRINGS_H 1 54 | 55 | /* Define to 1 if you have the header file. */ 56 | #define HAVE_STRING_H 1 57 | 58 | /* Define to 1 if you have the `strrchr' function. */ 59 | #define HAVE_STRRCHR 1 60 | 61 | /* Define to 1 if you have the `strtoul' function. */ 62 | #define HAVE_STRTOUL 1 63 | 64 | /* Define to 1 if you have the header file. */ 65 | #define HAVE_SYS_STAT_H 1 66 | 67 | /* Define to 1 if you have the header file. */ 68 | #define HAVE_SYS_TYPES_H 1 69 | 70 | /* Define to 1 if you have the header file. */ 71 | #define HAVE_UNISTD_H 1 72 | 73 | /* Define to 1 if you have the `vprintf' function. */ 74 | #define HAVE_VPRINTF 1 75 | 76 | /* Name of package */ 77 | #define PACKAGE "tnef" 78 | 79 | /* Define to the address where bug reports for this package should be sent. */ 80 | #define PACKAGE_BUGREPORT "verdammelt@users.sourceforge.net" 81 | 82 | /* Define to the full name of this package. */ 83 | #define PACKAGE_NAME "tnef" 84 | 85 | /* Define to the full name and version of this package. */ 86 | #define PACKAGE_STRING "tnef 1.4.10" 87 | 88 | /* Define to the one symbol short name of this package. */ 89 | #define PACKAGE_TARNAME "tnef" 90 | 91 | /* Define to the home page for this package. */ 92 | #define PACKAGE_URL "" 93 | 94 | /* Define to the version of this package. */ 95 | #define PACKAGE_VERSION "1.4.10" 96 | 97 | /* The size of `int', as computed by sizeof. */ 98 | #define SIZEOF_INT 4 99 | 100 | /* Define to 1 if the `S_IS*' macros in do not work properly. */ 101 | /* #undef STAT_MACROS_BROKEN */ 102 | 103 | /* Define to 1 if you have the ANSI C header files. */ 104 | #define STDC_HEADERS 1 105 | 106 | /* Version number of package */ 107 | #define VERSION "1.4.10" 108 | 109 | /* Define to empty if `const' does not conform to ANSI C. */ 110 | /* #undef const */ 111 | 112 | /* Define to rpl_malloc if the replacement function should be used. */ 113 | /* #undef malloc */ 114 | 115 | /* Define to `unsigned int' if does not define. 
*/ 116 | /* #undef size_t */ 117 | 118 | #ifdef __cplusplus 119 | } 120 | #endif -------------------------------------------------------------------------------- /src/date.c: -------------------------------------------------------------------------------- 1 | /* 2 | * date.c -- functions for dealing with dates. 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | 32 | #include "date.h" 33 | #include "util.h" 34 | 35 | /* Array of days of the week for translating a date */ 36 | const char* day_of_week[] = { "Sun", "Mon", "Tue", 37 | "Wed", "Thu", "Fri", "Sat" }; 38 | 39 | extern const char * 40 | dow_str(int dow) 41 | { 42 | assert (dow >= 0 && dow <= 6); 43 | return day_of_week[dow]; 44 | } 45 | 46 | const char * 47 | date_to_str (struct date *dt) 48 | { 49 | static char buf[32]; 50 | sprintf (buf, "%s %04d/%02d/%02d %02d:%02d:%02d", 51 | dow_str(dt->dow), 52 | dt->year, dt->month, dt->day, 53 | dt->hour, dt->min, dt->sec); 54 | return buf; 55 | } 56 | 57 | void 58 | date_read (struct date *dt, const unsigned char *buf) 59 | { 60 | size_t i = 0; 61 | unsigned char *tmp = (unsigned char *)buf; 62 | dt->year = GETINT16 (tmp + i); i += sizeof (uint16); 63 | dt->month = GETINT16 (tmp + i); i += sizeof (uint16); 64 | dt->day = GETINT16 (tmp + i); i += sizeof (uint16); 65 | dt->hour = GETINT16 (tmp + i); i += sizeof (uint16); 66 | dt->min = GETINT16 (tmp + i); i += sizeof (uint16); 67 | dt->sec = GETINT16 (tmp + i); i += sizeof (uint16); 68 | dt->dow = GETINT16 (tmp + i); i += sizeof (uint16); 69 | } 70 | 71 | 72 | #ifdef __cplusplus 73 | } 74 | #endif -------------------------------------------------------------------------------- /src/date.h: -------------------------------------------------------------------------------- 1 | /* 2 | * date.h -- functions for dealing with dates. 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 
10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifndef DATE_H 23 | #define DATE_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if HAVE_CONFIG_H 30 | # include "config.h" 31 | #endif /* HAVE_CONFIG_H */ 32 | 33 | #include "common.h" 34 | 35 | /* Store a date according to file specification */ 36 | struct date 37 | { 38 | int16 year, month, day; 39 | int16 hour, min, sec; 40 | int16 dow; 41 | }; 42 | 43 | extern const char *date_to_str (struct date* dt); 44 | extern void date_read (struct date *dt, const unsigned char *buf); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | #endif /* DATE_H */ 50 | 51 | -------------------------------------------------------------------------------- /src/debug.c: -------------------------------------------------------------------------------- 1 | /* 2 | * debug.c -- functions for debug output 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 20 | * 21 | */ 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif /* HAVE_CONFIG_H */ 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include "common.h" 31 | 32 | #include "debug.h" 33 | #include "options.h" 34 | 35 | /* print message only when debug on */ 36 | void 37 | debug_print (const char *fmt, ...) 38 | { 39 | 40 | } 41 | 42 | 43 | #ifdef __cplusplus 44 | } 45 | #endif -------------------------------------------------------------------------------- /src/debug.h: -------------------------------------------------------------------------------- 1 | /* 2 | * debug.h -- functions for debug output 3 | * 4 | * Copyright (C)1999-2006 Mark Simpson 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, you can either send email to this 18 | * program's maintainer or write to: The Free Software Foundation, 19 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
20 | * 21 | */ 22 | #ifndef DEBUG_H 23 | #define DEBUG_H 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if HAVE_CONFIG_H 30 | # include "config.h" 31 | #endif /* HAVE_CONFIG_H */ 32 | 33 | extern void debug_print (const char *fmt, ...); 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | #endif /* DEBUG_H */ 39 | 40 | -------------------------------------------------------------------------------- /src/options.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * options.c -- functions for dealing with program options 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
25 | * 26 | */ 27 | #ifdef HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif /* HAVE_CONFIG_H */ 30 | 31 | #include "common.h" 32 | 33 | #include "options.h" 34 | 35 | /* Global variables, used by all (or nearly all) functions */ 36 | int g_flags = NONE; /* program options */ 37 | 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif -------------------------------------------------------------------------------- /src/options.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * options.h -- functions for dealing with program options 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 
25 | * 26 | */ 27 | #ifndef OPTIONS_H 28 | #define OPTIONS_H 29 | 30 | #if HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif /* HAVE_CONFIG_H */ 33 | 34 | #include "common.h" 35 | 36 | /* Global variables, used by all (or nearly all) functions */ 37 | extern int g_flags; /* program options */ 38 | 39 | /* macros for dealing with program flags */ 40 | #define DEBUG_ON ((g_flags)&DBG_OUT) 41 | #define VERBOSE_ON ((g_flags)&VERBOSE) 42 | #define LIST_ONLY ((g_flags)&LIST) 43 | #define SHOW_MIME ((g_flags)&LISTMIME) 44 | #define USE_PATHS ((g_flags)&PATHS) 45 | #define INTERACTIVE ((g_flags)&CONFIRM) 46 | #define OVERWRITE_FILES ((g_flags)&OVERWRITE) 47 | #define NUMBER_FILES ((g_flags)&NUMBERED) 48 | #define CHECKSUM_SKIP ((g_flags)&CHECKSUM_OK) 49 | #define ENCODE_SKIP ((g_flags)&ENCODE_OK) 50 | #define CRUFT_SKIP ((g_flags)&CRUFT_OK) 51 | #define UNIX_FS ((g_flags)&UNIX_PATHS) 52 | #define ABSOLUTE_OK ((g_flags)&ABSOLUTE_PATHS) 53 | 54 | /* flags to modify behaviour of file parsing */ 55 | enum { NONE = 0x00, 56 | VERBOSE = 0x01, 57 | DBG_OUT = 0x02, 58 | LIST = 0x04, 59 | PATHS = 0x08, 60 | OVERWRITE= 0x10, 61 | CONFIRM = 0x20, 62 | NUMBERED = 0x40, 63 | SAVEBODY = 0x80, 64 | LISTMIME = 0x100, 65 | CHECKSUM_OK = 0x200, 66 | ENCODE_OK = 0x400, 67 | CRUFT_OK = 0x800, 68 | UNIX_PATHS = 0x1000, 69 | ABSOLUTE_PATHS = 0x2000 70 | }; 71 | 72 | #endif /* OPTIONS_H */ 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif -------------------------------------------------------------------------------- /src/pole.h: -------------------------------------------------------------------------------- 1 | /* POLE - Portable C++ library to access OLE Storage 2 | Copyright (C) 2002-2005 Ariya Hidayat 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of the authors nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 | THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef POLE_H 30 | #define POLE_H 31 | 32 | #include 33 | #include 34 | 35 | namespace POLE 36 | { 37 | 38 | class StorageIO; 39 | class Stream; 40 | class StreamIO; 41 | 42 | class Storage 43 | { 44 | friend class Stream; 45 | friend class StreamOut; 46 | 47 | public: 48 | 49 | // for Storage::result() 50 | enum { Ok, OpenFailed, NotOLE, BadOLE, UnknownError }; 51 | 52 | /** 53 | * Constructs a storage with name filename. 54 | **/ 55 | Storage( const char* filename ); 56 | 57 | /** 58 | * Destroys the storage. 59 | **/ 60 | ~Storage(); 61 | 62 | /** 63 | * Opens the storage. Returns true if no error occurs. 64 | **/ 65 | bool open(); 66 | 67 | /** 68 | * Closes the storage. 
69 | **/ 70 | void close(); 71 | 72 | /** 73 | * Returns the error code of last operation. 74 | **/ 75 | int result(); 76 | 77 | /** 78 | * Finds all stream and directories in given path. 79 | **/ 80 | std::list entries( const std::string& path = "/" ); 81 | 82 | /** 83 | * Returns true if specified entry name is a directory. 84 | */ 85 | bool isDirectory( const std::string& name ); 86 | 87 | /** 88 | * Finds and returns a stream with the specified name. 89 | * If reuse is true, this function returns the already created stream 90 | * (if any). Otherwise it will create the stream. 91 | * 92 | * When errors occur, this function returns NULL. 93 | * 94 | * You do not need to delete the created stream, it will be handled 95 | * automatically. 96 | **/ 97 | Stream* stream( const std::string& name, bool reuse = true ); 98 | //Stream* stream( const std::string& name, int mode = Stream::ReadOnly, bool reuse = true ); 99 | 100 | private: 101 | StorageIO* io; 102 | 103 | // no copy or assign 104 | Storage( const Storage& ); 105 | Storage& operator=( const Storage& ); 106 | 107 | }; 108 | 109 | class Stream 110 | { 111 | friend class Storage; 112 | friend class StorageIO; 113 | 114 | public: 115 | 116 | /** 117 | * Creates a new stream. 118 | */ 119 | // name must be absolute, e.g "/Workbook" 120 | Stream( Storage* storage, const std::string& name ); 121 | 122 | /** 123 | * Destroys the stream. 124 | */ 125 | ~Stream(); 126 | 127 | /** 128 | * Returns the full stream name. 129 | */ 130 | std::string fullName(); 131 | 132 | /** 133 | * Returns the stream size. 134 | **/ 135 | unsigned long size(); 136 | 137 | /** 138 | * Returns the current read/write position. 139 | **/ 140 | unsigned long tell(); 141 | 142 | /** 143 | * Sets the read/write position. 144 | **/ 145 | void seek( unsigned long pos ); 146 | 147 | /** 148 | * Reads a byte. 149 | **/ 150 | int getch(); 151 | 152 | /** 153 | * Reads a block of data. 
154 | **/ 155 | unsigned long read( unsigned char* data, unsigned long maxlen ); 156 | 157 | /** 158 | * Returns true if the read/write position is past the file. 159 | **/ 160 | bool eof(); 161 | 162 | /** 163 | * Returns true whenever error occurs. 164 | **/ 165 | bool fail(); 166 | 167 | private: 168 | StreamIO* io; 169 | 170 | // no copy or assign 171 | Stream( const Stream& ); 172 | Stream& operator=( const Stream& ); 173 | }; 174 | 175 | } 176 | 177 | #endif // POLE_H 178 | -------------------------------------------------------------------------------- /src/r_pole.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "pole.h" 12 | #include "utf8.h" 13 | 14 | #include "alloc.h" 15 | #include "util.h" 16 | // #include "tnef.h" 17 | // #include "rtf.h" 18 | 19 | std::string MSG_UTF16LE = std::string("001F"); 20 | std::string MSG_BINARY = std::string("0102"); 21 | std::string MSG_TIME = std::string("0040"); 22 | std::string MSG_PROPERTIES = std::string("properties_version1.0"); 23 | 24 | unsigned int get_int_32(unsigned char *p){ 25 | return (unsigned int)((unsigned char)(p)[0] + ((unsigned char)(p)[1]<<8) + ((unsigned char)(p)[2]<<16) + ((unsigned char)(p)[3]<<24)); 26 | } 27 | 28 | //' Is a sequence of raw bytes an RTF document? 
29 | //' 30 | //' @param v a raw vector 31 | //' @export 32 | // [[Rcpp::export]] 33 | bool is_rtf(RawVector v) { 34 | 35 | static const unsigned int rtf_uncompressed_magic = 0x414c454d; 36 | static const unsigned int rtf_compressed_magic = 0x75465a4c; 37 | 38 | /* The header consists of three little-endian 32-bit fields (compressed size, uncompressed size, magic). Anything shorter than 12 bytes cannot hold the magic, so answer "not RTF" instead of reading past the end of the vector. */ 39 | if (v.size() < 12) return false; 40 | 41 | /* Only the magic (third field, byte offset 8) decides the answer; the two size fields are not needed here. */ 42 | 43 | unsigned char *data = (unsigned char *)(v.begin()); 44 | 45 | 46 | 47 | unsigned int magic = get_int_32(data + 8); 48 | 49 | return((magic == rtf_uncompressed_magic) || (magic == rtf_compressed_magic)); 50 | 51 | } 52 | 53 | static const char* rtf_prebuf = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"; 54 | 55 | RawVector decompress_rtf_data(unsigned char *src, size_t lenc, size_t lenu) { 56 | 57 | const size_t rtf_prebuf_len = strlen( rtf_prebuf ); 58 | 59 | int woff, eoff, roff, rlen; 60 | int control, cin, cout, i, j, endflag; 61 | RawVector v(lenu); 62 | unsigned char *dest = &v[0]; 63 | unsigned char dict[4096]; 64 | 65 | /* setup dictionary */ 66 | 67 | memset( dict, 0x0, sizeof(dict) ); 68 | memmove( dict, rtf_prebuf, rtf_prebuf_len ); 69 | 70 | woff = rtf_prebuf_len; 71 | eoff = rtf_prebuf_len; 72 | 73 | /* setup destination */ 74 | 75 | assert( lenu>0 ); /* sanity check */ 76 | 77 | // dest = CHECKED_XCALLOC(unsigned char, lenu); 78 | cout = 0; 79 | 80 | /* setup source */ 81 | 82 | assert( lenc>0 ); 83 | 84 | cin = 0; 85 | 86 | /* processing loop */ 87 | 88 | endflag = 0; 89 | 90 | while (1) { 91 | 92 | if ( endflag ) break; 93 | 94 | /* get control byte */ //Rcout<<"control byte"< lenc ) { 96 | endflag = -1; 97 | break; /* input overrun */ 98 | } 99 | 100 | control = (int)src[cin++]; 101 | 102 | /* handle
control run */ 103 | for ( i=0; i<8; i++ ) { 104 | 105 | if ( endflag ) break; 106 | 107 | if ( control & (1< lenc ){ 111 | endflag = -1; 112 | break; /* input overrun */ 113 | } 114 | 115 | roff = (int)src[cin++]; 116 | rlen = (int)src[cin++]; 117 | 118 | roff = (roff<<4) + (rlen>>4); 119 | rlen = (rlen&0x0f) + 2; 120 | 121 | /* the one true exit test */ //Rcout<<"exit test"< lenu ) { 129 | endflag = -1; 130 | break; /* output overrun */ 131 | } 132 | 133 | for ( j=0; j lenc ) { 146 | endflag = -1; 147 | break; /* input overrun */ 148 | } 149 | 150 | if ( cout+1 > lenu ) { 151 | endflag = -1; 152 | break; /* output overrun */ 153 | } 154 | 155 | /* handle literal */ //Rcout<<"literal"< a.size()) return false ; 209 | return std::equal(a.begin() + a.size() - b.size(), a.end(), b.begin()) ; 210 | } 211 | 212 | List visit(POLE::Storage* storage, std::string path) { 213 | 214 | std::vector keys; 215 | List vals; 216 | 217 | std::list entries; 218 | entries = storage->entries(path); 219 | 220 | std::list::iterator it; 221 | 222 | for (it = entries.begin(); it != entries.end(); ++it) { 223 | 224 | std::string name = *it; 225 | std::string fullname = path + name; 226 | 227 | // std::cout << " Opening Stream " ; 228 | 229 | POLE::Stream* ss = new POLE::Stream(storage, fullname); 230 | 231 | if (!(storage->isDirectory(fullname))) { 232 | 233 | unsigned char *buf = (unsigned char *)malloc(ss->size()); 234 | 235 | unsigned read = ss->read(buf, ss->size()); 236 | 237 | if (read > 0) { 238 | 239 | // Rcpp::Rcout << "NAME: " << fullname << std::endl; 240 | 241 | if (ends_with(fullname, MSG_UTF16LE)) { // UTF-16LE string 242 | 243 | // all this to make the string content useful 244 | 245 | int wlen = (ss->size())/2; 246 | char16_t *dest = new char16_t[wlen + 1]; 247 | unsigned char *ptr = buf; 248 | 249 | for (int i=0; i utf8result; 259 | utf8::utf16to8(dest, dest + wlen, std::back_inserter(utf8result)); 260 | vals.push_back(std::string(utf8result.begin(), utf8result.end())); 261 
| 262 | delete[] dest; 263 | 264 | } else if (ends_with(fullname, MSG_BINARY)) { // binary content 265 | 266 | keys.push_back(fullname); 267 | vals.push_back(RawVector(buf, buf+read)); 268 | 269 | } else if (ends_with(fullname, MSG_TIME)) { // time content 270 | 271 | keys.push_back(fullname); 272 | vals.push_back(RawVector(buf, buf+read)); 273 | 274 | } else if (ends_with(fullname, MSG_PROPERTIES)) { // properties content 275 | 276 | keys.push_back(fullname); 277 | vals.push_back(RawVector(buf, buf+read)); 278 | 279 | } else { 280 | 281 | //std::cout << fullname.c_str() << std::endl; 282 | 283 | } 284 | 285 | } 286 | 287 | if (buf) free(buf); 288 | 289 | } 290 | 291 | // std::cout << "- Deleting Stream" << std::endl; 292 | delete ss; 293 | 294 | if (storage->isDirectory(fullname)) { 295 | 296 | List res = visit(storage, fullname + "/"); 297 | 298 | std::vector rk = res["keys"]; 299 | 300 | if (rk.size() > 0) { 301 | 302 | keys.insert(keys.end(), rk.begin(), rk.end()); 303 | 304 | List::iterator lit; 305 | List rv = res["values"]; 306 | 307 | for (lit = rv.begin(); lit != rv.end(); ++lit) { 308 | vals.push_back(*lit); 309 | } 310 | 311 | } 312 | 313 | } 314 | 315 | } 316 | 317 | return( 318 | List::create( 319 | Named("keys") = keys, Named("values") = vals 320 | ) 321 | ); 322 | 323 | } 324 | 325 | 326 | // [[Rcpp::export]] 327 | List int_read_msg(std::string path) { 328 | 329 | // std::cout << "Opening File" << std::endl ; 330 | 331 | POLE::Storage* storage = new POLE::Storage(path.c_str()); 332 | 333 | storage->open(); 334 | 335 | if (storage->result() == POLE::Storage::Ok) { 336 | 337 | List l = visit(storage, "/"); 338 | 339 | if (storage) { 340 | // std::cout << "Closing File" << std::endl << std::endl; 341 | storage->close(); 342 | delete storage; 343 | } 344 | 345 | return(l); 346 | 347 | } else { 348 | 349 | if (storage) { 350 | // std::cout << "Closing File" << std::endl << std::endl; 351 | storage->close(); 352 | delete storage; 353 | } 354 | 355 | 
return(wrap(NULL)); 356 | 357 | } 358 | 359 | } 360 | -------------------------------------------------------------------------------- /src/utf8.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "utf8/checked.h" 32 | #include "utf8/unchecked.h" 33 | 34 | #endif // header guard 35 | -------------------------------------------------------------------------------- /src/utf8/core.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include 32 | 33 | namespace utf8 34 | { 35 | // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers 36 | // You may need to change them to match your system. 37 | // These typedefs have the same names as ones from cstdint, or boost/cstdint 38 | typedef unsigned char uint8_t; 39 | typedef unsigned short uint16_t; 40 | typedef unsigned int uint32_t; 41 | 42 | // Helper code - not intended to be directly called by the library users. May be changed at any time 43 | namespace internal 44 | { 45 | // Unicode constants 46 | // Leading (high) surrogates: 0xd800 - 0xdbff 47 | // Trailing (low) surrogates: 0xdc00 - 0xdfff 48 | const uint16_t LEAD_SURROGATE_MIN = 0xd800u; 49 | const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; 50 | const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; 51 | const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; 52 | const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); 53 | const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; 54 | 55 | // Maximum valid value for a Unicode code point 56 | const uint32_t CODE_POINT_MAX = 0x0010ffffu; 57 | 58 | template 59 | inline uint8_t mask8(octet_type oc) 60 | { 61 | return static_cast(0xff & oc); 62 | } 63 | template 64 | inline uint16_t mask16(u16_type oc) 65 | { 66 | return static_cast(0xffff & oc); 67 | } 68 | template 69 | inline bool is_trail(octet_type oc) 70 | { 71 | return ((utf8::internal::mask8(oc) >> 6) == 0x2); 72 | } 73 | 74 | template 75 | inline bool is_lead_surrogate(u16 cp) 76 | { 77 | return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); 78 | } 79 | 80 | template 81 | inline bool is_trail_surrogate(u16 cp) 82 | { 83 | return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); 84 | } 85 | 86 | template 87 | inline bool is_surrogate(u16 cp) 88 | { 89 | return (cp >= 
LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); 90 | } 91 | 92 | template 93 | inline bool is_code_point_valid(u32 cp) 94 | { 95 | return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); 96 | } 97 | 98 | template 99 | inline typename std::iterator_traits::difference_type 100 | sequence_length(octet_iterator lead_it) 101 | { 102 | uint8_t lead = utf8::internal::mask8(*lead_it); 103 | if (lead < 0x80) 104 | return 1; 105 | else if ((lead >> 5) == 0x6) 106 | return 2; 107 | else if ((lead >> 4) == 0xe) 108 | return 3; 109 | else if ((lead >> 3) == 0x1e) 110 | return 4; 111 | else 112 | return 0; 113 | } 114 | 115 | template 116 | inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) 117 | { 118 | if (cp < 0x80) { 119 | if (length != 1) 120 | return true; 121 | } 122 | else if (cp < 0x800) { 123 | if (length != 2) 124 | return true; 125 | } 126 | else if (cp < 0x10000) { 127 | if (length != 3) 128 | return true; 129 | } 130 | 131 | return false; 132 | } 133 | 134 | enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; 135 | 136 | /// Helper for get_sequence_x 137 | template 138 | utf_error increase_safely(octet_iterator& it, octet_iterator end) 139 | { 140 | if (++it == end) 141 | return NOT_ENOUGH_ROOM; 142 | 143 | if (!utf8::internal::is_trail(*it)) 144 | return INCOMPLETE_SEQUENCE; 145 | 146 | return UTF8_OK; 147 | } 148 | 149 | #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} 150 | 151 | /// get_sequence_x functions decode utf-8 sequences of the length x 152 | template 153 | utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) 154 | { 155 | if (it == end) 156 | return NOT_ENOUGH_ROOM; 157 | 158 | code_point = utf8::internal::mask8(*it); 159 | 160 | return UTF8_OK; 161 | } 162 | 163 | template 164 | utf_error get_sequence_2(octet_iterator& it, 
octet_iterator end, uint32_t& code_point) 165 | { 166 | if (it == end) 167 | return NOT_ENOUGH_ROOM; 168 | 169 | code_point = utf8::internal::mask8(*it); 170 | 171 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 172 | 173 | code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); 174 | 175 | return UTF8_OK; 176 | } 177 | 178 | template 179 | utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) 180 | { 181 | if (it == end) 182 | return NOT_ENOUGH_ROOM; 183 | 184 | code_point = utf8::internal::mask8(*it); 185 | 186 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 187 | 188 | code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); 189 | 190 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 191 | 192 | code_point += (*it) & 0x3f; 193 | 194 | return UTF8_OK; 195 | } 196 | 197 | template 198 | utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) 199 | { 200 | if (it == end) 201 | return NOT_ENOUGH_ROOM; 202 | 203 | code_point = utf8::internal::mask8(*it); 204 | 205 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 206 | 207 | code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); 208 | 209 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 210 | 211 | code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; 212 | 213 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 214 | 215 | code_point += (*it) & 0x3f; 216 | 217 | return UTF8_OK; 218 | } 219 | 220 | #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR 221 | 222 | template 223 | utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) 224 | { 225 | // Save the original value of it so we can go back in case of failure 226 | // Of course, it does not make much sense with i.e. 
stream iterators 227 | octet_iterator original_it = it; 228 | 229 | uint32_t cp = 0; 230 | // Determine the sequence length based on the lead octet 231 | typedef typename std::iterator_traits::difference_type octet_difference_type; 232 | const octet_difference_type length = utf8::internal::sequence_length(it); 233 | 234 | // Get trail octets and calculate the code point 235 | utf_error err = UTF8_OK; 236 | switch (length) { 237 | case 0: 238 | return INVALID_LEAD; 239 | case 1: 240 | err = utf8::internal::get_sequence_1(it, end, cp); 241 | break; 242 | case 2: 243 | err = utf8::internal::get_sequence_2(it, end, cp); 244 | break; 245 | case 3: 246 | err = utf8::internal::get_sequence_3(it, end, cp); 247 | break; 248 | case 4: 249 | err = utf8::internal::get_sequence_4(it, end, cp); 250 | break; 251 | } 252 | 253 | if (err == UTF8_OK) { 254 | // Decoding succeeded. Now, security checks... 255 | if (utf8::internal::is_code_point_valid(cp)) { 256 | if (!utf8::internal::is_overlong_sequence(cp, length)){ 257 | // Passed! Return here. 
258 | code_point = cp; 259 | ++it; 260 | return UTF8_OK; 261 | } 262 | else 263 | err = OVERLONG_SEQUENCE; 264 | } 265 | else 266 | err = INVALID_CODE_POINT; 267 | } 268 | 269 | // Failure branch - restore the original value of the iterator 270 | it = original_it; 271 | return err; 272 | } 273 | 274 | template 275 | inline utf_error validate_next(octet_iterator& it, octet_iterator end) { 276 | uint32_t ignored; 277 | return utf8::internal::validate_next(it, end, ignored); 278 | } 279 | 280 | } // namespace internal 281 | 282 | /// The library API - functions intended to be called by the users 283 | 284 | // Byte order mark 285 | const uint8_t bom[] = {0xef, 0xbb, 0xbf}; 286 | 287 | template 288 | octet_iterator find_invalid(octet_iterator start, octet_iterator end) 289 | { 290 | octet_iterator result = start; 291 | while (result != end) { 292 | utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); 293 | if (err_code != internal::UTF8_OK) 294 | return result; 295 | } 296 | return result; 297 | } 298 | 299 | template 300 | inline bool is_valid(octet_iterator start, octet_iterator end) 301 | { 302 | return (utf8::find_invalid(start, end) == end); 303 | } 304 | 305 | template 306 | inline bool starts_with_bom (octet_iterator it, octet_iterator end) 307 | { 308 | return ( 309 | ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && 310 | ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && 311 | ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) 312 | ); 313 | } 314 | 315 | //Deprecated in release 2.3 316 | template 317 | inline bool is_bom (octet_iterator it) 318 | { 319 | return ( 320 | (utf8::internal::mask8(*it++)) == bom[0] && 321 | (utf8::internal::mask8(*it++)) == bom[1] && 322 | (utf8::internal::mask8(*it)) == bom[2] 323 | ); 324 | } 325 | } // namespace utf8 326 | 327 | #endif // header guard 328 | 329 | 330 | -------------------------------------------------------------------------------- /src/utf8/unchecked.h: 
-------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "core.h" 32 | 33 | namespace utf8 34 | { 35 | namespace unchecked 36 | { 37 | template 38 | octet_iterator append(uint32_t cp, octet_iterator result) 39 | { 40 | if (cp < 0x80) // one octet 41 | *(result++) = static_cast(cp); 42 | else if (cp < 0x800) { // two octets 43 | *(result++) = static_cast((cp >> 6) | 0xc0); 44 | *(result++) = static_cast((cp & 0x3f) | 0x80); 45 | } 46 | else if (cp < 0x10000) { // three octets 47 | *(result++) = static_cast((cp >> 12) | 0xe0); 48 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); 49 | *(result++) = static_cast((cp & 0x3f) | 0x80); 50 | } 51 | else { // four octets 52 | *(result++) = static_cast((cp >> 18) | 0xf0); 53 | *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); 54 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); 55 | *(result++) = static_cast((cp & 0x3f) | 0x80); 56 | } 57 | return result; 58 | } 59 | 60 | template 61 | uint32_t next(octet_iterator& it) 62 | { 63 | uint32_t cp = utf8::internal::mask8(*it); 64 | typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); 65 | switch (length) { 66 | case 1: 67 | break; 68 | case 2: 69 | it++; 70 | cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); 71 | break; 72 | case 3: 73 | ++it; 74 | cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); 75 | ++it; 76 | cp += (*it) & 0x3f; 77 | break; 78 | case 4: 79 | ++it; 80 | cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); 81 | ++it; 82 | cp += (utf8::internal::mask8(*it) << 6) & 0xfff; 83 | ++it; 84 | cp += (*it) & 0x3f; 85 | break; 86 | } 87 | ++it; 88 | return cp; 89 | } 90 | 91 | template 92 | uint32_t peek_next(octet_iterator it) 93 | { 94 | return utf8::unchecked::next(it); 95 | } 96 | 97 | template 98 | uint32_t prior(octet_iterator& it) 99 | { 
100 | while (utf8::internal::is_trail(*(--it))) ; 101 | octet_iterator temp = it; 102 | return utf8::unchecked::next(temp); 103 | } 104 | 105 | // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) 106 | template 107 | inline uint32_t previous(octet_iterator& it) 108 | { 109 | return utf8::unchecked::prior(it); 110 | } 111 | 112 | template 113 | void advance (octet_iterator& it, distance_type n) 114 | { 115 | for (distance_type i = 0; i < n; ++i) 116 | utf8::unchecked::next(it); 117 | } 118 | 119 | template 120 | typename std::iterator_traits::difference_type 121 | distance (octet_iterator first, octet_iterator last) 122 | { 123 | typename std::iterator_traits::difference_type dist; 124 | for (dist = 0; first < last; ++dist) 125 | utf8::unchecked::next(first); 126 | return dist; 127 | } 128 | 129 | template 130 | octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) 131 | { 132 | while (start != end) { 133 | uint32_t cp = utf8::internal::mask16(*start++); 134 | // Take care of surrogate pairs first 135 | if (utf8::internal::is_lead_surrogate(cp)) { 136 | uint32_t trail_surrogate = utf8::internal::mask16(*start++); 137 | cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; 138 | } 139 | result = utf8::unchecked::append(cp, result); 140 | } 141 | return result; 142 | } 143 | 144 | template 145 | u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) 146 | { 147 | while (start < end) { 148 | uint32_t cp = utf8::unchecked::next(start); 149 | if (cp > 0xffff) { //make a surrogate pair 150 | *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); 151 | *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); 152 | } 153 | else 154 | *result++ = static_cast(cp); 155 | } 156 | return result; 157 | } 158 | 159 | template 160 | octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) 161 
| { 162 | while (start != end) 163 | result = utf8::unchecked::append(*(start++), result); 164 | 165 | return result; 166 | } 167 | 168 | template 169 | u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) 170 | { 171 | while (start < end) 172 | (*result++) = utf8::unchecked::next(start); 173 | 174 | return result; 175 | } 176 | 177 | // The iterator class 178 | template 179 | class iterator : public std::iterator { 180 | octet_iterator it; 181 | public: 182 | iterator () {} 183 | explicit iterator (const octet_iterator& octet_it): it(octet_it) {} 184 | // the default "big three" are OK 185 | octet_iterator base () const { return it; } 186 | uint32_t operator * () const 187 | { 188 | octet_iterator temp = it; 189 | return utf8::unchecked::next(temp); 190 | } 191 | bool operator == (const iterator& rhs) const 192 | { 193 | return (it == rhs.it); 194 | } 195 | bool operator != (const iterator& rhs) const 196 | { 197 | return !(operator == (rhs)); 198 | } 199 | iterator& operator ++ () 200 | { 201 | ::std::advance(it, utf8::internal::sequence_length(it)); 202 | return *this; 203 | } 204 | iterator operator ++ (int) 205 | { 206 | iterator temp = *this; 207 | ::std::advance(it, utf8::internal::sequence_length(it)); 208 | return temp; 209 | } 210 | iterator& operator -- () 211 | { 212 | utf8::unchecked::prior(it); 213 | return *this; 214 | } 215 | iterator operator -- (int) 216 | { 217 | iterator temp = *this; 218 | utf8::unchecked::prior(it); 219 | return temp; 220 | } 221 | }; // class iterator 222 | 223 | } // namespace utf8::unchecked 224 | } // namespace utf8 225 | 226 | 227 | #endif // header guard 228 | 229 | -------------------------------------------------------------------------------- /src/util.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * util.c -- Utility functions 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 
10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 25 | * 26 | */ 27 | #ifdef HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif /* HAVE_CONFIG_H */ 30 | 31 | #include "common.h" 32 | 33 | #include "util.h" 34 | #include "options.h" 35 | 36 | /* Needed to transform char buffers into little endian numbers */ 37 | uint32_t GETINT32(unsigned char *p) 38 | { /* each byte is widened to uint32 BEFORE shifting, so a top byte >= 0x80 is never shifted into the sign bit of a promoted int */ 39 | return ((uint32)(p)[0] \ 40 | |((uint32)(p)[1]<<8) \ 41 | |((uint32)(p)[2]<<16) \ 42 | |((uint32)(p)[3]<<24)); 43 | } 44 | 45 | uint16_t GETINT16 (unsigned char* p) 46 | { 47 | return (uint16)((uint8)(p)[0]+((uint8)(p)[1]<<8)); 48 | } 49 | 50 | uint8 GETINT8 (unsigned char *p) 51 | { 52 | return (uint8)(p)[0]; 53 | } 54 | 55 | /* Read exactly n bytes from fp into buf and return buf. A short read is reported with perror() but is not fatal; the unread tail of buf is zeroed so callers never decode uninitialised bytes. */ unsigned char* 56 | getbuf (FILE *fp, unsigned char buf[], size_t n) 57 | { 58 | size_t got = fread (buf, 1, n, fp); if (got != n) 59 | { 60 | perror ("Unexpected end of input"); while (got < n) buf[got++] = 0; 61 | //exit (1); 62 | } 63 | return buf; 64 | } 65 | 66 | uint32_t 67 | geti32 (FILE *fp) 68 | { 69 | unsigned char buf[4]; 70 | return (uint32)GETINT32(getbuf(fp, buf, 4)); 71 | } 72 | uint16_t 73 | geti16 (FILE *fp) 74 | { 75 | unsigned char buf[2]; 76 | return (uint16)GETINT16(getbuf(fp, buf, 2)); 77 | } 78 | 79 | uint8 80 | geti8(FILE *fp) 81 | { 82 | unsigned char
buf[1]; 83 | return (uint8)GETINT8(getbuf(fp, buf, 1)); 84 | } 85 | 86 | unsigned char* 87 | unicode_to_utf8 (size_t len, unsigned char* buf) 88 | { 89 | int i = 0; 90 | int j = 0; 91 | unsigned char *utf8 = malloc (3 * len/2 + 1); /* won't get any longer than this */ 92 | 93 | for (i = 0; i < len - 1; i += 2) 94 | { 95 | uint32 c = GETINT16(buf + i); 96 | if (c <= 0x007f) 97 | { 98 | utf8[j++] = 0x00 | ((c & 0x007f) >> 0); 99 | } 100 | else if (c < 0x07ff) 101 | { 102 | utf8[j++] = 0xc0 | ((c & 0x07c0) >> 6); 103 | utf8[j++] = 0x80 | ((c & 0x003f) >> 0); 104 | } 105 | else 106 | { 107 | utf8[j++] = 0xe0 | ((c & 0xf000) >> 12); 108 | utf8[j++] = 0x80 | ((c & 0x0fc0) >> 6); 109 | utf8[j++] = 0x80 | ((c & 0x003f) >> 0); 110 | } 111 | } 112 | 113 | utf8[j] = '\0'; 114 | 115 | return utf8; 116 | } 117 | 118 | 119 | #ifdef __cplusplus 120 | } 121 | #endif -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * util.h -- Utility functions 8 | * 9 | * Copyright (C)1999-2006 Mark Simpson 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU General Public License as published by 13 | * the Free Software Foundation; either version 2, or (at your option) 14 | * any later version. 15 | * 16 | * This program is distributed in the hope that it will be useful, 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | * GNU General Public License for more details. 
20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, you can either send email to this 23 | * program's maintainer or write to: The Free Software Foundation, 24 | * Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA. 25 | * 26 | */ 27 | #ifndef UTIL_H 28 | #define UTIL_H 29 | 30 | #if HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif /* HAVE_CONFIG_H */ 33 | 34 | #ifdef WIN64 35 | #define IS_WINDOWS 36 | #elif defined WIN32 37 | #define IS_WINDOWS 38 | #endif 39 | 40 | #ifdef __APPLE__ 41 | typedef signed char int8_t; 42 | typedef unsigned char uint8_t; 43 | typedef signed short int int16_t; 44 | typedef unsigned short int uint16_t; 45 | typedef signed int int32_t; 46 | typedef unsigned int uint32_t; 47 | typedef unsigned long long uint64_t; 48 | typedef long long int64_t; 49 | #endif 50 | 51 | #ifdef IS_WINDOWS 52 | typedef signed char int8_t; 53 | typedef unsigned char uint8_t; 54 | typedef signed short int int16_t; 55 | typedef unsigned short int uint16_t; 56 | typedef signed int int32_t; 57 | typedef unsigned int uint32_t; 58 | typedef unsigned long long uint64_t; 59 | typedef long long int64_t; 60 | #endif 61 | 62 | #include "common.h" 63 | 64 | extern uint32_t GETINT32(unsigned char*p); 65 | extern uint16_t GETINT16(unsigned char*p); 66 | extern uint8_t GETINT8(unsigned char*p); 67 | 68 | extern unsigned char* getbuf (FILE *fp, unsigned char buf[], size_t n); 69 | extern uint32_t geti32(FILE *fp); 70 | extern uint16_t geti16(FILE *fp); 71 | extern uint8_t geti8(FILE *fp); 72 | 73 | extern unsigned char* unicode_to_utf8 (size_t len, unsigned char*buf); 74 | 75 | #endif /* UTIL_H */ 76 | 77 | #ifdef __cplusplus 78 | } 79 | #endif -------------------------------------------------------------------------------- /src/xstrdup.c: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* 7 | * 
/*
 * Duplicate str with strdup(), tolerating a NULL argument.
 *
 * Returns NULL when str is NULL.  Otherwise alloc_limit_assert() is called
 * with the label "xstrdup" and strlen(str) before the copy is made
 * (presumably this enforces the configured allocation limit -- see alloc.h),
 * and the result of strdup() is returned unchanged.
 *
 * Uses strlen() and strdup() from <string.h>.
 */
char *
xstrdup (const char *str)
{
  if (str == NULL)
    return NULL;

  alloc_limit_assert ((char *)"xstrdup", strlen (str));
  return strdup (str);
}
context("basic functionality")

# Null-coalescing helper: fall back to `y` only when `x` is NULL.
`%||%` <- function(x, y) {
  if (is.null(x)) y else x
}

# Path to the sample message shipped in inst/extdata.
sample_msg_path <- function() {
  system.file("extdata/unicode.msg", package = "msgxtractr")
}

test_that("read_msg() extracts subject, sender and recipient", {
  path <- sample_msg_path()
  # system.file() returns "" when the file is missing; fail with a clear
  # message instead of an obscure read error.
  expect_true(nzchar(path))

  msg <- read_msg(path)

  expect_equal(msg$subject, "Test for TIF files")
  expect_equal(msg$sender$sender_name, "Brian Zhou")
  expect_equal(msg$display_envelope$display_to, "brianzhou@me.com")
})

test_that("reading the same message 100 times yields 100 rows", {
  # CRAN-able test for file handle disposition: read the file repeatedly
  # and bind one row per read.
  path <- sample_msg_path()

  rows <- lapply(seq_len(100), function(i) {
    msg <- read_msg(path)

    data.frame(
      date = msg$header$Date %||% NA,
      sender_name = msg$sender$sender_name %||% NA,
      display_to = msg$display_envelope$display_to %||% NA,
      subject = msg$subject %||% NA,
      body = msg$body$text %||% NA,
      stringsAsFactors = FALSE
    )
  })
  mail_df <- do.call(rbind, rows)

  expect_equal(nrow(mail_df), 100)
})

test_that("tidy_msg() results can be row-bound", {
  path <- sample_msg_path()

  tidy_rows <- lapply(seq_len(10), function(i) {
    tidy_msg(read_msg(path))
  })
  res <- do.call(rbind.data.frame, tidy_rows)

  expect_equal(nrow(res), 10)
})