├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── 1.ogg ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── audio_cognizers.R ├── cognizer.R ├── image_cognizers.R └── text_cognizers.R ├── README.Rmd ├── README.md ├── appveyor.yml ├── codecov.yml ├── inst └── extdata │ ├── audio │ └── HelloWorld-16bit.wav │ ├── images_faces │ ├── Einstein_laughing.jpg │ └── wkd_birthofinternet_1220-10.jpg │ ├── images_text │ └── Did_that_billboard_just_change.jpg │ └── text │ └── text_examples.txt ├── man ├── audio_text.Rd ├── cognizer.Rd ├── image_classify.Rd ├── text_audio.Rd ├── text_personality.Rd ├── text_sentiment.Rd ├── text_tone.Rd └── text_translate.Rd └── tests ├── testthat.R └── testthat ├── test_audio.R ├── test_image.R └── test_text.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^notes\.*.*$ 4 | ^README\..*$ 5 | ^.*\.ogg$ 6 | ^\.travis\.yml$ 7 | .Renviron 8 | codecov.yml 9 | ^appveyor\.yml$ 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Renviron 5 | cognizer.Rproj 6 | notes* 7 | .DS_Store 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | matrix: 4 | include: 5 | - os: linux 6 | dist: precise 7 | sudo: false 8 | - os: linux 9 | dist: trusty 10 | env: 11 | - R_CODECOV=true 12 | - os: osx 13 | osx_image: xcode8 14 | before_install: 15 | - brew install curl 16 | - brew link --force curl 17 | latex: false 18 | - os: osx 19 | osx_image: xcode7.3 20 | latex: false 21 | addons: 22 | apt: 23 | packages: 24 | - libcurl4-openssl-dev 25 | env: 26 | global: 27 | - secure: 
vBiF8R+JSSJLqR3eqT+nCykn5gALoBZ/LVC3NFd60HZdwC3Dclx5fEdxPZfLe/G0ABUgQq5HNdOR2kQfqwyitnHzALdhakGv53t1+pPVKFAm4lc9HpWjOLFSHOTtMnZwwHx78q8tAr9NLn9xve93sdUuXVwzWOXZlMqsr201sXDBA2g5EBxP94wmiR5K+tqBKl+B+Mgl35lhtWLCTctCc4Tp+bncSSefIrD5LqNjZU3bZkGj62GXn+jHpEOB5OMSM8PXrVCwtGkRpP07Ls9kQ5z0G1AQY6sJR/ShgAdXi7QDjs0sQHAidgTdnlQgvKQAvJYeTY34uHN5QzjoX50PEzfU9JEDLL8u4KmDzFSSMLIdG/SjXjDKeutWc+odtbBrwe1/eJqHP/g9H4u8hdxF/n1KzKn4UO1JuCWXKl+bJjaaGDTyY+X4F51No6DuDJW+euP3JSqNyqwjrSWYkj/qGF05pTlTHQoiIA599az+VrTKC10IQuYXMynh5+x2Ht6Y5oA3pRln7B2XwrPIl78rE/643tj6OvyNFX2dkWQvDVwFUXNm3GTNrW2ZcA//j052GTqYJqYQpLz8RrNaTmSdGSGB/xZmEvPo9uFLKRaQ1QETmLahK6rL/+x+Vg3sa0x9mm2JS57xrEQq7cSarlUAeFod2nA3khEadXz7zNpffvU= 28 | - secure: oEJoBeQfdvVRCmqtg9j+WdypUyHlCbjh6J7cPfYUVgf+jrNZHvt4gVZcSKE1632G+PVJ5iveDikRNhFGFDGZ57q8XKaq9vhpPWQjeLMNIMkozf3in1yMbXT2dVAfXzlsaMWEiOKsGh9wiTy2vCBiSsfrVumT63v3UnNTS4O60oD5C+13obbrG1fa/hRZXQ6QWN5iCsRMxSmmakOJfHNIW5dLb0Y7Pe8NC9l53vuex+eB+4OBgFsiGscK+UkECFCHduXmWePItos7wDSYQD4MHkoRQ2CZp7r7leTeEiOLYW3FcHaTtDm+KoDHrLz29HjFzQ2BPqcyTfV6S+g3fwEMkJ/5+bWUrrSOnX3hkA73K/IEE+fve3G/p4Biko+PevdZjBgM+rUJfpzM9bY0YCoHPjwgD6jhjmHrb4kmXqyIX6uO1BtL72MFGIWDziDQis+eRulGKwjKes4w7aymSPbA9Zy5AKvCwalvchE4XGomOR5hsSF+y3mwbLH28E48u9SyoyeZcJRu7DCPLWosTdpZh2M5j3DnhEbIanMBhK3hf9Ke+rRZrwl8zwdC0wwfWTpMtLw+GXI54TqQOa02UVQ5uxSQnK0uiZjnm4w15pTyzb6p4u4x44W9MN5Pb6PIrS8oLBdW4EoLgG4LCMJH27pAJ3elNkDLwSZ6nSROuM9cTq0= 29 | - secure: 
Ik2UQhLdBFwWstYdf+bk7CbM5crcM1ndcVl68Ebb2IxlxZMP/U3fHqWH4q76AQaA2gWxNnLgmrNy3DeHxfUW0S3vZUFg0AlIM6yK5T3PiTEtjxF3Ij2nXadpMFhSI+YaNZspEzp4xyofYZFx0fMDot50Sg6pqFowwDhP9NBHcPzo0yshjzhGthJuXIVktagk9emRfnsxMCBqHZ+ptvZAlD4k7tD24jg31pl5612HAIKofH9RQmhoTR2YDwbovhsIRy39tgXdPHWkuoG+8SRZ4QvMXbZYf8ktgC1xec1HAA36qTA36zi/N7gAX0D8SzVEfUrUYFsVCd/+bvF08Dx2VgUGtU5oH5VEjm3R/rrjkWdwHHLkcQlqsntOUyywHW6pGDUrMNdQLZMgF7mWxMq+908SZArbHOWYZOAcRfVxgya6xLUPY5GOnRL0lhlnAWqATkVQhqK+dcB6neVZyuVJJF8tMxhwYDOkeMfwvnp2ny9gpzfOL/fRXVuY6ovDQOJwlwM14e5wj9iP5mlZFl0/pXsroKQqjp+GsSJVV8Lxj6SfXQxUmh7qmcNXRIs1pfJVGCvPlEB21Su6AAXfLXoggALrxR0njTMVY72FGaQRQDrBz3FEjiAJS0H5aCAx66mMJ5Ksm+/5B8q2V543pB0/5+eU2obm49i3Z+x8xNdWUgk= 30 | - secure: hUQ/WoV6itx21ZrFdtrL7MGq1cm5VQHrkvUPdAwkCBZWAI13LvmqUl3TDKvIuPGG6X+nPcIuHiZIWceFnC5r1jPcdwd63LkjA+eBF+HoQ2JpiES9+AUgGyrcSgFN7445GpTH9oQ2zU6/DtsttZF0ejFiY48xq3gbzep0AK40Rt/aCGWU/pepf7muX5slrT9DUEcSYVVmIDlEqoLQuuqVLkDXWRunxBMKqMj5DKpS3T2KMR991CdwoVF1SQBoCibqugPKka5AGyMYZMFziHxzxUvOTCgxzkLmGhHoin4I3DxvQ66gCdgBYMQdbxTchm2bMOuTU1iIQf42tbqwo+Afz0Gi2NpjKg5Co0G7p2td7yXw1YLzBLT1/CJjfn0XYVp3YXCcLmexzlCPeO1/WP2qrRcMZhhbZzKJHT3JEKrHy4DMzvKEtybKfPfmmRo6dTCpjPHHHV92/SQGr4PqwDOj98fdueeRT6emHSUjsGyNz2DI8i4JV4EBv0MTEw6xhwbJbTndYmL+TvkINZqkbp8WKYoXKJVj9sZ7DxG2NSV4zGmjZRYYaK4sIuxqXz18s0aEQjtmtita0om7UBenHSzVRNT7V1JR57IH8dR8ViAamCasxfYSz2HUXFqO3rg0r9tCh3w9yhblREqqFke/13a8cBY6JlRfs1bQx4Avgv5WcXQ= 31 | - secure: 
QmNUrM7VkdoWyn/0T92ARcG5IHU0aqHTLfBg5AkCYxawXSB08aqnE3VWk8I+OGfeh6rn1NNtTh8hu1oH6nxO337oKhEDPpZRQ+1aaDh0kbCzlWEZ3YFOdpViGA5AQkq3rhKkleZFderWwEyWKKDBiqkQTbUVbromYdmx5xc/Mem6/0ID1drySSjLDWYk99AO+4cZjLv1A/4HvCTVSJHyN3aUrAmGdsZjyGKIURLx2tg4AQG32F07gbkJbZkcJV3wuFgSTAl2ygk3LmcFGB0ca5k0VxbOqRS9TWUVF0ji354auwZQWPa1uPfbCNcrRhyFuD8CmJczTpaPKnHeuMZwlo7kzyvs2/uj5PBL86ZBM7rUns0BQTWy6vf0s8IDntdv5SMpUyVKf5tAG6CHRLoDm1nZD8S+2/bEcrDTXr6+BQxc8V5oo5Xk7+pMol2CRd2rVeOCePd064AWw7suoIptODeB+KcA26SUQR9TUkLXlWIwJF/YvM5UIBNTHlhFLwcO7BWg5l8CF0PaZn0meTniEKsZD3FMmSoJUQyHSxSZP6h1DJyEpcg1SqLmAnF8ZtDkJwX0VODJDI31n61xvZLUWfVVfg7ajt7SfRuhlv97FlFEim3u2LSJ2krCHMGU547GvYWbzmazHFDmb2I99qIyzXwGa9Sggom68bGavEj+OTk= 32 | - secure: DVChKxq0Wk95v8sN5U2FrtunPwF1f5x1Iuy6RNPQ5K7w92PvDW7nJB/Fg1zS7EuvCoOCPoCQAXAjiFf4jyvdv7FkpXabHhjfSqUedju6uqfY/8VVjw7mWzDgMONxAom63n7+CztyrexuLQCOspELloax/vmy7iJph1DGya6nU9BrHSEwd9zqbIUi/Bzfvt3Q26AAO67/Op4CCSkxEi+4OzRILWgaLPBcByimyK5Wy5ETMMGxO9ybROJKH2Uw4cBSlntxurrJxZwIIOseP5cST5oEo/xEX6iNRaG1C1Tt2Dxxhe/WN/tOh+98yi0KehgljgNUc/xNcnyXZS6Y6CXKftx7aC/qO6pFhxe2M6RmEaF7iq6qLZjXXRJejZwDDjFo26MqM6LkA9mCd8KTphsP+h9zzlyU+bXn0Qpk77PpOg3h4QvgQ6Zlgxmhvguw1GcqIdfWRsDBXqixkZPGjckG4wHcDeoJNEPXxhFJyng7gl6ZQBPpV8bzxrHPT3AgfNUIQQIK4uQo6BCpjHacj25+9cwSvfMK1wyGHTlV7YZxLy3SLuvVKo0kdiIvrRiRz4uo+40rCWaDFtThB+ehSyOg4YcfuhhrNAtlisB1MkyCN+qU6NET5ceDOB1NfI3KTbwb1l7p2qfuUpnDpz+zovmHF+77ynoqI0JrEM9+bxP8WrY= 33 | - secure: 
P6XSphhEfUPkWDLPP6v37N2bQ6rYjFNl5vKtYBqmuyBJ6lHRHn1qti2fpDCsT5/IyJEgWMU+4ZNY0dyQxIjXdOYc3LTx8HcS3mdZXdg+zkA1x34U/37DPFC0YCDWeEAsIXwhfEt3Jl1j9xJiFLwPp4jtssVZOBzdk12PcRG+d6/pBZ6HKq1tAmsPVDq1g4nf3a5LjEHFT6B1X/7pbi0WK4yK2cF9d29+0Zt2Z+ueDYUF+2BpSygYXJFcfy/stzJECXUTawocIEdN+wCCENfKyq5bxnhEUfjASsVZXsQQ9BUuRTJxpeZuOGoqGpf5LM74JzWICnV2QMX0nY548jJzd5rCQqNc2SL/aNsjpXh7Cc3Hnz5ncAV2GE2rCjR88/VNGr6ntApKBgp2MHMbRU+OJV41PLotL4WM8iv8x/Vol0wyrEwUoe6bLTRiqi8a25VomwTE5NGt4xDOFXUq6h+0jt+9TfBMlFOw0pb8yBegobPRBevEFbmWRPTrgwdm7b4fkitlThtDI5p8f0O2M27lvHSByAmv2KvrZjRf0Vg4rrtJmW9Ezxwv42Z2g/WjGUHT4ZRSDP1p5bISSUTotQ7ZAGSEVZDQm9KQgDTmedBiwQTFl22fkk6L3M9DHPvb0Jv//5hPGO3BK3opZjSpg8yKLZzTr0XU/GzRbV2ZJ74QhRw= 34 | r_github_packages: 35 | - jimhester/covr 36 | after_success: 37 | - if [[ "${R_CODECOV}" ]]; then R -e 'covr::codecov()'; fi 38 | warnings_are_errors: true 39 | notifications: 40 | email: 41 | on_success: change 42 | on_failure: change 43 | -------------------------------------------------------------------------------- /1.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COVAIL/cognizer/e99244ddfdc89b8d416dc1d09a2fb290cc2c640d/1.ogg -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cognizer 2 | Title: Access to Cognitive APIs 3 | Version: 0.0.2 4 | Authors@R: c( 5 | person("Slava", "Nikitin", email = "dl-cognizer@cbuscollaboratory.com", 6 | role = c("aut", "cre")), 7 | person("Peter", "Gordon", email = "dl-cognizer@cbuscollaboratory.com", 8 | role = c("aut")), 9 | person("Columbus Collaboratory, LLC", email = "info@cbuscollaboratory.com", 10 | role = c("cph"))) 11 | Description: Automates access to IBM Watson APIs to make it easy to request 12 | analyses of text, image and audio data. 
URL: http://columbuscollaboratory.com
SystemRequirements:
    libcurl: libcurl-devel (rpm) or libcurl4-openssl-dev (deb)
Depends: R (>= 3.3.1)
Imports:
    curl (>= 2.3),
    jsonlite (>= 1.2),
    magrittr (>= 1.5)
Suggests:
    testthat,
    rmsfact
License: MIT + file LICENSE
LazyData: true
RoxygenNote: 5.0.1
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
YEAR: 2017
COPYRIGHT HOLDER: Columbus Collaboratory, LLC
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

export(audio_text)
export(image_classify)
export(image_detectface)
export(image_detecttext)
export(text_audio)
export(text_concept)
export(text_emotion)
export(text_entity)
export(text_keywords)
export(text_language)
export(text_personality)
export(text_relations)
export(text_sentiment)
export(text_taxonomy)
export(text_tone)
export(text_translate)
importFrom(curl,curl_download)
importFrom(curl,curl_escape)
importFrom(curl,form_file)
importFrom(curl,handle_setform)
importFrom(curl,handle_setheaders)
importFrom(curl,handle_setopt)
importFrom(curl,multi_add)
importFrom(curl,multi_run)
importFrom(curl,new_handle)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(magrittr,'%>%')
--------------------------------------------------------------------------------
/R/audio_cognizers.R:
--------------------------------------------------------------------------------

#' @title IBM Watson Audio Transcriber
#' @description Convert your audio to transcripts with optional keyword
#' detection and profanity cleaning.
#' @param audios Character vector (list) of paths to audio files to be
#' transcribed. One file is uploaded per request.
#' @param userpwd Character scalar containing username:password for the service.
#' @param keep_data Character scalar specifying whether to share your data with
#' Watson services for the purpose of training their models.
#' @param callback Function that can be applied to responses to examine http status,
#' headers, and content, to debug or to write a custom parser for content.
#' The default callback parses content into a data.frame while dropping other
#' response values to make the output easily passable to tidyverse packages like
#' dplyr or ggplot2. For further details or debugging one can pass a print or a
#' more complicated function.
#' @param model Character scalar specifying language and bandwidth model. Alternatives
#' are ar-AR_BroadbandModel, en-UK_BroadbandModel, en-UK_NarrowbandModel,
#' en-US_NarrowbandModel, es-ES_BroadbandModel, es-ES_NarrowbandModel,
#' fr-FR_BroadbandModel, ja-JP_BroadbandModel, ja-JP_NarrowbandModel,
#' pt-BR_BroadbandModel, pt-BR_NarrowbandModel, zh-CN_BroadbandModel,
#' zh-CN_NarrowbandModel.
#' @param inactivity_timeout Integer scalar giving the number of seconds after which
#' the result is returned if no speech is detected.
#' @param keywords List of keywords to be detected in the speech stream.
#' @param keywords_threshold Double scalar from 0 to 1 specifying the lower bound on
#' confidence to accept detected keywords in speech.
#' @param max_alternatives Integer scalar giving the maximum number of alternative
#' transcripts to return.
#' @param word_alternatives_threshold Double scalar from 0 to 1 giving lower bound
#' on confidence of possible words.
#' @param word_confidence Logical scalar indicating whether to return confidence for
#' each word.
#' @param timestamps Logical scalar indicating whether to return time alignment for
#' each word.
#' @param profanity_filter Logical scalar indicating whether to censor profane words.
#' @param smart_formatting Logical scalar indicating whether dates, times, numbers, etc.
#' are to be formatted nicely in the transcript.
#' @param content_type Character scalar showing format of the audio file. Alternatives
#' are audio/flac, audio/l16;rate=n;channels=k (16 channel limit),
#' audio/wav (9 channel limit), audio/ogg;codecs=opus,
#' audio/basic (narrowband models only).
#' @param speaker_labels Logical scalar indicating whether to infer speakers on a mono
#' channel. Automatically turns on timestamp collection for each word.
#' @return List of parsed responses, one element per element of `audios`; on
#' request failure the element holds the raw curl response instead.
#' @export
audio_text <- function(
  audios,
  userpwd,
  keep_data = "true",
  callback = NULL,
  model = "en-US_BroadbandModel",
  inactivity_timeout = -1,
  keywords = list(),
  keywords_threshold = NA,
  max_alternatives = 1,
  word_alternatives_threshold = NA,
  word_confidence = FALSE,
  timestamps = FALSE,
  profanity_filter = TRUE,
  smart_formatting = FALSE,
  content_type = "audio/wav",
  speaker_labels = FALSE)
{
  protocol <- "https://"
  service <- "stream.watsonplatform.net/speech-to-text/api/v1/recognize?"
  parameters <- paste("model", model, sep = "=")
  url <- paste0(protocol, service, parameters)
  # All options other than the model travel in a JSON metadata part of the
  # multipart body; NA-valued entries are treated as "not set" and dropped.
  metadata <- list(
    "part_content_type" = content_type,
    "data_parts_count" = 1,
    "inactivity_timeout" = inactivity_timeout,
    "keywords" = keywords,
    "keywords_threshold" = keywords_threshold,
    "max_alternatives" = max_alternatives,
    "word_alternatives_threshold" = word_alternatives_threshold,
    "word_confidence" = word_confidence,
    "timestamps" = timestamps,
    "profanity_filter" = profanity_filter,
    "smart_formatting" = smart_formatting,
    "speaker_labels" = speaker_labels
  )
  metadata <- toJSON(metadata[!is.na(metadata)], auto_unbox = TRUE)

  # Default `done` parses the JSON body into `resps`; a user-supplied callback
  # replaces it wholesale. `fail` always stores the raw response for debugging.
  done <- if (is.null(callback)) function(resp, index) {
    resps[[index]] <<- fromJSON(rawToChar(resp$content))
    invisible(NULL)
  } else callback
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  resps <- vector("list", length(audios))
  invisible(
    lapply(
      seq_along(audios),
      function(index) {
        # curl invokes the callbacks with the response only, so the current
        # index is baked in as the default value of the `index` formal.
        if (is.null(callback)) formals(done)$index <- index
        formals(fail)$index <- index
        form <- form_file(audios[index], content_type)
        new_handle(url = url) %>%
          handle_setopt("userpwd" = userpwd) %>%
          handle_setheaders(
            "X-Watson-Learning-Opt-Out" = keep_data,
            "Content-Type" = "multipart/form-data",
            "Transfer-Encoding" = "chunked"
          ) %>%
          handle_setform(metadata = metadata, upload = form) %>%
          multi_add(done = done, fail = fail)
      }
    )
  )

  multi_run()
  resps
}
--------------------------------------------------------------------------------
/R/cognizer.R:
--------------------------------------------------------------------------------

#' @title R Wrapper for IBM Watson Services
#' @description cognizer provides an interface to IBM Watson services that can
#' process text, image and audio
#' data.
#' @docType package
#' @name cognizer
#' @importFrom jsonlite fromJSON
#' @importFrom jsonlite toJSON
NULL
--------------------------------------------------------------------------------
/R/image_cognizers.R:
--------------------------------------------------------------------------------

#' @title IBM Watson Image Classifier
#' @description \bold{image_classify}: Uses default classifier to determine the object
#' category in the image.
#' @param images Character vector (list) of paths to images or to .zip files containing
#' up to 100 images.
#' @param api_key Character scalar containing api key obtained from Watson services.
#' @param keep_data Character scalar specifying whether to share your data with
#' Watson services for the purpose of training their models.
#' @param callback Function that can be applied to responses to examine http status,
#' headers, and content, to debug or to write a custom parser for content.
#' The default callback parses content into a data.frame while dropping other
#' response values to make the output easily passable to tidyverse packages like
#' dplyr or ggplot2. For further details or debugging one can pass a print or a
#' more complicated function.
#' @param type Character scalar specifying image format. Alternative is "image/png".
#' @param version Character scalar giving version of api to use.
#' @param accept_language Character scalar specifying the output language.
#' @param batch_size Integer scalar giving the number of images in a given path. This
#' is used when images are zipped together. Check IBM docs for maximum number in a
#' single zip file.
#' @return List of parsed responses.
#' @export
image_classify <- function(
  images,
  api_key,
  keep_data = "true",
  callback = NULL,
  type = "image/jpeg",
  version = "2016-05-20",
  accept_language = "en",
  batch_size = 1
)
{
  protocol <- "https://"
  service <- "gateway-a.watsonplatform.net/visual-recognition/api/v3/classify?"
  parameters <- paste(
    c("api_key", "version"),
    c(api_key, version),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(protocol, service, parameters)

  # Default `done` parses the JSON body into `resps`; a user-supplied callback
  # replaces it wholesale. `fail` always stores the raw response for debugging.
  done <- if (is.null(callback)) function(resp, index) {
    resps[[index]] <<- fromJSON(rawToChar(resp$content))
    invisible(NULL)
  } else callback
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  resps <- vector("list", length(images))
  invisible(
    lapply(
      seq_along(images),
      function(index) {
        # curl invokes the callbacks with the response only, so the current
        # index is baked in as the default value of the `index` formal.
        if (is.null(callback)) formals(done)$index <- index
        formals(fail)$index <- index
        form <- form_file(images[index], type)
        new_handle(url = url) %>%
          handle_setheaders(
            "X-Watson-Learning-Opt-Out" = keep_data,
            "Accept-Language" = accept_language
          ) %>%
          # Fix: the Visual Recognition v3 API expects the multipart field to be
          # named `images_file` (as the other two functions in this file already
          # do); the previous `image_file` name was inconsistent with both.
          handle_setform(images_file = form) %>%
          multi_add(done = done, fail = fail)
      }
    )
  )

  multi_run()
  resps
}



#' @title IBM Watson Face Detection Algorithm
#' @description \bold{image_detectface}: Uses default algorithm to detect
#' a face in the image and provide its coordinates.
#' @inheritParams image_classify
#' @return List of parsed responses.
#' @export
#' @rdname image_classify
image_detectface <- function(
  images,
  api_key,
  keep_data = "true",
  callback = NULL,
  type = "image/jpeg",
  version = "2016-05-20",
  batch_size = 1
)
{
  protocol <- "https://"
  service <- "gateway-a.watsonplatform.net/visual-recognition/api/v3/detect_faces?"
  parameters <- paste(
    c("api_key", "version"),
    c(api_key, version),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(protocol, service, parameters)

  done <- if (is.null(callback)) function(resp, index) {
    resps[[index]] <<- fromJSON(rawToChar(resp$content))
    invisible(NULL)
  } else callback
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  resps <- vector("list", length(images))
  invisible(
    lapply(
      seq_along(images),
      function(index) {
        if (is.null(callback)) formals(done)$index <- index
        formals(fail)$index <- index
        form <- form_file(images[index], type)
        new_handle(url = url) %>%
          handle_setheaders("X-Watson-Learning-Opt-Out" = keep_data) %>%
          handle_setform(images_file = form) %>%
          multi_add(done = done, fail = fail)
      }
    )
  )

  multi_run()
  resps
}


#' @title IBM Watson Text-in-image Detection Algorithm
#' @description \bold{image_detecttext}: Uses default algorithm to detect
#' text in the image.
#' @inheritParams image_classify
#' @return List of parsed responses.
#' @export
#' @rdname image_classify
image_detecttext <- function(
  images,
  api_key,
  keep_data = "true",
  callback = NULL,
  type = "image/jpeg",
  version = "2016-05-20",
  batch_size = 1
)
{
  protocol <- "https://"
  service <- "gateway-a.watsonplatform.net/visual-recognition/api/v3/recognize_text?"
  parameters <- paste(
    c("api_key", "version"),
    c(api_key, version),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(protocol, service, parameters)

  done <- if (is.null(callback)) function(resp, index) {
    resps[[index]] <<- fromJSON(rawToChar(resp$content))
    invisible(NULL)
  } else callback
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  resps <- vector("list", length(images))
  invisible(
    lapply(
      seq_along(images),
      function(index) {
        if (is.null(callback)) formals(done)$index <- index
        formals(fail)$index <- index
        form <- form_file(images[index], type)
        new_handle(url = url) %>%
          handle_setheaders("X-Watson-Learning-Opt-Out" = keep_data) %>%
          handle_setform(images_file = form) %>%
          multi_add(done = done, fail = fail)
      }
    )
  )

  multi_run()
  resps
}
--------------------------------------------------------------------------------
/R/text_cognizers.R:
--------------------------------------------------------------------------------

#' @title Process text with IBM Alchemy Language algorithms
#' @description \bold{text_sentiment}: Takes a vector of text and sends to Watson
#' services for various analyses. Requires basic authentication using api key.
#' @param text Character vector containing strings to be processed.
#' @param api_key Character scalar containing api key obtained from Watson services.
#' @param output_mode Character scalar specifying returned data structure.
#' Alternative is xml.
#' @param show_source Integer scalar specifying whether to send text
#' string back or not.
#' @param keep_data Character scalar specifying whether to share your data with
#' Watson services for the purpose of training their models.
13 | #' @param callback Function that can be applied to responses to examine http status, 14 | #' headers, and content, to debug or to write a custom parser for content. 15 | #' The default callback parses content into a data.frame while dropping other 16 | #' response values to make the output easily passable to tidyverse packages like 17 | #' dplyr or ggplot2. For further details or debugging one can pass a fail or a 18 | #' more compicated function. 19 | #' @return Data.frame containing parsed content in a tidy fashion. 20 | #' @seealso Check \url{http://www.ibm.com/watson/developercloud/alchemy-language.html} 21 | #' for further documentation, and \url{https://alchemy-language-demo.mybluemix.net/?cm_mc_uid=70865809903714586773519&cm_mc_sid_50200000=1468266111} 22 | #' for a web demo. 23 | #' @export 24 | #' @importFrom magrittr '%>%' 25 | #' @importFrom curl new_handle handle_setheaders handle_setopt handle_setform 26 | #' @importFrom curl curl_escape multi_add multi_run form_file curl_download 27 | text_sentiment <- function( 28 | text, 29 | api_key, 30 | output_mode = "json", 31 | show_source = 0, 32 | keep_data = "true", 33 | callback = NULL 34 | ) 35 | { 36 | protocol <- "https://" 37 | service <- "gateway-a.watsonplatform.net/calls/text/TextGetTextSentiment?" 
38 | parameters <- paste( 39 | c("apikey", "outputMode", "showSourceText"), 40 | c(api_key, output_mode, show_source), 41 | sep = "=", 42 | collapse = "&" 43 | ) 44 | url <- paste0(protocol, service, parameters) 45 | text <- paste("text", curl_escape(text), sep = "=") 46 | 47 | done <- if (is.null(callback)) function(resp, index) { 48 | resps[[index]] <<- fromJSON(rawToChar(resp$content)) 49 | invisible(NULL) 50 | } else callback 51 | fail <- function(resp, index) { 52 | resps[[index]] <<- resp 53 | invisible(NULL) 54 | } 55 | 56 | resps <- vector("list", length(text)) 57 | invisible( 58 | lapply( 59 | seq_along(text), 60 | function(index) { 61 | if (is.null(callback)) formals(done)$index <- index 62 | formals(fail)$index <- index 63 | new_handle(url = url) %>% 64 | handle_setheaders("X-Watson-Learning-Opt-Out"= keep_data) %>% 65 | handle_setopt(postfields = text[index]) %>% 66 | multi_add(done = done, fail = fail) 67 | } 68 | ) 69 | ) 70 | 71 | multi_run() 72 | resps 73 | } 74 | 75 | 76 | #' @description \bold{text_keywords}: Keywords analysis extracts keywords from text, and 77 | #' can optionally provide their sentiment and/or associated knowledge graph. 78 | #' @inheritParams text_sentiment 79 | #' @param max_retrieve Integer scalar fixing the number of keywords to extract 80 | #' from text. 81 | #' @param knowledge_graph Integer scalar indicating whether to grab a knowledge 82 | #' graph associated with keywords. This is an additional transaction. 83 | #' @param sentiment Integer scalar indicating whether to infer sentiment of 84 | #' keywords, expressed as category and number. This is an additional transaction. 
85 | #' @export 86 | #' @rdname text_sentiment 87 | text_keywords <- function( 88 | text, 89 | api_key, 90 | output_mode = "json", 91 | show_source = 0, 92 | keep_data = "true", 93 | callback = NULL, 94 | max_retrieve = 50, 95 | knowledge_graph = 0, 96 | sentiment = 0 97 | ) 98 | { 99 | protocol <- "https://" 100 | service <- "gateway-a.watsonplatform.net/calls/text/TextGetRankedKeywords?" 101 | parameters <- paste( 102 | c("apikey", "outputMode", "showSourceText", 103 | "maxRetrieve", "knowledgeGraph", "sentiment"), 104 | c(api_key, output_mode, show_source, 105 | max_retrieve, knowledge_graph, sentiment), 106 | sep = "=", 107 | collapse = "&" 108 | ) 109 | url <- paste0(protocol, service, parameters) 110 | text <- paste("text", curl_escape(text), sep = "=") 111 | 112 | done <- if (is.null(callback)) function(resp, index) { 113 | resps[[index]] <<- fromJSON(rawToChar(resp$content)) 114 | invisible(NULL) 115 | } else callback 116 | fail <- function(resp, index) { 117 | resps[[index]] <<- resp 118 | invisible(NULL) 119 | } 120 | 121 | resps <- vector("list", length(text)) 122 | invisible( 123 | lapply( 124 | seq_along(text), 125 | function(index) { 126 | if (is.null(callback)) formals(done)$index <- index 127 | formals(fail)$index <- index 128 | new_handle(url = url) %>% 129 | handle_setheaders("X-Watson-Learning-Opt-Out"= keep_data) %>% 130 | handle_setopt(postfields = text[index]) %>% 131 | multi_add(done = done, fail = fail) 132 | } 133 | ) 134 | ) 135 | 136 | multi_run() 137 | resps 138 | } 139 | 140 | 141 | #' @description \bold{text_emotion}: Emotion analysis of text infers 142 | #' scores for 7 basic emotions. 
143 | #' @inheritParams text_sentiment 144 | #' @export 145 | #' @rdname text_sentiment 146 | text_emotion <- function( 147 | text, 148 | api_key, 149 | output_mode = "json", 150 | show_source = 0, 151 | keep_data = "true", 152 | callback = NULL 153 | ) 154 | { 155 | protocol <- "https://" 156 | service <- "gateway-a.watsonplatform.net/calls/text/TextGetEmotion?" 157 | parameters <- paste( 158 | c("apikey", "outputMode", "showSourceText"), 159 | c(api_key, output_mode, show_source), 160 | sep = "=", 161 | collapse = "&" 162 | ) 163 | url <- paste0(protocol, service, parameters) 164 | text <- paste("text", curl_escape(text), sep = "=") 165 | 166 | done <- if (is.null(callback)) function(resp, index) { 167 | resps[[index]] <<- fromJSON(rawToChar(resp$content)) 168 | invisible(NULL) 169 | } else callback 170 | fail <- function(resp, index) { 171 | resps[[index]] <<- resp 172 | invisible(NULL) 173 | } 174 | 175 | resps <- vector("list", length(text)) 176 | invisible( 177 | lapply( 178 | seq_along(text), 179 | function(index) { 180 | if (is.null(callback)) formals(done)$index <- index 181 | formals(fail)$index <- index 182 | new_handle(url = url) %>% 183 | handle_setheaders("X-Watson-Learning-Opt-Out"= keep_data) %>% 184 | handle_setopt(postfields = text[index]) %>% 185 | multi_add(done = done, fail = fail) 186 | } 187 | ) 188 | ) 189 | 190 | multi_run() 191 | resps 192 | } 193 | 194 | 195 | #' @description \bold{text_language}: Language detection infers 196 | #' language of the provided text. Works best with at least 100 words. 197 | #' @inheritParams text_sentiment 198 | #' @export 199 | #' @rdname text_sentiment 200 | text_language <- function( 201 | text, 202 | api_key, 203 | output_mode = "json", 204 | show_source = 0, 205 | keep_data = "true", 206 | callback = NULL 207 | ) 208 | { 209 | protocol <- "https://" 210 | service <- "gateway-a.watsonplatform.net/calls/text/TextGetLanguage?" 
211 | parameters <- paste( 212 | c("apikey", "outputMode", "showSourceText"), 213 | c(api_key, output_mode, show_source), 214 | sep = "=", 215 | collapse = "&" 216 | ) 217 | url <- paste0(protocol, service, parameters) 218 | text <- paste("text", curl_escape(text), sep = "=") 219 | 220 | done <- if (is.null(callback)) function(resp, index) { 221 | resps[[index]] <<- fromJSON(rawToChar(resp$content)) 222 | invisible(NULL) 223 | } else callback 224 | fail <- function(resp, index) { 225 | resps[[index]] <<- resp 226 | invisible(NULL) 227 | } 228 | 229 | resps <- vector("list", length(text)) 230 | invisible( 231 | lapply( 232 | seq_along(text), 233 | function(index) { 234 | if (is.null(callback)) formals(done)$index <- index 235 | formals(fail)$index <- index 236 | new_handle(url = url) %>% 237 | handle_setheaders("X-Watson-Learning-Opt-Out"= keep_data) %>% 238 | handle_setopt(postfields = text[index]) %>% 239 | multi_add(done = done, fail = fail) 240 | } 241 | ) 242 | ) 243 | 244 | multi_run() 245 | resps 246 | } 247 | 248 | 249 | #' @description \bold{text_entity}: Entity analysis extracts names of people, 250 | #' products, places from the provided text. Additional arguments can provide 251 | #' sentiment, knowledge graphs and quotations related to inferred entities. 252 | #' @inheritParams text_keywords 253 | #' @param model Character scalar specifying one of three models which will extract 254 | #' entities. Alternatives are 'ie-es-news', 'ie-ar-news' or a custom model. 255 | #' @param coreference Integer scalar specifying whether to resolve coreferences into 256 | #' detected entities. 257 | #' @param disambiguate Integer scalar specifying whether to disambiguate 258 | #' detected entities. 259 | #' @param linked_data Integer scalar specifying whether to include links for 260 | #' related data. 261 | #' @param quotations Integer scalar specifying whether to include quotes related 262 | #' to detected entities. 
#' @param structured_entity Integer scalar specifying whether to extract structured
#' entities, such as Quantity, EmailAddress, TwitterHandle, Hashtag, and IPAddress.
#' @export
#' @rdname text_sentiment
text_entity <- function(
  text,
  api_key,
  output_mode = "json",
  show_source = 0,
  keep_data = "true",
  callback = NULL,
  max_retrieve = 50,
  knowledge_graph = 0,
  sentiment = 0,
  model = "ie-en-news",
  coreference = 1,
  disambiguate = 1,
  linked_data = 1,
  quotations = 0,
  structured_entity = 1
)
{
  # Assemble the AlchemyAPI named-entity endpoint with every query option attached.
  query <- paste(
    c("apikey", "outputMode", "showSourceText", "maxRetrieve", "knowledgeGraph",
      "sentiment", "model", "coreference", "disambiguate", "linkedData",
      "quotations", "structuredEntities"),
    c(api_key, output_mode, show_source, max_retrieve, knowledge_graph, sentiment,
      model, coreference, disambiguate, linked_data, quotations, structured_entity),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway-a.watsonplatform.net/calls/text/TextGetRankedNamedEntities?",
    query
  )
  # One url-encoded POST body per input document.
  text <- paste("text", curl_escape(text), sep = "=")

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of `resps`;
  # a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response is written to the matching element of `resps`.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setheaders(handle, "X-Watson-Learning-Opt-Out" = keep_data)
    handle_setopt(handle, postfields = text[i])
    multi_add(handle, done = done, fail = fail)
  }

  # Drive all queued requests to completion, then return the filled list.
  multi_run()
  resps
}

#' @description \bold{text_concept}: Concept analysis infers categories based on
#' the text, but that are not necessarily in the text. Additional arguments can
#' provide sentiment and/or knowledge graphs related to inferred concepts.
#' @inheritParams text_entity
#' @rdname text_sentiment
#' @export
text_concept <- function(
  text,
  api_key,
  output_mode = "json",
  show_source = 0,
  keep_data = "true",
  callback = NULL,
  max_retrieve = 8,
  knowledge_graph = 0,
  linked_data = 1
)
{
  # Assemble the AlchemyAPI concept-tagging endpoint with its query options.
  query <- paste(
    c("apikey", "outputMode", "showSourceText",
      "maxRetrieve", "knowledgeGraph", "linkedData"),
    c(api_key, output_mode, show_source,
      max_retrieve, knowledge_graph, linked_data),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway-a.watsonplatform.net/calls/text/TextGetRankedConcepts?",
    query
  )
  # One url-encoded POST body per input document.
  text <- paste("text", curl_escape(text), sep = "=")

  resps <- vector("list", length(text))
  # Default handler parses JSON into this request's slot; a user callback wins.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Fix the slot number in the callbacks' defaults for this request.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setheaders(handle, "X-Watson-Learning-Opt-Out" = keep_data)
    handle_setopt(handle, postfields = text[i])
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}


#' @description \bold{text_relations}: Relation analysis infers associations among
#' entities.
#' @inheritParams text_entity
#' @export
#' @rdname text_sentiment
text_relations <- function(
  text,
  api_key,
  output_mode = "json",
  show_source = 0,
  keep_data = "true",
  callback = NULL,
  model = "ie-en-news"
)
{
  # BUG FIX: this previously posted to TextGetRankedConcepts (a copy-paste
  # from text_concept), so relation requests silently returned concept
  # results. TextGetRelations is the AlchemyAPI relation-extraction call.
  query <- paste(
    c("apikey", "outputMode", "showSourceText", "model"),
    c(api_key, output_mode, show_source, model),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway-a.watsonplatform.net/calls/text/TextGetRelations?",
    query
  )
  # One url-encoded POST body per input document.
  text <- paste("text", curl_escape(text), sep = "=")

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of
  # `resps`; a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response lands in the matching element of `resps`.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setheaders(handle, "X-Watson-Learning-Opt-Out" = keep_data)
    handle_setopt(handle, postfields = text[i])
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}


#' @description \bold{text_taxonomy}: Taxonomy analysis infers hierarchical relations
#' among entities up to 5 levels deep.
#' @inheritParams text_entity
#' @export
#' @rdname text_sentiment
text_taxonomy <- function(
  text,
  api_key,
  output_mode = "json",
  show_source = 0,
  keep_data = "true",
  callback = NULL,
  max_retrieve = 50,
  knowledge_graph = 0,
  sentiment = 0,
  model = "ie-en-news",
  coreference = 1,
  disambiguate = 1,
  linked_data = 1,
  quotations = 0,
  structured_entity = 1
)
{
  # Assemble the AlchemyAPI taxonomy endpoint with every query option attached.
  query <- paste(
    c("apikey", "outputMode", "showSourceText", "maxRetrieve", "knowledgeGraph",
      "sentiment", "model", "coreference", "disambiguate", "linkedData",
      "quotations", "structuredEntities"),
    c(api_key, output_mode, show_source, max_retrieve, knowledge_graph, sentiment,
      model, coreference, disambiguate, linked_data, quotations, structured_entity),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway-a.watsonplatform.net/calls/text/TextGetRankedTaxonomy?",
    query
  )
  # One url-encoded POST body per input document.
  text <- paste("text", curl_escape(text), sep = "=")

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of
  # `resps`; a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response lands in the matching element of `resps`.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setheaders(handle, "X-Watson-Learning-Opt-Out" = keep_data)
    handle_setopt(handle, postfields = text[i])
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}

#' @title IBM Language Translation API.
#' @description Translates text from Arabic, French,
#' Portuguese or Spanish to English. Requires basic authentication using
#' username and password.
#' @param text Character vector.
#' @param userpwd Character scalar that contains 'username:password' string.
#' @param model_id Character scalar formated as 'source-target-domain'.
#' Source language (Arabic, Brazilian Portuguese, English, French, Italian,
#' or Spanish), target language (Arabic, Brazilian Portuguese, English, French,
#' Italian, or Spanish) and domain of text (conversational, news, patent).
#' Check IBM documentation for other language mappings.
#' @param accept Character scalar that specifies response format. Alternative is
#' text/plain.
#' @param keep_data Character scalar specifying whether to share your data with
#' Watson services for the purpose of training their models.
#' @param callback Function that can be applied to responses to examine http status,
#' headers, and content, to debug or to write a custom parser for content.
#' The default callback parses content into a data.frame while dropping other
#' response values to make the output easily passable to tidyverse packages like
#' dplyr or ggplot2. For further details or debugging one can pass a fail or a
#' more complicated function.
#' @return Data.frame containing parsed content in a tidy fashion.
#' @seealso Check \url{http://www.ibm.com/watson/developercloud/language-translation.html}
#' for further documentation, and \url{https://language-translator-demo.mybluemix.net/}
#' for a web demo.
#' @export
text_translate <- function(
  text,
  userpwd,
  keep_data = "true",
  callback = NULL,
  model_id = "es-en-conversational",
  accept = "application/json"
)
{
  # Translator v2 endpoint; the translation model is the only query parameter.
  url <- paste0(
    "https://gateway.watsonplatform.net/language-translator/api/v2/translate?",
    paste("model_id", model_id, sep = "=")
  )
  # One url-encoded POST body per input document.
  text <- paste("text", curl_escape(text), sep = "=")

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of
  # `resps`; a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response lands in the matching element of `resps`.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setopt(handle, "userpwd" = userpwd, "postfields" = text[i])
    handle_setheaders(
      handle,
      "X-Watson-Learning-Opt-Out" = keep_data,
      "Accept" = accept
    )
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}

#' @title IBM personality analysis of text
#' @description Analyze your text along the Big 5 dimensions of personality.
#' @inheritParams text_translate
#' @param raw_scores Character scalar showing whether to include only normalized
#' statistics or also raw statistics.
#' @param consumption_preferences Character scalar showing whether to
#' return consumption preferences
#' @param csv_headers Character scalar showing whether to return column labels when
#' Accept-Content is set to 'text/csv'.
#' @param version Character scalar giving date that specifies the algorithm that went
#' operational on or before the date. Future dates select the most recent algorithm.
#' @param content_type Character scalar setting input data type header. Alternatives
#' are 'application/json; charset=utf-8' and 'text/html; charset=ISO-8859-1'.
#' @param content_language Character scalar setting input language. Alternatives are
#' 'ar' (Arabic), 'es' (Spanish), 'ja' (Japanese).
#' @param accept_language Character scalar setting output language. Alternatives are
#' 'ar' (Arabic), 'de' (German), 'es' (Spanish), 'fr' (French), 'it' (Italian),
#' 'ja' (Japanese), 'ko' (Korean), 'pt-br' (Brazilian Portuguese),
#' 'zh-cn' (Simplified Chinese), 'zh-tw' (Traditional Chinese).
#' @return List containing parsed content.
#' @seealso Check \url{http://www.ibm.com/watson/developercloud/doc/personality-insights/}
#' for further documentation, and \url{https://personality-insights-livedemo.mybluemix.net/}
#' for a web demo.
#' @export
text_personality <- function(
  text,
  userpwd,
  keep_data = "true",
  callback = NULL,
  model_id = "es-en-conversational",
  raw_scores = "false",
  consumption_preferences = "false",
  csv_headers = "false",
  version = "2020-01-01",
  content_type = "text/plain; charset=utf-8",
  content_language = "en",
  accept = "application/json",
  accept_language = "en"
)
{
  # BUG FIX: `version` and `consumption_preferences` were accepted and
  # documented but never sent to the service; instead the extraneous
  # `model_id` (a Language Translator parameter, not a Personality Insights
  # one) was put on the query string. `model_id` remains in the signature
  # for backward compatibility but is no longer transmitted.
  parameters <- paste(
    c("version", "raw_scores", "csv_headers", "consumption_preferences"),
    c(version, raw_scores, csv_headers, consumption_preferences),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway.watsonplatform.net/personality-insights/api/v3/profile?",
    parameters
  )

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of
  # `resps`; a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response lands in the matching element of `resps`. The raw text is
    # posted as-is; Content-Type tells the service how to read it.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setopt(handle, "userpwd" = userpwd, "postfields" = text[i])
    handle_setheaders(
      handle,
      "X-Watson-Learning-Opt-Out" = keep_data,
      "Content-Type" = content_type,
      "Content-Language" = content_language,
      "Accept" = accept,
      "Accept-Language" = accept_language
    )
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}



#' @title IBM Tone Analyzer of Text
#' @description Infers three types of tone - emotion, language, social - from the
#' whole text or at sentence level.
#' @inheritParams text_translate
#' @param content_type Character scalar specifying the HTTP header with type of text
#' and its encoding.
#' @param version Character scalar that specifies the date of most recent version of
#' the algorithm.
#' @param tones Character scalar that allows selecting one of the three possible tones:
#' emotion, language, social.
#' @param sentences Character scalar specifying whether to do analysis at the
#' sentence level.
#' @return Data.frame containing parsed content in a tidy fashion.
#' @seealso Check \url{http://www.ibm.com/watson/developercloud/doc/tone-analyzer/}
#' for further documentation, and \url{https://tone-analyzer-demo.mybluemix.net/?cm_mc_uid=70865809903714586773519&cm_mc_sid_50200000=1468424667}
#' for a web demo.
#' @export
text_tone <- function(
  text,
  userpwd,
  keep_data = "true",
  callback = NULL,
  content_type = "text/plain; charset=utf-8",
  version = "2016-05-19",
  tones = "",
  sentences = "true"
)
{
  # Tone Analyzer v3 endpoint with its three query options.
  query <- paste(
    c("version", "tones", "sentences"),
    c(version, tones, sentences),
    sep = "=",
    collapse = "&"
  )
  url <- paste0(
    "https://gateway.watsonplatform.net/tone-analyzer/api/v3/tone?",
    query
  )

  resps <- vector("list", length(text))
  # Default handler parses the JSON payload into this request's slot of
  # `resps`; a user-supplied callback replaces it unchanged.
  done <- if (is.null(callback)) {
    function(resp, index) {
      resps[[index]] <<- fromJSON(rawToChar(resp$content))
      invisible(NULL)
    }
  } else {
    callback
  }
  fail <- function(resp, index) {
    resps[[index]] <<- resp
    invisible(NULL)
  }

  for (i in seq_along(text)) {
    # Bake the slot number into the callbacks' defaults so each asynchronous
    # response lands in the matching element of `resps`. The raw text is
    # posted as-is; Content-Type tells the service how to read it.
    if (is.null(callback)) formals(done)$index <- i
    formals(fail)$index <- i
    handle <- new_handle(url = url)
    handle_setopt(handle, "userpwd" = userpwd, "postfields" = text[i])
    handle_setheaders(
      handle,
      "X-Watson-Learning-Opt-Out" = keep_data,
      "Content-Type" = content_type
    )
    multi_add(handle, done = done, fail = fail)
  }

  multi_run()
  resps
}




#' @title IBM Text-to-speech API.
#' @description Synthesizes an audio record from text.
#' @inheritParams text_sentiment
#' @param userpwd Character scalar that contains 'username:password' string.
#' @param directory Character scalar specifying directory for storing audio files.
#' @param voice Character scalar setting language and voice model for the synthesized
#' voice. Many models are available: de-DE_BirgitVoice, de-DE_DieterVoice,
#' en-GB_KateVoice, en-US_LisaVoice, en-US_MichaelVoice, es-ES_EnriqueVoice,
#' es-ES_LauraVoice, es-US_SofiaVoice, fr-FR_ReneeVoice, it-IT_FrancescaVoice,
#' ja-JP_EmiVoice, pt-BR_IsabelaVoice.
#' @param accept Character scalar specifying format for the audio. Alternatives are
#' audio/wav, audio/flac, audio/l16, audio/basic.
#' @return Audio file with selected format is saved into selected directory. The file
#' name is the index of the text element followed by the audio format extension
#' (e.g. "1.ogg").
#' @return Logical scalar is returned invisibly.
#' @seealso Check \url{http://www.ibm.com/watson/developercloud/text-to-speech.html}
#' for further documentation, and \url{https://text-to-speech-demo.mybluemix.net/}
#' for a web demo.
#' @export
text_audio <- function(
  text,
  userpwd,
  keep_data = "true",
  directory,
  voice = "en-US_AllisonVoice",
  accept = "audio/ogg;codecs=opus"
)
{
  # Text-to-Speech synthesize endpoint; the voice model is the only query option.
  url <- paste0(
    "https://stream.watsonplatform.net/text-to-speech/api/v1/synthesize?",
    paste("voice", voice, sep = "=", collapse = "&")
  )

  # BUG FIX: the file extension used to be substr(accept, 7, 9), which
  # truncated "audio/flac" to "fla" and "audio/basic" to "bas". Derive it
  # from the MIME subtype instead ("audio/ogg;codecs=opus" -> "ogg").
  format <- sub(";.*$", "", sub("^audio/", "", accept))

  invisible(
    lapply(
      seq_along(text),
      function(index) {
        # One output file per text element, named by its position.
        path <- file.path(directory, paste0(index, ".", format))
        handle <- new_handle(url = url)
        handle_setopt(
          handle,
          "userpwd" = userpwd,
          # The service expects a JSON body of the form {"text": "..."}.
          "postfields" = toJSON(list(text = text[index]), auto_unbox = TRUE),
          # Keep going on HTTP errors so one bad element doesn't stop the batch.
          "failonerror" = 0
        )
        handle_setheaders(
          handle,
          "X-Watson-Learning-Opt-Out" = keep_data,
          "Content-Type" = "application/json",
          "Accept" = accept
        )
        curl_download(url, path, handle = handle)
      }
    )
  )
  invisible(TRUE)
}


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "cognizeR"
 3 | output: github_document
 4 | ---
 5 | 
 6 | ```{r setup, include=FALSE, echo=FALSE}
 7 | knitr::opts_knit$set(progress = TRUE, verbose = TRUE)
 8 | library(cognizer)
 9 | library(knitr)
10 | ALCHEMY_API_KEY <- Sys.getenv("ALCHEMY_API_KEY")
11 | LANG_TRANSLATE_USERNAME_PASSWORD <- Sys.getenv("LANG_TRANSLATE_USERNAME_PASSWORD")
12 | PERSONALITY_USERNAME_PASSWORD <- Sys.getenv("PERSONALITY_USERNAME_PASSWORD")
13 | TONE_USERNAME_PASSWORD <- Sys.getenv("TONE_USERNAME_PASSWORD")
14 | TEXT_TO_SPEECH_USERNAME_PASSWORD <- Sys.getenv("TEXT_TO_SPEECH_USERNAME_PASSWORD")
15 | IMAGE_API_KEY <- Sys.getenv("IMAGE_API_KEY")
16 | SPEECH_TO_TEXT_USERNAME_PASSWORD <- Sys.getenv("SPEECH_TO_TEXT_USERNAME_PASSWORD")
17 | 
18 | 
19 | # knitr hook function to allow an output.lines option
20 | # e.g., 
21 | #   output.lines=12 prints lines 1:12 ...
22 | #   output.lines=1:12 does the same
23 | #   output.lines=3:15 prints lines ... 3:15 ...
24 | #   output.lines=-(1:8) removes lines 1:8 and prints ... 9:n ...
25 | # No allowance for anything but a consecutive range of lines 26 | # https://stat.ethz.ch/pipermail/r-help/2014-October/422286.html 27 | 28 | hook_output <- knit_hooks$get("output") 29 | knit_hooks$set(output = function(x, options) { 30 | lines <- options$output.lines 31 | if (is.null(lines)) { 32 | return(hook_output(x, options)) # pass to default hook 33 | } 34 | x <- unlist(strsplit(x, "\n")) 35 | more <- "..." 36 | if (length(lines)==1) { # first n lines 37 | if (length(x) > lines) { 38 | # truncate the output, but add .... 39 | x <- c(head(x, lines), more) 40 | } 41 | } else { 42 | x <- c(if (abs(lines[1])>1 | lines[1]<0) more else NULL, 43 | x[lines], 44 | if (length(x)>lines[abs(length(lines))]) more else NULL 45 | ) 46 | } 47 | # paste these lines together 48 | x <- paste(c(x, ""), collapse = "\n") 49 | hook_output(x, options) 50 | }) 51 | 52 | ``` 53 | 54 | R package to wrap function calls to IBM Watson services. 55 | 56 | You must already have an active Bluemix ID and account to obtain credentials for a service; for more information, see [Registering for Bluemix](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-bluemix.shtml#register). 57 | 58 | In addition to an active Bluemix ID, you must already have service credentials from Bluemix for each Watson Service you will be using through congizeR. Please follow the following steps for “[Getting service credentials in Bluemix](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml)”. 
59 | 60 | ####**Install** 61 | 62 | You can install: 63 | 64 | * the latest version from the private GITHUB repo with 65 | ```r 66 | if (packageVersion("devtools") < 1.6) { 67 | install.packages("devtools") 68 | } 69 | if (packageVersion("curl") < 0.9.9) { 70 | install_github("jeroenooms/curl") 71 | } 72 | devtools::install_github("ColumbusCollaboratory/cognizeR", auth_token = "token") 73 | ``` 74 | 75 | 76 | 77 | * You'll probably also want to install the data packages used in the tests: 78 | ```r 79 | install.packages(c("rmsfact", "testthat")) 80 | ``` 81 | 82 | You may want to review the Watson Services documentation for those services available through the R package. 83 | 84 | ####**Authentication** 85 | All Watson services use basic authentication in the form of api keys or username-password combinations. To start using cognizeR functions, you will need to pass your authentication details to them as an argument. There are many ways to manage your passwords, and we do not want to impose any particular structure on this process. If no solution comes to mind, one approach is to use the R environment file to store your authentication details that can be easily and programmatically passed to the cognizeR functions. 86 | 87 | If you already have .Renviron file in your home directory, then you can add something like 88 | 89 | ```{r eval = FALSE} 90 | SERVICE_API_KEY = "key" 91 | ``` 92 | and/or 93 | 94 | ```{r eval = FALSE} 95 | SERVICE_USERNAME_PASSWORD = "username:password" 96 | ``` 97 | (Notice the use of `=` as opposed `<-` when storing environment variables.) 
If not, then you can run the following commands to create and edit the file by inserting the name and value pairs of the environment variables in the above format: 98 | 99 | ```{r eval = FALSE} 100 | r_env <- file.path(normalizePath("~"), ".Renviron") 101 | if (!file.exists(r_env)) file.create(r_env) 102 | file.edit(r_env) 103 | ``` 104 | 105 | After restarting R, you can then access the values of environment variables with 106 | 107 | ```{r eval = FALSE} 108 | Sys.getenv("API_SERVICE_NAME") 109 | ``` 110 | 111 | 112 | ####**cognizeR Watson Services Examples:** 113 | 114 | + [Text Processing](#text) 115 | * [Alchemy Language](#alchemy) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language.html) 116 | + [Sentiment Analysis](#sentiment) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#sentiment) 117 | + [Keyword Extraction](#keyword) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#keywords) 118 | + [Emotion Analysis](#emotion) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#emotion_analysis) 119 | + [Language Detection](#language) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#language) 120 | + [Entity Extraction](#entity) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#entities) 121 | + [Concept Tagging](#concept) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#concepts) 122 | + [Relation Extraction](#relations) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#relations) 123 | + [Taxonomy Classification](#taxonomy) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#taxonomy) 124 | * [Language Translation](#translate) [-IBM Docs-](http://www.ibm.com/watson/developercloud/language-translation.html) 125 | * [Personality Insights](#personality) [-IBM 
Docs-](http://www.ibm.com/watson/developercloud/personality-insights.html) 126 | * [Tone Analyzer](#tone) [-IBM Docs-](http://www.ibm.com/watson/developercloud/tone-analyzer.html) 127 | * [Text-to-Speech](#text-speech) [-IBM Docs-](http://www.ibm.com/watson/developercloud/text-to-speech.html) 128 | + [Image Visual Recognition](#image) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition.html) 129 | * [Classification of Images](#image-classify) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#classify_an_image) 130 | * [Detect Faces in Image](#image-faces) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#detect_faces) 131 | * [Detect Text in Image](#image-text) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#recognize_text) 132 | + [Audio Processing](#audio) 133 | * [Speech-to-Text](#audio-text) [-IBM Docs-](http://www.ibm.com/watson/developercloud/speech-to-text.html) 134 | 135 | 136 | ###Text Processing 137 | ####Alchemy Language 138 | #####Sentiment Analysis [-top-](#toc) 139 | ```{r sentiment, collapse=TRUE} 140 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 141 | result <- text_sentiment(text, YOUR_API_KEY) 142 | str(result) 143 | ``` 144 | 145 | #####Keyword Extraction [-top-](#toc) 146 | ```{r keywords, collapse=TRUE} 147 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 148 | result <- text_keywords(text, YOUR_API_KEY) 149 | str(result) 150 | ``` 151 | 152 | #####Emotion Analysis [-top-](#toc) 153 | ```{r emotion, collapse=TRUE} 154 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 155 | result <- text_emotion(text, YOUR_API_KEY) 156 | str(result) 157 | ``` 158 | 159 | #####Language Detection [-top-](#toc) 160 | ```{r language, collapse=TRUE} 161 | text <- c("Columbus, Ohio is Awesome!", "Mirando hacia adelante a UseR2017 en 
Bruselas!") 162 | result <- text_language(text, YOUR_API_KEY) 163 | str(result) 164 | ``` 165 | 166 | #####Entity Extraction [-top-](#toc) 167 | ```{r entity, collapse=TRUE} 168 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 169 | result <- text_entity(text, YOUR_API_KEY) 170 | str(result) 171 | ``` 172 | 173 | #####Concept Tagging [-top-](#toc) 174 | ```{r concepts, collapse=TRUE} 175 | text <- "Columbus, Ohio is Awesome!" 176 | result <- text_concept(text, YOUR_API_KEY) 177 | str(result) 178 | ``` 179 | 180 | #####Relation Extraction [-top-](#toc) 181 | ```{r relations, collapse=TRUE} 182 | text <- "Columbus, Ohio is Awesome!" 183 | result <- text_relations(text, YOUR_API_KEY) 184 | str(result) 185 | ``` 186 | 187 | #####Taxonomy Classification [-top-](#toc) 188 | ```{r taxonomy, collapse=TRUE} 189 | text <- "Columbus, Ohio is Awesome!" 190 | result <- text_taxonomy(text, YOUR_API_KEY) 191 | str(result) 192 | ``` 193 | 194 | ####Language Translate [-top-](#toc) 195 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. LANG_TRANSLATE_USERNAME_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 196 | ```{r translate, collapse=TRUE} 197 | text <- c("Mirando hacia adelante a UseR2017 en Bruselas!") 198 | result <- text_translate(text, LANG_TRANSLATE_USERNAME_PASSWORD) 199 | str(result) 200 | ``` 201 | 202 | ####Personality Insights [-top-](#toc) 203 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. PERSONALITY_USERNAME_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 
204 | ```{r personality, collapse=TRUE, output.lines=1:35} 205 | text <- paste(replicate(1000, rmsfact::rmsfact()), collapse = ' ') #Ten Richard Stallman Facts used for Personality Insights. 206 | result <- text_personality(text, PERSONALITY_USERNAME_PASSWORD) 207 | str(result) 208 | ``` 209 | 210 | ####Tone Analyzer [-top-](#toc) 211 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. TONE_USERNAME_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 212 | ```{r tone, collapse=TRUE} 213 | text <- c("Columbus, Ohio is Awesome!") 214 | result <- text_tone(text, TONE_USERNAME_PASSWORD) 215 | str(result) 216 | ``` 217 | 218 | ####Text-to-Speech [-top-](#toc) 219 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. TEXT_TO_SPEECH_USERNAME_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 220 | ```{r audio-file-clean, include=FALSE, echo=FALSE} 221 | junk <- dir(path = ".", pattern = "*.ogg") 222 | file.remove(junk) 223 | ``` 224 | ```{r text-speech, collapse=TRUE} 225 | text <- c("Columbus, Ohio is Awesome!") 226 | text_audio(text, TEXT_TO_SPEECH_USERNAME_PASSWORD, directory = '.') 227 | ``` 228 | 229 | ```{r audio-file, include=FALSE, echo=FALSE} 230 | filenames <- list.files(".", pattern="*.ogg", full.names=FALSE) 231 | ``` 232 | The .ogg audio file is written to the current directory. 
233 | 234 | You can listen to the example audio file in the repository: [`r filenames[1]`](`r filenames[1]`) 235 | 236 | 237 | ###Image Visual Recognition 238 | #####Classification of Image [-top-](#toc) 239 | The following Web Services use IBM Bluemix Watson Services IMAGE_API_KEY specific to Image processsing. 240 | ```{r image-classify, collapse=TRUE} 241 | image_face_path <- system.file("extdata/images_faces", package = "cognizer") 242 | images <- list.files(image_face_path, full.names = TRUE) 243 | image_classes <- image_classify(images, IMAGE_API_KEY) 244 | str(image_classes) 245 | ``` 246 | 247 | #####Detect Faces in Image [-top-](#toc) 248 | The following Web Services use IBM Bluemix Watson Services IMAGE_API_KEY specific to Image processsing. 249 | ```{r image-faces, collapse=TRUE} 250 | image_face_path <- system.file("extdata/images_faces", package = "cognizer") 251 | images <- list.files(image_face_path, full.names = TRUE) 252 | image_faces <- image_detectface(images, IMAGE_API_KEY) 253 | str(image_faces) 254 | ``` 255 | 256 | #####Detect Text in Image [-top-](#toc) 257 | The following Web Services use IBM Bluemix Watson Services IMAGE_API_KEY specific to Image processsing. 258 | ```{r image-text, collapse=TRUE} 259 | image_text_path <- system.file("extdata/images_text", package = "cognizer") 260 | images <- list.files(image_text_path, full.names = TRUE) 261 | image_text<- image_detecttext(images, IMAGE_API_KEY) 262 | str(image_text) 263 | ``` 264 | 265 | ###Audio Processing 266 | #####Speech to Text [-top-](#toc) 267 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. SPEECH_TO_TEXT_USERNAME_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 
268 | ```{r audio-text, collapse=TRUE} 269 | audio_path <- system.file("extdata/audio", package = "cognizer") 270 | audios <- list.files(audio_path, full.names = TRUE) 271 | audio_transcript <- audio_text(audios, SPEECH_TO_TEXT_USERNAME_PASSWORD) 272 | str(audio_transcript) 273 | ``` 274 | 275 | 276 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cognizer 2 | ================ 3 | 4 | [![Build Status](https://travis-ci.org/ColumbusCollaboratory/cognizer.svg?branch=master)](https://travis-ci.org/ColumbusCollaboratory/cognizer) 5 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/ColumbusCollaboratory/cognizer?branch=master&svg=true)](https://ci.appveyor.com/project/ColumbusCollaboratory/cognizer) 6 | [![codecov](https://codecov.io/gh/ColumbusCollaboratory/cognizer/branch/master/graph/badge.svg)](https://codecov.io/gh/ColumbusCollaboratory/cognizer) 7 | 8 | 9 | 10 | 11 | R package to wrap function calls to IBM Watson services. 12 | 13 | You must already have an active Bluemix ID to obtain credentials for a service; for more information, see [Registering for Bluemix](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-bluemix.shtml#register). 14 | 15 | In addition to an active Bluemix ID, you must already have service credentials from Bluemix for each Watson Service you will be using through cognizer. Please follow the following steps for “[Getting service credentials in Bluemix](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml)”. 
16 | 17 | #### **Install** 18 | 19 | - You will need these packages: 20 | 21 | ``` r 22 | install.packages("curl") 23 | install.packages("devtools") 24 | devtools::install_github("ColumbusCollaboratory/cognizer") 25 | ``` 26 | 27 | - You'll probably also want to install the data packages used in the tests: 28 | 29 | ``` r 30 | install.packages(c("rmsfact", "testthat")) 31 | ``` 32 | 33 | #### **Authentication** 34 | 35 | All Watson services use basic authentication in the form of API keys or username-password combinations. To start using cognizer functions, you will need to pass your authentication details to them as an argument. There are many ways to manage your passwords, and we do not want to impose any particular structure on this process. If no solution comes to mind, one approach is to use the R environment file to store your authentication details that can be easily and programmatically passed to the cognizer functions. 36 | 37 | If you already have a .Renviron file in your home directory, then you can add something like 38 | 39 | ``` r 40 | SERVICE_API_KEY = "key" 41 | ``` 42 | 43 | and/or 44 | 45 | ``` r 46 | SERVICE_USERNAME_PASSWORD = "username:password" 47 | ``` 48 | 49 | (Notice the use of `=` as opposed to `<-` when storing environment variables.) 
If not, then you can run the following commands to create and edit the file by inserting the name and value pairs of the environment variables in the above format: 50 | 51 | ``` r 52 | r_env <- file.path(normalizePath("~"), ".Renviron") 53 | if (!file.exists(r_env)) file.create(r_env) 54 | file.edit(r_env) 55 | ``` 56 | 57 | After restarting R, you can then access the values of environment variables with 58 | 59 | ``` r 60 | Sys.getenv("SERVICE_API_KEY") 61 | ``` 62 | 63 | #### **cognizer Watson Services Examples:** 64 | 65 | - [Text Processing](#text) 66 | - [Alchemy Language](#alchemy) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language.html) 67 | - [Sentiment Analysis](#sentiment) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#sentiment) 68 | - [Keyword Extraction](#keyword) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#keywords) 69 | - [Emotion Analysis](#emotion) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#emotion_analysis) 70 | - [Language Detection](#language) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#language) 71 | - [Entity Extraction](#entity) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#entities) 72 | - [Concept Tagging](#concept) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#concepts) 73 | - [Relation Extraction](#relations) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#relations) 74 | - [Taxonomy Classification](#taxonomy) [-IBM Docs-](http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/?curl#taxonomy) 75 | - [Language Translation](#translate) [-IBM Docs-](http://www.ibm.com/watson/developercloud/language-translation.html) 76 | - [Personality Insights](#personality) [-IBM Docs-](http://www.ibm.com/watson/developercloud/personality-insights.html) 77 | - 
[Tone Analyzer](#tone) [-IBM Docs-](http://www.ibm.com/watson/developercloud/tone-analyzer.html) 78 | - [Text-to-Speech](#text-speech) [-IBM Docs-](http://www.ibm.com/watson/developercloud/text-to-speech.html) 79 | - [Image Visual Recognition](#image) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition.html) 80 | - [Classification of Images](#image-classify) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#classify_an_image) 81 | - [Detect Faces in Image](#image-faces) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#detect_faces) 82 | - [Detect Text in Image](#image-text) [-IBM Docs-](http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#recognize_text) 83 | - [Audio Processing](#audio) 84 | - [Speech-to-Text](#audio-text) [-IBM Docs-](http://www.ibm.com/watson/developercloud/speech-to-text.html) 85 | 86 | ### Text Processing 87 | 88 | #### Alchemy Language 89 | 90 | ##### Sentiment Analysis [-top-](#toc) 91 | 92 | ``` r 93 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 94 | result <- text_sentiment(text, YOUR_API_KEY) 95 | str(result) 96 | ## List of 2 97 | ## $ :List of 5 98 | ## ..$ status : chr "OK" 99 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 100 | ## ..$ totalTransactions: chr "1" 101 | ## ..$ language : chr "english" 102 | ## ..$ docSentiment :List of 2 103 | ## .. ..$ score: chr "0.736974" 104 | ## .. ..$ type : chr "positive" 105 | ## $ :List of 5 106 | ## ..$ status : chr "OK" 107 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 108 | ## ..$ totalTransactions: chr "1" 109 | ## ..$ language : chr "english" 110 | ## ..$ docSentiment :List of 2 111 | ## .. 
..$ score: chr "0.405182" 112 | ## .. ..$ type : chr "positive" 113 | ``` 114 | 115 | ##### Keyword Extraction [-top-](#toc) 116 | 117 | ``` r 118 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 119 | result <- text_keywords(text, YOUR_API_KEY) 120 | str(result) 121 | ## List of 2 122 | ## $ :List of 5 123 | ## ..$ status : chr "OK" 124 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 125 | ## ..$ totalTransactions: chr "1" 126 | ## ..$ language : chr "english" 127 | ## ..$ keywords :'data.frame': 2 obs. of 2 variables: 128 | ## .. ..$ relevance: chr [1:2] "0.903313" "0.878148" 129 | ## .. ..$ text : chr [1:2] "Columbus" "Ohio" 130 | ## $ :List of 5 131 | ## ..$ status : chr "OK" 132 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 133 | ## ..$ totalTransactions: chr "1" 134 | ## ..$ language : chr "english" 135 | ## ..$ keywords :'data.frame': 2 obs. of 2 variables: 136 | ## .. ..$ relevance: chr [1:2] "0.987472" "0.877147" 137 | ## .. ..$ text : chr [1:2] "Brussels" "UseR2017" 138 | ``` 139 | 140 | ##### Emotion Analysis [-top-](#toc) 141 | 142 | ``` r 143 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 144 | result <- text_emotion(text, YOUR_API_KEY) 145 | str(result) 146 | ## List of 2 147 | ## $ :List of 5 148 | ## ..$ status : chr "OK" 149 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 150 | ## ..$ totalTransactions: chr "1" 151 | ## ..$ language : chr "english" 152 | ## ..$ docEmotions :List of 5 153 | ## .. ..$ anger : chr "0.070822" 154 | ## .. ..$ disgust: chr "0.051115" 155 | ## .. ..$ fear : chr "0.327703" 156 | ## .. 
..$ joy : chr "0.69756" 157 | ## .. ..$ sadness: chr "0.150018" 158 | ## $ :List of 5 159 | ## ..$ status : chr "OK" 160 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 161 | ## ..$ totalTransactions: chr "1" 162 | ## ..$ language : chr "english" 163 | ## ..$ docEmotions :List of 5 164 | ## .. ..$ anger : chr "0.059402" 165 | ## .. ..$ disgust: chr "0.077588" 166 | ## .. ..$ fear : chr "0.123658" 167 | ## .. ..$ joy : chr "0.760328" 168 | ## .. ..$ sadness: chr "0.35755" 169 | ``` 170 | 171 | ##### Language Detection [-top-](#toc) 172 | 173 | ``` r 174 | text <- c("Columbus, Ohio is Awesome!", "Mirando hacia adelante a UseR2017 en Bruselas!") 175 | result <- text_language(text, YOUR_API_KEY) 176 | str(result) 177 | ## List of 2 178 | ## $ :List of 10 179 | ## ..$ status : chr "OK" 180 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 181 | ## ..$ url : chr "" 182 | ## ..$ language : chr "english" 183 | ## ..$ iso-639-1 : chr "en" 184 | ## ..$ iso-639-2 : chr "eng" 185 | ## ..$ iso-639-3 : chr "eng" 186 | ## ..$ ethnologue : chr "http://www.ethnologue.com/show_language.asp?code=eng" 187 | ## ..$ native-speakers: chr "309-400 million" 188 | ## ..$ wikipedia : chr "http://en.wikipedia.org/wiki/English_language" 189 | ## $ :List of 10 190 | ## ..$ status : chr "OK" 191 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 192 | ## ..$ url : chr "" 193 | ## ..$ language : chr "spanish" 194 | ## ..$ iso-639-1 : chr "es" 195 | ## ..$ iso-639-2 : chr "spa" 196 | ## ..$ iso-639-3 : chr "spa" 197 | ## ..$ ethnologue : chr "http://www.ethnologue.com/show_language.asp?code=spa" 198 | ## ..$ native-speakers: chr "350 million" 199 | ## ..$ 
wikipedia : chr "http://en.wikipedia.org/wiki/Spanish_language" 200 | ``` 201 | 202 | ##### Entity Extraction [-top-](#toc) 203 | 204 | ``` r 205 | text <- c("Columbus, Ohio is Awesome!", "Looking forward to UseR2017 in Brussels!") 206 | result <- text_entity(text, YOUR_API_KEY) 207 | str(result) 208 | ## List of 2 209 | ## $ :List of 6 210 | ## ..$ status : chr "OK" 211 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 212 | ## ..$ url : chr "" 213 | ## ..$ language: chr "english" 214 | ## ..$ entities:'data.frame': 2 obs. of 3 variables: 215 | ## .. ..$ count: chr [1:2] "1" "1" 216 | ## .. ..$ text : chr [1:2] "Columbus" "Ohio" 217 | ## .. ..$ type : chr [1:2] "GeopoliticalEntity" "GeopoliticalEntity" 218 | ## ..$ model : chr "ie-en-news" 219 | ## $ :List of 6 220 | ## ..$ status : chr "OK" 221 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 222 | ## ..$ url : chr "" 223 | ## ..$ language: chr "english" 224 | ## ..$ entities:'data.frame': 1 obs. of 3 variables: 225 | ## .. ..$ count: chr "1" 226 | ## .. ..$ text : chr "Brussels" 227 | ## .. ..$ type : chr "GeopoliticalEntity" 228 | ## ..$ model : chr "ie-en-news" 229 | ``` 230 | 231 | ##### Concept Tagging [-top-](#toc) 232 | 233 | ``` r 234 | text <- "Columbus, Ohio is Awesome!" 235 | result <- text_concept(text, YOUR_API_KEY) 236 | str(result) 237 | ## List of 1 238 | ## $ :List of 4 239 | ## ..$ status : chr "OK" 240 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 241 | ## ..$ language: chr "english" 242 | ## ..$ concepts:'data.frame': 1 obs. of 8 variables: 243 | ## .. ..$ text : chr "Columbus, Ohio" 244 | ## .. ..$ relevance: chr "0.911407" 245 | ## .. 
..$ website : chr "http://www.columbus.gov/" 246 | ## .. ..$ dbpedia : chr "http://dbpedia.org/resource/Columbus,_Ohio" 247 | ## .. ..$ freebase : chr "http://rdf.freebase.com/ns/m.01smm" 248 | ## .. ..$ census : chr "http://www.rdfabout.com/rdf/usgov/geo/us/oh/counties/franklin_county/columbus" 249 | ## .. ..$ yago : chr "http://yago-knowledge.org/resource/Columbus,_Ohio" 250 | ## .. ..$ geonames : chr "http://sws.geonames.org/4509177/" 251 | ``` 252 | 253 | ##### Relation Extraction [-top-](#toc) 254 | 255 | ``` r 256 | text <- "Columbus, Ohio is Awesome!" 257 | result <- text_relations(text, YOUR_API_KEY) 258 | str(result) 259 | ## List of 1 260 | ## $ :List of 4 261 | ## ..$ status : chr "OK" 262 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 263 | ## ..$ language: chr "english" 264 | ## ..$ concepts:'data.frame': 1 obs. of 8 variables: 265 | ## .. ..$ text : chr "Columbus, Ohio" 266 | ## .. ..$ relevance: chr "0.911407" 267 | ## .. ..$ website : chr "http://www.columbus.gov/" 268 | ## .. ..$ dbpedia : chr "http://dbpedia.org/resource/Columbus,_Ohio" 269 | ## .. ..$ freebase : chr "http://rdf.freebase.com/ns/m.01smm" 270 | ## .. ..$ census : chr "http://www.rdfabout.com/rdf/usgov/geo/us/oh/counties/franklin_county/columbus" 271 | ## .. ..$ yago : chr "http://yago-knowledge.org/resource/Columbus,_Ohio" 272 | ## .. ..$ geonames : chr "http://sws.geonames.org/4509177/" 273 | ``` 274 | 275 | ##### Taxonomy Classification [-top-](#toc) 276 | 277 | ``` r 278 | text <- "Columbus, Ohio is Awesome!" 
279 | result <- text_taxonomy(text, YOUR_API_KEY) 280 | str(result) 281 | ## List of 1 282 | ## $ :List of 5 283 | ## ..$ status : chr "OK" 284 | ## ..$ usage : chr "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of U"| __truncated__ 285 | ## ..$ totalTransactions: chr "1" 286 | ## ..$ language : chr "english" 287 | ## ..$ taxonomy :'data.frame': 3 obs. of 3 variables: 288 | ## .. ..$ confident: chr [1:3] "no" "no" "no" 289 | ## .. ..$ label : chr [1:3] "/sports/bowling" "/law, govt and politics/law enforcement/highway patrol" "/technology and computing/consumer electronics/tv and video equipment/televisions/lcd tvs" 290 | ## .. ..$ score : chr [1:3] "0.19062" "0.157219" "0.154218" 291 | ``` 292 | 293 | #### Language Translate [-top-](#toc) 294 | 295 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon delimited string. LANG\_TRANSLATE\_USERNAME\_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 296 | 297 | ``` r 298 | text <- c("Mirando hacia adelante a UseR2017 en Bruselas!") 299 | result <- text_translate(text, LANG_TRANSLATE_USERNAME_PASSWORD) 300 | str(result) 301 | ## List of 1 302 | ## $ :List of 3 303 | ## ..$ translations :'data.frame': 1 obs. of 1 variable: 304 | ## .. ..$ translation: chr "Looking forward to UseR2017 in Brussels." 305 | ## ..$ word_count : int 7 306 | ## ..$ character_count: int 46 307 | ``` 308 | 309 | #### Personality Insights [-top-](#toc) 310 | 311 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon delimited string. 
PERSONALITY\_USERNAME\_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 312 | 313 | ``` r 314 | text <- paste(replicate(1000, rmsfact::rmsfact()), collapse = ' ') #Ten Richard Stallman Facts used for Personality Insights. 315 | result <- text_personality(text, PERSONALITY_USERNAME_PASSWORD) 316 | str(result) 317 | ## List of 1 318 | ## $ :List of 6 319 | ## ..$ id : chr "*UNKNOWN*" 320 | ## ..$ source : chr "*UNKNOWN*" 321 | ## ..$ word_count : int 13600 322 | ## ..$ processed_lang: chr "en" 323 | ## ..$ tree :List of 3 324 | ## .. ..$ id : chr "r" 325 | ## .. ..$ name : chr "root" 326 | ## .. ..$ children:'data.frame': 3 obs. of 3 variables: 327 | ## .. .. ..$ id : chr [1:3] "personality" "needs" "values" 328 | ## .. .. ..$ name : chr [1:3] "Big 5" "Needs" "Values" 329 | ## .. .. ..$ children:List of 3 330 | ## .. .. .. ..$ :'data.frame': 1 obs. of 5 variables: 331 | ## .. .. .. .. ..$ id : chr "Agreeableness_parent" 332 | ## .. .. .. .. ..$ name : chr "Agreeableness" 333 | ## .. .. .. .. ..$ category : chr "personality" 334 | ## .. .. .. .. ..$ percentage: num 0.114 335 | ## .. .. .. .. ..$ children :List of 1 336 | ## .. .. .. .. .. ..$ :'data.frame': 5 obs. of 6 variables: 337 | ## .. .. .. .. .. .. ..$ id : chr [1:5] "Openness" "Conscientiousness" "Extraversion" "Agreeableness" ... 338 | ## .. .. .. .. .. .. ..$ name : chr [1:5] "Openness" "Conscientiousness" "Extraversion" "Agreeableness" ... 339 | ## .. .. .. .. .. .. ..$ category : chr [1:5] "personality" "personality" "personality" "personality" ... 340 | ## .. .. .. .. .. .. ..$ percentage : num [1:5] 0.694 0.428 0.703 0.114 0.29 341 | ## .. .. .. .. .. .. ..$ sampling_error: num [1:5] 0.0466 0.0594 0.0453 0.0812 0.0754 342 | ## .. .. .. .. .. .. ..$ children :List of 5 343 | ## .. .. .. .. .. .. .. ..$ :'data.frame': 6 obs. of 5 variables: 344 | ## .. .. .. .. .. .. .. .. ..$ id : chr [1:6] "Adventurousness" "Artistic interests" "Emotionality" "Imagination" ... 
345 | ## .. .. .. .. .. .. .. .. ..$ name : chr [1:6] "Adventurousness" "Artistic interests" "Emotionality" "Imagination" ... 346 | ## .. .. .. .. .. .. .. .. ..$ category : chr [1:6] "personality" "personality" "personality" "personality" ... 347 | ## .. .. .. .. .. .. .. .. ..$ percentage : num [1:6] 0.635 0.554 0.389 0.864 0.639 ... 348 | ## .. .. .. .. .. .. .. .. ..$ sampling_error: num [1:6] 0.0417 0.0864 0.039 0.0535 0.0451 ... 349 | ## .. .. .. .. .. .. .. ..$ :'data.frame': 6 obs. of 5 variables: 350 | ## .. .. .. .. .. .. .. .. ..$ id : chr [1:6] "Achievement striving" "Cautiousness" "Dutifulness" "Orderliness" ... 351 | ## .. .. .. .. .. .. .. .. ..$ name : chr [1:6] "Achievement striving" "Cautiousness" "Dutifulness" "Orderliness" ... 352 | ... 353 | ``` 354 | 355 | #### Tone Analyzer [-top-](#toc) 356 | 357 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. TONE\_USERNAME\_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 358 | 359 | ``` r 360 | text <- c("Columbus, Ohio is Awesome!") 361 | result <- text_tone(text, TONE_USERNAME_PASSWORD) 362 | str(result) 363 | ## List of 1 364 | ## $ :List of 1 365 | ## ..$ document_tone:List of 1 366 | ## .. ..$ tone_categories:'data.frame': 3 obs. of 3 variables: 367 | ## .. .. ..$ tones :List of 3 368 | ## .. .. .. ..$ :'data.frame': 5 obs. of 3 variables: 369 | ## .. .. .. .. ..$ score : num [1:5] 0.0708 0.0511 0.3277 0.6976 0.15 370 | ## .. .. .. .. ..$ tone_id : chr [1:5] "anger" "disgust" "fear" "joy" ... 371 | ## .. .. .. .. ..$ tone_name: chr [1:5] "Anger" "Disgust" "Fear" "Joy" ... 372 | ## .. .. .. ..$ :'data.frame': 3 obs. of 3 variables: 373 | ## .. .. .. .. ..$ score : num [1:3] 0 0 0 374 | ## .. .. .. .. ..$ tone_id : chr [1:3] "analytical" "confident" "tentative" 375 | ## .. .. .. .. 
..$ tone_name: chr [1:3] "Analytical" "Confident" "Tentative" 376 | ## .. .. .. ..$ :'data.frame': 5 obs. of 3 variables: 377 | ## .. .. .. .. ..$ score : num [1:5] 0.24 0.571 0.694 0.308 0.401 378 | ## .. .. .. .. ..$ tone_id : chr [1:5] "openness_big5" "conscientiousness_big5" "extraversion_big5" "agreeableness_big5" ... 379 | ## .. .. .. .. ..$ tone_name: chr [1:5] "Openness" "Conscientiousness" "Extraversion" "Agreeableness" ... 380 | ## .. .. ..$ category_id : chr [1:3] "emotion_tone" "language_tone" "social_tone" 381 | ## .. .. ..$ category_name: chr [1:3] "Emotion Tone" "Language Tone" "Social Tone" 382 | ``` 383 | 384 | #### Text-to-Speech [-top-](#toc) 385 | 386 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. TEXT\_TO\_SPEECH\_USERNAME\_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 387 | 388 | ``` r 389 | text <- c("Columbus, Ohio is Awesome!") 390 | text_audio(text, TEXT_TO_SPEECH_USERNAME_PASSWORD, directory = '.') 391 | ``` 392 | 393 | The .ogg audio file is written to the current directory. 394 | 395 | You can listen to the example audio file in the repository: [1.ogg](1.ogg) 396 | 397 | ### Image Visual Recognition 398 | 399 | ##### Classification of Image [-top-](#toc) 400 | 401 | The following Web Services use IBM Bluemix Watson Services IMAGE\_API\_KEY specific to Image processsing. 402 | 403 | ``` r 404 | image_face_path <- system.file("extdata/images_faces", package = "cognizer") 405 | images <- list.files(image_face_path, full.names = TRUE) 406 | image_classes <- image_classify(images, IMAGE_API_KEY) 407 | str(image_classes) 408 | ## List of 2 409 | ## $ :List of 3 410 | ## ..$ custom_classes : int 0 411 | ## ..$ images :'data.frame': 1 obs. of 2 variables: 412 | ## .. ..$ classifiers:List of 1 413 | ## .. .. 
..$ :'data.frame': 1 obs. of 3 variables: 414 | ## .. .. .. ..$ classes :List of 1 415 | ## .. .. .. .. ..$ :'data.frame': 1 obs. of 3 variables: 416 | ## .. .. .. .. .. ..$ class : chr "person" 417 | ## .. .. .. .. .. ..$ score : num 1 418 | ## .. .. .. .. .. ..$ type_hierarchy: chr "/people" 419 | ## .. .. .. ..$ classifier_id: chr "default" 420 | ## .. .. .. ..$ name : chr "default" 421 | ## .. ..$ image : chr "Einstein_laughing.jpg" 422 | ## ..$ images_processed: int 1 423 | ## $ :List of 3 424 | ## ..$ custom_classes : int 0 425 | ## ..$ images :'data.frame': 1 obs. of 2 variables: 426 | ## .. ..$ classifiers:List of 1 427 | ## .. .. ..$ :'data.frame': 1 obs. of 3 variables: 428 | ## .. .. .. ..$ classes :List of 1 429 | ## .. .. .. .. ..$ :'data.frame': 1 obs. of 3 variables: 430 | ## .. .. .. .. .. ..$ class : chr "person" 431 | ## .. .. .. .. .. ..$ score : num 1 432 | ## .. .. .. .. .. ..$ type_hierarchy: chr "/people" 433 | ## .. .. .. ..$ classifier_id: chr "default" 434 | ## .. .. .. ..$ name : chr "default" 435 | ## .. ..$ image : chr "wkd_birthofinternet_1220-10.jpg" 436 | ## ..$ images_processed: int 1 437 | ``` 438 | 439 | ##### Detect Faces in Image [-top-](#toc) 440 | 441 | The following Web Services use IBM Bluemix Watson Services IMAGE\_API\_KEY specific to Image processsing. 442 | 443 | ``` r 444 | image_face_path <- system.file("extdata/images_faces", package = "cognizer") 445 | images <- list.files(image_face_path, full.names = TRUE) 446 | image_faces <- image_detectface(images, IMAGE_API_KEY) 447 | str(image_faces) 448 | ## List of 2 449 | ## $ :List of 2 450 | ## ..$ images :'data.frame': 1 obs. of 2 variables: 451 | ## .. ..$ faces:List of 1 452 | ## .. .. ..$ :'data.frame': 1 obs. of 4 variables: 453 | ## .. .. .. ..$ age :'data.frame': 1 obs. of 2 variables: 454 | ## .. .. .. .. ..$ min : int 65 455 | ## .. .. .. .. ..$ score: num 0.671 456 | ## .. .. .. ..$ face_location:'data.frame': 1 obs. of 4 variables: 457 | ## .. .. .. .. 
..$ height: int 250 458 | ## .. .. .. .. ..$ left : int 214 459 | ## .. .. .. .. ..$ top : int 105 460 | ## .. .. .. .. ..$ width : int 231 461 | ## .. .. .. ..$ gender :'data.frame': 1 obs. of 2 variables: 462 | ## .. .. .. .. ..$ gender: chr "MALE" 463 | ## .. .. .. .. ..$ score : num 1 464 | ## .. .. .. ..$ identity :'data.frame': 1 obs. of 3 variables: 465 | ## .. .. .. .. ..$ name : chr "Alfred Einstein" 466 | ## .. .. .. .. ..$ score : num 0.953 467 | ## .. .. .. .. ..$ type_hierarchy: chr "/people/alfred einstein" 468 | ## .. ..$ image: chr "Einstein_laughing.jpg" 469 | ## ..$ images_processed: int 1 470 | ## $ :List of 2 471 | ## ..$ images :'data.frame': 1 obs. of 2 variables: 472 | ## .. ..$ faces:List of 1 473 | ## .. .. ..$ :'data.frame': 1 obs. of 3 variables: 474 | ## .. .. .. ..$ age :'data.frame': 1 obs. of 3 variables: 475 | ## .. .. .. .. ..$ max : int 44 476 | ## .. .. .. .. ..$ min : int 35 477 | ## .. .. .. .. ..$ score: num 0.235 478 | ## .. .. .. ..$ face_location:'data.frame': 1 obs. of 4 variables: 479 | ## .. .. .. .. ..$ height: int 320 480 | ## .. .. .. .. ..$ left : int 26 481 | ## .. .. .. .. ..$ top : int 120 482 | ## .. .. .. .. ..$ width : int 289 483 | ## .. .. .. ..$ gender :'data.frame': 1 obs. of 2 variables: 484 | ## .. .. .. .. ..$ gender: chr "MALE" 485 | ## .. .. .. .. ..$ score : num 0.971 486 | ## .. ..$ image: chr "wkd_birthofinternet_1220-10.jpg" 487 | ## ..$ images_processed: int 1 488 | ``` 489 | 490 | ##### Detect Text in Image [-top-](#toc) 491 | 492 | The following Web Services use IBM Bluemix Watson Services IMAGE\_API\_KEY specific to Image processsing. 493 | 494 | ``` r 495 | image_text_path <- system.file("extdata/images_text", package = "cognizer") 496 | images <- list.files(image_text_path, full.names = TRUE) 497 | image_text<- image_detecttext(images, IMAGE_API_KEY) 498 | str(image_text) 499 | ## List of 1 500 | ## $ :List of 2 501 | ## ..$ images :'data.frame': 1 obs. of 2 variables: 502 | ## .. 
..$ error:'data.frame': 1 obs. of 2 variables: 503 | ## .. .. ..$ description: chr "An undefined server error occurred." 504 | ## .. .. ..$ error_id : chr "server_error" 505 | ## .. ..$ image: chr "Did_that_billboard_just_change.jpg" 506 | ## ..$ images_processed: int 0 507 | ``` 508 | 509 | ### Audio Processing 510 | 511 | ##### Speech to Text [-top-](#toc) 512 | 513 | The following Web Services use IBM Bluemix Watson Services Username and Passwords as available on [Bluemix Credentials](https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml) in a colon deliminated string. SPEECH\_TO\_TEXT\_USERNAME\_PASSWORD is a username:password string as defined for each Bluemix Watson Services. 514 | 515 | ``` r 516 | audio_path <- system.file("extdata/audio", package = "cognizer") 517 | audios <- list.files(audio_path, full.names = TRUE) 518 | audio_transcript <- audio_text(audios, SPEECH_TO_TEXT_USERNAME_PASSWORD) 519 | str(audio_transcript) 520 | ## List of 1 521 | ## $ :List of 2 522 | ## ..$ results :'data.frame': 1 obs. of 2 variables: 523 | ## .. ..$ alternatives:List of 1 524 | ## .. .. ..$ :'data.frame': 1 obs. of 2 variables: 525 | ## .. .. .. ..$ confidence: num 0.954 526 | ## .. .. .. ..$ transcript: chr "hello world " 527 | ## .. 
..$ final : logi TRUE 528 | ## ..$ result_index: int 0 529 | ``` 530 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | # Adapt as necessary starting from here 14 | environment: 15 | USE_RTOOLS: true 16 | R_ARCH: x64 17 | R_VERSION: stable 18 | 19 | build_script: 20 | - travis-tool.sh install_deps 21 | 22 | test_script: 23 | - travis-tool.sh run_tests 24 | 25 | on_failure: 26 | - travis-tool.sh dump_logs 27 | 28 | artifacts: 29 | - path: '*.Rcheck\**\*.log' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.out' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.fail' 36 | name: Logs 37 | 38 | - path: '*.Rcheck\**\*.Rout' 39 | name: Logs 40 | 41 | - path: '\*_*.tar.gz' 42 | name: Bits 43 | 44 | - path: '\*_*.zip' 45 | name: Bits 46 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | ci: 3 | - !appveyor 4 | -------------------------------------------------------------------------------- /inst/extdata/audio/HelloWorld-16bit.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COVAIL/cognizer/e99244ddfdc89b8d416dc1d09a2fb290cc2c640d/inst/extdata/audio/HelloWorld-16bit.wav -------------------------------------------------------------------------------- /inst/extdata/images_faces/Einstein_laughing.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/COVAIL/cognizer/e99244ddfdc89b8d416dc1d09a2fb290cc2c640d/inst/extdata/images_faces/Einstein_laughing.jpg -------------------------------------------------------------------------------- /inst/extdata/images_faces/wkd_birthofinternet_1220-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COVAIL/cognizer/e99244ddfdc89b8d416dc1d09a2fb290cc2c640d/inst/extdata/images_faces/wkd_birthofinternet_1220-10.jpg -------------------------------------------------------------------------------- /inst/extdata/images_text/Did_that_billboard_just_change.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/COVAIL/cognizer/e99244ddfdc89b8d416dc1d09a2fb290cc2c640d/inst/extdata/images_text/Did_that_billboard_just_change.jpg -------------------------------------------------------------------------------- /inst/extdata/text/text_examples.txt: -------------------------------------------------------------------------------- 1 | The first principle is that you must not fool yourself and you are the easiest person to fool. 2 | For a successful technology, reality must take precedence over public relations, for Nature cannot be fooled. 3 | What I cannot create, I do not understand. 
4 | -------------------------------------------------------------------------------- /man/audio_text.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/audio_cognizers.R 3 | \name{audio_text} 4 | \alias{audio_text} 5 | \title{IBM Watson Audio Transcriber} 6 | \usage{ 7 | audio_text(audios, userpwd, keep_data = "true", callback = NULL, 8 | model = "en-US_BroadbandModel", continuous = FALSE, 9 | inactivity_timeout = 30, keywords = list(), keywords_threshold = NA, 10 | max_alternatives = 1, word_alternatives_threshold = NA, 11 | word_confidence = FALSE, timestamps = FALSE, profanity_filter = TRUE, 12 | smart_formatting = FALSE, content_type = "audio/wav") 13 | } 14 | \arguments{ 15 | \item{audios}{Character vector (list) of paths to audio files to be 16 | transcribed.} 17 | 18 | \item{userpwd}{Character scalar containing username:password for the service.} 19 | 20 | \item{keep_data}{Character scalar specifying whether to share your data with 21 | Watson services for the purpose of training their models.} 22 | 23 | \item{callback}{Function that can be applied to responses to examine HTTP status, 24 | headers, and content, to debug or to write a custom parser for content. 25 | The default callback parses content into a data.frame while dropping other 26 | response values to make the output easily passable to tidyverse packages like 27 | dplyr or ggplot2. For further details or debugging one can pass a print or a 28 | more complicated function.} 29 | 30 | \item{model}{Character scalar specifying language and bandwidth model. 
Alternatives 31 | are ar-AR_BroadbandModel, en-UK_BroadbandModel, en-UK_NarrowbandModel, 32 | en-US_NarrowbandModel, es-ES_BroadbandModel, es-ES_NarrowbandModel, 33 | fr-FR_BroadbandModel, ja-JP_BroadbandModel, ja-JP_NarrowbandModel, 34 | pt-BR_BroadbandModel, pt-BR_NarrowbandModel, zh-CN_BroadbandModel, 35 | zh-CN_NarrowbandModel.} 36 | 37 | \item{continuous}{Logical scalar specifying whether to return after a first 38 | end-of-speech incident (long pause) or to wait to combine results.} 39 | 40 | \item{inactivity_timeout}{Integer scalar giving the number of seconds after which 41 | the result is returned if no speech is detected.} 42 | 43 | \item{keywords}{List of keywords to be detected in the speech stream.} 44 | 45 | \item{keywords_threshold}{Double scalar from 0 to 1 specifying the lower bound on 46 | confidence to accept detected keywords in speech.} 47 | 48 | \item{max_alternatives}{Integer scalar giving the maximum number of alternative 49 | transcripts to return.} 50 | 51 | \item{word_alternatives_threshold}{Double scalar from 0 to 1 giving lower bound 52 | on confidence of possible words.} 53 | 54 | \item{word_confidence}{Logical scalar indicating whether to return confidence for 55 | each word.} 56 | 57 | \item{timestamps}{Logical scalar indicating whether to return time alignment for 58 | each word.} 59 | 60 | \item{profanity_filter}{Logical scalar indicating whether to censor profane words.} 61 | 62 | \item{smart_formatting}{Logical scalar indicating whether dates, times, numbers, etc. 63 | are to be formatted nicely in the transcript.} 64 | 65 | \item{content_type}{Character scalar showing format of the audio file. Alternatives 66 | are audio/flac, audio/l16;rate=n;channels=k (16 channel limit), 67 | audio/wav (9 channel limit), audio/ogg;codecs=opus, 68 | audio/basic (narrowband models only).} 69 | } 70 | \value{ 71 | List of parsed responses. 
72 | } 73 | \description{ 74 | Convert your audio to transcripts with optional keyword 75 | detection and profanity cleaning. 76 | } 77 | 78 | -------------------------------------------------------------------------------- /man/cognizer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cognizer.R 3 | \docType{package} 4 | \name{cognizer} 5 | \alias{cognizer} 6 | \alias{cognizer-package} 7 | \title{R Wrapper for IBM Watson Services} 8 | \description{ 9 | cognizeR provides interface to IBM Watson services that can 10 | process text, image and audio data. 11 | } 12 | 13 | -------------------------------------------------------------------------------- /man/image_classify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/image_cognizers.R 3 | \name{image_classify} 4 | \alias{image_classify} 5 | \alias{image_detectface} 6 | \alias{image_detecttext} 7 | \title{IBM Watson Image Classifier} 8 | \usage{ 9 | image_classify(images, api_key, keep_data = "true", callback = NULL, 10 | type = "image/jpeg", version = "2016-05-20", accept_language = "en", 11 | batch_size = 1) 12 | 13 | image_detectface(images, api_key, keep_data = "true", callback = NULL, 14 | type = "image/jpeg", version = "2016-05-20", batch_size = 1) 15 | 16 | image_detecttext(images, api_key, keep_data = "true", callback = NULL, 17 | type = "image/jpeg", version = "2016-05-20", batch_size = 1) 18 | } 19 | \arguments{ 20 | \item{images}{Character vector (list) of paths to images or to .zip files containing 21 | upto 100 images.} 22 | 23 | \item{api_key}{Character scalar containing api key obtained from Watson services.} 24 | 25 | \item{keep_data}{Character scalar specifying whether to share your data with 26 | Watson services for the purpose of training their models.} 27 | 
28 | \item{callback}{Function that can be applied to responses to examine http status, 29 | headers, and content, to debug or to write a custom parser for content. 30 | The default callback parses content into a data.frame while dropping other 31 | response values to make the output easily passable to tidyverse packages like 32 | dplyr or ggplot2. For further details or debugging one can pass a fail or a 33 | more complicated function.} 34 | 35 | \item{type}{Character scalar specifying image format. Alternative is "image/png".} 36 | 37 | \item{version}{Character scalar giving version of api to use.} 38 | 39 | \item{accept_language}{Character scalar specifying the output language.} 40 | 41 | \item{batch_size}{Integer scalar giving the number of images in a given path. This 42 | is used when images are zipped together. Check IBM docs for maximum number in a 43 | single zip file.} 44 | } 45 | \value{ 46 | List of parsed responses. 47 | 48 | List of parsed responses. 49 | 50 | List of parsed responses. 51 | } 52 | \description{ 53 | \bold{image_classify}: Uses default classifier to determine the object 54 | category in the image. 55 | 56 | \bold{image_detectface}: Uses default algorithm to detect 57 | a face in the image and provide its coordinates. 58 | 59 | \bold{image_detecttext}: Uses default algorithm to detect 60 | text in the image. 
61 | } 62 | 63 | -------------------------------------------------------------------------------- /man/text_audio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_cognizers.R 3 | \name{text_audio} 4 | \alias{text_audio} 5 | \title{IBM Text-to-speech API.} 6 | \usage{ 7 | text_audio(text, userpwd, keep_data = "true", directory, 8 | voice = "en-US_AllisonVoice", accept = "audio/ogg;codecs=opus") 9 | } 10 | \arguments{ 11 | \item{text}{Character vector containing strings to be processed.} 12 | 13 | \item{userpwd}{Character scalar that contains 'username:password' string.} 14 | 15 | \item{keep_data}{Character scalar specifying whether to share your data with 16 | Watson services for the purpose of training their models.} 17 | 18 | \item{directory}{Character scalar specifying directory for storing audio files.} 19 | 20 | \item{voice}{Character scalar setting language and voice model for the synthesized 21 | voice. Many models are available: de-DE_BirgitVoice, de-DE_DieterVoice, 22 | en-GB_KateVoice, en-US_LisaVoice, en-US_MichaelVoice, es-ES_EnriqueVoice, 23 | es-ES_LauraVoice, es-US_SofiaVoice, fr-FR_ReneeVoice, it-IT_FrancescaVoice, 24 | ja-JP_EmiVoice, pt-BR_IsabelaVoice.} 25 | 26 | \item{accept}{Character scalar specifying format for the audio. Alternatives are 27 | audio/wav, audio/flac, audio/l16, audio/basic.} 28 | } 29 | \value{ 30 | Audio file with selected format is saved into selected directory. The name 31 | is based on integer representation of UTF time and a number of characters of the 32 | processed text. 33 | 34 | Logical scalar is returned invisibly. 35 | } 36 | \description{ 37 | Synthesizes an audio record from text. 38 | } 39 | \seealso{ 40 | Check \url{http://www.ibm.com/watson/developercloud/text-to-speech.html} 41 | for further documentation, and \url{https://text-to-speech-demo.mybluemix.net/} 42 | for a web demo. 
43 | } 44 | 45 | -------------------------------------------------------------------------------- /man/text_personality.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_cognizers.R 3 | \name{text_personality} 4 | \alias{text_personality} 5 | \title{IBM personality analysis of text} 6 | \usage{ 7 | text_personality(text, userpwd, keep_data = "true", callback = NULL, 8 | model_id = "es-en-conversational", raw_scores = "false", 9 | consumption_preferences = "false", csv_headers = "false", 10 | version = "2020-01-01", content_type = "text/plain; charset=utf-8", 11 | content_language = "en", accept = "application/json", 12 | accept_language = "en") 13 | } 14 | \arguments{ 15 | \item{text}{Character vector.} 16 | 17 | \item{userpwd}{Character scalar that contains 'username:password' string.} 18 | 19 | \item{keep_data}{Character scalar specifying whether to share your data with 20 | Watson services for the purpose of training their models.} 21 | 22 | \item{callback}{Function that can be applied to responses to examine http status, 23 | headers, and content, to debug or to write a custom parser for content. 24 | The default callback parses content into a data.frame while dropping other 25 | response values to make the output easily passable to tidyverse packages like 26 | dplyr or ggplot2. For further details or debugging one can pass a fail or a 27 | more compicated function.} 28 | 29 | \item{model_id}{Character scalar formated as 'source-target-domain'. 30 | Source language (Arabic, Brazilian Portuguese, English, French, Italian, 31 | or Spanish), target language (Arabic, Brazilian Portuguese, English, French, 32 | Italian, or Spanish) and domain of text (conversational, news, patent). 
33 | Check IBM documentation for other language mappings.} 34 | 35 | \item{raw_scores}{Character scalar showing whether to include only normalized 36 | statistics or also raw statistics.} 37 | 38 | \item{consumption_preferences}{Character scalar showing whether to 39 | return consumption preferences} 40 | 41 | \item{csv_headers}{Character scalar showing whether to return column labels when 42 | Accept-Content is set to 'text/csv'.} 43 | 44 | \item{version}{Character scalar giving date that specifies the algorithm that went 45 | operational on or before the date. Future dates select the most recent algorithm.} 46 | 47 | \item{content_type}{Character scalar setting input data type header. Alternatives 48 | are 'application/json; charset=utf-8' and 'text/html; charset=ISO-8859-1'.} 49 | 50 | \item{content_language}{Character scalar setting input language. Alternatives are 51 | 'ar' (Arabic), 'es' (Spanish), 'ja' (Japanese).} 52 | 53 | \item{accept}{Character scalar that specifies response format. Alternative is 54 | text/plain.} 55 | 56 | \item{accept_language}{Character scalar setting output langauge. Alternatives are 57 | 'ar' (Arabic), 'de' (German), 'es' (Spanish), 'fr' (French), 'it' (Italian), 58 | 'ja' (Japanese), 'ko' (Korean), 'pt-br' (Brazilian Portuguese), 59 | 'zh-cn' (Simplified Chinese), 'zh-tw' (Traditional Chinese).} 60 | } 61 | \value{ 62 | List containing parsed content. 63 | } 64 | \description{ 65 | Analyze your text along the Big 5 dimensions of personality. 66 | } 67 | \seealso{ 68 | Check \url{http://www.ibm.com/watson/developercloud/doc/personality-insights/} 69 | for further documentation, and \url{https://personality-insights-livedemo.mybluemix.net/} 70 | for a web demo. 
71 | } 72 | 73 | -------------------------------------------------------------------------------- /man/text_sentiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_cognizers.R 3 | \name{text_sentiment} 4 | \alias{text_concept} 5 | \alias{text_emotion} 6 | \alias{text_entity} 7 | \alias{text_keywords} 8 | \alias{text_language} 9 | \alias{text_relations} 10 | \alias{text_sentiment} 11 | \alias{text_taxonomy} 12 | \title{Process text with IBM Alchemy Language algorithms} 13 | \usage{ 14 | text_sentiment(text, api_key, output_mode = "json", show_source = 0, 15 | keep_data = "true", callback = NULL) 16 | 17 | text_keywords(text, api_key, output_mode = "json", show_source = 0, 18 | keep_data = "true", callback = NULL, max_retrieve = 50, 19 | knowledge_graph = 0, sentiment = 0) 20 | 21 | text_emotion(text, api_key, output_mode = "json", show_source = 0, 22 | keep_data = "true", callback = NULL) 23 | 24 | text_language(text, api_key, output_mode = "json", show_source = 0, 25 | keep_data = "true", callback = NULL) 26 | 27 | text_entity(text, api_key, output_mode = "json", show_source = 0, 28 | keep_data = "true", callback = NULL, max_retrieve = 50, 29 | knowledge_graph = 0, sentiment = 0, model = "ie-en-news", 30 | coreference = 1, disambiguate = 1, linked_data = 1, quotations = 0, 31 | structured_entity = 1) 32 | 33 | text_concept(text, api_key, output_mode = "json", show_source = 0, 34 | keep_data = "true", callback = NULL, max_retrieve = 8, 35 | knowledge_graph = 0, linked_data = 1) 36 | 37 | text_relations(text, api_key, output_mode = "json", show_source = 0, 38 | keep_data = "true", callback = NULL, model = "ie-en-news") 39 | 40 | text_taxonomy(text, api_key, output_mode = "json", show_source = 0, 41 | keep_data = "true", callback = NULL, max_retrieve = 50, 42 | knowledge_graph = 0, sentiment = 0, model = "ie-en-news", 43 | coreference = 1, 
disambiguate = 1, linked_data = 1, quotations = 0, 44 | structured_entity = 1) 45 | } 46 | \arguments{ 47 | \item{text}{Character vector containing strings to be processed.} 48 | 49 | \item{api_key}{Character scalar containing api key obtained from Watson services.} 50 | 51 | \item{output_mode}{Character scalar specifying returned data structure. 52 | Alternative is xml.} 53 | 54 | \item{show_source}{Intenger scalar specifying whether to send text 55 | string back or not.} 56 | 57 | \item{keep_data}{Character scalar specifying whether to share your data with 58 | Watson services for the purpose of training their models.} 59 | 60 | \item{callback}{Function that can be applied to responses to examine http status, 61 | headers, and content, to debug or to write a custom parser for content. 62 | The default callback parses content into a data.frame while dropping other 63 | response values to make the output easily passable to tidyverse packages like 64 | dplyr or ggplot2. For further details or debugging one can pass a fail or a 65 | more compicated function.} 66 | 67 | \item{max_retrieve}{Integer scalar fixing the number of keywords to extract 68 | from text.} 69 | 70 | \item{knowledge_graph}{Integer scalar indicating whether to grab a knowledge 71 | graph associated with keywords. This is an additional transaction.} 72 | 73 | \item{sentiment}{Integer scalar indicating whether to infer sentiment of 74 | keywords, expressed as category and number. This is an additional transaction.} 75 | 76 | \item{model}{Character scalar specifying one of three models which will extract 77 | entities. 
Alternatives are 'ie-es-news', 'ie-ar-news' or a custom model.} 78 | 79 | \item{coreference}{Integer scalar specifying whether to resolve coreferences into 80 | detected entities.} 81 | 82 | \item{disambiguate}{Integer scalar specifying whether to disambiguate 83 | detected entities.} 84 | 85 | \item{linked_data}{Integer scalar specifying whether to include links for 86 | related data.} 87 | 88 | \item{quotations}{Integer scalar specifying whether to include quotes related 89 | to detected entities.} 90 | 91 | \item{structured_entity}{Integer scalar specifying whether to extract structured 92 | entities, such as Quantity, EmailAddress, TwitterHandle, Hashtag, and IPAddress.} 93 | } 94 | \value{ 95 | Data.frame containing parsed content in a tidy fashion. 96 | } 97 | \description{ 98 | \bold{text_sentiment}: Takes a vector of text and sends to Watson 99 | services for various analyses. Requires basic authentication using api key. 100 | 101 | \bold{text_keywords}: Keywords analysis extracts keywords from text, and 102 | can optionally provide their sentiment and/or associated knowledge graph. 103 | 104 | \bold{text_emotion}: Emotion analysis of text infers 105 | scores for 7 basic emotions. 106 | 107 | \bold{text_language}: Language detection infers 108 | language of the provided text. Works best with at least 100 words. 109 | 110 | \bold{text_entity}: Entity analysis extracts names of people, 111 | products, places from the provided text. Additional arguments can provide 112 | sentiment, knowledge graphs and quotations related to inferred entities. 113 | 114 | \bold{text_concept}: Concept analysis infers categories based on 115 | the text, but that are not necessarily in the text. Additional arguments can 116 | provide sentiment and/or knowledge graphs related to inferred concepts. 117 | 118 | \bold{text_relations}: Relation analysis infers associations among 119 | entities. 
120 | 121 | \bold{text_taxonomy}: Taxonomy analysis infers hierarchical relations 122 | among entities upto 5 levels deep. 123 | } 124 | \seealso{ 125 | Check \url{http://www.ibm.com/watson/developercloud/alchemy-language.html} 126 | for further documentation, and \url{https://alchemy-language-demo.mybluemix.net/?cm_mc_uid=70865809903714586773519&cm_mc_sid_50200000=1468266111} 127 | for a web demo. 128 | } 129 | 130 | -------------------------------------------------------------------------------- /man/text_tone.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_cognizers.R 3 | \name{text_tone} 4 | \alias{text_tone} 5 | \title{IBM Tone Analyzer of Text} 6 | \usage{ 7 | text_tone(text, userpwd, keep_data = "true", callback = NULL, 8 | content_type = "text/plain; charset=utf-8", version = "2016-05-19", 9 | tones = "", sentences = "true") 10 | } 11 | \arguments{ 12 | \item{text}{Character vector.} 13 | 14 | \item{userpwd}{Character scalar that contains 'username:password' string.} 15 | 16 | \item{keep_data}{Character scalar specifying whether to share your data with 17 | Watson services for the purpose of training their models.} 18 | 19 | \item{callback}{Function that can be applied to responses to examine http status, 20 | headers, and content, to debug or to write a custom parser for content. 21 | The default callback parses content into a data.frame while dropping other 22 | response values to make the output easily passable to tidyverse packages like 23 | dplyr or ggplot2. 
For further details or debugging one can pass a fail or a 24 | more complicated function.} 25 | 26 | \item{content_type}{Character scalar specifying the HTTP header with type of text 27 | and its encoding.} 28 | 29 | \item{version}{Character scalar that specifies the date of the most recent version of 30 | the algorithm.} 31 | 32 | \item{tones}{Character scalar that allows selecting one of the three possible tones: 33 | emotion, language, social.} 34 | 35 | \item{sentences}{Character scalar specifying whether to do analysis at the 36 | sentence level.} 37 | } 38 | \value{ 39 | Data.frame containing parsed content in a tidy fashion. 40 | } 41 | \description{ 42 | Infers three types of tone - emotion, language, social - from the 43 | whole text or at sentence level. 44 | } 45 | \seealso{ 46 | Check \url{http://www.ibm.com/watson/developercloud/doc/tone-analyzer/} 47 | for further documentation, and \url{https://tone-analyzer-demo.mybluemix.net/?cm_mc_uid=70865809903714586773519&cm_mc_sid_50200000=1468424667} 48 | for a web demo. 
49 | } 50 | 51 | -------------------------------------------------------------------------------- /man/text_translate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_cognizers.R 3 | \name{text_translate} 4 | \alias{text_translate} 5 | \title{IBM Language Translation API.} 6 | \usage{ 7 | text_translate(text, userpwd, keep_data = "true", callback = NULL, 8 | model_id = "es-en-conversational", accept = "application/json") 9 | } 10 | \arguments{ 11 | \item{text}{Character vector.} 12 | 13 | \item{userpwd}{Character scalar that contains 'username:password' string.} 14 | 15 | \item{keep_data}{Character scalar specifying whether to share your data with 16 | Watson services for the purpose of training their models.} 17 | 18 | \item{callback}{Function that can be applied to responses to examine http status, 19 | headers, and content, to debug or to write a custom parser for content. 20 | The default callback parses content into a data.frame while dropping other 21 | response values to make the output easily passable to tidyverse packages like 22 | dplyr or ggplot2. For further details or debugging one can pass a fail or a 23 | more compicated function.} 24 | 25 | \item{model_id}{Character scalar formated as 'source-target-domain'. 26 | Source language (Arabic, Brazilian Portuguese, English, French, Italian, 27 | or Spanish), target language (Arabic, Brazilian Portuguese, English, French, 28 | Italian, or Spanish) and domain of text (conversational, news, patent). 29 | Check IBM documentation for other language mappings.} 30 | 31 | \item{accept}{Character scalar that specifies response format. Alternative is 32 | text/plain.} 33 | } 34 | \value{ 35 | Data.frame containing parsed content in a tidy fashion. 36 | } 37 | \description{ 38 | Translates text from Arabic, French, 39 | Portuguese or Spanish to English. 
Requires basic authentication using 40 | username and password. 41 | } 42 | \seealso{ 43 | Check \url{http://www.ibm.com/watson/developercloud/language-translation.html} 44 | for further documentation, and \url{https://language-translator-demo.mybluemix.net/} 45 | for a web demo. 46 | } 47 | 48 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(cognizer) 3 | 4 | test_check("cognizer") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_audio.R: -------------------------------------------------------------------------------- 1 | 2 | context("Checking audio processing functions") 3 | 4 | 5 | userpwd <- Sys.getenv("SPEECH_TO_TEXT_USERNAME_PASSWORD") 6 | audio_path <- system.file("extdata/audio", package = "cognizer") 7 | audios <- list.files(audio_path, full.names = TRUE) 8 | test_that( 9 | "image classifications returns successfully", 10 | { 11 | if (identical(userpwd, "")) skip("no authentication provided") 12 | test <- audio_text(audios, userpwd) 13 | expect_is(test, "list") 14 | } 15 | ) 16 | 17 | test_that( 18 | "image classification handles errors properly", 19 | { 20 | if (identical(userpwd, "")) skip("no authentication provided") 21 | test <- audio_text(audios, substr(userpwd, 1, 10)) 22 | expect_is(test, "list") 23 | } 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /tests/testthat/test_image.R: -------------------------------------------------------------------------------- 1 | 2 | context("Checking image processing functions") 3 | 4 | 5 | api_key <- Sys.getenv("IMAGE_API_KEY") 6 | image_face_path <- system.file("extdata/images_faces", package = "cognizer") 7 | images <- list.files(image_face_path, full.names = TRUE) 8 | test_that( 9 | "image classifications returns successfully", 10 | { 11 | 
if (identical(api_key, "")) skip("no authentication provided") 12 | test <- image_classify(images, api_key) 13 | expect_is(test, "list") 14 | } 15 | ) 16 | test_that( 17 | "image classifications handles errors successfully", 18 | { 19 | if (identical(api_key, "")) skip("no authentication provided") 20 | test <- image_classify(images, substr(api_key, 1, 8)) 21 | expect_is(test, "list") 22 | } 23 | ) 24 | 25 | 26 | test_that( 27 | "image face detection returns successfully", 28 | { 29 | if (identical(api_key, "")) skip("no authentication provided") 30 | test <- image_detectface(images, api_key) 31 | expect_is(test, "list") 32 | } 33 | ) 34 | test_that( 35 | "image face detection handles errors successfully", 36 | { 37 | if (identical(api_key, "")) skip("no authentication provided") 38 | test <- image_detectface(images, substr(api_key, 1, 8)) 39 | expect_is(test, "list") 40 | } 41 | ) 42 | 43 | 44 | image_text_path <- system.file("extdata/images_text", package = "cognizer") 45 | images <- list.files(image_text_path, full.names = TRUE) 46 | test_that( 47 | "image text detection returns successfully", 48 | { 49 | if (identical(api_key, "")) skip("no authentication provided") 50 | test <- image_detecttext(images, api_key) 51 | expect_is(test, "list") 52 | } 53 | ) 54 | test_that( 55 | "image text detection handles errors successfully", 56 | { 57 | if (identical(api_key, "")) skip("no authentication provided") 58 | test <- image_detecttext(images, substr(api_key, 1, 8)) 59 | expect_is(test, "list") 60 | } 61 | ) 62 | -------------------------------------------------------------------------------- /tests/testthat/test_text.R: -------------------------------------------------------------------------------- 1 | 2 | context("Checking text processing functions") 3 | 4 | 5 | api_key <- Sys.getenv("ALCHEMY_API_KEY") 6 | text_path <- system.file( 7 | "extdata/text", 8 | "text_examples.txt", 9 | package = "cognizer" 10 | ) 11 | text <- readLines(text_path) 12 | 13 | test_that( 14 
| "sentiment analysis returns successfully", 15 | { 16 | if (identical(api_key, "")) skip("no authentication provided") 17 | test <- text_sentiment(text, api_key) 18 | expect_is(test, "list") 19 | } 20 | ) 21 | 22 | test_that( 23 | "keyword analysis returns successfully", 24 | { 25 | if (identical(api_key, "")) skip("no authentication provided") 26 | test <- text_keywords(text, api_key) 27 | expect_is(test, "list") 28 | } 29 | ) 30 | 31 | test_that( 32 | "emotion analysis returns successfully", 33 | { 34 | if (identical(api_key, "")) skip("no authentication provided") 35 | test <- text_emotion(text, api_key) 36 | expect_is(test, "list") 37 | } 38 | ) 39 | 40 | 41 | test_that( 42 | "language detection returns successfully", 43 | { 44 | if (identical(api_key, "")) skip("no authentication provided") 45 | test <- text_language(text, api_key) 46 | expect_is(test, "list") 47 | } 48 | ) 49 | 50 | 51 | test_that( 52 | "entity extraction returns successfully", 53 | { 54 | if (identical(api_key, "")) skip("no authentication provided") 55 | test <- text_entity(text, api_key) 56 | expect_is(test, "list") 57 | } 58 | ) 59 | 60 | 61 | test_that( 62 | "concept extraction returns successfully", 63 | { 64 | if (identical(api_key, "")) skip("no authentication provided") 65 | test <- text_concept(text, api_key) 66 | expect_is(test, "list") 67 | } 68 | ) 69 | 70 | test_that( 71 | "alchemy analysis errors are handled successfully", 72 | { 73 | if (identical(api_key, "")) skip("no authentication provided") 74 | test <- text_sentiment(text, substr(api_key, 1, 8)) 75 | expect_is(test, "list") 76 | } 77 | ) 78 | 79 | 80 | 81 | userpwd <- Sys.getenv("LANG_TRANSLATE_USERNAME_PASSWORD") 82 | text <- "hola amigo" 83 | test_that( 84 | "language translation returns successfully", 85 | { 86 | if (identical(userpwd, "")) skip("no authentication provided") 87 | test <- text_translate(text, userpwd) 88 | expect_is(test, "list") 89 | } 90 | ) 91 | test_that( 92 | "language translation errors are 
handled successfully", 93 | { 94 | if (identical(userpwd, "")) skip("no authentication provided") 95 | test <- text_translate(text, substr(userpwd, 1, 8)) 96 | expect_is(test, "list") 97 | } 98 | ) 99 | 100 | userpwd <- Sys.getenv("PERSONALITY_USERNAME_PASSWORD") 101 | set.seed(539843) 102 | text <- paste(replicate(1000, rmsfact::rmsfact()), collapse = ' ') 103 | test_that( 104 | "personality insight returns successfully", 105 | { 106 | if (identical(userpwd, "")) skip("no authentication provided") 107 | test <- text_personality(text, userpwd) 108 | expect_is(test, "list") 109 | } 110 | ) 111 | test_that( 112 | "personality insight errors are handled successfully", 113 | { 114 | if (identical(userpwd, "")) skip("no authentication provided") 115 | test <- text_personality(text, substr(userpwd, 1, 8)) 116 | expect_is(test, "list") 117 | } 118 | ) 119 | 120 | 121 | userpwd <- Sys.getenv("TONE_USERNAME_PASSWORD") 122 | test_that( 123 | "tone analyzer returns successfully", 124 | { 125 | if (identical(userpwd, "")) skip("no authentication provided") 126 | test <- text_tone(text, userpwd) 127 | expect_is(test, "list") 128 | } 129 | ) 130 | test_that( 131 | "tone analyzer errors are handled successfully", 132 | { 133 | if (identical(userpwd, "")) skip("no authentication provided") 134 | test <- text_tone(text, substr(userpwd, 1, 8)) 135 | expect_is(test, "list") 136 | } 137 | ) 138 | 139 | 140 | userpwd <- Sys.getenv("TEXT_TO_SPEECH_USERNAME_PASSWORD") 141 | set.seed(539843) 142 | text <- rmsfact::rmsfact() 143 | test_that( 144 | "text to speech synthesizer returns successfully", 145 | { 146 | if (identical(userpwd, "")) skip("no authentication provided") 147 | tmp <- tempdir() 148 | on.exit(unlink(file.path(tmp, "1.ogg"))) 149 | test <- text_audio(text, userpwd, directory = tmp) 150 | expect_true(test) 151 | expect_identical(list.files(tmp, ".ogg"), "1.ogg") 152 | expect_gt(file.size(list.files(tmp, ".ogg", full.names = TRUE)), 0) 153 | } 154 | ) 155 | 156 | 
--------------------------------------------------------------------------------