├── .github
│   ├── .gitignore
│   └── workflows
│       └── R-CMD-check.yaml
├── logo.png
├── R
│   ├── sysdata.rda
│   ├── toUpper.R
│   ├── isibib2df.R
│   ├── scopus2df.R
│   ├── logo.R
│   ├── removeStrangeChar.R
│   ├── stopwords.R
│   ├── customTheme.R
│   ├── ltwa.R
│   ├── plot.bibliodendrogram.R
│   ├── trimES.R
│   ├── trim.R
│   ├── trim.leading.R
│   ├── bibtag.R
│   ├── countries.R
│   ├── importFiles.R
│   ├── biblioshiny.R
│   ├── readFiles.R
│   ├── timeslice.R
│   ├── cochrane2df.R
│   ├── sourceGrowth.R
│   ├── net2VOSviewer.R
│   ├── splitCommunities.R
│   ├── localCitations.R
│   ├── keywordAssoc.R
│   ├── net2Pajek.R
│   ├── pubmed2df.R
│   ├── idByAuthor.R
│   ├── duplicatedMatching.R
│   ├── dominance.R
│   ├── tableTag.R
│   ├── isi2df.R
│   ├── missingData.R
│   ├── csvLens2df.R
│   ├── bradford.R
│   ├── mergeDbSources.R
│   ├── csvScopus2df.R
│   ├── normalizeSimilarity.R
│   └── keywordGrowth.R
├── data
│   ├── logo.rda
│   ├── ltwa.rda
│   ├── bibtag.rda
│   ├── countries.rda
│   ├── stopwords.rda
│   └── customTheme.rda
├── cran-comments.md
├── inst
│   ├── biblioshiny
│   │   ├── hexagon.png
│   │   ├── www
│   │   │   ├── logo.jpg
│   │   │   ├── logo.png
│   │   │   ├── ORCID.jpg
│   │   │   ├── logo2.jpg
│   │   │   ├── logo3.png
│   │   │   ├── logoAI.jpg
│   │   │   ├── ai_small2.gif
│   │   │   ├── openalex.jpg
│   │   │   ├── tall_logo.jpg
│   │   │   ├── workflow.jpg
│   │   │   └── table_DBformats.jpg
│   │   └── libraries.R
│   └── CITATION
├── man
│   ├── figures
│   │   ├── README-Co-Word Analysis-1.png
│   │   ├── README-Co-Word Analysis-2.png
│   │   ├── README-Co-Word Analysis-3.png
│   │   ├── README-Co-Word Analysis-4.png
│   │   ├── README-unnamed-chunk-11-1.png
│   │   ├── README-unnamed-chunk-11-2.png
│   │   ├── README-unnamed-chunk-12-1.png
│   │   ├── README-unnamed-chunk-13-1.png
│   │   ├── README-unnamed-chunk-14-1.png
│   │   ├── README-unnamed-chunk-15-1.png
│   │   ├── README-unnamed-chunk-16-1.png
│   │   ├── README-unnamed-chunk-17-1.png
│   │   ├── README-unnamed-chunk-9-1.png
│   │   ├── README-Co-citation network-1.png
│   │   ├── README-Country collaboration-1.png
│   │   ├── README-Keyword c-occurrences-1.png
│   │   ├── README-plot generic function-1.png
│   │   ├── README-plot generic function-2.png
│   │   ├── README-plot generic function-3.png
│   │   ├── README-plot generic function-4.png
│   │   ├── README-plot generic function-5.png
│   │   ├── README-Keyword co-occurrences-1.png
│   │   └── README-Historical Co-citation network-1.png
│   ├── logo.Rd
│   ├── remove_diacritics.Rd
│   ├── get_iso4_stop_words.Rd
│   ├── stopwords.Rd
│   ├── customTheme.Rd
│   ├── print_author_works_summary.Rd
│   ├── ltwa.Rd
│   ├── prepare_ltwa_lookup.Rd
│   ├── plot.bibliodendrogram.Rd
│   ├── trim.Rd
│   ├── trimES.Rd
│   ├── create_journal_iso4_lookup.Rd
│   ├── trim.leading.Rd
│   ├── bibtag.Rd
│   ├── abbreviate_term.Rd
│   ├── countries.Rd
│   ├── normalize_journal_to_iso4.Rd
│   ├── abbreviate_journal_title.Rd
│   ├── lifeCycle.Rd
│   ├── plot.bibliometrix.Rd
│   ├── summary.bibliometrix_netstat.Rd
│   ├── readFiles.Rd
│   ├── threeFieldsPlot.Rd
│   ├── sourceGrowth.Rd
│   ├── mergeKeywords.Rd
│   ├── biblioshiny.Rd
│   ├── authorProdOverTime.Rd
│   ├── convert_scopus_new_to_classic.Rd
│   ├── plotThematicEvolution.Rd
│   ├── net2Pajek.Rd
│   ├── bradford.Rd
│   ├── timeslice.Rd
│   ├── net2VOSviewer.Rd
│   ├── lotka.Rd
│   ├── missingData.Rd
│   ├── get_authors_summary.Rd
│   ├── mergeDbSources.Rd
│   ├── splitCommunities.Rd
│   ├── idByAuthor.Rd
│   ├── keywordAssoc.Rd
│   ├── dominance.Rd
│   ├── tableTag.Rd
│   ├── findAuthorWorks.Rd
│   ├── localCitations.Rd
│   ├── citations.Rd
│   ├── summary.bibliometrix.Rd
│   ├── histNetwork.Rd
│   ├── KeywordGrowth.Rd
│   ├── normalizeCitationScore.Rd
│   ├── networkStat.Rd
│   ├── duplicatedMatching.Rd
│   ├── histPlot.Rd
│   ├── fieldByYear.Rd
│   ├── metaTagExtraction.Rd
│   ├── rpys.Rd
│   ├── Hindex.Rd
│   ├── retrievalByAuthorID.Rd
│   ├── biblioAnalysis.Rd
│   ├── normalizeSimilarity.Rd
│   ├── authorBio.Rd
│   ├── assignEvolutionColors.Rd
│   ├── convert2df.Rd
│   ├── thematicMap.Rd
│   ├── termExtraction.Rd
│   ├── thematicEvolution.Rd
│   └── cocMatrix.Rd
├── .gitignore
├── .Rbuildignore
├── bibliometrix.Rproj
├── LICENCE
├── COPYING
├── DESCRIPTION
└── NEWS.md

/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/logo.png
--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/R/sysdata.rda
--------------------------------------------------------------------------------
/data/logo.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/logo.rda
--------------------------------------------------------------------------------
/data/ltwa.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/ltwa.rda
--------------------------------------------------------------------------------
/data/bibtag.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/bibtag.rda
--------------------------------------------------------------------------------
/R/toUpper.R:
--------------------------------------------------------------------------------
1 | toUpper <- function(D) {
2 |   stringi::stri_trans_toupper(D, locale = "en")
3 | }
4 | 
--------------------------------------------------------------------------------
/data/countries.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/countries.rda
--------------------------------------------------------------------------------
/data/stopwords.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/stopwords.rda
--------------------------------------------------------------------------------
/data/customTheme.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/customTheme.rda
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | 
3 | 0 errors | 0 warnings | 1 note
4 | 
5 | * This is a new release.
6 | -------------------------------------------------------------------------------- /inst/biblioshiny/hexagon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/hexagon.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/ORCID.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/ORCID.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo2.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo3.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/logoAI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logoAI.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/ai_small2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/ai_small2.gif -------------------------------------------------------------------------------- /inst/biblioshiny/www/openalex.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/openalex.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/tall_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/tall_logo.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/workflow.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/table_DBformats.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/table_DBformats.jpg 
-------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-1.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-2.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-3.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-4.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-11-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-11-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- 
/man/figures/README-unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /man/figures/README-Co-citation network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-citation network-1.png -------------------------------------------------------------------------------- /man/figures/README-Country collaboration-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Country collaboration-1.png -------------------------------------------------------------------------------- /man/figures/README-Keyword c-occurrences-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Keyword c-occurrences-1.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-1.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-2.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-3.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-4.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-5.png -------------------------------------------------------------------------------- /man/figures/README-Keyword co-occurrences-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Keyword co-occurrences-1.png -------------------------------------------------------------------------------- /man/figures/README-Historical 
Co-citation network-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Historical Co-citation network-1.png
--------------------------------------------------------------------------------
/R/isibib2df.R:
--------------------------------------------------------------------------------
1 | isibib2df <- function(D) {
2 |   # this is a legacy function (for old scripts)
3 |   DATA <- bib2df(D, dbsource = "isi")
4 | 
5 |   return(DATA)
6 | }
7 | 
--------------------------------------------------------------------------------
/R/scopus2df.R:
--------------------------------------------------------------------------------
1 | scopus2df <- function(D) {
2 |   # this is a legacy function (for old scripts)
3 |   DATA <- bib2df(D, dbsource = "scopus")
4 | 
5 |   return(DATA)
6 | }
7 | 
--------------------------------------------------------------------------------
/R/logo.R:
--------------------------------------------------------------------------------
1 | #' Bibliometrix logo.
2 | #'
3 | #' The matrix contains the RGB representation of the official bibliometrix logo.\cr
4 | #'
5 | #' @format A matrix with 927 rows and 800 columns.
6 | #'
7 | #' @name logo
8 | NULL
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Renviron
5 | *.csv
6 | *.xlsx
7 | .txt
8 | .bib
9 | .Ruserdata
10 | .DS_Store
11 | VOSviewer.jar
12 | network.net
13 | Rubbish
14 | desktop.ini
15 | vignette.txt
16 | inst/doc
17 | inst/biblioshiny/__MACOSX
18 | inst/biblioshiny/rsconnect
19 | _gh-pages
20 | 
21 | 
22 | /.quarto/
--------------------------------------------------------------------------------
/man/logo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/logo.R
3 | \name{logo}
4 | \alias{logo}
5 | \title{Bibliometrix logo.}
6 | \format{
7 | A matrix with 927 rows and 800 columns.
8 | }
9 | \description{
10 | The matrix contains the RGB representation of the official bibliometrix logo.\cr
11 | }
12 | 
--------------------------------------------------------------------------------
/R/removeStrangeChar.R:
--------------------------------------------------------------------------------
1 | removeStrangeChar <- function(D) {
2 |   # nchar() throws an error on strings with broken encodings, so flag those
3 |   # elements with 0 and drop them together with (near-)empty lines.
4 |   ind <- numeric(length(D))
5 |   for (i in seq_along(D)) {
6 |     res <- try(ind[i] <- nchar(D[i]), silent = TRUE)
7 |     if (inherits(res, "try-error")) {
8 |       ind[i] <- 0
9 |     }
10 |   }
11 |   D[ind > 1]
12 | }
13 | 
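A quick sketch of the intended behaviour of removeStrangeChar() (the input strings are made up, and the function is unexported, so it is reached here with `:::`):

lines <- c("TI  SCIENCE MAPPING", "\xfa\xb4", "", "AB  AN OVERVIEW")
bibliometrix:::removeStrangeChar(lines)
# the invalid-encoding element and the empty string are dropped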
--------------------------------------------------------------------------------
/R/stopwords.R:
--------------------------------------------------------------------------------
1 | #' List of English stopwords.
2 | #'
3 | #' A character vector containing a complete list of English stopwords.\cr
4 | #' Data are used by \code{\link{biblioAnalysis}} function
5 | #' to extract Country Field of Cited References and Authors.
6 | #'
7 | #' @format A character vector with 665 elements.\cr
8 | #'
9 | #' @name stopwords
10 | NULL
11 | 
--------------------------------------------------------------------------------
/R/customTheme.R:
--------------------------------------------------------------------------------
1 | #' Custom Theme variables for Biblioshiny.
2 | #'
3 | #' List containing a set of custom theme variables for Biblioshiny.
4 | #'
5 | #' @format A list with 3 elements:
6 | #' \describe{
7 | #'   \item{name}{object name}
8 | #'   \item{attribs}{attributes}
9 | #'   \item{children}{CSS style}
10 | #' }
11 | #'
12 | #' @name customTheme
13 | NULL
14 | 
--------------------------------------------------------------------------------
/man/remove_diacritics.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{remove_diacritics}
4 | \alias{remove_diacritics}
5 | \title{Remove diacritics from string with robust fallback}
6 | \usage{
7 | remove_diacritics(x)
8 | }
9 | \description{
10 | Remove diacritics from string with robust fallback
11 | }
12 | \keyword{internal}
13 | 
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.github$
4 | ^.*\.txt$
5 | ^.*\.bib$
6 | ^.*\.ini$
7 | ^.*\.md$
8 | ^.*\.RData$
9 | ^.*\.jar
10 | ^.*\.net
11 | ^.*\.csv
12 | ^.*\.xlsx
13 | ^.*\.xls
14 | ^.*\.xml
15 | ^.*\.png
16 | LICENCE.*$
17 | Rubbish
18 | ^_gh-pages$
19 | ^README\.Rmd$
20 | ^_pkgdown\.yml$
21 | ^pkgdown$
22 | ^inst/biblioshiny/rsconnect
23 | ^rsconnect
24 | 
25 | ^CRAN-SUBMISSION$
26 | ^cran-comments\.md$
27 | 
--------------------------------------------------------------------------------
/R/ltwa.R:
--------------------------------------------------------------------------------
1 | #' Index of LTWA.
2 | #'
3 | #' Data frame containing a normalized index of words used in journal names and their ISO4 abbreviations.
4 | #'
5 | #' @format A data frame with 56463 rows and 3 variables:
6 | #' \describe{
7 | #'   \item{WORD}{word from journal names}
8 | #'   \item{ABBREVIATION}{ISO4 abbreviation}
9 | #'   \item{LANGUAGES}{Language of the journal name}
10 | #' }
11 | #'
12 | #' @name ltwa
13 | NULL
14 | 
--------------------------------------------------------------------------------
/man/get_iso4_stop_words.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{get_iso4_stop_words}
4 | \alias{get_iso4_stop_words}
5 | \title{Articles, prepositions, and conjunctions to be removed (ISO 4 standard)}
6 | \usage{
7 | get_iso4_stop_words()
8 | }
9 | \description{
10 | Articles, prepositions, and conjunctions to be removed (ISO 4 standard)
11 | }
12 | \keyword{internal}
13 | 
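A minimal sketch of what the stop-word helper is for in ISO 4 abbreviation (it is internal and unexported; that it returns a plain lower-case character vector is an assumption):

stops <- bibliometrix:::get_iso4_stop_words()
words <- strsplit("JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION", " ")[[1]]
paste(words[!tolower(words) %in% stops], collapse = " ")
# articles and prepositions such as "OF" and "THE" are dropped before abbreviation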
--------------------------------------------------------------------------------
/man/stopwords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/stopwords.R
3 | \name{stopwords}
4 | \alias{stopwords}
5 | \title{List of English stopwords.}
6 | \format{
7 | A character vector with 665 elements.\cr
8 | }
9 | \description{
10 | A character vector containing a complete list of English stopwords.\cr
11 | Data are used by \code{\link{biblioAnalysis}} function
12 | to extract Country Field of Cited References and Authors.
13 | }
14 | 
--------------------------------------------------------------------------------
/man/customTheme.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/customTheme.R
3 | \name{customTheme}
4 | \alias{customTheme}
5 | \title{Custom Theme variables for Biblioshiny.}
6 | \format{
7 | A list with 3 elements:
8 | \describe{
9 |   \item{name}{object name}
10 |   \item{attribs}{attributes}
11 |   \item{children}{CSS style}
12 | }
13 | }
14 | \description{
15 | List containing a set of custom theme variables for Biblioshiny.
16 | }
17 | 
--------------------------------------------------------------------------------
/man/print_author_works_summary.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/findAuthorWorks.r
3 | \name{print_author_works_summary}
4 | \alias{print_author_works_summary}
5 | \title{Print Summary of Author Works Search}
6 | \usage{
7 | print_author_works_summary(works_df)
8 | }
9 | \arguments{
10 | \item{works_df}{Data.frame. Result from find_author_works()}
11 | }
12 | \description{
13 | Prints a summary of the search results from find_author_works()
14 | }
15 | 
--------------------------------------------------------------------------------
/R/plot.bibliodendrogram.R:
--------------------------------------------------------------------------------
1 | #' Plotting dendrogram resulting from Conceptual Structure Analysis
2 | #'
3 | #' \code{plot} method for class '\code{bibliodendrogram}'
4 | #' @param x is the object for which plots are desired.
5 | #' @param ... is a generic param for plot functions.
6 | #' @return The function \code{plot} draws a dendrogram.
7 | #'
8 | #'
9 | #' @method plot bibliodendrogram
10 | #' @export
11 | 
12 | 
13 | plot.bibliodendrogram <- function(x, ...) {
14 |   plot(x$dend)
15 |   abline(h = x$line, lty = 2)
16 | }
17 | 
--------------------------------------------------------------------------------
/man/ltwa.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ltwa.R
3 | \name{ltwa}
4 | \alias{ltwa}
5 | \title{Index of LTWA.}
6 | \format{
7 | A data frame with 56463 rows and 3 variables:
8 | \describe{
9 |   \item{WORD}{word from journal names}
10 |   \item{ABBREVIATION}{ISO4 abbreviation}
11 |   \item{LANGUAGES}{Language of the journal name}
12 | }
13 | }
14 | \description{
15 | Data frame containing a normalized index of words used in journal names and their ISO4 abbreviations.
16 | }
17 | 
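Once the package is installed, the ltwa table can be inspected directly; a small query sketch (the letter case of the stored entries is an assumption, hence the case-insensitive match):

data(ltwa, package = "bibliometrix")
ltwa[toupper(ltwa$WORD) == "JOURNAL", c("WORD", "ABBREVIATION", "LANGUAGES")]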
--------------------------------------------------------------------------------
/R/trimES.R:
--------------------------------------------------------------------------------
1 | #' Deleting extra white spaces
2 | #'
3 | #' Deleting extra white spaces from a \code{character} object.
4 | #'
5 | #' \code{trimES} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c("Alfred  BJ", "Mary  Beth", "John  John")
13 | #' char
14 | #' trimES(char)
15 | #'
16 | #' @export
17 | trimES <- function(x) {
18 |   gsub("\\s+", " ", x)
19 | }
20 | 
--------------------------------------------------------------------------------
/man/prepare_ltwa_lookup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{prepare_ltwa_lookup}
4 | \alias{prepare_ltwa_lookup}
5 | \title{Prepare LTWA database for efficient lookup}
6 | \usage{
7 | prepare_ltwa_lookup(ltwa_db)
8 | }
9 | \arguments{
10 | \item{ltwa_db}{LTWA database data frame}
11 | }
12 | \value{
13 | List with singles, prefix, and phrase lookup tables
14 | }
15 | \description{
16 | Pre-processes LTWA database into optimized lookup tables
17 | }
18 | \keyword{internal}
19 | 
--------------------------------------------------------------------------------
/R/trim.R:
--------------------------------------------------------------------------------
1 | #' Deleting leading and ending white spaces
2 | #'
3 | #' Deleting leading and ending white spaces from a \code{character} object.
4 | #'
5 | #' \code{trim} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c(" Alfred", "Mary", " John")
13 | #' char
14 | #' trim(char)
15 | #'
16 | #' @export
17 | trim <- function(x) {
18 |   gsub("(^[[:space:]]+|[[:space:]]+$)", "", x)
19 | }
20 | 
--------------------------------------------------------------------------------
/R/trim.leading.R:
--------------------------------------------------------------------------------
1 | #' Deleting leading white spaces
2 | #'
3 | #' Deleting leading white spaces from a \code{character} object.
4 | #'
5 | #' \code{trim.leading} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c(" Alfred", "Mary", " John")
13 | #' char
14 | #' trim.leading(char)
15 | #'
16 | #' @export
17 | trim.leading <- function(x) {
18 |   sub("^\\s+", "", x) ## function to delete leading spaces in a string
19 | }
20 | 
--------------------------------------------------------------------------------
/R/bibtag.R:
--------------------------------------------------------------------------------
1 | #' Tag list and bibtex fields.
2 | #'
3 | #' Data frame containing a list of tags and the corresponding WoS, SCOPUS and generic bibtex fields, as well as Dimensions.ai csv and xlsx fields.
4 | #'
5 | #' @format A data frame with 44 rows and 6 variables:
6 | #' \describe{
7 | #'   \item{TAG}{Tag Fields}
8 | #'   \item{SCOPUS}{Scopus bibtex fields}
9 | #'   \item{ISI}{WOS/ISI bibtex fields}
10 | #'   \item{GENERIC}{Generic bibtex fields}
11 | #'   \item{DIMENSIONS_OLD}{DIMENSIONS csv/xlsx old fields}
12 | #'   \item{DIMENSIONS}{DIMENSIONS csv/xlsx fields}
13 | #' }
14 | #'
15 | #' @name bibtag
16 | NULL
17 | 
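A sketch of using bibtag to translate one field tag across sources (the presence of an "AU" row is an assumption; the column names are as documented above):

data(bibtag, package = "bibliometrix")
bibtag[bibtag$TAG == "AU", c("TAG", "SCOPUS", "ISI", "GENERIC")]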
--------------------------------------------------------------------------------
/R/countries.R:
--------------------------------------------------------------------------------
1 | #' Index of Countries.
2 | #'
3 | #' Data frame containing a normalized index of countries.\cr
4 | #' Data are used by \code{\link{biblioAnalysis}} function
5 | #' to extract Country Field of Cited References and Authors.
6 | #'
7 | #' @format A data frame with 199 rows and 5 variables:
8 | #' \describe{
9 | #'   \item{countries}{country names}
10 | #'   \item{continent}{continent names}
11 | #'   \item{iso2}{country ISO 3166-1 alpha-2 code}
12 | #'   \item{Longitude}{country centroid longitude}
13 | #'   \item{Latitude}{country centroid latitude}
14 | #' }
15 | #'
16 | #' @name countries
17 | NULL
18 | 
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
1 | citHeader("To cite bibliometrix in publications, please use:")
2 | 
3 | bibentry(bibtype="Article",
4 |          title = "bibliometrix: An R-tool for comprehensive science mapping analysis",
5 |          author = "Massimo Aria and Corrado Cuccurullo",
6 |          journal = "Journal of Informetrics",
7 |          year = "2017",
8 |          doi = "10.1016/j.joi.2017.08.007",
9 |          textVersion =
10 |          paste("Aria, M., & Cuccurullo, C. (2017),",
11 |                "bibliometrix: An R-tool for comprehensive science mapping analysis,",
12 |                "Journal of Informetrics, 11(4), 959-975, Elsevier."))
13 | 
--------------------------------------------------------------------------------
/man/plot.bibliodendrogram.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.bibliodendrogram.R
3 | \name{plot.bibliodendrogram}
4 | \alias{plot.bibliodendrogram}
5 | \title{Plotting dendrogram resulting from Conceptual Structure Analysis}
6 | \usage{
7 | \method{plot}{bibliodendrogram}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{is the object for which plots are desired.}
11 | 
12 | \item{...}{is a generic param for plot functions.}
13 | }
14 | \value{
15 | The function \code{plot} draws a dendrogram.
16 | }
17 | \description{
18 | \code{plot} method for class '\code{bibliodendrogram}'
19 | }
20 | 
--------------------------------------------------------------------------------
/man/trim.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trim.R
3 | \name{trim}
4 | \alias{trim}
5 | \title{Deleting leading and ending white spaces}
6 | \usage{
7 | trim(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting leading and ending white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trim} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c(" Alfred", "Mary", " John")
24 | char
25 | trim(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/man/trimES.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trimES.R
3 | \name{trimES}
4 | \alias{trimES}
5 | \title{Deleting extra white spaces}
6 | \usage{
7 | trimES(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting extra white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trimES} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c("Alfred  BJ", "Mary  Beth", "John  John")
24 | char
25 | trimES(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/bibliometrix.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: 256c0fcb-6c1f-4220-80e8-8472feb3cda7
3 | 
4 | RestoreWorkspace: Default
5 | SaveWorkspace: Default
6 | AlwaysSaveHistory: Default
7 | 
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | BuildType: Package
17 | PackageUseDevtools: Yes
18 | PackageInstallArgs: --no-multiarch --with-keep.source --resave-data
19 | PackageBuildArgs: --resave-data
20 | PackageBuildBinaryArgs: --resave-data
21 | PackageCheckArgs: --as-cran --timings --no-stop-on-test-error --no-clean
22 | PackageRoxygenize: rd,collate,namespace,vignette
23 | 
--------------------------------------------------------------------------------
/man/create_journal_iso4_lookup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{create_journal_iso4_lookup}
4 | \alias{create_journal_iso4_lookup}
5 | \title{Create ISO4 journal normalization lookup table}
6 | \usage{
7 | create_journal_iso4_lookup(journal_vector, ltwa_db)
8 | }
9 | \arguments{
10 | \item{journal_vector}{Character vector of journal names}
11 | 
12 | \item{ltwa_db}{LTWA database data frame}
13 | }
14 | \value{
15 | Data frame with journal_original and journal_iso4 columns
16 | }
17 | \description{
18 | Create ISO4 journal normalization lookup table
19 | }
20 | \keyword{internal}
21 | 
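A sketch of building the lookup for a couple of journal names (the function is internal; the call relies only on the signature and output columns documented above):

data(ltwa, package = "bibliometrix")
journals <- c("JOURNAL OF INFORMETRICS", "SCIENTOMETRICS")
lk <- bibliometrix:::create_journal_iso4_lookup(journals, ltwa)
lk[, c("journal_original", "journal_iso4")]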
--------------------------------------------------------------------------------
/man/trim.leading.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trim.leading.R
3 | \name{trim.leading}
4 | \alias{trim.leading}
5 | \title{Deleting leading white spaces}
6 | \usage{
7 | trim.leading(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting leading white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trim.leading} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c(" Alfred", "Mary", " John")
24 | char
25 | trim.leading(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/man/bibtag.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bibtag.R
3 | \name{bibtag}
4 | \alias{bibtag}
5 | \title{Tag list and bibtex fields.}
6 | \format{
7 | A data frame with 44 rows and 6 variables:
8 | \describe{
9 |   \item{TAG}{Tag Fields}
10 |   \item{SCOPUS}{Scopus bibtex fields}
11 |   \item{ISI}{WOS/ISI bibtex fields}
12 |   \item{GENERIC}{Generic bibtex fields}
13 |   \item{DIMENSIONS_OLD}{DIMENSIONS csv/xlsx old fields}
14 |   \item{DIMENSIONS}{DIMENSIONS csv/xlsx fields}
15 | }
16 | }
17 | \description{
18 | Data frame containing a list of tags and the corresponding WoS, SCOPUS and generic bibtex fields, as well as Dimensions.ai csv and xlsx fields.
19 | }
20 | 
--------------------------------------------------------------------------------
/man/abbreviate_term.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{abbreviate_term}
4 | \alias{abbreviate_term}
5 | \title{Abbreviate a single term using LTWA}
6 | \usage{
7 | abbreviate_term(word, ltwa_lookup, common_abbr, check = TRUE)
8 | }
9 | \arguments{
10 | \item{word}{Single word to abbreviate}
11 | 
12 | \item{ltwa_lookup}{Pre-processed LTWA lookup tables}
13 | 
14 | \item{common_abbr}{Named vector of common abbreviations}
15 | 
16 | \item{check}{Logical, whether to check for abbreviation}
17 | }
18 | \value{
19 | Abbreviated form of word
20 | }
21 | \description{
22 | Abbreviate a single term using LTWA
23 | }
24 | \keyword{internal}
25 | 
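Chaining the two internal helpers documented above; the named vector passed as common_abbr is invented here, and that it is an acceptable input is an assumption:

data(ltwa, package = "bibliometrix")
lookup <- bibliometrix:::prepare_ltwa_lookup(ltwa)
bibliometrix:::abbreviate_term("INTERNATIONAL", lookup,
                               common_abbr = c(JOURNAL = "J"), check = TRUE)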
--------------------------------------------------------------------------------
/man/countries.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/countries.R
3 | \name{countries}
4 | \alias{countries}
5 | \title{Index of Countries.}
6 | \format{
7 | A data frame with 199 rows and 5 variables:
8 | \describe{
9 |   \item{countries}{country names}
10 |   \item{continent}{continent names}
11 |   \item{iso2}{country ISO 3166-1 alpha-2 code}
12 |   \item{Longitude}{country centroid longitude}
13 |   \item{Latitude}{country centroid latitude}
14 | }
15 | }
16 | \description{
17 | Data frame containing a normalized index of countries.\cr
18 | Data are used by \code{\link{biblioAnalysis}} function
19 | to extract Country Field of Cited References and Authors.
20 | }
21 | 
--------------------------------------------------------------------------------
/man/normalize_journal_to_iso4.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{normalize_journal_to_iso4}
4 | \alias{normalize_journal_to_iso4}
5 | \title{Normalize journal names to ISO4 abbreviated form}
6 | \usage{
7 | normalize_journal_to_iso4(journal_name, ltwa_db)
8 | }
9 | \arguments{
10 | \item{journal_name}{Character string with journal name}
11 | 
12 | \item{ltwa_db}{Data frame with LTWA database}
13 | }
14 | \value{
15 | Normalized journal name in ISO4 abbreviated form
16 | }
17 | \description{
18 | Converts all journal names to their ISO4 abbreviated form using LTWA.
19 | Only uses English language entries from LTWA to avoid foreign word matches.
20 | }
21 | \keyword{internal}
22 | 
--------------------------------------------------------------------------------
/R/importFiles.R:
--------------------------------------------------------------------------------
1 | importFiles <- function(...) {
2 |   arguments <- unlist(list(...))
3 |   k <- length(arguments)
4 |   D <- list()
5 |   # enc="UTF-8"
6 |   # origEnc=getOption("encoding")
7 |   # if (origEnc=="UTF-8"){options(encoding = "native.enc")}
8 |   for (i in seq_len(k)) {
9 |     D[[i]] <- read_lines(
10 |       arguments[i],
11 |       skip = 0,
12 |       n_max = -1L,
13 |       locale = default_locale(),
14 |       progress = show_progress()
15 |     )
16 | 
17 |     # D[[i]]=suppressWarnings(
18 |     #   iconv(readLines(arguments[i],encoding = "UTF-8"),"latin1", "ASCII", sub="")
19 |     #   #conv(readLines(arguments[[i]]))
20 |     # )
21 |   }
22 |   D <- unlist(D)
23 |   # options(encoding = origEnc)
24 |   # Encoding(D) <- "UTF-8"
25 |   return(D)
26 | }
27 | 
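Typical invocation of this unexported reader (the file names are hypothetical):

D <- bibliometrix:::importFiles("savedrecs1.txt", "savedrecs2.txt")
head(D)  # one element per line read from the export files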
--------------------------------------------------------------------------------
/man/abbreviate_journal_title.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{abbreviate_journal_title}
4 | \alias{abbreviate_journal_title}
5 | \title{Abbreviate journal title to ISO 4 standard}
6 | \usage{
7 | abbreviate_journal_title(title, ltwa_lookup)
8 | }
9 | \arguments{
10 | \item{title}{Journal title string}
11 | 
12 | \item{ltwa_lookup}{Pre-processed LTWA lookup tables (from prepare_ltwa_lookup)}
13 | }
14 | \value{
15 | Abbreviated journal title in ISO 4 format (without periods)
16 | }
17 | \description{
18 | Converts a full journal title to its ISO 4 abbreviated form using LTWA.
19 | Removes articles, prepositions, and conjunctions according to ISO 4 rules.
20 | Returns result WITHOUT periods (dots).
21 | }
22 | \keyword{internal}
23 | 
--------------------------------------------------------------------------------
/man/lifeCycle.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/lifeCycle.R
3 | \name{lifeCycle}
4 | \alias{lifeCycle}
5 | \title{Life Cycle Analysis with Logistic Growth Model}
6 | \usage{
7 | lifeCycle(data, forecast_years = 5, plot = TRUE, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{data}{Data frame with columns: year (PY) and number of publications (n)}
11 | 
12 | \item{forecast_years}{Number of years to forecast beyond saturation}
13 | 
14 | \item{plot}{Logical, if TRUE produces plots}
15 | 
16 | \item{verbose}{Logical, if TRUE prints detailed output}
17 | }
18 | \value{
19 | List containing parameters, forecasts and metrics
20 | }
21 | \description{
22 | Estimates logistic growth model for annual (non-cumulative) publications
23 | following Meyer et al. (1999) methodology
24 | }
25 | 
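The logistic curve underlying lifeCycle() has the form n(t) = K / (1 + exp(-r * (t - t0))), where K is the saturation level, r the growth rate and t0 the midpoint year. A self-contained nls() sketch of fitting that curve to a simulated series; this is illustrative only, not the package's estimation code (which follows Meyer et al., 1999):

set.seed(1)
df <- data.frame(PY = 2000:2020)
df$n <- round(100 / (1 + exp(-0.5 * (df$PY - 2010))) + rnorm(nrow(df), sd = 3))
fit <- nls(n ~ K / (1 + exp(-r * (PY - t0))),
           data = df,
           start = list(K = max(df$n), r = 0.1, t0 = median(df$PY)))
coef(fit)  # estimated K, r, t0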
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | bibliometrix Package for R - Tool for Quantitative Research in Bibliometrics and Scientometrics.
2 | 
3 | Copyright (C) 2016 Massimo Aria and Corrado Cuccurullo
4 | 
5 | This program is free software; you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation; either version 2 of the License, or
8 | (at your option) any later version.
9 | 
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 | 
15 | You should have received a copy of the GNU General Public License
16 | along with this program; if not, write to the Free Software
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | 
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | bibliometrix R-Package
2 | 
3 | A Tool for Quantitative Research in Bibliometrics and Scientometrics.
4 | 
5 | Copyright (C) 2016 Massimo Aria and Corrado Cuccurullo
6 | 
7 | This program is free software; you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation; either version 2 of the License, or
10 | (at your option) any later version.
11 | 
12 | This program is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 | 
17 | You should have received a copy of the GNU General Public License
18 | along with this program; if not, write to the Free Software
19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 | 
--------------------------------------------------------------------------------
/man/plot.bibliometrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.bibliometrix.R
3 | \name{plot.bibliometrix}
4 | \alias{plot.bibliometrix}
5 | \title{Plotting bibliometric analysis results}
6 | \usage{
7 | \method{plot}{bibliometrix}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{is the object for which plots are desired.}
11 | 
12 | \item{...}{can accept two arguments:\cr
13 | \code{k} is an integer, used for plot formatting (number of objects). Default value is 10.\cr
14 | \code{pause} is a logical, used to allow pause in screen scrolling of results. Default value is \code{pause = FALSE}.}
15 | }
16 | \value{
17 | The function \code{plot} returns a list of plots of class \code{ggplot2}.
18 | }
19 | \description{
20 | \code{plot} method for class '\code{bibliometrix}'
21 | }
22 | \examples{
23 | data(scientometrics, package = "bibliometrixData")
24 | 
25 | results <- biblioAnalysis(scientometrics)
26 | 
27 | plot(results, k = 10, pause = FALSE)
28 | 
29 | }
30 | \seealso{
31 | The bibliometric analysis function \code{\link{biblioAnalysis}}.
32 | 
33 | \code{\link{summary}} to compute a list of summary statistics of the object of class \code{bibliometrix}.
34 | }
35 | 
--------------------------------------------------------------------------------
/man/summary.bibliometrix_netstat.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/summary.bibliometrix_netstat.R
3 | \name{summary.bibliometrix_netstat}
4 | \alias{summary.bibliometrix_netstat}
5 | \title{Summarizing network analysis results}
6 | \usage{
7 | \method{summary}{bibliometrix_netstat}(object, ...)
8 | }
9 | \arguments{
10 | \item{object}{is the object for which a summary is desired.}
11 | 
12 | \item{...}{can accept the argument:\cr
13 | \code{k}, an integer used for table formatting (number of rows). Default value is 10.\cr}
14 | }
15 | \value{
16 | The function \code{summary} computes and displays several statistics, both at network and vertex level.
17 | }
18 | \description{
19 | \code{summary} method for class '\code{bibliometrix_netstat}'
20 | }
21 | \examples{
22 | 
23 | # to run the example, please remove # from the beginning of the following lines
24 | # data(scientometrics, package = "bibliometrixData")
25 | 
26 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "collaboration",
27 | #   network = "authors", sep = ";")
28 | # netstat <- networkStat(NetMatrix, stat = "all", type = "degree")
29 | # summary(netstat)
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/man/readFiles.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/readFiles.R
3 | \name{readFiles}
4 | \alias{readFiles}
5 | \title{DEPRECATED: Load a sequence of ISI or SCOPUS Export files into a large character object}
6 | \usage{
7 | readFiles(...)
8 | }
9 | \arguments{
10 | \item{...}{is a sequence of names of files downloaded from WOS (in plain text or bibtex format) or SCOPUS Export files (exclusively in bibtex format).}
11 | }
12 | \value{
13 | a character vector whose length is the number of lines read.
14 | }
15 | \description{
16 | The function readFiles is deprecated. You can import and convert your export files directly using the function \code{\link{convert2df}}.
17 | }
18 | \examples{
19 | # WoS or SCOPUS Export files can be read using \code{\link{readFiles}} function:
20 | 
21 | # largechar <- readFiles('filename1.txt','filename2.txt','filename3.txt')
22 | 
23 | # filename1.txt, filename2.txt and filename3.txt are ISI or SCOPUS Export files
24 | # in plain text or bibtex format.
25 | 
26 | # D <- readFiles('https://www.bibliometrix.org/datasets/bibliometrics_articles.txt')
27 | 
28 | }
29 | \seealso{
30 | \code{\link{convert2df}} for converting SCOPUS or ISI Export files into a data frame
31 | }
32 | 
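The deprecation note above points to convert2df(); the modern one-step import looks like this (the file name is hypothetical):

library(bibliometrix)
M <- convert2df(file = "savedrecs.bib", dbsource = "wos", format = "bibtex")
dim(M)  # one row per document, one column per field tag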
--------------------------------------------------------------------------------
/man/threeFieldsPlot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/threeFieldsPlot.R
3 | \name{threeFieldsPlot}
4 | \alias{threeFieldsPlot}
5 | \title{Three Fields Plot}
6 | \usage{
7 | threeFieldsPlot(M, fields = c("DE", "AU", "SO"), n = c(20, 20, 20))
8 | }
9 | \arguments{
10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.}
12 | 
13 | \item{fields}{is a character vector. It indicates the fields to analyze using the standard WoS field tags.
14 | Default is \code{fields = c("DE", "AU", "SO")}.}
15 | 
16 | \item{n}{is an integer vector. It indicates how many items to plot, for each of the three fields.
17 | Default is \code{n = c(20, 20, 20)}}
18 | }
19 | \value{
20 | a sankeyPlot
21 | }
22 | \description{
23 | Visualize the main items of three fields (e.g. authors, keywords, journals), and how they are related through a Sankey diagram.
24 | }
25 | \examples{
26 | 
27 | # data(scientometrics, package = "bibliometrixData")
28 | 
29 | # threeFieldsPlot(scientometrics, fields=c("DE","AU","CR"),n=c(20,20,20))
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/man/sourceGrowth.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sourceGrowth.R
3 | \name{sourceGrowth}
4 | \alias{sourceGrowth}
5 | \title{Number of documents published annually per Top Sources}
6 | \usage{
7 | sourceGrowth(M, top = 5, cdf = TRUE)
8 | }
9 | \arguments{
10 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original ISI or SCOPUS file.}
12 | 
13 | \item{top}{is numeric. It indicates the number of top sources to analyze. The default value is 5.}
14 | 
15 | \item{cdf}{is a logical. If TRUE, the function calculates the cumulative occurrences distribution.}
16 | }
17 | \value{
18 | an object of class \code{data.frame}
19 | }
20 | \description{
21 | It calculates the yearly published documents of the top sources.
22 | }
23 | \examples{
24 | 
25 | data(scientometrics, package = "bibliometrixData")
26 | topSO <- sourceGrowth(scientometrics, top = 1, cdf = TRUE)
27 | topSO
28 | 
29 | # Plotting results
30 | \dontrun{
31 | install.packages("reshape2")
32 | library(reshape2)
33 | library(ggplot2)
34 | DF <- melt(topSO, id = "Year")
35 | ggplot(DF, aes(Year, value, group = variable, color = variable)) +
36 |   geom_line()
37 | }
38 | 
39 | }
40 | 
--------------------------------------------------------------------------------
/man/mergeKeywords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zzz.R
3 | \name{mergeKeywords}
4 | \alias{mergeKeywords}
5 | \title{Merge DE and ID Fields into a Unified Keywords Column}
6 | \usage{
7 | mergeKeywords(M, force = FALSE)
8 | }
9 | \arguments{
10 | \item{M}{A dataframe containing at least the `DE` and/or `ID` columns, typically generated by `convert2df()` from the `bibliometrix` package.}
11 | 
12 | \item{force}{Logical. If `TRUE`, an existing `KW_Merged` column will be overwritten. Default is `FALSE`.}
13 | }
14 | \value{
15 | A dataframe with an added (or updated) `KW_Merged` column containing deduplicated and cleaned keyword strings.
16 | }
17 | \description{
18 | This function creates a new column `KW_Merged` by combining the contents of the `DE` (author keywords) and `ID` (keywords plus) fields
19 | in a bibliographic dataframe. Duplicate keywords within each record are removed, and leading/trailing spaces are trimmed.
20 | The merged keywords are separated by a semicolon (`;`).
21 | }
22 | \details{
23 | If the `KW_Merged` column already exists, it will not be overwritten unless `force = TRUE` is specified.
24 | }
25 | \examples{
26 | \dontrun{
27 | data(management, package = "bibliometrix")
28 | M <- mergeKeywords(management)
29 | head(M$KW_Merged)
30 | }
31 | 
32 | }
33 | 
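The merging rule described above, shown on a single record in plain base R (an illustration of the documented behaviour, not the package's implementation):

DE <- "BIBLIOMETRICS; SCIENCE MAPPING"
ID <- "SCIENCE MAPPING; CO-CITATION ANALYSIS"
kw <- unique(trimws(unlist(strsplit(c(DE, ID), ";"))))
paste(kw, collapse = ";")
# "BIBLIOMETRICS;SCIENCE MAPPING;CO-CITATION ANALYSIS"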
--------------------------------------------------------------------------------
/man/biblioshiny.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/biblioshiny.R
3 | \name{biblioshiny}
4 | \alias{biblioshiny}
5 | \title{Shiny UI for bibliometrix package}
6 | \usage{
7 | biblioshiny(
8 |   host = "127.0.0.1",
9 |   port = NULL,
10 |   launch.browser = TRUE,
11 |   maxUploadSize = 200
12 | )
13 | }
14 | \arguments{
15 | \item{host}{The IPv4 address that the application should listen on.
16 | Defaults to the shiny.host option, if set, or "127.0.0.1" if not.}
17 | 
18 | \item{port}{is the TCP port that the application should listen on. If the port is not specified,
19 | and the shiny.port option is set (with options(shiny.port = XX)), then that port will be used.
20 | Otherwise, use a random port.}
21 | 
22 | \item{launch.browser}{If true, the system's default web browser will be launched automatically
23 | after the app is started. Defaults to true in interactive sessions only. The value of
24 | this parameter can also be a function to call with the application's URL.}
25 | 
26 | \item{maxUploadSize}{is an integer. The max upload file size argument. Default value is 200 (megabytes)}
27 | }
28 | \description{
29 | \code{biblioshiny} performs science mapping analysis using the main functions of the bibliometrix package.
30 | }
31 | \examples{
32 | 
33 | # biblioshiny()
34 | 
35 | }
36 | 
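Launching the app with explicit options (the values shown are arbitrary):

library(bibliometrix)
biblioshiny(host = "127.0.0.1", port = 8787, maxUploadSize = 500)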
xxx) 30 | \item Extracts volume and issue numbers 31 | \item Extracts the journal name (text before volume/issue/pages) 32 | \item Deduces the title as the remaining text after the author 33 | } 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /R/biblioshiny.R: -------------------------------------------------------------------------------- 1 | #' Shiny UI for bibliometrix package 2 | #' 3 | #' \code{biblioshiny} performs science mapping analysis using the main functions of the bibliometrix package. 4 | #' 5 | #' @param port is the TCP port that the application should listen on. If the port is not specified, 6 | #' and the shiny.port option is set (with options(shiny.port = XX)), then that port will be used. 7 | #' Otherwise, a random port is used. 8 | #' 9 | #' @param launch.browser If TRUE, the system's default web browser will be launched automatically 10 | #' after the app is started. Defaults to TRUE in interactive sessions only. The value of 11 | #' this parameter can also be a function to call with the application's URL. 12 | #' 13 | #' @param host The IPv4 address that the application should listen on. 14 | #' Defaults to the shiny.host option, if set, or "127.0.0.1" if not. 15 | #' 16 | #' @param maxUploadSize is an integer. The maximum upload file size. Default value is 200 (megabytes). 17 | #' 18 | #' @examples 19 | #' 20 | #' # biblioshiny() 21 | #' 22 | #' @export 23 | 24 | biblioshiny <- function(host = "127.0.0.1", port = NULL, 25 | launch.browser = TRUE, maxUploadSize = 200) { 26 | shinyOptions(maxUploadSize = maxUploadSize) 27 | 28 | runApp(system.file("biblioshiny", package = "bibliometrix"), launch.browser = launch.browser, port = port, host = getOption("shiny.host", host)) 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'release'} 24 | 25 | env: 26 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 27 | R_KEEP_PKG_SOURCE: yes 28 | 29 | steps: 30 | - uses: actions/checkout@v4 31 | 32 | - uses: r-lib/actions/setup-pandoc@v2 33 | 34 | - uses: r-lib/actions/setup-r@v2 35 | with: 36 | r-version: ${{ matrix.config.r }} 37 | http-user-agent: ${{ matrix.config.http-user-agent }} 38 | use-public-rspm: true 39 | 40 | - uses: r-lib/actions/setup-r-dependencies@v2 41 | with: 42 | extra-packages: any::rcmdcheck 43 | needs: check 44 | 45 | - uses: r-lib/actions/check-r-package@v2 46 | with: 47 | upload-snapshots: true 48 | -------------------------------------------------------------------------------- /man/plotThematicEvolution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotThematicEvolution.R 3 | \name{plotThematicEvolution} 4 | \alias{plotThematicEvolution} 5 | \title{Plot a Thematic Evolution Analysis} 6 | \usage{ 7 | plotThematicEvolution(Nodes, Edges, measure = "inclusion", min.flow = 0) 8 | } 9 | \arguments{ 10 | \item{Nodes}{is a list of nodes obtained by the \code{\link{thematicEvolution}} function.} 11 | 12 | \item{Edges}{is a list of edges obtained by the \code{\link{thematicEvolution}} function.} 13 | 14 | \item{measure}{is a character. It can be \code{measure = c("inclusion", "stability", "weighted")}.} 15 | 16 | \item{min.flow}{is numeric. It indicates the minimum value of the measure required to plot a flow.} 17 | } 18 | \value{ 19 | a sankey plot object 20 | } 21 | \description{ 22 | It plots a Thematic Evolution Analysis performed using the \code{\link{thematicEvolution}} function. 23 | } 24 | \examples{ 25 | 26 | \dontrun{ 27 | data(management, package = "bibliometrixData") 28 | years <- c(2004, 2008, 2015) 29 | 30 | nexus <- thematicEvolution(management, field = "DE", years = years, n = 100, minFreq = 2) 31 | 32 | plotThematicEvolution(nexus$Nodes, nexus$Edges) 33 | } 34 | 35 | } 36 | \seealso{ 37 | \code{\link{thematicMap}} function to create a thematic map based on co-word network analysis and clustering. 38 | 39 | \code{\link{thematicEvolution}} function to perform a thematic evolution analysis. 40 | 41 | \code{\link{networkPlot}} to plot a bibliographic network. 42 | } 43 | -------------------------------------------------------------------------------- /man/net2Pajek.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/net2Pajek.R 3 | \name{net2Pajek} 4 | \alias{net2Pajek} 5 | \title{Save a network graph object as Pajek files} 6 | \usage{ 7 | net2Pajek(net, filename = "my_pajek_network", path = NULL) 8 | } 9 | \arguments{ 10 | \item{net}{is a network graph object returned by the function \code{\link{networkPlot}}.} 11 | 12 | \item{filename}{is a character. It indicates the filename for the Pajek export files.} 13 | 14 | \item{path}{is a character. It indicates the path where the files will be saved. 
When path = NULL, the files will be saved in the current folder. Default is NULL.} 15 | } 16 | \value{ 17 | The function returns no object but saves three Pajek files, named "filename.clu", "filename.vec", and "filename.net", in the folder given in the "path" argument. 18 | } 19 | \description{ 20 | The function \code{\link{net2Pajek}} saves a bibliographic network previously created by \code{\link{networkPlot}} as Pajek files. 21 | } 22 | \examples{ 23 | \dontrun{ 24 | data(management, package = "bibliometrixData") 25 | 26 | NetMatrix <- biblioNetwork(management, 27 | analysis = "co-occurrences", 28 | network = "keywords", sep = ";" 29 | ) 30 | 31 | net <- networkPlot(NetMatrix, n = 30, type = "auto", Title = "Co-occurrence Network", labelsize = 1) 32 | 33 | net2Pajek(net, filename = "pajekfiles", path = NULL) 34 | } 35 | } 36 | \seealso{ 37 | \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 38 | } 39 | -------------------------------------------------------------------------------- /man/bradford.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bradford.R 3 | \name{bradford} 4 | \alias{bradford} 5 | \title{Bradford's law} 6 | \usage{ 7 | bradford(M) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic dataframe.} 11 | } 12 | \value{ 13 | The function \code{bradford} returns a list containing the following objects: 14 | \tabular{lll}{ 15 | \code{table} \tab \tab a dataframe with the source distribution partitioned in the three zones\cr 16 | \code{graph} \tab \tab the source distribution plot in ggplot2 format} 17 | } 18 | \description{ 19 | It estimates and draws Bradford's law source distribution. 20 | } 21 | \details{ 22 | Bradford's law is a pattern, first described by \cite{Samuel C. Bradford, 1934}, that estimates the exponentially diminishing returns 23 | of searching for references in science journals. 24 | 25 | One formulation is that if journals in a field are sorted by number of articles into three groups, each with about one-third of all articles, 26 | then the number of journals in each group will be proportional to 1:n:n^2.\cr\cr 27 | 28 | Reference:\cr 29 | Bradford, S. C. (1934). Sources of information on specific subjects. Engineering, 137, 85-86.\cr 30 | } 31 | \examples{ 32 | \dontrun{ 33 | data(management, package = "bibliometrixData") 34 | 35 | BR <- bradford(management) 36 | } 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis 41 | 42 | \code{\link{summary}} method for class '\code{bibliometrix}' 43 | } 44 | -------------------------------------------------------------------------------- /man/timeslice.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/timeslice.R 3 | \name{timeslice} 4 | \alias{timeslice} 5 | \title{Bibliographic data frame time slice} 6 | \usage{ 7 | timeslice(M, breaks = NA, k = 5) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{breaks}{is a numeric vector of two or more unique cut points.} 14 | 15 | \item{k}{is an integer value giving the number of intervals into which the data frame is to be cut. \code{k} is used only if the \code{breaks} argument is not provided. The default is \code{k = 5}.} 16 | } 17 | \value{ 18 | the value returned from \code{timeslice} is a list containing the data frames for each sub-period. 19 | } 20 | \description{ 21 | Divides a bibliographic data frame into time slices 22 | } 23 | \examples{ 24 | 25 | data(scientometrics, package = "bibliometrixData") 26 | 27 | list_df <- timeslice(scientometrics, breaks = c(1995, 2005)) 28 | 29 | names(list_df) 30 | 31 | } 32 | \seealso{ 33 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 34 | 35 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 36 | 37 | \code{\link{summary}} to obtain a summary of the results. 38 | 39 | \code{\link{plot}} to draw some useful plots of the results. 40 | } 41 | -------------------------------------------------------------------------------- /man/net2VOSviewer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/net2VOSviewer.R 3 | \name{net2VOSviewer} 4 | \alias{net2VOSviewer} 5 | \title{Open a bibliometrix network in VOSviewer} 6 | \usage{ 7 | net2VOSviewer(net, vos.path = NULL) 8 | } 9 | \arguments{ 10 | \item{net}{is an object created by the networkPlot function.} 11 | 12 | \item{vos.path}{is a character indicating the full path where VOSviewer.jar is located.} 13 | } 14 | \value{ 15 | It writes a .net file that can be opened in VOSviewer 16 | } 17 | \description{ 18 | \code{net2VOSviewer} plots a network created with \code{\link{networkPlot}} using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 19 | } 20 | \details{ 21 | The function \code{\link{networkPlot}} can plot a bibliographic network previously created by \code{\link{biblioNetwork}}. 22 | The network map can be plotted using internal R routines or using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 23 | } 24 | \examples{ 25 | # EXAMPLE 26 | 27 | # VOSviewer.jar has to be present in the working folder 28 | 29 | # data(scientometrics, package = "bibliometrixData") 30 | 31 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 32 | # network = "references", sep = ";") 33 | 34 | # net <- networkPlot(NetMatrix, n = 30, type = "kamada", Title = "Co-Citation",labelsize=0.5) 35 | 36 | # net2VOSviewer(net) 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioNetwork}} to compute a bibliographic network. 
41 | 42 | \code{\link{networkPlot}} to create and plot a network object 43 | } 44 | -------------------------------------------------------------------------------- /man/lotka.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lotka.R 3 | \name{lotka} 4 | \alias{lotka} 5 | \title{Lotka's law coefficient estimation} 6 | \usage{ 7 | lotka(M) 8 | } 9 | \arguments{ 10 | \item{M}{is an object of the class '\code{bibliometrixDB}'.} 11 | } 12 | \value{ 13 | The function \code{lotka} returns a list of summary statistics of the Lotka's law estimation of an object of class \code{bibliometrix}. 14 | 15 | the list contains the following objects: 16 | \tabular{lll}{ 17 | \code{Beta} \tab \tab Beta coefficient\cr 18 | \code{C} \tab \tab Constant coefficient\cr 19 | \code{R2} \tab \tab Goodness of Fit\cr 20 | \code{fitted} \tab \tab Fitted Values\cr 21 | \code{p.value} \tab \tab P-value of the two-sample Kolmogorov-Smirnov test between the empirical and the theoretical Lotka's law distribution (with Beta = 2)\cr 22 | \code{AuthorProd} \tab \tab Authors' Productivity frequency table\cr 23 | \code{g} \tab \tab Lotka's law plot\cr 24 | \code{g_shiny} \tab \tab Lotka's law plot for biblioshiny} 25 | } 26 | \description{ 27 | It estimates Lotka's law coefficients for scientific productivity (\cite{Lotka A.J., 1926}).\cr\cr 28 | } 29 | \details{ 30 | Reference: 31 | Lotka, A. J. (1926). The frequency distribution of scientific productivity. Journal of the Washington Academy of Sciences, 16(12), 317-323.\cr 32 | } 33 | \examples{ 34 | data(management, package = "bibliometrixData") 35 | L <- lotka(management) 36 | L 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis 41 | 42 | \code{\link{summary}} method for class '\code{bibliometrix}' 43 | } 44 | -------------------------------------------------------------------------------- /man/missingData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/missingData.R 3 | \name{missingData} 4 | \alias{missingData} 5 | \title{Completeness of bibliographic metadata} 6 | \usage{ 7 | missingData(M) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the \code{\link{convert2df}} function.} 11 | } 12 | \value{ 13 | The function \code{missingData} returns a list containing two objects: 14 | \tabular{lll}{ 15 | \code{allTags} \tab \tab is a data frame including results for all original metadata tags from the collection\cr 16 | \code{mandatoryTags}\tab \tab is a data frame that includes only the tags needed for analysis with bibliometrix and biblioshiny.} 17 | } 18 | \description{ 19 | It calculates the percentage of missing data in the metadata of a bibliographic data frame. 20 | } 21 | \details{ 22 | Each metadata tag is assigned a status, c("Excellent", "Good", "Acceptable", "Poor", "Critical", "Completely missing"), 23 | depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing 24 | values into 6 categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%), 25 | "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%). 
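For example, a tag missing in 15.3% of the documents falls into the "Acceptable" class, while one missing in 62% of them would be classified as "Critical".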
26 | 27 | The results of the function allow us to understand which analyses can be performed with bibliometrix 28 | and which cannot, based on the completeness (or status) of the different metadata. 29 | } 30 | \examples{ 31 | data(scientometrics, package = "bibliometrixData") 32 | res <- missingData(scientometrics) 33 | print(res$mandatoryTags) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/get_authors_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/authorBio.r 3 | \name{get_authors_summary} 4 | \alias{get_authors_summary} 5 | \title{Get Authors Summary from OpenAlex} 6 | \usage{ 7 | get_authors_summary( 8 | doi = "10.1016/j.joi.2017.08.007", 9 | verbose = FALSE, 10 | sleep_time = 0.2, 11 | max_retries = 3 12 | ) 13 | } 14 | \arguments{ 15 | \item{doi}{Character. DOI of the article} 16 | 17 | \item{verbose}{Logical. Print informative messages during execution (default: FALSE)} 18 | 19 | \item{sleep_time}{Numeric. Seconds to wait before API call (default: 0.2)} 20 | 21 | \item{max_retries}{Integer. Maximum number of retry attempts (default: 3)} 22 | } 23 | \value{ 24 | A data frame with summary information for all authors including: 25 | \itemize{ 26 | \item position: Author position in the paper 27 | \item display_name: Author name as it appears in the paper 28 | \item author_position_type: Type of position (first, last, middle) 29 | \item is_corresponding: Whether the author is a corresponding author 30 | \item orcid: ORCID identifier if available 31 | \item openalex_id: OpenAlex author identifier 32 | \item primary_affiliation: Main institutional affiliation 33 | } 34 | } 35 | \description{ 36 | Retrieves a quick summary of all authors from a paper without making additional API calls 37 | for individual author profiles. Useful for getting an overview of the authorship structure. 38 | } 39 | \examples{ 40 | \dontrun{ 41 | # Get a quick summary of all authors 42 | summary <- get_authors_summary(doi = "10.1016/j.joi.2017.08.007") 43 | print(summary) 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/mergeDbSources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mergeDbSources.R 3 | \name{mergeDbSources} 4 | \alias{mergeDbSources} 5 | \title{Merge bibliographic data frames from supported bibliographic DBs} 6 | \usage{ 7 | mergeDbSources(..., remove.duplicated = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{...}{are the bibliographic data frames to merge.} 11 | 12 | \item{remove.duplicated}{is logical. If TRUE, duplicated documents will be deleted from the bibliographic collection.} 13 | 14 | \item{verbose}{is logical. If TRUE, information on duplicate documents is printed on the screen.} 15 | } 16 | \value{ 17 | the value returned from \code{mergeDbSources} is a bibliographic data frame. 18 | } 19 | \description{ 20 | Merge bibliographic data frames from different databases (WoS, SCOPUS, Lens, OpenAlex, etc.) into a single one. 21 | } 22 | \details{ 23 | Bibliographic data frames are obtained by the converting function \code{\link{convert2df}}. 24 | The function merges data frames identifying common tag fields and duplicated records. 
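A minimal sketch of the common-tag idea (illustrative only, not the package's internal implementation; \code{M1} and \code{M2} stand for two hypothetical collections):
\preformatted{
common <- intersect(names(M1), names(M2)) # tag fields shared by both collections
M <- rbind(M1[common], M2[common]) # stack the records on the common tags
}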
25 | } 26 | \examples{ 27 | 28 | data(isiCollection, package = "bibliometrixData") 29 | 30 | data(scopusCollection, package = "bibliometrixData") 31 | 32 | M <- mergeDbSources(isiCollection, scopusCollection, remove.duplicated = TRUE) 33 | 34 | dim(M) 35 | 36 | } 37 | \seealso{ 38 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 39 | 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 41 | 42 | \code{\link{summary}} to obtain a summary of the results. 43 | 44 | \code{\link{plot}} to draw some useful plots of the results. 45 | } 46 | -------------------------------------------------------------------------------- /man/splitCommunities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitCommunities.R 3 | \name{splitCommunities} 4 | \alias{splitCommunities} 5 | \title{Splitting Network communities} 6 | \usage{ 7 | splitCommunities(graph, n = NULL) 8 | } 9 | \arguments{ 10 | \item{graph}{is a network plot obtained by the function \code{\link{networkPlot}}.} 11 | 12 | \item{n}{is an integer. It indicates the number of vertices to plot for each community.} 13 | } 14 | \value{ 15 | It is a network object of the class \code{igraph} 16 | } 17 | \description{ 18 | \code{splitCommunities} creates a network plot with separated communities. 19 | } 20 | \details{ 21 | The function \code{\link{splitCommunities}} splits communities into separated subnetworks from a bibliographic network plot previously created by \code{\link{networkPlot}}. 22 | } 23 | \examples{ 24 | # EXAMPLE Keyword co-occurrence network 25 | 26 | data(management, package = "bibliometrixData") 27 | 28 | NetMatrix <- biblioNetwork(management, 29 | analysis = "co-occurrences", 30 | network = "keywords", sep = ";" 31 | ) 32 | 33 | net <- networkPlot(NetMatrix, 34 | n = 30, type = "auto", 35 | Title = "Co-occurrence Network", labelsize = 1, verbose = FALSE 36 | ) 37 | 38 | graph <- splitCommunities(net$graph, n = 30) 39 | 40 | } 41 | \seealso{ 42 | \code{\link{biblioNetwork}} to compute a bibliographic network. 43 | 44 | \code{\link{networkPlot}} to plot a bibliographic network. 45 | 46 | \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 47 | 48 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 49 | 50 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 51 | } 52 | -------------------------------------------------------------------------------- /man/idByAuthor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/idByAuthor.R 3 | \name{idByAuthor} 4 | \alias{idByAuthor} 5 | \title{Get Complete Author Information and ID from Scopus} 6 | \usage{ 7 | idByAuthor(df, api_key) 8 | } 9 | \arguments{ 10 | \item{df}{is a dataframe composed of three columns: 11 | \tabular{lll}{ 12 | \code{lastname}\tab \tab author's last name\cr 13 | \code{firstname}\tab \tab author's first name\cr 14 | \code{affiliation}\tab \tab Part of the affiliation name (university name, city, etc.)} 15 | i.e. df[1,1:3]<-c("aria","massimo","naples") 16 | When affiliation is not specified, the field df$affiliation has to be NA. 17 | i.e. df[2,1:3]<-c("cuccurullo","corrado", NA)} 18 | 19 | \item{api_key}{is a character. It contains the Elsevier API key. 
Information about how to obtain an API key is available at the \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website}} 20 | } 21 | \value{ 22 | a data frame with cases corresponding to authors and variables to the author's information and ID obtained from SCOPUS. 23 | } 24 | \description{ 25 | Uses the SCOPUS API author search to retrieve author identification information. 26 | } 27 | \examples{ 28 | ## Request a personal API Key from the Elsevier web page https://dev.elsevier.com/sc_apis.html 29 | # 30 | # api_key="your api key" 31 | 32 | ## create a data frame with the list of authors to get information and IDs 33 | # i.e. df[1,1:3]<-c("aria","massimo","naples") 34 | # df[2,1:3]<-c("cuccurullo","corrado", NA) 35 | 36 | ## run idByAuthor function 37 | # 38 | # authorsID <- idByAuthor(df, api_key) 39 | 40 | } 41 | \seealso{ 42 | \code{\link{retrievalByAuthorID}} for downloading the complete author bibliographic collection from SCOPUS 43 | } 44 | -------------------------------------------------------------------------------- /R/readFiles.R: -------------------------------------------------------------------------------- 1 | #' DEPRECATED: Load a sequence of ISI or SCOPUS Export files into a large character object 2 | #' 3 | #' The function readFiles is deprecated. You can import and convert your export files directly using the function \code{\link{convert2df}}. 4 | #' 5 | #' @param ... is a sequence of names of files downloaded from WoS (in plain text or bibtex format) or SCOPUS Export files (exclusively in bibtex format). 6 | #' @return a character vector of length the number of lines read. 7 | #' 8 | #' @examples 9 | #' # WoS or SCOPUS Export files can be read using the \code{\link{readFiles}} function: 10 | #' 11 | #' # largechar <- readFiles('filename1.txt','filename2.txt','filename3.txt') 12 | #' 13 | #' # filename1.txt, filename2.txt and filename3.txt are ISI or SCOPUS Export files 14 | #' # in plain text or bibtex format. 15 | #' 16 | #' # D <- readFiles('https://www.bibliometrix.org/datasets/bibliometrics_articles.txt') 17 | #' 18 | #' @seealso \code{\link{convert2df}} for converting SCOPUS or ISI Export files into a dataframe 19 | #' 20 | #' @export 21 | 22 | readFiles <- function(...) { 23 | cat("\nFrom version 3.0.0, the function readFiles has been dropped.\nPlease use the function 'convert2df' to import and convert your export files") 24 | # arguments <- unlist(list(...)) 25 | # k=length(arguments) 26 | # D=list() 27 | # enc="UTF-8" 28 | # origEnc=getOption("encoding") 29 | # if (origEnc=="UTF-8"){options(encoding = "native.enc")} 30 | # for (i in 1:k){ 31 | # D[[i]]=suppressWarnings( 32 | # iconv(readLines(arguments[i],encoding = "UTF-8"),"latin1", "ASCII", sub="") 33 | # #conv(readLines(arguments[[i]])) 34 | # ) 35 | # } 36 | # D=unlist(D) 37 | # options(encoding = origEnc) 38 | # Encoding(D) <- "UTF-8" 39 | # return(D) 40 | return(NULL) 41 | } 42 | -------------------------------------------------------------------------------- /man/keywordAssoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/keywordAssoc.R 3 | \name{keywordAssoc} 4 | \alias{keywordAssoc} 5 | \title{ID and DE keyword associations} 6 | \usage{ 7 | keywordAssoc(M, sep = ";", n = 10, excludeKW = NA) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{sep}{is the field separator character. This character separates keywords in each string of the ID and DE columns of the bibliographic data frame. The default is \code{sep = ";"}.} 14 | 15 | \item{n}{is an integer. It indicates the number of authors' keywords to associate with each keyword plus. The default is \code{n = 10}.} 16 | 17 | \item{excludeKW}{is a character vector. It contains authors' keywords to exclude from the analysis.} 18 | } 19 | \value{ 20 | an object of \code{class} "list". 21 | } 22 | \description{ 23 | It associates authors' keywords to keywords plus. 24 | } 25 | \examples{ 26 | 27 | data(scientometrics, package = "bibliometrixData") 28 | 29 | KWlist <- keywordAssoc(scientometrics, sep = ";", n = 10, excludeKW = NA) 30 | 31 | # list of first 10 Keywords plus 32 | names(KWlist) 33 | 34 | # list of first 10 authors' keywords associated with the first Keyword plus 35 | KWlist[[1]][1:10] 36 | 37 | } 38 | \seealso{ 39 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 40 | 41 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 42 | 43 | \code{\link{summary}} to obtain a summary of the results. 44 | 45 | \code{\link{plot}} to draw some useful plots of the results. 46 | } 47 | -------------------------------------------------------------------------------- /man/dominance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dominance.R 3 | \name{dominance} 4 | \alias{dominance} 5 | \title{Authors' dominance ranking} 6 | \usage{ 7 | dominance(results, k = 10) 8 | } 9 | \arguments{ 10 | \item{results}{is an object of the class '\code{bibliometrix}' for which the analysis of the authors' dominance ranking is desired.} 11 | 12 | \item{k}{is an integer, used for table formatting (number of authors). Default value is 10.} 13 | } 14 | \value{ 15 | The function \code{dominance} returns a data frame with cases corresponding to the first \code{k} most productive authors and variables to the typical fields of a dominance analysis. 16 | 17 | the data frame variables are: 18 | \tabular{lll}{ 19 | \code{Author} \tab \tab Author's name\cr 20 | \code{Dominance Factor} \tab \tab Dominance Factor (DF = FAA / MAA)\cr 21 | \code{Tot Articles} \tab \tab N. of Authored Articles (TAA)\cr 22 | \code{Single Authored} \tab \tab N. of Single-Authored Articles (SAA)\cr 23 | \code{Multi Authored} \tab \tab N. of Multi-Authored Articles (MAA=TAA-SAA)\cr 24 | \code{First Authored} \tab \tab N. of First Authored Articles (FAA)\cr 25 | \code{Rank by Articles} \tab \tab Author Ranking by N. of Articles\cr 26 | \code{Rank by DF} \tab \tab Author Ranking by Dominance Factor} 27 | } 28 | \description{ 29 | It calculates the authors' dominance ranking from an object of the class '\code{bibliometrix}' as proposed by Kumar & Kumar, 2008. 
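For instance (hypothetical counts), an author with TAA = 12 authored articles, of which SAA = 2 are single-authored and FAA = 6 are first-authored, has MAA = 12 - 2 = 10 and a Dominance Factor DF = 6 / 10 = 0.6.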
30 | } 31 | \examples{ 32 | data(scientometrics, package = "bibliometrixData") 33 | results <- biblioAnalysis(scientometrics) 34 | DF <- dominance(results) 35 | DF 36 | 37 | } 38 | \seealso{ 39 | \code{\link{biblioAnalysis}} function for bibliometric analysis 40 | 41 | \code{\link{summary}} method for class '\code{bibliometrix}' 42 | } 43 | -------------------------------------------------------------------------------- /man/tableTag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tableTag.R 3 | \name{tableTag} 4 | \alias{tableTag} 5 | \title{Tabulate elements from a Tag Field column} 6 | \usage{ 7 | tableTag( 8 | M, 9 | Tag = "CR", 10 | sep = ";", 11 | ngrams = 1, 12 | remove.terms = NULL, 13 | synonyms = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 18 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 19 | 20 | \item{Tag}{is a character object. It indicates one of the field tags of the 21 | standard ISI WoS Field Tag codify.} 22 | 23 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 24 | 25 | \item{ngrams}{is an integer between 1 and 3. It indicates the type of n-gram to extract from titles or abstracts.} 26 | 27 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 28 | 29 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 30 | } 31 | \value{ 32 | an object of class \code{table} 33 | } 34 | \description{ 35 | It tabulates elements from a Tag Field column of a bibliographic data frame. 36 | } 37 | \details{ 38 | \code{tableTag} is an internal routine of the main function \code{\link{biblioAnalysis}}. 39 | } 40 | \examples{ 41 | 42 | data(scientometrics, package = "bibliometrixData") 43 | Tab <- tableTag(scientometrics, Tag = "CR", sep = ";") 44 | Tab[1:10] 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/findAuthorWorks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findAuthorWorks.r 3 | \name{findAuthorWorks} 4 | \alias{findAuthorWorks} 5 | \title{Find Author's Co-authored Works} 6 | \usage{ 7 | findAuthorWorks(author_name, data, partial_match = TRUE, exact_match = FALSE) 8 | } 9 | \arguments{ 10 | \item{author_name}{Character. The author's name to search for (case-insensitive)} 11 | 12 | \item{data}{Data.frame. The bibliometric dataframe with AU and DI columns} 13 | 14 | \item{partial_match}{Logical. If TRUE, allows partial name matching (default: TRUE)} 15 | 16 | \item{exact_match}{Logical. 
If TRUE, requires exact name matching (default: FALSE)} 17 | } 18 | \value{ 19 | A data.frame with columns: 20 | \itemize{ 21 | \item doi: DOI of the work 22 | \item author_position: Numerical position of the author in the author list 23 | \item total_authors: Total number of authors in the work 24 | \item all_authors: Complete list of authors for reference 25 | \item matched_name: The exact name variant that was matched 26 | } 27 | } 28 | \description{ 29 | Searches for an author's name in a bibliometric dataframe and returns 30 | the DOIs and author positions of their co-authored works. 31 | } 32 | \details{ 33 | The function searches through the AU column, which contains author names 34 | separated by semicolons. It identifies the position of the target author 35 | and returns comprehensive information about each matching work. 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # Find works by "ARIA M" 40 | works <- findAuthorWorks("ARIA M", M) 41 | 42 | # Find works with exact matching 43 | works_exact <- findAuthorWorks("PESTANA MH", M, exact_match = TRUE) 44 | 45 | # Find works with partial matching disabled 46 | works_full <- findAuthorWorks("MASSIMO ARIA", M, partial_match = FALSE) 47 | } 48 | 49 | } 50 | \author{ 51 | Your Name 52 | } 53 | -------------------------------------------------------------------------------- /man/localCitations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/localCitations.R 3 | \name{localCitations} 4 | \alias{localCitations} 5 | \title{Author local citations} 6 | \usage{ 7 | localCitations(M, fast.search = FALSE, sep = ";", verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{fast.search}{is logical. If TRUE, the function calculates local citations only for the top 25 percent most cited documents.} 14 | 15 | \item{sep}{is the field separator character. This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}.} 16 | 17 | \item{verbose}{is a logical. If TRUE, results are printed on screen.} 18 | } 19 | \value{ 20 | an object of \code{class} "list" containing author local citations and document local citations. 21 | } 22 | \description{ 23 | It calculates local citations (LCS) of authors and documents of a bibliographic collection. 24 | } 25 | \details{ 26 | Local citations measure how many times an author (or a document) included in this collection has been cited by the documents also included in the collection. 27 | } 28 | \examples{ 29 | 30 | data(scientometrics, package = "bibliometrixData") 31 | 32 | CR <- localCitations(scientometrics, sep = ";") 33 | 34 | CR$Authors[1:10, ] 35 | CR$Papers[1:10, ] 36 | 37 | } 38 | \seealso{ 39 | \code{\link{citations}} function for citation frequency distribution. 40 | 41 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 42 | 43 | \code{\link{summary}} to obtain a summary of the results. 44 | 45 | \code{\link{plot}} to draw some useful plots of the results. 
46 | } 47 | -------------------------------------------------------------------------------- /R/timeslice.R: -------------------------------------------------------------------------------- 1 | #' Bibliographic data frame time slice 2 | #' 3 | #' Divides a bibliographic data frame into time slices 4 | #' 5 | #' 6 | #' 7 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 9 | #' @param breaks is a numeric vector of two or more unique cut points. 10 | #' @param k is an integer value giving the number of intervals into which the data frame is to be cut. \code{k} is used only if the \code{breaks} argument is not provided. The default is \code{k = 5}. 11 | #' @return the value returned from \code{timeslice} is a list containing the data frames for each sub-period. 12 | #' 13 | #' 14 | #' 15 | #' @examples 16 | #' 17 | #' data(scientometrics, package = "bibliometrixData") 18 | #' 19 | #' list_df <- timeslice(scientometrics, breaks = c(1995, 2005)) 20 | #' 21 | #' names(list_df) 22 | #' 23 | #' @seealso \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 24 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 25 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 26 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 27 | #' 28 | #' @export 29 | timeslice <- function(M, breaks = NA, k = 5) { 30 | M$PY <- as.numeric(M$PY) 31 | period <- list() 32 | if (is.na(breaks[1]) & is.numeric(k)) { 33 | breaks <- (floor(seq(min(M$PY, na.rm = TRUE) - 1, max(M$PY, na.rm = TRUE), length.out = k + 1))) 34 | } else { 35 | breaks <- c(min(M$PY, na.rm = TRUE) - 1, breaks, max(M$PY, na.rm = TRUE)) 36 | } 37 | df <- cut(M$PY, breaks) 38 | N <- levels(df) 39 | ind <- as.numeric(df) 40 | df <- split(M, ind) 41 | names(df) <- N 42 | return(df) 43 | } 44 | -------------------------------------------------------------------------------- /R/cochrane2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | cochrane2df <- function(D) { 3 | D <- D[nchar(D) > 0] # remove empty rows 4 | 5 | Papers <- which(substr(D, 1, 8) == "Record #") # first row of each document 6 | nP <- length(Papers) # number of documents 7 | 8 | rowPapers <- diff(c(Papers, length(D) + 1)) 9 | 10 | numPapers <- rep(1:nP, rowPapers) 11 | 12 | DATA <- data.frame(Tag = substr(D, 1, 4), content = substr(D, 5, nchar(D)), Paper = numPapers) 13 | DATA$Tag <- gsub(" ", "", gsub(":", "", DATA$Tag)) 14 | df <- DATA %>% 15 | group_by(Paper, Tag) %>% 16 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 17 | arrange(Tag, Paper) %>% 18 | pivot_wider(names_from = Tag, values_from = cont) %>% 19 | ungroup() %>% 20 | rename( 21 | "PY" = "YR", 22 | "UT" = "ID", 23 | "ID" = "KY", 24 | "URL" = "US", 25 | "DI" = "DOI", 26 | "NR" = "NO" 27 | ) 28 | df <- as.data.frame(df) 29 | 30 | df$PY <- as.numeric(df$PY) 31 | 32 | ### replace "---" with ";" 33 | tagsComma <- c("AU", "ID") 34 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 35 | gsub("---", ";", x) 36 | })) 37 | 38 | ### replace "---" with " " 39 | otherTags <- setdiff(names(df), tagsComma) 40 | df2 <- data.frame(lapply(df[otherTags], function(x) { 41 | trimES(gsub("---", " ", x)) 
}), stringsAsFactors = FALSE) 43 | df <- cbind(df1, df2) 44 | rm(df1, df2) 45 | 46 | df$ID <- gsub(" ;", ";", gsub("; ", ";", gsub("\\[[^\\]]*\\]", "", df$ID, perl = TRUE))) 47 | 48 | df$DB <- "COCHRANE" 49 | 50 | # Authors 51 | # df$AU <- trimES(gsub("-","",df$AU)) 52 | 53 | # Toupper 54 | DI <- df$DI 55 | df <- data.frame(lapply(df, toupper), stringsAsFactors = FALSE) 56 | df$DI <- gsub(" ", "", DI) 57 | 58 | df <- df[!(names(df) %in% c("Paper", "Reco"))] 59 | 60 | df$DE <- df$ID 61 | 62 | df$JI <- df$J9 <- df$SO 63 | 64 | return(df) 65 | } 66 | -------------------------------------------------------------------------------- /man/citations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/citations.R 3 | \name{citations} 4 | \alias{citations} 5 | \title{Citation frequency distribution} 6 | \usage{ 7 | citations(M, field = "article", sep = ";") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics Web of Science file.} 12 | 13 | \item{field}{is a character. It can be "article" or "author" to obtain the frequency distribution of cited articles or cited authors (only first authors for the WoS database), respectively. The default is \code{field = "article"}.} 14 | 15 | \item{sep}{is the field separator character. This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}.} 16 | } 17 | \value{ 18 | an object of \code{class} "list" containing the following components: 19 | 20 | \tabular{lll}{ 21 | Cited \tab \tab the most frequently cited manuscripts or authors\cr 22 | Year \tab \tab the publication year (only for cited article analysis)\cr 23 | Source \tab \tab the journal (only for cited article analysis)} 24 | } 25 | \description{ 26 | It calculates the frequency distribution of citations. 27 | } 28 | \examples{ 29 | ## EXAMPLE 1: Cited articles 30 | 31 | data(scientometrics, package = "bibliometrixData") 32 | 33 | CR <- citations(scientometrics, field = "article", sep = ";") 34 | 35 | CR$Cited[1:10] 36 | CR$Year[1:10] 37 | CR$Source[1:10] 38 | 39 | ## EXAMPLE 2: Cited first authors 40 | 41 | data(scientometrics) 42 | 43 | CR <- citations(scientometrics, field = "author", sep = ";") 44 | 45 | CR$Cited[1:10] 46 | 47 | } 48 | \seealso{ 49 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 50 | 51 | \code{\link{summary}} to obtain a summary of the results. 52 | 53 | \code{\link{plot}} to draw some useful plots of the results. 54 | } 55 | -------------------------------------------------------------------------------- /man/summary.bibliometrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.bibliometrix.R 3 | \name{summary.bibliometrix} 4 | \alias{summary.bibliometrix} 5 | \title{Summarizing bibliometric analysis results} 6 | \usage{ 7 | \method{summary}{bibliometrix}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{is the object for which a summary is desired.} 11 | 12 | \item{...}{can accept the following arguments:\cr 13 | \code{k} integer, used for table formatting (number of rows). 
Default value is 10.\cr 14 | \code{pause} logical, used to allow pause in screen scrolling of results. Default value is \code{pause = FALSE}.\cr 15 | \code{width} integer, used to define screen output width. Default value is \code{width = 120}.\cr 16 | \code{verbose} logical, used to allow screen output. Default is TRUE.} 17 | } 18 | \value{ 19 | The function \code{summary} computes and returns a list of summary statistics of the object of class \code{bibliometrix}. 20 | 21 | the list contains the following objects: 22 | \tabular{lll}{ 23 | \code{MainInformation} \tab \tab Main Information about Data\cr 24 | \code{AnnualProduction} \tab \tab Annual Scientific Production\cr 25 | \code{AnnualGrowthRate} \tab \tab Annual Percentage Growth Rate\cr 26 | \code{MostProdAuthors} \tab \tab Most Productive Authors\cr 27 | \code{MostCitedPapers} \tab \tab Top manuscripts per number of citations\cr 28 | \code{MostProdCountries} \tab \tab Corresponding Author's Countries\cr 29 | \code{TCperCountries} \tab \tab Total Citation per Countries\cr 30 | \code{MostRelSources} \tab \tab Most Relevant Sources\cr 31 | \code{MostRelKeywords} \tab \tab Most Relevant Keywords} 32 | } 33 | \description{ 34 | \code{summary} method for class '\code{bibliometrix}' 35 | } 36 | \examples{ 37 | data(scientometrics, package = "bibliometrixData") 38 | 39 | results <- biblioAnalysis(scientometrics) 40 | 41 | summary(results) 42 | 43 | } 44 | \seealso{ 45 | \code{\link{biblioAnalysis}} function for bibliometric analysis 46 | 47 | \code{\link{plot}} to draw some useful plots of the results. 48 | } 49 | -------------------------------------------------------------------------------- /man/histNetwork.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/histNetwork.R 3 | \name{histNetwork} 4 | \alias{histNetwork} 5 | \title{Historical co-citation network} 6 | \usage{ 7 | histNetwork(M, min.citations, sep = ";", network = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function 11 | \code{\link{convert2df}}. It is a data matrix with cases corresponding to 12 | manuscripts and variables to Field Tag in the original SCOPUS, OpenAlex, Lens.org and Clarivate 13 | Analytics Web of Science file.} 14 | 15 | \item{min.citations}{DEPRECATED. The new algorithm does not use this parameter. It will be removed in the next version of bibliometrix.} 16 | 17 | \item{sep}{is the field separator character. This character separates strings 18 | in the CR column of the data frame. The default is \code{sep = ";"}.} 19 | 20 | \item{network}{is logical. If TRUE, the function calculates and returns also the direct citation network. If FALSE, 21 | the function returns only the local citation table.} 22 | 23 | \item{verbose}{is logical. If TRUE, results are printed on screen.} 24 | } 25 | \value{ 26 | \code{histNetwork} returns an object of \code{class} "list" 27 | containing the following components: 28 | 29 | \tabular{lll}{ NetMatrix \tab \tab the historical co-citation network 30 | matrix\cr histData \tab \tab the set of n most cited references\cr M 31 | \tab \tab the bibliographic data frame} 32 | } 33 | \description{ 34 | \code{histNetwork} creates a historical citation network from a bibliographic 35 | data frame. 
36 | } 37 | \examples{ 38 | \dontrun{ 39 | data(management, package = "bibliometrixData") 40 | 41 | histResults <- histNetwork(management, sep = ";") 42 | } 43 | 44 | } 45 | \seealso{ 46 | \code{\link{convert2df}} to import and convert a supported 47 | export file in a bibliographic data frame. 48 | 49 | \code{\link{summary}} to obtain a summary of the results. 50 | 51 | \code{\link{plot}} to draw some useful plots of the results. 52 | 53 | \code{\link{biblioNetwork}} to compute a bibliographic network. 54 | } 55 | -------------------------------------------------------------------------------- /R/sourceGrowth.R: -------------------------------------------------------------------------------- 1 | #' Number of documents published annually per Top Sources 2 | #' 3 | #' It calculates yearly published documents of the top sources. 4 | #' 5 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to articles and variables to Field Tag in the original ISI or SCOPUS file. 7 | #' @param top is a numeric. It indicates the number of top sources to analyze. The default value is 5. 8 | #' @param cdf is a logical. If TRUE, the function calculates the cumulative occurrences distribution. 9 | #' @return an object of class \code{data.frame} 10 | #' @examples 11 | #' 12 | #' data(scientometrics, package = "bibliometrixData") 13 | #' topSO <- sourceGrowth(scientometrics, top = 1, cdf = TRUE) 14 | #' topSO 15 | #' 16 | #' # Plotting results 17 | #' \dontrun{ 18 | #' install.packages("reshape2") 19 | #' library(reshape2) 20 | #' library(ggplot2) 21 | #' DF <- melt(topSO, id = "Year") 22 | #' ggplot(DF, aes(Year, value, group = variable, color = variable)) + 23 | #' geom_line() 24 | #' } 25 | #' 26 | #' @export 27 | #' 28 | sourceGrowth <- function(M, top = 5, cdf = TRUE) { 29 | PY <- min(M$PY, na.rm = T):max(M$PY, na.rm = T) 30 | WSO <- cocMatrix(M, Field = "SO") 31 | if (is.null(dim(WSO))) { 32 | WSO <- cbind(WSO) 33 | colnames(WSO) <- M$SO[1] 34 | } 35 | if (top > dim(WSO)[2]) { 36 | top <- dim(WSO)[2] 37 | } 38 | 39 | M$PY <- as.character(M$PY) 40 | WPY <- cocMatrix(M, Field = "PY") 41 | i <- setdiff(PY, colnames(WPY)) 42 | if (length(i) > 0) { 43 | WPY <- cbind(WPY, matrix(0, dim(WPY)[1], length(i))) 44 | colnames(WPY)[(dim(WPY)[2] - length(i) + 1):dim(WPY)[2]] <- as.character(i) 45 | } 46 | PYSO <- Matrix::crossprod(WPY, WSO) 47 | ind <- Matrix::colSums(PYSO) 48 | deg <- sort(ind, decreasing = T)[top] 49 | sonames <- colnames(PYSO)[ind >= deg] 50 | PYSO <- as.data.frame(as.matrix(PYSO[, ind >= deg])) 51 | 52 | PYSO <- cbind(as.numeric(colnames(WPY)), PYSO) 53 | 54 | PYSO <- PYSO[order(PYSO[, 1]), ] 55 | if (isTRUE(cdf)) { 56 | PYSO[, -1] <- apply(as.data.frame(PYSO[, -1]), 2, cumsum) 57 | } 58 | names(PYSO) <- c("Year", sonames) 59 | return(PYSO) 60 | } 61 | -------------------------------------------------------------------------------- /man/KeywordGrowth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/keywordGrowth.R 3 | \name{KeywordGrowth} 4 | \alias{KeywordGrowth} 5 | \title{Yearly occurrences of top keywords/terms} 6 | \usage{ 7 | KeywordGrowth( 8 | M, 9 | Tag = "ID", 10 | sep = ";", 11 | top = 10, 12 | cdf = TRUE, 13 | remove.terms = NULL, 14 | synonyms = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 
19 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 20 | 21 | \item{Tag}{is a character object. It indicates one of the keyword field tags of the 22 | standard ISI WoS Field Tag codify (ID, DE, KW_Merged) or a field tag created by the \code{\link{termExtraction}} function (TI_TM, AB_TM, etc.).} 23 | 24 | \item{sep}{is the field separator character. This character separates strings in each keyword column of the data frame. The default is \code{sep = ";"}.} 25 | 26 | \item{top}{is a numeric. It indicates the number of top keywords to analyze. The default value is 10.} 27 | 28 | \item{cdf}{is a logical. If TRUE, the function calculates the cumulative occurrences distribution.} 29 | 30 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 31 | 32 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 33 | } 34 | \value{ 35 | an object of class \code{data.frame} 36 | } 37 | \description{ 38 | It calculates yearly occurrences of top keywords/terms. 39 | } 40 | \examples{ 41 | 42 | data(scientometrics, package = "bibliometrixData") 43 | topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5, cdf = TRUE) 44 | topKW 45 | 46 | # Plotting results 47 | \dontrun{ 48 | install.packages("reshape2") 49 | library(reshape2) 50 | library(ggplot2) 51 | DF <- melt(topKW, id = "Year") 52 | ggplot(DF, aes(Year, value, group = variable, color = variable)) + geom_line() 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/normalizeCitationScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalizeCItationScore.R 3 | \name{normalizeCitationScore} 4 | \alias{normalizeCitationScore} 5 | \title{Calculate the normalized citation score metric} 6 | \usage{ 7 | normalizeCitationScore(M, field = "documents", impact.measure = "local") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the \code{\link{convert2df}} function.} 11 | 12 | \item{field}{is a character. It indicates the unit of analysis on which to calculate the NCS. It can be equal to \code{field = c("documents", "authors", "sources")}. Default is \code{field = "documents"}.} 13 | 14 | \item{impact.measure}{is a character. It indicates the impact measure used to rank cluster elements (documents, authors or sources). 15 | It can be \code{impact.measure = c("local", "global")}. 16 | With \code{impact.measure = "local"}, \link{normalizeCitationScore} calculates the elements' impact using the Normalized Local Citation Score, while 17 | with \code{impact.measure = "global"}, the function uses the Normalized Global Citation Score to measure the elements' impact.} 18 | } 19 | \value{ 20 | a dataframe. 21 | } 22 | \description{ 23 | It calculates the normalized citation score for documents, authors and sources using both global and local citations. 24 | } 25 | \details{ 26 | The Normalized Citation Score (NCS) of a document is calculated by dividing the actual count of citing items by the expected 27 | citation rate for documents with the same year of publication. 
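For instance (hypothetical values), a document published in 2015 and cited 30 times, in a collection where documents published in 2015 received 20 citations on average, has NCS = 30 / 20 = 1.5.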
28 | 29 | The MNCS of a set of documents, for example the collected works of an individual, or those published in a journal, is the average of the NCS values for all the documents in the set. 30 | 31 | The NGCS is the NCS calculated using the global citations (total citations that a document received considering the whole bibliographic database). 32 | 33 | The NLCS is the NCS calculated using the local citations (total citations that a document received from a set of documents included in the same collection). 34 | } 35 | \examples{ 36 | \dontrun{ 37 | data(management, package = "bibliometrixData") 38 | NCS <- normalizeCitationScore(management, field = "authors", impact.measure = "local") 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/networkStat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/networkStat.R 3 | \name{networkStat} 4 | \alias{networkStat} 5 | \title{Calculating network summary statistics} 6 | \usage{ 7 | networkStat(object, stat = "network", type = "degree") 8 | } 9 | \arguments{ 10 | \item{object}{is a network matrix obtained by the function \code{\link{biblioNetwork}} or a graph object of the class \code{igraph}.} 11 | 12 | \item{stat}{is a character. It indicates which statistics are to be calculated. \code{stat = "network"} calculates the statistics related to the network; 13 | \code{stat = "all"} calculates the statistics related to the network and the individual nodes that compose it. Default value is \code{stat = "network"}.} 14 | 15 | \item{type}{is a character. It indicates which centrality index is calculated. type values can be c("degree", "closeness", "betweenness", "eigenvector", "pagerank", "hub", "authority", "all"). Default is "degree".} 16 | } 17 | \value{ 18 | It is a list containing the following elements: 19 | \tabular{lll}{ 20 | \code{graph} \tab \tab a network object of the class \code{igraph}\cr 21 | \code{network} \tab \tab a list with the main statistics of the network\cr 22 | \code{vertex} \tab \tab a data frame with the main measures of centrality and prestige of vertices.\cr} 23 | } 24 | \description{ 25 | \code{networkStat} calculates the main network statistics. 26 | } 27 | \details{ 28 | The function \code{\link{networkStat}} can calculate the main network statistics from a bibliographic network previously created by \code{\link{biblioNetwork}}. 29 | } 30 | \examples{ 31 | # EXAMPLE Co-citation network 32 | 33 | # to run the example, please remove # from the beginning of the following lines 34 | # data(scientometrics, package = "bibliometrixData") 35 | 36 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 37 | # network = "references", sep = ";") 38 | 39 | # netstat <- networkStat(NetMatrix, stat = "all", type = "degree") 40 | 41 | } 42 | \seealso{ 43 | \code{\link{biblioNetwork}} to compute a bibliographic network. 44 | 45 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 46 | 47 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 
48 | } 49 | -------------------------------------------------------------------------------- /R/net2VOSviewer.R: -------------------------------------------------------------------------------- 1 | #' Open a bibliometrix network in VOSviewer 2 | #' 3 | #' \code{net2VOSviewer} plots a network created with \code{\link{networkPlot}} using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 4 | #' 5 | #' The function \code{\link{networkPlot}} can plot a bibliographic network previously created by \code{\link{biblioNetwork}}. 6 | #' The network map can be plotted using internal R routines or using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 7 | #' 8 | #' 9 | #' @param net is an object created by the networkPlot function. 10 | #' @param vos.path is a character indicating the full path where VOSviewer.jar is located. 11 | #' @return It writes a .net file that can be opened in VOSviewer 12 | #' 13 | #' @examples 14 | #' # EXAMPLE 15 | #' 16 | #' # VOSviewer.jar has to be present in the working folder 17 | #' 18 | #' # data(scientometrics, package = "bibliometrixData") 19 | #' 20 | #' # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 21 | #' # network = "references", sep = ";") 22 | #' 23 | #' # net <- networkPlot(NetMatrix, n = 30, type = "kamada", Title = "Co-Citation",labelsize=0.5) 24 | #' 25 | #' # net2VOSviewer(net) 26 | #' 27 | #' @seealso \code{\link{biblioNetwork}} to compute a bibliographic network. 28 | #' @seealso \code{\link{networkPlot}} to create and plot a network object 29 | #' 30 | #' @export 31 | 32 | 33 | net2VOSviewer <- function(net, vos.path = NULL) { 34 | net <- net$graph_pajek 35 | V(net)$id <- V(net)$name 36 | 37 | if (is.null(vos.path)) { 38 | vos.path <- getwd() 39 | } 40 | if (sum(dir(vos.path) %in% "VOSviewer.jar") == 0) { 41 | cat( 42 | paste( 43 | "VOSviewer.jar does not exist in the path", 44 | vos.path, 45 | "\n\nPlease download it from https://www.vosviewer.com/download", 46 | "\n(Java version for other systems)\n" 47 | ) 48 | ) 49 | } else { 50 | netfile <- paste(vos.path, "/", "vosnetwork.net", sep = "") 51 | VOScommand <- paste("java -jar ", 52 | vos.path, 53 | "/", 54 | "VOSviewer.jar -pajek_network ", 55 | netfile, 56 | sep = "" 57 | ) 58 | write.graph( 59 | graph = net, 60 | file = netfile, 61 | format = "pajek" 62 | ) 63 | system(VOScommand, wait = FALSE) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /R/splitCommunities.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("group", "size")) 2 | #' Splitting Network communities 3 | #' 4 | #' \code{splitCommunities} creates a network plot with separated communities. 5 | #' 6 | #' The function \code{\link{splitCommunities}} splits communities into separated subnetworks from a bibliographic network plot previously created by \code{\link{networkPlot}}. 7 | #' 8 | #' @param graph is a network plot obtained by the function \code{\link{networkPlot}}. 9 | #' @param n is an integer. It indicates the number of vertices to plot for each community. 
10 | #' @return It is a network object of the class \code{igraph} 11 | #' 12 | #' 13 | #' @examples 14 | #' # EXAMPLE Keyword co-occurrence network 15 | #' 16 | #' data(management, package = "bibliometrixData") 17 | #' 18 | #' NetMatrix <- biblioNetwork(management, 19 | #' analysis = "co-occurrences", 20 | #' network = "keywords", sep = ";" 21 | #' ) 22 | #' 23 | #' net <- networkPlot(NetMatrix, 24 | #' n = 30, type = "auto", 25 | #' Title = "Co-occurrence Network", labelsize = 1, verbose = FALSE 26 | #' ) 27 | #' 28 | #' graph <- splitCommunities(net$graph, n = 30) 29 | #' 30 | #' @seealso \code{\link{biblioNetwork}} to compute a bibliographic network. 31 | #' @seealso \code{\link{networkPlot}} to plot a bibliographic network. 32 | #' @seealso \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 33 | #' @seealso \code{\link{cocMatrix}} to compute a co-occurrence matrix. 34 | #' @seealso \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 35 | #' 36 | #' @export 37 | splitCommunities <- function(graph, n = NULL) { 38 | df <- data.frame(label = V(graph)$name, size = V(graph)$deg, group = V(graph)$community) 39 | 40 | if (!is.null(n)) { 41 | labels <- df %>% 42 | group_by(group) %>% 43 | top_n(n = n, wt = size) %>% 44 | as.data.frame() 45 | } else { 46 | labels <- df %>% 47 | group_by(group) %>% 48 | as.data.frame() 49 | } 50 | 51 | # remove inter-cluster edges 52 | ind <- which(E(graph)$color == adjustcolor("gray70", alpha.f = graph$alpha / 2)) 53 | coGraph <- igraph::delete_edges(graph, E(graph)[ind]) 54 | ind <- which(V(coGraph)$name %in% labels$label) 55 | V(coGraph)$label[-ind] <- "" 56 | igraph::graph_attr(coGraph, "layout") <- igraph::layout_with_fr(coGraph) 57 | # plot(coGraph) 58 | return(coGraph) 59 | } 60 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bibliometrix 2 | Type: Package 3 | Title: Comprehensive Science Mapping Analysis 4 | Version: 5.2.1.9000 5 | Authors@R: c( 6 | person(given = "Massimo", 7 | family = "Aria", 8 | role = c("cre", "aut", "cph"), 9 | email = "aria@unina.it", 10 | comment = c(ORCID = "0000-0002-8517-9411")), 11 | person(given = "Corrado", 12 | family = "Cuccurullo", 13 | role = "aut", 14 | email = "cuccurullocorrado@gmail.com", 15 | comment = c(ORCID = "0000-0002-7401-8575"))) 16 | Description: Tool for quantitative research in scientometrics and bibliometrics. 17 | It implements the comprehensive workflow for science mapping analysis proposed in Aria M. and 18 | Cuccurullo C. (2017) <doi:10.1016/j.joi.2017.08.007>. 19 | 'bibliometrix' provides various routines for importing bibliographic data from 'SCOPUS', 20 | 'Clarivate Analytics Web of Science', 'Digital Science Dimensions', 21 | 'OpenAlex', 'Cochrane Library', 'Lens', 22 | and 'PubMed' databases, performing bibliometric analysis 23 | and building networks for co-citation, coupling, scientific collaboration and co-word analysis.
24 | License: GPL-3 25 | URL: https://www.bibliometrix.org, https://github.com/massimoaria/bibliometrix, https://www.k-synth.com 26 | BugReports: https://github.com/massimoaria/bibliometrix/issues 27 | LazyData: true 28 | Encoding: UTF-8 29 | Depends: R (>= 3.3.0) 30 | Imports: stats, 31 | grDevices, 32 | bibliometrixData, 33 | contentanalysis, 34 | dimensionsR, 35 | dplyr, 36 | DT, 37 | ca, 38 | forcats, 39 | ggplot2, 40 | ggrepel, 41 | igraph, 42 | Matrix, 43 | plotly, 44 | openalexR, 45 | openxlsx, 46 | pubmedR, 47 | purrr, 48 | readr, 49 | readxl, 50 | rscopus, 51 | shiny, 52 | shinycssloaders (>= 1.1.0), 53 | SnowballC, 54 | stringdist, 55 | stringi, 56 | stringr, 57 | tibble, 58 | tidyr, 59 | tidytext, 60 | visNetwork 61 | Suggests: 62 | knitr, 63 | rmarkdown, 64 | testthat (>= 3.0.0), 65 | wordcloud2 66 | RoxygenNote: 7.3.3 67 | NeedsCompilation: no 68 | Config/testthat/edition: 3 69 | -------------------------------------------------------------------------------- /man/duplicatedMatching.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duplicatedMatching.R 3 | \name{duplicatedMatching} 4 | \alias{duplicatedMatching} 5 | \title{Searching of duplicated records in a bibliographic database} 6 | \usage{ 7 | duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 8 | } 9 | \arguments{ 10 | \item{M}{is the bibliographic data frame.} 11 | 12 | \item{Field}{is a character object. It indicates one of the field tags used to identify duplicated records. Field can be equal to one of these tags: TI (title), AB (abstract), UT (manuscript ID).} 13 | 14 | \item{exact}{is logical. If exact = TRUE the function searches duplicates using exact matching. If exact = FALSE, 15 | the function uses the restricted Damerau-Levenshtein distance to find duplicated documents.} 16 | 17 | \item{tol}{is a numeric value giving the minimum relative similarity to match two manuscripts. Default value is \code{tol = 0.95}. 18 | To use the restricted Damerau-Levenshtein distance, the exact argument has to be set to FALSE.} 19 | } 20 | \value{ 21 | the value returned from \code{duplicatedMatching} is a data frame without duplicated records. 22 | } 23 | \description{ 24 | Search duplicated records in a data frame. 25 | } 26 | \details{ 27 | A bibliographic data frame is obtained by the converting function \code{\link{convert2df}}. 28 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 29 | The function identifies duplicated records in a bibliographic data frame and deletes them. 30 | Duplicate entries are identified through the restricted Damerau-Levenshtein distance. 31 | Two manuscripts that have a relative similarity measure greater than \code{tol} argument are stored in the output data frame only once. 32 | } 33 | \examples{ 34 | 35 | data(scientometrics, package = "bibliometrixData") 36 | 37 | M <- rbind(scientometrics[1:20, ], scientometrics[10:30, ]) 38 | 39 | newM <- duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 40 | 41 | dim(newM) 42 | 43 | } 44 | \seealso{ 45 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 46 | 47 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 48 | 49 | \code{\link{summary}} to obtain a summary of the results. 50 | 51 | \code{\link{plot}} to draw some useful plots of the results.
52 | } 53 | -------------------------------------------------------------------------------- /man/histPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/histPlot.R 3 | \name{histPlot} 4 | \alias{histPlot} 5 | \title{Plotting historical co-citation network} 6 | \usage{ 7 | histPlot( 8 | histResults, 9 | n = 20, 10 | size = 5, 11 | labelsize = 5, 12 | remove.isolates = TRUE, 13 | title_as_label = FALSE, 14 | label = "short", 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{histResults}{is an object of \code{class} "list" containing the following components: 20 | 21 | \tabular{lll}{ 22 | NetMatrix \tab \tab the historical citation network matrix\cr 23 | Degree \tab \tab the min degree of the network\cr 24 | histData \tab \tab the set of n most cited references\cr 25 | M \tab \tab the bibliographic data frame} 26 | 27 | is a network matrix obtained by the function \code{\link{histNetwork}}.} 28 | 29 | \item{n}{is an integer. It defines the number of vertices to plot.} 30 | 31 | \item{size}{is an integer. It defines the point size of the vertices. Default value is 5.} 32 | 33 | \item{labelsize}{is an integer. It indicates the label size in the plot. Default is \code{labelsize=5}.} 34 | 35 | \item{remove.isolates}{is logical. If TRUE, isolated vertices are not plotted.} 36 | 37 | \item{title_as_label}{is a logical. DEPRECATED} 38 | 39 | \item{label}{is a character. It indicates which label type to use as node id in the historiograph. It can be \code{label=c("short", "title", "keywords", "keywordsplus")}. 40 | Default is \code{label = "short"}.} 41 | 42 | \item{verbose}{is logical. If TRUE, results and plots are printed on screen.} 43 | } 44 | \value{ 45 | It is a list containing: a network object of the class \code{igraph} and a plot object of the class \code{ggraph}. 46 | } 47 | \description{ 48 | \code{histPlot} plots a historical co-citation network. 49 | } 50 | \details{ 51 | The function \code{\link{histPlot}} can plot a historical co-citation network previously created by \code{\link{histNetwork}}. 52 | } 53 | \examples{ 54 | # EXAMPLE Citation network 55 | \dontrun{ 56 | data(management, package = "bibliometrixData") 57 | 58 | histResults <- histNetwork(management, sep = ";") 59 | 60 | net <- histPlot(histResults, n = 20, labelsize = 5) 61 | } 62 | 63 | } 64 | \seealso{ 65 | \code{\link{histNetwork}} to compute a historical co-citation network. 66 | 67 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 68 | 69 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 70 | } 71 | -------------------------------------------------------------------------------- /R/localCitations.R: -------------------------------------------------------------------------------- 1 | #' Author local citations 2 | #' 3 | #' It calculates local citations (LCS) of authors and documents of a bibliographic collection. 4 | #' 5 | #' Local citations measure how many times an author (or a document) included in this collection has been cited by the documents also included in the collection. 6 | #' 7 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 9 | #' @param sep is the field separator character.
This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}. 10 | #' @param fast.search is logical. If TRUE, the function calculates local citations only for 25 percent top cited documents. 11 | #' @param verbose is a logical. If TRUE, results are printed on screen. 12 | #' @return an object of \code{class} "list" containing author local citations and document local citations. 13 | #' 14 | #' 15 | #' @examples 16 | #' 17 | #' data(scientometrics, package = "bibliometrixData") 18 | #' 19 | #' CR <- localCitations(scientometrics, sep = ";") 20 | #' 21 | #' CR$Authors[1:10, ] 22 | #' CR$Papers[1:10, ] 23 | #' 24 | #' @seealso \code{\link{citations}} function for citation frequency distribution. 25 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 26 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 27 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 28 | #' 29 | #' @export 30 | 31 | localCitations <- function(M, fast.search = FALSE, sep = ";", verbose = FALSE) { 32 | M$TC[is.na(M$TC)] <- 0 33 | if (isTRUE(fast.search)) { 34 | loccit <- quantile(as.numeric(M$TC), 0.75, na.rm = TRUE) 35 | } else { 36 | loccit <- 1 37 | } 38 | 39 | H <- histNetwork(M, min.citations = loccit, sep = sep, network = FALSE, verbose = verbose) 40 | LCS <- H$histData 41 | M <- H$M 42 | rm(H) 43 | AU <- strsplit(M$AU, split = ";") 44 | n <- lengths(AU) 45 | 46 | df <- data.frame(AU = unlist(AU), LCS = rep(M$LCS, n)) 47 | AU <- aggregate(df$LCS, by = list(df$AU), FUN = "sum") 48 | names(AU) <- c("Author", "LocalCitations") 49 | AU <- AU[order(-AU$LocalCitations), ] 50 | 51 | if ("SR" %in% names(M)) { 52 | LCS <- data.frame(Paper = M$SR, DOI = M$DI, Year = M$PY, LCS = M$LCS, GCS = M$TC) 53 | LCS <- LCS[order(-LCS$LCS), ] 54 | } 55 | CR <- list(Authors = AU, Papers = LCS, M = M) 56 | return(CR) 57 | } 58 | -------------------------------------------------------------------------------- /R/keywordAssoc.R: -------------------------------------------------------------------------------- 1 | #' ID and DE keyword associations 2 | #' 3 | #' It associates authors' keywords to keywords plus. 4 | #' 5 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 7 | #' @param sep is the field separator character. This character separates keywords in each string of ID and DE columns of the bibliographic data frame. The default is \code{sep = ";"}. 8 | #' @param excludeKW is a character vector. It contains authors' keywords to exclude from the analysis. 9 | #' @param n is an integer. It indicates the number of authors' keywords to associate to each keyword plus. The default is \code{n = 10}. 10 | #' @return an object of \code{class} "list". 11 | #' 12 | #' 13 | #' 14 | #' @examples 15 | #' 16 | #' data(scientometrics, package = "bibliometrixData") 17 | #' 18 | #' KWlist <- keywordAssoc(scientometrics, sep = ";", n = 10, excludeKW = NA) 19 | #' 20 | #' # list of first 10 Keywords plus 21 | #' names(KWlist) 22 | #' 23 | #' # list of first 10 authors' keywords associated to the first Keyword plus 24 | #' KWlist[[1]][1:10] 25 | #' 26 | #' @seealso \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame.
27 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 28 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 29 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 30 | #' 31 | #' @export 32 | keywordAssoc <- function(M, sep = ";", n = 10, excludeKW = NA) { 33 | excludeKW <- toupper(excludeKW) 34 | 35 | WDE <- cocMatrix(M, Field = "DE", type = "sparse", sep = sep) 36 | WID <- cocMatrix(M, Field = "ID", type = "sparse", sep = sep) 37 | 38 | NetMatrix <- Matrix::crossprod(WID, WDE) 39 | if (!is.na(excludeKW)) { 40 | NetMatrix <- NetMatrix[!(row.names(NetMatrix) %in% excludeKW), !(colnames(NetMatrix) %in% excludeKW)] 41 | } 42 | NetMatrix <- NetMatrix[!is.na(row.names(NetMatrix)), !is.na(colnames(NetMatrix))] 43 | NetMatrix <- NetMatrix[nchar(row.names(NetMatrix)) > 0, nchar(colnames(NetMatrix)) > 0] 44 | rS <- Matrix::rowSums(NetMatrix) 45 | 46 | NetDegree <- sort(rS, decreasing = TRUE)[n] 47 | NET <- NetMatrix[rS >= NetDegree, Matrix::colSums(NetMatrix) > 1] 48 | KW <- apply(NET, 1, function(x) { 49 | i <- sort(x, decreasing = TRUE)[n] 50 | x <- sort(x[x >= i], decreasing = TRUE) 51 | return(x) 52 | }) 53 | 54 | return(KW) 55 | } 56 | -------------------------------------------------------------------------------- /man/fieldByYear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fieldByYear.R 3 | \name{fieldByYear} 4 | \alias{fieldByYear} 5 | \title{Field Tag distribution by Year} 6 | \usage{ 7 | fieldByYear( 8 | M, 9 | field = "ID", 10 | timespan = NULL, 11 | min.freq = 2, 12 | n.items = 5, 13 | labelsize = NULL, 14 | remove.terms = NULL, 15 | synonyms = NULL, 16 | dynamic.plot = FALSE, 17 | graph = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{M}{is a bibliographic data frame obtained by \code{\link{convert2df}} function.} 22 | 23 | \item{field}{is a character object. It indicates one of the field tags of the 24 | standard ISI WoS Field Tag codify.} 25 | 26 | \item{timespan}{is a vector with the min and max year. If it is = NULL, the analysis is performed on the entire period. Default is \code{timespan = NULL}.} 27 | 28 | \item{min.freq}{is an integer. It indicates the min frequency of the items to include in the analysis} 29 | 30 | \item{n.items}{is an integer. It indicates the maximum number of items per year to include in the plot.} 31 | 32 | \item{labelsize}{is a deprecated argument. It will be removed in the next update.} 33 | 34 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 35 | 36 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 37 | 38 | \item{dynamic.plot}{is a logical. If TRUE plot aesthetics are optimized for plotly package.} 39 | 40 | \item{graph}{is logical. If TRUE, the function plots the Field Tag distribution by Year graph.
Default is \code{graph = TRUE}.} 41 | } 42 | \value{ 43 | The function \code{fieldByYear} returns a list containing three objects: 44 | \tabular{lll}{ 45 | \code{df} \tab \tab is a data frame\cr 46 | \code{df_graph}\tab \tab is a data frame with data used to build the graph\cr 47 | \code{graph} \tab \tab a ggplot object} 48 | } 49 | \description{ 50 | It calculates the median year for each item of a field tag. 51 | } 52 | \examples{ 53 | data(management, package = "bibliometrixData") 54 | timespan <- c(2005, 2015) 55 | res <- fieldByYear(management, 56 | field = "ID", timespan = timespan, 57 | min.freq = 5, n.items = 5, graph = TRUE 58 | ) 59 | 60 | } 61 | \seealso{ 62 | \code{\link{biblioAnalysis}} function for bibliometric analysis 63 | 64 | \code{\link{summary}} method for class '\code{bibliometrix}' 65 | } 66 | -------------------------------------------------------------------------------- /man/metaTagExtraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metaTagExtraction.R 3 | \name{metaTagExtraction} 4 | \alias{metaTagExtraction} 5 | \title{Meta-Field Tag Extraction} 6 | \usage{ 7 | metaTagExtraction(M, Field = "CR_AU", sep = ";", aff.disamb = TRUE) 8 | } 9 | \arguments{ 10 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 12 | 13 | \item{Field}{is a character object. New tag extracted from aggregated data is specified by this string. 14 | Field can be equal to one of these tags: 15 | \tabular{lll}{ 16 | \code{"CR_AU"}\tab \tab First Author of each cited reference\cr 17 | \code{"CR_SO"}\tab \tab Source of each cited reference\cr 18 | \code{"AU_CO"}\tab \tab Country of affiliation for co-authors \cr 19 | \code{"AU1_CO"}\tab \tab Country of affiliation for the first author\cr 20 | \code{"AU_UN"}\tab \tab University of affiliation for each co-author and the corresponding author (AU1_UN)\cr 21 | \code{"SR"}\tab \tab Short tag of the document (as used in reference lists)}} 22 | 23 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 24 | 25 | \item{aff.disamb}{is a logical. If TRUE and Field="AU_UN", then a disambiguation algorithm is used to identify and match scientific affiliations 26 | (univ, research centers, etc.). The default is \code{aff.disamb=TRUE}.} 27 | } 28 | \value{ 29 | the bibliometric data frame with a new column containing data about new field tag indicated in the argument \code{Field}. 30 | } 31 | \description{ 32 | It extracts other field tags, different from the standard WoS/SCOPUS codification.
33 | } 34 | \examples{ 35 | # Example 1: First Authors for each cited reference 36 | 37 | data(scientometrics, package = "bibliometrixData") 38 | scientometrics <- metaTagExtraction(scientometrics, Field = "CR_AU", sep = ";") 39 | unlist(strsplit(scientometrics$CR_AU[1], ";")) 40 | 41 | 42 | # Example 2: Source for each cited reference 43 | 44 | data(scientometrics) 45 | scientometrics <- metaTagExtraction(scientometrics, Field = "CR_SO", sep = ";") 46 | unlist(strsplit(scientometrics$CR_SO[1], ";")) 47 | 48 | # Example 3: Affiliation country for co-authors 49 | 50 | data(scientometrics) 51 | scientometrics <- metaTagExtraction(scientometrics, Field = "AU_CO", sep = ";") 52 | scientometrics$AU_CO[1:10] 53 | 54 | } 55 | \seealso{ 56 | \code{\link{convert2df}} for importing and converting bibliographic files into a data frame. 57 | 58 | \code{\link{biblioAnalysis}} function for bibliometric analysis 59 | } 60 | -------------------------------------------------------------------------------- /man/rpys.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rpys.R 3 | \name{rpys} 4 | \alias{rpys} 5 | \title{Reference Publication Year Spectroscopy} 6 | \usage{ 7 | rpys(M, sep = ";", timespan = NULL, median.window = "centered", graph = T) 8 | } 9 | \arguments{ 10 | \item{M}{is a data frame obtained by the converting function 11 | \code{\link{convert2df}}. It is a data matrix with cases corresponding to 12 | articles and variables to Field Tag in the original ISI or SCOPUS file.} 13 | 14 | \item{sep}{is the cited-references separator character. This character separates cited-references in the CR 15 | column of the data frame. The default is \code{sep = ";"}.} 16 | 17 | \item{timespan}{is a numeric vector c(min year,max year). The default value is NULL (the entire timespan is considered).} 18 | 19 | \item{median.window}{is a character string that can be "centered" or "backward". It indicates the type of median to be used. 20 | "centered" is the default value and it uses the centered 5-year median (t-2 to t+2) as proposed by Marx et al. (2014). "backward" uses the backward 5-year median (t-4 to t) as proposed by Aria and Cuccurullo (2017).} 21 | 22 | \item{graph}{is a logical. If TRUE, the function plots the spectroscopy; otherwise, the plot is created but not drawn.} 23 | } 24 | \value{ 25 | a list containing the spectroscopy (class ggplot2) and three data frames with the number of citations 26 | per year, the list of the cited references for each year, and the reference list with citations recorded year by year, respectively. 27 | } 28 | \description{ 29 | \code{rpys} computes a Reference Publication Year Spectroscopy for detecting 30 | the Historical Roots of Research Fields. 31 | The method was introduced by Marx et al., 2014.\cr\cr 32 | } 33 | \details{ 34 | References:\cr\cr 35 | Marx, W., Bornmann, L., Barth, A., & Leydesdorff, L. (2014). 36 | Detecting the historical roots of research fields by reference publication 37 | year spectroscopy (RPYS). Journal of the Association for Information Science and Technology, 38 | 65(4), 751-764.\cr\cr 39 | Thor A., Bornmann L., Marx W. & Mutz R. (2018). 40 | Identifying single influential publications in a research field: new analysis opportunities of the CRExplorer.
41 | Scientometrics, 116:591–608 https://doi.org/10.1007/s11192-018-2733-7\cr\cr 42 | } 43 | \examples{ 44 | 45 | \dontrun{ 46 | data(management, package = "bibliometrixData") 47 | res <- rpys(management, sep = ";", graph = TRUE) 48 | } 49 | 50 | } 51 | \seealso{ 52 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS 53 | Export file in a data frame. 54 | 55 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 56 | 57 | \code{\link{biblioNetwork}} to compute a bibliographic network. 58 | } 59 | -------------------------------------------------------------------------------- /R/net2Pajek.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("id", "name")) 2 | #' Save a network graph object as Pajek files 3 | #' 4 | #' 5 | #' The function \code{\link{net2Pajek}} saves a bibliographic network previously created by \code{\link{networkPlot}} as Pajek files. 6 | #' 7 | #' @param net is a network graph object returned by the function \code{\link{networkPlot}}. 8 | #' @param filename is a character. It indicates the filename for Pajek export files. 9 | #' @param path is a character. It indicates the path where the files will be saved. When path = NULL, the files will be saved in the current folder. Default is NULL. 10 | #' @return The function returns no object but will save three Pajek files in the folder given in the "path" argument with the name "filename.clu," "filename.vec," and "filename.net." 11 | #' 12 | #' @examples 13 | #' \dontrun{ 14 | #' data(management, package = "bibliometrixData") 15 | #' 16 | #' NetMatrix <- biblioNetwork(management, 17 | #' analysis = "co-occurrences", 18 | #' network = "keywords", sep = ";" 19 | #' ) 20 | #' 21 | #' net <- networkPlot(NetMatrix, n = 30, type = "auto", Title = "Co-occurrence Network", labelsize = 1) 22 | #' 23 | #' net2Pajek(net, filename = "pajekfiles", path = NULL) 24 | #' } 25 | #' @seealso \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software.
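#' A sketch of the ".net" layout written by the function body below
#' (illustrative values, not taken from the original documentation): a
#' "*Vertices n" header, one 'id "name"' row per node, then an "*Edges"
#' header followed by "from to weight" triples, e.g.:
#' *Vertices 2
#' 1 "TERM A"
#' 2 "TERM B"
#' *Edges 1
#' 1 2 5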
26 | #' 27 | #' @export 28 | net2Pajek <- function(net, filename = "my_pajek_network", path = NULL) { 29 | graph <- net$graph 30 | 31 | nodes <- igraph::as_data_frame(graph, what = c("vertices")) %>% 32 | mutate(id = row_number()) 33 | 34 | edges <- igraph::as_data_frame(graph, what = c("edges")) 35 | edges <- edges %>% 36 | left_join(nodes %>% select(id, name), by = c("from" = "name")) %>% 37 | rename(id_from = id) %>% 38 | left_join(nodes %>% select(id, name), by = c("to" = "name")) %>% 39 | rename(id_to = id) 40 | 41 | ### Creation of NET file 42 | if (!is.null(path)) { 43 | if (substr(path, nchar(path), nchar(path)) != "/") { 44 | path <- paste0(path, "/") 45 | } 46 | } 47 | filename <- paste0(path, filename) 48 | 49 | file <- paste0(filename, ".net") 50 | 51 | # Nodes 52 | write(paste0("*Vertices ", nrow(nodes)), file = file) 53 | write(paste0(nodes$id, ' "', nodes$name, '"'), file = file, append = T) 54 | 55 | # Edges (the header carries the edge count) 56 | write(paste0("*Edges ", nrow(edges)), file = file, append = T) 57 | write(paste0(edges$id_from, " ", edges$id_to, " ", edges$weight), file = file, append = T) 58 | 59 | ### Creation of VEC file 60 | file <- paste0(filename, ".vec") 61 | 62 | # Nodes 63 | write(paste0("*Vertices ", nrow(nodes)), file = file) 64 | write(paste0(nodes$deg), file = file, append = T) 65 | 66 | ### Creation of CLU file 67 | file <- paste0(filename, ".clu") 68 | 69 | # Nodes 70 | write(paste0("*Vertices ", nrow(nodes)), file = file) 71 | write(paste0(nodes$community), file = file, append = T) 72 | } 73 | -------------------------------------------------------------------------------- /man/Hindex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Hindex.R 3 | \name{Hindex} 4 | \alias{Hindex} 5 | \title{h-index calculation} 6 | \usage{ 7 | Hindex(M, field = "author", elements = NULL, sep = ";", years = Inf) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{field}{is character. It can be equal to c("author", "source"). field indicates if the H-index has to be calculated for a list of authors or for a list of sources. Default 14 | value is \code{field = "author"}.} 15 | 16 | \item{elements}{is a character vector. It contains the authors' names list or the source list for which you want to calculate the H-index. When the field is 17 | "author", the argument has the form c("SURNAME1 N","SURNAME2 N",...), in other words, for each author: surname and initials separated by one blank space. If elements=NULL, the function calculates impact indices for all elements contained in the data frame. 18 | i.e. for the authors SEMPRONIO TIZIO CAIO and ARIA MASSIMO \code{elements} argument is \code{elements = c("SEMPRONIO TC", "ARIA M")}.} 19 | 20 | \item{sep}{is the field separator character. This character separates authors in each string of AU column of the bibliographic data frame. The default is \code{sep = ";"}.} 21 | 22 | \item{years}{is an integer. It indicates the number of years to consider for Hindex calculation. Default is Inf.} 23 | } 24 | \value{ 25 | an object of \code{class} "list".
It contains two elements: H is a data frame with h-index, g-index and m-index for each author; CitationList is a list with the bibliographic collection for each author. 26 | } 27 | \description{ 28 | It calculates the authors' h-index and its variants. 29 | } 30 | \examples{ 31 | 32 | ### EXAMPLE 1: ### 33 | 34 | data(scientometrics, package = "bibliometrixData") 35 | 36 | authors <- c("SMALL H", "CHEN DZ") 37 | 38 | Hindex(scientometrics, field = "author", elements = authors, sep = ";")$H 39 | 40 | Hindex(scientometrics, field = "source", elements = "SCIENTOMETRICS", sep = ";")$H 41 | 42 | ### EXAMPLE 2: Garfield h-index ### 43 | 44 | data(garfield, package = "bibliometrixData") 45 | 46 | indices <- Hindex(garfield, field = "author", elements = "GARFIELD E", years = Inf, sep = ";") 47 | 48 | # h-index, g-index and m-index of Eugene Garfield 49 | indices$H 50 | 51 | # Papers and total citations 52 | head(indices$CitationList[[1]]) 53 | 54 | } 55 | \seealso{ 56 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 57 | 58 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 59 | 60 | \code{\link{summary}} to obtain a summary of the results. 61 | 62 | \code{\link{plot}} to draw some useful plots of the results. 63 | } 64 | -------------------------------------------------------------------------------- /R/pubmed2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | pubmed2df <- function(D) { 3 | D <- D[nchar(D) > 0] # remove empty rows 4 | 5 | for (i in 1:length(D)) { 6 | if (substr(D[i], 1, 4) == " ") substr(D[i], 1, 4) <- substr(D[i - 1], 1, 4) 7 | } 8 | 9 | Papers <- which(regexpr("PMID-", D) == 1) # first row of each document 10 | nP <- length(Papers) # number of documents 11 | 12 | rowPapers <- diff(c(Papers, length(D) + 1)) 13 | 14 | numPapers <- rep(1:nP, rowPapers) 15 | 16 | DATA <- data.frame(Tag = substr(D, 1, 4), content = substr(D, 7, nchar(D)), Paper = numPapers) 17 | DATA$Tag <- gsub(" ", "", DATA$Tag) 18 | df <- DATA %>% 19 | group_by(Paper, Tag) %>% 20 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 21 | arrange(Tag, Paper) %>% 22 | pivot_wider(names_from = Tag, values_from = cont) %>% 23 | ungroup() 24 | df <- as.data.frame(df) 25 | 26 | # rename field tags 27 | error <- 0 28 | old_labs <- c("AD", "AUID", "FAU", "IS", "IP", "SO", "JT", "TA", "MH", "PG", "PT", "VI", "DP") 29 | new_labs <- c("C1", "OI", "AF", "SN", "IS", "SO2", "SO", "J9", "DE", "PP", "DT", "VL", "PY") 30 | lab <- names(df) 31 | for (j in 1:length(old_labs)) { 32 | i <- which(lab %in% old_labs[j]) 33 | if (length(i) > 0) { 34 | lab[i] <- new_labs[j] 35 | } else { 36 | error <- 1 37 | } 38 | } 39 | names(df) <- lab 40 | if (error == 1) { 41 | cat("\nWarning:\nIn your file, some mandatory metadata are missing.
Bibliometrix functions may not work properly!\n 42 | Please, take a look at the vignettes: 43 | - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html) 44 | - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)\n\n") 45 | } 46 | 47 | # extract DOIs 48 | df$DI <- trimws(unlist(lapply(strsplit(df$LID, "\\["), "[", 1))) 49 | df$PY <- as.numeric(substr(df$PY, 1, 4)) 50 | 51 | 52 | ### replace "---" with ";" 53 | tagsComma <- c("AU", "AF", "DE", "AID", "OT", "PHST", "DT") 54 | nolab <- setdiff(tagsComma, names(df)) 55 | tagsComma <- tagsComma[(!(tagsComma %in% nolab))] 56 | 57 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 58 | gsub("---", ";", x) 59 | })) 60 | 61 | ### replace "---" with " " 62 | otherTags <- setdiff(names(df), tagsComma) 63 | df2 <- data.frame(lapply(df[otherTags], function(x) { 64 | trimES(gsub("---", " ", x)) 65 | })) 66 | df <- cbind(df1, df2) 67 | rm(df1, df2) 68 | 69 | df$DB <- "PUBMED" 70 | 71 | # remove * char from keywords 72 | df$DE <- df$ID <- gsub("\\*", "", df$DE) 73 | AB <- df$AB 74 | TI <- df$TI 75 | DE <- df$DE 76 | df <- data.frame(lapply(df, toupper)) 77 | df$AB_raw <- AB 78 | df$TI_raw <- TI 79 | df$DE_raw <- DE 80 | # add sep ; to affiliations 81 | df$C1 <- gsub("\\.", ".;", df$C1) 82 | df$RP <- NA 83 | df <- df[names(df) != "Paper"] 84 | 85 | return(df) 86 | } 87 | -------------------------------------------------------------------------------- /R/idByAuthor.R: -------------------------------------------------------------------------------- 1 | #' Get Complete Author Information and ID from Scopus 2 | #' 3 | #' Uses the SCOPUS API author search to retrieve author identification information. 4 | #' 5 | #' @param df is a data frame composed of three columns: 6 | #' \tabular{lll}{ 7 | #' \code{lastname}\tab \tab author's last name\cr 8 | #' \code{firstname}\tab \tab author's first name\cr 9 | #' \code{affiliation}\tab \tab Part of the affiliation name (university name, city, etc.)} 10 | #' i.e. df[1,1:3]<-c("aria","massimo","naples") 11 | #' When affiliation is not specified, the field df$affiliation has to be NA. 12 | #' i.e. df[2,1:3]<-c("cuccurullo","corrado", NA) 13 | #' @param api_key is a character. It contains the Elsevier API key. Information about how to obtain an API Key \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website} 14 | #' @return a data frame with cases corresponding to authors and variables to author's information and ID got from SCOPUS. 15 | #' @examples 16 | #' ## Request a personal API Key to Elsevier web page https://dev.elsevier.com/sc_apis.html 17 | #' # 18 | #' # api_key="your api key" 19 | #' 20 | #' ## create a data frame with the list of authors to get information and IDs 21 | #' # i.e.
df[1,1:3]<-c("aria","massimo","naples") 22 | #' # df[2,1:3]<-c("cuccurullo","corrado", NA) 23 | #' 24 | #' ## run idByAuthor function 25 | #' # 26 | #' # authorsID <- idByAuthor(df, api_key) 27 | #' 28 | #' @seealso \code{\link{retrievalByAuthorID}} for downloading the complete author bibliographic collection from SCOPUS 29 | #' 30 | #' @export 31 | 32 | idByAuthor <- function(df, api_key) { 33 | n <- dim(df)[1] 34 | 35 | ### download authors' info 36 | AU_ID <- NA 37 | AU_AFF <- NA 38 | AU_count <- NA 39 | 40 | for (j in 1:n) { 41 | lastname <- tolower(df[j, 1]) 42 | firstname <- tolower(df[j, 2]) 43 | if (!is.na(df[j, 3])) { 44 | query <- paste("affil(", df[j, 3], ")", sep = "") 45 | } else { 46 | query <- NULL 47 | } 48 | cat("\nSearching author's info: ", toupper(df[j, 1]), toupper(df[j, 2])) 49 | 50 | AU_info <- get_complete_author_info(last_name = lastname, first_name = firstname, api_key = api_key, query = query) 51 | 52 | ### author id 53 | if (AU_info$content$`search-results`$`opensearch:totalResults` != 0) { 54 | AU_ID[j] <- AU_info[[2]]$`search-results`$entr[[1]]$`dc:identifier` 55 | AU_ID[j] <- gsub("AUTHOR_ID:", "", AU_ID[j]) 56 | AU_info2 <- AU_info[[2]] 57 | aff <- AU_info2$`search-results`$entry[[1]]$`affiliation-current` 58 | AU_AFF[j] <- paste(aff$`affiliation-name`, ", ", aff$`affiliation-city`, ", ", aff$`affiliation-country`, sep = "") 59 | ### author document counts 60 | AU_count[j] <- AU_info[[2]]$`search-results`$entr[[1]]$`document-count` 61 | } else { 62 | AU_ID[j] <- NA 63 | AU_AFF[j] <- NA 64 | AU_count[j] <- NA 65 | } 66 | } 67 | authorsID <- data.frame(lastname = df[, 1], firstname = df[, 2], id = AU_ID, affiliation = AU_AFF, count = AU_count) 68 | return(authorsID) 69 | } 70 | -------------------------------------------------------------------------------- /R/duplicatedMatching.R: -------------------------------------------------------------------------------- 1 | #' Searching of duplicated records in a bibliographic database 2 | #' 3 | #' Search duplicated records in a dataframe. 4 | #' 5 | #' A bibliographic data frame is obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 7 | #' The function identifies duplicated records in a bibliographic data frame and deletes them. 8 | #' Duplicate entries are identified through the restricted Damerau-Levenshtein distance. 9 | #' Two manuscripts that have a relative similarity measure greater than \code{tol} argument are stored in the output data frame only once. 10 | #' 11 | #' @param M is the bibliographic data frame. 12 | #' @param Field is a character object. It indicates one of the field tags used to identify duplicated records. Field can be equal to one of these tags: TI (title), AB (abstract), UT (manuscript ID). 13 | #' @param exact is logical. If exact = TRUE the function searches duplicates using exact matching. If exact=FALSE, 14 | #' the function uses the restricted Damerau-Levenshtein distance to find duplicated documents. 15 | #' @param tol is a numeric value giving the minimum relative similarity to match two manuscripts. Default value is \code{tol = 0.95}. 16 | #' To use the restricted Damerau-Levenshtein distance, exact argument has to be set as FALSE. 17 | #' @return the value returned from \code{duplicatedMatching} is a data frame without duplicated records. 
18 | #' 19 | #' 20 | #' @examples 21 | #' 22 | #' data(scientometrics, package = "bibliometrixData") 23 | #' 24 | #' M <- rbind(scientometrics[1:20, ], scientometrics[10:30, ]) 25 | #' 26 | #' newM <- duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 27 | #' 28 | #' dim(newM) 29 | #' 30 | #' @seealso \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 31 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 32 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 33 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 34 | #' 35 | #' @export 36 | duplicatedMatching <- function(M, Field = "TI", exact = FALSE, tol = 0.95) { 37 | if (!(Field %in% names(M))) { 38 | cat("\nField", Field, "is not a valid column name of your bibliographic data frame\n ") 39 | return(M) 40 | } 41 | if (isTRUE(exact)) { 42 | exact <- "true" 43 | } else { 44 | exact <- "false" 45 | } 46 | switch(exact, 47 | true = { 48 | M <- M[!duplicated(M[Field]), ] 49 | }, 50 | false = { 51 | a <- b <- M[[Field]] 52 | an <- nchar(a) 53 | A <- matrix(an, length(an), length(an)) 54 | A[is.na(A)] <- 0 55 | B <- t(A) 56 | C <- A 57 | C[B > A] <- B[B > A] 58 | D <- as.matrix(stringdistmatrix(a)) 59 | Dn <- 1 - (D / C) 60 | Dn[Dn > tol] <- 2 61 | M <- M[!duplicated(Dn), ] 62 | } 63 | ) 64 | 65 | return(M) 66 | } 67 | -------------------------------------------------------------------------------- /R/dominance.R: -------------------------------------------------------------------------------- 1 | #' Authors' dominance ranking 2 | #' 3 | #' It calculates the authors' dominance ranking from an object of the class '\code{bibliometrix}' as proposed by Kumar & Kumar, 2008. 4 | #' @param results is an object of the class '\code{bibliometrix}' for which the analysis of the authors' dominance ranking is desired. 5 | #' @param k is an integer, used for table formatting (number of authors). Default value is 10. 6 | #' @return The function \code{dominance} returns a data frame with cases corresponding to the first \code{k} most productive authors and variables to the typical fields of a dominance analysis. 7 | #' 8 | #' the data frame variables are: 9 | #' \tabular{lll}{ 10 | #' \code{Author} \tab \tab Author's name\cr 11 | #' \code{Dominance Factor} \tab \tab Dominance Factor (DF = FAA / MAA)\cr 12 | #' \code{Tot Articles} \tab \tab N. of Authored Articles (TAA)\cr 13 | #' \code{Single Authored} \tab \tab N. of Single-Authored Articles (SAA)\cr 14 | #' \code{Multi Authored} \tab \tab N. of Multi-Authored Articles (MAA=TAA-SAA)\cr 15 | #' \code{First Authored} \tab \tab N. of First Authored Articles (FAA)\cr 16 | #' \code{Rank by Articles} \tab \tab Author Ranking by N.
of Articles\cr 17 | #' \code{Rank by DF} \tab \tab Author Ranking by Dominance Factor} 18 | #' 19 | #' 20 | #' 21 | #' @examples 22 | #' data(scientometrics, package = "bibliometrixData") 23 | #' results <- biblioAnalysis(scientometrics) 24 | #' DF <- dominance(results) 25 | #' DF 26 | #' 27 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis 28 | #' @seealso \code{\link{summary}} method for class '\code{bibliometrix}' 29 | #' 30 | #' @export 31 | 32 | dominance <- function(results, k = 10) { 33 | # Author Rank by Dominance Rank (Kumar & Kumar, 2008) 34 | 35 | # options(warn=-1) 36 | 37 | if (!inherits(results, "bibliometrix")) { 38 | cat('\n argument "results" has to be an object of class "bibliometrix"\n') 39 | return(NA) 40 | } 41 | 42 | Nmf <- table(results$FirstAuthors[results$nAUperPaper > 1]) 43 | FA <- names(Nmf) 44 | # FA=gsub(" ", "", FA, fixed = TRUE) # delete spaces 45 | 46 | AU <- names(results$Authors) 47 | 48 | 49 | Tot <- Single <- rep(NA, length(FA)) 50 | for (i in 1:length(FA)) { 51 | Single[i] <- sum(results$FirstAuthors[results$nAUperPaper == 1] == FA[i]) 52 | Tot[i] <- results$Authors[FA[i] == AU] 53 | } 54 | Dominance <- Nmf / (Tot - Single) 55 | 56 | D <- data.frame("Author" = FA, "Dominance Factor" = as.numeric(Dominance), "Articles" = Tot, "Single-Authored" = Single, "Multi-Authored" = Tot - Single, "First-Author" = as.numeric(Nmf)) 57 | D <- D[order(-D[, 3]), ] 58 | D <- D[1:k, ] 59 | D$RankbyArticles <- rank(-D$Articles, ties.method = "min") 60 | D <- D[order(-D$Dominance.Factor), ] 61 | D$RankDF <- rank(-D$Dominance.Factor, ties.method = "min") 62 | names(D) <- c("Author", "Dominance Factor", "Tot Articles", "Single-Authored", "Multi-Authored", "First-Authored", "Rank by Articles", "Rank by DF") 63 | row.names(D) <- 1:k 64 | return(D) 65 | } 66 | -------------------------------------------------------------------------------- /R/tableTag.R: -------------------------------------------------------------------------------- 1 | #' Tabulate elements from a Tag Field column 2 | #' 3 | #' It tabulates elements from a Tag Field column of a bibliographic data frame. 4 | #' 5 | #' \code{tableTag} is an internal routine of main function \code{\link{biblioAnalysis}}. 6 | #' 7 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file. 9 | #' @param Tag is a character object. It indicates one of the field tags of the 10 | #' standard ISI WoS Field Tag codification. 11 | #' @param sep is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}. 12 | #' @param ngrams is an integer between 1 and 3. It indicates the type of n-gram to extract from titles or abstracts. 13 | #' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}. 14 | #' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
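#' For instance (a hypothetical value, for illustration only), passing
#' \code{synonyms = c("BIBLIOMETRICS; BIBLIOMETRIC ANALYSIS")} counts every
#' occurrence of "BIBLIOMETRIC ANALYSIS" under the single term "BIBLIOMETRICS".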
15 | #' @return an object of class \code{table} 16 | #' @examples 17 | #' 18 | #' data(scientometrics, package = "bibliometrixData") 19 | #' Tab <- tableTag(scientometrics, Tag = "CR", sep = ";") 20 | #' Tab[1:10] 21 | #' 22 | #' @export 23 | tableTag <- function(M, Tag = "CR", sep = ";", ngrams = 1, remove.terms = NULL, synonyms = NULL) { 24 | ## check and remove duplicated 25 | M <- M[!duplicated(M$SR), ] 26 | 27 | if (Tag %in% c("AB", "TI")) { 28 | M <- termExtraction(M, Field = Tag, stemming = F, verbose = FALSE, ngrams = ngrams, remove.terms = remove.terms, synonyms = synonyms) 29 | i <- which(names(M) == paste(Tag, "_TM", sep = "")) 30 | remove.terms <- NULL 31 | synonyms <- NULL 32 | } else { 33 | i <- which(names(M) == Tag) 34 | } 35 | 36 | if (Tag == "C1") { 37 | M$C1 <- gsub("\\[.+?]", "", M$C1) 38 | } 39 | 40 | Tab <- unlist(strsplit(as.character(M %>% dplyr::pull(i)), sep)) 41 | 42 | ### inserted to remove punct and extra spaces #### 43 | Tab <- trimws(gsub("\\s+|\\.|\\,", " ", Tab)) 44 | #### 45 | # Tab<-Tab[Tab!=""] 46 | Tab <- Tab[nchar(Tab) > 0] 47 | 48 | # Merge synonyms in the vector synonyms 49 | if (length(synonyms) > 0 & is.character(synonyms)) { 50 | s <- strsplit(toupper(synonyms), ";") 51 | snew <- trimws(unlist(lapply(s, function(l) l[1]))) 52 | sold <- (lapply(s, function(l) trimws(l[-1]))) 53 | for (i in 1:length(s)) { 54 | Tab[Tab %in% unlist(sold[[i]])] <- snew[i] 55 | } 56 | } 57 | 58 | Tab <- sort(table(Tab), decreasing = TRUE) 59 | # remove terms from ID and DE 60 | if ((Tag %in% c("DE", "ID", "KW_Merged")) & (!is.null(remove.terms))) { 61 | term <- setdiff(names(Tab), toupper(remove.terms)) 62 | Tab <- Tab[term] 63 | } 64 | 65 | return(Tab) 66 | } 67 | -------------------------------------------------------------------------------- /R/isi2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | isi2df <- function(D) { 3 | # D <- D[nchar(D)>0] # remove empty rows 4 | 5 | # remove empty rows and strange characters 6 | res <- try(D <- D[nchar(D) > 1], silent = T) 7 | if (inherits(res, "try-error")) { 8 | D <- removeStrangeChar(D) 9 | # next 10 | } else { 11 | D <- res 12 | rm(res) 13 | } 14 | 15 | D <- D[!(substr(D, 1, 3) %in% c("FN ", "VR "))] 16 | 17 | for (i in 1:length(D)) { 18 | if (substr(D[i], 1, 3) == " ") substr(D[i], 1, 3) <- substr(D[i - 1], 1, 3) 19 | } 20 | Papers <- which(substr(D, 1, 3) == "PT ") # first row of each document 21 | nP <- length(Papers) # number of documents 22 | 23 | rowPapers <- diff(c(Papers, length(D) + 1)) 24 | 25 | numPapers <- rep(1:nP, rowPapers) 26 | 27 | DATA <- data.frame(Tag = substr(D, 1, 3), content = substr(D, 4, nchar(D)), Paper = numPapers) 28 | DATA$Tag <- gsub(" ", "", DATA$Tag) 29 | df <- DATA %>% 30 | group_by(Paper, Tag) %>% 31 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 32 | arrange(Tag, Paper) %>% 33 | pivot_wider(names_from = Tag, values_from = cont) %>% 34 | ungroup() 35 | df <- as.data.frame(df) 36 | 37 | 38 | df$PY <- as.numeric(df$PY) 39 | 40 | missingTags <- setdiff(c("AU", "DE", "C1", "RP", "CR", "PY", "SO", "TI", "TC"), names(df)) 41 | if (length(missingTags) > 0) { 42 | cat("\nWarning:\nIn your file, some mandatory metadata are missing. 
Bibliometrix functions may not work properly!\n 43 | Please, take a look at the vignettes: 44 | - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html) 45 | - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)\n\n") 46 | cat("\nMissing fields: ", missingTags, "\n") 47 | } 48 | 49 | ### replace "---" with ";" 50 | tagsComma <- c("AU", "AF", "CR") 51 | 52 | nolab <- setdiff(tagsComma, names(df)) 53 | 54 | tagsComma <- tagsComma[(!(tagsComma %in% nolab))] 55 | 56 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 57 | gsub("---", ";", x) 58 | })) 59 | 60 | ### replace "---" with " " 61 | otherTags <- setdiff(names(df), tagsComma) 62 | df2 <- data.frame(lapply(df[otherTags], function(x) { 63 | trimES(gsub("---", " ", x)) 64 | })) 65 | df <- cbind(df1, df2) 66 | rm(df1, df2) 67 | 68 | ### store raw affiliation format to extract links among authors and affiliations 69 | df$C1raw <- df$C1 70 | ### 71 | 72 | df$DB <- "ISI" 73 | 74 | # Authors 75 | df$AU <- trimES(gsub(",", " ", df$AU)) 76 | 77 | # Toupper 78 | DI <- df$DI 79 | AB <- df$AB 80 | TI <- df$TI 81 | DE <- df$DE 82 | df <- data.frame(lapply(df, toupper)) 83 | df$DI <- DI 84 | df$AB_raw <- AB 85 | df$TI_raw <- TI 86 | df$DE_raw <- DE 87 | 88 | # add sep ; to affiliations 89 | df$C1 <- trim(gsub("\\[.*?\\]", "", df$C1)) # to remove author info in square brackets 90 | df$C1 <- gsub("\\.", ".;", df$C1) 91 | 92 | df <- df[names(df) != "Paper"] 93 | 94 | return(df) 95 | } 96 | -------------------------------------------------------------------------------- /man/retrievalByAuthorID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrievalByAuthorID.R 3 | \name{retrievalByAuthorID} 4 | \alias{retrievalByAuthorID} 5 | \title{Get Author Content on SCOPUS by ID} 6 | \usage{ 7 | retrievalByAuthorID(id, api_key, remove.duplicated = TRUE, country = TRUE) 8 | } 9 | \arguments{ 10 | \item{id}{is a vector of characters containing the author's SCOPUS IDs. 11 | SCOPUS IDs can be obtained using the function \code{\link{idByAuthor}}.} 12 | 13 | \item{api_key}{is a character. It contains the Elsevier API key. Information about how to obtain an API Key \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website}} 14 | 15 | \item{remove.duplicated}{is logical. If TRUE duplicated documents will be deleted from the bibliographic collection.} 16 | 17 | \item{country}{is logical. If TRUE authors' country information will be downloaded from SCOPUS.} 18 | } 19 | \value{ 20 | a list containing two objects: (i) M which is a data frame with cases corresponding to articles and variables to main Field Tags named using the standard ISI WoS Field Tag codification. 21 | M includes the entire bibliographic collection downloaded from SCOPUS.
22 | The main field tags are: 23 | 24 | \tabular{lll}{ 25 | \code{AU}\tab \tab Authors\cr 26 | \code{TI}\tab \tab Document Title\cr 27 | \code{SO}\tab \tab Publication Name (or Source)\cr 28 | \code{DT}\tab \tab Document Type\cr 29 | \code{DE}\tab \tab Authors' Keywords\cr 30 | \code{ID}\tab \tab Keywords associated by SCOPUS or ISI database \cr 31 | \code{AB}\tab \tab Abstract\cr 32 | \code{C1}\tab \tab Author Address\cr 33 | \code{RP}\tab \tab Reprint Address\cr 34 | \code{TC}\tab \tab Times Cited\cr 35 | \code{PY}\tab \tab Year\cr 36 | \code{UT}\tab \tab Unique Article Identifier\cr 37 | \code{DB}\tab \tab Database\cr} 38 | (ii) authorDocuments which is a list containing a bibliographic data frame for each author. 39 | 40 | LIMITATIONS: 41 | Currently, the SCOPUS API does not allow downloading document references. 42 | As a consequence, it is not possible to perform co-citation analysis (the field CR is empty). 43 | } 44 | \description{ 45 | Uses the SCOPUS API search to get information about documents on a set of authors using SCOPUS ID. 46 | } 47 | \examples{ 48 | ## Request a personal API Key to Elsevier web page https://dev.elsevier.com/sc_apis.html 49 | 50 | ## api_key="your api key" 51 | 52 | ## create a data frame with the list of authors to get information and IDs 53 | # i.e. df[1,1:3] <- c("aria","massimo","naples") 54 | # df[2,1:3] <- c("cuccurullo","corrado", "naples") 55 | 56 | ## run idByAuthor function 57 | # 58 | # authorsID <- idByAuthor(df, api_key) 59 | # 60 | 61 | ## extract the IDs 62 | # 63 | # id <- authorsID[,3] 64 | # 65 | 66 | ## create the bibliographic collection 67 | # 68 | # res <- retrievalByAuthorID(id, api_key) 69 | # 70 | # M <- res$M # the entire bibliographic data frame 71 | # authorDocuments <- res$authorDocuments # the list containing a bibliographic data frame for each author 72 | 73 | } 74 | \seealso{ 75 | \code{\link{idByAuthor}} for downloading author information and SCOPUS ID.
76 | } 77 | -------------------------------------------------------------------------------- /inst/biblioshiny/libraries.R: -------------------------------------------------------------------------------- 1 | # ### packages for biblioshiny() 2 | libraries <- function() { 3 | all_ok <- TRUE 4 | 5 | parse_pkg <- function(pkg_str) { 6 | # Extract the package name and minimum version, if specified 7 | matches <- regmatches( 8 | pkg_str, 9 | regexec("^([a-zA-Z0-9\\.]+)(?: \\(>= ([0-9\\.]+)\\))?$", pkg_str) 10 | )[[1]] 11 | if (length(matches) >= 2) { 12 | list( 13 | name = matches[2], 14 | min_version = ifelse(length(matches) == 3, matches[3], NA) 15 | ) 16 | } else { 17 | list(name = pkg_str, min_version = NA) 18 | } 19 | } 20 | 21 | safe_install <- function(pkg_str) { 22 | pkg_info <- parse_pkg(pkg_str) 23 | pkg <- pkg_info$name 24 | min_ver <- pkg_info$min_version 25 | 26 | need_install <- FALSE 27 | 28 | if (pkg %in% rownames(installed.packages())) { 29 | if (!is.na(min_ver)) { 30 | installed_ver <- as.character(packageVersion(pkg)) 31 | if (compareVersion(installed_ver, min_ver) < 0) { 32 | message(sprintf( 33 | "The installed version of '%s' (%s) is lower than the required version (%s).", 34 | pkg, 35 | installed_ver, 36 | min_ver 37 | )) 38 | need_install <- TRUE 39 | } 40 | } 41 | } else { 42 | need_install <- TRUE 43 | } 44 | 45 | if (need_install) { 46 | install.packages(pkg) 47 | } 48 | 49 | return(require(pkg, character.only = TRUE, quietly = TRUE)) 50 | } 51 | 52 | pkgs <- c( 53 | "httr2", 54 | "base64enc", 55 | "bibliometrix", 56 | "zip", 57 | "shiny", 58 | "igraph", 59 | "DT", 60 | "stringr", 61 | "contentanalysis", 62 | "ggplot2", 63 | "wordcloud2", 64 | "ggmap", 65 | "maps", 66 | "pdftools (>= 3.6.0)", 67 | "tidytext", 68 | "visNetwork", 69 | "plotly", 70 | "fontawesome", 71 | "shinydashboardPlus", 72 | "shinydashboard", 73 | "shinyjs", 74 | "curl (>= 6.3.0)", 75 | "RCurl", 76 | "openxlsx", 77 | "shinyWidgets", 78 | "chromote", 79 | "pagedown", 80 | "Matrix", 81 | "dimensionsR", 82 | "pubmedR", 83 | "dplyr", 84 | "tidyr", 85 | "sparkline", 86 | "tidygraph", 87 | "ggraph" 88 | ) 89 | 90 | suppressPackageStartupMessages({ 91 | results <- vapply(pkgs, safe_install, logical(1)) 92 | all_ok <- all(results) 93 | }) 94 | 95 | return(all_ok) 96 | } 97 | 98 | messageItem2 <- function( 99 | from, 100 | message, 101 | icon = shiny::icon("user"), 102 | time = NULL, 103 | href = NULL, 104 | inputId = NULL 105 | ) { 106 | if (is.null(href)) { 107 | href <- "#" 108 | } 109 | shiny::tags$li(shiny::a( 110 | id = inputId, 111 | class = if (!is.null(inputId)) { 112 | "action-button" 113 | }, 114 | href = href, 115 | target = "_blank", 116 | icon, 117 | shiny::h4( 118 | from, 119 | if (!is.null(time)) { 120 | shiny::tags$small(shiny::icon("clock-o"), time) 121 | } 122 | ), 123 | shiny::p(message) 124 | )) 125 | } 126 | -------------------------------------------------------------------------------- /man/biblioAnalysis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/biblioAnalysis.R 3 | \name{biblioAnalysis} 4 | \alias{biblioAnalysis} 5 | \title{Bibliometric Analysis} 6 | \usage{ 7 | biblioAnalysis(M, sep = ";") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics Web of Science file.} 12 | 13 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 14 | } 15 | \value{ 16 | \code{biblioAnalysis} returns an object of \code{class} "bibliometrix". 17 | 18 | The functions \code{\link{summary}} and \code{\link{plot}} are used to obtain or print a summary and some useful plots of the results. 19 | 20 | An object of \code{class} "bibliometrix" is a list containing the following components: 21 | 22 | \tabular{lll}{ 23 | Articles \tab \tab the total number of manuscripts\cr 24 | Authors \tab \tab the authors' frequency distribution\cr 25 | AuthorsFrac \tab \tab the authors' frequency distribution (fractionalized)\cr 26 | FirstAuthors \tab \tab corresponding author of each manuscript\cr 27 | nAUperPaper \tab \tab the number of authors per manuscript\cr 28 | Appearances \tab \tab the number of author appearances\cr 29 | nAuthors \tab \tab the number of authors\cr 30 | AuMultiAuthoredArt \tab \tab the number of authors of multi-authored articles\cr 31 | MostCitedPapers \tab \tab the list of manuscripts sorted by citations\cr 32 | Years \tab \tab publication year of each manuscript\cr 33 | FirstAffiliation \tab \tab the affiliation of the first author\cr 34 | Affiliations \tab \tab the frequency distribution of affiliations (of all co-authors for each paper)\cr 35 | Aff_frac \tab \tab the fractionalized frequency distribution of affiliations (of all co-authors for each paper)\cr 36 | CO \tab \tab the affiliation country of the first author\cr 37 | Countries \tab \tab the affiliation countries' frequency distribution\cr 38 | CountryCollaboration \tab \tab Intra-country (SCP) and intercountry (MCP) collaboration indices\cr 39 | TotalCitation \tab \tab the number of times each manuscript has been cited\cr 40 | TCperYear \tab \tab the yearly average number of times each manuscript has been cited\cr 41 | Sources \tab \tab the frequency distribution of sources (journals, books, etc.)\cr 42 | DE \tab \tab the frequency distribution of authors' keywords\cr 43 | ID \tab \tab the frequency distribution of keywords associated to the manuscript by SCOPUS and Clarivate Analytics Web of Science database} 44 | } 45 | \description{ 46 | It performs a bibliometric analysis of a dataset imported from SCOPUS and Clarivate Analytics Web of Science databases. 47 | } 48 | \examples{ 49 | \dontrun{ 50 | data(management, package = "bibliometrixData") 51 | 52 | results <- biblioAnalysis(management) 53 | 54 | summary(results, k = 10, pause = FALSE) 55 | } 56 | 57 | } 58 | \seealso{ 59 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 60 | 61 | \code{\link{summary}} to obtain a summary of the results. 62 | 63 | \code{\link{plot}} to draw some useful plots of the results.
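% A compact end-to-end sketch of the workflow referenced above; plot() is
% assumed to dispatch to this package's plot method for "bibliometrix"
% objects, and the k/pause arguments mirror those shown for summary():
% data(management, package = "bibliometrixData")
% results <- biblioAnalysis(management, sep = ";")
% summary(results, k = 10, pause = FALSE)
% plot(results, k = 10, pause = FALSE)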
--------------------------------------------------------------------------------
/man/normalizeSimilarity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/normalizeSimilarity.R
3 | \name{normalizeSimilarity}
4 | \alias{normalizeSimilarity}
5 | \title{Calculate similarity indices}
6 | \usage{
7 | normalizeSimilarity(NetMatrix, type = "association")
8 | }
9 | \arguments{
10 | \item{NetMatrix}{is a coupling matrix obtained by the network functions \code{\link{biblioNetwork}} or \code{\link{cocMatrix}}.}
11 | 
12 | \item{type}{is a character. It can be "association", "jaccard", "inclusion", "salton" or "equivalence" to obtain the Association Strength, Jaccard,
13 | Inclusion, Salton or Equivalence similarity index, respectively. The default is \code{type = "association"}.}
14 | }
15 | \value{
16 | a similarity matrix.
17 | }
18 | \description{
19 | It calculates a relative measure of bibliographic co-occurrences.
20 | }
21 | \details{
22 | \code{normalizeSimilarity} calculates the Association strength, Inclusion, Jaccard or Salton similarity from a co-occurrence bibliographic matrix.
23 | 
24 | The association strength is used by Van Eck and Waltman (2007) and Van Eck et al. (2006). Several works refer to the measure as the proximity index,
25 | while Leydesdorff (2008) and Zitt et al. (2000) refer to it as the probabilistic affinity (or activity) index.
26 | 
27 | The inclusion index, also called the Simpson coefficient, is an overlap measure used in information retrieval.
28 | 
29 | The Jaccard index (or Jaccard similarity coefficient) gives us a relative measure of the overlap of two sets.
30 | It is calculated as the ratio between the intersection and the union of the reference lists (of two manuscripts).
31 | 
32 | The Salton index, instead, relates the intersection of the two lists to the geometric mean of the size of both sets.
33 | The square of the Salton index is also called the Equivalence index.
34 | 
35 | The indices are equal to zero if the intersection of the reference lists is empty.\cr\cr
36 | 
37 | References\cr\cr
38 | Leydesdorff, L. (2008). On the normalization and visualization of author co-citation data: Salton's cosine versus the Jaccard index.
39 | Journal of the American Society for Information Science and Technology, 59(1), 77-85.\cr
40 | Van Eck, N.J., Waltman, L., Van den Berg, J., & Kaymak, U. (2006). Visualizing the computational intelligence field.
41 | IEEE Computational Intelligence Magazine, 1(4), 6-10.\cr
42 | Van Eck, N.J., & Waltman, L. (2007). Bibliometric mapping of the computational intelligence field.
43 | International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems, 15(5), 625-645.\cr
44 | Van Eck, N. J., & Waltman, L. (2009). How to normalize cooccurrence data? An analysis of some well-known similarity measures.
45 | Journal of the American Society for Information Science and Technology, 60(8), 1635-1651.\cr
46 | Zitt, M., Bassecoulard, E., & Okubo, Y. (2000). Shadows of the past in international cooperation:
47 | Collaboration profiles of the top five producers of science.
Scientometrics, 47(3), 627-657.\cr
48 | }
49 | \examples{
50 | 
51 | data(scientometrics, package = "bibliometrixData")
52 | NetMatrix <- biblioNetwork(scientometrics,
53 |   analysis = "co-occurrences",
54 |   network = "keywords", sep = ";"
55 | )
56 | S <- normalizeSimilarity(NetMatrix, type = "association")
57 | 
58 | }
59 | \seealso{
60 | \code{\link{biblioNetwork}} function to compute a bibliographic network.
61 | 
62 | \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
63 | }
64 | 
--------------------------------------------------------------------------------
/man/authorBio.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/authorBio.r
3 | \name{authorBio}
4 | \alias{authorBio}
5 | \title{Retrieve Author Biographical Information from OpenAlex}
6 | \usage{
7 | authorBio(
8 |   author_position = 1,
9 |   doi = "10.1016/j.joi.2017.08.007",
10 |   verbose = FALSE,
11 |   return_all_authors = FALSE,
12 |   sleep_time = 1,
13 |   max_retries = 3,
14 |   retry_delay = 2
15 | )
16 | }
17 | \arguments{
18 | \item{author_position}{Integer. The numerical position of the author in the authors list (default: 1)}
19 | 
20 | \item{doi}{Character. DOI of the article used to identify the authors}
21 | 
22 | \item{verbose}{Logical. Print informative messages during execution (default: FALSE)}
23 | 
24 | \item{return_all_authors}{Logical. If TRUE, returns information for all co-authors (default: FALSE)}
25 | 
26 | \item{sleep_time}{Numeric. Seconds to wait between API calls to respect rate limits (default: 1)}
27 | 
28 | \item{max_retries}{Integer. Maximum number of retry attempts for failed API calls (default: 3)}
29 | 
30 | \item{retry_delay}{Numeric. Base delay in seconds before retrying after an error (default: 2)}
31 | }
32 | \value{
33 | If \code{return_all_authors = FALSE}, returns a tibble with comprehensive information
34 | about the specified author including:
35 | \itemize{
36 |   \item Basic information (name, ORCID, OpenAlex ID)
37 |   \item Bibliometric indicators (works count, citations, h-index, i10-index)
38 |   \item Affiliation details from both the paper and author profile
39 |   \item Research topics and areas
40 |   \item Paper-specific metadata (corresponding author status, position type)
41 | }
42 | If \code{return_all_authors = TRUE}, returns a list of tibbles, one for each co-author.
43 | }
44 | \description{
45 | This function downloads comprehensive author information from OpenAlex based on a DOI
46 | and the numerical position of the author in the co-authors list. It provides detailed
47 | biographical data, bibliometric indicators, and affiliation information.
48 | }
49 | \details{
50 | The function first retrieves the work information using the provided DOI, then extracts
51 | author IDs from the authorships data, and finally fetches detailed author profiles from
52 | OpenAlex. It enriches the author data with paper-specific information such as authorship
53 | position, corresponding author status, and affiliations as listed in the paper.
54 | 
55 | The function implements automatic retry logic with exponential backoff to handle rate limiting
56 | (HTTP 429 errors) and temporary network issues. It respects OpenAlex API rate limits by adding
57 | configurable delays between requests.
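The retry behaviour described above can be sketched as follows (an illustrative helper, not the actual authorBio() internals):

    request_with_retry <- function(req_fun, max_retries = 3, retry_delay = 2) {
      for (attempt in seq_len(max_retries)) {
        res <- tryCatch(req_fun(), error = function(e) e)
        if (!inherits(res, "error")) return(res)
        Sys.sleep(retry_delay * 2^(attempt - 1))  # exponential backoff
      }
      stop("All retry attempts failed")
    }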
58 | 
59 | IMPORTANT: For better rate limits, set your OpenAlex API key using:
60 | Sys.setenv(openalexR_apikey = "YOUR_API_KEY")
61 | Get a free API key at: https://openalex.org/
62 | }
63 | \examples{
64 | \dontrun{
65 | # Get information for the first author
66 | first_author <- authorBio(doi = "10.1016/j.joi.2017.08.007")
67 | 
68 | # Get information for the second author with verbose output
69 | second_author <- authorBio(
70 |   author_position = 2,
71 |   doi = "10.1016/j.joi.2017.08.007",
72 |   verbose = TRUE
73 | )
74 | 
75 | # Get information for all co-authors with custom rate limiting
76 | all_authors <- authorBio(
77 |   doi = "10.1016/j.joi.2017.08.007",
78 |   return_all_authors = TRUE,
79 |   sleep_time = 0.5,
80 |   max_retries = 5
81 | )
82 | }
83 | 
84 | }
85 | 
--------------------------------------------------------------------------------
/R/missingData.R:
--------------------------------------------------------------------------------
1 | #' Completeness of bibliographic metadata
2 | #'
3 | #' It calculates the percentage of missing data in the metadata of a bibliographic data frame.
4 | #'
5 | #' Each metadata field is assigned a status c("Excellent", "Good", "Acceptable", "Poor", "Critical", "Completely missing")
6 | #' depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing
7 | #' values into six categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%),
8 | #' "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%).
9 | #'
10 | #' The results of the function allow us to understand which analyses can be performed with bibliometrix
11 | #' and which cannot, based on the completeness (or status) of the different metadata.
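# A quick check of the bands documented above, using the internal status()
# helper defined at the bottom of this file (input percentages are illustrative):
#   status(c(0, 5, 15, 35, 75, 100))
#   #> "Excellent" "Good" "Acceptable" "Poor" "Critical" "Completely missing"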
12 | #' @param M is a bibliographic data frame obtained by the \code{\link{convert2df}} function.
13 | #'
14 | #' @return The function \code{missingData} returns a list containing two objects:
15 | #' \tabular{lll}{
16 | #' \code{allTags} \tab \tab is a data frame including results for all original metadata tags from the collection\cr
17 | #' \code{mandatoryTags}\tab \tab is a data frame that includes only the tags needed for analysis with bibliometrix and biblioshiny.}
18 | #'
19 | #' @examples
20 | #' data(scientometrics, package = "bibliometrixData")
21 | #' res <- missingData(scientometrics)
22 | #' print(res$mandatoryTags)
23 | #'
24 | #' @export
25 | #'
26 | missingData <- function(M) {
27 |   cols <- names(M)
28 |   # count the number of missing values in each column
29 |   missing_counts <- sapply(cols, function(x) {
30 |     sum(is.na(M[, x]) | M[, x] %in% c("NA,0000,NA", "NA", "", "none"))
31 |   })
32 | 
33 |   if (sum(as.numeric(M$TC), na.rm = TRUE) == 0) {
34 |     missing_counts["TC"] <- nrow(M)
35 |   }
36 |   # calculate the percentage of missing values in each column
37 |   missing_pct <- round(missing_counts / nrow(M) * 100, 2)
38 |   # create a data frame with the column names, number of missing values and percentage of missing values
39 |   df_all <- data.frame(cols, missing_counts, missing_pct)
40 | 
41 |   # create a vector with the tags
42 |   tag <- unlist(
43 |     strsplit(
44 |       "AB,AU,C1,CR,DE,DI,DT,ID,LA,PY,RP,SO,TC,TI,WC", ","
45 |     )
46 |   )
47 |   # create a vector with the description of the tags
48 |   description <- trimws(unlist(
49 |     strsplit(
50 |       "Abstract, Author,Affiliation,Cited References,Keywords,DOI,Document Type,Keywords Plus,Language,
51 |       Publication Year,Corresponding Author, Journal, Total Citation, Title, Science Categories", ","
52 |     )
53 |   ))
54 | 
55 |   # create a data frame with the column names, number of missing values, percentage of missing values and status
56 |   df_all <- df_all %>%
57 |     mutate(status = status(missing_pct)) %>%
58 |     replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100))
59 | 
60 |   df_tags <- data.frame(tag, description) %>%
61 |     left_join(df_all, by = c("tag" = "cols")) %>%
62 |     replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100, status = "Completely missing")) %>%
63 |     arrange(missing_pct, description)
64 | 
65 |   results <- list(allTags = df_all, mandatoryTags = df_tags)
66 |   return(results)
67 | }
68 | 
69 | status <- function(x) {
70 |   y <- character(length(x))
71 |   y[x == 0] <- "Excellent"
72 |   y[x > 0 & x <= 10] <- "Good"
73 |   y[x > 10 & x <= 20] <- "Acceptable"
74 |   y[x > 20 & x <= 50] <- "Poor"
75 |   y[x > 50 & x < 100] <- "Critical"
76 |   y[is.na(x) | x == 100] <- "Completely missing"
77 |   return(y)
78 | }
79 | 
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # bibliometrix (development version)
2 | 
3 | # bibliometrix 5.2.1
4 | 
5 | # bibliometrix V.5.2.1 (Release date: 2025-12-11)
6 | 
7 | Changes:
8 | 
9 | * Removed old Gemini models 1.5 and 2.0 from Biblio AI
10 | * Solved an issue with internet connection checking on MS Windows systems
11 | 
12 | # bibliometrix V.5.2.0 (Release date: 2025-11-04)
13 | 
14 | Features:
15 | 
16 | * Added Author Bio Card (new function authorBio())
17 | * Added new plot layout in plotThematicEvolution()
18 | * Added automatic identification of continuous flows in plotThematicEvolution()
19 | * Added seed argument to networkPlot(), thematicMap(), and thematicEvolution() functions
20 | * Improved OpenAlex data conversion process
21 | * Enhanced Community Repulsion algorithm in
networkPlot(), thematicMap(), and thematicEvolution() functions
22 | * Added Citation Matching algorithm (new function applyCitationMatching())
23 | * Added Life Cycle Analysis (new function lifeCycle())
24 | * Biblioshiny enhancements:
25 |   - Added AI-powered article summaries
26 |   - Added Author Profile feature (using OpenAlex data)
27 |   - Added API Search and Download menu for OpenAlex and PubMed
28 |   - Removed API support for Dimensions.ai
29 |   - Added Content Analysis menu (using the contentanalysis library)
30 |   - Improved parameter layout across all analyses
31 |   - Added seed parameter in Settings
32 |   - Added Life Cycle Analysis menu
33 | 
34 | Changes:
35 | 
36 | #564 [Solved] - bibliometrix now supports the new Scopus CSV format (both for data import and local citation analyses)
37 | 
38 | 
39 | # bibliometrix V.5.1.1 (Release date: 2025-09-02)
40 | 
41 | Features:
42 | * Biblioshiny - Filters: Added the ability to upload a journal ranking list and filter publications based on selected rankings.
43 | 
44 | Changes:
45 | * Biblioshiny: Introduced automatic checks and updates for required packages, such as curl (version >= 6.3.0).
46 | 
47 | 
48 | # bibliometrix v.5.1.0 (Release date: 2025-07-15)
49 | 
50 | Features:
51 | * rpys():
52 |   - Introduced two options for the median window: centered and backward.
53 |   - Implemented an algorithm to detect citation sequences and influential references ("Hot Papers", "Constant Performers", "Life Cycles", and "Sleeping Beauties").
54 |   - These results are now also included in Biblioshiny outputs.
55 | * Biblioshiny:
56 |   - Added animated diachronic networks to Conceptual and Social Structure analyses.
57 |   - Completely redesigned the Filters panel using a J-AU-DOC framework, now supporting a broader range of filters (e.g., Countries, Journals, Citations).
58 | * Biblio AI:
59 |   - Enhanced prompt templates.
60 |   - Integrated new Gemini models (2.5 Flash and Flash-lite).
61 | 
62 | Changes:
63 | * Fixed an issue in mergeDbSources(): the function now preserves the cited references field when merging files from a single database.
64 | * convert2df() now automatically converts ISO2 country codes to full country names for OpenAlex data.
65 | 
66 | 
67 | # bibliometrix v.5.0.1
68 | 
69 | Features:
70 | * Introduced mergeKeywords(), a new function that combines DE and ID keywords into a single field named KW_Merged.
71 | 
72 | Changes:
73 | * Resolved various issues in biblioshiny().
74 | * Updated the following functions to work with the new KW_Merged field:
75 |   - tableTag()
76 |   - cocMatrix()
77 |   - biblioNetwork()
78 |   - conceptualStructure()
79 |   - thematicMap()
80 |   - thematicEvolution()
81 |   - threeFieldsPlot()
82 | 
83 | 
84 | # bibliometrix v.5.0.0
85 | 
86 | Features:
87 | * Biblioshiny 5.0 now includes Biblio AI - a powerful AI assistant to support your science mapping analyses.
88 | Changes:
89 | * Resolved multiple issues in biblioshiny().
90 | -------------------------------------------------------------------------------- /R/csvLens2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables("where") 2 | 3 | csvLens2df <- function(file) { 4 | options(readr.num_columns = 0) 5 | 6 | ## import all files in a single data frame 7 | for (i in 1:length(file)) { 8 | D <- read_csv(file[i], na = character(), quote = '"', trim_ws = FALSE, progress = show_progress(), show_col_types = FALSE) %>% 9 | mutate(across(where(is.numeric), as.character)) %>% 10 | mutate(across(where(is.character), function(x) tidyr::replace_na(x, ""))) %>% 11 | as.data.frame() 12 | 13 | if (i > 1) { 14 | l <- intersect(l, names(D)) 15 | DATA <- rbind(DATA[l], D[l]) 16 | } else { 17 | l <- names(D) 18 | DATA <- D 19 | } 20 | } 21 | rm(D) 22 | 23 | ## Post-Processing 24 | 25 | # column re-labelling 26 | DATA <- relabelling_lens(DATA) 27 | 28 | if ("TC" %in% names(DATA)) DATA$TC <- as.numeric(DATA$TC) 29 | 30 | # Authors' names cleaning (surname and initials) 31 | DATA$AF <- DATA$AU 32 | 33 | # Authors' names cleaning (surname and initials) 34 | # remove ; and 2 or more spaces 35 | # DATA$AU <- gsub("\\s+", " ", DATA$AU) 36 | 37 | listAU <- strsplit(DATA$AU, split = "; ") 38 | 39 | AU <- lapply(listAU, function(l) { 40 | lastname <- sub(".*\\s", "", trimws(l)) 41 | firstname <- sub("\\s+[^ ]+$", "", l) 42 | firstname <- gsub("[^:A-Z:]", "", firstname) 43 | AU <- paste(lastname, firstname, sep = " ", collapse = ";") 44 | return(AU) 45 | }) 46 | 47 | 48 | DATA$AU <- unlist(AU) 49 | # DATA$AU=gsub("\\.", "", DATA$AU) 50 | # DATA$AU=gsub(",", ";", DATA$AU) 51 | 52 | # Affiliation 53 | # DATA$C1 <- "Unknown" 54 | 55 | # Iso Source Titles 56 | DATA$SO[DATA$SO == ""] <- DATA$Publisher[DATA$SO == ""] 57 | # DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE) 58 | # DATA$J9 <- gsub("\\.","",DATA$JI) 59 | DATA$JI <- DATA$J9 <- DATA$SO 60 | DATA$ID <- DATA$DE 61 | DI <- DATA$DI 62 | URL <- DATA$URL 63 | AB <- DATA$AB 64 | TI <- DATA$TI 65 | DE <- DATA$DE 66 | DATA <- data.frame(lapply(DATA, toupper)) 67 | DATA$AB_raw <- AB 68 | DATA$TI_raw <- TI 69 | DATA$DE_raw <- DE 70 | DATA$DI <- DI 71 | DATA$URL <- URL 72 | DATA$AU_CO <- "NA" 73 | DATA$DB <- "LENS" 74 | return(DATA) 75 | } 76 | 77 | 78 | 79 | 80 | relabelling_lens <- function(DATA) { 81 | ## column re-labelling 82 | label <- names(DATA) 83 | label <- gsub("Source Title", "SO", label) 84 | # label <- gsub("Authors with affiliations","C1",label) 85 | label <- gsub("Author/s", "AU", label) 86 | label <- gsub("Publication.Type", "DT", label) 87 | label <- gsub("Title", "TI", label) 88 | label <- gsub("Publication Year", "PY", label) 89 | label <- gsub("Volume", "VL", label) 90 | label <- gsub("Issue Number", "IS", label) 91 | label <- gsub("Source Country", "SO_CO", label) 92 | label <- gsub("Scholarly Citation Count", "TC", label) 93 | label <- gsub("DOI", "DI", label) 94 | label <- gsub("Source URLs", "URL", label) 95 | label <- gsub("Abstract", "AB", label) 96 | label <- gsub("Keywords", "DE", label) 97 | label <- gsub("MeSH Terms", "MESH", label) 98 | label <- gsub("Funding Details", "FU", label) 99 | label <- gsub("Funding", "FX", label) 100 | label <- gsub("References", "CR", label) 101 | # label <- gsub("Correspondence Address","RP",label) 102 | label <- gsub("Fields of Study", "SC", label) 103 | label <- gsub("Language of Original Document", "LA", label) 104 | label <- gsub("Document Type", "DT", label) 105 | label <- gsub("Source", "DB", label) 106 | 
label <- gsub("Lens ID", "UT", label) 107 | label <- gsub("Citing Works Count", "TC", label) 108 | names(DATA) <- label 109 | 110 | return(DATA) 111 | } 112 | -------------------------------------------------------------------------------- /R/bradford.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Rank", "SO", "Freq")) 2 | #' Bradford's law 3 | #' 4 | #' It estimates and draws the Bradford's law source distribution. 5 | #' 6 | #' Bradford's law is a pattern first described by (\cite{Samuel C. Bradford, 1934}) that estimates the exponentially diminishing returns 7 | #' of searching for references in science journals. 8 | #' 9 | #' One formulation is that if journals in a field are sorted by number of articles into three groups, each with about one-third of all articles, 10 | #' then the number of journals in each group will be proportional to 1:n:n2.\cr\cr 11 | #' 12 | #' Reference:\cr 13 | #' Bradford, S. C. (1934). Sources of information on specific subjects. Engineering, 137, 85-86.\cr 14 | #' 15 | #' @param M is a bibliographic dataframe. 16 | #' @return The function \code{bradford} returns a list containing the following objects: 17 | #' \tabular{lll}{ 18 | #' \code{table} \tab \tab a dataframe with the source distribution partitioned in the three zones\cr 19 | #' \code{graph} \tab \tab the source distribution plot in ggplot2 format} 20 | #' 21 | #' @examples 22 | #' \dontrun{ 23 | #' data(management, package = "bibliometrixData") 24 | #' 25 | #' BR <- bradford(management) 26 | #' } 27 | #' 28 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis 29 | #' @seealso \code{\link{summary}} method for class '\code{bibliometrix}' 30 | #' 31 | #' @export 32 | 33 | bradford <- function(M) { 34 | SO <- sort(table(M$SO), decreasing = TRUE) 35 | n <- sum(SO) 36 | cumSO <- cumsum(SO) 37 | cutpoints <- round(c(1, n * 0.33, n * 0.67, Inf)) 38 | groups <- cut(cumSO, breaks = cutpoints, labels = c("Zone 1", "Zone 2", "Zone 3")) 39 | a <- length(which(cumSO < n * 0.33)) + 1 40 | b <- length(which(cumSO < n * 0.67)) + 1 41 | Z <- c(rep("Zone 1", a), rep("Zone 2", b - a), rep("Zone 3", length(cumSO) - b)) 42 | df <- data.frame(SO = names(cumSO), Rank = 1:length(cumSO), Freq = as.numeric(SO), cumFreq = cumSO, Zone = Z) 43 | 44 | x <- c(max(log(df$Rank)) - 0.02 - diff(range(log(df$Rank))) * 0.125, max(log(df$Rank)) - 0.02) 45 | y <- c(min(df$Freq), min(df$Freq) + diff(range(df$Freq)) * 0.125) + 1 46 | data("logo", envir = environment()) 47 | logo <- grid::rasterGrob(logo, interpolate = TRUE) 48 | 49 | g <- ggplot2::ggplot(df, aes(x = log(Rank), y = Freq, text = paste("Source: ", SO, "\nN. 
49 |   g <- ggplot2::ggplot(df, aes(x = log(Rank), y = Freq, text = paste("Source: ", SO, "\nN. of Documents: ", Freq))) +
50 |     geom_line(aes(group = "NA")) +
51 |     # geom_area(aes(group="NA"),fill = "gray90", alpha = 0.5) +
52 |     annotate("rect", xmin = 0, xmax = log(df$Rank[a]), ymin = 0, ymax = max(df$Freq), alpha = 0.2) +
53 |     labs(x = "Source log(Rank)", y = "Articles", title = "Core Sources by Bradford's Law") +
54 |     annotate("text", x = log(df$Rank[a]) / 2, y = max(df$Freq) / 2, label = "Core\nSources", fontface = 2, alpha = 0.5, size = 10) +
55 |     scale_x_continuous(breaks = log(df$Rank)[1:a], labels = as.character(substr(df$SO, 1, 25))[1:a]) +
56 |     theme(
57 |       text = element_text(color = "#444444"),
58 |       legend.position = "none",
59 |       panel.background = element_rect(fill = "#FFFFFF"),
60 |       panel.grid.minor = element_blank(),
61 |       panel.grid.major = element_blank(),
62 |       plot.title = element_text(size = 24),
63 |       axis.title = element_text(size = 14, color = "#555555"),
64 |       axis.line.x = element_line(color = "black", linewidth = 0.5),
65 |       axis.line.y = element_line(color = "black", linewidth = 0.5),
66 |       axis.title.y = element_text(vjust = 1, angle = 90),
67 |       axis.title.x = element_text(hjust = 0),
68 |       axis.text.x = element_text(angle = 90, hjust = 1, size = 8, face = "bold")
69 |     ) +
70 |     annotation_custom(logo, xmin = x[1], xmax = x[2], ymin = y[1], ymax = y[2])
71 | 
72 |   results <- list(table = df, graph = g)
73 |   return(results)
74 | }
75 | 
--------------------------------------------------------------------------------
/R/mergeDbSources.R:
--------------------------------------------------------------------------------
1 | utils::globalVariables(c("num"))
2 | #' Merge bibliographic data frames from supported bibliographic DBs
3 | #'
4 | #' Merge bibliographic data frames from different databases (WoS, SCOPUS, Lens, OpenAlex, etc.) into a single one.
5 | #'
6 | #' Bibliographic data frames are obtained by the converting function \code{\link{convert2df}}.
7 | #' The function merges data frames by identifying common tag fields and duplicated records.
8 | #'
9 | #' @param ... are the bibliographic data frames to merge.
10 | #' @param remove.duplicated is logical. If TRUE, duplicated documents will be deleted from the bibliographic collection.
11 | #' @param verbose is logical. If TRUE, information on duplicate documents is printed on the screen.
12 | #' @return the value returned from \code{mergeDbSources} is a bibliographic data frame.
13 | #'
14 | #'
15 | #' @examples
16 | #'
17 | #' data(isiCollection, package = "bibliometrixData")
18 | #'
19 | #' data(scopusCollection, package = "bibliometrixData")
20 | #'
21 | #' M <- mergeDbSources(isiCollection, scopusCollection, remove.duplicated = TRUE)
22 | #'
23 | #' dim(M)
24 | #'
25 | #' @seealso \code{\link{convert2df}} to import and convert an ISI or SCOPUS export file into a bibliographic data frame.
26 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis.
27 | #' @seealso \code{\link{summary}} to obtain a summary of the results.
28 | #' @seealso \code{\link{plot}} to draw some useful plots of the results.
29 | #'
30 | #' @export
31 | 
32 | 
33 | mergeDbSources <- function(..., remove.duplicated = TRUE, verbose = TRUE) {
34 |   index <- NULL
35 | 
36 |   mc <- match.call(expand.dots = TRUE)
37 | 
38 |   if (length(mc) > 3) {
39 |     M <- dplyr::bind_rows(list(...))
40 |   } else {
41 |     M <- dplyr::bind_rows(...)
42 |   }
43 |   # create KW_Merged field
44 |   M <- M %>% mergeKeywords(force = TRUE)
45 | 
46 |   dbLabels <- data.frame(
47 |     DB = toupper(c("isi", "scopus", "openalex", "lens", "dimensions", "pubmed", "cochrane")),
48 |     num = c(1, 2, 3, 4, 5, 6, 7)
49 |   )
50 |   DB <- unique(M$DB)
51 | 
52 |   if (length(DB) > 1) {
53 |     # order by db
54 |     M <- M %>%
55 |       left_join(dbLabels, by = "DB") %>%
56 |       arrange(num) %>%
57 |       select(-num) %>%
58 |       rename("CR_raw" = "CR") %>%
59 |       mutate(CR = "NA")
60 |   }
61 | 
62 | 
63 |   if (isTRUE(remove.duplicated)) {
64 |     # remove by DOI
65 |     if ("DI" %in% names(M)) {
66 |       M$DI[M$DI == ""] <- NA
67 |       index <- which(duplicated(M$DI) & !is.na(M$DI))
68 |       if (length(index) > 0) M <- M[-index, ]
69 |     }
70 | 
71 |     # remove by title
72 |     if ("TI" %in% names(M)) {
73 |       TI <- gsub("[^[:alnum:] ]", "", M$TI)
74 |       TI <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", TI, perl = TRUE)
75 |       d <- duplicated(paste(TI, " ", M$PY))
76 |       if (isTRUE(verbose)) cat("\n", sum(d) + length(index), "duplicated documents have been removed\n")
77 |       M <- M[!d, ]
78 |     }
79 |   }
80 | 
81 |   if (length(unique(M$DB)) > 1) {
82 |     M$DB_Original <- M$DB
83 |     M$DB <- "ISI"
84 | 
85 |     ## author data cleaning
86 |     if ("AU" %in% names(M)) {
87 |       M$AU <- gsub(",", " ", M$AU)
88 |       AUlist <- strsplit(M$AU, ";")
89 |       AU <- lapply(AUlist, function(l) {
90 |         l <- trim(l)
91 |         name <- strsplit(l, " ")
92 |         lastname <- unlist(lapply(name, function(ln) {
93 |           ln[1]
94 |         }))
95 |         firstname <- lapply(name, function(ln) {
96 |           f <- paste(substr(ln[-1], 1, 1), collapse = " ")
97 |         })
98 |         AU <- paste(lastname, unlist(firstname), sep = " ", collapse = ";")
99 |         return(AU)
100 |       })
101 |       M$AU <- unlist(AU)
102 |     }
103 |   }
104 | 
105 |   M <- metaTagExtraction(M, "SR")
106 |   row.names(M) <- M$SR
107 | 
108 |   class(M) <- c("bibliometrixDB", "data.frame")
109 |   return(M)
110 | }
111 | 
--------------------------------------------------------------------------------
/R/csvScopus2df.R:
--------------------------------------------------------------------------------
1 | utils::globalVariables(c("X1", "X2", "tag", "orig"))
2 | 
3 | csvScopus2df <- function(file) {
4 |   options(readr.num_columns = 0)
5 | 
6 |   ## import all files in a single data frame
7 |   for (i in 1:length(file)) {
8 |     D <- read_csv(file[i],
9 |       na = character(), quote = '"', trim_ws = FALSE, progress = show_progress(),
10 |       col_types = cols(.default = col_character())
11 |     ) %>% # introduced to remove column-parsing errors
12 |       # mutate(across(!where(is.numeric), as.character)) %>% # not yet necessary with the inclusion of the previous line
13 |       mutate(across(where(is.character), function(x) tidyr::replace_na(x, ""))) %>% as.data.frame()
14 | 
15 |     if (i > 1) {
16 |       l <- intersect(l, names(D))
17 |       DATA <- rbind(DATA[l], D[l])
18 |     } else {
19 |       l <- names(D)
20 |       DATA <- D
21 |     }
22 |   }
23 | 
24 |   ## Post-Processing
25 | 
26 |   # column re-labelling
27 |   DATA <- labelling(DATA)
28 | 
29 |   # Authors' names cleaning (surname and initials)
30 |   DATA$AU <- gsub("\\.", "", DATA$AU)
31 |   # DATA$AU <- gsub(",", ";", DATA$AU)
32 |   DATA$AU <- gsub(",", "", DATA$AU)
33 | 
34 |   ### store raw affiliation format to extract link among authors and affiliations
35 |   DATA$C1raw <- DATA$C1
36 |   ###
37 | 
38 |   # Affiliation
39 |   if (!("C1" %in% names(DATA))) {
40 |     DATA$C1 <- NA
41 |   } else {
42 |     DATA$C1 <- unlist(lapply(strsplit(DATA$C1, ";"), function(l) {
43 |       l <- paste(gsub(".*\\., ", "", l), collapse = ";", sep = "")
44 |     }))
45 |   }
46 |   # Iso Source Titles
47 |   if ("JI" %in% names(DATA)) {
48 |     DATA$J9 <-
gsub("\\.", "", DATA$JI) 49 | } else { 50 | DATA$J9 <- DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE) 51 | } 52 | 53 | DI <- DATA$DI 54 | URL <- DATA$URL 55 | AB <- DATA$AB 56 | TI <- DATA$TI 57 | DE <- DATA$DE 58 | DATA <- data.frame(lapply(DATA, toupper)) 59 | DATA$AB_raw <- AB 60 | DATA$TI_raw <- TI 61 | DATA$DE_raw <- DE 62 | DATA$DI <- DI 63 | DATA$URL <- URL 64 | return(DATA) 65 | } 66 | 67 | 68 | 69 | 70 | labelling <- function(DATA) { 71 | ## column re-labelling 72 | 73 | df_tag <- data.frame( 74 | rbind( 75 | c("Abbreviated Source Title", "JI"), 76 | c("Affiliations", "C1"), 77 | c("Authors with affiliations", "C1_raw"), 78 | c("Author Addresses", "C1_raw"), 79 | c("Authors", "AU"), 80 | c("Author Names", "AU"), 81 | c("Author full names", "AF"), 82 | c("Source title", "SO"), 83 | c("Titles", "TI"), 84 | c("Title", "TI"), 85 | c("Publication Year", "PY"), 86 | c("Year", "PY"), 87 | c("Volume", "VL"), 88 | c("Issue", "IS"), 89 | c("Page count", "PP"), 90 | c("Cited by", "TC"), 91 | c("DOI", "DI"), 92 | c("Link", "URL"), 93 | c("Abstract", "AB"), 94 | c("Author Keywords", "DE"), 95 | c("Indexed Keywords", "ID"), 96 | c("Index Keywords", "ID"), 97 | c("Funding Details", "FU"), 98 | c("Funding Texts", "FX"), 99 | c("Funding Text 1", "FX"), 100 | c("References", "CR"), 101 | c("Correspondence Address", "RP"), 102 | c("Publisher", "PU"), 103 | c("Open Access", "OA"), 104 | c("Language of Original Document", "LA"), 105 | c("Document Type", "DT"), 106 | c("Source", "DB"), 107 | c("EID", "UT") 108 | ) 109 | ) %>% 110 | rename( 111 | orig = X1, 112 | tag = X2 113 | ) 114 | 115 | label <- data.frame(orig = names(DATA)) %>% 116 | left_join(df_tag, by = "orig") %>% 117 | mutate(tag = ifelse(is.na(tag), orig, tag)) 118 | 119 | names(DATA) <- label$tag 120 | 121 | if (!"C1" %in% names(DATA)) { 122 | if ("C1_raw" %in% names(DATA)) { 123 | DATA$C1 <- DATA$C1_raw 124 | } else { 125 | DATA$C1 <- NA 126 | } 127 | } 128 | 129 | return(DATA) 130 | } 131 | -------------------------------------------------------------------------------- /man/assignEvolutionColors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assignEvolutionColors.R 3 | \name{assignEvolutionColors} 4 | \alias{assignEvolutionColors} 5 | \title{Assign Colors to Thematic Evolution Nodes Based on Lineages} 6 | \usage{ 7 | assignEvolutionColors( 8 | nexus, 9 | threshold = 0.5, 10 | palette = NULL, 11 | use_measure = "weighted" 12 | ) 13 | } 14 | \arguments{ 15 | \item{nexus}{A list object returned by \code{\link{thematicEvolution}} containing: 16 | \itemize{ 17 | \item \code{Nodes}: data frame with node information (name, group, id, sum, freq, etc.) 18 | \item \code{Edges}: data frame with edge information (from, to, weight measures) 19 | \item \code{TM}: list of thematic maps for each period 20 | }} 21 | 22 | \item{threshold}{Numeric. The minimum weight value for an edge to be considered a 23 | "strong connection" (default: 0.5). Edges with weights >= threshold will propagate 24 | the same color to connected nodes across periods.} 25 | 26 | \item{palette}{Character vector. Optional custom color palette as hex codes. If NULL, 27 | uses a default palette of 50+ distinct colors. Colors are assigned sequentially 28 | without reuse.} 29 | 30 | \item{use_measure}{Character. The measure to use for determining edge strength. 
31 | Can be one of:
32 |   \itemize{
33 |     \item \code{"inclusion"}: uses the Inclusion measure (column 3 of Edges)
34 |     \item \code{"stability"}: uses the Stability measure (column 5 of Edges)
35 |     \item \code{"weighted"}: uses the weighted Inclusion measure (column 4 of Edges)
36 |   }
37 | Default is \code{"weighted"}.}
38 | }
39 | \value{
40 | Returns the modified \code{nexus} object with updated colors:
41 | \itemize{
42 |   \item \code{Nodes$color}: updated with lineage-based colors
43 |   \item \code{Edges$color}: edges connecting same-colored nodes receive the node color,
44 |     others are grey
45 |   \item \code{TM}: thematic maps updated with new cluster colors
46 | }
47 | }
48 | \description{
49 | This function assigns colors to nodes in a thematic evolution analysis based on
50 | their lineages across time periods. Nodes connected by strong edges (above a threshold)
51 | receive the same color to visualize thematic continuity. Nodes with the same name
52 | across periods that are not strongly connected to other nodes are also colored identically.
53 | }
54 | \details{
55 | The function uses a multi-phase algorithm:
56 | \enumerate{
57 |   \item \strong{Phase 1}: Identifies lineages by following strong connections (weight >= threshold)
58 |     from the first period forward. When a node has multiple strong connections, the
59 |     strongest one determines the lineage.
60 |   \item \strong{Phase 1.5}: Assigns the same lineage to nodes with identical names across
61 |     periods if they are not already part of different strong connections.
62 |   \item \strong{Phase 2}: Assigns unique colors from the palette to each identified lineage.
63 |   \item \strong{Phase 3}: Assigns unique colors to isolated nodes (those without any lineage).
64 |   \item \strong{Phase 4}: Colors edges based on their connected nodes - same color if both
65 |     nodes share a color, grey otherwise.
66 |   \item \strong{Final}: Updates thematic maps with the new color scheme.
67 | }
68 | 
69 | Each lineage receives a unique color from the palette. No color is reused across
70 | different lineages, ensuring clear visual distinction between independent thematic streams.
71 | }
72 | \examples{
73 | \dontrun{
74 | data(scientometrics, package = "bibliometrixData")
75 | years <- c(2000, 2010)
76 | 
77 | nexus <- thematicEvolution(scientometrics, field = "ID",
78 |                            years = years, n = 100, minFreq = 2)
79 | 
80 | 
81 | # Use custom threshold and measure
82 | nexus <- assignEvolutionColors(nexus, threshold = 0.6, use_measure = "weighted")
83 | 
84 | }
85 | 
86 | }
87 | \seealso{
88 | \code{\link{thematicEvolution}} to perform thematic evolution analysis.
89 | 
90 | \code{\link{plotThematicEvolution}} to visualize the colored evolution.
91 | }
92 | 
--------------------------------------------------------------------------------
/R/normalizeSimilarity.R:
--------------------------------------------------------------------------------
1 | #' Calculate similarity indices
2 | #'
3 | #' It calculates a relative measure of bibliographic co-occurrences.
4 | #'
5 | #' \code{normalizeSimilarity} calculates the Association strength, Inclusion, Jaccard or Salton similarity from a co-occurrence bibliographic matrix.
6 | #'
7 | #' The association strength is used by Van Eck and Waltman (2007) and Van Eck et al. (2006). Several works refer to the measure as the proximity index,
8 | #' while Leydesdorff (2008) and Zitt et al. (2000) refer to it as the probabilistic affinity (or activity) index.
9 | #'
10 | #' The inclusion index, also called the Simpson coefficient, is an overlap measure used in information retrieval.
11 | #'
12 | #' The Jaccard index (or Jaccard similarity coefficient) gives us a relative measure of the overlap of two sets.
13 | #' It is calculated as the ratio between the intersection and the union of the reference lists (of two manuscripts).
14 | #'
15 | #' The Salton index, instead, relates the intersection of the two lists to the geometric mean of the size of both sets.
16 | #' The square of the Salton index is also called the Equivalence index.
17 | #'
18 | #' The indices are equal to zero if the intersection of the reference lists is empty.\cr\cr
19 | #'
20 | #' References\cr\cr
21 | #' Leydesdorff, L. (2008). On the normalization and visualization of author co-citation data: Salton's cosine versus the Jaccard index.
22 | #' Journal of the American Society for Information Science and Technology, 59(1), 77-85.\cr
23 | #' Van Eck, N.J., Waltman, L., Van den Berg, J., & Kaymak, U. (2006). Visualizing the computational intelligence field.
24 | #' IEEE Computational Intelligence Magazine, 1(4), 6-10.\cr
25 | #' Van Eck, N.J., & Waltman, L. (2007). Bibliometric mapping of the computational intelligence field.
26 | #' International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems, 15(5), 625-645.\cr
27 | #' Van Eck, N. J., & Waltman, L. (2009). How to normalize cooccurrence data? An analysis of some well-known similarity measures.
28 | #' Journal of the American Society for Information Science and Technology, 60(8), 1635-1651.\cr
29 | #' Zitt, M., Bassecoulard, E., & Okubo, Y. (2000). Shadows of the past in international cooperation:
30 | #' Collaboration profiles of the top five producers of science. Scientometrics, 47(3), 627-657.\cr
31 | #'
32 | #'
33 | #' @param NetMatrix is a coupling matrix obtained by the network functions \code{\link{biblioNetwork}} or \code{\link{cocMatrix}}.
34 | #' @param type is a character. It can be "association", "jaccard", "inclusion", "salton" or "equivalence" to obtain the Association Strength, Jaccard,
35 | #' Inclusion, Salton or Equivalence similarity index, respectively. The default is \code{type = "association"}.
36 | #' @return a similarity matrix.
37 | #'
38 | #'
39 | #'
40 | #' @examples
41 | #'
42 | #' data(scientometrics, package = "bibliometrixData")
43 | #' NetMatrix <- biblioNetwork(scientometrics,
44 | #'   analysis = "co-occurrences",
45 | #'   network = "keywords", sep = ";"
46 | #' )
47 | #' S <- normalizeSimilarity(NetMatrix, type = "association")
48 | #'
49 | #' @seealso \code{\link{biblioNetwork}} function to compute a bibliographic network.
50 | #' @seealso \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
51 | #'
52 | #' @export
53 | 
54 | normalizeSimilarity <- function(NetMatrix, type = "association") {
55 |   diag <- Matrix::diag
56 |   D <- diag(NetMatrix)
57 |   # S=NetMatrix
58 |   switch(type,
59 |     association = {
60 |       S <- NetMatrix / ((outer(D, D, "*")))
61 |     },
62 |     inclusion = {
63 |       S <- NetMatrix / outer(D, D, function(a, b) {
64 |         mapply(min, a, b)
65 |       })
66 |     },
67 |     jaccard = {
68 |       S <- NetMatrix / (outer(D, D, "+") - NetMatrix)
69 |     },
70 |     salton = {
71 |       S <- NetMatrix / (sqrt(outer(D, D, "*")))
72 |     },
73 |     equivalence = {
74 |       S <- (NetMatrix / sqrt(outer(D, D, "*")))^2
75 |     }
76 |   )
77 | 
78 |   S <- as.matrix(S)
79 |   S[is.nan(S)] <- 0
80 |   S <- Matrix(S, sparse = TRUE)
81 |   # if (class(S)!="dgCMatrix"){S=as.matrix(S)}
82 | 
83 |   return(S)
84 | }
85 | 
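A small numeric check of the indices implemented above (toy values: two items co-occurring in 2 documents, occurring 4 and 5 times in total):

    cij <- 2; si <- 4; sj <- 5
    cij / (si * sj)           # association strength: 0.1
    cij / min(si, sj)         # inclusion (Simpson): 0.5
    cij / (si + sj - cij)     # Jaccard: ~0.286
    cij / sqrt(si * sj)       # Salton: ~0.447
    (cij / sqrt(si * sj))^2   # equivalence: 0.2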
--------------------------------------------------------------------------------
/man/convert2df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/convert2df.R
3 | \name{convert2df}
4 | \alias{convert2df}
5 | \title{Import and Convert bibliographic export files and API objects.}
6 | \usage{
7 | convert2df(
8 |   file,
9 |   dbsource = "wos",
10 |   format = "plaintext",
11 |   remove.duplicates = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{file}{a character array containing a sequence of filenames coming from WoS, Scopus, Dimensions, Lens.org, OpenAlex and Pubmed. Alternatively, \code{file} can be
16 | an object resulting from an API query fetched from the Dimensions and PubMed databases:
17 | \tabular{lll}{
18 | a)\tab 'wos' \tab Clarivate Analytics WoS (in plaintext '.txt', Endnote Desktop '.ciw', or bibtex formats '.bib');\cr
19 | b)\tab 'scopus' \tab SCOPUS (exclusively in bibtex format '.bib');\cr
20 | c)\tab 'dimensions' \tab Digital Science Dimensions (in csv '.csv' or excel '.xlsx' formats);\cr
21 | d)\tab 'lens' \tab Lens.org (in csv '.csv');\cr
22 | e)\tab 'pubmed' \tab an object of the class \code{pubmedR (package pubmedR)} containing a collection obtained from a query performed with the pubmedR package;\cr
23 | f)\tab 'dimensions' \tab an object of the class \code{dimensionsR (package dimensionsR)} containing a collection obtained from a query performed with the dimensionsR package;\cr
24 | g)\tab 'openalex' \tab OpenAlex .csv file;\cr
25 | h)\tab 'openalex_api' \tab the filename and path to a list object returned by the openalexR package, containing a collection of works resulting from a query fetched from the OpenAlex database.}}
26 | 
27 | \item{dbsource}{is a character indicating the bibliographic database. \code{dbsource} can be \code{dbsource = c('cochrane','dimensions','generic','isi','openalex', 'pubmed','scopus','wos', 'lens')}. Default is \code{dbsource = "wos"}.}
28 | 
29 | \item{format}{is a character indicating the SCOPUS, Clarivate Analytics WoS, and other databases export file format. \code{format} can be \code{c('api', 'bibtex', 'csv', 'endnote','excel','plaintext', 'pubmed')}. Default is \code{format = "plaintext"}.}
30 | 
31 | \item{remove.duplicates}{is logical. If TRUE, the function will remove duplicated items checking by DOI and database ID.}
32 | }
33 | \value{
34 | a data frame with cases corresponding to articles and variables corresponding to Field Tags in the original export file.
35 | 
36 | E.g., if we have three files downloaded from Web of Science in plaintext format, \code{file} will be:
37 | 
38 | file <- c("filename1.txt", "filename2.txt", "filename3.txt")
39 | 
40 | Data frame columns are named using the standard Clarivate Analytics WoS Field Tag codify. The main field tags are:
41 | 
42 | \tabular{lll}{
43 | \code{AU}\tab \tab Authors\cr
44 | \code{TI}\tab \tab Document Title\cr
45 | \code{SO}\tab \tab Publication Name (or Source)\cr
46 | \code{JI}\tab \tab ISO Source Abbreviation\cr
47 | \code{DT}\tab \tab Document Type\cr
48 | \code{DE}\tab \tab Authors' Keywords\cr
49 | \code{ID}\tab \tab Keywords associated by SCOPUS or WoS database \cr
50 | \code{AB}\tab \tab Abstract\cr
51 | \code{C1}\tab \tab Author Address\cr
52 | \code{RP}\tab \tab Reprint Address\cr
53 | \code{CR}\tab \tab Cited References\cr
54 | \code{TC}\tab \tab Times Cited\cr
55 | \code{PY}\tab \tab Year\cr
56 | \code{SC}\tab \tab Subject Category\cr
57 | \code{UT}\tab \tab Unique Article Identifier\cr
58 | \code{DB}\tab \tab Database\cr}
59 | 
60 | for a complete list of field tags see: \href{https://www.bibliometrix.org/documents/Field_Tags_bibliometrix.pdf}{Field Tags used in bibliometrix}
61 | }
62 | \description{
63 | It converts SCOPUS, Clarivate Analytics WoS, Dimensions, Lens.org, PubMed and COCHRANE Database export files, or pubmedR and dimensionsR JSON/XML
64 | objects, into a data frame, with cases corresponding to articles and variables to Field Tags as used in WoS.
65 | }
66 | \examples{
67 | 
68 | # Example:
69 | # Import and convert a Web of Science collection from an export file in plaintext format:
70 | 
71 | \dontrun{
72 | files <- "https://www.bibliometrix.org/datasets/wos_plaintext.txt"
73 | 
74 | M <- convert2df(file = files, dbsource = "wos", format = "plaintext")
75 | }
76 | 
77 | }
78 | 
--------------------------------------------------------------------------------
/R/keywordGrowth.R:
--------------------------------------------------------------------------------
1 | #' Yearly occurrences of top keywords/terms
2 | #'
3 | #' It calculates yearly occurrences of top keywords/terms.
4 | #'
5 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}.
6 | #' It is a data matrix with cases corresponding to articles and variables corresponding to Field Tags in the original WoS or SCOPUS file.
7 | #' @param Tag is a character object. It indicates one of the keyword field tags of the
8 | #' standard ISI WoS Field Tag codify (ID, DE, KW_Merged) or a field tag created by the \code{\link{termExtraction}} function (TI_TM, AB_TM, etc.).
9 | #' @param sep is the field separator character. This character separates strings in each keyword column of the data frame. The default is \code{sep = ";"}.
10 | #' @param top is a numeric. It indicates the number of top keywords to analyze. The default value is 10.
11 | #' @param cdf is a logical. If TRUE, the function calculates the cumulative occurrences distribution.
12 | #' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.
13 | #' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
14 | #' @return an object of class \code{data.frame}
15 | #' @examples
16 | #'
17 | #' data(scientometrics, package = "bibliometrixData")
18 | #' topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5, cdf = TRUE)
19 | #' topKW
20 | #'
21 | #' # Plotting results
22 | #' \dontrun{
23 | #' install.packages("reshape2")
24 | #' library(reshape2)
25 | #' library(ggplot2)
26 | #' DF <- melt(topKW, id = "Year")
27 | #' ggplot(DF, aes(Year, value, group = variable, color = variable)) + geom_line()
28 | #' }
29 | #'
30 | #' @export
31 | KeywordGrowth <- function(M, Tag = "ID", sep = ";", top = 10, cdf = TRUE, remove.terms = NULL, synonyms = NULL) {
32 |   i <- which(names(M) == Tag)
33 |   PY <- as.numeric(M$PY)
34 |   Tab <- (strsplit(as.character(M[, i]), sep))
35 |   Y <- rep(PY, lengths(Tab))
36 |   A <- data.frame(Tab = unlist(Tab), Y = Y)
37 |   A$Tab <- trim.leading(A$Tab)
38 |   A <- A[A$Tab != "", ]
39 |   A <- A[!is.na(A$Y), ]
40 | 
41 |   ### remove terms
42 |   terms <- data.frame(Tab = toupper(remove.terms))
43 |   A <- anti_join(A, terms)
44 |   # end of block
45 | 
46 |   ### Merge synonyms in the vector synonyms
47 |   if (length(synonyms) > 0 & is.character(synonyms)) {
48 |     s <- strsplit(toupper(synonyms), ";")
49 |     snew <- trimws(unlist(lapply(s, function(l) l[1])))
50 |     sold <- (lapply(s, function(l) trimws(l[-1])))
51 |     for (i in 1:length(s)) {
52 |       A <- A %>%
53 |         mutate(
54 |           # Tab = str_replace_all(Tab, paste(sold[[i]], collapse="|",sep=""),snew[i])
55 |           # Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i]),
56 |           Tab = stringi::stri_replace_all_regex(Tab, stringi::stri_replace_all_regex(stringi::stri_replace_all_regex(paste(sold[[i]], collapse = "|", sep = ""), "\\(", "\\\\("), "\\)", "\\\\)"), snew[i])
57 |         )
58 |     }
59 |   }
60 |   # end of block
61 | 
62 |   Ymin <- min(A$Y)
63 |   Ymax <- max(A$Y)
64 |   Year <- Ymin:Ymax
65 |   if (top == Inf) top <- length(unique(A$Tab))
66 |   Tab <- names(sort(table(A$Tab), decreasing = TRUE))[1:top]
67 | 
68 |   words <- matrix(0, length(Year), top + 1)
69 |   words <- data.frame(words)
70 |   names(words) <- c("Year", Tab)
71 |   words[, 1] <- Year
72 |   for (j in 1:length(Tab)) {
73 |     word <- (table(A[A$Tab %in% Tab[j], 2]))
74 |     words[, j + 1] <- trim.years(word, Year, cdf)
75 |   }
76 |   return(words)
77 | }
78 | 
79 | trim.years <- function(w, Year, cdf) {
80 |   Y <- as.numeric(names(w))
81 |   W <- matrix(0, length(Year), 1)
82 | 
83 |   for (i in 1:length(Year)) {
84 |     if (length(Y) > 0 && Y[1] == Year[i]) { # check length first to avoid an NA condition when Y is exhausted
85 |       W[i, 1] <- w[1]
86 |       Y <- Y[-1]
87 |       w <- w[-1]
88 |     }
89 |   }
90 |   if (isTRUE(cdf)) W <- cumsum(W)
91 |   names(W) <- Year
92 |   W <- data.frame(W)
93 |   return(W)
94 | }
95 | 
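A short sketch of the remove.terms and synonyms arguments described above (the terms chosen are purely illustrative):

    data(scientometrics, package = "bibliometrixData")
    syn <- c("citation analysis; citations analysis")  # merged into the first term
    topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5,
                           remove.terms = c("science"), synonyms = syn)
    head(topKW)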
--------------------------------------------------------------------------------
/man/thematicMap.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/thematicMap.R
3 | \name{thematicMap}
4 | \alias{thematicMap}
5 | \title{Create a thematic map}
6 | \usage{
7 | thematicMap(
8 |   M,
9 |   field = "ID",
10 |   n = 250,
11 |   minfreq = 5,
12 |   ngrams = 1,
13 |   stemming = FALSE,
14 |   size = 0.5,
15 |   n.labels = 1,
16 |   community.repulsion = 0.5,
17 |   repel = TRUE,
18 |   remove.terms = NULL,
19 |   synonyms = NULL,
20 |   cluster = "louvain",
21 |   subgraphs = FALSE,
22 |   seed = 1234
23 | )
24 | }
25 | \arguments{
26 | \item{M}{is a bibliographic dataframe.}
27 | 
28 | \item{field}{is the textual attribute used to build up the thematic map. It can be \code{field = c("ID","DE","KW_Merged","TI", "AB")}.
29 | }
30 | 
31 | \item{n}{is an integer. It indicates the number of terms to include in the analysis.}
32 | 
33 | \item{minfreq}{is an integer. It indicates the minimum frequency (per thousand) of a cluster. It is a number in the range (0,1000).}
34 | 
35 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
36 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
37 | 
38 | \item{stemming}{is logical. If it is TRUE the words (from titles or abstracts) will be stemmed (using Porter's algorithm).}
39 | 
40 | \item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}
41 | 
42 | \item{n.labels}{is an integer. It indicates how many labels to associate with each cluster. Default is \code{n.labels = 1}.}
43 | 
44 | \item{community.repulsion}{is a real number between 0 and 1. It indicates the repulsion force among network communities. Default is \code{community.repulsion = 0.5}.}
45 | 
46 | \item{repel}{is logical. If it is TRUE ggplot uses geom_label_repel instead of geom_label.}
47 | 
48 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}
49 | 
50 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
51 | 
52 | \item{cluster}{is a character. It indicates the type of cluster to perform among ("optimal", "louvain","leiden", "infomap","edge_betweenness","walktrap", "spinglass", "leading_eigen", "fast_greedy").}
53 | 
54 | \item{subgraphs}{is a logical. If TRUE cluster subgraphs are returned.}
55 | 
56 | \item{seed}{is an integer. It indicates the seed for random number generation. Default is \code{seed = 1234}.}
57 | }
58 | \value{
59 | a list containing:
60 | \tabular{lll}{
61 | \code{map}\tab \tab The thematic map as ggplot2 object\cr
62 | \code{clusters}\tab \tab Centrality and Density values for each cluster. \cr
63 | \code{words}\tab \tab A list of the words belonging to each cluster\cr
64 | \code{nclust}\tab \tab The number of clusters\cr
65 | \code{net}\tab \tab A list containing the network output (as provided by the networkPlot function)}
66 | }
67 | \description{
68 | It creates a thematic map based on co-word network analysis and clustering.
69 | The methodology is inspired by the proposal of Cobo et al. (2011).
70 | }
71 | \details{
72 | \code{thematicMap} starts from a co-occurrence keyword network to plot in a
73 | two-dimensional map the typological themes of a domain.\cr\cr
74 | 
75 | Reference:\cr
76 | Cobo, M. J., Lopez-Herrera, A. G., Herrera-Viedma, E., & Herrera, F. (2011). An approach for detecting, quantifying,
77 | and visualizing the evolution of a research field: A practical application to the fuzzy sets theory field.
Journal of Informetrics, 5(1), 146-166.\cr
78 | }
79 | \examples{
80 | 
81 | \dontrun{
82 | data(management, package = "bibliometrixData")
83 | res <- thematicMap(management, field = "ID", n = 250, minfreq = 5, size = 0.5, repel = TRUE)
84 | plot(res$map)
85 | plot(res$net$graph)
86 | }
87 | 
88 | }
89 | \seealso{
90 | \code{\link{biblioNetwork}} function to compute a bibliographic network.
91 | 
92 | \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
93 | 
94 | \code{\link{networkPlot}} to plot a bibliographic network.
95 | }
96 | 
--------------------------------------------------------------------------------
/man/termExtraction.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/termExtraction.R
3 | \name{termExtraction}
4 | \alias{termExtraction}
5 | \title{Term extraction tool from textual fields of a manuscript}
6 | \usage{
7 | termExtraction(
8 |   M,
9 |   Field = "TI",
10 |   ngrams = 1,
11 |   stemming = FALSE,
12 |   language = "english",
13 |   remove.numbers = TRUE,
14 |   remove.terms = NULL,
15 |   keep.terms = NULL,
16 |   synonyms = NULL,
17 |   verbose = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}.
22 | It is a data matrix with cases corresponding to articles and variables corresponding to Field Tags in the original WoS or SCOPUS file.}
23 | 
24 | \item{Field}{is a character object. It indicates the field tag of textual data:
25 | \tabular{lll}{
26 | \code{"TI"}\tab \tab Manuscript title\cr
27 | \code{"AB"}\tab \tab Manuscript abstract\cr
28 | \code{"ID"}\tab \tab Manuscript keywords plus\cr
29 | \code{"DE"}\tab \tab Manuscript author's keywords}
30 | The default is \code{Field = "TI"}.}
31 | 
32 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
33 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
34 | 
35 | \item{stemming}{is logical. If TRUE the Porter Stemming algorithm is applied to all extracted terms. The default is \code{stemming = FALSE}.}
36 | 
37 | \item{language}{is a character. It is the language of textual contents ("english", "german","italian","french","spanish"). The default is \code{language = "english"}.}
38 | 
39 | \item{remove.numbers}{is logical. If TRUE all numbers are deleted from the documents before term extraction. The default is \code{remove.numbers = TRUE}.}
40 | 
41 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the corpus after term extraction. The default is \code{remove.terms = NULL}.}
42 | 
43 | \item{keep.terms}{is a character vector. It contains a list of compound words (formed by two or more terms) to keep in their original form in the term extraction process. The default is \code{keep.terms = NULL}.}
44 | 
45 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
46 | 
47 | \item{verbose}{is logical. If TRUE the function prints the most frequent terms extracted from documents. The default is \code{verbose = TRUE}.}
48 | }
49 | \value{
50 | the bibliometric data frame with a new column containing the terms extracted from the field tag indicated in the argument \code{Field}.
51 | }
52 | \description{
53 | It extracts terms from a text field (abstract, title, author's keywords, etc.) of a bibliographic data frame.
54 | }
55 | \examples{
56 | # Example 1: Term extraction from titles
57 | 
58 | data(scientometrics, package = "bibliometrixData")
59 | 
60 | # vector of compound words
61 | keep.terms <- c("co-citation analysis", "bibliographic coupling")
62 | 
63 | # term extraction
64 | scientometrics <- termExtraction(scientometrics,
65 |   Field = "TI", ngrams = 1,
66 |   remove.numbers = TRUE, remove.terms = NULL, keep.terms = keep.terms, verbose = TRUE
67 | )
68 | 
69 | # terms extracted from the first 10 titles
70 | scientometrics$TI_TM[1:10]
71 | 
72 | 
73 | # Example 2: Term extraction from abstracts
74 | 
75 | data(scientometrics)
76 | 
77 | # term extraction
78 | scientometrics <- termExtraction(scientometrics,
79 |   Field = "AB", ngrams = 2,
80 |   stemming = TRUE, language = "english",
81 |   remove.numbers = TRUE, remove.terms = NULL, keep.terms = NULL, verbose = TRUE
82 | )
83 | 
84 | # terms extracted from the first abstract
85 | scientometrics$AB_TM[1]
86 | 
87 | # Example 3: Term extraction from keywords with synonyms
88 | 
89 | data(scientometrics)
90 | 
91 | # vector of synonyms
92 | synonyms <- c("citation; citation analysis", "h-index; index; impact factor")
93 | 
94 | # term extraction
95 | scientometrics <- termExtraction(scientometrics,
96 |   Field = "ID", ngrams = 1,
97 |   synonyms = synonyms, verbose = TRUE
98 | )
99 | 
100 | }
101 | \seealso{
102 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS export file into a bibliographic data frame.
103 | 
104 | \code{\link{biblioAnalysis}} function for bibliometric analysis
105 | }
106 | 
--------------------------------------------------------------------------------
/man/thematicEvolution.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/thematicEvolution.R
3 | \name{thematicEvolution}
4 | \alias{thematicEvolution}
5 | \title{Perform a Thematic Evolution Analysis}
6 | \usage{
7 | thematicEvolution(
8 |   M,
9 |   field = "ID",
10 |   years,
11 |   n = 250,
12 |   minFreq = 2,
13 |   size = 0.5,
14 |   ngrams = 1,
15 |   stemming = FALSE,
16 |   n.labels = 1,
17 |   repel = TRUE,
18 |   remove.terms = NULL,
19 |   synonyms = NULL,
20 |   cluster = "louvain",
21 |   seed = 1234,
22 |   assign.evolution.colors = list(assign = TRUE, measure = "weighted")
23 | )
24 | }
25 | \arguments{
26 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.}
27 | 
28 | \item{field}{is a character object. It indicates the content field to use. Field can be one of \code{c("ID","DE","KW_Merged","TI","AB")}. Default value is \code{field = "ID"}.}
29 | 
30 | \item{years}{is a numeric vector of one or more unique cut points.}
31 | 
32 | \item{n}{is numerical. It indicates the number of words to use in the network analysis.}
33 | 
34 | \item{minFreq}{is numerical. It indicates the minimum frequency of words included in a cluster.}
35 | 
36 | \item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}
37 | 
38 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
39 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
40 | 
41 | \item{stemming}{is logical.

}
\seealso{
\code{\link{convert2df}} to import and convert a WoS or SCOPUS export file into a bibliographic data frame.

\code{\link{biblioAnalysis}} function for bibliometric analysis.
}
--------------------------------------------------------------------------------
/man/thematicEvolution.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/thematicEvolution.R
\name{thematicEvolution}
\alias{thematicEvolution}
\title{Perform a Thematic Evolution Analysis}
\usage{
thematicEvolution(
  M,
  field = "ID",
  years,
  n = 250,
  minFreq = 2,
  size = 0.5,
  ngrams = 1,
  stemming = FALSE,
  n.labels = 1,
  repel = TRUE,
  remove.terms = NULL,
  synonyms = NULL,
  cluster = "louvain",
  seed = 1234,
  assign.evolution.colors = list(assign = TRUE, measure = "weighted")
)
}
\arguments{
\item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.}

\item{field}{is a character object. It indicates the content field to use. Field can be one of c("ID", "DE", "KW_Merged", "TI", "AB"). The default value is \code{field="ID"}.}

\item{years}{is a numeric vector of one or more unique cut points.}

\item{n}{is numerical. It indicates the number of words to use in the network analysis.}

\item{minFreq}{is numerical. It indicates the minimum frequency of words included in a cluster.}

\item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}

\item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. The default value is \code{ngrams=1}.}

\item{stemming}{is logical. If TRUE, the words (from titles or abstracts) are stemmed using Porter's algorithm.}

\item{n.labels}{is an integer. It indicates how many labels to associate with each cluster. Default is \code{n.labels = 1}.}

\item{repel}{is logical. If TRUE, ggplot uses geom_label_repel instead of geom_label.}

\item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}

\item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}

\item{cluster}{is a character. It indicates the clustering algorithm to use, among ("optimal", "louvain", "leiden", "infomap", "edge_betweenness", "walktrap", "spinglass", "leading_eigen", "fast_greedy").}

\item{seed}{is numerical. It indicates the seed of the random number generator, used to always obtain the same results. The default value is \code{seed = 1234}.}

\item{assign.evolution.colors}{is a list. If \code{assign.evolution.colors = list(assign = TRUE)}, colors are assigned to lineages based on the highest weighted inclusion value. If a list is provided, it must contain the argument \code{assign} (TRUE or FALSE) and, optionally, \code{measure} (one of "inclusion", "stability" or "weighted").
Default is \code{assign.evolution.colors = list(assign = TRUE, measure = "weighted")}. If \code{assign = FALSE}, the \code{measure} argument is ignored.}
}
\value{
a list containing:
\tabular{lll}{
\code{nets}\tab \tab The thematic nexus graph for each comparison\cr
\code{incMatrix}\tab \tab Some useful statistics about the thematic nexus}
}
\description{
It performs a Thematic Evolution Analysis based on co-word network analysis and clustering.
The methodology is inspired by the proposal of Cobo et al. (2011).
}
\details{
\code{\link{thematicEvolution}} starts from two or more thematic maps created by the \code{\link{thematicMap}} function.\cr\cr

Reference:\cr
Cobo, M. J., Lopez-Herrera, A. G., Herrera-Viedma, E., & Herrera, F. (2011). An approach for detecting, quantifying,
and visualizing the evolution of a research field: A practical application to the fuzzy sets theory field. Journal of Informetrics, 5(1), 146-166.\cr
}
\examples{
\dontrun{
data(management, package = "bibliometrixData")
years <- c(2004, 2008, 2015)

nexus <- thematicEvolution(management, field = "DE", years = years, n = 100, minFreq = 2)
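
# Added sketch (not part of the original example): plot the evolution map.
# The "Nodes" and "Edges" element names are assumptions based on recent
# bibliometrix versions.
plotThematicEvolution(nexus$Nodes, nexus$Edges)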
}

}
\seealso{
\code{\link{thematicMap}} function to create a thematic map based on co-word network analysis and clustering.

\code{\link{cocMatrix}} to compute a bibliographic bipartite network.

\code{\link{networkPlot}} to plot a bibliographic network.
}
--------------------------------------------------------------------------------
/man/cocMatrix.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cocMatrix.R
\name{cocMatrix}
\alias{cocMatrix}
\title{Bibliographic bipartite network matrices}
\usage{
cocMatrix(
  M,
  Field = "AU",
  type = "sparse",
  n = NULL,
  sep = ";",
  binary = TRUE,
  short = FALSE,
  remove.terms = NULL,
  synonyms = NULL
)
}
\arguments{
\item{M}{is a data frame obtained by the converting function
\code{\link{convert2df}}. It is a data matrix with cases corresponding to
articles and variables corresponding to the Field Tags in the original WoS or SCOPUS file.}

\item{Field}{is a character object. It indicates one of the field tags of the
standard ISI WoS Field Tag codification. Field can be equal to one of these tags:
\tabular{lll}{ \code{AU}\tab \tab Authors\cr \code{SO}\tab \tab
Publication Name (or Source)\cr \code{JI}\tab \tab ISO Source
Abbreviation\cr \code{DE}\tab \tab Author Keywords\cr \code{ID}\tab
\tab Keywords associated by the WoS or SCOPUS database \cr
\code{KW_Merged}\tab \tab All Keywords (merged by DE and ID) \cr
\code{CR}\tab \tab Cited References}

for a complete list of field tags see:
\href{https://www.bibliometrix.org/documents/Field_Tags_bibliometrix.pdf}{Field Tags used in bibliometrix}\cr\cr}

\item{type}{indicates the output format of co-occurrences: \tabular{lll}{
\code{type = "matrix"} \tab \tab produces an object of class
\code{matrix}\cr \code{type = "sparse"} \tab \tab produces an object of
class \code{dgCMatrix} of the package \code{Matrix}. The "sparse"
argument generates a compact representation of the matrix.}}

\item{n}{is an integer. It indicates the number of items to select. If \code{n = NULL}, all items are selected.}

\item{sep}{is the field separator character. This character separates strings in each
column of the data frame. The default is \code{sep = ";"}.}

\item{binary}{is a logical. If TRUE, each cell contains a 0/1 value. If FALSE, each cell contains the frequency.}

\item{short}{is a logical. If TRUE, all items with frequency < 2 are deleted to reduce the matrix size.}

\item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}

\item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
}
\value{
a bipartite network matrix with cases corresponding to manuscripts and variables corresponding to the
objects extracted from the Tag \code{Field}.
}
\description{
\code{cocMatrix} computes occurrences between elements of a Tag Field from a bibliographic data frame. The manuscript is the unit of analysis.
}
\details{
This occurrence matrix represents a bipartite network, which can be transformed into a collection of bibliographic
networks such as coupling, co-citation, etc.

The function follows the approach proposed by Batagelj & Cerinsek (2013) and Aria & Cuccurullo (2017).\cr\cr

References:\cr
Batagelj, V., & Cerinsek, M. (2013). On bibliographic networks. Scientometrics, 96(3), 845-864.\cr
Aria, M., & Cuccurullo, C. (2017). bibliometrix: An R-tool for comprehensive science mapping analysis. Journal of Informetrics, 11(4), 959-975.\cr
}
\examples{
# EXAMPLE 1: Articles x Authors occurrence matrix

data(scientometrics, package = "bibliometrixData")
WA <- cocMatrix(scientometrics, Field = "AU", type = "sparse", sep = ";")

# EXAMPLE 2: Articles x Cited References occurrence matrix

# data(scientometrics, package = "bibliometrixData")

# WCR <- cocMatrix(scientometrics, Field = "CR", type = "sparse", sep = ";")

# EXAMPLE 3: Articles x Cited First Authors occurrence matrix

# data(scientometrics, package = "bibliometrixData")
# scientometrics <- metaTagExtraction(scientometrics, Field = "CR_AU", sep = ";")
# WCR <- cocMatrix(scientometrics, Field = "CR_AU", type = "sparse", sep = ";")
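
# EXAMPLE 4 (added sketch): Articles x Keywords occurrence matrix with
# cell frequencies instead of 0/1 values (binary = FALSE)

# data(scientometrics, package = "bibliometrixData")
# WKW <- cocMatrix(scientometrics, Field = "ID", type = "sparse", binary = FALSE)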

}
\seealso{
\code{\link{convert2df}} to import and convert an ISI or SCOPUS
export file into a data frame.

\code{\link{biblioAnalysis}} to perform a bibliometric analysis.

\code{\link{biblioNetwork}} to compute a bibliographic network.
}
--------------------------------------------------------------------------------