├── .github
│   ├── .gitignore
│   └── workflows
│       └── R-CMD-check.yaml
├── logo.png
├── R
│   ├── sysdata.rda
│   ├── toUpper.R
│   ├── isibib2df.R
│   ├── scopus2df.R
│   ├── logo.R
│   ├── removeStrangeChar.R
│   ├── stopwords.R
│   ├── customTheme.R
│   ├── ltwa.R
│   ├── plot.bibliodendrogram.R
│   ├── trimES.R
│   ├── trim.R
│   ├── trim.leading.R
│   ├── bibtag.R
│   ├── countries.R
│   ├── importFiles.R
│   ├── biblioshiny.R
│   ├── readFiles.R
│   ├── timeslice.R
│   ├── cochrane2df.R
│   ├── sourceGrowth.R
│   ├── net2VOSviewer.R
│   ├── splitCommunities.R
│   ├── localCitations.R
│   ├── keywordAssoc.R
│   ├── net2Pajek.R
│   ├── pubmed2df.R
│   ├── idByAuthor.R
│   ├── duplicatedMatching.R
│   ├── dominance.R
│   ├── tableTag.R
│   ├── isi2df.R
│   ├── missingData.R
│   ├── csvLens2df.R
│   ├── bradford.R
│   ├── mergeDbSources.R
│   ├── csvScopus2df.R
│   ├── normalizeSimilarity.R
│   └── keywordGrowth.R
├── data
│   ├── logo.rda
│   ├── ltwa.rda
│   ├── bibtag.rda
│   ├── countries.rda
│   ├── stopwords.rda
│   └── customTheme.rda
├── cran-comments.md
├── inst
│   ├── biblioshiny
│   │   ├── hexagon.png
│   │   ├── www
│   │   │   ├── logo.jpg
│   │   │   ├── logo.png
│   │   │   ├── ORCID.jpg
│   │   │   ├── logo2.jpg
│   │   │   ├── logo3.png
│   │   │   ├── logoAI.jpg
│   │   │   ├── ai_small2.gif
│   │   │   ├── openalex.jpg
│   │   │   ├── tall_logo.jpg
│   │   │   ├── workflow.jpg
│   │   │   └── table_DBformats.jpg
│   │   └── libraries.R
│   └── CITATION
├── man
│   ├── figures
│   │   ├── README-Co-Word Analysis-1.png
│   │   ├── README-Co-Word Analysis-2.png
│   │   ├── README-Co-Word Analysis-3.png
│   │   ├── README-Co-Word Analysis-4.png
│   │   ├── README-unnamed-chunk-11-1.png
│   │   ├── README-unnamed-chunk-11-2.png
│   │   ├── README-unnamed-chunk-12-1.png
│   │   ├── README-unnamed-chunk-13-1.png
│   │   ├── README-unnamed-chunk-14-1.png
│   │   ├── README-unnamed-chunk-15-1.png
│   │   ├── README-unnamed-chunk-16-1.png
│   │   ├── README-unnamed-chunk-17-1.png
│   │   ├── README-unnamed-chunk-9-1.png
│   │   ├── README-Co-citation network-1.png
│   │   ├── README-Country collaboration-1.png
│   │   ├── README-Keyword c-occurrences-1.png
│   │   ├── README-plot generic function-1.png
│   │   ├── README-plot generic function-2.png
│   │   ├── README-plot generic function-3.png
│   │   ├── README-plot generic function-4.png
│   │   ├── README-plot generic function-5.png
│   │   ├── README-Keyword co-occurrences-1.png
│   │   └── README-Historical Co-citation network-1.png
│   ├── logo.Rd
│   ├── remove_diacritics.Rd
│   ├── get_iso4_stop_words.Rd
│   ├── stopwords.Rd
│   ├── customTheme.Rd
│   ├── print_author_works_summary.Rd
│   ├── ltwa.Rd
│   ├── prepare_ltwa_lookup.Rd
│   ├── plot.bibliodendrogram.Rd
│   ├── trim.Rd
│   ├── trimES.Rd
│   ├── create_journal_iso4_lookup.Rd
│   ├── trim.leading.Rd
│   ├── bibtag.Rd
│   ├── abbreviate_term.Rd
│   ├── countries.Rd
│   ├── normalize_journal_to_iso4.Rd
│   ├── abbreviate_journal_title.Rd
│   ├── lifeCycle.Rd
│   ├── plot.bibliometrix.Rd
│   ├── summary.bibliometrix_netstat.Rd
│   ├── readFiles.Rd
│   ├── threeFieldsPlot.Rd
│   ├── sourceGrowth.Rd
│   ├── mergeKeywords.Rd
│   ├── biblioshiny.Rd
│   ├── authorProdOverTime.Rd
│   ├── convert_scopus_new_to_classic.Rd
│   ├── plotThematicEvolution.Rd
│   ├── net2Pajek.Rd
│   ├── bradford.Rd
│   ├── timeslice.Rd
│   ├── net2VOSviewer.Rd
│   ├── lotka.Rd
│   ├── missingData.Rd
│   ├── get_authors_summary.Rd
│   ├── mergeDbSources.Rd
│   ├── splitCommunities.Rd
│   ├── idByAuthor.Rd
│   ├── keywordAssoc.Rd
│   ├── dominance.Rd
│   ├── tableTag.Rd
│   ├── findAuthorWorks.Rd
│   ├── localCitations.Rd
│   ├── citations.Rd
│   ├── summary.bibliometrix.Rd
│   ├── histNetwork.Rd
│   ├── KeywordGrowth.Rd
│   ├── normalizeCitationScore.Rd
│   ├── networkStat.Rd
│   ├── duplicatedMatching.Rd
│   ├── histPlot.Rd
│   ├── fieldByYear.Rd
│   ├── metaTagExtraction.Rd
│   ├── rpys.Rd
│   ├── Hindex.Rd
│   ├── retrievalByAuthorID.Rd
│   ├── biblioAnalysis.Rd
│   ├── normalizeSimilarity.Rd
│   ├── authorBio.Rd
│   ├── assignEvolutionColors.Rd
│   ├── convert2df.Rd
│   ├── thematicMap.Rd
│   ├── termExtraction.Rd
│   ├── thematicEvolution.Rd
│   └── cocMatrix.Rd
├── .gitignore
├── .Rbuildignore
├── bibliometrix.Rproj
├── LICENCE
├── COPYING
├── DESCRIPTION
└── NEWS.md

/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/logo.png
--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/R/sysdata.rda
--------------------------------------------------------------------------------
/data/logo.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/logo.rda
--------------------------------------------------------------------------------
/data/ltwa.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/ltwa.rda
--------------------------------------------------------------------------------
/data/bibtag.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/bibtag.rda
--------------------------------------------------------------------------------
/R/toUpper.R:
--------------------------------------------------------------------------------
1 | toUpper <- function(D) {
2 |   stringi::stri_trans_toupper(D, locale = "en")
3 | }
4 | 
--------------------------------------------------------------------------------
/data/countries.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/countries.rda
--------------------------------------------------------------------------------
/data/stopwords.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/stopwords.rda
--------------------------------------------------------------------------------
/data/customTheme.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/data/customTheme.rda
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | 
3 | 0 errors | 0 warnings | 1 note
4 | 
5 | * This is a new release.
6 | -------------------------------------------------------------------------------- /inst/biblioshiny/hexagon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/hexagon.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/ORCID.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/ORCID.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo2.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/logo3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logo3.png -------------------------------------------------------------------------------- /inst/biblioshiny/www/logoAI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/logoAI.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/ai_small2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/ai_small2.gif -------------------------------------------------------------------------------- /inst/biblioshiny/www/openalex.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/openalex.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/tall_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/tall_logo.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/workflow.jpg -------------------------------------------------------------------------------- /inst/biblioshiny/www/table_DBformats.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/inst/biblioshiny/www/table_DBformats.jpg 
-------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-1.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-2.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-3.png -------------------------------------------------------------------------------- /man/figures/README-Co-Word Analysis-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-Word Analysis-4.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-11-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-11-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- 
/man/figures/README-unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /man/figures/README-Co-citation network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Co-citation network-1.png -------------------------------------------------------------------------------- /man/figures/README-Country collaboration-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Country collaboration-1.png -------------------------------------------------------------------------------- /man/figures/README-Keyword c-occurrences-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Keyword c-occurrences-1.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-1.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-2.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-3.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-4.png -------------------------------------------------------------------------------- /man/figures/README-plot generic function-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-plot generic function-5.png -------------------------------------------------------------------------------- /man/figures/README-Keyword co-occurrences-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Keyword co-occurrences-1.png -------------------------------------------------------------------------------- /man/figures/README-Historical 
Co-citation network-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/massimoaria/bibliometrix/HEAD/man/figures/README-Historical Co-citation network-1.png
--------------------------------------------------------------------------------
/R/isibib2df.R:
--------------------------------------------------------------------------------
1 | isibib2df <- function(D) {
2 |   # this is a legacy function (for old scripts)
3 |   DATA <- bib2df(D, dbsource = "isi")
4 | 
5 |   return(DATA)
6 | }
7 | 
--------------------------------------------------------------------------------
/R/scopus2df.R:
--------------------------------------------------------------------------------
1 | scopus2df <- function(D) {
2 |   # this is a legacy function (for old scripts)
3 |   DATA <- bib2df(D, dbsource = "scopus")
4 | 
5 |   return(DATA)
6 | }
7 | 
--------------------------------------------------------------------------------
/R/logo.R:
--------------------------------------------------------------------------------
1 | #' Bibliometrix logo.
2 | #'
3 | #' The matrix contains the RGB representation of the official bibliometrix logo.\cr
4 | #'
5 | #' @format A matrix with 927 rows and 800 columns.
6 | #'
7 | #' @name logo
8 | NULL
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Renviron
5 | *.csv
6 | *.xlsx
7 | .txt
8 | .bib
9 | .Ruserdata
10 | .DS_Store
11 | VOSviewer.jar
12 | network.net
13 | Rubbish
14 | desktop.ini
15 | vignette.txt
16 | inst/doc
17 | inst/biblioshiny/__MACOSX
18 | inst/biblioshiny/rsconnect
19 | _gh-pages
20 | 
21 | 
22 | /.quarto/
--------------------------------------------------------------------------------
/man/logo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/logo.R
3 | \name{logo}
4 | \alias{logo}
5 | \title{Bibliometrix logo.}
6 | \format{
7 | A matrix with 927 rows and 800 columns.
8 | }
9 | \description{
10 | The matrix contains the RGB representation of the official bibliometrix logo.\cr
11 | }
12 | 
--------------------------------------------------------------------------------
/R/removeStrangeChar.R:
--------------------------------------------------------------------------------
1 | removeStrangeChar <- function(D) {
2 |   # nchar() throws an error on strings with broken encodings, so flag those
3 |   # elements with 0 and drop them together with (near-)empty lines.
4 |   ind <- numeric(length(D))
5 |   for (i in seq_along(D)) {
6 |     res <- try(ind[i] <- nchar(D[i]), silent = TRUE)
7 |     if (inherits(res, "try-error")) {
8 |       ind[i] <- 0
9 |     }
10 |   }
11 |   D[ind > 1]
12 | }
13 | 
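A quick sketch of the intended behaviour of removeStrangeChar() (the input strings are made up, and the function is unexported, so it is reached here with `:::`):

lines <- c("TI  SCIENCE MAPPING", "\xfa\xb4", "", "AB  AN OVERVIEW")
bibliometrix:::removeStrangeChar(lines)
# the invalid-encoding element and the empty string are dropped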
--------------------------------------------------------------------------------
/R/stopwords.R:
--------------------------------------------------------------------------------
1 | #' List of English stopwords.
2 | #'
3 | #' A character vector containing a complete list of English stopwords.\cr
4 | #' Data are used by \code{\link{biblioAnalysis}} function
5 | #' to extract Country Field of Cited References and Authors.
6 | #'
7 | #' @format A character vector with 665 elements.\cr
8 | #'
9 | #' @name stopwords
10 | NULL
11 | 
--------------------------------------------------------------------------------
/R/customTheme.R:
--------------------------------------------------------------------------------
1 | #' Custom Theme variables for Biblioshiny.
2 | #'
3 | #' List containing a set of custom theme variables for Biblioshiny.
4 | #'
5 | #' @format A list with 3 elements:
6 | #' \describe{
7 | #'   \item{name}{object name}
8 | #'   \item{attribs}{attributes}
9 | #'   \item{children}{CSS style}
10 | #' }
11 | #'
12 | #' @name customTheme
13 | NULL
14 | 
--------------------------------------------------------------------------------
/man/remove_diacritics.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{remove_diacritics}
4 | \alias{remove_diacritics}
5 | \title{Remove diacritics from string with robust fallback}
6 | \usage{
7 | remove_diacritics(x)
8 | }
9 | \description{
10 | Remove diacritics from string with robust fallback
11 | }
12 | \keyword{internal}
13 | 
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.github$
4 | ^.*\.txt$
5 | ^.*\.bib$
6 | ^.*\.ini$
7 | ^.*\.md$
8 | ^.*\.RData$
9 | ^.*\.jar
10 | ^.*\.net
11 | ^.*\.csv
12 | ^.*\.xlsx
13 | ^.*\.xls
14 | ^.*\.xml
15 | ^.*\.png
16 | LICENCE.*$
17 | Rubbish
18 | ^_gh-pages$
19 | ^README\.Rmd$
20 | ^_pkgdown\.yml$
21 | ^pkgdown$
22 | ^inst/biblioshiny/rsconnect
23 | ^rsconnect
24 | 
25 | ^CRAN-SUBMISSION$
26 | ^cran-comments\.md$
27 | 
--------------------------------------------------------------------------------
/R/ltwa.R:
--------------------------------------------------------------------------------
1 | #' Index of LTWA.
2 | #'
3 | #' Data frame containing a normalized index of words used in journal names and their ISO4 abbreviations.
4 | #'
5 | #' @format A data frame with 56463 rows and 3 variables:
6 | #' \describe{
7 | #'   \item{WORD}{word from journal names}
8 | #'   \item{ABBREVIATION}{ISO4 abbreviation}
9 | #'   \item{LANGUAGES}{Language of the journal name}
10 | #' }
11 | #'
12 | #' @name ltwa
13 | NULL
14 | 
--------------------------------------------------------------------------------
/man/get_iso4_stop_words.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{get_iso4_stop_words}
4 | \alias{get_iso4_stop_words}
5 | \title{Articles, prepositions, and conjunctions to be removed (ISO 4 standard)}
6 | \usage{
7 | get_iso4_stop_words()
8 | }
9 | \description{
10 | Articles, prepositions, and conjunctions to be removed (ISO 4 standard)
11 | }
12 | \keyword{internal}
13 | 
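A minimal sketch of what the stop-word helper is for in ISO 4 abbreviation (it is internal and unexported; that it returns a plain lower-case character vector is an assumption):

stops <- bibliometrix:::get_iso4_stop_words()
words <- strsplit("JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION", " ")[[1]]
paste(words[!tolower(words) %in% stops], collapse = " ")
# articles and prepositions such as "OF" and "THE" are dropped before abbreviation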
--------------------------------------------------------------------------------
/man/stopwords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/stopwords.R
3 | \name{stopwords}
4 | \alias{stopwords}
5 | \title{List of English stopwords.}
6 | \format{
7 | A character vector with 665 elements.\cr
8 | }
9 | \description{
10 | A character vector containing a complete list of English stopwords.\cr
11 | Data are used by \code{\link{biblioAnalysis}} function
12 | to extract Country Field of Cited References and Authors.
13 | }
14 | 
--------------------------------------------------------------------------------
/man/customTheme.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/customTheme.R
3 | \name{customTheme}
4 | \alias{customTheme}
5 | \title{Custom Theme variables for Biblioshiny.}
6 | \format{
7 | A list with 3 elements:
8 | \describe{
9 |   \item{name}{object name}
10 |   \item{attribs}{attributes}
11 |   \item{children}{CSS style}
12 | }
13 | }
14 | \description{
15 | List containing a set of custom theme variables for Biblioshiny.
16 | }
17 | 
--------------------------------------------------------------------------------
/man/print_author_works_summary.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/findAuthorWorks.r
3 | \name{print_author_works_summary}
4 | \alias{print_author_works_summary}
5 | \title{Print Summary of Author Works Search}
6 | \usage{
7 | print_author_works_summary(works_df)
8 | }
9 | \arguments{
10 | \item{works_df}{Data.frame. Result from find_author_works()}
11 | }
12 | \description{
13 | Prints a summary of the search results from find_author_works()
14 | }
15 | 
--------------------------------------------------------------------------------
/R/plot.bibliodendrogram.R:
--------------------------------------------------------------------------------
1 | #' Plotting dendrogram resulting from Conceptual Structure Analysis
2 | #'
3 | #' \code{plot} method for class '\code{bibliodendrogram}'
4 | #' @param x is the object for which plots are desired.
5 | #' @param ... is a generic param for plot functions.
6 | #' @return The function \code{plot} draws a dendrogram.
7 | #'
8 | #'
9 | #' @method plot bibliodendrogram
10 | #' @export
11 | 
12 | 
13 | plot.bibliodendrogram <- function(x, ...) {
14 |   plot(x$dend)
15 |   abline(h = x$line, lty = 2)
16 | }
17 | 
--------------------------------------------------------------------------------
/man/ltwa.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ltwa.R
3 | \name{ltwa}
4 | \alias{ltwa}
5 | \title{Index of LTWA.}
6 | \format{
7 | A data frame with 56463 rows and 3 variables:
8 | \describe{
9 |   \item{WORD}{word from journal names}
10 |   \item{ABBREVIATION}{ISO4 abbreviation}
11 |   \item{LANGUAGES}{Language of the journal name}
12 | }
13 | }
14 | \description{
15 | Data frame containing a normalized index of words used in journal names and their ISO4 abbreviations.
16 | }
17 | 
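Once the package is installed, the ltwa table can be inspected directly; a small query sketch (the letter case of the stored entries is an assumption, hence the case-insensitive match):

data(ltwa, package = "bibliometrix")
ltwa[toupper(ltwa$WORD) == "JOURNAL", c("WORD", "ABBREVIATION", "LANGUAGES")]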
--------------------------------------------------------------------------------
/R/trimES.R:
--------------------------------------------------------------------------------
1 | #' Deleting extra white spaces
2 | #'
3 | #' Deleting extra white spaces from a \code{character} object.
4 | #'
5 | #' \code{trimES} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c("Alfred  BJ", "Mary  Beth", "John  John")
13 | #' char
14 | #' trimES(char)
15 | #'
16 | #' @export
17 | trimES <- function(x) {
18 |   gsub("\\s+", " ", x)
19 | }
20 | 
--------------------------------------------------------------------------------
/man/prepare_ltwa_lookup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{prepare_ltwa_lookup}
4 | \alias{prepare_ltwa_lookup}
5 | \title{Prepare LTWA database for efficient lookup}
6 | \usage{
7 | prepare_ltwa_lookup(ltwa_db)
8 | }
9 | \arguments{
10 | \item{ltwa_db}{LTWA database data frame}
11 | }
12 | \value{
13 | List with singles, prefix, and phrase lookup tables
14 | }
15 | \description{
16 | Pre-processes LTWA database into optimized lookup tables
17 | }
18 | \keyword{internal}
19 | 
--------------------------------------------------------------------------------
/R/trim.R:
--------------------------------------------------------------------------------
1 | #' Deleting leading and ending white spaces
2 | #'
3 | #' Deleting leading and ending white spaces from a \code{character} object.
4 | #'
5 | #' \code{trim} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c(" Alfred", "Mary", " John")
13 | #' char
14 | #' trim(char)
15 | #'
16 | #' @export
17 | trim <- function(x) {
18 |   gsub("(^[[:space:]]+|[[:space:]]+$)", "", x)
19 | }
20 | 
--------------------------------------------------------------------------------
/R/trim.leading.R:
--------------------------------------------------------------------------------
1 | #' Deleting leading white spaces
2 | #'
3 | #' Deleting leading white spaces from a \code{character} object.
4 | #'
5 | #' \code{trim.leading} is an internal routine of the \code{bibliometrix} package.
6 | #'
7 | #' @param x is a \code{character} object.
8 | #'
9 | #' @return an object of class \code{character}
10 | #' @examples
11 | #'
12 | #' char <- c(" Alfred", "Mary", " John")
13 | #' char
14 | #' trim.leading(char)
15 | #'
16 | #' @export
17 | trim.leading <- function(x) {
18 |   sub("^\\s+", "", x) ## function to delete leading spaces in a string
19 | }
20 | 
--------------------------------------------------------------------------------
/R/bibtag.R:
--------------------------------------------------------------------------------
1 | #' Tag list and bibtex fields.
2 | #'
3 | #' Data frame containing a list of tags and the corresponding WoS, SCOPUS and generic bibtex fields, as well as Dimensions.ai csv and xlsx fields.
4 | #'
5 | #' @format A data frame with 44 rows and 6 variables:
6 | #' \describe{
7 | #'   \item{TAG}{Tag Fields}
8 | #'   \item{SCOPUS}{Scopus bibtex fields}
9 | #'   \item{ISI}{WOS/ISI bibtex fields}
10 | #'   \item{GENERIC}{Generic bibtex fields}
11 | #'   \item{DIMENSIONS_OLD}{DIMENSIONS csv/xlsx old fields}
12 | #'   \item{DIMENSIONS}{DIMENSIONS csv/xlsx fields}
13 | #' }
14 | #'
15 | #' @name bibtag
16 | NULL
17 | 
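A sketch of using bibtag to translate one field tag across sources (the presence of an "AU" row is an assumption; the column names are as documented above):

data(bibtag, package = "bibliometrix")
bibtag[bibtag$TAG == "AU", c("TAG", "SCOPUS", "ISI", "GENERIC")]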
--------------------------------------------------------------------------------
/R/countries.R:
--------------------------------------------------------------------------------
1 | #' Index of Countries.
2 | #'
3 | #' Data frame containing a normalized index of countries.\cr
4 | #' Data are used by \code{\link{biblioAnalysis}} function
5 | #' to extract Country Field of Cited References and Authors.
6 | #'
7 | #' @format A data frame with 199 rows and 5 variables:
8 | #' \describe{
9 | #'   \item{countries}{country names}
10 | #'   \item{continent}{continent names}
11 | #'   \item{iso2}{country ISO 3166-1 alpha-2 code}
12 | #'   \item{Longitude}{country centroid longitude}
13 | #'   \item{Latitude}{country centroid latitude}
14 | #' }
15 | #'
16 | #' @name countries
17 | NULL
18 | 
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
1 | citHeader("To cite bibliometrix in publications, please use:")
2 | 
3 | bibentry(bibtype="Article",
4 |          title = "bibliometrix: An R-tool for comprehensive science mapping analysis",
5 |          author = "Massimo Aria and Corrado Cuccurullo",
6 |          journal = "Journal of Informetrics",
7 |          year = "2017",
8 |          doi = "10.1016/j.joi.2017.08.007",
9 |          textVersion =
10 |          paste("Aria, M., & Cuccurullo, C. (2017),",
11 |                "bibliometrix: An R-tool for comprehensive science mapping analysis,",
12 |                "Journal of Informetrics, 11(4), 959-975, Elsevier."))
13 | 
--------------------------------------------------------------------------------
/man/plot.bibliodendrogram.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.bibliodendrogram.R
3 | \name{plot.bibliodendrogram}
4 | \alias{plot.bibliodendrogram}
5 | \title{Plotting dendrogram resulting from Conceptual Structure Analysis}
6 | \usage{
7 | \method{plot}{bibliodendrogram}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{is the object for which plots are desired.}
11 | 
12 | \item{...}{is a generic param for plot functions.}
13 | }
14 | \value{
15 | The function \code{plot} draws a dendrogram.
16 | }
17 | \description{
18 | \code{plot} method for class '\code{bibliodendrogram}'
19 | }
20 | 
--------------------------------------------------------------------------------
/man/trim.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trim.R
3 | \name{trim}
4 | \alias{trim}
5 | \title{Deleting leading and ending white spaces}
6 | \usage{
7 | trim(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting leading and ending white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trim} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c(" Alfred", "Mary", " John")
24 | char
25 | trim(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/man/trimES.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trimES.R
3 | \name{trimES}
4 | \alias{trimES}
5 | \title{Deleting extra white spaces}
6 | \usage{
7 | trimES(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting extra white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trimES} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c("Alfred  BJ", "Mary  Beth", "John  John")
24 | char
25 | trimES(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/bibliometrix.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: 256c0fcb-6c1f-4220-80e8-8472feb3cda7
3 | 
4 | RestoreWorkspace: Default
5 | SaveWorkspace: Default
6 | AlwaysSaveHistory: Default
7 | 
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | BuildType: Package
17 | PackageUseDevtools: Yes
18 | PackageInstallArgs: --no-multiarch --with-keep.source --resave-data
19 | PackageBuildArgs: --resave-data
20 | PackageBuildBinaryArgs: --resave-data
21 | PackageCheckArgs: --as-cran --timings --no-stop-on-test-error --no-clean
22 | PackageRoxygenize: rd,collate,namespace,vignette
23 | 
--------------------------------------------------------------------------------
/man/create_journal_iso4_lookup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{create_journal_iso4_lookup}
4 | \alias{create_journal_iso4_lookup}
5 | \title{Create ISO4 journal normalization lookup table}
6 | \usage{
7 | create_journal_iso4_lookup(journal_vector, ltwa_db)
8 | }
9 | \arguments{
10 | \item{journal_vector}{Character vector of journal names}
11 | 
12 | \item{ltwa_db}{LTWA database data frame}
13 | }
14 | \value{
15 | Data frame with journal_original and journal_iso4 columns
16 | }
17 | \description{
18 | Create ISO4 journal normalization lookup table
19 | }
20 | \keyword{internal}
21 | 
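A sketch of building the lookup for a couple of journal names (the function is internal; the call relies only on the signature and output columns documented above):

data(ltwa, package = "bibliometrix")
journals <- c("JOURNAL OF INFORMETRICS", "SCIENTOMETRICS")
lk <- bibliometrix:::create_journal_iso4_lookup(journals, ltwa)
lk[, c("journal_original", "journal_iso4")]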
--------------------------------------------------------------------------------
/man/trim.leading.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/trim.leading.R
3 | \name{trim.leading}
4 | \alias{trim.leading}
5 | \title{Deleting leading white spaces}
6 | \usage{
7 | trim.leading(x)
8 | }
9 | \arguments{
10 | \item{x}{is a \code{character} object.}
11 | }
12 | \value{
13 | an object of class \code{character}
14 | }
15 | \description{
16 | Deleting leading white spaces from a \code{character} object.
17 | }
18 | \details{
19 | \code{trim.leading} is an internal routine of the \code{bibliometrix} package.
20 | }
21 | \examples{
22 | 
23 | char <- c(" Alfred", "Mary", " John")
24 | char
25 | trim.leading(char)
26 | 
27 | }
28 | 
--------------------------------------------------------------------------------
/man/bibtag.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bibtag.R
3 | \name{bibtag}
4 | \alias{bibtag}
5 | \title{Tag list and bibtex fields.}
6 | \format{
7 | A data frame with 44 rows and 6 variables:
8 | \describe{
9 |   \item{TAG}{Tag Fields}
10 |   \item{SCOPUS}{Scopus bibtex fields}
11 |   \item{ISI}{WOS/ISI bibtex fields}
12 |   \item{GENERIC}{Generic bibtex fields}
13 |   \item{DIMENSIONS_OLD}{DIMENSIONS csv/xlsx old fields}
14 |   \item{DIMENSIONS}{DIMENSIONS csv/xlsx fields}
15 | }
16 | }
17 | \description{
18 | Data frame containing a list of tags and the corresponding WoS, SCOPUS and generic bibtex fields, as well as Dimensions.ai csv and xlsx fields.
19 | }
20 | 
--------------------------------------------------------------------------------
/man/abbreviate_term.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{abbreviate_term}
4 | \alias{abbreviate_term}
5 | \title{Abbreviate a single term using LTWA}
6 | \usage{
7 | abbreviate_term(word, ltwa_lookup, common_abbr, check = TRUE)
8 | }
9 | \arguments{
10 | \item{word}{Single word to abbreviate}
11 | 
12 | \item{ltwa_lookup}{Pre-processed LTWA lookup tables}
13 | 
14 | \item{common_abbr}{Named vector of common abbreviations}
15 | 
16 | \item{check}{Logical, whether to check for abbreviation}
17 | }
18 | \value{
19 | Abbreviated form of word
20 | }
21 | \description{
22 | Abbreviate a single term using LTWA
23 | }
24 | \keyword{internal}
25 | 
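Chaining the two internal helpers documented above; the named vector passed as common_abbr is invented here, and that it is an acceptable input is an assumption:

data(ltwa, package = "bibliometrix")
lookup <- bibliometrix:::prepare_ltwa_lookup(ltwa)
bibliometrix:::abbreviate_term("INTERNATIONAL", lookup,
                               common_abbr = c(JOURNAL = "J"), check = TRUE)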
--------------------------------------------------------------------------------
/man/countries.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/countries.R
3 | \name{countries}
4 | \alias{countries}
5 | \title{Index of Countries.}
6 | \format{
7 | A data frame with 199 rows and 5 variables:
8 | \describe{
9 |   \item{countries}{country names}
10 |   \item{continent}{continent names}
11 |   \item{iso2}{country ISO 3166-1 alpha-2 code}
12 |   \item{Longitude}{country centroid longitude}
13 |   \item{Latitude}{country centroid latitude}
14 | }
15 | }
16 | \description{
17 | Data frame containing a normalized index of countries.\cr
18 | Data are used by \code{\link{biblioAnalysis}} function
19 | to extract Country Field of Cited References and Authors.
20 | }
21 | 
--------------------------------------------------------------------------------
/man/normalize_journal_to_iso4.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{normalize_journal_to_iso4}
4 | \alias{normalize_journal_to_iso4}
5 | \title{Normalize journal names to ISO4 abbreviated form}
6 | \usage{
7 | normalize_journal_to_iso4(journal_name, ltwa_db)
8 | }
9 | \arguments{
10 | \item{journal_name}{Character string with journal name}
11 | 
12 | \item{ltwa_db}{Data frame with LTWA database}
13 | }
14 | \value{
15 | Normalized journal name in ISO4 abbreviated form
16 | }
17 | \description{
18 | Converts all journal names to their ISO4 abbreviated form using LTWA.
19 | Only uses English language entries from LTWA to avoid foreign word matches.
20 | }
21 | \keyword{internal}
22 | 
--------------------------------------------------------------------------------
/R/importFiles.R:
--------------------------------------------------------------------------------
1 | importFiles <- function(...) {
2 |   arguments <- unlist(list(...))
3 |   k <- length(arguments)
4 |   D <- list()
5 |   # enc="UTF-8"
6 |   # origEnc=getOption("encoding")
7 |   # if (origEnc=="UTF-8"){options(encoding = "native.enc")}
8 |   for (i in seq_len(k)) {
9 |     D[[i]] <- read_lines(
10 |       arguments[i],
11 |       skip = 0,
12 |       n_max = -1L,
13 |       locale = default_locale(),
14 |       progress = show_progress()
15 |     )
16 | 
17 |     # D[[i]]=suppressWarnings(
18 |     #   iconv(readLines(arguments[i],encoding = "UTF-8"),"latin1", "ASCII", sub="")
19 |     #   #conv(readLines(arguments[[i]]))
20 |     # )
21 |   }
22 |   D <- unlist(D)
23 |   # options(encoding = origEnc)
24 |   # Encoding(D) <- "UTF-8"
25 |   return(D)
26 | }
27 | 
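Typical invocation of this unexported reader (the file names are hypothetical):

D <- bibliometrix:::importFiles("savedrecs1.txt", "savedrecs2.txt")
head(D)  # one element per line read from the export files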
--------------------------------------------------------------------------------
/man/abbreviate_journal_title.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/apply_citation_matching.R
3 | \name{abbreviate_journal_title}
4 | \alias{abbreviate_journal_title}
5 | \title{Abbreviate journal title to ISO 4 standard}
6 | \usage{
7 | abbreviate_journal_title(title, ltwa_lookup)
8 | }
9 | \arguments{
10 | \item{title}{Journal title string}
11 | 
12 | \item{ltwa_lookup}{Pre-processed LTWA lookup tables (from prepare_ltwa_lookup)}
13 | }
14 | \value{
15 | Abbreviated journal title in ISO 4 format (without periods)
16 | }
17 | \description{
18 | Converts a full journal title to its ISO 4 abbreviated form using LTWA.
19 | Removes articles, prepositions, and conjunctions according to ISO 4 rules.
20 | Returns result WITHOUT periods (dots).
21 | }
22 | \keyword{internal}
23 | 
--------------------------------------------------------------------------------
/man/lifeCycle.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/lifeCycle.R
3 | \name{lifeCycle}
4 | \alias{lifeCycle}
5 | \title{Life Cycle Analysis with Logistic Growth Model}
6 | \usage{
7 | lifeCycle(data, forecast_years = 5, plot = TRUE, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{data}{Data frame with columns: year (PY) and number of publications (n)}
11 | 
12 | \item{forecast_years}{Number of years to forecast beyond saturation}
13 | 
14 | \item{plot}{Logical, if TRUE produces plots}
15 | 
16 | \item{verbose}{Logical, if TRUE prints detailed output}
17 | }
18 | \value{
19 | List containing parameters, forecasts and metrics
20 | }
21 | \description{
22 | Estimates logistic growth model for annual (non-cumulative) publications
23 | following Meyer et al. (1999) methodology
24 | }
25 | 
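The logistic curve underlying lifeCycle() has the form n(t) = K / (1 + exp(-r * (t - t0))), where K is the saturation level, r the growth rate and t0 the midpoint year. A self-contained nls() sketch of fitting that curve to a simulated series; this is illustrative only, not the package's estimation code (which follows Meyer et al., 1999):

set.seed(1)
df <- data.frame(PY = 2000:2020)
df$n <- round(100 / (1 + exp(-0.5 * (df$PY - 2010))) + rnorm(nrow(df), sd = 3))
fit <- nls(n ~ K / (1 + exp(-r * (PY - t0))),
           data = df,
           start = list(K = max(df$n), r = 0.1, t0 = median(df$PY)))
coef(fit)  # estimated K, r, t0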
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | bibliometrix Package for R - Tool for Quantitative Research in Bibliometrics and Scientometrics.
2 | 
3 | Copyright (C) 2016 Massimo Aria and Corrado Cuccurullo
4 | 
5 | This program is free software; you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation; either version 2 of the License, or
8 | (at your option) any later version.
9 | 
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 | 
15 | You should have received a copy of the GNU General Public License
16 | along with this program; if not, write to the Free Software
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | 
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | bibliometrix R-Package
2 | 
3 | A Tool for Quantitative Research in Bibliometrics and Scientometrics.
4 | 
5 | Copyright (C) 2016 Massimo Aria and Corrado Cuccurullo
6 | 
7 | This program is free software; you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation; either version 2 of the License, or
10 | (at your option) any later version.
11 | 
12 | This program is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 | 
17 | You should have received a copy of the GNU General Public License
18 | along with this program; if not, write to the Free Software
19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 | 
--------------------------------------------------------------------------------
/man/plot.bibliometrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.bibliometrix.R
3 | \name{plot.bibliometrix}
4 | \alias{plot.bibliometrix}
5 | \title{Plotting bibliometric analysis results}
6 | \usage{
7 | \method{plot}{bibliometrix}(x, ...)
8 | }
9 | \arguments{
10 | \item{x}{is the object for which plots are desired.}
11 | 
12 | \item{...}{can accept two arguments:\cr
13 | \code{k} is an integer, used for plot formatting (number of objects). Default value is 10.\cr
14 | \code{pause} is a logical, used to allow pause in screen scrolling of results. Default value is \code{pause = FALSE}.}
15 | }
16 | \value{
17 | The function \code{plot} returns a list of plots of class \code{ggplot2}.
18 | }
19 | \description{
20 | \code{plot} method for class '\code{bibliometrix}'
21 | }
22 | \examples{
23 | data(scientometrics, package = "bibliometrixData")
24 | 
25 | results <- biblioAnalysis(scientometrics)
26 | 
27 | plot(results, k = 10, pause = FALSE)
28 | 
29 | }
30 | \seealso{
31 | The bibliometric analysis function \code{\link{biblioAnalysis}}.
32 | 
33 | \code{\link{summary}} to compute a list of summary statistics of the object of class \code{bibliometrix}.
34 | }
35 | 
--------------------------------------------------------------------------------
/man/summary.bibliometrix_netstat.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/summary.bibliometrix_netstat.R
3 | \name{summary.bibliometrix_netstat}
4 | \alias{summary.bibliometrix_netstat}
5 | \title{Summarizing network analysis results}
6 | \usage{
7 | \method{summary}{bibliometrix_netstat}(object, ...)
8 | }
9 | \arguments{
10 | \item{object}{is the object for which a summary is desired.}
11 | 
12 | \item{...}{can accept the argument:\cr
13 | \code{k}, an integer used for table formatting (number of rows). Default value is 10.\cr}
14 | }
15 | \value{
16 | The function \code{summary} computes and displays several statistics, both at network and vertex level.
17 | }
18 | \description{
19 | \code{summary} method for class '\code{bibliometrix_netstat}'
20 | }
21 | \examples{
22 | 
23 | # to run the example, please remove # from the beginning of the following lines
24 | # data(scientometrics, package = "bibliometrixData")
25 | 
26 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "collaboration",
27 | #   network = "authors", sep = ";")
28 | # netstat <- networkStat(NetMatrix, stat = "all", type = "degree")
29 | # summary(netstat)
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/man/readFiles.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/readFiles.R
3 | \name{readFiles}
4 | \alias{readFiles}
5 | \title{DEPRECATED: Load a sequence of ISI or SCOPUS Export files into a large character object}
6 | \usage{
7 | readFiles(...)
8 | }
9 | \arguments{
10 | \item{...}{is a sequence of names of files downloaded from WOS (in plain text or bibtex format) or SCOPUS Export files (exclusively in bibtex format).}
11 | }
12 | \value{
13 | a character vector whose length is the number of lines read.
14 | }
15 | \description{
16 | The function readFiles is deprecated. You can import and convert your export files directly using the function \code{\link{convert2df}}.
17 | }
18 | \examples{
19 | # WoS or SCOPUS Export files can be read using \code{\link{readFiles}} function:
20 | 
21 | # largechar <- readFiles('filename1.txt','filename2.txt','filename3.txt')
22 | 
23 | # filename1.txt, filename2.txt and filename3.txt are ISI or SCOPUS Export files
24 | # in plain text or bibtex format.
25 | 
26 | # D <- readFiles('https://www.bibliometrix.org/datasets/bibliometrics_articles.txt')
27 | 
28 | }
29 | \seealso{
30 | \code{\link{convert2df}} for converting SCOPUS or ISI Export files into a data frame
31 | }
32 | 
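The deprecation note above points to convert2df(); the modern one-step import looks like this (the file name is hypothetical):

library(bibliometrix)
M <- convert2df(file = "savedrecs.bib", dbsource = "wos", format = "bibtex")
dim(M)  # one row per document, one column per field tag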
--------------------------------------------------------------------------------
/man/threeFieldsPlot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/threeFieldsPlot.R
3 | \name{threeFieldsPlot}
4 | \alias{threeFieldsPlot}
5 | \title{Three Fields Plot}
6 | \usage{
7 | threeFieldsPlot(M, fields = c("DE", "AU", "SO"), n = c(20, 20, 20))
8 | }
9 | \arguments{
10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.}
12 | 
13 | \item{fields}{is a character vector. It indicates the fields to analyze using the standard WoS field tags.
14 | Default is \code{fields = c("DE", "AU", "SO")}.}
15 | 
16 | \item{n}{is an integer vector. It indicates how many items to plot, for each of the three fields.
17 | Default is \code{n = c(20, 20, 20)}}
18 | }
19 | \value{
20 | a sankeyPlot
21 | }
22 | \description{
23 | Visualize the main items of three fields (e.g. authors, keywords, journals), and how they are related through a Sankey diagram.
24 | }
25 | \examples{
26 | 
27 | # data(scientometrics, package = "bibliometrixData")
28 | 
29 | # threeFieldsPlot(scientometrics, fields=c("DE","AU","CR"),n=c(20,20,20))
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/man/sourceGrowth.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sourceGrowth.R
3 | \name{sourceGrowth}
4 | \alias{sourceGrowth}
5 | \title{Number of documents published annually per Top Sources}
6 | \usage{
7 | sourceGrowth(M, top = 5, cdf = TRUE)
8 | }
9 | \arguments{
10 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original ISI or SCOPUS file.}
12 | 
13 | \item{top}{is numeric. It indicates the number of top sources to analyze. The default value is 5.}
14 | 
15 | \item{cdf}{is a logical. If TRUE, the function calculates the cumulative occurrences distribution.}
16 | }
17 | \value{
18 | an object of class \code{data.frame}
19 | }
20 | \description{
21 | It calculates the yearly published documents of the top sources.
22 | }
23 | \examples{
24 | 
25 | data(scientometrics, package = "bibliometrixData")
26 | topSO <- sourceGrowth(scientometrics, top = 1, cdf = TRUE)
27 | topSO
28 | 
29 | # Plotting results
30 | \dontrun{
31 | install.packages("reshape2")
32 | library(reshape2)
33 | library(ggplot2)
34 | DF <- melt(topSO, id = "Year")
35 | ggplot(DF, aes(Year, value, group = variable, color = variable)) +
36 |   geom_line()
37 | }
38 | 
39 | }
40 | 
--------------------------------------------------------------------------------
/man/mergeKeywords.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zzz.R
3 | \name{mergeKeywords}
4 | \alias{mergeKeywords}
5 | \title{Merge DE and ID Fields into a Unified Keywords Column}
6 | \usage{
7 | mergeKeywords(M, force = FALSE)
8 | }
9 | \arguments{
10 | \item{M}{A dataframe containing at least the `DE` and/or `ID` columns, typically generated by `convert2df()` from the `bibliometrix` package.}
11 | 
12 | \item{force}{Logical. If `TRUE`, an existing `KW_Merged` column will be overwritten. Default is `FALSE`.}
13 | }
14 | \value{
15 | A dataframe with an added (or updated) `KW_Merged` column containing deduplicated and cleaned keyword strings.
16 | }
17 | \description{
18 | This function creates a new column `KW_Merged` by combining the contents of the `DE` (author keywords) and `ID` (keywords plus) fields
19 | in a bibliographic dataframe. Duplicate keywords within each record are removed, and leading/trailing spaces are trimmed.
20 | The merged keywords are separated by a semicolon (`;`).
21 | }
22 | \details{
23 | If the `KW_Merged` column already exists, it will not be overwritten unless `force = TRUE` is specified.
24 | }
25 | \examples{
26 | \dontrun{
27 | data(management, package = "bibliometrix")
28 | M <- mergeKeywords(management)
29 | head(M$KW_Merged)
30 | }
31 | 
32 | }
33 | 
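The merging rule described above, shown on a single record in plain base R (an illustration of the documented behaviour, not the package's implementation):

DE <- "BIBLIOMETRICS; SCIENCE MAPPING"
ID <- "SCIENCE MAPPING; CO-CITATION ANALYSIS"
kw <- unique(trimws(unlist(strsplit(c(DE, ID), ";"))))
paste(kw, collapse = ";")
# "BIBLIOMETRICS;SCIENCE MAPPING;CO-CITATION ANALYSIS"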
--------------------------------------------------------------------------------
/man/biblioshiny.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/biblioshiny.R
3 | \name{biblioshiny}
4 | \alias{biblioshiny}
5 | \title{Shiny UI for bibliometrix package}
6 | \usage{
7 | biblioshiny(
8 |   host = "127.0.0.1",
9 |   port = NULL,
10 |   launch.browser = TRUE,
11 |   maxUploadSize = 200
12 | )
13 | }
14 | \arguments{
15 | \item{host}{The IPv4 address that the application should listen on.
16 | Defaults to the shiny.host option, if set, or "127.0.0.1" if not.}
17 | 
18 | \item{port}{is the TCP port that the application should listen on. If the port is not specified,
19 | and the shiny.port option is set (with options(shiny.port = XX)), then that port will be used.
20 | Otherwise, use a random port.}
21 | 
22 | \item{launch.browser}{If true, the system's default web browser will be launched automatically
23 | after the app is started. Defaults to true in interactive sessions only. The value of
24 | this parameter can also be a function to call with the application's URL.}
25 | 
26 | \item{maxUploadSize}{is an integer. The max upload file size argument. Default value is 200 (megabytes)}
27 | }
28 | \description{
29 | \code{biblioshiny} performs science mapping analysis using the main functions of the bibliometrix package.
30 | }
31 | \examples{
32 | 
33 | # biblioshiny()
34 | 
35 | }
36 | 
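Launching the app with explicit options (the values shown are arbitrary):

library(bibliometrix)
biblioshiny(host = "127.0.0.1", port = 8787, maxUploadSize = 500)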
xxx) 30 | \item Extracts volume and issue numbers 31 | \item Extracts the journal name (text before volume/issue/pages) 32 | \item Deduces the title as the remaining text after the author 33 | } 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /R/biblioshiny.R: -------------------------------------------------------------------------------- 1 | #' Shiny UI for bibliometrix package 2 | #' 3 | #' \code{biblioshiny} performs science mapping analysis using the main functions of the bibliometrix package. 4 | #' 5 | #' @param port is the TCP port that the application should listen on. If the port is not specified, 6 | #' and the shiny.port option is set (with options(shiny.port = XX)), then that port will be used. 7 | #' Otherwise, a random port is used. 8 | #' 9 | #' @param launch.browser If TRUE, the system's default web browser will be launched automatically 10 | #' after the app is started. Defaults to TRUE in interactive sessions only. The value of 11 | #' this parameter can also be a function to call with the application's URL. 12 | #' 13 | #' @param host The IPv4 address that the application should listen on. 14 | #' Defaults to the shiny.host option, if set, or "127.0.0.1" if not. 15 | #' 16 | #' @param maxUploadSize is an integer. The maximum upload file size. Default value is 200 (megabytes). 17 | #' 18 | #' @examples 19 | #' 20 | #' # biblioshiny() 21 | #' 22 | #' @export 23 | 24 | biblioshiny <- function(host = "127.0.0.1", port = NULL, 25 | launch.browser = TRUE, maxUploadSize = 200) { 26 | shinyOptions(maxUploadSize = maxUploadSize) 27 | 28 | runApp(system.file("biblioshiny", package = "bibliometrix"), launch.browser = launch.browser, port = port, host = getOption("shiny.host", host)) 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'release'} 24 | 25 | env: 26 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 27 | R_KEEP_PKG_SOURCE: yes 28 | 29 | steps: 30 | - uses: actions/checkout@v4 31 | 32 | - uses: r-lib/actions/setup-pandoc@v2 33 | 34 | - uses: r-lib/actions/setup-r@v2 35 | with: 36 | r-version: ${{ matrix.config.r }} 37 | http-user-agent: ${{ matrix.config.http-user-agent }} 38 | use-public-rspm: true 39 | 40 | - uses: r-lib/actions/setup-r-dependencies@v2 41 | with: 42 | extra-packages: any::rcmdcheck 43 | needs: check 44 | 45 | - uses: r-lib/actions/check-r-package@v2 46 | with: 47 | upload-snapshots: true 48 | -------------------------------------------------------------------------------- /man/plotThematicEvolution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotThematicEvolution.R 3 | \name{plotThematicEvolution} 4 | \alias{plotThematicEvolution} 5 | \title{Plot a Thematic Evolution Analysis} 6 | \usage{ 7 | plotThematicEvolution(Nodes, Edges, measure = "inclusion", min.flow = 0) 8 | } 9 | \arguments{ 10 | \item{Nodes}{is a list of nodes obtained by the \code{\link{thematicEvolution}} function.} 11 | 12 | \item{Edges}{is a list of edges obtained by the \code{\link{thematicEvolution}} function.} 13 | 14 | \item{measure}{is a character. It can be \code{measure = c("inclusion", "stability", "weighted")}.} 15 | 16 | \item{min.flow}{is numeric. It indicates the minimum value of the measure required to plot a flow.} 17 | } 18 | \value{ 19 | a sankey plot object 20 | } 21 | \description{ 22 | It plots a Thematic Evolution Analysis performed using the \code{\link{thematicEvolution}} function. 23 | } 24 | \examples{ 25 | 26 | \dontrun{ 27 | data(management, package = "bibliometrixData") 28 | years <- c(2004, 2008, 2015) 29 | 30 | nexus <- thematicEvolution(management, field = "DE", years = years, n = 100, minFreq = 2) 31 | 32 | plotThematicEvolution(nexus$Nodes, nexus$Edges) 33 | } 34 | 35 | } 36 | \seealso{ 37 | \code{\link{thematicMap}} function to create a thematic map based on co-word network analysis and clustering. 38 | 39 | \code{\link{thematicEvolution}} function to perform a thematic evolution analysis. 40 | 41 | \code{\link{networkPlot}} to plot a bibliographic network. 42 | } 43 | -------------------------------------------------------------------------------- /man/net2Pajek.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/net2Pajek.R 3 | \name{net2Pajek} 4 | \alias{net2Pajek} 5 | \title{Save a network graph object as Pajek files} 6 | \usage{ 7 | net2Pajek(net, filename = "my_pajek_network", path = NULL) 8 | } 9 | \arguments{ 10 | \item{net}{is a network graph object returned by the function \code{\link{networkPlot}}.} 11 | 12 | \item{filename}{is a character. It indicates the filename for the Pajek export files.} 13 | 14 | \item{path}{is a character. It indicates the path where the files will be saved. 
When path = NULL, the files will be saved in the current folder. Default is NULL.} 15 | } 16 | \value{ 17 | The function returns no object but saves three Pajek files, named "filename.clu", "filename.vec", and "filename.net", in the folder given in the "path" argument. 18 | } 19 | \description{ 20 | The function \code{\link{net2Pajek}} saves a bibliographic network previously created by \code{\link{networkPlot}} as Pajek files. 21 | } 22 | \examples{ 23 | \dontrun{ 24 | data(management, package = "bibliometrixData") 25 | 26 | NetMatrix <- biblioNetwork(management, 27 | analysis = "co-occurrences", 28 | network = "keywords", sep = ";" 29 | ) 30 | 31 | net <- networkPlot(NetMatrix, n = 30, type = "auto", Title = "Co-occurrence Network", labelsize = 1) 32 | 33 | net2Pajek(net, filename = "pajekfiles", path = NULL) 34 | } 35 | } 36 | \seealso{ 37 | \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 38 | } 39 | -------------------------------------------------------------------------------- /man/bradford.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bradford.R 3 | \name{bradford} 4 | \alias{bradford} 5 | \title{Bradford's law} 6 | \usage{ 7 | bradford(M) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic dataframe.} 11 | } 12 | \value{ 13 | The function \code{bradford} returns a list containing the following objects: 14 | \tabular{lll}{ 15 | \code{table} \tab \tab a dataframe with the source distribution partitioned in the three zones\cr 16 | \code{graph} \tab \tab the source distribution plot in ggplot2 format} 17 | } 18 | \description{ 19 | It estimates and draws Bradford's law source distribution. 20 | } 21 | \details{ 22 | Bradford's law is a pattern, first described by \cite{Samuel C. Bradford, 1934}, that estimates the exponentially diminishing returns 23 | of searching for references in science journals. 24 | 25 | One formulation is that if journals in a field are sorted by number of articles into three groups, each with about one-third of all articles, 26 | then the number of journals in each group will be proportional to 1:n:n^2.\cr\cr 27 | 28 | Reference:\cr 29 | Bradford, S. C. (1934). Sources of information on specific subjects. Engineering, 137, 85-86.\cr 30 | } 31 | \examples{ 32 | \dontrun{ 33 | data(management, package = "bibliometrixData") 34 | 35 | BR <- bradford(management) 36 | } 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis 41 | 42 | \code{\link{summary}} method for class '\code{bibliometrix}' 43 | } 44 | -------------------------------------------------------------------------------- /man/timeslice.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/timeslice.R 3 | \name{timeslice} 4 | \alias{timeslice} 5 | \title{Bibliographic data frame time slice} 6 | \usage{ 7 | timeslice(M, breaks = NA, k = 5) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{breaks}{is a numeric vector of two or more unique cut points.} 14 | 15 | \item{k}{is an integer value giving the number of intervals into which the data frame is to be cut. \code{k} is used only if the \code{breaks} argument is not provided. The default is \code{k = 5}.} 16 | } 17 | \value{ 18 | the value returned from \code{timeslice} is a list containing the data frames for each sub-period. 19 | } 20 | \description{ 21 | Divides a bibliographic data frame into time slices 22 | } 23 | \examples{ 24 | 25 | data(scientometrics, package = "bibliometrixData") 26 | 27 | list_df <- timeslice(scientometrics, breaks = c(1995, 2005)) 28 | 29 | names(list_df) 30 | 31 | } 32 | \seealso{ 33 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 34 | 35 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 36 | 37 | \code{\link{summary}} to obtain a summary of the results. 38 | 39 | \code{\link{plot}} to draw some useful plots of the results. 40 | } 41 | -------------------------------------------------------------------------------- /man/net2VOSviewer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/net2VOSviewer.R 3 | \name{net2VOSviewer} 4 | \alias{net2VOSviewer} 5 | \title{Open a bibliometrix network in VOSviewer} 6 | \usage{ 7 | net2VOSviewer(net, vos.path = NULL) 8 | } 9 | \arguments{ 10 | \item{net}{is an object created by the networkPlot function.} 11 | 12 | \item{vos.path}{is a character indicating the full path where VOSviewer.jar is located.} 13 | } 14 | \value{ 15 | It writes a .net file that can be opened in VOSviewer 16 | } 17 | \description{ 18 | \code{net2VOSviewer} plots a network created with \code{\link{networkPlot}} using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 19 | } 20 | \details{ 21 | The function \code{\link{networkPlot}} can plot a bibliographic network previously created by \code{\link{biblioNetwork}}. 22 | The network map can be plotted using internal R routines or using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 23 | } 24 | \examples{ 25 | # EXAMPLE 26 | 27 | # VOSviewer.jar has to be present in the working folder 28 | 29 | # data(scientometrics, package = "bibliometrixData") 30 | 31 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 32 | # network = "references", sep = ";") 33 | 34 | # net <- networkPlot(NetMatrix, n = 30, type = "kamada", Title = "Co-Citation",labelsize=0.5) 35 | 36 | # net2VOSviewer(net) 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioNetwork}} to compute a bibliographic network. 
41 | 42 | \code{\link{networkPlot}} to create and plot a network object 43 | } 44 | -------------------------------------------------------------------------------- /man/lotka.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lotka.R 3 | \name{lotka} 4 | \alias{lotka} 5 | \title{Lotka's law coefficient estimation} 6 | \usage{ 7 | lotka(M) 8 | } 9 | \arguments{ 10 | \item{M}{is an object of the class '\code{bibliometrixDB}'.} 11 | } 12 | \value{ 13 | The function \code{lotka} returns a list of summary statistics of the Lotka's law estimation of an object of class \code{bibliometrix}. 14 | 15 | the list contains the following objects: 16 | \tabular{lll}{ 17 | \code{Beta} \tab \tab Beta coefficient\cr 18 | \code{C} \tab \tab Constant coefficient\cr 19 | \code{R2} \tab \tab Goodness of Fit\cr 20 | \code{fitted} \tab \tab Fitted Values\cr 21 | \code{p.value} \tab \tab P-value of the two-sample Kolmogorov-Smirnov test between the empirical and the theoretical Lotka's law distribution (with Beta = 2)\cr 22 | \code{AuthorProd} \tab \tab Authors' Productivity frequency table\cr 23 | \code{g} \tab \tab Lotka's law plot\cr 24 | \code{g_shiny} \tab \tab Lotka's law plot for biblioshiny} 25 | } 26 | \description{ 27 | It estimates Lotka's law coefficients for scientific productivity (\cite{Lotka A.J., 1926}).\cr\cr 28 | } 29 | \details{ 30 | Reference: 31 | Lotka, A. J. (1926). The frequency distribution of scientific productivity. Journal of the Washington Academy of Sciences, 16(12), 317-323.\cr 32 | } 33 | \examples{ 34 | data(management, package = "bibliometrixData") 35 | L <- lotka(management) 36 | L 37 | 38 | } 39 | \seealso{ 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis 41 | 42 | \code{\link{summary}} method for class '\code{bibliometrix}' 43 | } 44 | -------------------------------------------------------------------------------- /man/missingData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/missingData.R 3 | \name{missingData} 4 | \alias{missingData} 5 | \title{Completeness of bibliographic metadata} 6 | \usage{ 7 | missingData(M) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the \code{\link{convert2df}} function.} 11 | } 12 | \value{ 13 | The function \code{missingData} returns a list containing two objects: 14 | \tabular{lll}{ 15 | \code{allTags} \tab \tab is a data frame including results for all original metadata tags from the collection\cr 16 | \code{mandatoryTags}\tab \tab is a data frame that includes only the tags needed for analysis with bibliometrix and biblioshiny.} 17 | } 18 | \description{ 19 | It calculates the percentage of missing data in the metadata of a bibliographic data frame. 20 | } 21 | \details{ 22 | Each metadata tag is assigned a status, c("Excellent", "Good", "Acceptable", "Poor", "Critical", "Completely missing"), 23 | depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing 24 | values into 6 categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%), 25 | "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%). 
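For example, a tag missing in 15.3% of the documents falls into the "Acceptable" class, while one missing in 62% of them would be classified as "Critical".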
26 | 27 | The results of the function allow us to understand which analyses can be performed with bibliometrix 28 | and which cannot, based on the completeness (or status) of the different metadata. 29 | } 30 | \examples{ 31 | data(scientometrics, package = "bibliometrixData") 32 | res <- missingData(scientometrics) 33 | print(res$mandatoryTags) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/get_authors_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/authorBio.r 3 | \name{get_authors_summary} 4 | \alias{get_authors_summary} 5 | \title{Get Authors Summary from OpenAlex} 6 | \usage{ 7 | get_authors_summary( 8 | doi = "10.1016/j.joi.2017.08.007", 9 | verbose = FALSE, 10 | sleep_time = 0.2, 11 | max_retries = 3 12 | ) 13 | } 14 | \arguments{ 15 | \item{doi}{Character. DOI of the article} 16 | 17 | \item{verbose}{Logical. Print informative messages during execution (default: FALSE)} 18 | 19 | \item{sleep_time}{Numeric. Seconds to wait before API call (default: 0.2)} 20 | 21 | \item{max_retries}{Integer. Maximum number of retry attempts (default: 3)} 22 | } 23 | \value{ 24 | A data frame with summary information for all authors including: 25 | \itemize{ 26 | \item position: Author position in the paper 27 | \item display_name: Author name as it appears in the paper 28 | \item author_position_type: Type of position (first, last, middle) 29 | \item is_corresponding: Whether the author is a corresponding author 30 | \item orcid: ORCID identifier if available 31 | \item openalex_id: OpenAlex author identifier 32 | \item primary_affiliation: Main institutional affiliation 33 | } 34 | } 35 | \description{ 36 | Retrieves a quick summary of all authors from a paper without making additional API calls 37 | for individual author profiles. Useful for getting an overview of the authorship structure. 38 | } 39 | \examples{ 40 | \dontrun{ 41 | # Get a quick summary of all authors 42 | summary <- get_authors_summary(doi = "10.1016/j.joi.2017.08.007") 43 | print(summary) 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/mergeDbSources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mergeDbSources.R 3 | \name{mergeDbSources} 4 | \alias{mergeDbSources} 5 | \title{Merge bibliographic data frames from supported bibliographic DBs} 6 | \usage{ 7 | mergeDbSources(..., remove.duplicated = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{...}{are the bibliographic data frames to merge.} 11 | 12 | \item{remove.duplicated}{is logical. If TRUE, duplicated documents will be deleted from the bibliographic collection.} 13 | 14 | \item{verbose}{is logical. If TRUE, information on duplicate documents is printed on the screen.} 15 | } 16 | \value{ 17 | the value returned from \code{mergeDbSources} is a bibliographic data frame. 18 | } 19 | \description{ 20 | Merge bibliographic data frames from different databases (WoS, SCOPUS, Lens, OpenAlex, etc.) into a single one. 21 | } 22 | \details{ 23 | Bibliographic data frames are obtained by the converting function \code{\link{convert2df}}. 24 | The function merges data frames identifying common tag fields and duplicated records. 
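A minimal sketch of the common-tag idea (illustrative only, not the package's internal implementation; \code{M1} and \code{M2} stand for two hypothetical collections):
\preformatted{
common <- intersect(names(M1), names(M2)) # tag fields shared by both collections
M <- rbind(M1[common], M2[common]) # stack the records on the common tags
}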
25 | } 26 | \examples{ 27 | 28 | data(isiCollection, package = "bibliometrixData") 29 | 30 | data(scopusCollection, package = "bibliometrixData") 31 | 32 | M <- mergeDbSources(isiCollection, scopusCollection, remove.duplicated = TRUE) 33 | 34 | dim(M) 35 | 36 | } 37 | \seealso{ 38 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 39 | 40 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 41 | 42 | \code{\link{summary}} to obtain a summary of the results. 43 | 44 | \code{\link{plot}} to draw some useful plots of the results. 45 | } 46 | -------------------------------------------------------------------------------- /man/splitCommunities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitCommunities.R 3 | \name{splitCommunities} 4 | \alias{splitCommunities} 5 | \title{Splitting Network communities} 6 | \usage{ 7 | splitCommunities(graph, n = NULL) 8 | } 9 | \arguments{ 10 | \item{graph}{is a network plot obtained by the function \code{\link{networkPlot}}.} 11 | 12 | \item{n}{is an integer. It indicates the number of vertices to plot for each community.} 13 | } 14 | \value{ 15 | It is a network object of the class \code{igraph} 16 | } 17 | \description{ 18 | \code{splitCommunities} creates a network plot with separated communities. 19 | } 20 | \details{ 21 | The function \code{\link{splitCommunities}} splits communities into separated subnetworks from a bibliographic network plot previously created by \code{\link{networkPlot}}. 22 | } 23 | \examples{ 24 | # EXAMPLE Keyword co-occurrence network 25 | 26 | data(management, package = "bibliometrixData") 27 | 28 | NetMatrix <- biblioNetwork(management, 29 | analysis = "co-occurrences", 30 | network = "keywords", sep = ";" 31 | ) 32 | 33 | net <- networkPlot(NetMatrix, 34 | n = 30, type = "auto", 35 | Title = "Co-occurrence Network", labelsize = 1, verbose = FALSE 36 | ) 37 | 38 | graph <- splitCommunities(net$graph, n = 30) 39 | 40 | } 41 | \seealso{ 42 | \code{\link{biblioNetwork}} to compute a bibliographic network. 43 | 44 | \code{\link{networkPlot}} to plot a bibliographic network. 45 | 46 | \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 47 | 48 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 49 | 50 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 51 | } 52 | -------------------------------------------------------------------------------- /man/idByAuthor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/idByAuthor.R 3 | \name{idByAuthor} 4 | \alias{idByAuthor} 5 | \title{Get Complete Author Information and ID from Scopus} 6 | \usage{ 7 | idByAuthor(df, api_key) 8 | } 9 | \arguments{ 10 | \item{df}{is a dataframe composed of three columns: 11 | \tabular{lll}{ 12 | \code{lastname}\tab \tab author's last name\cr 13 | \code{firstname}\tab \tab author's first name\cr 14 | \code{affiliation}\tab \tab Part of the affiliation name (university name, city, etc.)} 15 | i.e. df[1,1:3]<-c("aria","massimo","naples") 16 | When affiliation is not specified, the field df$affiliation has to be NA. 17 | i.e. df[2,1:3]<-c("cuccurullo","corrado", NA)} 18 | 19 | \item{api_key}{is a character. It contains the Elsevier API key. 
Information about how to obtain an API key is available at the \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website}} 20 | } 21 | \value{ 22 | a data frame with cases corresponding to authors and variables to the author's information and ID obtained from SCOPUS. 23 | } 24 | \description{ 25 | Uses the SCOPUS API author search to retrieve author identification information. 26 | } 27 | \examples{ 28 | ## Request a personal API Key from the Elsevier web page https://dev.elsevier.com/sc_apis.html 29 | # 30 | # api_key="your api key" 31 | 32 | ## create a data frame with the list of authors to get information and IDs 33 | # i.e. df[1,1:3]<-c("aria","massimo","naples") 34 | # df[2,1:3]<-c("cuccurullo","corrado", NA) 35 | 36 | ## run idByAuthor function 37 | # 38 | # authorsID <- idByAuthor(df, api_key) 39 | 40 | } 41 | \seealso{ 42 | \code{\link{retrievalByAuthorID}} for downloading the complete author bibliographic collection from SCOPUS 43 | } 44 | -------------------------------------------------------------------------------- /R/readFiles.R: -------------------------------------------------------------------------------- 1 | #' DEPRECATED: Load a sequence of ISI or SCOPUS Export files into a large character object 2 | #' 3 | #' The function readFiles is deprecated. You can import and convert your export files directly using the function \code{\link{convert2df}}. 4 | #' 5 | #' @param ... is a sequence of names of files downloaded from WoS (in plain text or bibtex format) or SCOPUS Export files (exclusively in bibtex format). 6 | #' @return a character vector of length the number of lines read. 7 | #' 8 | #' @examples 9 | #' # WoS or SCOPUS Export files can be read using the \code{\link{readFiles}} function: 10 | #' 11 | #' # largechar <- readFiles('filename1.txt','filename2.txt','filename3.txt') 12 | #' 13 | #' # filename1.txt, filename2.txt and filename3.txt are ISI or SCOPUS Export files 14 | #' # in plain text or bibtex format. 15 | #' 16 | #' # D <- readFiles('https://www.bibliometrix.org/datasets/bibliometrics_articles.txt') 17 | #' 18 | #' @seealso \code{\link{convert2df}} for converting SCOPUS or ISI Export files into a dataframe 19 | #' 20 | #' @export 21 | 22 | readFiles <- function(...) { 23 | cat("\nFrom version 3.0.0, the function readFiles has been dropped.\nPlease use the function 'convert2df' to import and convert your export files") 24 | # arguments <- unlist(list(...)) 25 | # k=length(arguments) 26 | # D=list() 27 | # enc="UTF-8" 28 | # origEnc=getOption("encoding") 29 | # if (origEnc=="UTF-8"){options(encoding = "native.enc")} 30 | # for (i in 1:k){ 31 | # D[[i]]=suppressWarnings( 32 | # iconv(readLines(arguments[i],encoding = "UTF-8"),"latin1", "ASCII", sub="") 33 | # #conv(readLines(arguments[[i]])) 34 | # ) 35 | # } 36 | # D=unlist(D) 37 | # options(encoding = origEnc) 38 | # Encoding(D) <- "UTF-8" 39 | # return(D) 40 | return(NULL) 41 | } 42 | -------------------------------------------------------------------------------- /man/keywordAssoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/keywordAssoc.R 3 | \name{keywordAssoc} 4 | \alias{keywordAssoc} 5 | \title{ID and DE keyword associations} 6 | \usage{ 7 | keywordAssoc(M, sep = ";", n = 10, excludeKW = NA) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{sep}{is the field separator character. This character separates keywords in each string of the ID and DE columns of the bibliographic data frame. The default is \code{sep = ";"}.} 14 | 15 | \item{n}{is an integer. It indicates the number of authors' keywords to associate with each keyword plus. The default is \code{n = 10}.} 16 | 17 | \item{excludeKW}{is a character vector. It contains authors' keywords to exclude from the analysis.} 18 | } 19 | \value{ 20 | an object of \code{class} "list". 21 | } 22 | \description{ 23 | It associates authors' keywords to keywords plus. 24 | } 25 | \examples{ 26 | 27 | data(scientometrics, package = "bibliometrixData") 28 | 29 | KWlist <- keywordAssoc(scientometrics, sep = ";", n = 10, excludeKW = NA) 30 | 31 | # list of first 10 Keywords plus 32 | names(KWlist) 33 | 34 | # list of first 10 authors' keywords associated with the first Keyword plus 35 | KWlist[[1]][1:10] 36 | 37 | } 38 | \seealso{ 39 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 40 | 41 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 42 | 43 | \code{\link{summary}} to obtain a summary of the results. 44 | 45 | \code{\link{plot}} to draw some useful plots of the results. 46 | } 47 | -------------------------------------------------------------------------------- /man/dominance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dominance.R 3 | \name{dominance} 4 | \alias{dominance} 5 | \title{Authors' dominance ranking} 6 | \usage{ 7 | dominance(results, k = 10) 8 | } 9 | \arguments{ 10 | \item{results}{is an object of the class '\code{bibliometrix}' for which the analysis of the authors' dominance ranking is desired.} 11 | 12 | \item{k}{is an integer, used for table formatting (number of authors). Default value is 10.} 13 | } 14 | \value{ 15 | The function \code{dominance} returns a data frame with cases corresponding to the first \code{k} most productive authors and variables to the typical fields of a dominance analysis. 16 | 17 | the data frame variables are: 18 | \tabular{lll}{ 19 | \code{Author} \tab \tab Author's name\cr 20 | \code{Dominance Factor} \tab \tab Dominance Factor (DF = FAA / MAA)\cr 21 | \code{Tot Articles} \tab \tab N. of Authored Articles (TAA)\cr 22 | \code{Single Authored} \tab \tab N. of Single-Authored Articles (SAA)\cr 23 | \code{Multi Authored} \tab \tab N. of Multi-Authored Articles (MAA=TAA-SAA)\cr 24 | \code{First Authored} \tab \tab N. of First Authored Articles (FAA)\cr 25 | \code{Rank by Articles} \tab \tab Author Ranking by N. of Articles\cr 26 | \code{Rank by DF} \tab \tab Author Ranking by Dominance Factor} 27 | } 28 | \description{ 29 | It calculates the authors' dominance ranking from an object of the class '\code{bibliometrix}' as proposed by Kumar & Kumar, 2008. 
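For instance (hypothetical counts), an author with TAA = 12 authored articles, of which SAA = 2 are single-authored and FAA = 6 are first-authored, has MAA = 12 - 2 = 10 and a Dominance Factor DF = 6 / 10 = 0.6.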
30 | } 31 | \examples{ 32 | data(scientometrics, package = "bibliometrixData") 33 | results <- biblioAnalysis(scientometrics) 34 | DF <- dominance(results) 35 | DF 36 | 37 | } 38 | \seealso{ 39 | \code{\link{biblioAnalysis}} function for bibliometric analysis 40 | 41 | \code{\link{summary}} method for class '\code{bibliometrix}' 42 | } 43 | -------------------------------------------------------------------------------- /man/tableTag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tableTag.R 3 | \name{tableTag} 4 | \alias{tableTag} 5 | \title{Tabulate elements from a Tag Field column} 6 | \usage{ 7 | tableTag( 8 | M, 9 | Tag = "CR", 10 | sep = ";", 11 | ngrams = 1, 12 | remove.terms = NULL, 13 | synonyms = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 18 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 19 | 20 | \item{Tag}{is a character object. It indicates one of the field tags of the 21 | standard ISI WoS Field Tag codify.} 22 | 23 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 24 | 25 | \item{ngrams}{is an integer between 1 and 3. It indicates the type of n-gram to extract from titles or abstracts.} 26 | 27 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 28 | 29 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 30 | } 31 | \value{ 32 | an object of class \code{table} 33 | } 34 | \description{ 35 | It tabulates elements from a Tag Field column of a bibliographic data frame. 36 | } 37 | \details{ 38 | \code{tableTag} is an internal routine of the main function \code{\link{biblioAnalysis}}. 39 | } 40 | \examples{ 41 | 42 | data(scientometrics, package = "bibliometrixData") 43 | Tab <- tableTag(scientometrics, Tag = "CR", sep = ";") 44 | Tab[1:10] 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/findAuthorWorks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findAuthorWorks.r 3 | \name{findAuthorWorks} 4 | \alias{findAuthorWorks} 5 | \title{Find Author's Co-authored Works} 6 | \usage{ 7 | findAuthorWorks(author_name, data, partial_match = TRUE, exact_match = FALSE) 8 | } 9 | \arguments{ 10 | \item{author_name}{Character. The author's name to search for (case-insensitive)} 11 | 12 | \item{data}{Data.frame. The bibliometric dataframe with AU and DI columns} 13 | 14 | \item{partial_match}{Logical. If TRUE, allows partial name matching (default: TRUE)} 15 | 16 | \item{exact_match}{Logical. 
If TRUE, requires exact name matching (default: FALSE)} 17 | } 18 | \value{ 19 | A data.frame with columns: 20 | \itemize{ 21 | \item doi: DOI of the work 22 | \item author_position: Numerical position of the author in the author list 23 | \item total_authors: Total number of authors in the work 24 | \item all_authors: Complete list of authors for reference 25 | \item matched_name: The exact name variant that was matched 26 | } 27 | } 28 | \description{ 29 | Searches for an author's name in a bibliometric dataframe and returns 30 | the DOIs and author positions of their co-authored works. 31 | } 32 | \details{ 33 | The function searches through the AU column, which contains author names 34 | separated by semicolons. It identifies the position of the target author 35 | and returns comprehensive information about each matching work. 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # Find works by "ARIA M" 40 | works <- findAuthorWorks("ARIA M", M) 41 | 42 | # Find works with exact matching 43 | works_exact <- findAuthorWorks("PESTANA MH", M, exact_match = TRUE) 44 | 45 | # Find works with partial matching disabled 46 | works_full <- findAuthorWorks("MASSIMO ARIA", M, partial_match = FALSE) 47 | } 48 | 49 | } 50 | \author{ 51 | Your Name 52 | } 53 | -------------------------------------------------------------------------------- /man/localCitations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/localCitations.R 3 | \name{localCitations} 4 | \alias{localCitations} 5 | \title{Author local citations} 6 | \usage{ 7 | localCitations(M, fast.search = FALSE, sep = ";", verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{fast.search}{is logical. If TRUE, the function calculates local citations only for the top 25 percent most cited documents.} 14 | 15 | \item{sep}{is the field separator character. This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}.} 16 | 17 | \item{verbose}{is a logical. If TRUE, results are printed on screen.} 18 | } 19 | \value{ 20 | an object of \code{class} "list" containing author local citations and document local citations. 21 | } 22 | \description{ 23 | It calculates local citations (LCS) of authors and documents of a bibliographic collection. 24 | } 25 | \details{ 26 | Local citations measure how many times an author (or a document) included in this collection has been cited by the documents also included in the collection. 27 | } 28 | \examples{ 29 | 30 | data(scientometrics, package = "bibliometrixData") 31 | 32 | CR <- localCitations(scientometrics, sep = ";") 33 | 34 | CR$Authors[1:10, ] 35 | CR$Papers[1:10, ] 36 | 37 | } 38 | \seealso{ 39 | \code{\link{citations}} function for citation frequency distribution. 40 | 41 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 42 | 43 | \code{\link{summary}} to obtain a summary of the results. 44 | 45 | \code{\link{plot}} to draw some useful plots of the results. 
46 | } 47 | -------------------------------------------------------------------------------- /R/timeslice.R: -------------------------------------------------------------------------------- 1 | #' Bibliographic data frame time slice 2 | #' 3 | #' Divides a bibliographic data frame into time slices 4 | #' 5 | #' 6 | #' 7 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 9 | #' @param breaks is a numeric vector of two or more unique cut points. 10 | #' @param k is an integer value giving the number of intervals into which the data frame is to be cut. \code{k} is used only if the \code{breaks} argument is not provided. The default is \code{k = 5}. 11 | #' @return the value returned from \code{timeslice} is a list containing the data frames for each sub-period. 12 | #' 13 | #' 14 | #' 15 | #' @examples 16 | #' 17 | #' data(scientometrics, package = "bibliometrixData") 18 | #' 19 | #' list_df <- timeslice(scientometrics, breaks = c(1995, 2005)) 20 | #' 21 | #' names(list_df) 22 | #' 23 | #' @seealso \code{\link{convert2df}} to import and convert an ISI or SCOPUS Export file in a bibliographic data frame. 24 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 25 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 26 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 27 | #' 28 | #' @export 29 | timeslice <- function(M, breaks = NA, k = 5) { 30 | M$PY <- as.numeric(M$PY) 31 | period <- list() 32 | if (is.na(breaks[1]) & is.numeric(k)) { 33 | breaks <- (floor(seq(min(M$PY, na.rm = TRUE) - 1, max(M$PY, na.rm = TRUE), length.out = k + 1))) 34 | } else { 35 | breaks <- c(min(M$PY, na.rm = TRUE) - 1, breaks, max(M$PY, na.rm = TRUE)) 36 | } 37 | df <- cut(M$PY, breaks) 38 | N <- levels(df) 39 | ind <- as.numeric(df) 40 | df <- split(M, ind) 41 | names(df) <- N 42 | return(df) 43 | } 44 | -------------------------------------------------------------------------------- /R/cochrane2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | cochrane2df <- function(D) { 3 | D <- D[nchar(D) > 0] # remove empty rows 4 | 5 | Papers <- which(substr(D, 1, 8) == "Record #") # first row of each document 6 | nP <- length(Papers) # number of documents 7 | 8 | rowPapers <- diff(c(Papers, length(D) + 1)) 9 | 10 | numPapers <- rep(1:nP, rowPapers) 11 | 12 | DATA <- data.frame(Tag = substr(D, 1, 4), content = substr(D, 5, nchar(D)), Paper = numPapers) 13 | DATA$Tag <- gsub(" ", "", gsub(":", "", DATA$Tag)) 14 | df <- DATA %>% 15 | group_by(Paper, Tag) %>% 16 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 17 | arrange(Tag, Paper) %>% 18 | pivot_wider(names_from = Tag, values_from = cont) %>% 19 | ungroup() %>% 20 | rename( 21 | "PY" = "YR", 22 | "UT" = "ID", 23 | "ID" = "KY", 24 | "URL" = "US", 25 | "DI" = "DOI", 26 | "NR" = "NO" 27 | ) 28 | df <- as.data.frame(df) 29 | 30 | df$PY <- as.numeric(df$PY) 31 | 32 | ### replace "---" with ";" 33 | tagsComma <- c("AU", "ID") 34 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 35 | gsub("---", ";", x) 36 | })) 37 | 38 | ### replace "---" with " " 39 | otherTags <- setdiff(names(df), tagsComma) 40 | df2 <- data.frame(lapply(df[otherTags], function(x) { 41 | trimES(gsub("---", " ", x)) 
}), stringsAsFactors = FALSE) 43 | df <- cbind(df1, df2) 44 | rm(df1, df2) 45 | 46 | df$ID <- gsub(" ;", ";", gsub("; ", ";", gsub("\\[[^\\]]*\\]", "", df$ID, perl = TRUE))) 47 | 48 | df$DB <- "COCHRANE" 49 | 50 | # Authors 51 | # df$AU <- trimES(gsub("-","",df$AU)) 52 | 53 | # Toupper 54 | DI <- df$DI 55 | df <- data.frame(lapply(df, toupper), stringsAsFactors = FALSE) 56 | df$DI <- gsub(" ", "", DI) 57 | 58 | df <- df[!(names(df) %in% c("Paper", "Reco"))] 59 | 60 | df$DE <- df$ID 61 | 62 | df$JI <- df$J9 <- df$SO 63 | 64 | return(df) 65 | } 66 | -------------------------------------------------------------------------------- /man/citations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/citations.R 3 | \name{citations} 4 | \alias{citations} 5 | \title{Citation frequency distribution} 6 | \usage{ 7 | citations(M, field = "article", sep = ";") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics Web of Science file.} 12 | 13 | \item{field}{is a character. It can be "article" or "author" to obtain the frequency distribution of cited articles or cited authors (only first authors for the WoS database), respectively. The default is \code{field = "article"}.} 14 | 15 | \item{sep}{is the field separator character. This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}.} 16 | } 17 | \value{ 18 | an object of \code{class} "list" containing the following components: 19 | 20 | \tabular{lll}{ 21 | Cited \tab \tab the most frequently cited manuscripts or authors\cr 22 | Year \tab \tab the publication year (only for cited article analysis)\cr 23 | Source \tab \tab the journal (only for cited article analysis)} 24 | } 25 | \description{ 26 | It calculates the frequency distribution of citations. 27 | } 28 | \examples{ 29 | ## EXAMPLE 1: Cited articles 30 | 31 | data(scientometrics, package = "bibliometrixData") 32 | 33 | CR <- citations(scientometrics, field = "article", sep = ";") 34 | 35 | CR$Cited[1:10] 36 | CR$Year[1:10] 37 | CR$Source[1:10] 38 | 39 | ## EXAMPLE 2: Cited first authors 40 | 41 | data(scientometrics) 42 | 43 | CR <- citations(scientometrics, field = "author", sep = ";") 44 | 45 | CR$Cited[1:10] 46 | 47 | } 48 | \seealso{ 49 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 50 | 51 | \code{\link{summary}} to obtain a summary of the results. 52 | 53 | \code{\link{plot}} to draw some useful plots of the results. 54 | } 55 | -------------------------------------------------------------------------------- /man/summary.bibliometrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.bibliometrix.R 3 | \name{summary.bibliometrix} 4 | \alias{summary.bibliometrix} 5 | \title{Summarizing bibliometric analysis results} 6 | \usage{ 7 | \method{summary}{bibliometrix}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{is the object for which a summary is desired.} 11 | 12 | \item{...}{can accept the following arguments:\cr 13 | \code{k} integer, used for table formatting (number of rows). 
Default value is 10.\cr 14 | \code{pause} logical, used to allow pause in screen scrolling of results. Default value is \code{pause = FALSE}.\cr 15 | \code{width} integer, used to define screen output width. Default value is \code{width = 120}.\cr 16 | \code{verbose} logical, used to allow screen output. Default is TRUE.} 17 | } 18 | \value{ 19 | The function \code{summary} computes and returns a list of summary statistics of the object of class \code{bibliometrix}. 20 | 21 | the list contains the following objects: 22 | \tabular{lll}{ 23 | \code{MainInformation} \tab \tab Main Information about Data\cr 24 | \code{AnnualProduction} \tab \tab Annual Scientific Production\cr 25 | \code{AnnualGrowthRate} \tab \tab Annual Percentage Growth Rate\cr 26 | \code{MostProdAuthors} \tab \tab Most Productive Authors\cr 27 | \code{MostCitedPapers} \tab \tab Top manuscripts per number of citations\cr 28 | \code{MostProdCountries} \tab \tab Corresponding Author's Countries\cr 29 | \code{TCperCountries} \tab \tab Total Citation per Countries\cr 30 | \code{MostRelSources} \tab \tab Most Relevant Sources\cr 31 | \code{MostRelKeywords} \tab \tab Most Relevant Keywords} 32 | } 33 | \description{ 34 | \code{summary} method for class '\code{bibliometrix}' 35 | } 36 | \examples{ 37 | data(scientometrics, package = "bibliometrixData") 38 | 39 | results <- biblioAnalysis(scientometrics) 40 | 41 | summary(results) 42 | 43 | } 44 | \seealso{ 45 | \code{\link{biblioAnalysis}} function for bibliometric analysis 46 | 47 | \code{\link{plot}} to draw some useful plots of the results. 48 | } 49 | -------------------------------------------------------------------------------- /man/histNetwork.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/histNetwork.R 3 | \name{histNetwork} 4 | \alias{histNetwork} 5 | \title{Historical co-citation network} 6 | \usage{ 7 | histNetwork(M, min.citations, sep = ";", network = TRUE, verbose = TRUE) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function 11 | \code{\link{convert2df}}. It is a data matrix with cases corresponding to 12 | manuscripts and variables to Field Tag in the original SCOPUS, OpenAlex, Lens.org and Clarivate 13 | Analytics Web of Science file.} 14 | 15 | \item{min.citations}{DEPRECATED. The new algorithm does not use this parameter. It will be removed in the next version of bibliometrix.} 16 | 17 | \item{sep}{is the field separator character. This character separates strings 18 | in the CR column of the data frame. The default is \code{sep = ";"}.} 19 | 20 | \item{network}{is logical. If TRUE, the function calculates and returns also the direct citation network. If FALSE, 21 | the function returns only the local citation table.} 22 | 23 | \item{verbose}{is logical. If TRUE, results are printed on screen.} 24 | } 25 | \value{ 26 | \code{histNetwork} returns an object of \code{class} "list" 27 | containing the following components: 28 | 29 | \tabular{lll}{ NetMatrix \tab \tab the historical co-citation network 30 | matrix\cr histData \tab \tab the set of n most cited references\cr M 31 | \tab \tab the bibliographic data frame} 32 | } 33 | \description{ 34 | \code{histNetwork} creates a historical citation network from a bibliographic 35 | data frame. 
36 | } 37 | \examples{ 38 | \dontrun{ 39 | data(management, package = "bibliometrixData") 40 | 41 | histResults <- histNetwork(management, sep = ";") 42 | } 43 | 44 | } 45 | \seealso{ 46 | \code{\link{convert2df}} to import and convert a supported 47 | export file in a bibliographic data frame. 48 | 49 | \code{\link{summary}} to obtain a summary of the results. 50 | 51 | \code{\link{plot}} to draw some useful plots of the results. 52 | 53 | \code{\link{biblioNetwork}} to compute a bibliographic network. 54 | } 55 | -------------------------------------------------------------------------------- /R/sourceGrowth.R: -------------------------------------------------------------------------------- 1 | #' Number of documents published annually per Top Sources 2 | #' 3 | #' It calculates yearly published documents of the top sources. 4 | #' 5 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to articles and variables to Field Tag in the original ISI or SCOPUS file. 7 | #' @param top is a numeric. It indicates the number of top sources to analyze. The default value is 5. 8 | #' @param cdf is a logical. If TRUE, the function calculates the cumulative occurrences distribution. 9 | #' @return an object of class \code{data.frame} 10 | #' @examples 11 | #' 12 | #' data(scientometrics, package = "bibliometrixData") 13 | #' topSO <- sourceGrowth(scientometrics, top = 1, cdf = TRUE) 14 | #' topSO 15 | #' 16 | #' # Plotting results 17 | #' \dontrun{ 18 | #' install.packages("reshape2") 19 | #' library(reshape2) 20 | #' library(ggplot2) 21 | #' DF <- melt(topSO, id = "Year") 22 | #' ggplot(DF, aes(Year, value, group = variable, color = variable)) + 23 | #' geom_line() 24 | #' } 25 | #' 26 | #' @export 27 | #' 28 | sourceGrowth <- function(M, top = 5, cdf = TRUE) { 29 | PY <- min(M$PY, na.rm = T):max(M$PY, na.rm = T) 30 | WSO <- cocMatrix(M, Field = "SO") 31 | if (is.null(dim(WSO))) { 32 | WSO <- cbind(WSO) 33 | colnames(WSO) <- M$SO[1] 34 | } 35 | if (top > dim(WSO)[2]) { 36 | top <- dim(WSO)[2] 37 | } 38 | 39 | M$PY <- as.character(M$PY) 40 | WPY <- cocMatrix(M, Field = "PY") 41 | i <- setdiff(PY, colnames(WPY)) 42 | if (length(i) > 0) { 43 | WPY <- cbind(WPY, matrix(0, dim(WPY)[1], length(i))) 44 | colnames(WPY)[(dim(WPY)[2] - length(i) + 1):dim(WPY)[2]] <- as.character(i) 45 | } 46 | PYSO <- Matrix::crossprod(WPY, WSO) 47 | ind <- Matrix::colSums(PYSO) 48 | deg <- sort(ind, decreasing = T)[top] 49 | sonames <- colnames(PYSO)[ind >= deg] 50 | PYSO <- as.data.frame(as.matrix(PYSO[, ind >= deg])) 51 | 52 | PYSO <- cbind(as.numeric(colnames(WPY)), PYSO) 53 | 54 | PYSO <- PYSO[order(PYSO[, 1]), ] 55 | if (isTRUE(cdf)) { 56 | PYSO[, -1] <- apply(as.data.frame(PYSO[, -1]), 2, cumsum) 57 | } 58 | names(PYSO) <- c("Year", sonames) 59 | return(PYSO) 60 | } 61 | -------------------------------------------------------------------------------- /man/KeywordGrowth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/keywordGrowth.R 3 | \name{KeywordGrowth} 4 | \alias{KeywordGrowth} 5 | \title{Yearly occurrences of top keywords/terms} 6 | \usage{ 7 | KeywordGrowth( 8 | M, 9 | Tag = "ID", 10 | sep = ";", 11 | top = 10, 12 | cdf = TRUE, 13 | remove.terms = NULL, 14 | synonyms = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 
19 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 20 | 21 | \item{Tag}{is a character object. It indicates one of the keyword field tags of the 22 | standard ISI WoS Field Tag codify (ID, DE, KW_Merged) or a field tag created by the \code{\link{termExtraction}} function (TI_TM, AB_TM, etc.).} 23 | 24 | \item{sep}{is the field separator character. This character separates strings in each keyword column of the data frame. The default is \code{sep = ";"}.} 25 | 26 | \item{top}{is a numeric. It indicates the number of top keywords to analyze. The default value is 10.} 27 | 28 | \item{cdf}{is a logical. If TRUE, the function calculates the cumulative occurrences distribution.} 29 | 30 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 31 | 32 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 33 | } 34 | \value{ 35 | an object of class \code{data.frame} 36 | } 37 | \description{ 38 | It calculates yearly occurrences of top keywords/terms. 39 | } 40 | \examples{ 41 | 42 | data(scientometrics, package = "bibliometrixData") 43 | topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5, cdf = TRUE) 44 | topKW 45 | 46 | # Plotting results 47 | \dontrun{ 48 | install.packages("reshape2") 49 | library(reshape2) 50 | library(ggplot2) 51 | DF <- melt(topKW, id = "Year") 52 | ggplot(DF, aes(Year, value, group = variable, color = variable)) + geom_line() 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/normalizeCitationScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalizeCItationScore.R 3 | \name{normalizeCitationScore} 4 | \alias{normalizeCitationScore} 5 | \title{Calculate the normalized citation score metric} 6 | \usage{ 7 | normalizeCitationScore(M, field = "documents", impact.measure = "local") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the \code{\link{convert2df}} function.} 11 | 12 | \item{field}{is a character. It indicates the unit of analysis on which to calculate the NCS. It can be equal to \code{field = c("documents", "authors", "sources")}. Default is \code{field = "documents"}.} 13 | 14 | \item{impact.measure}{is a character. It indicates the impact measure used to rank cluster elements (documents, authors or sources). 15 | It can be \code{impact.measure = c("local", "global")}. 16 | With \code{impact.measure = "local"}, \link{normalizeCitationScore} calculates the elements' impact using the Normalized Local Citation Score, while 17 | with \code{impact.measure = "global"}, the function uses the Normalized Global Citation Score to measure the elements' impact.} 18 | } 19 | \value{ 20 | a dataframe. 21 | } 22 | \description{ 23 | It calculates the normalized citation score for documents, authors and sources using both global and local citations. 24 | } 25 | \details{ 26 | The Normalized Citation Score (NCS) of a document is calculated by dividing the actual count of citing items by the expected 27 | citation rate for documents with the same year of publication. 
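For instance (hypothetical values), a document published in 2015 and cited 30 times, in a collection where documents published in 2015 received 20 citations on average, has NCS = 30 / 20 = 1.5.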
28 | 29 | The MNCS of a set of documents, for example the collected works of an individual, or those published in a journal, is the average of the NCS values for all the documents in the set. 30 | 31 | The NGCS is the NCS calculated using the global citations (total citations that a document received considering the whole bibliographic database). 32 | 33 | The NLCS is the NCS calculated using the local citations (total citations that a document received from a set of documents included in the same collection). 34 | } 35 | \examples{ 36 | \dontrun{ 37 | data(management, package = "bibliometrixData") 38 | NCS <- normalizeCitationScore(management, field = "authors", impact.measure = "local") 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/networkStat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/networkStat.R 3 | \name{networkStat} 4 | \alias{networkStat} 5 | \title{Calculating network summary statistics} 6 | \usage{ 7 | networkStat(object, stat = "network", type = "degree") 8 | } 9 | \arguments{ 10 | \item{object}{is a network matrix obtained by the function \code{\link{biblioNetwork}} or a graph object of the class \code{igraph}.} 11 | 12 | \item{stat}{is a character. It indicates which statistics are to be calculated. \code{stat = "network"} calculates the statistics related to the network; 13 | \code{stat = "all"} calculates the statistics related to the network and the individual nodes that compose it. Default value is \code{stat = "network"}.} 14 | 15 | \item{type}{is a character. It indicates which centrality index is calculated. type values can be c("degree", "closeness", "betweenness", "eigenvector", "pagerank", "hub", "authority", "all"). Default is "degree".} 16 | } 17 | \value{ 18 | It is a list containing the following elements: 19 | \tabular{lll}{ 20 | \code{graph} \tab \tab a network object of the class \code{igraph}\cr 21 | \code{network} \tab \tab a list with the main statistics of the network\cr 22 | \code{vertex} \tab \tab a data frame with the main measures of centrality and prestige of vertices.\cr} 23 | } 24 | \description{ 25 | \code{networkStat} calculates the main network statistics. 26 | } 27 | \details{ 28 | The function \code{\link{networkStat}} can calculate the main network statistics from a bibliographic network previously created by \code{\link{biblioNetwork}}. 29 | } 30 | \examples{ 31 | # EXAMPLE Co-citation network 32 | 33 | # to run the example, please remove # from the beginning of the following lines 34 | # data(scientometrics, package = "bibliometrixData") 35 | 36 | # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 37 | # network = "references", sep = ";") 38 | 39 | # netstat <- networkStat(NetMatrix, stat = "all", type = "degree") 40 | 41 | } 42 | \seealso{ 43 | \code{\link{biblioNetwork}} to compute a bibliographic network. 44 | 45 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 46 | 47 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 
48 | } 49 | -------------------------------------------------------------------------------- /R/net2VOSviewer.R: -------------------------------------------------------------------------------- 1 | #' Open a bibliometrix network in VOSviewer 2 | #' 3 | #' \code{net2VOSviewer} plots a network created with \code{\link{networkPlot}} using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 4 | #' 5 | #' The function \code{\link{networkPlot}} can plot a bibliographic network previously created by \code{\link{biblioNetwork}}. 6 | #' The network map can be plotted using internal R routines or using \href{https://www.vosviewer.com/}{VOSviewer} by Nees Jan van Eck and Ludo Waltman. 7 | #' 8 | #' 9 | #' @param net is an object created by the networkPlot function. 10 | #' @param vos.path is a character indicating the full path where VOSviewer.jar is located. 11 | #' @return It writes a .net file that can be opened in VOSviewer 12 | #' 13 | #' @examples 14 | #' # EXAMPLE 15 | #' 16 | #' # VOSviewer.jar has to be present in the working folder 17 | #' 18 | #' # data(scientometrics, package = "bibliometrixData") 19 | #' 20 | #' # NetMatrix <- biblioNetwork(scientometrics, analysis = "co-citation", 21 | #' # network = "references", sep = ";") 22 | #' 23 | #' # net <- networkPlot(NetMatrix, n = 30, type = "kamada", Title = "Co-Citation",labelsize=0.5) 24 | #' 25 | #' # net2VOSviewer(net) 26 | #' 27 | #' @seealso \code{\link{biblioNetwork}} to compute a bibliographic network. 28 | #' @seealso \code{\link{networkPlot}} to create and plot a network object 29 | #' 30 | #' @export 31 | 32 | 33 | net2VOSviewer <- function(net, vos.path = NULL) { 34 | net <- net$graph_pajek 35 | V(net)$id <- V(net)$name 36 | 37 | if (is.null(vos.path)) { 38 | vos.path <- getwd() 39 | } 40 | if (sum(dir(vos.path) %in% "VOSviewer.jar") == 0) { 41 | cat( 42 | paste( 43 | "VOSviewer.jar does not exist in the path", 44 | vos.path, 45 | "\n\nPlease download it from https://www.vosviewer.com/download", 46 | "\n(Java version for other systems)\n" 47 | ) 48 | ) 49 | } else { 50 | netfile <- paste(vos.path, "/", "vosnetwork.net", sep = "") 51 | VOScommand <- paste("java -jar ", 52 | vos.path, 53 | "/", 54 | "VOSviewer.jar -pajek_network ", 55 | netfile, 56 | sep = "" 57 | ) 58 | write.graph( 59 | graph = net, 60 | file = netfile, 61 | format = "pajek" 62 | ) 63 | system(VOScommand, wait = FALSE) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /R/splitCommunities.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("group", "size")) 2 | #' Splitting Network communities 3 | #' 4 | #' \code{splitCommunities} creates a network plot with separated communities. 5 | #' 6 | #' The function \code{\link{splitCommunities}} splits communities into separated subnetworks from a bibliographic network plot previously created by \code{\link{networkPlot}}. 7 | #' 8 | #' @param graph is a network plot obtained by the function \code{\link{networkPlot}}. 9 | #' @param n is an integer. It indicates the number of vertices to plot for each community. 
10 | #' @return It is a network object of the class \code{igraph} 11 | #' 12 | #' 13 | #' @examples 14 | #' # EXAMPLE Keyword co-occurrence network 15 | #' 16 | #' data(management, package = "bibliometrixData") 17 | #' 18 | #' NetMatrix <- biblioNetwork(management, 19 | #' analysis = "co-occurrences", 20 | #' network = "keywords", sep = ";" 21 | #' ) 22 | #' 23 | #' net <- networkPlot(NetMatrix, 24 | #' n = 30, type = "auto", 25 | #' Title = "Co-occurrence Network", labelsize = 1, verbose = FALSE 26 | #' ) 27 | #' 28 | #' graph <- splitCommunities(net$graph, n = 30) 29 | #' 30 | #' @seealso \code{\link{biblioNetwork}} to compute a bibliographic network. 31 | #' @seealso \code{\link{networkPlot}} to plot a bibliographic network. 32 | #' @seealso \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software. 33 | #' @seealso \code{\link{cocMatrix}} to compute a co-occurrence matrix. 34 | #' @seealso \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 35 | #' 36 | #' @export 37 | splitCommunities <- function(graph, n = NULL) { 38 | df <- data.frame(label = V(graph)$name, size = V(graph)$deg, group = V(graph)$community) 39 | 40 | if (!is.null(n)) { 41 | labels <- df %>% 42 | group_by(group) %>% 43 | top_n(n = n, wt = size) %>% 44 | as.data.frame() 45 | } else { 46 | labels <- df %>% 47 | group_by(group) %>% 48 | as.data.frame() 49 | } 50 | 51 | # remove inter-cluster edges 52 | ind <- which(E(graph)$color == adjustcolor("gray70", alpha.f = graph$alpha / 2)) 53 | coGraph <- igraph::delete_edges(graph, E(graph)[ind]) 54 | ind <- which(V(coGraph)$name %in% labels$label) 55 | V(coGraph)$label[-ind] <- "" 56 | igraph::graph_attr(coGraph, "layout") <- igraph::layout_with_fr(coGraph) 57 | # plot(coGraph) 58 | return(coGraph) 59 | } 60 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bibliometrix 2 | Type: Package 3 | Title: Comprehensive Science Mapping Analysis 4 | Version: 5.2.1.9000 5 | Authors@R: c( 6 | person(given = "Massimo", 7 | family = "Aria", 8 | role = c("cre", "aut", "cph"), 9 | email = "aria@unina.it", 10 | comment = c(ORCID = "0000-0002-8517-9411")), 11 | person(given = "Corrado", 12 | family = "Cuccurullo", 13 | role = "aut", 14 | email = "cuccurullocorrado@gmail.com", 15 | comment = c(ORCID = "0000-0002-7401-8575"))) 16 | Description: Tool for quantitative research in scientometrics and bibliometrics. 17 | It implements the comprehensive workflow for science mapping analysis proposed in Aria M. and 18 | Cuccurullo C. (2017) <doi:10.1016/j.joi.2017.08.007>. 19 | 'bibliometrix' provides various routines for importing bibliographic data from 'SCOPUS', 20 | 'Clarivate Analytics Web of Science', 'Digital Science Dimensions', 21 | 'OpenAlex', 'Cochrane Library', 'Lens', 22 | and 'PubMed' databases, performing bibliometric analysis 23 | and building networks for co-citation, coupling, scientific collaboration and co-word analysis.
24 | License: GPL-3 25 | URL: https://www.bibliometrix.org, https://github.com/massimoaria/bibliometrix, https://www.k-synth.com 26 | BugReports: https://github.com/massimoaria/bibliometrix/issues 27 | LazyData: true 28 | Encoding: UTF-8 29 | Depends: R (>= 3.3.0) 30 | Imports: stats, 31 | grDevices, 32 | bibliometrixData, 33 | contentanalysis, 34 | dimensionsR, 35 | dplyr, 36 | DT, 37 | ca, 38 | forcats, 39 | ggplot2, 40 | ggrepel, 41 | igraph, 42 | Matrix, 43 | plotly, 44 | openalexR, 45 | openxlsx, 46 | pubmedR, 47 | purrr, 48 | readr, 49 | readxl, 50 | rscopus, 51 | shiny, 52 | shinycssloaders (>= 1.1.0), 53 | SnowballC, 54 | stringdist, 55 | stringi, 56 | stringr, 57 | tibble, 58 | tidyr, 59 | tidytext, 60 | visNetwork 61 | Suggests: 62 | knitr, 63 | rmarkdown, 64 | testthat (>= 3.0.0), 65 | wordcloud2 66 | RoxygenNote: 7.3.3 67 | NeedsCompilation: no 68 | Config/testthat/edition: 3 69 | -------------------------------------------------------------------------------- /man/duplicatedMatching.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duplicatedMatching.R 3 | \name{duplicatedMatching} 4 | \alias{duplicatedMatching} 5 | \title{Searching of duplicated records in a bibliographic database} 6 | \usage{ 7 | duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 8 | } 9 | \arguments{ 10 | \item{M}{is the bibliographic data frame.} 11 | 12 | \item{Field}{is a character object. It indicates one of the field tags used to identify duplicated records. Field can be equal to one of these tags: TI (title), AB (abstract), UT (manuscript ID).} 13 | 14 | \item{exact}{is logical. If exact = TRUE the function searches duplicates using exact matching. If exact = FALSE, 15 | the function uses the restricted Damerau-Levenshtein distance to find duplicated documents.} 16 | 17 | \item{tol}{is a numeric value giving the minimum relative similarity to match two manuscripts. Default value is \code{tol = 0.95}. 18 | To use the restricted Damerau-Levenshtein distance, the exact argument has to be set to FALSE.} 19 | } 20 | \value{ 21 | the value returned from \code{duplicatedMatching} is a data frame without duplicated records. 22 | } 23 | \description{ 24 | Search duplicated records in a data frame. 25 | } 26 | \details{ 27 | A bibliographic data frame is obtained by the converting function \code{\link{convert2df}}. 28 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 29 | The function identifies duplicated records in a bibliographic data frame and deletes them. 30 | Duplicate entries are identified through the restricted Damerau-Levenshtein distance. 31 | Two manuscripts that have a relative similarity measure greater than \code{tol} argument are stored in the output data frame only once. 32 | } 33 | \examples{ 34 | 35 | data(scientometrics, package = "bibliometrixData") 36 | 37 | M <- rbind(scientometrics[1:20, ], scientometrics[10:30, ]) 38 | 39 | newM <- duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 40 | 41 | dim(newM) 42 | 43 | } 44 | \seealso{ 45 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 46 | 47 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 48 | 49 | \code{\link{summary}} to obtain a summary of the results. 50 | 51 | \code{\link{plot}} to draw some useful plots of the results.
52 | } 53 | -------------------------------------------------------------------------------- /man/histPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/histPlot.R 3 | \name{histPlot} 4 | \alias{histPlot} 5 | \title{Plotting historical co-citation network} 6 | \usage{ 7 | histPlot( 8 | histResults, 9 | n = 20, 10 | size = 5, 11 | labelsize = 5, 12 | remove.isolates = TRUE, 13 | title_as_label = FALSE, 14 | label = "short", 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{histResults}{is an object of \code{class} "list" containing the following components: 20 | 21 | \tabular{lll}{ 22 | NetMatrix \tab \tab the historical citation network matrix\cr 23 | Degree \tab \tab the min degree of the network\cr 24 | histData \tab \tab the set of n most cited references\cr 25 | M \tab \tab the bibliographic data frame} 26 | 27 | is a network matrix obtained by the function \code{\link{histNetwork}}.} 28 | 29 | \item{n}{is an integer. It defines the number of vertices to plot.} 30 | 31 | \item{size}{is an integer. It defines the point size of the vertices. Default value is 5.} 32 | 33 | \item{labelsize}{is an integer. It indicates the label size in the plot. Default is \code{labelsize=5}.} 34 | 35 | \item{remove.isolates}{is logical. If TRUE, isolated vertices are not plotted.} 36 | 37 | \item{title_as_label}{is a logical. DEPRECATED} 38 | 39 | \item{label}{is a character. It indicates which label type to use as node id in the historiograph. It can be \code{label=c("short", "title", "keywords", "keywordsplus")}. 40 | Default is \code{label = "short"}.} 41 | 42 | \item{verbose}{is logical. If TRUE, results and plots are printed on screen.} 43 | } 44 | \value{ 45 | It is a list containing: a network object of the class \code{igraph} and a plot object of the class \code{ggraph}. 46 | } 47 | \description{ 48 | \code{histPlot} plots a historical co-citation network. 49 | } 50 | \details{ 51 | The function \code{\link{histPlot}} can plot a historical co-citation network previously created by \code{\link{histNetwork}}. 52 | } 53 | \examples{ 54 | # EXAMPLE Citation network 55 | \dontrun{ 56 | data(management, package = "bibliometrixData") 57 | 58 | histResults <- histNetwork(management, sep = ";") 59 | 60 | net <- histPlot(histResults, n = 20, labelsize = 5) 61 | } 62 | 63 | } 64 | \seealso{ 65 | \code{\link{histNetwork}} to compute a historical co-citation network. 66 | 67 | \code{\link{cocMatrix}} to compute a co-occurrence matrix. 68 | 69 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 70 | } 71 | -------------------------------------------------------------------------------- /R/localCitations.R: -------------------------------------------------------------------------------- 1 | #' Author local citations 2 | #' 3 | #' It calculates local citations (LCS) of authors and documents of a bibliographic collection. 4 | #' 5 | #' Local citations measure how many times an author (or a document) included in this collection has been cited by the documents also included in the collection. 6 | #' 7 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 9 | #' @param sep is the field separator character.
This character separates citations in each string of the CR column of the bibliographic data frame. The default is \code{sep = ";"}. 10 | #' @param fast.search is logical. If TRUE, the function calculates local citations only for 25 percent top cited documents. 11 | #' @param verbose is a logical. If TRUE, results are printed on screen. 12 | #' @return an object of \code{class} "list" containing author local citations and document local citations. 13 | #' 14 | #' 15 | #' @examples 16 | #' 17 | #' data(scientometrics, package = "bibliometrixData") 18 | #' 19 | #' CR <- localCitations(scientometrics, sep = ";") 20 | #' 21 | #' CR$Authors[1:10, ] 22 | #' CR$Papers[1:10, ] 23 | #' 24 | #' @seealso \code{\link{citations}} function for citation frequency distribution. 25 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 26 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 27 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 28 | #' 29 | #' @export 30 | 31 | localCitations <- function(M, fast.search = FALSE, sep = ";", verbose = FALSE) { 32 | M$TC[is.na(M$TC)] <- 0 33 | if (isTRUE(fast.search)) { 34 | loccit <- quantile(as.numeric(M$TC), 0.75, na.rm = TRUE) 35 | } else { 36 | loccit <- 1 37 | } 38 | 39 | H <- histNetwork(M, min.citations = loccit, sep = sep, network = FALSE, verbose = verbose) 40 | LCS <- H$histData 41 | M <- H$M 42 | rm(H) 43 | AU <- strsplit(M$AU, split = ";") 44 | n <- lengths(AU) 45 | 46 | df <- data.frame(AU = unlist(AU), LCS = rep(M$LCS, n)) 47 | AU <- aggregate(df$LCS, by = list(df$AU), FUN = "sum") 48 | names(AU) <- c("Author", "LocalCitations") 49 | AU <- AU[order(-AU$LocalCitations), ] 50 | 51 | if ("SR" %in% names(M)) { 52 | LCS <- data.frame(Paper = M$SR, DOI = M$DI, Year = M$PY, LCS = M$LCS, GCS = M$TC) 53 | LCS <- LCS[order(-LCS$LCS), ] 54 | } 55 | CR <- list(Authors = AU, Papers = LCS, M = M) 56 | return(CR) 57 | } 58 | -------------------------------------------------------------------------------- /R/keywordAssoc.R: -------------------------------------------------------------------------------- 1 | #' ID and DE keyword associations 2 | #' 3 | #' It associates authors' keywords to keywords plus. 4 | #' 5 | #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 7 | #' @param sep is the field separator character. This character separates keywords in each string of ID and DE columns of the bibliographic data frame. The default is \code{sep = ";"}. 8 | #' @param excludeKW is a character vector. It contains authors' keywords to exclude from the analysis. 9 | #' @param n is an integer. It indicates the number of authors' keywords to associate to each keyword plus. The default is \code{n = 10}. 10 | #' @return an object of \code{class} "list". 11 | #' 12 | #' 13 | #' 14 | #' @examples 15 | #' 16 | #' data(scientometrics, package = "bibliometrixData") 17 | #' 18 | #' KWlist <- keywordAssoc(scientometrics, sep = ";", n = 10, excludeKW = NA) 19 | #' 20 | #' # list of first 10 Keywords plus 21 | #' names(KWlist) 22 | #' 23 | #' # list of first 10 authors' keywords associated to the first Keyword plus 24 | #' KWlist[[1]][1:10] 25 | #' 26 | #' @seealso \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame.
27 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 28 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 29 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 30 | #' 31 | #' @export 32 | keywordAssoc <- function(M, sep = ";", n = 10, excludeKW = NA) { 33 | excludeKW <- toupper(excludeKW) 34 | 35 | WDE <- cocMatrix(M, Field = "DE", type = "sparse", sep = sep) 36 | WID <- cocMatrix(M, Field = "ID", type = "sparse", sep = sep) 37 | 38 | NetMatrix <- Matrix::crossprod(WID, WDE) 39 | if (!is.na(excludeKW)) { 40 | NetMatrix <- NetMatrix[!(row.names(NetMatrix) %in% excludeKW), !(colnames(NetMatrix) %in% excludeKW)] 41 | } 42 | NetMatrix <- NetMatrix[!is.na(row.names(NetMatrix)), !is.na(colnames(NetMatrix))] 43 | NetMatrix <- NetMatrix[nchar(row.names(NetMatrix)) > 0, nchar(colnames(NetMatrix)) > 0] 44 | rS <- Matrix::rowSums(NetMatrix) 45 | 46 | NetDegree <- sort(rS, decreasing = TRUE)[n] 47 | NET <- NetMatrix[rS >= NetDegree, Matrix::colSums(NetMatrix) > 1] 48 | KW <- apply(NET, 1, function(x) { 49 | i <- sort(x, decreasing = TRUE)[n] 50 | x <- sort(x[x >= i], decreasing = TRUE) 51 | return(x) 52 | }) 53 | 54 | return(KW) 55 | } 56 | -------------------------------------------------------------------------------- /man/fieldByYear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fieldByYear.R 3 | \name{fieldByYear} 4 | \alias{fieldByYear} 5 | \title{Field Tag distribution by Year} 6 | \usage{ 7 | fieldByYear( 8 | M, 9 | field = "ID", 10 | timespan = NULL, 11 | min.freq = 2, 12 | n.items = 5, 13 | labelsize = NULL, 14 | remove.terms = NULL, 15 | synonyms = NULL, 16 | dynamic.plot = FALSE, 17 | graph = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{M}{is a bibliographic data frame obtained by \code{\link{convert2df}} function.} 22 | 23 | \item{field}{is a character object. It indicates one of the field tags of the 24 | standard ISI WoS Field Tag codify.} 25 | 26 | \item{timespan}{is a vector with the min and max year. If it is = NULL, the analysis is performed on the entire period. Default is \code{timespan = NULL}.} 27 | 28 | \item{min.freq}{is an integer. It indicates the min frequency of the items to include in the analysis} 29 | 30 | \item{n.items}{is an integer. It indicates the maximum number of items per year to include in the plot.} 31 | 32 | \item{labelsize}{is a deprecated argument. It will be removed in the next update.} 33 | 34 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.} 35 | 36 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.} 37 | 38 | \item{dynamic.plot}{is a logical. If TRUE plot aesthetics are optimized for plotly package.} 39 | 40 | \item{graph}{is logical. If TRUE, the function plots the Field Tag distribution by Year graph.
Default is \code{graph = TRUE}.} 41 | } 42 | \value{ 43 | The function \code{fieldByYear} returns a list containing three objects: 44 | \tabular{lll}{ 45 | \code{df} \tab \tab is a data frame\cr 46 | \code{df_graph}\tab \tab is a data frame with data used to build the graph\cr 47 | \code{graph} \tab \tab a ggplot object} 48 | } 49 | \description{ 50 | It calculates the median year for each item of a field tag. 51 | } 52 | \examples{ 53 | data(management, package = "bibliometrixData") 54 | timespan <- c(2005, 2015) 55 | res <- fieldByYear(management, 56 | field = "ID", timespan = timespan, 57 | min.freq = 5, n.items = 5, graph = TRUE 58 | ) 59 | 60 | } 61 | \seealso{ 62 | \code{\link{biblioAnalysis}} function for bibliometric analysis 63 | 64 | \code{\link{summary}} method for class '\code{bibliometrix}' 65 | } 66 | -------------------------------------------------------------------------------- /man/metaTagExtraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metaTagExtraction.R 3 | \name{metaTagExtraction} 4 | \alias{metaTagExtraction} 5 | \title{Meta-Field Tag Extraction} 6 | \usage{ 7 | metaTagExtraction(M, Field = "CR_AU", sep = ";", aff.disamb = TRUE) 8 | } 9 | \arguments{ 10 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.} 12 | 13 | \item{Field}{is a character object. New tag extracted from aggregated data is specified by this string. 14 | Field can be equal to one of these tags: 15 | \tabular{lll}{ 16 | \code{"CR_AU"}\tab \tab First Author of each cited reference\cr 17 | \code{"CR_SO"}\tab \tab Source of each cited reference\cr 18 | \code{"AU_CO"}\tab \tab Country of affiliation for co-authors \cr 19 | \code{"AU1_CO"}\tab \tab Country of affiliation for the first author\cr 20 | \code{"AU_UN"}\tab \tab University of affiliation for each co-author and the corresponding author (AU1_UN)\cr 21 | \code{"SR"}\tab \tab Short tag of the document (as used in reference lists)}} 22 | 23 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 24 | 25 | \item{aff.disamb}{is a logical. If TRUE and Field="AU_UN", then a disambiguation algorithm is used to identify and match scientific affiliations 26 | (univ, research centers, etc.). The default is \code{aff.disamb=TRUE}.} 27 | } 28 | \value{ 29 | the bibliometric data frame with a new column containing data about new field tag indicated in the argument \code{Field}. 30 | } 31 | \description{ 32 | It extracts other field tags, different from the standard WoS/SCOPUS codification.
33 | } 34 | \examples{ 35 | # Example 1: First Authors for each cited reference 36 | 37 | data(scientometrics, package = "bibliometrixData") 38 | scientometrics <- metaTagExtraction(scientometrics, Field = "CR_AU", sep = ";") 39 | unlist(strsplit(scientometrics$CR_AU[1], ";")) 40 | 41 | 42 | # Example 2: Source for each cited reference 43 | 44 | data(scientometrics) 45 | scientometrics <- metaTagExtraction(scientometrics, Field = "CR_SO", sep = ";") 46 | unlist(strsplit(scientometrics$CR_SO[1], ";")) 47 | 48 | # Example 3: Affiliation country for co-authors 49 | 50 | data(scientometrics) 51 | scientometrics <- metaTagExtraction(scientometrics, Field = "AU_CO", sep = ";") 52 | scientometrics$AU_CO[1:10] 53 | 54 | } 55 | \seealso{ 56 | \code{\link{convert2df}} for importing and converting bibliographic files into a data frame. 57 | 58 | \code{\link{biblioAnalysis}} function for bibliometric analysis 59 | } 60 | -------------------------------------------------------------------------------- /man/rpys.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rpys.R 3 | \name{rpys} 4 | \alias{rpys} 5 | \title{Reference Publication Year Spectroscopy} 6 | \usage{ 7 | rpys(M, sep = ";", timespan = NULL, median.window = "centered", graph = T) 8 | } 9 | \arguments{ 10 | \item{M}{is a data frame obtained by the converting function 11 | \code{\link{convert2df}}. It is a data matrix with cases corresponding to 12 | articles and variables to Field Tag in the original ISI or SCOPUS file.} 13 | 14 | \item{sep}{is the cited-references separator character. This character separates cited-references in the CR 15 | column of the data frame. The default is \code{sep = ";"}.} 16 | 17 | \item{timespan}{is a numeric vector c(min year,max year). The default value is NULL (the entire timespan is considered).} 18 | 19 | \item{median.window}{is a character string that can be "centered" or "backward". It indicates the type of median to be used. 20 | "centered" is the default value and it uses the centered 5-year median (t-2 to t+2) as proposed by Marx et al. (2014). "backward" uses the backward 5-year median (t-4 to t) as proposed by Aria and Cuccurullo (2017).} 21 | 22 | \item{graph}{is a logical. If TRUE, the function plots the spectroscopy; otherwise, the plot is created but not drawn.} 23 | } 24 | \value{ 25 | a list containing the spectroscopy (class ggplot2) and three data frames with the number of citations 26 | per year, the list of the cited references for each year, and the reference list with citations recorded year by year, respectively. 27 | } 28 | \description{ 29 | \code{rpys} computes a Reference Publication Year Spectroscopy for detecting 30 | the Historical Roots of Research Fields. 31 | The method was introduced by Marx et al., 2014.\cr\cr 32 | } 33 | \details{ 34 | References:\cr\cr 35 | Marx, W., Bornmann, L., Barth, A., & Leydesdorff, L. (2014). 36 | Detecting the historical roots of research fields by reference publication 37 | year spectroscopy (RPYS). Journal of the Association for Information Science and Technology, 38 | 65(4), 751-764.\cr\cr 39 | Thor A., Bornmann L., Marx W. & Mutz R. (2018). 40 | Identifying single influential publications in a research field: new analysis opportunities of the CRExplorer.
41 | Scientometrics, 116:591–608 https://doi.org/10.1007/s11192-018-2733-7\cr\cr 42 | } 43 | \examples{ 44 | 45 | \dontrun{ 46 | data(management, package = "bibliometrixData") 47 | res <- rpys(management, sep = ";", graph = TRUE) 48 | } 49 | 50 | } 51 | \seealso{ 52 | \code{\link{convert2df}} to import and convert an ISI or SCOPUS 53 | Export file in a data frame. 54 | 55 | \code{\link{biblioAnalysis}} to perform a bibliometric analysis. 56 | 57 | \code{\link{biblioNetwork}} to compute a bibliographic network. 58 | } 59 | -------------------------------------------------------------------------------- /R/net2Pajek.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("id", "name")) 2 | #' Save a network graph object as Pajek files 3 | #' 4 | #' 5 | #' The function \code{\link{net2Pajek}} saves a bibliographic network previously created by \code{\link{networkPlot}} as Pajek files. 6 | #' 7 | #' @param net is a network graph object returned by the function \code{\link{networkPlot}}. 8 | #' @param filename is a character. It indicates the filename for Pajek export files. 9 | #' @param path is a character. It indicates the path where the files will be saved. When path = NULL, the files will be saved in the current folder. Default is NULL. 10 | #' @return The function returns no object but will save three Pajek files in the folder given in the "path" argument with the name "filename.clu," "filename.vec," and "filename.net." 11 | #' 12 | #' @examples 13 | #' \dontrun{ 14 | #' data(management, package = "bibliometrixData") 15 | #' 16 | #' NetMatrix <- biblioNetwork(management, 17 | #' analysis = "co-occurrences", 18 | #' network = "keywords", sep = ";" 19 | #' ) 20 | #' 21 | #' net <- networkPlot(NetMatrix, n = 30, type = "auto", Title = "Co-occurrence Network", labelsize = 1) 22 | #' 23 | #' net2Pajek(net, filename = "pajekfiles", path = NULL) 24 | #' } 25 | #' @seealso \code{\link{net2VOSviewer}} to export and plot the network with VOSviewer software.
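#' A sketch of the ".net" layout written by the function body below
#' (illustrative values, not taken from the original documentation): a
#' "*Vertices n" header, one 'id "name"' row per node, then an "*Edges"
#' header followed by "from to weight" triples, e.g.:
#' *Vertices 2
#' 1 "TERM A"
#' 2 "TERM B"
#' *Edges 1
#' 1 2 5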
26 | #' 27 | #' @export 28 | net2Pajek <- function(net, filename = "my_pajek_network", path = NULL) { 29 | graph <- net$graph 30 | 31 | nodes <- igraph::as_data_frame(graph, what = c("vertices")) %>% 32 | mutate(id = row_number()) 33 | 34 | edges <- igraph::as_data_frame(graph, what = c("edges")) 35 | edges <- edges %>% 36 | left_join(nodes %>% select(id, name), by = c("from" = "name")) %>% 37 | rename(id_from = id) %>% 38 | left_join(nodes %>% select(id, name), by = c("to" = "name")) %>% 39 | rename(id_to = id) 40 | 41 | ### Creation of NET file 42 | if (!is.null(path)) { 43 | if (substr(path, nchar(path), nchar(path)) != "/") { 44 | path <- paste0(path, "/") 45 | } 46 | } 47 | filename <- paste0(path, filename) 48 | 49 | file <- paste0(filename, ".net") 50 | 51 | # Nodes 52 | write(paste0("*Vertices ", nrow(nodes)), file = file) 53 | write(paste0(nodes$id, ' "', nodes$name, '"'), file = file, append = T) 54 | 55 | # Edges (the header carries the edge count) 56 | write(paste0("*Edges ", nrow(edges)), file = file, append = T) 57 | write(paste0(edges$id_from, " ", edges$id_to, " ", edges$weight), file = file, append = T) 58 | 59 | ### Creation of VEC file 60 | file <- paste0(filename, ".vec") 61 | 62 | # Nodes 63 | write(paste0("*Vertices ", nrow(nodes)), file = file) 64 | write(paste0(nodes$deg), file = file, append = T) 65 | 66 | ### Creation of CLU file 67 | file <- paste0(filename, ".clu") 68 | 69 | # Nodes 70 | write(paste0("*Vertices ", nrow(nodes)), file = file) 71 | write(paste0(nodes$community), file = file, append = T) 72 | } 73 | -------------------------------------------------------------------------------- /man/Hindex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Hindex.R 3 | \name{Hindex} 4 | \alias{Hindex} 5 | \title{h-index calculation} 6 | \usage{ 7 | Hindex(M, field = "author", elements = NULL, sep = ";", years = Inf) 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}. 11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file.} 12 | 13 | \item{field}{is character. It can be equal to c("author", "source"). field indicates if the H-index has to be calculated for a list of authors or for a list of sources. Default 14 | value is \code{field = "author"}.} 15 | 16 | \item{elements}{is a character vector. It contains the authors' names list or the source list for which you want to calculate the H-index. When the field is 17 | "author", the argument has the form c("SURNAME1 N","SURNAME2 N",...), in other words, for each author: surname and initials separated by one blank space. If elements=NULL, the function calculates impact indices for all elements contained in the data frame. 18 | i.e. for the authors SEMPRONIO TIZIO CAIO and ARIA MASSIMO \code{elements} argument is \code{elements = c("SEMPRONIO TC", "ARIA M")}.} 19 | 20 | \item{sep}{is the field separator character. This character separates authors in each string of AU column of the bibliographic data frame. The default is \code{sep = ";"}.} 21 | 22 | \item{years}{is an integer. It indicates the number of years to consider for Hindex calculation. Default is Inf.} 23 | } 24 | \value{ 25 | an object of \code{class} "list".
It contains two elements: H is a data frame with h-index, g-index and m-index for each author; CitationList is a list with the bibliographic collection for each author. 26 | } 27 | \description{ 28 | It calculates the authors' h-index and its variants. 29 | } 30 | \examples{ 31 | 32 | ### EXAMPLE 1: ### 33 | 34 | data(scientometrics, package = "bibliometrixData") 35 | 36 | authors <- c("SMALL H", "CHEN DZ") 37 | 38 | Hindex(scientometrics, field = "author", elements = authors, sep = ";")$H 39 | 40 | Hindex(scientometrics, field = "source", elements = "SCIENTOMETRICS", sep = ";")$H 41 | 42 | ### EXAMPLE 2: Garfield h-index ### 43 | 44 | data(garfield, package = "bibliometrixData") 45 | 46 | indices <- Hindex(garfield, field = "author", elements = "GARFIELD E", years = Inf, sep = ";") 47 | 48 | # h-index, g-index and m-index of Eugene Garfield 49 | indices$H 50 | 51 | # Papers and total citations 52 | head(indices$CitationList[[1]]) 53 | 54 | } 55 | \seealso{ 56 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 57 | 58 | \code{\link{biblioAnalysis}} function for bibliometric analysis. 59 | 60 | \code{\link{summary}} to obtain a summary of the results. 61 | 62 | \code{\link{plot}} to draw some useful plots of the results. 63 | } 64 | -------------------------------------------------------------------------------- /R/pubmed2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | pubmed2df <- function(D) { 3 | D <- D[nchar(D) > 0] # remove empty rows 4 | 5 | for (i in 1:length(D)) { 6 | if (substr(D[i], 1, 4) == " ") substr(D[i], 1, 4) <- substr(D[i - 1], 1, 4) 7 | } 8 | 9 | Papers <- which(regexpr("PMID-", D) == 1) # first row of each document 10 | nP <- length(Papers) # number of documents 11 | 12 | rowPapers <- diff(c(Papers, length(D) + 1)) 13 | 14 | numPapers <- rep(1:nP, rowPapers) 15 | 16 | DATA <- data.frame(Tag = substr(D, 1, 4), content = substr(D, 7, nchar(D)), Paper = numPapers) 17 | DATA$Tag <- gsub(" ", "", DATA$Tag) 18 | df <- DATA %>% 19 | group_by(Paper, Tag) %>% 20 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 21 | arrange(Tag, Paper) %>% 22 | pivot_wider(names_from = Tag, values_from = cont) %>% 23 | ungroup() 24 | df <- as.data.frame(df) 25 | 26 | # rename field tags 27 | error <- 0 28 | old_labs <- c("AD", "AUID", "FAU", "IS", "IP", "SO", "JT", "TA", "MH", "PG", "PT", "VI", "DP") 29 | new_labs <- c("C1", "OI", "AF", "SN", "IS", "SO2", "SO", "J9", "DE", "PP", "DT", "VL", "PY") 30 | lab <- names(df) 31 | for (j in 1:length(old_labs)) { 32 | i <- which(lab %in% old_labs[j]) 33 | if (length(i) > 0) { 34 | lab[i] <- new_labs[j] 35 | } else { 36 | error <- 1 37 | } 38 | } 39 | names(df) <- lab 40 | if (error == 1) { 41 | cat("\nWarning:\nIn your file, some mandatory metadata are missing.
Bibliometrix functions may not work properly!\n 42 | Please, take a look at the vignettes: 43 | - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html) 44 | - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)\n\n") 45 | } 46 | 47 | # extract DOIs 48 | df$DI <- trimws(unlist(lapply(strsplit(df$LID, "\\["), "[", 1))) 49 | df$PY <- as.numeric(substr(df$PY, 1, 4)) 50 | 51 | 52 | ### replace "---" with ";" 53 | tagsComma <- c("AU", "AF", "DE", "AID", "OT", "PHST", "DT") 54 | nolab <- setdiff(tagsComma, names(df)) 55 | tagsComma <- tagsComma[(!(tagsComma %in% nolab))] 56 | 57 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 58 | gsub("---", ";", x) 59 | })) 60 | 61 | ### replace "---" with " " 62 | otherTags <- setdiff(names(df), tagsComma) 63 | df2 <- data.frame(lapply(df[otherTags], function(x) { 64 | trimES(gsub("---", " ", x)) 65 | })) 66 | df <- cbind(df1, df2) 67 | rm(df1, df2) 68 | 69 | df$DB <- "PUBMED" 70 | 71 | # remove * char from keywords 72 | df$DE <- df$ID <- gsub("\\*", "", df$DE) 73 | AB <- df$AB 74 | TI <- df$TI 75 | DE <- df$DE 76 | df <- data.frame(lapply(df, toupper)) 77 | df$AB_raw <- AB 78 | df$TI_raw <- TI 79 | df$DE_raw <- DE 80 | # add sep ; to affiliations 81 | df$C1 <- gsub("\\.", ".;", df$C1) 82 | df$RP <- NA 83 | df <- df[names(df) != "Paper"] 84 | 85 | return(df) 86 | } 87 | -------------------------------------------------------------------------------- /R/idByAuthor.R: -------------------------------------------------------------------------------- 1 | #' Get Complete Author Information and ID from Scopus 2 | #' 3 | #' Uses the SCOPUS API author search to retrieve author identification information. 4 | #' 5 | #' @param df is a data frame composed of three columns: 6 | #' \tabular{lll}{ 7 | #' \code{lastname}\tab \tab author's last name\cr 8 | #' \code{firstname}\tab \tab author's first name\cr 9 | #' \code{affiliation}\tab \tab Part of the affiliation name (university name, city, etc.)} 10 | #' i.e. df[1,1:3]<-c("aria","massimo","naples") 11 | #' When affiliation is not specified, the field df$affiliation has to be NA. 12 | #' i.e. df[2,1:3]<-c("cuccurullo","corrado", NA) 13 | #' @param api_key is a character. It contains the Elsevier API key. Information about how to obtain an API Key \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website} 14 | #' @return a data frame with cases corresponding to authors and variables to author's information and ID got from SCOPUS. 15 | #' @examples 16 | #' ## Request a personal API Key to Elsevier web page https://dev.elsevier.com/sc_apis.html 17 | #' # 18 | #' # api_key="your api key" 19 | #' 20 | #' ## create a data frame with the list of authors to get information and IDs 21 | #' # i.e.
df[1,1:3]<-c("aria","massimo","naples") 22 | #' # df[2,1:3]<-c("cuccurullo","corrado", NA) 23 | #' 24 | #' ## run idByAuthor function 25 | #' # 26 | #' # authorsID <- idByAuthor(df, api_key) 27 | #' 28 | #' @seealso \code{\link{retrievalByAuthorID}} for downloading the complete author bibliographic collection from SCOPUS 29 | #' 30 | #' @export 31 | 32 | idByAuthor <- function(df, api_key) { 33 | n <- dim(df)[1] 34 | 35 | ### download authors' info 36 | AU_ID <- NA 37 | AU_AFF <- NA 38 | AU_count <- NA 39 | 40 | for (j in 1:n) { 41 | lastname <- tolower(df[j, 1]) 42 | firstname <- tolower(df[j, 2]) 43 | if (!is.na(df[j, 3])) { 44 | query <- paste("affil(", df[j, 3], ")", sep = "") 45 | } else { 46 | query <- NULL 47 | } 48 | cat("\nSearching author's info: ", toupper(df[j, 1]), toupper(df[j, 2])) 49 | 50 | AU_info <- get_complete_author_info(last_name = lastname, first_name = firstname, api_key = api_key, query = query) 51 | 52 | ### author id 53 | if (AU_info$content$`search-results`$`opensearch:totalResults` != 0) { 54 | AU_ID[j] <- AU_info[[2]]$`search-results`$entr[[1]]$`dc:identifier` 55 | AU_ID[j] <- gsub("AUTHOR_ID:", "", AU_ID[j]) 56 | AU_info2 <- AU_info[[2]] 57 | aff <- AU_info2$`search-results`$entry[[1]]$`affiliation-current` 58 | AU_AFF[j] <- paste(aff$`affiliation-name`, ", ", aff$`affiliation-city`, ", ", aff$`affiliation-country`, sep = "") 59 | ### author document counts 60 | AU_count[j] <- AU_info[[2]]$`search-results`$entr[[1]]$`document-count` 61 | } else { 62 | AU_ID[j] <- NA 63 | AU_AFF[j] <- NA 64 | AU_count[j] <- NA 65 | } 66 | } 67 | authorsID <- data.frame(lastname = df[, 1], firstname = df[, 2], id = AU_ID, affiliation = AU_AFF, count = AU_count) 68 | return(authorsID) 69 | } 70 | -------------------------------------------------------------------------------- /R/duplicatedMatching.R: -------------------------------------------------------------------------------- 1 | #' Searching of duplicated records in a bibliographic database 2 | #' 3 | #' Search duplicated records in a dataframe. 4 | #' 5 | #' A bibliographic data frame is obtained by the converting function \code{\link{convert2df}}. 6 | #' It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics WoS file. 7 | #' The function identifies duplicated records in a bibliographic data frame and deletes them. 8 | #' Duplicate entries are identified through the restricted Damerau-Levenshtein distance. 9 | #' Two manuscripts that have a relative similarity measure greater than \code{tol} argument are stored in the output data frame only once. 10 | #' 11 | #' @param M is the bibliographic data frame. 12 | #' @param Field is a character object. It indicates one of the field tags used to identify duplicated records. Field can be equal to one of these tags: TI (title), AB (abstract), UT (manuscript ID). 13 | #' @param exact is logical. If exact = TRUE the function searches duplicates using exact matching. If exact=FALSE, 14 | #' the function uses the restricted Damerau-Levenshtein distance to find duplicated documents. 15 | #' @param tol is a numeric value giving the minimum relative similarity to match two manuscripts. Default value is \code{tol = 0.95}. 16 | #' To use the restricted Damerau-Levenshtein distance, exact argument has to be set as FALSE. 17 | #' @return the value returned from \code{duplicatedMatching} is a data frame without duplicated records. 
18 | #' 19 | #' 20 | #' @examples 21 | #' 22 | #' data(scientometrics, package = "bibliometrixData") 23 | #' 24 | #' M <- rbind(scientometrics[1:20, ], scientometrics[10:30, ]) 25 | #' 26 | #' newM <- duplicatedMatching(M, Field = "TI", exact = FALSE, tol = 0.95) 27 | #' 28 | #' dim(newM) 29 | #' 30 | #' @seealso \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 31 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis. 32 | #' @seealso \code{\link{summary}} to obtain a summary of the results. 33 | #' @seealso \code{\link{plot}} to draw some useful plots of the results. 34 | #' 35 | #' @export 36 | duplicatedMatching <- function(M, Field = "TI", exact = FALSE, tol = 0.95) { 37 | if (!(Field %in% names(M))) { 38 | cat("\nField", Field, "is not a valid column name of your bibliographic data frame\n ") 39 | return(M) 40 | } 41 | if (isTRUE(exact)) { 42 | exact <- "true" 43 | } else { 44 | exact <- "false" 45 | } 46 | switch(exact, 47 | true = { 48 | M <- M[!duplicated(M[Field]), ] 49 | }, 50 | false = { 51 | a <- b <- M[[Field]] 52 | an <- nchar(a) 53 | A <- matrix(an, length(an), length(an)) 54 | A[is.na(A)] <- 0 55 | B <- t(A) 56 | C <- A 57 | C[B > A] <- B[B > A] 58 | D <- as.matrix(stringdistmatrix(a)) 59 | Dn <- 1 - (D / C) 60 | Dn[Dn > tol] <- 2 61 | M <- M[!duplicated(Dn), ] 62 | } 63 | ) 64 | 65 | return(M) 66 | } 67 | -------------------------------------------------------------------------------- /R/dominance.R: -------------------------------------------------------------------------------- 1 | #' Authors' dominance ranking 2 | #' 3 | #' It calculates the authors' dominance ranking from an object of the class '\code{bibliometrix}' as proposed by Kumar & Kumar, 2008. 4 | #' @param results is an object of the class '\code{bibliometrix}' for which the analysis of the authors' dominance ranking is desired. 5 | #' @param k is an integer, used for table formatting (number of authors). Default value is 10. 6 | #' @return The function \code{dominance} returns a data frame with cases corresponding to the first \code{k} most productive authors and variables to the typical fields of a dominance analysis. 7 | #' 8 | #' the data frame variables are: 9 | #' \tabular{lll}{ 10 | #' \code{Author} \tab \tab Author's name\cr 11 | #' \code{Dominance Factor} \tab \tab Dominance Factor (DF = FAA / MAA)\cr 12 | #' \code{Tot Articles} \tab \tab N. of Authored Articles (TAA)\cr 13 | #' \code{Single Authored} \tab \tab N. of Single-Authored Articles (SAA)\cr 14 | #' \code{Multi Authored} \tab \tab N. of Multi-Authored Articles (MAA=TAA-SAA)\cr 15 | #' \code{First Authored} \tab \tab N. of First Authored Articles (FAA)\cr 16 | #' \code{Rank by Articles} \tab \tab Author Ranking by N.
of Articles\cr 17 | #' \code{Rank by DF} \tab \tab Author Ranking by Dominance Factor} 18 | #' 19 | #' 20 | #' 21 | #' @examples 22 | #' data(scientometrics, package = "bibliometrixData") 23 | #' results <- biblioAnalysis(scientometrics) 24 | #' DF <- dominance(results) 25 | #' DF 26 | #' 27 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis 28 | #' @seealso \code{\link{summary}} method for class '\code{bibliometrix}' 29 | #' 30 | #' @export 31 | 32 | dominance <- function(results, k = 10) { 33 | # Author Rank by Dominance Rank (Kumar & Kumar, 2008) 34 | 35 | # options(warn=-1) 36 | 37 | if (!inherits(results, "bibliometrix")) { 38 | cat('\n argument "results" has to be an object of class "bibliometrix"\n') 39 | return(NA) 40 | } 41 | 42 | Nmf <- table(results$FirstAuthors[results$nAUperPaper > 1]) 43 | FA <- names(Nmf) 44 | # FA=gsub(" ", "", FA, fixed = TRUE) # delete spaces 45 | 46 | AU <- names(results$Authors) 47 | 48 | 49 | Tot <- Single <- rep(NA, length(FA)) 50 | for (i in 1:length(FA)) { 51 | Single[i] <- sum(results$FirstAuthors[results$nAUperPaper == 1] == FA[i]) 52 | Tot[i] <- results$Authors[FA[i] == AU] 53 | } 54 | Dominance <- Nmf / (Tot - Single) 55 | 56 | D <- data.frame("Author" = FA, "Dominance Factor" = as.numeric(Dominance), "Articles" = Tot, "Single-Authored" = Single, "Multi-Authored" = Tot - Single, "First-Author" = as.numeric(Nmf)) 57 | D <- D[order(-D[, 3]), ] 58 | D <- D[1:k, ] 59 | D$RankbyArticles <- rank(-D$Articles, ties.method = "min") 60 | D <- D[order(-D$Dominance.Factor), ] 61 | D$RankDF <- rank(-D$Dominance.Factor, ties.method = "min") 62 | names(D) <- c("Author", "Dominance Factor", "Tot Articles", "Single-Authored", "Multi-Authored", "First-Authored", "Rank by Articles", "Rank by DF") 63 | row.names(D) <- 1:k 64 | return(D) 65 | } 66 | -------------------------------------------------------------------------------- /R/tableTag.R: -------------------------------------------------------------------------------- 1 | #' Tabulate elements from a Tag Field column 2 | #' 3 | #' It tabulates elements from a Tag Field column of a bibliographic data frame. 4 | #' 5 | #' \code{tableTag} is an internal routine of main function \code{\link{biblioAnalysis}}. 6 | #' 7 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}. 8 | #' It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file. 9 | #' @param Tag is a character object. It indicates one of the field tags of the 10 | #' standard ISI WoS Field Tag codification. 11 | #' @param sep is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}. 12 | #' @param ngrams is an integer between 1 and 3. It indicates the type of n-gram to extract from titles or abstracts. 13 | #' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}. 14 | #' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
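#' For instance (a hypothetical value, for illustration only), passing
#' \code{synonyms = c("BIBLIOMETRICS; BIBLIOMETRIC ANALYSIS")} counts every
#' occurrence of "BIBLIOMETRIC ANALYSIS" under the single term "BIBLIOMETRICS".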
15 | #' @return an object of class \code{table} 16 | #' @examples 17 | #' 18 | #' data(scientometrics, package = "bibliometrixData") 19 | #' Tab <- tableTag(scientometrics, Tag = "CR", sep = ";") 20 | #' Tab[1:10] 21 | #' 22 | #' @export 23 | tableTag <- function(M, Tag = "CR", sep = ";", ngrams = 1, remove.terms = NULL, synonyms = NULL) { 24 | ## check and remove duplicated 25 | M <- M[!duplicated(M$SR), ] 26 | 27 | if (Tag %in% c("AB", "TI")) { 28 | M <- termExtraction(M, Field = Tag, stemming = F, verbose = FALSE, ngrams = ngrams, remove.terms = remove.terms, synonyms = synonyms) 29 | i <- which(names(M) == paste(Tag, "_TM", sep = "")) 30 | remove.terms <- NULL 31 | synonyms <- NULL 32 | } else { 33 | i <- which(names(M) == Tag) 34 | } 35 | 36 | if (Tag == "C1") { 37 | M$C1 <- gsub("\\[.+?]", "", M$C1) 38 | } 39 | 40 | Tab <- unlist(strsplit(as.character(M %>% dplyr::pull(i)), sep)) 41 | 42 | ### inserted to remove punct and extra spaces #### 43 | Tab <- trimws(gsub("\\s+|\\.|\\,", " ", Tab)) 44 | #### 45 | # Tab<-Tab[Tab!=""] 46 | Tab <- Tab[nchar(Tab) > 0] 47 | 48 | # Merge synonyms in the vector synonyms 49 | if (length(synonyms) > 0 & is.character(synonyms)) { 50 | s <- strsplit(toupper(synonyms), ";") 51 | snew <- trimws(unlist(lapply(s, function(l) l[1]))) 52 | sold <- (lapply(s, function(l) trimws(l[-1]))) 53 | for (i in 1:length(s)) { 54 | Tab[Tab %in% unlist(sold[[i]])] <- snew[i] 55 | } 56 | } 57 | 58 | Tab <- sort(table(Tab), decreasing = TRUE) 59 | # remove terms from ID and DE 60 | if ((Tag %in% c("DE", "ID", "KW_Merged")) & (!is.null(remove.terms))) { 61 | term <- setdiff(names(Tab), toupper(remove.terms)) 62 | Tab <- Tab[term] 63 | } 64 | 65 | return(Tab) 66 | } 67 | -------------------------------------------------------------------------------- /R/isi2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Paper", "Tag", "content", "cont")) 2 | isi2df <- function(D) { 3 | # D <- D[nchar(D)>0] # remove empty rows 4 | 5 | # remove empty rows and strange characters 6 | res <- try(D <- D[nchar(D) > 1], silent = T) 7 | if (inherits(res, "try-error")) { 8 | D <- removeStrangeChar(D) 9 | # next 10 | } else { 11 | D <- res 12 | rm(res) 13 | } 14 | 15 | D <- D[!(substr(D, 1, 3) %in% c("FN ", "VR "))] 16 | 17 | for (i in 1:length(D)) { 18 | if (substr(D[i], 1, 3) == " ") substr(D[i], 1, 3) <- substr(D[i - 1], 1, 3) 19 | } 20 | Papers <- which(substr(D, 1, 3) == "PT ") # first row of each document 21 | nP <- length(Papers) # number of documents 22 | 23 | rowPapers <- diff(c(Papers, length(D) + 1)) 24 | 25 | numPapers <- rep(1:nP, rowPapers) 26 | 27 | DATA <- data.frame(Tag = substr(D, 1, 3), content = substr(D, 4, nchar(D)), Paper = numPapers) 28 | DATA$Tag <- gsub(" ", "", DATA$Tag) 29 | df <- DATA %>% 30 | group_by(Paper, Tag) %>% 31 | summarise(cont = paste(content, collapse = "---", sep = "")) %>% 32 | arrange(Tag, Paper) %>% 33 | pivot_wider(names_from = Tag, values_from = cont) %>% 34 | ungroup() 35 | df <- as.data.frame(df) 36 | 37 | 38 | df$PY <- as.numeric(df$PY) 39 | 40 | missingTags <- setdiff(c("AU", "DE", "C1", "RP", "CR", "PY", "SO", "TI", "TC"), names(df)) 41 | if (length(missingTags) > 0) { 42 | cat("\nWarning:\nIn your file, some mandatory metadata are missing. 
Bibliometrix functions may not work properly!\n 43 | Please, take a look at the vignettes: 44 | - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html) 45 | - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)\n\n") 46 | cat("\nMissing fields: ", missingTags, "\n") 47 | } 48 | 49 | ### replace "---" with ";" 50 | tagsComma <- c("AU", "AF", "CR") 51 | 52 | nolab <- setdiff(tagsComma, names(df)) 53 | 54 | tagsComma <- tagsComma[(!(tagsComma %in% nolab))] 55 | 56 | df1 <- data.frame(lapply(df[tagsComma], function(x) { 57 | gsub("---", ";", x) 58 | })) 59 | 60 | ### replace "---" with " " 61 | otherTags <- setdiff(names(df), tagsComma) 62 | df2 <- data.frame(lapply(df[otherTags], function(x) { 63 | trimES(gsub("---", " ", x)) 64 | })) 65 | df <- cbind(df1, df2) 66 | rm(df1, df2) 67 | 68 | ### store raw affiliation format to extract links among authors and affiliations 69 | df$C1raw <- df$C1 70 | ### 71 | 72 | df$DB <- "ISI" 73 | 74 | # Authors 75 | df$AU <- trimES(gsub(",", " ", df$AU)) 76 | 77 | # Toupper 78 | DI <- df$DI 79 | AB <- df$AB 80 | TI <- df$TI 81 | DE <- df$DE 82 | df <- data.frame(lapply(df, toupper)) 83 | df$DI <- DI 84 | df$AB_raw <- AB 85 | df$TI_raw <- TI 86 | df$DE_raw <- DE 87 | 88 | # add sep ; to affiliations 89 | df$C1 <- trim(gsub("\\[.*?\\]", "", df$C1)) # to remove author info in square brackets 90 | df$C1 <- gsub("\\.", ".;", df$C1) 91 | 92 | df <- df[names(df) != "Paper"] 93 | 94 | return(df) 95 | } 96 | -------------------------------------------------------------------------------- /man/retrievalByAuthorID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/retrievalByAuthorID.R 3 | \name{retrievalByAuthorID} 4 | \alias{retrievalByAuthorID} 5 | \title{Get Author Content on SCOPUS by ID} 6 | \usage{ 7 | retrievalByAuthorID(id, api_key, remove.duplicated = TRUE, country = TRUE) 8 | } 9 | \arguments{ 10 | \item{id}{is a vector of characters containing the author's SCOPUS IDs. 11 | SCOPUS IDs can be obtained using the function \code{\link{idByAuthor}}.} 12 | 13 | \item{api_key}{is a character. It contains the Elsevier API key. Information about how to obtain an API Key \href{https://dev.elsevier.com/sc_apis.html}{Elsevier API website}} 14 | 15 | \item{remove.duplicated}{is logical. If TRUE duplicated documents will be deleted from the bibliographic collection.} 16 | 17 | \item{country}{is logical. If TRUE authors' country information will be downloaded from SCOPUS.} 18 | } 19 | \value{ 20 | a list containing two objects: (i) M which is a data frame with cases corresponding to articles and variables to main Field Tags named using the standard ISI WoS Field Tag codification. 21 | M includes the entire bibliographic collection downloaded from SCOPUS.
22 | The main field tags are: 23 | 24 | \tabular{lll}{ 25 | \code{AU}\tab \tab Authors\cr 26 | \code{TI}\tab \tab Document Title\cr 27 | \code{SO}\tab \tab Publication Name (or Source)\cr 28 | \code{DT}\tab \tab Document Type\cr 29 | \code{DE}\tab \tab Authors' Keywords\cr 30 | \code{ID}\tab \tab Keywords associated by SCOPUS or ISI database \cr 31 | \code{AB}\tab \tab Abstract\cr 32 | \code{C1}\tab \tab Author Address\cr 33 | \code{RP}\tab \tab Reprint Address\cr 34 | \code{TC}\tab \tab Times Cited\cr 35 | \code{PY}\tab \tab Year\cr 36 | \code{UT}\tab \tab Unique Article Identifier\cr 37 | \code{DB}\tab \tab Database\cr} 38 | (ii) authorDocuments which is a list containing a bibliographic data frame for each author. 39 | 40 | LIMITATIONS: 41 | Currently, the SCOPUS API does not allow downloading document references. 42 | As a consequence, it is not possible to perform co-citation analysis (the field CR is empty). 43 | } 44 | \description{ 45 | Uses the SCOPUS API search to get information about documents on a set of authors using SCOPUS ID. 46 | } 47 | \examples{ 48 | ## Request a personal API Key to Elsevier web page https://dev.elsevier.com/sc_apis.html 49 | 50 | ## api_key="your api key" 51 | 52 | ## create a data frame with the list of authors to get information and IDs 53 | # i.e. df[1,1:3] <- c("aria","massimo","naples") 54 | # df[2,1:3] <- c("cuccurullo","corrado", "naples") 55 | 56 | ## run idByAuthor function 57 | # 58 | # authorsID <- idByAuthor(df, api_key) 59 | # 60 | 61 | ## extract the IDs 62 | # 63 | # id <- authorsID[,3] 64 | # 65 | 66 | ## create the bibliographic collection 67 | # 68 | # res <- retrievalByAuthorID(id, api_key) 69 | # 70 | # M <- res$M # the entire bibliographic data frame 71 | # authorDocuments <- res$authorDocuments # the list containing a bibliographic data frame for each author 72 | 73 | } 74 | \seealso{ 75 | \code{\link{idByAuthor}} for downloading author information and SCOPUS ID.
76 | } 77 | -------------------------------------------------------------------------------- /inst/biblioshiny/libraries.R: -------------------------------------------------------------------------------- 1 | # ### packages for biblioshiny() 2 | libraries <- function() { 3 | all_ok <- TRUE 4 | 5 | parse_pkg <- function(pkg_str) { 6 | # Extract the package name and minimum version, if specified 7 | matches <- regmatches( 8 | pkg_str, 9 | regexec("^([a-zA-Z0-9\\.]+)(?: \\(>= ([0-9\\.]+)\\))?$", pkg_str) 10 | )[[1]] 11 | if (length(matches) >= 2) { 12 | list( 13 | name = matches[2], 14 | min_version = ifelse(length(matches) == 3, matches[3], NA) 15 | ) 16 | } else { 17 | list(name = pkg_str, min_version = NA) 18 | } 19 | } 20 | 21 | safe_install <- function(pkg_str) { 22 | pkg_info <- parse_pkg(pkg_str) 23 | pkg <- pkg_info$name 24 | min_ver <- pkg_info$min_version 25 | 26 | need_install <- FALSE 27 | 28 | if (pkg %in% rownames(installed.packages())) { 29 | if (!is.na(min_ver)) { 30 | installed_ver <- as.character(packageVersion(pkg)) 31 | if (compareVersion(installed_ver, min_ver) < 0) { 32 | message(sprintf( 33 | "The installed version of '%s' (%s) is lower than the required version (%s).", 34 | pkg, 35 | installed_ver, 36 | min_ver 37 | )) 38 | need_install <- TRUE 39 | } 40 | } 41 | } else { 42 | need_install <- TRUE 43 | } 44 | 45 | if (need_install) { 46 | install.packages(pkg) 47 | } 48 | 49 | return(require(pkg, character.only = TRUE, quietly = TRUE)) 50 | } 51 | 52 | pkgs <- c( 53 | "httr2", 54 | "base64enc", 55 | "bibliometrix", 56 | "zip", 57 | "shiny", 58 | "igraph", 59 | "DT", 60 | "stringr", 61 | "contentanalysis", 62 | "ggplot2", 63 | "wordcloud2", 64 | "ggmap", 65 | "maps", 66 | "pdftools (>= 3.6.0)", 67 | "tidytext", 68 | "visNetwork", 69 | "plotly", 70 | "fontawesome", 71 | "shinydashboardPlus", 72 | "shinydashboard", 73 | "shinyjs", 74 | "curl (>= 6.3.0)", 75 | "RCurl", 76 | "openxlsx", 77 | "shinyWidgets", 78 | "chromote", 79 | "pagedown", 80 | "Matrix", 81 | "dimensionsR", 82 | "pubmedR", 83 | "dplyr", 84 | "tidyr", 85 | "sparkline", 86 | "tidygraph", 87 | "ggraph" 88 | ) 89 | 90 | suppressPackageStartupMessages({ 91 | results <- vapply(pkgs, safe_install, logical(1)) 92 | all_ok <- all(results) 93 | }) 94 | 95 | return(all_ok) 96 | } 97 | 98 | messageItem2 <- function( 99 | from, 100 | message, 101 | icon = shiny::icon("user"), 102 | time = NULL, 103 | href = NULL, 104 | inputId = NULL 105 | ) { 106 | if (is.null(href)) { 107 | href <- "#" 108 | } 109 | shiny::tags$li(shiny::a( 110 | id = inputId, 111 | class = if (!is.null(inputId)) { 112 | "action-button" 113 | }, 114 | href = href, 115 | target = "_blank", 116 | icon, 117 | shiny::h4( 118 | from, 119 | if (!is.null(time)) { 120 | shiny::tags$small(shiny::icon("clock-o"), time) 121 | } 122 | ), 123 | shiny::p(message) 124 | )) 125 | } 126 | -------------------------------------------------------------------------------- /man/biblioAnalysis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/biblioAnalysis.R 3 | \name{biblioAnalysis} 4 | \alias{biblioAnalysis} 5 | \title{Bibliometric Analysis} 6 | \usage{ 7 | biblioAnalysis(M, sep = ";") 8 | } 9 | \arguments{ 10 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
11 | It is a data matrix with cases corresponding to manuscripts and variables to Field Tag in the original SCOPUS and Clarivate Analytics Web of Science file.} 12 | 13 | \item{sep}{is the field separator character. This character separates strings in each column of the data frame. The default is \code{sep = ";"}.} 14 | } 15 | \value{ 16 | \code{biblioAnalysis} returns an object of \code{class} "bibliometrix". 17 | 18 | The functions \code{\link{summary}} and \code{\link{plot}} are used to obtain or print a summary and some useful plots of the results. 19 | 20 | An object of \code{class} "bibliometrix" is a list containing the following components: 21 | 22 | \tabular{lll}{ 23 | Articles \tab \tab the total number of manuscripts\cr 24 | Authors \tab \tab the authors' frequency distribution\cr 25 | AuthorsFrac \tab \tab the authors' frequency distribution (fractionalized)\cr 26 | FirstAuthors \tab \tab corresponding author of each manuscript\cr 27 | nAUperPaper \tab \tab the number of authors per manuscript\cr 28 | Appearances \tab \tab the number of author appearances\cr 29 | nAuthors \tab \tab the number of authors\cr 30 | AuMultiAuthoredArt \tab \tab the number of authors of multi-authored articles\cr 31 | MostCitedPapers \tab \tab the list of manuscripts sorted by citations\cr 32 | Years \tab \tab publication year of each manuscript\cr 33 | FirstAffiliation \tab \tab the affiliation of the first author\cr 34 | Affiliations \tab \tab the frequency distribution of affiliations (of all co-authors for each paper)\cr 35 | Aff_frac \tab \tab the fractionalized frequency distribution of affiliations (of all co-authors for each paper)\cr 36 | CO \tab \tab the affiliation country of the first author\cr 37 | Countries \tab \tab the affiliation countries' frequency distribution\cr 38 | CountryCollaboration \tab \tab Intra-country (SCP) and intercountry (MCP) collaboration indices\cr 39 | TotalCitation \tab \tab the number of times each manuscript has been cited\cr 40 | TCperYear \tab \tab the yearly average number of times each manuscript has been cited\cr 41 | Sources \tab \tab the frequency distribution of sources (journals, books, etc.)\cr 42 | DE \tab \tab the frequency distribution of authors' keywords\cr 43 | ID \tab \tab the frequency distribution of keywords associated to the manuscript by SCOPUS and Clarivate Analytics Web of Science database} 44 | } 45 | \description{ 46 | It performs a bibliometric analysis of a dataset imported from SCOPUS and Clarivate Analytics Web of Science databases. 47 | } 48 | \examples{ 49 | \dontrun{ 50 | data(management, package = "bibliometrixData") 51 | 52 | results <- biblioAnalysis(management) 53 | 54 | summary(results, k = 10, pause = FALSE) 55 | } 56 | 57 | } 58 | \seealso{ 59 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS Export file in a bibliographic data frame. 60 | 61 | \code{\link{summary}} to obtain a summary of the results. 62 | 63 | \code{\link{plot}} to draw some useful plots of the results.
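% A compact end-to-end sketch of the workflow referenced above; plot() is
% assumed to dispatch to this package's plot method for "bibliometrix"
% objects, and the k/pause arguments mirror those shown for summary():
% data(management, package = "bibliometrixData")
% results <- biblioAnalysis(management, sep = ";")
% summary(results, k = 10, pause = FALSE)
% plot(results, k = 10, pause = FALSE)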
--------------------------------------------------------------------------------
/man/normalizeSimilarity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/normalizeSimilarity.R
3 | \name{normalizeSimilarity}
4 | \alias{normalizeSimilarity}
5 | \title{Calculate similarity indices}
6 | \usage{
7 | normalizeSimilarity(NetMatrix, type = "association")
8 | }
9 | \arguments{
10 | \item{NetMatrix}{is a coupling matrix obtained by the network functions \code{\link{biblioNetwork}} or \code{\link{cocMatrix}}.}
11 | 
12 | \item{type}{is a character. It can be "association", "jaccard", "inclusion", "salton" or "equivalence" to obtain the Association Strength, Jaccard,
13 | Inclusion, Salton or Equivalence similarity index, respectively. The default is \code{type = "association"}.}
14 | }
15 | \value{
16 | a similarity matrix.
17 | }
18 | \description{
19 | It calculates a relative measure of bibliographic co-occurrences.
20 | }
21 | \details{
22 | \code{normalizeSimilarity} calculates the Association strength, Inclusion, Jaccard or Salton similarity from a co-occurrence bibliographic matrix.
23 | 
24 | The association strength is used by Van Eck and Waltman (2007) and Van Eck et al. (2006). Several works refer to the measure as the proximity index,
25 | while Leydesdorff (2008) and Zitt et al. (2000) refer to it as the probabilistic affinity (or activity) index.
26 | 
27 | The inclusion index, also called the Simpson coefficient, is an overlap measure used in information retrieval.
28 | 
29 | The Jaccard index (or Jaccard similarity coefficient) gives us a relative measure of the overlap of two sets.
30 | It is calculated as the ratio between the intersection and the union of the reference lists (of two manuscripts).
31 | 
32 | The Salton index, instead, relates the intersection of the two lists to the geometric mean of the size of both sets.
33 | The square of the Salton index is also called the Equivalence index.
34 | 
35 | The indices are equal to zero if the intersection of the reference lists is empty.\cr\cr
36 | 
37 | References\cr\cr
38 | Leydesdorff, L. (2008). On the normalization and visualization of author co-citation data: Salton's cosine versus the Jaccard index.
39 | Journal of the American Society for Information Science and Technology, 59(1), 77-85.\cr
40 | Van Eck, N.J., Waltman, L., Van den Berg, J., & Kaymak, U. (2006). Visualizing the computational intelligence field.
41 | IEEE Computational Intelligence Magazine, 1(4), 6-10.\cr
42 | Van Eck, N.J., & Waltman, L. (2007). Bibliometric mapping of the computational intelligence field.
43 | International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems, 15(5), 625-645.\cr
44 | Van Eck, N. J., & Waltman, L. (2009). How to normalize cooccurrence data? An analysis of some well-known similarity measures.
45 | Journal of the American Society for Information Science and Technology, 60(8), 1635-1651.\cr
46 | Zitt, M., Bassecoulard, E., & Okubo, Y. (2000). Shadows of the past in international cooperation:
47 | Collaboration profiles of the top five producers of science.
Scientometrics, 47(3), 627-657.\cr
48 | }
49 | \examples{
50 | 
51 | data(scientometrics, package = "bibliometrixData")
52 | NetMatrix <- biblioNetwork(scientometrics,
53 |   analysis = "co-occurrences",
54 |   network = "keywords", sep = ";"
55 | )
56 | S <- normalizeSimilarity(NetMatrix, type = "association")
57 | 
58 | }
59 | \seealso{
60 | \code{\link{biblioNetwork}} function to compute a bibliographic network.
61 | 
62 | \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
63 | }
64 | 
--------------------------------------------------------------------------------
/man/authorBio.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/authorBio.r
3 | \name{authorBio}
4 | \alias{authorBio}
5 | \title{Retrieve Author Biographical Information from OpenAlex}
6 | \usage{
7 | authorBio(
8 |   author_position = 1,
9 |   doi = "10.1016/j.joi.2017.08.007",
10 |   verbose = FALSE,
11 |   return_all_authors = FALSE,
12 |   sleep_time = 1,
13 |   max_retries = 3,
14 |   retry_delay = 2
15 | )
16 | }
17 | \arguments{
18 | \item{author_position}{Integer. The numerical position of the author in the authors list (default: 1)}
19 | 
20 | \item{doi}{Character. DOI of the article used to identify the authors}
21 | 
22 | \item{verbose}{Logical. Print informative messages during execution (default: FALSE)}
23 | 
24 | \item{return_all_authors}{Logical. If TRUE, returns information for all co-authors (default: FALSE)}
25 | 
26 | \item{sleep_time}{Numeric. Seconds to wait between API calls to respect rate limits (default: 1)}
27 | 
28 | \item{max_retries}{Integer. Maximum number of retry attempts for failed API calls (default: 3)}
29 | 
30 | \item{retry_delay}{Numeric. Base delay in seconds before retrying after an error (default: 2)}
31 | }
32 | \value{
33 | If \code{return_all_authors = FALSE}, returns a tibble with comprehensive information
34 | about the specified author including:
35 | \itemize{
36 |   \item Basic information (name, ORCID, OpenAlex ID)
37 |   \item Bibliometric indicators (works count, citations, h-index, i10-index)
38 |   \item Affiliation details from both the paper and author profile
39 |   \item Research topics and areas
40 |   \item Paper-specific metadata (corresponding author status, position type)
41 | }
42 | If \code{return_all_authors = TRUE}, returns a list of tibbles, one for each co-author.
43 | }
44 | \description{
45 | This function downloads comprehensive author information from OpenAlex based on a DOI
46 | and the numerical position of the author in the co-authors list. It provides detailed
47 | biographical data, bibliometric indicators, and affiliation information.
48 | }
49 | \details{
50 | The function first retrieves the work information using the provided DOI, then extracts
51 | author IDs from the authorships data, and finally fetches detailed author profiles from
52 | OpenAlex. It enriches the author data with paper-specific information such as authorship
53 | position, corresponding author status, and affiliations as listed in the paper.
54 | 
55 | The function implements automatic retry logic with exponential backoff to handle rate limiting
56 | (HTTP 429 errors) and temporary network issues. It respects OpenAlex API rate limits by adding
57 | configurable delays between requests.
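The retry behaviour described above can be sketched as follows (an illustrative helper, not the actual authorBio() internals):

    request_with_retry <- function(req_fun, max_retries = 3, retry_delay = 2) {
      for (attempt in seq_len(max_retries)) {
        res <- tryCatch(req_fun(), error = function(e) e)
        if (!inherits(res, "error")) return(res)
        Sys.sleep(retry_delay * 2^(attempt - 1))  # exponential backoff
      }
      stop("All retry attempts failed")
    }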
58 | 
59 | IMPORTANT: For better rate limits, set your OpenAlex API key using:
60 | Sys.setenv(openalexR_apikey = "YOUR_API_KEY")
61 | Get a free API key at: https://openalex.org/
62 | }
63 | \examples{
64 | \dontrun{
65 | # Get information for the first author
66 | first_author <- authorBio(doi = "10.1016/j.joi.2017.08.007")
67 | 
68 | # Get information for the second author with verbose output
69 | second_author <- authorBio(
70 |   author_position = 2,
71 |   doi = "10.1016/j.joi.2017.08.007",
72 |   verbose = TRUE
73 | )
74 | 
75 | # Get information for all co-authors with custom rate limiting
76 | all_authors <- authorBio(
77 |   doi = "10.1016/j.joi.2017.08.007",
78 |   return_all_authors = TRUE,
79 |   sleep_time = 0.5,
80 |   max_retries = 5
81 | )
82 | }
83 | 
84 | }
85 | 
--------------------------------------------------------------------------------
/R/missingData.R:
--------------------------------------------------------------------------------
1 | #' Completeness of bibliographic metadata
2 | #'
3 | #' It calculates the percentage of missing data in the metadata of a bibliographic data frame.
4 | #'
5 | #' Each metadata field is assigned a status c("Excellent", "Good", "Acceptable", "Poor", "Critical", "Completely missing")
6 | #' depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing
7 | #' values into six categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%),
8 | #' "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%).
9 | #'
10 | #' The results of the function allow us to understand which analyses can be performed with bibliometrix
11 | #' and which cannot, based on the completeness (or status) of the different metadata.
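# A quick check of the bands documented above, using the internal status()
# helper defined at the bottom of this file (input percentages are illustrative):
#   status(c(0, 5, 15, 35, 75, 100))
#   #> "Excellent" "Good" "Acceptable" "Poor" "Critical" "Completely missing"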
12 | #' @param M is a bibliographic data frame obtained by the \code{\link{convert2df}} function.
13 | #'
14 | #' @return The function \code{missingData} returns a list containing two objects:
15 | #' \tabular{lll}{
16 | #' \code{allTags} \tab \tab is a data frame including results for all original metadata tags from the collection\cr
17 | #' \code{mandatoryTags}\tab \tab is a data frame that includes only the tags needed for analysis with bibliometrix and biblioshiny.}
18 | #'
19 | #' @examples
20 | #' data(scientometrics, package = "bibliometrixData")
21 | #' res <- missingData(scientometrics)
22 | #' print(res$mandatoryTags)
23 | #'
24 | #' @export
25 | #'
26 | missingData <- function(M) {
27 |   cols <- names(M)
28 |   # count the number of missing values in each column
29 |   missing_counts <- sapply(cols, function(x) {
30 |     sum(is.na(M[, x]) | M[, x] %in% c("NA,0000,NA", "NA", "", "none"))
31 |   })
32 | 
33 |   if (sum(as.numeric(M$TC), na.rm = TRUE) == 0) {
34 |     missing_counts["TC"] <- nrow(M)
35 |   }
36 |   # calculate the percentage of missing values in each column
37 |   missing_pct <- round(missing_counts / nrow(M) * 100, 2)
38 |   # create a data frame with the column names, number of missing values and percentage of missing values
39 |   df_all <- data.frame(cols, missing_counts, missing_pct)
40 | 
41 |   # create a vector with the tags
42 |   tag <- unlist(
43 |     strsplit(
44 |       "AB,AU,C1,CR,DE,DI,DT,ID,LA,PY,RP,SO,TC,TI,WC", ","
45 |     )
46 |   )
47 |   # create a vector with the description of the tags
48 |   description <- trimws(unlist(
49 |     strsplit(
50 |       "Abstract, Author,Affiliation,Cited References,Keywords,DOI,Document Type,Keywords Plus,Language,
51 |       Publication Year,Corresponding Author, Journal, Total Citation, Title, Science Categories", ","
52 |     )
53 |   ))
54 | 
55 |   # create a data frame with the column names, number of missing values, percentage of missing values and status
56 |   df_all <- df_all %>%
57 |     mutate(status = status(missing_pct)) %>%
58 |     replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100))
59 | 
60 |   df_tags <- data.frame(tag, description) %>%
61 |     left_join(df_all, by = c("tag" = "cols")) %>%
62 |     replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100, status = "Completely missing")) %>%
63 |     arrange(missing_pct, description)
64 | 
65 |   results <- list(allTags = df_all, mandatoryTags = df_tags)
66 |   return(results)
67 | }
68 | 
69 | status <- function(x) {
70 |   y <- character(length(x))
71 |   y[x == 0] <- "Excellent"
72 |   y[x > 0 & x <= 10] <- "Good"
73 |   y[x > 10 & x <= 20] <- "Acceptable"
74 |   y[x > 20 & x <= 50] <- "Poor"
75 |   y[x > 50 & x < 100] <- "Critical"
76 |   y[is.na(x) | x == 100] <- "Completely missing"
77 |   return(y)
78 | }
79 | 
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # bibliometrix (development version)
2 | 
3 | # bibliometrix 5.2.1
4 | 
5 | # bibliometrix V.5.2.1 (Release date: 2025-12-11)
6 | 
7 | Changes:
8 | 
9 | * Removed old Gemini models 1.5 and 2.0 from Biblio AI
10 | * Solved an issue with internet connection checking on MS Windows systems
11 | 
12 | # bibliometrix V.5.2.0 (Release date: 2025-11-04)
13 | 
14 | Features:
15 | 
16 | * Added Author Bio Card (new function authorBio())
17 | * Added new plot layout in plotThematicEvolution()
18 | * Added automatic identification of continuous flows in plotThematicEvolution()
19 | * Added seed argument to networkPlot(), thematicMap(), and thematicEvolution() functions
20 | * Improved OpenAlex data conversion process
21 | * Enhanced Community Repulsion algorithm in
networkPlot(), thematicMap(), and thematicEvolution() functions
22 | * Added Citation Matching algorithm (new function applyCitationMatching())
23 | * Added Life Cycle Analysis (new function lifeCycle())
24 | * Biblioshiny enhancements:
25 |   - Added AI-powered article summaries
26 |   - Added Author Profile feature (using OpenAlex data)
27 |   - Added API Search and Download menu for OpenAlex and PubMed
28 |   - Removed API support for Dimensions.ai
29 |   - Added Content Analysis menu (using the contentanalysis library)
30 |   - Improved parameter layout across all analyses
31 |   - Added seed parameter in Settings
32 |   - Added Life Cycle Analysis menu
33 | 
34 | Changes:
35 | 
36 | #564 [Solved] - bibliometrix now supports the new Scopus CSV format (both for data import and local citation analyses)
37 | 
38 | 
39 | # bibliometrix V.5.1.1 (Release date: 2025-09-02)
40 | 
41 | Features:
42 | * Biblioshiny - Filters: Added the ability to upload a journal ranking list and filter publications based on selected rankings.
43 | 
44 | Changes:
45 | * Biblioshiny: Introduced automatic checks and updates for required packages, such as curl (version >= 6.3.0).
46 | 
47 | 
48 | # bibliometrix v.5.1.0 (Release date: 2025-07-15)
49 | 
50 | Features:
51 | * rpys():
52 |   - Introduced two options for the median window: centered and backward.
53 |   - Implemented an algorithm to detect citation sequences and influential references ("Hot Papers", "Constant Performers", "Life Cycles", and "Sleeping Beauties").
54 |   - These results are now also included in Biblioshiny outputs.
55 | * Biblioshiny:
56 |   - Added animated diachronic networks to Conceptual and Social Structure analyses.
57 |   - Completely redesigned the Filters panel using a J-AU-DOC framework, now supporting a broader range of filters (e.g., Countries, Journals, Citations).
58 | * Biblio AI:
59 |   - Enhanced prompt templates.
60 |   - Integrated new Gemini models (2.5 Flash and Flash-lite).
61 | 
62 | Changes:
63 | * Fixed an issue in mergeDbSources(): the function now preserves the cited references field when merging files from a single database.
64 | * convert2df() now automatically converts ISO2 country codes to full country names for OpenAlex data.
65 | 
66 | 
67 | # bibliometrix v.5.0.1
68 | 
69 | Features:
70 | * Introduced mergeKeywords(), a new function that combines DE and ID keywords into a single field named KW_Merged.
71 | 
72 | Changes:
73 | * Resolved various issues in biblioshiny().
74 | * Updated the following functions to work with the new KW_Merged field:
75 |   - tableTag()
76 |   - cocMatrix()
77 |   - biblioNetwork()
78 |   - conceptualStructure()
79 |   - thematicMap()
80 |   - thematicEvolution()
81 |   - threeFieldsPlot()
82 | 
83 | 
84 | # bibliometrix v.5.0.0
85 | 
86 | Features:
87 | * Biblioshiny 5.0 now includes Biblio AI - a powerful AI assistant to support your science mapping analyses.
88 | Changes:
89 | * Resolved multiple issues in biblioshiny().
90 | -------------------------------------------------------------------------------- /R/csvLens2df.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables("where") 2 | 3 | csvLens2df <- function(file) { 4 | options(readr.num_columns = 0) 5 | 6 | ## import all files in a single data frame 7 | for (i in 1:length(file)) { 8 | D <- read_csv(file[i], na = character(), quote = '"', trim_ws = FALSE, progress = show_progress(), show_col_types = FALSE) %>% 9 | mutate(across(where(is.numeric), as.character)) %>% 10 | mutate(across(where(is.character), function(x) tidyr::replace_na(x, ""))) %>% 11 | as.data.frame() 12 | 13 | if (i > 1) { 14 | l <- intersect(l, names(D)) 15 | DATA <- rbind(DATA[l], D[l]) 16 | } else { 17 | l <- names(D) 18 | DATA <- D 19 | } 20 | } 21 | rm(D) 22 | 23 | ## Post-Processing 24 | 25 | # column re-labelling 26 | DATA <- relabelling_lens(DATA) 27 | 28 | if ("TC" %in% names(DATA)) DATA$TC <- as.numeric(DATA$TC) 29 | 30 | # Authors' names cleaning (surname and initials) 31 | DATA$AF <- DATA$AU 32 | 33 | # Authors' names cleaning (surname and initials) 34 | # remove ; and 2 or more spaces 35 | # DATA$AU <- gsub("\\s+", " ", DATA$AU) 36 | 37 | listAU <- strsplit(DATA$AU, split = "; ") 38 | 39 | AU <- lapply(listAU, function(l) { 40 | lastname <- sub(".*\\s", "", trimws(l)) 41 | firstname <- sub("\\s+[^ ]+$", "", l) 42 | firstname <- gsub("[^:A-Z:]", "", firstname) 43 | AU <- paste(lastname, firstname, sep = " ", collapse = ";") 44 | return(AU) 45 | }) 46 | 47 | 48 | DATA$AU <- unlist(AU) 49 | # DATA$AU=gsub("\\.", "", DATA$AU) 50 | # DATA$AU=gsub(",", ";", DATA$AU) 51 | 52 | # Affiliation 53 | # DATA$C1 <- "Unknown" 54 | 55 | # Iso Source Titles 56 | DATA$SO[DATA$SO == ""] <- DATA$Publisher[DATA$SO == ""] 57 | # DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE) 58 | # DATA$J9 <- gsub("\\.","",DATA$JI) 59 | DATA$JI <- DATA$J9 <- DATA$SO 60 | DATA$ID <- DATA$DE 61 | DI <- DATA$DI 62 | URL <- DATA$URL 63 | AB <- DATA$AB 64 | TI <- DATA$TI 65 | DE <- DATA$DE 66 | DATA <- data.frame(lapply(DATA, toupper)) 67 | DATA$AB_raw <- AB 68 | DATA$TI_raw <- TI 69 | DATA$DE_raw <- DE 70 | DATA$DI <- DI 71 | DATA$URL <- URL 72 | DATA$AU_CO <- "NA" 73 | DATA$DB <- "LENS" 74 | return(DATA) 75 | } 76 | 77 | 78 | 79 | 80 | relabelling_lens <- function(DATA) { 81 | ## column re-labelling 82 | label <- names(DATA) 83 | label <- gsub("Source Title", "SO", label) 84 | # label <- gsub("Authors with affiliations","C1",label) 85 | label <- gsub("Author/s", "AU", label) 86 | label <- gsub("Publication.Type", "DT", label) 87 | label <- gsub("Title", "TI", label) 88 | label <- gsub("Publication Year", "PY", label) 89 | label <- gsub("Volume", "VL", label) 90 | label <- gsub("Issue Number", "IS", label) 91 | label <- gsub("Source Country", "SO_CO", label) 92 | label <- gsub("Scholarly Citation Count", "TC", label) 93 | label <- gsub("DOI", "DI", label) 94 | label <- gsub("Source URLs", "URL", label) 95 | label <- gsub("Abstract", "AB", label) 96 | label <- gsub("Keywords", "DE", label) 97 | label <- gsub("MeSH Terms", "MESH", label) 98 | label <- gsub("Funding Details", "FU", label) 99 | label <- gsub("Funding", "FX", label) 100 | label <- gsub("References", "CR", label) 101 | # label <- gsub("Correspondence Address","RP",label) 102 | label <- gsub("Fields of Study", "SC", label) 103 | label <- gsub("Language of Original Document", "LA", label) 104 | label <- gsub("Document Type", "DT", label) 105 | label <- gsub("Source", "DB", label) 106 | 
label <- gsub("Lens ID", "UT", label) 107 | label <- gsub("Citing Works Count", "TC", label) 108 | names(DATA) <- label 109 | 110 | return(DATA) 111 | } 112 | -------------------------------------------------------------------------------- /R/bradford.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("Rank", "SO", "Freq")) 2 | #' Bradford's law 3 | #' 4 | #' It estimates and draws the Bradford's law source distribution. 5 | #' 6 | #' Bradford's law is a pattern first described by (\cite{Samuel C. Bradford, 1934}) that estimates the exponentially diminishing returns 7 | #' of searching for references in science journals. 8 | #' 9 | #' One formulation is that if journals in a field are sorted by number of articles into three groups, each with about one-third of all articles, 10 | #' then the number of journals in each group will be proportional to 1:n:n2.\cr\cr 11 | #' 12 | #' Reference:\cr 13 | #' Bradford, S. C. (1934). Sources of information on specific subjects. Engineering, 137, 85-86.\cr 14 | #' 15 | #' @param M is a bibliographic dataframe. 16 | #' @return The function \code{bradford} returns a list containing the following objects: 17 | #' \tabular{lll}{ 18 | #' \code{table} \tab \tab a dataframe with the source distribution partitioned in the three zones\cr 19 | #' \code{graph} \tab \tab the source distribution plot in ggplot2 format} 20 | #' 21 | #' @examples 22 | #' \dontrun{ 23 | #' data(management, package = "bibliometrixData") 24 | #' 25 | #' BR <- bradford(management) 26 | #' } 27 | #' 28 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis 29 | #' @seealso \code{\link{summary}} method for class '\code{bibliometrix}' 30 | #' 31 | #' @export 32 | 33 | bradford <- function(M) { 34 | SO <- sort(table(M$SO), decreasing = TRUE) 35 | n <- sum(SO) 36 | cumSO <- cumsum(SO) 37 | cutpoints <- round(c(1, n * 0.33, n * 0.67, Inf)) 38 | groups <- cut(cumSO, breaks = cutpoints, labels = c("Zone 1", "Zone 2", "Zone 3")) 39 | a <- length(which(cumSO < n * 0.33)) + 1 40 | b <- length(which(cumSO < n * 0.67)) + 1 41 | Z <- c(rep("Zone 1", a), rep("Zone 2", b - a), rep("Zone 3", length(cumSO) - b)) 42 | df <- data.frame(SO = names(cumSO), Rank = 1:length(cumSO), Freq = as.numeric(SO), cumFreq = cumSO, Zone = Z) 43 | 44 | x <- c(max(log(df$Rank)) - 0.02 - diff(range(log(df$Rank))) * 0.125, max(log(df$Rank)) - 0.02) 45 | y <- c(min(df$Freq), min(df$Freq) + diff(range(df$Freq)) * 0.125) + 1 46 | data("logo", envir = environment()) 47 | logo <- grid::rasterGrob(logo, interpolate = TRUE) 48 | 49 | g <- ggplot2::ggplot(df, aes(x = log(Rank), y = Freq, text = paste("Source: ", SO, "\nN. 
49 |   g <- ggplot2::ggplot(df, aes(x = log(Rank), y = Freq, text = paste("Source: ", SO, "\nN. of Documents: ", Freq))) +
50 |     geom_line(aes(group = "NA")) +
51 |     # geom_area(aes(group="NA"),fill = "gray90", alpha = 0.5) +
52 |     annotate("rect", xmin = 0, xmax = log(df$Rank[a]), ymin = 0, ymax = max(df$Freq), alpha = 0.2) +
53 |     labs(x = "Source log(Rank)", y = "Articles", title = "Core Sources by Bradford's Law") +
54 |     annotate("text", x = log(df$Rank[a]) / 2, y = max(df$Freq) / 2, label = "Core\nSources", fontface = 2, alpha = 0.5, size = 10) +
55 |     scale_x_continuous(breaks = log(df$Rank)[1:a], labels = as.character(substr(df$SO, 1, 25))[1:a]) +
56 |     theme(
57 |       text = element_text(color = "#444444"),
58 |       legend.position = "none",
59 |       panel.background = element_rect(fill = "#FFFFFF"),
60 |       panel.grid.minor = element_blank(),
61 |       panel.grid.major = element_blank(),
62 |       plot.title = element_text(size = 24),
63 |       axis.title = element_text(size = 14, color = "#555555"),
64 |       axis.line.x = element_line(color = "black", linewidth = 0.5),
65 |       axis.line.y = element_line(color = "black", linewidth = 0.5),
66 |       axis.title.y = element_text(vjust = 1, angle = 90),
67 |       axis.title.x = element_text(hjust = 0),
68 |       axis.text.x = element_text(angle = 90, hjust = 1, size = 8, face = "bold")
69 |     ) +
70 |     annotation_custom(logo, xmin = x[1], xmax = x[2], ymin = y[1], ymax = y[2])
71 | 
72 |   results <- list(table = df, graph = g)
73 |   return(results)
74 | }
75 | 
--------------------------------------------------------------------------------
/R/mergeDbSources.R:
--------------------------------------------------------------------------------
1 | utils::globalVariables(c("num"))
2 | #' Merge bibliographic data frames from supported bibliographic DBs
3 | #'
4 | #' Merge bibliographic data frames from different databases (WoS, SCOPUS, Lens, OpenAlex, etc.) into a single one.
5 | #'
6 | #' Bibliographic data frames are obtained by the converting function \code{\link{convert2df}}.
7 | #' The function merges data frames by identifying common tag fields and duplicated records.
8 | #'
9 | #' @param ... are the bibliographic data frames to merge.
10 | #' @param remove.duplicated is logical. If TRUE, duplicated documents will be deleted from the bibliographic collection.
11 | #' @param verbose is logical. If TRUE, information on duplicate documents is printed on the screen.
12 | #' @return the value returned from \code{mergeDbSources} is a bibliographic data frame.
13 | #'
14 | #'
15 | #' @examples
16 | #'
17 | #' data(isiCollection, package = "bibliometrixData")
18 | #'
19 | #' data(scopusCollection, package = "bibliometrixData")
20 | #'
21 | #' M <- mergeDbSources(isiCollection, scopusCollection, remove.duplicated = TRUE)
22 | #'
23 | #' dim(M)
24 | #'
25 | #' @seealso \code{\link{convert2df}} to import and convert an ISI or SCOPUS export file into a bibliographic data frame.
26 | #' @seealso \code{\link{biblioAnalysis}} function for bibliometric analysis.
27 | #' @seealso \code{\link{summary}} to obtain a summary of the results.
28 | #' @seealso \code{\link{plot}} to draw some useful plots of the results.
29 | #'
30 | #' @export
31 | 
32 | 
33 | mergeDbSources <- function(..., remove.duplicated = TRUE, verbose = TRUE) {
34 |   index <- NULL
35 | 
36 |   mc <- match.call(expand.dots = TRUE)
37 | 
38 |   if (length(mc) > 3) {
39 |     M <- dplyr::bind_rows(list(...))
40 |   } else {
41 |     M <- dplyr::bind_rows(...)
42 |   }
43 |   # create KW_Merged field
44 |   M <- M %>% mergeKeywords(force = TRUE)
45 | 
46 |   dbLabels <- data.frame(
47 |     DB = toupper(c("isi", "scopus", "openalex", "lens", "dimensions", "pubmed", "cochrane")),
48 |     num = c(1, 2, 3, 4, 5, 6, 7)
49 |   )
50 |   DB <- unique(M$DB)
51 | 
52 |   if (length(DB) > 1) {
53 |     # order by db
54 |     M <- M %>%
55 |       left_join(dbLabels, by = "DB") %>%
56 |       arrange(num) %>%
57 |       select(-num) %>%
58 |       rename("CR_raw" = "CR") %>%
59 |       mutate(CR = "NA")
60 |   }
61 | 
62 | 
63 |   if (isTRUE(remove.duplicated)) {
64 |     # remove by DOI
65 |     if ("DI" %in% names(M)) {
66 |       M$DI[M$DI == ""] <- NA
67 |       index <- which(duplicated(M$DI) & !is.na(M$DI))
68 |       if (length(index) > 0) M <- M[-index, ]
69 |     }
70 | 
71 |     # remove by title
72 |     if ("TI" %in% names(M)) {
73 |       TI <- gsub("[^[:alnum:] ]", "", M$TI)
74 |       TI <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", TI, perl = TRUE)
75 |       d <- duplicated(paste(TI, " ", M$PY))
76 |       if (isTRUE(verbose)) cat("\n", sum(d) + length(index), "duplicated documents have been removed\n")
77 |       M <- M[!d, ]
78 |     }
79 |   }
80 | 
81 |   if (length(unique(M$DB)) > 1) {
82 |     M$DB_Original <- M$DB
83 |     M$DB <- "ISI"
84 | 
85 |     ## author data cleaning
86 |     if ("AU" %in% names(M)) {
87 |       M$AU <- gsub(",", " ", M$AU)
88 |       AUlist <- strsplit(M$AU, ";")
89 |       AU <- lapply(AUlist, function(l) {
90 |         l <- trim(l)
91 |         name <- strsplit(l, " ")
92 |         lastname <- unlist(lapply(name, function(ln) {
93 |           ln[1]
94 |         }))
95 |         firstname <- lapply(name, function(ln) {
96 |           f <- paste(substr(ln[-1], 1, 1), collapse = " ")
97 |         })
98 |         AU <- paste(lastname, unlist(firstname), sep = " ", collapse = ";")
99 |         return(AU)
100 |       })
101 |       M$AU <- unlist(AU)
102 |     }
103 |   }
104 | 
105 |   M <- metaTagExtraction(M, "SR")
106 |   row.names(M) <- M$SR
107 | 
108 |   class(M) <- c("bibliometrixDB", "data.frame")
109 |   return(M)
110 | }
111 | 
--------------------------------------------------------------------------------
/R/csvScopus2df.R:
--------------------------------------------------------------------------------
1 | utils::globalVariables(c("X1", "X2", "tag", "orig"))
2 | 
3 | csvScopus2df <- function(file) {
4 |   options(readr.num_columns = 0)
5 | 
6 |   ## import all files in a single data frame
7 |   for (i in 1:length(file)) {
8 |     D <- read_csv(file[i],
9 |       na = character(), quote = '"', trim_ws = FALSE, progress = show_progress(),
10 |       col_types = cols(.default = col_character())
11 |     ) %>% # introduced to remove column-parsing errors
12 |       # mutate(across(!where(is.numeric), as.character)) %>% # not yet necessary with the inclusion of the previous line
13 |       mutate(across(where(is.character), function(x) tidyr::replace_na(x, ""))) %>% as.data.frame()
14 | 
15 |     if (i > 1) {
16 |       l <- intersect(l, names(D))
17 |       DATA <- rbind(DATA[l], D[l])
18 |     } else {
19 |       l <- names(D)
20 |       DATA <- D
21 |     }
22 |   }
23 | 
24 |   ## Post-Processing
25 | 
26 |   # column re-labelling
27 |   DATA <- labelling(DATA)
28 | 
29 |   # Authors' names cleaning (surname and initials)
30 |   DATA$AU <- gsub("\\.", "", DATA$AU)
31 |   # DATA$AU <- gsub(",", ";", DATA$AU)
32 |   DATA$AU <- gsub(",", "", DATA$AU)
33 | 
34 |   ### store raw affiliation format to extract link among authors and affiliations
35 |   DATA$C1raw <- DATA$C1
36 |   ###
37 | 
38 |   # Affiliation
39 |   if (!("C1" %in% names(DATA))) {
40 |     DATA$C1 <- NA
41 |   } else {
42 |     DATA$C1 <- unlist(lapply(strsplit(DATA$C1, ";"), function(l) {
43 |       l <- paste(gsub(".*\\., ", "", l), collapse = ";", sep = "")
44 |     }))
45 |   }
46 |   # Iso Source Titles
47 |   if ("JI" %in% names(DATA)) {
48 |     DATA$J9 <-
gsub("\\.", "", DATA$JI) 49 | } else { 50 | DATA$J9 <- DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE) 51 | } 52 | 53 | DI <- DATA$DI 54 | URL <- DATA$URL 55 | AB <- DATA$AB 56 | TI <- DATA$TI 57 | DE <- DATA$DE 58 | DATA <- data.frame(lapply(DATA, toupper)) 59 | DATA$AB_raw <- AB 60 | DATA$TI_raw <- TI 61 | DATA$DE_raw <- DE 62 | DATA$DI <- DI 63 | DATA$URL <- URL 64 | return(DATA) 65 | } 66 | 67 | 68 | 69 | 70 | labelling <- function(DATA) { 71 | ## column re-labelling 72 | 73 | df_tag <- data.frame( 74 | rbind( 75 | c("Abbreviated Source Title", "JI"), 76 | c("Affiliations", "C1"), 77 | c("Authors with affiliations", "C1_raw"), 78 | c("Author Addresses", "C1_raw"), 79 | c("Authors", "AU"), 80 | c("Author Names", "AU"), 81 | c("Author full names", "AF"), 82 | c("Source title", "SO"), 83 | c("Titles", "TI"), 84 | c("Title", "TI"), 85 | c("Publication Year", "PY"), 86 | c("Year", "PY"), 87 | c("Volume", "VL"), 88 | c("Issue", "IS"), 89 | c("Page count", "PP"), 90 | c("Cited by", "TC"), 91 | c("DOI", "DI"), 92 | c("Link", "URL"), 93 | c("Abstract", "AB"), 94 | c("Author Keywords", "DE"), 95 | c("Indexed Keywords", "ID"), 96 | c("Index Keywords", "ID"), 97 | c("Funding Details", "FU"), 98 | c("Funding Texts", "FX"), 99 | c("Funding Text 1", "FX"), 100 | c("References", "CR"), 101 | c("Correspondence Address", "RP"), 102 | c("Publisher", "PU"), 103 | c("Open Access", "OA"), 104 | c("Language of Original Document", "LA"), 105 | c("Document Type", "DT"), 106 | c("Source", "DB"), 107 | c("EID", "UT") 108 | ) 109 | ) %>% 110 | rename( 111 | orig = X1, 112 | tag = X2 113 | ) 114 | 115 | label <- data.frame(orig = names(DATA)) %>% 116 | left_join(df_tag, by = "orig") %>% 117 | mutate(tag = ifelse(is.na(tag), orig, tag)) 118 | 119 | names(DATA) <- label$tag 120 | 121 | if (!"C1" %in% names(DATA)) { 122 | if ("C1_raw" %in% names(DATA)) { 123 | DATA$C1 <- DATA$C1_raw 124 | } else { 125 | DATA$C1 <- NA 126 | } 127 | } 128 | 129 | return(DATA) 130 | } 131 | -------------------------------------------------------------------------------- /man/assignEvolutionColors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assignEvolutionColors.R 3 | \name{assignEvolutionColors} 4 | \alias{assignEvolutionColors} 5 | \title{Assign Colors to Thematic Evolution Nodes Based on Lineages} 6 | \usage{ 7 | assignEvolutionColors( 8 | nexus, 9 | threshold = 0.5, 10 | palette = NULL, 11 | use_measure = "weighted" 12 | ) 13 | } 14 | \arguments{ 15 | \item{nexus}{A list object returned by \code{\link{thematicEvolution}} containing: 16 | \itemize{ 17 | \item \code{Nodes}: data frame with node information (name, group, id, sum, freq, etc.) 18 | \item \code{Edges}: data frame with edge information (from, to, weight measures) 19 | \item \code{TM}: list of thematic maps for each period 20 | }} 21 | 22 | \item{threshold}{Numeric. The minimum weight value for an edge to be considered a 23 | "strong connection" (default: 0.5). Edges with weights >= threshold will propagate 24 | the same color to connected nodes across periods.} 25 | 26 | \item{palette}{Character vector. Optional custom color palette as hex codes. If NULL, 27 | uses a default palette of 50+ distinct colors. Colors are assigned sequentially 28 | without reuse.} 29 | 30 | \item{use_measure}{Character. The measure to use for determining edge strength. 
31 | Can be one of:
32 |   \itemize{
33 |     \item \code{"inclusion"}: uses the Inclusion measure (column 3 of Edges)
34 |     \item \code{"stability"}: uses the Stability measure (column 5 of Edges)
35 |     \item \code{"weighted"}: uses the weighted Inclusion measure (column 4 of Edges)
36 |   }
37 | Default is \code{"weighted"}.}
38 | }
39 | \value{
40 | Returns the modified \code{nexus} object with updated colors:
41 | \itemize{
42 |   \item \code{Nodes$color}: updated with lineage-based colors
43 |   \item \code{Edges$color}: edges connecting same-colored nodes receive the node color,
44 |     others are grey
45 |   \item \code{TM}: thematic maps updated with new cluster colors
46 | }
47 | }
48 | \description{
49 | This function assigns colors to nodes in a thematic evolution analysis based on
50 | their lineages across time periods. Nodes connected by strong edges (above a threshold)
51 | receive the same color to visualize thematic continuity. Nodes with the same name
52 | across periods that are not strongly connected to other nodes are also colored identically.
53 | }
54 | \details{
55 | The function uses a multi-phase algorithm:
56 | \enumerate{
57 |   \item \strong{Phase 1}: Identifies lineages by following strong connections (weight >= threshold)
58 |     from the first period forward. When a node has multiple strong connections, the
59 |     strongest one determines the lineage.
60 |   \item \strong{Phase 1.5}: Assigns the same lineage to nodes with identical names across
61 |     periods if they are not already part of different strong connections.
62 |   \item \strong{Phase 2}: Assigns unique colors from the palette to each identified lineage.
63 |   \item \strong{Phase 3}: Assigns unique colors to isolated nodes (those without any lineage).
64 |   \item \strong{Phase 4}: Colors edges based on their connected nodes - same color if both
65 |     nodes share a color, grey otherwise.
66 |   \item \strong{Final}: Updates thematic maps with the new color scheme.
67 | }
68 | 
69 | Each lineage receives a unique color from the palette. No color is reused across
70 | different lineages, ensuring clear visual distinction between independent thematic streams.
71 | }
72 | \examples{
73 | \dontrun{
74 | data(scientometrics, package = "bibliometrixData")
75 | years <- c(2000, 2010)
76 | 
77 | nexus <- thematicEvolution(scientometrics, field = "ID",
78 |                            years = years, n = 100, minFreq = 2)
79 | 
80 | 
81 | # Use custom threshold and measure
82 | nexus <- assignEvolutionColors(nexus, threshold = 0.6, use_measure = "weighted")
83 | 
84 | }
85 | 
86 | }
87 | \seealso{
88 | \code{\link{thematicEvolution}} to perform thematic evolution analysis.
89 | 
90 | \code{\link{plotThematicEvolution}} to visualize the colored evolution.
91 | }
92 | 
--------------------------------------------------------------------------------
/R/normalizeSimilarity.R:
--------------------------------------------------------------------------------
1 | #' Calculate similarity indices
2 | #'
3 | #' It calculates a relative measure of bibliographic co-occurrences.
4 | #'
5 | #' \code{normalizeSimilarity} calculates the Association strength, Inclusion, Jaccard or Salton similarity from a co-occurrence bibliographic matrix.
6 | #'
7 | #' The association strength is used by Van Eck and Waltman (2007) and Van Eck et al. (2006). Several works refer to the measure as the proximity index,
8 | #' while Leydesdorff (2008) and Zitt et al. (2000) refer to it as the probabilistic affinity (or activity) index.
9 | #'
10 | #' The inclusion index, also called the Simpson coefficient, is an overlap measure used in information retrieval.
11 | #'
12 | #' The Jaccard index (or Jaccard similarity coefficient) gives us a relative measure of the overlap of two sets.
13 | #' It is calculated as the ratio between the intersection and the union of the reference lists (of two manuscripts).
14 | #'
15 | #' The Salton index, instead, relates the intersection of the two lists to the geometric mean of the size of both sets.
16 | #' The square of the Salton index is also called the Equivalence index.
17 | #'
18 | #' The indices are equal to zero if the intersection of the reference lists is empty.\cr\cr
19 | #'
20 | #' References\cr\cr
21 | #' Leydesdorff, L. (2008). On the normalization and visualization of author co-citation data: Salton's cosine versus the Jaccard index.
22 | #' Journal of the American Society for Information Science and Technology, 59(1), 77-85.\cr
23 | #' Van Eck, N.J., Waltman, L., Van den Berg, J., & Kaymak, U. (2006). Visualizing the computational intelligence field.
24 | #' IEEE Computational Intelligence Magazine, 1(4), 6-10.\cr
25 | #' Van Eck, N.J., & Waltman, L. (2007). Bibliometric mapping of the computational intelligence field.
26 | #' International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems, 15(5), 625-645.\cr
27 | #' Van Eck, N. J., & Waltman, L. (2009). How to normalize cooccurrence data? An analysis of some well-known similarity measures.
28 | #' Journal of the American Society for Information Science and Technology, 60(8), 1635-1651.\cr
29 | #' Zitt, M., Bassecoulard, E., & Okubo, Y. (2000). Shadows of the past in international cooperation:
30 | #' Collaboration profiles of the top five producers of science. Scientometrics, 47(3), 627-657.\cr
31 | #'
32 | #'
33 | #' @param NetMatrix is a coupling matrix obtained by the network functions \code{\link{biblioNetwork}} or \code{\link{cocMatrix}}.
34 | #' @param type is a character. It can be "association", "jaccard", "inclusion", "salton" or "equivalence" to obtain the Association Strength, Jaccard,
35 | #' Inclusion, Salton or Equivalence similarity index, respectively. The default is \code{type = "association"}.
36 | #' @return a similarity matrix.
37 | #'
38 | #'
39 | #'
40 | #' @examples
41 | #'
42 | #' data(scientometrics, package = "bibliometrixData")
43 | #' NetMatrix <- biblioNetwork(scientometrics,
44 | #'   analysis = "co-occurrences",
45 | #'   network = "keywords", sep = ";"
46 | #' )
47 | #' S <- normalizeSimilarity(NetMatrix, type = "association")
48 | #'
49 | #' @seealso \code{\link{biblioNetwork}} function to compute a bibliographic network.
50 | #' @seealso \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
51 | #'
52 | #' @export
53 | 
54 | normalizeSimilarity <- function(NetMatrix, type = "association") {
55 |   diag <- Matrix::diag
56 |   D <- diag(NetMatrix)
57 |   # S=NetMatrix
58 |   switch(type,
59 |     association = {
60 |       S <- NetMatrix / ((outer(D, D, "*")))
61 |     },
62 |     inclusion = {
63 |       S <- NetMatrix / outer(D, D, function(a, b) {
64 |         mapply(min, a, b)
65 |       })
66 |     },
67 |     jaccard = {
68 |       S <- NetMatrix / (outer(D, D, "+") - NetMatrix)
69 |     },
70 |     salton = {
71 |       S <- NetMatrix / (sqrt(outer(D, D, "*")))
72 |     },
73 |     equivalence = {
74 |       S <- (NetMatrix / sqrt(outer(D, D, "*")))^2
75 |     }
76 |   )
77 | 
78 |   S <- as.matrix(S)
79 |   S[is.nan(S)] <- 0
80 |   S <- Matrix(S, sparse = TRUE)
81 |   # if (class(S)!="dgCMatrix"){S=as.matrix(S)}
82 | 
83 |   return(S)
84 | }
85 | 
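A small numeric check of the indices implemented above (toy values: two items co-occurring in 2 documents, occurring 4 and 5 times in total):

    cij <- 2; si <- 4; sj <- 5
    cij / (si * sj)           # association strength: 0.1
    cij / min(si, sj)         # inclusion (Simpson): 0.5
    cij / (si + sj - cij)     # Jaccard: ~0.286
    cij / sqrt(si * sj)       # Salton: ~0.447
    (cij / sqrt(si * sj))^2   # equivalence: 0.2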
--------------------------------------------------------------------------------
/man/convert2df.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/convert2df.R
3 | \name{convert2df}
4 | \alias{convert2df}
5 | \title{Import and Convert bibliographic export files and API objects.}
6 | \usage{
7 | convert2df(
8 |   file,
9 |   dbsource = "wos",
10 |   format = "plaintext",
11 |   remove.duplicates = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{file}{a character array containing a sequence of filenames coming from WoS, Scopus, Dimensions, Lens.org, OpenAlex and Pubmed. Alternatively, \code{file} can be
16 | an object resulting from an API query fetched from the Dimensions and PubMed databases:
17 | \tabular{lll}{
18 | a)\tab 'wos' \tab Clarivate Analytics WoS (in plaintext '.txt', Endnote Desktop '.ciw', or bibtex formats '.bib');\cr
19 | b)\tab 'scopus' \tab SCOPUS (exclusively in bibtex format '.bib');\cr
20 | c)\tab 'dimensions' \tab Digital Science Dimensions (in csv '.csv' or excel '.xlsx' formats);\cr
21 | d)\tab 'lens' \tab Lens.org (in csv '.csv');\cr
22 | e)\tab 'pubmed' \tab an object of the class \code{pubmedR (package pubmedR)} containing a collection obtained from a query performed with the pubmedR package;\cr
23 | f)\tab 'dimensions' \tab an object of the class \code{dimensionsR (package dimensionsR)} containing a collection obtained from a query performed with the dimensionsR package;\cr
24 | g)\tab 'openalex' \tab OpenAlex .csv file;\cr
25 | h)\tab 'openalex_api' \tab the filename and path to a list object returned by the openalexR package, containing a collection of works resulting from a query fetched from the OpenAlex database.}}
26 | 
27 | \item{dbsource}{is a character indicating the bibliographic database. \code{dbsource} can be \code{dbsource = c('cochrane','dimensions','generic','isi','openalex', 'pubmed','scopus','wos', 'lens')}. Default is \code{dbsource = "wos"}.}
28 | 
29 | \item{format}{is a character indicating the SCOPUS, Clarivate Analytics WoS, and other databases export file format. \code{format} can be \code{c('api', 'bibtex', 'csv', 'endnote','excel','plaintext', 'pubmed')}. Default is \code{format = "plaintext"}.}
30 | 
31 | \item{remove.duplicates}{is logical. If TRUE, the function will remove duplicated items checking by DOI and database ID.}
32 | }
33 | \value{
34 | a data frame with cases corresponding to articles and variables corresponding to Field Tags in the original export file.
35 | 
36 | E.g., if we have three files downloaded from Web of Science in plaintext format, \code{file} will be:
37 | 
38 | file <- c("filename1.txt", "filename2.txt", "filename3.txt")
39 | 
40 | Data frame columns are named using the standard Clarivate Analytics WoS Field Tag codify. The main field tags are:
41 | 
42 | \tabular{lll}{
43 | \code{AU}\tab \tab Authors\cr
44 | \code{TI}\tab \tab Document Title\cr
45 | \code{SO}\tab \tab Publication Name (or Source)\cr
46 | \code{JI}\tab \tab ISO Source Abbreviation\cr
47 | \code{DT}\tab \tab Document Type\cr
48 | \code{DE}\tab \tab Authors' Keywords\cr
49 | \code{ID}\tab \tab Keywords associated by SCOPUS or WoS database \cr
50 | \code{AB}\tab \tab Abstract\cr
51 | \code{C1}\tab \tab Author Address\cr
52 | \code{RP}\tab \tab Reprint Address\cr
53 | \code{CR}\tab \tab Cited References\cr
54 | \code{TC}\tab \tab Times Cited\cr
55 | \code{PY}\tab \tab Year\cr
56 | \code{SC}\tab \tab Subject Category\cr
57 | \code{UT}\tab \tab Unique Article Identifier\cr
58 | \code{DB}\tab \tab Database\cr}
59 | 
60 | for a complete list of field tags see: \href{https://www.bibliometrix.org/documents/Field_Tags_bibliometrix.pdf}{Field Tags used in bibliometrix}
61 | }
62 | \description{
63 | It converts SCOPUS, Clarivate Analytics WoS, Dimensions, Lens.org, PubMed and COCHRANE Database export files, or pubmedR and dimensionsR JSON/XML
64 | objects, into a data frame, with cases corresponding to articles and variables to Field Tags as used in WoS.
65 | }
66 | \examples{
67 | 
68 | # Example:
69 | # Import and convert a Web of Science collection from an export file in plaintext format:
70 | 
71 | \dontrun{
72 | files <- "https://www.bibliometrix.org/datasets/wos_plaintext.txt"
73 | 
74 | M <- convert2df(file = files, dbsource = "wos", format = "plaintext")
75 | }
76 | 
77 | }
78 | 
--------------------------------------------------------------------------------
/R/keywordGrowth.R:
--------------------------------------------------------------------------------
1 | #' Yearly occurrences of top keywords/terms
2 | #'
3 | #' It calculates yearly occurrences of top keywords/terms.
4 | #'
5 | #' @param M is a data frame obtained by the converting function \code{\link{convert2df}}.
6 | #' It is a data matrix with cases corresponding to articles and variables corresponding to Field Tags in the original WoS or SCOPUS file.
7 | #' @param Tag is a character object. It indicates one of the keyword field tags of the
8 | #' standard ISI WoS Field Tag codify (ID, DE, KW_Merged) or a field tag created by the \code{\link{termExtraction}} function (TI_TM, AB_TM, etc.).
9 | #' @param sep is the field separator character. This character separates strings in each keyword column of the data frame. The default is \code{sep = ";"}.
10 | #' @param top is a numeric. It indicates the number of top keywords to analyze. The default value is 10.
11 | #' @param cdf is a logical. If TRUE, the function calculates the cumulative occurrences distribution.
12 | #' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.
13 | #' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
14 | #' @return an object of class \code{data.frame}
15 | #' @examples
16 | #'
17 | #' data(scientometrics, package = "bibliometrixData")
18 | #' topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5, cdf = TRUE)
19 | #' topKW
20 | #'
21 | #' # Plotting results
22 | #' \dontrun{
23 | #' install.packages("reshape2")
24 | #' library(reshape2)
25 | #' library(ggplot2)
26 | #' DF <- melt(topKW, id = "Year")
27 | #' ggplot(DF, aes(Year, value, group = variable, color = variable)) + geom_line()
28 | #' }
29 | #'
30 | #' @export
31 | KeywordGrowth <- function(M, Tag = "ID", sep = ";", top = 10, cdf = TRUE, remove.terms = NULL, synonyms = NULL) {
32 |   i <- which(names(M) == Tag)
33 |   PY <- as.numeric(M$PY)
34 |   Tab <- (strsplit(as.character(M[, i]), sep))
35 |   Y <- rep(PY, lengths(Tab))
36 |   A <- data.frame(Tab = unlist(Tab), Y = Y)
37 |   A$Tab <- trim.leading(A$Tab)
38 |   A <- A[A$Tab != "", ]
39 |   A <- A[!is.na(A$Y), ]
40 | 
41 |   ### remove terms
42 |   terms <- data.frame(Tab = toupper(remove.terms))
43 |   A <- anti_join(A, terms)
44 |   # end of block
45 | 
46 |   ### Merge synonyms in the vector synonyms
47 |   if (length(synonyms) > 0 & is.character(synonyms)) {
48 |     s <- strsplit(toupper(synonyms), ";")
49 |     snew <- trimws(unlist(lapply(s, function(l) l[1])))
50 |     sold <- (lapply(s, function(l) trimws(l[-1])))
51 |     for (i in 1:length(s)) {
52 |       A <- A %>%
53 |         mutate(
54 |           # Tab = str_replace_all(Tab, paste(sold[[i]], collapse="|",sep=""),snew[i])
55 |           # Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i]),
56 |           Tab = stringi::stri_replace_all_regex(Tab, stringi::stri_replace_all_regex(stringi::stri_replace_all_regex(paste(sold[[i]], collapse = "|", sep = ""), "\\(", "\\\\("), "\\)", "\\\\)"), snew[i])
57 |         )
58 |     }
59 |   }
60 |   # end of block
61 | 
62 |   Ymin <- min(A$Y)
63 |   Ymax <- max(A$Y)
64 |   Year <- Ymin:Ymax
65 |   if (top == Inf) top <- length(unique(A$Tab))
66 |   Tab <- names(sort(table(A$Tab), decreasing = TRUE))[1:top]
67 | 
68 |   words <- matrix(0, length(Year), top + 1)
69 |   words <- data.frame(words)
70 |   names(words) <- c("Year", Tab)
71 |   words[, 1] <- Year
72 |   for (j in 1:length(Tab)) {
73 |     word <- (table(A[A$Tab %in% Tab[j], 2]))
74 |     words[, j + 1] <- trim.years(word, Year, cdf)
75 |   }
76 |   return(words)
77 | }
78 | 
79 | trim.years <- function(w, Year, cdf) {
80 |   Y <- as.numeric(names(w))
81 |   W <- matrix(0, length(Year), 1)
82 | 
83 |   for (i in 1:length(Year)) {
84 |     if (length(Y) > 0 && Y[1] == Year[i]) { # check length first to avoid an NA condition when Y is exhausted
85 |       W[i, 1] <- w[1]
86 |       Y <- Y[-1]
87 |       w <- w[-1]
88 |     }
89 |   }
90 |   if (isTRUE(cdf)) W <- cumsum(W)
91 |   names(W) <- Year
92 |   W <- data.frame(W)
93 |   return(W)
94 | }
95 | 
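A short sketch of the remove.terms and synonyms arguments described above (the terms chosen are purely illustrative):

    data(scientometrics, package = "bibliometrixData")
    syn <- c("citation analysis; citations analysis")  # merged into the first term
    topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5,
                           remove.terms = c("science"), synonyms = syn)
    head(topKW)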
--------------------------------------------------------------------------------
/man/thematicMap.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/thematicMap.R
3 | \name{thematicMap}
4 | \alias{thematicMap}
5 | \title{Create a thematic map}
6 | \usage{
7 | thematicMap(
8 |   M,
9 |   field = "ID",
10 |   n = 250,
11 |   minfreq = 5,
12 |   ngrams = 1,
13 |   stemming = FALSE,
14 |   size = 0.5,
15 |   n.labels = 1,
16 |   community.repulsion = 0.5,
17 |   repel = TRUE,
18 |   remove.terms = NULL,
19 |   synonyms = NULL,
20 |   cluster = "louvain",
21 |   subgraphs = FALSE,
22 |   seed = 1234
23 | )
24 | }
25 | \arguments{
26 | \item{M}{is a bibliographic dataframe.}
27 | 
28 | \item{field}{is the textual attribute used to build up the thematic map. It can be \code{field = c("ID","DE","KW_Merged","TI", "AB")}.
29 | }
30 | 
31 | \item{n}{is an integer. It indicates the number of terms to include in the analysis.}
32 | 
33 | \item{minfreq}{is an integer. It indicates the minimum frequency (per thousand) of a cluster. It is a number in the range (0,1000).}
34 | 
35 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
36 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
37 | 
38 | \item{stemming}{is logical. If it is TRUE the words (from titles or abstracts) will be stemmed (using Porter's algorithm).}
39 | 
40 | \item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}
41 | 
42 | \item{n.labels}{is an integer. It indicates how many labels to associate with each cluster. Default is \code{n.labels = 1}.}
43 | 
44 | \item{community.repulsion}{is a real number between 0 and 1. It indicates the repulsion force among network communities. Default is \code{community.repulsion = 0.5}.}
45 | 
46 | \item{repel}{is logical. If it is TRUE ggplot uses geom_label_repel instead of geom_label.}
47 | 
48 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}
49 | 
50 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
51 | 
52 | \item{cluster}{is a character. It indicates the type of cluster to perform among ("optimal", "louvain","leiden", "infomap","edge_betweenness","walktrap", "spinglass", "leading_eigen", "fast_greedy").}
53 | 
54 | \item{subgraphs}{is a logical. If TRUE cluster subgraphs are returned.}
55 | 
56 | \item{seed}{is an integer. It indicates the seed for random number generation. Default is \code{seed = 1234}.}
57 | }
58 | \value{
59 | a list containing:
60 | \tabular{lll}{
61 | \code{map}\tab \tab The thematic map as ggplot2 object\cr
62 | \code{clusters}\tab \tab Centrality and Density values for each cluster. \cr
63 | \code{words}\tab \tab A list of the words belonging to each cluster\cr
64 | \code{nclust}\tab \tab The number of clusters\cr
65 | \code{net}\tab \tab A list containing the network output (as provided by the networkPlot function)}
66 | }
67 | \description{
68 | It creates a thematic map based on co-word network analysis and clustering.
69 | The methodology is inspired by the proposal of Cobo et al. (2011).
70 | }
71 | \details{
72 | \code{thematicMap} starts from a co-occurrence keyword network to plot in a
73 | two-dimensional map the typological themes of a domain.\cr\cr
74 | 
75 | Reference:\cr
76 | Cobo, M. J., Lopez-Herrera, A. G., Herrera-Viedma, E., & Herrera, F. (2011). An approach for detecting, quantifying,
77 | and visualizing the evolution of a research field: A practical application to the fuzzy sets theory field.
Journal of Informetrics, 5(1), 146-166.\cr
78 | }
79 | \examples{
80 | 
81 | \dontrun{
82 | data(management, package = "bibliometrixData")
83 | res <- thematicMap(management, field = "ID", n = 250, minfreq = 5, size = 0.5, repel = TRUE)
84 | plot(res$map)
85 | plot(res$net$graph)
86 | }
87 | 
88 | }
89 | \seealso{
90 | \code{\link{biblioNetwork}} function to compute a bibliographic network.
91 | 
92 | \code{\link{cocMatrix}} to compute a bibliographic bipartite network.
93 | 
94 | \code{\link{networkPlot}} to plot a bibliographic network.
95 | }
96 | 
--------------------------------------------------------------------------------
/man/termExtraction.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/termExtraction.R
3 | \name{termExtraction}
4 | \alias{termExtraction}
5 | \title{Term extraction tool from textual fields of a manuscript}
6 | \usage{
7 | termExtraction(
8 |   M,
9 |   Field = "TI",
10 |   ngrams = 1,
11 |   stemming = FALSE,
12 |   language = "english",
13 |   remove.numbers = TRUE,
14 |   remove.terms = NULL,
15 |   keep.terms = NULL,
16 |   synonyms = NULL,
17 |   verbose = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{M}{is a data frame obtained by the converting function \code{\link{convert2df}}.
22 | It is a data matrix with cases corresponding to articles and variables corresponding to Field Tags in the original WoS or SCOPUS file.}
23 | 
24 | \item{Field}{is a character object. It indicates the field tag of textual data:
25 | \tabular{lll}{
26 | \code{"TI"}\tab \tab Manuscript title\cr
27 | \code{"AB"}\tab \tab Manuscript abstract\cr
28 | \code{"ID"}\tab \tab Manuscript keywords plus\cr
29 | \code{"DE"}\tab \tab Manuscript author's keywords}
30 | The default is \code{Field = "TI"}.}
31 | 
32 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
33 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
34 | 
35 | \item{stemming}{is logical. If TRUE the Porter Stemming algorithm is applied to all extracted terms. The default is \code{stemming = FALSE}.}
36 | 
37 | \item{language}{is a character. It is the language of textual contents ("english", "german","italian","french","spanish"). The default is \code{language = "english"}.}
38 | 
39 | \item{remove.numbers}{is logical. If TRUE all numbers are deleted from the documents before term extraction. The default is \code{remove.numbers = TRUE}.}
40 | 
41 | \item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the corpus after term extraction. The default is \code{remove.terms = NULL}.}
42 | 
43 | \item{keep.terms}{is a character vector. It contains a list of compound words (formed by two or more terms) to keep in their original form in the term extraction process. The default is \code{keep.terms = NULL}.}
44 | 
45 | \item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
46 | 
47 | \item{verbose}{is logical. If TRUE the function prints the most frequent terms extracted from documents. The default is \code{verbose = TRUE}.}
48 | }
49 | \value{
50 | the bibliometric data frame with a new column containing the terms extracted from the field tag indicated in the argument \code{Field}.
51 | }
52 | \description{
53 | It extracts terms from a text field (abstract, title, author's keywords, etc.) of a bibliographic data frame.
54 | }
55 | \examples{
56 | # Example 1: Term extraction from titles
57 | 
58 | data(scientometrics, package = "bibliometrixData")
59 | 
60 | # vector of compound words
61 | keep.terms <- c("co-citation analysis", "bibliographic coupling")
62 | 
63 | # term extraction
64 | scientometrics <- termExtraction(scientometrics,
65 |   Field = "TI", ngrams = 1,
66 |   remove.numbers = TRUE, remove.terms = NULL, keep.terms = keep.terms, verbose = TRUE
67 | )
68 | 
69 | # terms extracted from the first 10 titles
70 | scientometrics$TI_TM[1:10]
71 | 
72 | 
73 | # Example 2: Term extraction from abstracts
74 | 
75 | data(scientometrics)
76 | 
77 | # term extraction
78 | scientometrics <- termExtraction(scientometrics,
79 |   Field = "AB", ngrams = 2,
80 |   stemming = TRUE, language = "english",
81 |   remove.numbers = TRUE, remove.terms = NULL, keep.terms = NULL, verbose = TRUE
82 | )
83 | 
84 | # terms extracted from the first abstract
85 | scientometrics$AB_TM[1]
86 | 
87 | # Example 3: Term extraction from keywords with synonyms
88 | 
89 | data(scientometrics)
90 | 
91 | # vector of synonyms
92 | synonyms <- c("citation; citation analysis", "h-index; index; impact factor")
93 | 
94 | # term extraction
95 | scientometrics <- termExtraction(scientometrics,
96 |   Field = "ID", ngrams = 1,
97 |   synonyms = synonyms, verbose = TRUE
98 | )
99 | 
100 | }
101 | \seealso{
102 | \code{\link{convert2df}} to import and convert a WoS or SCOPUS export file into a bibliographic data frame.
103 | 
104 | \code{\link{biblioAnalysis}} function for bibliometric analysis
105 | }
106 | 
--------------------------------------------------------------------------------
/man/thematicEvolution.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/thematicEvolution.R
3 | \name{thematicEvolution}
4 | \alias{thematicEvolution}
5 | \title{Perform a Thematic Evolution Analysis}
6 | \usage{
7 | thematicEvolution(
8 |   M,
9 |   field = "ID",
10 |   years,
11 |   n = 250,
12 |   minFreq = 2,
13 |   size = 0.5,
14 |   ngrams = 1,
15 |   stemming = FALSE,
16 |   n.labels = 1,
17 |   repel = TRUE,
18 |   remove.terms = NULL,
19 |   synonyms = NULL,
20 |   cluster = "louvain",
21 |   seed = 1234,
22 |   assign.evolution.colors = list(assign = TRUE, measure = "weighted")
23 | )
24 | }
25 | \arguments{
26 | \item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.}
27 | 
28 | \item{field}{is a character object. It indicates the content field to use. Field can be one of \code{c("ID","DE","KW_Merged","TI","AB")}. Default value is \code{field = "ID"}.}
29 | 
30 | \item{years}{is a numeric vector of one or more unique cut points.}
31 | 
32 | \item{n}{is numerical. It indicates the number of words to use in the network analysis.}
33 | 
34 | \item{minFreq}{is numerical. It indicates the minimum frequency of words included in a cluster.}
35 | 
36 | \item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}
37 | 
38 | \item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
39 | An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. Default value is \code{ngrams = 1}.}
40 | 
41 | \item{stemming}{is logical.

}
\seealso{
\code{\link{convert2df}} to import and convert a WoS or SCOPUS export file into a bibliographic data frame.

\code{\link{biblioAnalysis}} function for bibliometric analysis.
}
--------------------------------------------------------------------------------
/man/thematicEvolution.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/thematicEvolution.R
\name{thematicEvolution}
\alias{thematicEvolution}
\title{Perform a Thematic Evolution Analysis}
\usage{
thematicEvolution(
  M,
  field = "ID",
  years,
  n = 250,
  minFreq = 2,
  size = 0.5,
  ngrams = 1,
  stemming = FALSE,
  n.labels = 1,
  repel = TRUE,
  remove.terms = NULL,
  synonyms = NULL,
  cluster = "louvain",
  seed = 1234,
  assign.evolution.colors = list(assign = TRUE, measure = "weighted")
)
}
\arguments{
\item{M}{is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.}

\item{field}{is a character object. It indicates the content field to use. Field can be one of c("ID", "DE", "KW_Merged", "TI", "AB"). The default value is \code{field="ID"}.}

\item{years}{is a numeric vector of one or more unique cut points.}

\item{n}{is numerical. It indicates the number of words to use in the network analysis.}

\item{minFreq}{is numerical. It indicates the minimum frequency of words included in a cluster.}

\item{size}{is numerical. It indicates the size of the cluster circles and is a number in the range (0.01,1).}

\item{ngrams}{is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
An n-gram is a contiguous sequence of n terms. The function can extract n-grams composed of 1, 2, 3 or 4 terms. The default value is \code{ngrams=1}.}

\item{stemming}{is logical. If TRUE, the words (from titles or abstracts) are stemmed using Porter's algorithm.}

\item{n.labels}{is an integer. It indicates how many labels to associate with each cluster. Default is \code{n.labels = 1}.}

\item{repel}{is logical. If TRUE, ggplot uses geom_label_repel instead of geom_label.}

\item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}

\item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}

\item{cluster}{is a character. It indicates the clustering algorithm to use, among ("optimal", "louvain", "leiden", "infomap", "edge_betweenness", "walktrap", "spinglass", "leading_eigen", "fast_greedy").}

\item{seed}{is numerical. It indicates the seed of the random number generator, used to always obtain the same results. The default value is \code{seed = 1234}.}

\item{assign.evolution.colors}{is a list. If \code{assign.evolution.colors = list(assign = TRUE)}, colors are assigned to lineages based on the highest weighted inclusion value. If a list is provided, it must contain the argument \code{assign} (TRUE or FALSE) and, optionally, \code{measure} (one of "inclusion", "stability" or "weighted").
Default is \code{assign.evolution.colors = list(assign = TRUE, measure = "weighted")}. If \code{assign = FALSE}, the \code{measure} argument is ignored.}
}
\value{
a list containing:
\tabular{lll}{
\code{nets}\tab \tab The thematic nexus graph for each comparison\cr
\code{incMatrix}\tab \tab Some useful statistics about the thematic nexus}
}
\description{
It performs a Thematic Evolution Analysis based on co-word network analysis and clustering.
The methodology is inspired by the proposal of Cobo et al. (2011).
}
\details{
\code{\link{thematicEvolution}} starts from two or more thematic maps created by the \code{\link{thematicMap}} function.\cr\cr

Reference:\cr
Cobo, M. J., Lopez-Herrera, A. G., Herrera-Viedma, E., & Herrera, F. (2011). An approach for detecting, quantifying,
and visualizing the evolution of a research field: A practical application to the fuzzy sets theory field. Journal of Informetrics, 5(1), 146-166.\cr
}
\examples{
\dontrun{
data(management, package = "bibliometrixData")
years <- c(2004, 2008, 2015)

nexus <- thematicEvolution(management, field = "DE", years = years, n = 100, minFreq = 2)
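
# Added sketch (not part of the original example): plot the evolution map.
# The "Nodes" and "Edges" element names are assumptions based on recent
# bibliometrix versions.
plotThematicEvolution(nexus$Nodes, nexus$Edges)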
}

}
\seealso{
\code{\link{thematicMap}} function to create a thematic map based on co-word network analysis and clustering.

\code{\link{cocMatrix}} to compute a bibliographic bipartite network.

\code{\link{networkPlot}} to plot a bibliographic network.
}
--------------------------------------------------------------------------------
/man/cocMatrix.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cocMatrix.R
\name{cocMatrix}
\alias{cocMatrix}
\title{Bibliographic bipartite network matrices}
\usage{
cocMatrix(
  M,
  Field = "AU",
  type = "sparse",
  n = NULL,
  sep = ";",
  binary = TRUE,
  short = FALSE,
  remove.terms = NULL,
  synonyms = NULL
)
}
\arguments{
\item{M}{is a data frame obtained by the converting function
\code{\link{convert2df}}. It is a data matrix with cases corresponding to
articles and variables corresponding to the Field Tags in the original WoS or SCOPUS file.}

\item{Field}{is a character object. It indicates one of the field tags of the
standard ISI WoS Field Tag codification. Field can be equal to one of these tags:
\tabular{lll}{ \code{AU}\tab \tab Authors\cr \code{SO}\tab \tab
Publication Name (or Source)\cr \code{JI}\tab \tab ISO Source
Abbreviation\cr \code{DE}\tab \tab Author Keywords\cr \code{ID}\tab
\tab Keywords associated by the WoS or SCOPUS database \cr
\code{KW_Merged}\tab \tab All Keywords (merged by DE and ID) \cr
\code{CR}\tab \tab Cited References}

for a complete list of field tags see:
\href{https://www.bibliometrix.org/documents/Field_Tags_bibliometrix.pdf}{Field Tags used in bibliometrix}\cr\cr}

\item{type}{indicates the output format of co-occurrences: \tabular{lll}{
\code{type = "matrix"} \tab \tab produces an object of class
\code{matrix}\cr \code{type = "sparse"} \tab \tab produces an object of
class \code{dgCMatrix} of the package \code{Matrix}. The "sparse"
argument generates a compact representation of the matrix.}}

\item{n}{is an integer. It indicates the number of items to select. If \code{n = NULL}, all items are selected.}

\item{sep}{is the field separator character. This character separates strings in each
column of the data frame. The default is \code{sep = ";"}.}

\item{binary}{is a logical. If TRUE, each cell contains a 0/1 value. If FALSE, each cell contains the frequency.}

\item{short}{is a logical. If TRUE, all items with frequency < 2 are deleted to reduce the matrix size.}

\item{remove.terms}{is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.}

\item{synonyms}{is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.}
}
\value{
a bipartite network matrix with cases corresponding to manuscripts and variables corresponding to the
objects extracted from the Tag \code{Field}.
}
\description{
\code{cocMatrix} computes occurrences between elements of a Tag Field from a bibliographic data frame. The manuscript is the unit of analysis.
}
\details{
This occurrence matrix represents a bipartite network, which can be transformed into a collection of bibliographic
networks such as coupling, co-citation, etc.

The function follows the approach proposed by Batagelj & Cerinsek (2013) and Aria & Cuccurullo (2017).\cr\cr

References:\cr
Batagelj, V., & Cerinsek, M. (2013). On bibliographic networks. Scientometrics, 96(3), 845-864.\cr
Aria, M., & Cuccurullo, C. (2017). bibliometrix: An R-tool for comprehensive science mapping analysis. Journal of Informetrics, 11(4), 959-975.\cr
}
\examples{
# EXAMPLE 1: Articles x Authors occurrence matrix

data(scientometrics, package = "bibliometrixData")
WA <- cocMatrix(scientometrics, Field = "AU", type = "sparse", sep = ";")

# EXAMPLE 2: Articles x Cited References occurrence matrix

# data(scientometrics, package = "bibliometrixData")

# WCR <- cocMatrix(scientometrics, Field = "CR", type = "sparse", sep = ";")

# EXAMPLE 3: Articles x Cited First Authors occurrence matrix

# data(scientometrics, package = "bibliometrixData")
# scientometrics <- metaTagExtraction(scientometrics, Field = "CR_AU", sep = ";")
# WCR <- cocMatrix(scientometrics, Field = "CR_AU", type = "sparse", sep = ";")
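
# EXAMPLE 4 (added sketch): Articles x Keywords occurrence matrix with
# cell frequencies instead of 0/1 values (binary = FALSE)

# data(scientometrics, package = "bibliometrixData")
# WKW <- cocMatrix(scientometrics, Field = "ID", type = "sparse", binary = FALSE)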

}
\seealso{
\code{\link{convert2df}} to import and convert an ISI or SCOPUS
export file into a data frame.

\code{\link{biblioAnalysis}} to perform a bibliometric analysis.

\code{\link{biblioNetwork}} to compute a bibliographic network.
}
--------------------------------------------------------------------------------