├── docs ├── notebooks ├── images │ ├── SDG_detector.png │ ├── Inspection_Accuracy.png │ ├── SDG-Icons-2019_WEB │ │ ├── E-WEB-Goal-01.png │ │ ├── E-WEB-Goal-02.png │ │ ├── E-WEB-Goal-03.png │ │ ├── E-WEB-Goal-04.png │ │ ├── E-WEB-Goal-05.png │ │ ├── E-WEB-Goal-06.png │ │ ├── E-WEB-Goal-07.png │ │ ├── E-WEB-Goal-08.png │ │ ├── E-WEB-Goal-09.png │ │ ├── E-WEB-Goal-10.png │ │ ├── E-WEB-Goal-11.png │ │ ├── E-WEB-Goal-12.png │ │ ├── E-WEB-Goal-13.png │ │ ├── E-WEB-Goal-14.png │ │ ├── E-WEB-Goal-15.png │ │ ├── E-WEB-Goal-16.png │ │ ├── E-WEB-Goal-17.png │ │ └── E-WEB-Goal-18.png │ └── example_plots │ │ ├── plot_sdg_bar_example.png │ │ └── plot_sdg_map_example.png └── accuracy_evaluation │ ├── Inspection_Accuracy.png │ ├── Inspection_InterExpert.png │ ├── data │ ├── task1_R1_done │ │ ├── task1_MC.xlsx │ │ ├── task1_VF.xlsx │ │ ├── task1_YL.xlsx │ │ ├── task1_YZ.xlsx │ │ └── desktop.ini │ ├── task1_R2_done │ │ ├── task1_R2_MC.xlsx │ │ ├── task1_R2_VF.xlsx │ │ ├── task1_R2_YL.xlsx │ │ ├── task1_R2_YZ.xlsx │ │ └── desktop.ini │ ├── task1_R3_done │ │ ├── task1_R3_MC.xlsx │ │ ├── task1_R3_VF.xlsx │ │ ├── task1_R3_YL.xlsx │ │ ├── task1_R3_YZ.xlsx │ │ └── desktop.ini │ ├── results_accuracy_task1_R1_done.xlsx │ ├── results_accuracy_task1_R2_done.xlsx │ ├── results_accuracy_task1_R3_done.xlsx │ ├── results_intercoder_task1_R1_done.xlsx │ ├── results_intercoder_task1_R2_done.xlsx │ └── results_intercoder_task1_R3_done.xlsx │ ├── SDGdetector_Accuracy_Evaluation.pdf │ └── validation_analysis.Rmd ├── .github ├── .gitignore └── workflows │ └── draft-pdf.yml ├── .gitignore ├── data ├── shp.rda ├── SDG_keys.rda ├── sdgstat.rda ├── sdg_icons.rda ├── codelist_panel.rda ├── country_region_names.RData └── list_of_un_goals_targets.rda ├── paper ├── paper.pdf ├── figure1.png ├── paper.md └── paper.bib ├── tests ├── testthat.R └── testthat │ ├── Rplots.pdf │ ├── test-plot_sdg_bar.R │ ├── test-sdg_icon.R │ ├── test-sdg_color.R │ ├── test-detect_region.R │ ├── test-SDGdetector.R │ ├── 
test-add_sdg_pattern.R │ ├── test-plot_sdg_map.R │ └── test-helper_SDG_search_terms.R ├── cran-comments.md ├── inst ├── extdata │ ├── E-WEB-Goal-01.png │ ├── E-WEB-Goal-02.png │ ├── E-WEB-Goal-03.png │ ├── E-WEB-Goal-04.png │ ├── E-WEB-Goal-05.png │ ├── E-WEB-Goal-06.png │ ├── E-WEB-Goal-07.png │ ├── E-WEB-Goal-08.png │ ├── E-WEB-Goal-09.png │ ├── E-WEB-Goal-10.png │ ├── E-WEB-Goal-11.png │ ├── E-WEB-Goal-12.png │ ├── E-WEB-Goal-13.png │ ├── E-WEB-Goal-14.png │ ├── E-WEB-Goal-15.png │ ├── E-WEB-Goal-16.png │ ├── E-WEB-Goal-17.png │ └── E-WEB-Goal-18.png └── CITATION ├── R ├── globals.R ├── SDG_keys.R ├── sdg_color.R ├── sdg_icon.R ├── data.R ├── add_sdg_pattern.R ├── plot_sdg_bar.R ├── plot_sdg_map.R ├── detect_region.R ├── summarize_sdg.R └── SDGdetector.R ├── NEWS.md ├── INDEX ├── .Rbuildignore ├── man ├── shp.Rd ├── sdgstat.Rd ├── sdg_color.Rd ├── sdg_icons.Rd ├── country_region_names.Rd ├── func_OR_vector.Rd ├── sdg_icon.Rd ├── plot_sdg_bar.Rd ├── codelist_panel.Rd ├── list_of_un_goals_targets.Rd ├── detect_region.Rd ├── plot_sdg_map.Rd ├── add_sdg_pattern.Rd ├── func_AND_vector.Rd ├── summarize_sdg.Rd ├── lookaround_nearby_n.Rd ├── SDG_keys.Rd └── SDGdetector.Rd ├── SDGdetector.Rproj ├── DESCRIPTION ├── NAMESPACE ├── README.md ├── .Rhistory └── LICENSE.md /docs/notebooks: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | -------------------------------------------------------------------------------- /data/shp.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/shp.rda -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/paper/paper.pdf -------------------------------------------------------------------------------- /data/SDG_keys.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/SDG_keys.rda -------------------------------------------------------------------------------- /data/sdgstat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/sdgstat.rda -------------------------------------------------------------------------------- /paper/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/paper/figure1.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(SDGdetector) 3 | 4 | test_check("SDGdetector") 5 | -------------------------------------------------------------------------------- /data/sdg_icons.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/sdg_icons.rda -------------------------------------------------------------------------------- /data/codelist_panel.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/codelist_panel.rda 
-------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/tests/testthat/Rplots.pdf -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | * This is a new release. 6 | -------------------------------------------------------------------------------- /docs/images/SDG_detector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG_detector.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-01.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-02.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-03.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-04.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-04.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-05.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-06.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-07.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-08.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-09.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-10.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-11.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-11.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-12.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-13.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-14.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-15.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-16.png -------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-17.png 
-------------------------------------------------------------------------------- /inst/extdata/E-WEB-Goal-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/inst/extdata/E-WEB-Goal-18.png -------------------------------------------------------------------------------- /data/country_region_names.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/country_region_names.RData -------------------------------------------------------------------------------- /data/list_of_un_goals_targets.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/data/list_of_un_goals_targets.rda -------------------------------------------------------------------------------- /docs/images/Inspection_Accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/Inspection_Accuracy.png -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("SDG_keys", "country_region_names")) 2 | utils::globalVariables(c("goal", "goal_id", "id", "target", "target_id")) 3 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/Inspection_Accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/Inspection_Accuracy.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-01.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-02.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-03.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-04.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-05.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-06.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-07.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-07.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-08.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-09.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-10.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-11.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-12.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-13.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-13.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-14.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-15.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-16.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-17.png -------------------------------------------------------------------------------- /docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/SDG-Icons-2019_WEB/E-WEB-Goal-18.png -------------------------------------------------------------------------------- /docs/images/example_plots/plot_sdg_bar_example.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/example_plots/plot_sdg_bar_example.png -------------------------------------------------------------------------------- /docs/images/example_plots/plot_sdg_map_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/images/example_plots/plot_sdg_map_example.png -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # SDGdetector 2.7.3 2 | 3 | ## New edits 4 | 5 | - added a new function `summarize_sdg` 6 | - fix the description of "return" in `SDGdetector` 7 | 8 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/Inspection_InterExpert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/Inspection_InterExpert.png -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R1_done/task1_MC.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R1_done/task1_MC.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R1_done/task1_VF.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R1_done/task1_VF.xlsx -------------------------------------------------------------------------------- 
/docs/accuracy_evaluation/data/task1_R1_done/task1_YL.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R1_done/task1_YL.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R1_done/task1_YZ.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R1_done/task1_YZ.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/SDGdetector_Accuracy_Evaluation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/SDGdetector_Accuracy_Evaluation.pdf -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R2_done/task1_R2_MC.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R2_done/task1_R2_MC.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R2_done/task1_R2_VF.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R2_done/task1_R2_VF.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R2_done/task1_R2_YL.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R2_done/task1_R2_YL.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R2_done/task1_R2_YZ.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R2_done/task1_R2_YZ.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R3_done/task1_R3_MC.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R3_done/task1_R3_MC.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R3_done/task1_R3_VF.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R3_done/task1_R3_VF.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R3_done/task1_R3_YL.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R3_done/task1_R3_YL.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R3_done/task1_R3_YZ.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/task1_R3_done/task1_R3_YZ.xlsx -------------------------------------------------------------------------------- /INDEX: 
-------------------------------------------------------------------------------- 1 | SDGdetector Identify SDGs in text 2 | detect_region Detect country or region names in text for further mapping 3 | plot_sdg_bar SDG bar plot 4 | plot_sdg_map SDG map plot 5 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_accuracy_task1_R1_done.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_accuracy_task1_R1_done.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_accuracy_task1_R2_done.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_accuracy_task1_R2_done.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_accuracy_task1_R3_done.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_accuracy_task1_R3_done.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_intercoder_task1_R1_done.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_intercoder_task1_R1_done.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_intercoder_task1_R2_done.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_intercoder_task1_R2_done.xlsx -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/results_intercoder_task1_R3_done.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yingjie4Science/SDGdetector/HEAD/docs/accuracy_evaluation/data/results_intercoder_task1_R3_done.xlsx -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^SDGdetector\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github$ 4 | ^cran-comments\.md$ 5 | ^CRAN-SUBMISSION$ 6 | ^_pkgdown\.yml$ 7 | ^docs$ 8 | ^pkgdown$ 9 | ^paper$ 10 | ^LICENSE\.md$ 11 | LICENSE 12 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_sdg_bar.R: -------------------------------------------------------------------------------- 1 | test_that("geom_bar works", { 2 | data("sdgstat") 3 | p <- plot_sdg_bar(sdgstat, sdg = "SDG", value = "Value") 4 | x <- ggplot2::layer_data(p) 5 | 6 | expect_false(x$flipped_aes[1]) 7 | }) 8 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R1_done/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\71.0.3.0\GoogleDriveFS.exe,23 4 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R2_done/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\71.0.3.0\GoogleDriveFS.exe,23 4 | 
-------------------------------------------------------------------------------- /docs/accuracy_evaluation/data/task1_R3_done/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\71.0.3.0\GoogleDriveFS.exe,23 4 | -------------------------------------------------------------------------------- /tests/testthat/test-sdg_icon.R: -------------------------------------------------------------------------------- 1 | test_that("Return the icon information of a specified SDG", { 2 | output <- sdg_icon(x = 17, res = 300) 3 | 4 | output_check <- magick::image_info(output) 5 | 6 | expect_equal( 7 | output_check$format, 8 | expected = "PNG" 9 | ) 10 | 11 | expect_equal( 12 | output_check$width, 13 | expected = 300 14 | ) 15 | }) 16 | -------------------------------------------------------------------------------- /man/shp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{shp} 5 | \alias{shp} 6 | \title{Datasets of shapefiles.} 7 | \format{ 8 | \code{shp}: A data frame with 241 rows and 6 variables 9 | } 10 | \usage{ 11 | shp 12 | } 13 | \description{ 14 | Datasets of shapefiles.
15 | } 16 | \author{ 17 | Yingjie Li \email{yingjieli.edu@gmail.com} 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/sdgstat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{sdgstat} 5 | \alias{sdgstat} 6 | \title{Datasets of SDG statistics.} 7 | \format{ 8 | \code{sdgstat}: A data frame with 62 rows and 4 variables 9 | } 10 | \usage{ 11 | sdgstat 12 | } 13 | \description{ 14 | Datasets of SDG statistics. 15 | } 16 | \author{ 17 | Yingjie Li \email{yingjieli.edu@gmail.com} 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/sdg_color.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sdg_color.R 3 | \name{sdg_color} 4 | \alias{sdg_color} 5 | \title{Color scheme for the 17 SDGs} 6 | \usage{ 7 | sdg_color(x) 8 | } 9 | \arguments{ 10 | \item{x}{A number, which indicates the SDG ID} 11 | } 12 | \value{ 13 | HTML color code of a specified SDG 14 | } 15 | \description{ 16 | Color scheme for the 17 SDGs 17 | } 18 | \examples{ 19 | sdg_color(1) 20 | sdg_color(x = 1:17) 21 | } 22 | -------------------------------------------------------------------------------- /man/sdg_icons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{sdg_icons} 5 | \alias{sdg_icons} 6 | \title{List SDG Icons} 7 | \format{ 8 | \code{sdg_icons}: External pointer of class "magick-image" 9 | } 10 | \source{ 11 | \url{https://www.un.org/sustainabledevelopment/wp-content/uploads/2019/01/SDG_Guidelines_AUG_2019_Final.pdf} 12 | } 13 |
\usage{ 14 | sdg_icons 15 | } 16 | \description{ 17 | List SDG Icons 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /SDGdetector.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/country_region_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{country_region_names} 5 | \alias{country_region_names} 6 | \title{Datasets of country and region names.} 7 | \format{ 8 | \code{country_region_names}: A data frame with 644 rows and 3 variables 9 | } 10 | \usage{ 11 | country_region_names 12 | } 13 | \description{ 14 | Datasets of country and region names. 
15 | } 16 | \author{ 17 | Yingjie Li \email{yingjieli.edu@gmail.com} 18 | } 19 | \keyword{dataset} 20 | -------------------------------------------------------------------------------- /man/func_OR_vector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_SDG_search_terms.R 3 | \name{func_OR_vector} 4 | \alias{func_OR_vector} 5 | \title{Use \code{OR} to Concatenate a Vector of Terms} 6 | \usage{ 7 | func_OR_vector(v) 8 | } 9 | \arguments{ 10 | \item{v}{a vector of characters} 11 | } 12 | \value{ 13 | A character 14 | } 15 | \description{ 16 | Use \code{OR} to Concatenate a Vector of Terms 17 | } 18 | \examples{ 19 | words <- c('apple', 'bean', 'food') 20 | func_OR_vector(v= words) 21 | } 22 | -------------------------------------------------------------------------------- /man/sdg_icon.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sdg_icon.R 3 | \name{sdg_icon} 4 | \alias{sdg_icon} 5 | \title{Icons for SDGs} 6 | \usage{ 7 | sdg_icon(x, res = 200) 8 | } 9 | \arguments{ 10 | \item{x}{Numeric code for each SDG, ranging from 1 to 17} 11 | 12 | \item{res}{Resolution of SDG icon. 
Default: \code{res = 200} indicates 13 | resizing proportionally to 200px} 14 | } 15 | \description{ 16 | The \code{sdg_icon} function provides the specific icon for each SDG 17 | } 18 | \examples{ 19 | sdg_icon(x = 17, res = 300) 20 | 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test-sdg_color.R: -------------------------------------------------------------------------------- 1 | test_that("Return the color for a specified SDG", { 2 | output <- sdg_color(x = 1) 3 | 4 | expect_equal( 5 | unname(output), 6 | expected = "#E5243B" 7 | ) 8 | }) 9 | 10 | 11 | test_that("Return the color for a list of specified SDGs", { 12 | output <- sdg_color(x = 1:17) 13 | 14 | expect_equal( 15 | unname(output), 16 | expected = c( 17 | "#E5243B", "#DDA63A", "#4C9F38", "#C5192D", "#FF3A21", "#26BDE2", "#FCC30B", 18 | "#A21942", "#FD6925", "#DD1367", "#FD9D24", "#BF8B2E", "#3F7E44", "#0A8DD9", 19 | "#56C02B", "#00689D", "#19486A" 20 | ) 21 | ) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/testthat/test-detect_region.R: -------------------------------------------------------------------------------- 1 | test_that("Detect country or region names in a string", { 2 | 3 | x = 'China and USA devoted the largest efforts on solar energy' 4 | 5 | output <- detect_region(x) 6 | 7 | expect_equal( 8 | output, 9 | expected = "China,USA" 10 | ) 11 | 12 | }) 13 | 14 | 15 | test_that("Detect country or region names in text from a dataframe", { 16 | 17 | x = data.frame(txt_col = c( 18 | 'China and USA devoted the largest efforts on solar energy', 19 | 'Congo needs to improve SDG 1 and 2' 20 | )) 21 | 22 | output <- detect_region(x, col = txt_col) 23 | 24 | expect_equal( 25 | output$region, 26 | expected = c("China,USA", "Congo") 27 | ) 28 | 29 | }) 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /man/plot_sdg_bar.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_sdg_bar.R 3 | \name{plot_sdg_bar} 4 | \alias{plot_sdg_bar} 5 | \title{SDG bar plot} 6 | \usage{ 7 | plot_sdg_bar(data, sdg = "sdg", value = "value", quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{data}{Data frame as the input} 11 | 12 | \item{sdg}{Vector with SDG code to be visualized.} 13 | 14 | \item{value}{The value, e.g., number of SDGs, to be shown in the bar plot} 15 | 16 | \item{quiet}{Logical. Suppress info message} 17 | } 18 | \value{ 19 | Returns the tool text outputs. 20 | } 21 | \description{ 22 | SDG bar plot 23 | } 24 | \examples{ 25 | data("sdgstat") 26 | plot_sdg_bar(sdgstat, sdg = "SDG", value = "Value") 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/codelist_panel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{codelist_panel} 5 | \alias{codelist_panel} 6 | \title{List of Names and ISO Code for Countries} 7 | \format{ 8 | \subsection{\code{codelist_panel}}{ 9 | 10 | A data frame with 28941 rows and 55 columns: 11 | \describe{ 12 | \item{country.name.en}{Country name in English} 13 | \item{iso2c, iso3c}{2 & 3 letter ISO country codes} 14 | \item{year}{Year} 15 | ...
16 | } 17 | } 18 | } 19 | \source{ 20 | \url{https://en.wikipedia.org/wiki/List_of_countries_and_territories_by_land_and_maritime_borders} 21 | } 22 | \usage{ 23 | codelist_panel 24 | } 25 | \description{ 26 | List of Names and ISO Code for Countries 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /R/SDG_keys.R: -------------------------------------------------------------------------------- 1 | #' SDG_keys 2 | #' 3 | #' @description 4 | #' Database of SDG search terms 5 | #' 6 | #' @details 7 | #' The search terms are developed at the “Target” level (SDG Goal/Target/Indicator) 8 | #' to extract SDG-related statements. These SDG search terms can be "direct mention", 9 | #' such as "SDG 1", or "indirect mention", which means a statement aligns with 10 | #' the description of certain SDGs or targets. For example, "Our company has embraced CO2 11 | #' emissions mitigation as a priority within our sustainability strategy" is an indirect 12 | #' mention of "SDG 13.a" ("Implement the commitment... in the context of meaningful 13 | #' mitigation actions and ...").
14 | #' 15 | #' @docType data 16 | #' 17 | #' @usage data(SDG_keys) 18 | #' 19 | #' @examples 20 | #' data(SDG_keys) 21 | 22 | "SDG_keys" 23 | -------------------------------------------------------------------------------- /man/list_of_un_goals_targets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{list_of_un_goals_targets} 5 | \alias{list_of_un_goals_targets} 6 | \title{The Names, ID, and Descriptions of all the 17 SDGs and 169 Targets} 7 | \format{ 8 | \subsection{\code{list_of_un_goals_targets}}{ 9 | 10 | A data frame with 169 rows and 3 columns: 11 | \describe{ 12 | \item{GoalID}{The ID of each SDG} 13 | \item{GoalName}{The name of each SDG} 14 | \item{target_id_un}{The name of each Target} 15 | \item{target_desc_un}{The description for each Target} 16 | } 17 | } 18 | } 19 | \source{ 20 | \url{https://unstats.un.org/sdgs/indicators/indicators-list/} 21 | } 22 | \usage{ 23 | list_of_un_goals_targets 24 | } 25 | \description{ 26 | The Names, ID, and Descriptions of all the 17 SDGs and 169 Targets 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/detect_region.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect_region.R 3 | \name{detect_region} 4 | \alias{detect_region} 5 | \title{Detect country or region names in text for further mapping} 6 | \usage{ 7 | detect_region(x, col) 8 | } 9 | \arguments{ 10 | \item{x}{Data frame or a string} 11 | 12 | \item{col}{Column name for text to be assessed} 13 | } 14 | \value{ 15 | Returns the tool text outputs. 16 | } 17 | \description{ 18 | Detect country or region names in text for further mapping. 
19 | } 20 | \examples{ 21 | x <- c("This paper explores the method and results from an independent 22 | evidence based assessment of Australia's progress towards the SDGs", 23 | "Last year alone, the United States experienced 14 separate billion-dollar 24 | disasters related to climate change") 25 | col <- data.frame(x) 26 | regions <- detect_region(x, col) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/plot_sdg_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_sdg_map.R 3 | \name{plot_sdg_map} 4 | \alias{plot_sdg_map} 5 | \title{SDG Map Plot} 6 | \usage{ 7 | plot_sdg_map(data, sdg = sdg, value = value, 8 | country = country, by_sdg = TRUE) 9 | } 10 | \arguments{ 11 | \item{data}{Data frame as the input} 12 | 13 | \item{sdg}{Vector with SDG code to be visualized.} 14 | 15 | \item{value}{The value, e.g., number of SDGs, to be shown in the thematic map} 16 | 17 | \item{country}{Countries that are associated with the SDGs.} 18 | 19 | \item{by_sdg}{If mapping by SDG, TRUE or FALSE.} 20 | } 21 | \value{ 22 | Returns the tool text outputs.
23 | } 24 | \description{ 25 | SDG map plot 26 | } 27 | \examples{ 28 | data("sdgstat") 29 | plot_sdg_map(sdgstat, 30 | sdg = "SDG", value = "Value", 31 | country = "Country", by_sdg = FALSE 32 | ) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/add_sdg_pattern.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_sdg_pattern.R 3 | \name{add_sdg_pattern} 4 | \alias{add_sdg_pattern} 5 | \title{Users Can Add Customized Patterns for Each SDG or Target} 6 | \usage{ 7 | add_sdg_pattern(sdg_id, x, operator = "AND", quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{sdg_id}{SDG Goal's ID or Target's ID, in the format of 'SDGx_y', e.g., SDG1_1, SDG2_general} 11 | 12 | \item{x}{A vector of strings} 13 | 14 | \item{operator}{'AND', 'OR' to combine a vector of keywords for identifying SDG Goals or Targets.} 15 | 16 | \item{quiet}{Logical. 
Suppress info message} 17 | } 18 | \value{ 19 | A regex string 20 | } 21 | \description{ 22 | Users Can Add Customized Patterns for Each SDG or Target 23 | } 24 | \examples{ 25 | terms_new <- c("improve", "farmer", "income") 26 | add_sdg_pattern(sdg_id = 'SDG1_2', x = terms_new, operator = 'AND') 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/func_AND_vector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_SDG_search_terms.R 3 | \name{func_AND_vector} 4 | \alias{func_AND_vector} 5 | \title{\strong{Last update on}: 3/31/2022} 6 | \usage{ 7 | func_AND_vector(v) 8 | } 9 | \arguments{ 10 | \item{v}{a vector of characters} 11 | } 12 | \value{ 13 | A character 14 | } 15 | \description{ 16 | \strong{New changes}: 17 | } 18 | \details{ 19 | Compared to the earlier version, we made the following changes 20 | \enumerate{ 21 | \item Instead of combining multiple term lists by \code{OR} for one particular target, 22 | it is more intuitive and accurate to add each alternative term list to the search 23 | term table or database directly. 24 | \item Added \verb{Look around} function to more accurately match SDG targets.
25 | } 26 | 27 | Use \code{AND} to Concatenate a Vector of Terms 28 | } 29 | \examples{ 30 | words <- c('apple', 'bean', 'food') 31 | func_AND_vector(v= words) 32 | } 33 | -------------------------------------------------------------------------------- /tests/testthat/test-SDGdetector.R: -------------------------------------------------------------------------------- 1 | test_that("Return data frames of correct size", { 2 | text <- 'our goal is to mitigate climate change, end poverty, and reduce inequality globally' 3 | df <- data.frame(col = c( 4 | 'our goal is to end poverty globally', 5 | 'this product contributes to slowing down climate change')) 6 | expect_true(ncol(SDGdetector(x = text)) == 3) 7 | expect_true(nrow(SDGdetector(x = text)) == 1) 8 | expect_true(ncol(SDGdetector(x = df, col = col)) == 3) 9 | expect_true(nrow(SDGdetector(x = df, col = col)) == 2) 10 | }) 11 | 12 | test_that("Detect indirect mentions of SDGs and associated targets", { 13 | text <- 'our goal is to mitigate climate change globally' 14 | df <- data.frame(col = c( 15 | 'our goal is to end poverty globally', 16 | 'this product contributes to slowing down climate change')) 17 | expect_equal(SDGdetector(x = text)$sdgs, expected = "SDG13_2,SDG13_general") 18 | expect_equal(SDGdetector(x = df, col = col)$sdgs, expected = c("SDG13_2,SDG13_general", "SDG1_2")) 19 | }) 20 | -------------------------------------------------------------------------------- /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | jobs: 4 | paper: 5 | runs-on: ubuntu-latest 6 | name: Paper Draft 7 | steps: 8 | - name: Checkout 9 | uses: actions/checkout@v2 10 | - name: Build draft PDF 11 | uses: openjournals/openjournals-draft-action@master 12 | with: 13 | journal: joss 14 | # This should be the path to the paper within your repo. 
## SDG name ---------------------------------------------------------------------------
# Labels "SDG1" ... "SDG17", used as names of the color vector below.
sdg_name <- paste0("SDG", seq_len(17))

## SDG colors -------------------------------------------------------------------------

### RGB
# One row per goal, in goal order (row i holds the color components of SDG i).
color_rgb <- data.frame(
  R = c(229, 221, 76, 197, 255, 38, 252, 162, 253, 221, 253, 191, 63, 10, 86, 0, 25),
  G = c(36, 166, 159, 25, 58, 189, 195, 25, 105, 19, 157, 139, 126, 141, 192, 104, 72),
  B = c(59, 58, 56, 45, 33, 226, 11, 66, 37, 103, 36, 46, 68, 217, 43, 157, 106)
)

### HEX
# The three-column data frame is passed as the first argument of rgb().
# `maxColorValue` is spelled out in full rather than relying on partial
# matching of `max`.
color_hex <- grDevices::rgb(color_rgb, maxColorValue = 255)
names(color_hex) <- sdg_name

#' Color scheme for the 17 SDGs
#'
#' Looks up the hex color code of one or more SDGs and, as a side effect,
#' displays the selected colors with `scales::show_col()`.
#'
#' @param x A number (or vector of numbers), which indicates the SDG ID
#'
#' @return HTML color code of a specified SDG, as a character vector named
#'   by SDG (e.g. `"SDG1"`)
#' @export
#'
#' @examples
#' sdg_color(1)
#' sdg_color(x = 1:17)
sdg_color <- function(x) {
  color <- color_hex[x]
  # Draw the selected swatches on the active graphics device.
  scales::show_col(color)
  return(color)
}
Please edit documentation in R/summarize_sdg.R 3 | \name{summarize_sdg} 4 | \alias{summarize_sdg} 5 | \title{Summarize results from SDGdetector at either the Goal level or Target level.} 6 | \usage{ 7 | summarize_sdg(data, sum_by = "target", quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{data}{Data frame or a string} 11 | 12 | \item{sum_by}{The group level to be chosen for data summary. Default parameter is 13 | "target", and can also set at "goal" level.} 14 | 15 | \item{quiet}{Logical. Suppress info message} 16 | } 17 | \value{ 18 | Data frame with at least one column named "SDG" or "Target", and one column \code{Freq} that 19 | represent the total hits. 20 | } 21 | \description{ 22 | Summarize results from SDGdetector at either the Goal level or Target level. 23 | } 24 | \examples{ 25 | library(SDGdetector) 26 | df <- data.frame(col = c( 27 | 'our goal is to end poverty globally', 28 | 'this product contributes to slowing down climate change')) 29 | data <- SDGdetector(x = df, col = col) 30 | summarize_sdg(data, sum_by = 'target', quiet = FALSE) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/lookaround_nearby_n.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_SDG_search_terms.R 3 | \name{lookaround_nearby_n} 4 | \alias{lookaround_nearby_n} 5 | \title{Look Around} 6 | \usage{ 7 | lookaround_nearby_n(word_ls1, word_ls2, n, exclude = "", third_AND_string = "") 8 | } 9 | \arguments{ 10 | \item{word_ls1}{is a string, which includes a list of words connected by "|" that indicates 'OR'} 11 | 12 | \item{word_ls2}{is a string, which includes a list of words connected by "|" that indicates 'OR'} 13 | 14 | \item{n}{is a number, indicates the number of words to look around} 15 | 16 | \item{exclude}{is a vector, including a list of words to be excluded from match} 17 | 18 | 
\item{third_AND_string}{similar to word_ls1 or word_ls2, it is a string that includes 19 | a list of words connected by "|" that indicates 'OR'} 20 | } 21 | \value{ 22 | A regex string 23 | } 24 | \description{ 25 | Look around to match pattern in a sentence 26 | } 27 | \examples{ 28 | con1 <- c('apple', 'bean', 'food') 29 | con2 <- c('big', 'delicious') 30 | lookaround_nearby_n(word_ls1 = con1, word_ls2 = con2, n = 2, exclude = "", third_AND_string = "") 31 | } 32 | -------------------------------------------------------------------------------- /man/SDG_keys.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SDG_keys.R, R/data.R 3 | \docType{data} 4 | \name{SDG_keys} 5 | \alias{SDG_keys} 6 | \title{SDG_keys} 7 | \format{ 8 | An object of class \code{data.frame} with 557 rows and 3 columns. 9 | 10 | \code{SDG_keys}: A data frame with 557 rows and 3 variables 11 | } 12 | \usage{ 13 | data(SDG_keys) 14 | 15 | SDG_keys 16 | } 17 | \description{ 18 | Database of SDG search terms 19 | 20 | Datasets of SDG keys. 21 | } 22 | \details{ 23 | The search terms are developed at the “Target” level (SDG Goal/Target/Indicator) 24 | to extract SDG-related statements. These SDG search terms can be "direct mention", 25 | such as "SDG 1", or "indirect mention", which means a statement aligns with 26 | the description of certain SDGs or targets. For example, "Our company has embraced CO2 27 | emissions mitigation as a priority within our sustainability strategy") is an indirect 28 | mention of "SDG 13.a" ("Implement the commitment... in the context of meaningful 29 | mitigation actions and ..."). 
30 | } 31 | \examples{ 32 | data(SDG_keys) 33 | } 34 | \author{ 35 | Yingjie Li \email{yingjieli.edu@gmail.com} 36 | } 37 | \keyword{dataset} 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /tests/testthat/test-add_sdg_pattern.R: -------------------------------------------------------------------------------- 1 | test_that("Add Customized Patterns for a SDG Target", { 2 | terms_new <- c("improve", "farmer", "income") 3 | 4 | output <- add_sdg_pattern( 5 | sdg_id = "SDG1_2", 6 | x = terms_new, 7 | operator = "AND" 8 | ) 9 | 10 | output_test_data <- tail(output, 1) 11 | 12 | expect_equal( 13 | output_test_data$SDG_keywords, 14 | expected = "(?=.*(?:improve))(?=.*(?:farmer))(?=.*(?:income))" 15 | ) 16 | 17 | expect_equal( 18 | output_test_data$match_tpye, 19 | expected = "user_defined" 20 | ) 21 | }) 22 | 23 | 24 | test_that("Add Customized Patterns for a SDG Target", { 25 | terms_new <- c("improve", "farmer", "income") 26 | 27 | output <- add_sdg_pattern( 28 | sdg_id = "SDG1_2", 29 | x = terms_new, 30 | operator = "OR" 31 | ) 32 | 33 | output_test_data <- tail(output, 1) 34 | 35 | expect_equal( 36 | output_test_data$SDG_keywords, 37 | expected = "(improve|farmer|income)" 38 | ) 39 | 40 | expect_equal( 41 | output_test_data$match_tpye, 42 | expected = "user_defined" 43 | ) 44 | }) 45 | 46 | 47 | test_that("Check if the input SDG name is correct", { 48 | terms_new <- c("improve", "farmer", "income") 49 | 50 | expect_error( 51 | add_sdg_pattern( 52 | sdg_id = "SDG1_12", 53 | x = terms_new, 54 | operator = "OR" 55 | ) 56 | ) 57 | }) 58 | 59 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_sdg_map.R: -------------------------------------------------------------------------------- 1 | test_that("test if geom_sf works", { 2 | data("sdgstat") 3 | p <- plot_sdg_map( 4 | data = sdgstat, 5 | sdg = "SDG", 6 | value = "Value", 7 | country = "Country", 8 | 
by_sdg = FALSE 9 | ) 10 | 11 | expect_identical(p$layers[[1]]$show.legend, NA) 12 | expect_identical(p$layers[[1]]$computed_geom_params$legend, NULL) 13 | 14 | # Perform minimal tests 15 | expect_error(regexp = NA, p) 16 | }) 17 | 18 | test_that("Verify the input data for mapping", { 19 | data("sdgstat") 20 | df <- sdgstat[1:3] 21 | 22 | # expect_message( 23 | # plot_sdg_map( 24 | # data = df, 25 | # sdg = "SDG", 26 | # value = "Value", 27 | # country = "Country", 28 | # by_sdg = F 29 | # ), 30 | # "The input data must contain a minimum of three columns, including SDG name, numeric value, and geographic location." 31 | # ) 32 | 33 | 34 | expect_error( 35 | plot_sdg_map( 36 | data = df, 37 | sdg = "SDG", 38 | value = "Value", 39 | country = "Country", 40 | by_sdg = F), 41 | "Data object must include columns [Country].", fixed=TRUE) 42 | 43 | }) 44 | 45 | 46 | test_that("test plot mapping by sdgs", { 47 | data("sdgstat") 48 | p <- plot_sdg_map( 49 | data = sdgstat, 50 | sdg = "SDG", 51 | value = "Value", 52 | country = "Country", 53 | by_sdg = T 54 | ) 55 | 56 | expect_error(regexp = NA, p) 57 | }) 58 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite SDGdetector in publications use:") 2 | 3 | bibentry( 4 | bibtype = "Article", 5 | title = "SDGdetector: an R-based text mining tool for quantifying efforts toward Sustainable Development Goals", 6 | author = c( 7 | person("Yingjie", "Li", email = "yingjieli.edu@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8401-0649")), 8 | person("Veronica", "Frans", email = "verofrans@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5634-3956")), 9 | person("Yongze", "Song", email = "yongze.song@outlook.com", role = "aut", comment = c(ORCID = "0000-0003-3420-9622")), 10 | person("Meng", "Cai", email = "mengcai24601@gmail.com", role = "aut", comment = 
c(ORCID = "0000-0002-8318-572X")), 11 | person("Yuqian", "Zhang", email = "zhan1364@msu.edu", role = "aut", comment = c(ORCID = "0000-0001-7576-2526")), 12 | person("Jianguo", "Liu", email = "liuji@msu.edu", role = "aut", comment = c(ORCID = "0000-0001-6344-0087")) 13 | ), 14 | journal = "Journal of Open Source Software", 15 | doi = "10.21105/joss.05124", 16 | year = 2023, 17 | volume = 8, 18 | number = 84, 19 | pages = 5124, 20 | url = "https://github.com/Yingjie4Science/SDGdetector", 21 | textVersion = paste("Li, Y., Frans, V.F., Song, Y., Cai, M., Zhang, Y., Liu, J., 2023. SDGdetector: an R-based text mining tool for quantifying efforts toward Sustainable Development Goals. Journal of Open Source Software 8(84), 5124. https://doi.org/10.21105/joss.05124" 22 | 23 | ) 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: SDGdetector 2 | Title: Detect SDGs and Targets in Text 3 | Version: 2.7.3 4 | Authors@R: c( 5 | person("Yingjie", "Li", email = "yingjieli.edu@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8401-0649")), 6 | person("Veronica", "Frans", email = "verofrans@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5634-3956")), 7 | person("Yongze", "Song", email = "yongze.song@outlook.com", role = "aut", comment = c(ORCID = "0000-0003-3420-9622")), 8 | person("Meng", "Cai", email = "mengcai24601@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-8318-572X")), 9 | person("Yuqian", "Zhang", email = "zhan1364@msu.edu", role = "aut", comment = c(ORCID = "0000-0001-7576-2526")), 10 | person("Jianguo", "Liu", email = "liuji@msu.edu", role = "aut", comment = c(ORCID = "0000-0001-6344-0087")) 11 | ) 12 | Description: Identify 17 Sustainable Development Goals and associated 169 targets in text. 
13 | URL: https://github.com/Yingjie4Science/SDGdetector 14 | BugReports: https://github.com/Yingjie4Science/SDGdetector/issues 15 | Imports: 16 | dplyr, 17 | magrittr, 18 | stringr, 19 | ggplot2, 20 | tidyr, 21 | grDevices, 22 | rnaturalearth, 23 | utils, 24 | scales, 25 | magick 26 | Depends: R (>= 3.5.0) 27 | License: GPL (>= 3) 28 | Encoding: UTF-8 29 | Roxygen: list(markdown = TRUE) 30 | RoxygenNote: 7.2.3 31 | LazyData: true 32 | NeedsCompilation: no 33 | Packaged: 2023-03-25 10:11:03 UTC; 268222h 34 | Suggests: 35 | testthat (>= 3.0.0) 36 | Config/testthat/edition: 3 37 | -------------------------------------------------------------------------------- /man/SDGdetector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SDGdetector.R 3 | \name{SDGdetector} 4 | \alias{SDGdetector} 5 | \title{Identify SDGs in text} 6 | \usage{ 7 | SDGdetector(x, col, quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{Data frame or a string} 11 | 12 | \item{col}{Column name for text to be assessed} 13 | 14 | \item{quiet}{Logical. Suppress info message} 15 | } 16 | \value{ 17 | Data frame with the same columns as the \code{df} plus one extra column named "sdgs", which 18 | lists the occurrence (or hits) of SDG goals or targets detected from each sentence in rows. 19 | Users can further use our function \code{summarize_sdg()} to clean the result for visualization. 20 | } 21 | \description{ 22 | Identify 17 Sustainable Development Goals and associated 169 targets in text. 23 | } 24 | \details{ 25 | In 2015, leaders worldwide adopted 17 Sustainable Development Goals (SDGs) with 169 26 | targets to be achieved by 2030 (https://sdgs.un.org). The framework of SDGs serves 27 | as a blueprint for shared prosperity for both people and the earth. \code{SDGdetector} 28 | identifies both direct and indirect expressions of SDGs and associated targets in 29 | chunks of text.
It takes a data frame with a specified column of text to process as 30 | inputs and outputs a data frame with original columns plus matched SDGs and targets. 31 | } 32 | \examples{ 33 | my_col <- c("our goal is to end poverty globally", "this product 34 | contributes to slowing down climate change") 35 | my_text <- data.frame(my_col) 36 | SDGdetector(my_text, my_col) 37 | } 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(SDGdetector) 4 | export(add_sdg_pattern) 5 | export(detect_region) 6 | export(func_AND_vector) 7 | export(func_OR_vector) 8 | export(lookaround_nearby_n) 9 | export(plot_sdg_bar) 10 | export(plot_sdg_map) 11 | export(sdg_color) 12 | export(sdg_icon) 13 | export(summarize_sdg) 14 | importFrom(dplyr,arrange) 15 | importFrom(dplyr,desc) 16 | importFrom(dplyr,everything) 17 | importFrom(dplyr,filter) 18 | importFrom(dplyr,group_by) 19 | importFrom(dplyr,last_col) 20 | importFrom(dplyr,left_join) 21 | importFrom(dplyr,mutate) 22 | importFrom(dplyr,relocate) 23 | importFrom(dplyr,rename) 24 | importFrom(dplyr,row_number) 25 | importFrom(dplyr,select) 26 | importFrom(ggplot2,aes) 27 | importFrom(ggplot2,element_blank) 28 | importFrom(ggplot2,element_rect) 29 | importFrom(ggplot2,facet_wrap) 30 | importFrom(ggplot2,geom_col) 31 | importFrom(ggplot2,geom_sf) 32 | importFrom(ggplot2,ggplot) 33 | importFrom(ggplot2,guide_legend) 34 | importFrom(ggplot2,guides) 35 | importFrom(ggplot2,scale_fill_distiller) 36 | importFrom(ggplot2,scale_fill_manual) 37 | importFrom(ggplot2,scale_x_continuous) 38 | importFrom(ggplot2,scale_y_continuous) 39 | importFrom(ggplot2,theme) 40 | importFrom(ggplot2,theme_bw) 41 | importFrom(ggplot2,unit) 42 | importFrom(grDevices,rgb) 43 | importFrom(magick,image_scale) 44 | importFrom(magrittr,"%>%") 45 | importFrom(rnaturalearth,ne_countries) 
46 | importFrom(stringr,str_count) 47 | importFrom(stringr,str_length) 48 | importFrom(tidyr,pivot_longer) 49 | importFrom(tidyr,pivot_wider) 50 | importFrom(tidyr,separate) 51 | importFrom(tidyr,starts_with) 52 | importFrom(utils,data) 53 | -------------------------------------------------------------------------------- /R/sdg_icon.R: -------------------------------------------------------------------------------- 1 | # dir <- dirname(rstudioapi::getSourceEditorContext()$path); dir 2 | # path <- paste0(dirname(dir), "/inst/SDG-Icons-2019_WEB") 3 | # image_pngs <- sort(sample(dir(path, full.names = TRUE), 18)) 4 | 5 | #' Icons for SDGs 6 | #' @description 7 | #' The `sdg_icon` function provides the specific icon for each SDG 8 | #' 9 | #' 10 | #' @param x Numeric code for each SDG, ranging from 1 to 17 11 | #' @param res Resolution of SDG icon. Default: `res = 200` indicates 12 | #' resizing proportionally to 200px 13 | #' 14 | #' @importFrom magick image_scale 15 | #' @importFrom magrittr %>% 16 | #' 17 | #' @export 18 | #' 19 | #' @examples 20 | #' sdg_icon(x = 17, res = 300) 21 | #' 22 | sdg_icon <- function(x, res = 200) { 23 | ## --- approach 1 24 | # load('./data/sdg_icons.rda') 25 | # icon <- sdg_icons[i] %>% magick::image_scale(., geometry = res) 26 | 27 | ## --- approach 2 28 | # icon <- magick::image_read(path = image_pngs[x]) %>% 29 | # magick::image_scale(., geometry = res) 30 | 31 | ## --- approach 3 32 | # png <- 33 | # system.file(paste0("E-WEB-Goal-", stringr::str_pad(x, 2, pad = "0"), "png"), 34 | # package = "SDGdetector") 35 | # icon <- 36 | # magick::image_read(path = png) %>% magick::image_scale(., geometry = res) 37 | 38 | ## --- approach 4 39 | image_pngs <- 40 | list.files( 41 | system.file("extdata", package = "SDGdetector"), 42 | pattern = "^E-WEB-Goal", 43 | full.names = TRUE 44 | ) 45 | icon <- magick::image_read(path = image_pngs[x]) 46 | icon <- magick::image_scale(image = icon, geometry = res) 47 | 48 | return(icon) 49 | } 50 | 51 | 
## test
## Kept commented out: a live call at the top level of a file under R/ would
## run every time the package code is sourced or built.
# sdg_icon(x = 17, res = 300)

--------------------------------------------------------------------------------
/tests/testthat/test-helper_SDG_search_terms.R:
--------------------------------------------------------------------------------
## func_AND_vector(): every term becomes its own look-ahead group
test_that("test function - func_AND_vector()", {
  words <- c('apple', 'bean', 'food')
  output <- func_AND_vector(v= words)
  expect_length(output, 1)
  expect_equal(
    output,
    expected = "(?=.*(?:apple))(?=.*(?:bean))(?=.*(?:food))"
  )
})


## func_AND_plus(): same look-aheads, anchored at the start and followed by `.+`
test_that("test function - func_AND_plus()", {
  words <- c('apple', 'bean', 'food')
  output <- func_AND_plus(v= words)
  expect_length(output, 1)
  expect_equal(
    output,
    expected = "^(?=.*(?:apple))(?=.*(?:bean))(?=.*(?:food)).+"
  )
})


## func_OR_vector(): terms joined by `|` inside one group
test_that("test function - func_OR_vector()", {
  words <- c('apple', 'bean', 'food')
  output <- func_OR_vector(v= words)
  expect_length(output, 1)
  expect_equal(
    output,
    expected = "(apple|bean|food)"
  )
})



## func_to_exclude_terms(): a negative look-ahead is placed right after the anchor
test_that("test function - func_to_exclude_terms()", {
  exclude <- "Access Bank"
  SDG_xx = c(
    "access to|inclusi\\S*",
    "financial service.?|financial institution|\\bbanks|\\banking"
  )
  SDG_xx <- func_AND_plus(SDG_xx)
  output <- func_to_exclude_terms(which_sdg_term = SDG_xx, terms_to_exclude = exclude)
  expect_length(output, 1)
  expect_equal(
    output,
    expected = "^(?!.*(?:Access Bank))(?=.*(?:access to|inclusi\\S*))(?=.*(?:financial service.?|financial institution|\\bbanks|\\banking)).+"
  )
})




## lookaround_nearby_n(): one pattern per term of `word_ls1`
test_that("test function - lookaround_nearby_n()", {
  con1 <- c('apple', 'bean', 'food')
  con2 <- c('big', 'delicious')

  output <- lookaround_nearby_n(word_ls1 = con1, word_ls2 = con2, n = 2, exclude = "", third_AND_string = "")
  expect_length(output, 3)
  expect_equal(
    output[1],
    expected = "(((?:apple)\\s(?:\\w+\\s){0,2}(?=(?:big)))|((?:big)\\s(?:\\w+\\s){0,2}(?=(?:apple))))"
  )


  output2 <- lookaround_nearby_n(word_ls1 = con1, word_ls2 = con2, n = 2, exclude = "", third_AND_string = "sustainable")
  expect_length(output2, 3)
  expect_equal(
    output2[1],
    expected = "^(?=.*(?:sustainable)).+(((?:apple)\\s(?:\\w+\\s){0,2}(?=(?:big)))|((?:big)\\s(?:\\w+\\s){0,2}(?=(?:apple))))"
  )
})


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------


#' @title Datasets of country and region names.
#'
#' @description Datasets of country and region names.
#'
#' @name country_region_names
#' @format \code{country_region_names}: A data frame with 644 rows and 3 variables
#' @docType data
#' @author Yingjie Li \email{yingjieli.edu@gmail.com}
#' @keywords dataset
"country_region_names"

#' @title Datasets of shapefiles.
#'
#' @description Datasets of shapefiles.
#'
#' @name shp
#' @format \code{shp}: A data frame with 241 rows and 6 variables
#' @docType data
#' @author Yingjie Li \email{yingjieli.edu@gmail.com}
#' @keywords dataset
"shp"


#' @title Datasets of SDG statistics.
#'
#' @description Datasets of SDG statistics.
#'
#' @name sdgstat
#' @format \code{sdgstat}: A data frame with 62 rows and 4 variables
#' @docType data
#' @author Yingjie Li \email{yingjieli.edu@gmail.com}
#' @keywords dataset
"sdgstat"




#' @title Datasets of SDG keys.
#'
#' @description Datasets of SDG keys.
#'
#' @name SDG_keys
#' @format \code{SDG_keys}: A data frame with 557 rows and 3 variables
#' @docType data
#' @author Yingjie Li \email{yingjieli.edu@gmail.com}
#' @keywords dataset
"SDG_keys"



#' @title List of Names and ISO Code for Countries
#'
#' @name codelist_panel
#' @format ## `codelist_panel`
#' A data frame with 28941 rows and 55 columns:
#' \describe{
#'   \item{country.name.en}{Country name in English}
#'   \item{iso2c, iso3c}{2 & 3 letter ISO country codes}
#'   \item{year}{Year}
#'   ...
#' }
#' @docType data
#' @source
"codelist_panel"




#' @title List SDG Icons
#'
#' @name sdg_icons
#' @format \code{sdg_icons}: External pointer of class "magick-image"
#' @docType data
#' @source
"sdg_icons"



#' @title The Names, ID, and Descriptions of all the 17 SDGs and 169 Targets
#'
#' @name list_of_un_goals_targets
#' @format ## `list_of_un_goals_targets`
#' A data frame with 169 rows and 3 columns:
#' \describe{
#'   \item{GoalID}{The ID of each SDG}
#'   \item{GoalName}{The name of each SDG}
#'   \item{target_id_un}{The name of each Target}
#'   \item{target_desc_un}{The description for each Target}
#' }
#' @docType data
#' @source
"list_of_un_goals_targets"






--------------------------------------------------------------------------------
/R/add_sdg_pattern.R:
--------------------------------------------------------------------------------
## helper functions ----------------------------------------------------------------------

## Combine a vector (v) of terms with `AND`: each term is wrapped in its own
## look-ahead, "(?=.*(?:term))", and the look-aheads are glued together into
## one regex string, so the resulting pattern requires every term to occur.

func_AND_vector <- function(v){
  lookaheads <- paste0("(?=.*(?:", v, "))")
  paste(lookaheads, collapse = "")
}


## a vector (v) of terms to be concatenated by `OR`
func_OR_vector <- function(v){
  ## join the terms with `|` and wrap them in one group: "(a|b|c)"
  pat <- paste0(v, collapse = "|")
  pat <- paste0("(", pat, ")")
  return(pat)
}

## load database -------------------------------------------------------------------------
## NOTE(review): relative path, so this only resolves when the working directory
## is the package root - confirm this is intended.
load('data/SDG_keys.rda')
sdg_id_list <- unique(SDG_keys$SDG_id)


## define the function to add patterns to the existing database --------------------------

#' Users Can Add Customized Patterns for Each SDG or Target
#'
#' @description
#' Builds a search pattern from `x`, appends it to a copy of the `SDG_keys`
#' search-term table and returns that copy. The table stored in the package is
#' not modified, so assign the result to keep the new pattern.
#'
#' @param x A vector of strings (at least one keyword)
#' @param sdg_id SDG Goal's ID or Target's ID, in the format of 'SDGx_y', e.g., SDG1_1, SDG2_general
#' @param operator 'AND', 'OR' to combine a vector of keywords for identifying SDG Goals or Targets.
#' Only used when `x` holds more than one keyword.
#' @param quiet Logical. Suppress info message
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select left_join group_by
#'
#' @return Invisibly, the `SDG_keys` data frame with the new pattern appended
#' as a row (duplicated rows removed).
#' @export
#'
#' @examples
#' terms_new <- c("improve", "farmer", "income")
#' add_sdg_pattern(sdg_id = 'SDG1_2', x = terms_new, operator = 'AND')
#'
add_sdg_pattern <-
  function(sdg_id,
           x,
           operator = 'AND',
           quiet = FALSE) {
    ## check the format of `sdg_id`
    if (any(!sdg_id %in% sdg_id_list)) {
      stop(
        paste0(
          "sdg_id names must be in the right format that similar to ",
          "'SDG1_1', 'SDG12_3', or 'SDG2_general'"
        )
      )
    }

    ## an empty `x` would produce the pattern "()", which matches any text
    if (length(x) == 0) {
      stop("`x` must contain at least one keyword.")
    }

    if (length(x) < 2) {
      ## a single keyword needs no operator
      new_pattern <- func_OR_vector(x)
    } else {
      ## reject anything but 'AND'/'OR' here; otherwise `new_pattern` would
      ## stay undefined and fail further down with an unrelated error
      if (!is.character(operator) || length(operator) != 1 ||
          !operator %in% c('AND', 'OR')) {
        stop("`operator` must be either 'AND' or 'OR'.")
      }
      if (operator == 'AND') {
        new_pattern <- func_AND_vector(x)
      } else {
        new_pattern <- func_OR_vector(x)
      }
    }

    new_pattern_df <- data.frame(SDG_id = sdg_id,
                                 SDG_keywords = new_pattern,
                                 match_tpye = 'user_defined')

    if (!quiet) {
      cat('New pattern for detecting SDGs was added: \n')
      print(new_pattern_df)
    }

    ## update the search term database (a local copy; see @description)
    SDG_keys <-
      rbind(SDG_keys,
            new_pattern_df) %>%
      dplyr::distinct_all()

    # return(SDG_keys)
    invisible(SDG_keys)
  }


## test ----------------------------------------------------------------------------------
# x <- "improve"
# x <- c("improve", "farmer", "income")
# add_sdg_pattern(sdg_id = 'SDG1_2', x = x, operator = 'AND')
# SDG_keys <- add_sdg_pattern(sdg_id = 'SDG1_1', x = x, operator = 'AND')

--------------------------------------------------------------------------------
/R/plot_sdg_bar.R:
--------------------------------------------------------------------------------
#' SDG bar plot
#'
#' @param data Data frame as the input
#' @param value The value, e.g., number of SDGs, to be shown in the bar plot
#' @param sdg Vector with SDG code to be visualized.
#' @param quiet Logical. Suppress info message
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select left_join group_by
#' @importFrom ggplot2 ggplot geom_col ggplot element_blank element_rect unit scale_fill_manual theme_bw theme
#' @importFrom tidyr pivot_wider pivot_longer
#' @importFrom utils data
#' @importFrom grDevices rgb
#'
#' @examples
#' data("sdgstat")
#' plot_sdg_bar(sdgstat, sdg = "SDG", value = "Value")
#'
#' @return A `ggplot` object: a bar chart of the summed value for each SDG.
#' @details
#' `sdg` and `value` may be given as strings or as bare column names. A
#' supplied name is used when it is a column of `data`; otherwise the columns
#' `SDG` and `Value` are used. Rows whose SDG label is not one of `SDG1` ...
#' `SDG17` are dropped; a message lists them unless `quiet = TRUE`.
#'
#' @export
#'
plot_sdg_bar <- function(data,
                         sdg = "sdg",
                         value = "value",
                         quiet = FALSE) {

  ## Capture the column arguments without evaluating them, so that both
  ## `sdg = "SDG"` and `sdg = SDG` are accepted.
  sdg_expr <- substitute(sdg)
  value_expr <- substitute(value)
  sdg_given <- !missing(sdg)
  value_given <- !missing(value)

  ## Turn a captured argument into a column name of `data`. An explicitly
  ## supplied name wins; otherwise the legacy column (`SDG` / `Value`) is
  ## preferred, then the default name. If nothing fits, the legacy name is
  ## returned so the check below reports it as missing.
  pick_column <- function(expr, supplied, legacy) {
    nm <- if (is.character(expr)) expr[1] else deparse(expr)[1]
    if (supplied && nm %in% names(data)) {
      return(nm)
    }
    if (!legacy %in% names(data) && nm %in% names(data)) {
      return(nm)
    }
    legacy
  }
  sdg_col <- pick_column(sdg_expr, sdg_given, "SDG")
  value_col <- pick_column(value_expr, value_given, "Value")

  # check if columns present
  required_columns <- c(sdg_col, value_col)
  absent <- setdiff(required_columns, names(data))
  if (length(absent) > 0) {
    stop(paste0("Data object must include columns [", paste0(absent, collapse = ", "), "]."))
  }

  # rename the two required columns; match() ties each new name to its own
  # column, whatever the column order in `data` is
  names(data)[match(required_columns, names(data))] <- c("sdg", "value")

  sdg_name <- paste0("SDG", seq(1, 17, 1))

  ## check values in the `sdg` column and report what will be dropped
  sdg_labels <- as.character(data$sdg)
  invalid <- unique(sdg_labels[!is.na(sdg_labels) & !sdg_labels %in% sdg_name])
  if (length(invalid) > 0 && !quiet) {
    message(paste0(
      "sdg names must be in the format of ",
      paste(sdg_name, collapse = ", "),
      "; rows with other names are dropped: ",
      paste(invalid, collapse = ", ")
    ))
  }

  ## keep the factor so the bars follow SDG1, SDG2, ..., SDG17 rather than
  ## alphabetical order; labels outside `sdg_name` become NA and are removed
  data$sdg <- factor(sdg_labels, levels = sdg_name)
  data <- data %>%
    dplyr::filter(!is.na(sdg))

  ## aggregate by SDGs
  data <- data %>%
    dplyr::group_by(sdg) %>%
    dplyr::summarise_at(c("value"), sum, na.rm = TRUE)

  ## one RGB colour per SDG, in goal order
  color_rgb <- data.frame(
    R = c(229, 221, 76, 197, 255, 38, 252, 162, 253, 221, 253, 191, 63, 10, 86, 0, 25),
    G = c(36, 166, 159, 25, 58, 189, 195, 25, 105, 19, 157, 139, 126, 141, 192, 104, 72),
    B = c(59, 58, 56, 45, 33, 226, 11, 66, 37, 103, 36, 46, 68, 217, 43, 157, 106)
  )

  ### HEX, named by SDG so scale_fill_manual() matches colours by label
  color_hex <- rgb(color_rgb, maxColorValue = 255)
  names(color_hex) <- sdg_name

  ## plot
  p1 <- ggplot(data, aes(x = sdg, y = value, fill = sdg)) +
    geom_col(show.legend = FALSE) +
    scale_fill_manual(values = color_hex) +
    theme_bw() +
    theme(
      panel.grid.minor = ggplot2::element_blank(),
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      axis.title.x = ggplot2::element_blank()
    )


  return(p1)
}

--------------------------------------------------------------------------------
/R/plot_sdg_map.R:
--------------------------------------------------------------------------------
#' SDG Map Plot
#' @description SDG map plot
#'
#' @usage plot_sdg_map(data, sdg = sdg, value = value,
#' country = country, by_sdg = TRUE)
#'
#' @param data Data frame as the input
#' @param value The value, e.g., number of SDGs, to be shown in the thematic map
#' @param sdg Vector with SDG code to be visualized.
#' @param country Countries that are associated with the SDGs.
#' @param by_sdg If mapping by SDG, TRUE or FALSE.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select left_join group_by
#' @importFrom ggplot2 ggplot geom_sf scale_fill_distiller theme_bw element_blank element_rect unit aes guides guide_legend
#' scale_x_continuous scale_y_continuous facet_wrap
#' @importFrom tidyr pivot_wider pivot_longer starts_with
#' @importFrom rnaturalearth ne_countries
#'
#' @examples
#' data("sdgstat")
#' plot_sdg_map(sdgstat,
#'   sdg = "SDG", value = "Value",
#'   country = "Country", by_sdg = FALSE
#' )
#'
#' @return A `ggplot` object: a world map filled by the summed value.
#' @details
#' `sdg`, `value` and `country` may be given as strings or as bare column
#' names. A supplied name is used when it is a column of `data`; otherwise the
#' columns `SDG`, `Value` and `Country` are used. Country values are matched
#' against the `iso_a3` column of the `rnaturalearth` world layer.
#'
#' @export
#'
plot_sdg_map <- function(data, sdg = "sdg", value = "value",
                         country = "country", by_sdg = TRUE) {

  ## Capture the column arguments without evaluating them, so that both
  ## `sdg = "SDG"` and `sdg = SDG` are accepted.
  sdg_expr <- substitute(sdg)
  value_expr <- substitute(value)
  country_expr <- substitute(country)
  sdg_given <- !missing(sdg)
  value_given <- !missing(value)
  country_given <- !missing(country)

  ## Turn a captured argument into a column name of `data`. An explicitly
  ## supplied name wins; otherwise the legacy column (`SDG` / `Value` /
  ## `Country`) is preferred, then the default name. If nothing fits, the
  ## legacy name is returned so the check below reports it as missing.
  pick_column <- function(expr, supplied, legacy) {
    nm <- if (is.character(expr)) expr[1] else deparse(expr)[1]
    if (supplied && nm %in% names(data)) {
      return(nm)
    }
    if (!legacy %in% names(data) && nm %in% names(data)) {
      return(nm)
    }
    legacy
  }
  sdg_col <- pick_column(sdg_expr, sdg_given, "SDG")
  value_col <- pick_column(value_expr, value_given, "Value")
  country_col <- pick_column(country_expr, country_given, "Country")

  # check if columns present
  required_columns <- c(sdg_col, value_col, country_col)
  absent <- setdiff(required_columns, names(data))
  if (length(absent) > 0) {
    stop(paste0("Data object must include columns [", paste0(absent, collapse = ", "), "]."))
  }

  # rename the required columns; match() ties each new name to its own column,
  # whatever the column order in `data` is
  names(data)[match(required_columns, names(data))] <- c("sdg", "value", "country")

  sdg_name <- paste0("SDG", seq(1, 17, 1))

  data$sdg <- factor(data$sdg, levels = sdg_name)

  world <- ne_countries(scale = "small", returnclass = "sf")

  ## sum the values per country, and per SDG as well when one panel per SDG
  ## is requested
  if (by_sdg == TRUE) {
    grouped <- data %>%
      group_by(country, sdg)
  } else {
    grouped <- data %>%
      group_by(country)
  }
  d1 <- grouped %>%
    dplyr::summarise_at(c("value"), sum, na.rm = TRUE) %>%
    as.data.frame()

  ## attach the sums to the country polygons
  wd <- merge(world, d1, by.x = "iso_a3", by.y = "country")

  ## the plain world layer is drawn first as a backdrop
  p1 <- ggplot(wd) +
    geom_sf(data = world) +
    geom_sf(aes(fill = value)) +
    scale_fill_distiller(palette = "YlGnBu", direction = 1, na.value = "gray80")
  if (by_sdg == TRUE) {
    p1 <- p1 + facet_wrap(~sdg)
  }
  p1 <- p1 + theme_bw()

  return(p1)
}

--------------------------------------------------------------------------------
/R/detect_region.R:
--------------------------------------------------------------------------------

#' Detect country or region names in text for further mapping
#'
#' @description Detect country or region names in text for further mapping.
#' Every name in the `country_region_names` dataset is searched for in the
#' text. Names shorter than four characters are matched as whole words and
#' case-sensitively; longer names are matched ignoring case.
#'
#' @usage detect_region(x, col)
#'
#' @param x Data frame or a string
#' @param col Column name for text to be assessed, as a bare name or a string.
#' Only used when `x` is a data frame.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select
#'
#' @examples
#' x <- c("This paper explores the method and results from an independent
#' evidence based assessment of Australia's progress towards the SDGs",
#' "Last year alone, the United States experienced 14 separate billion-dollar
#' disasters related to climate change")
#' col <- data.frame(x)
#' regions <- detect_region(x, col)
#'
#' @return For text input, a character vector with the detected names joined
#' by commas (an empty string when nothing is found). For a data frame, the
#' data frame with those strings in a `region` column.
#' @export



detect_region <- function(x, col) {
  # data(country_region_names, "country_region_names")

  input_is_df <- is.data.frame(x)

  ## --> pick the text to scan
  if (input_is_df == FALSE) {
    ## input = a string (or character vector)
    text <- x
  } else {
    ## input = a dataframe
    data <- as.data.frame(x)
    col_expr <- substitute(col)
    if (is.character(col_expr) && length(col_expr) == 1 &&
        col_expr %in% names(data)) {
      ## `col` given as a quoted column name
      text <- data[[col_expr]]
    } else {
      ## `col` given as a bare column name
      text <- dplyr::transmute(data, {{ col }})[[1]]
    }
  }

  ## one result string per text element; ",name" is appended for every hit
  region <- rep("", length(text))

  ## loop and detect each country/region name
  for (i in seq_len(nrow(country_region_names))) {
    region_i <- country_region_names$name[i] ## get the region name

    ## if the number of character of a region name is less than 4, we need to
    ## add word boundary to the word and match it case-sensitively
    is_short <- nchar(region_i) < 4
    region_i_enhanced <- if (is_short) {
      paste0('\\b', region_i, '\\b')
    } else {
      region_i
    }

    detected <- grepl(
      pattern = region_i_enhanced,
      x = text,
      ignore.case = !is_short,
      perl = TRUE
    )

    ## If detected, add the region name; if not, add nothing
    region <- ifelse(detected, paste0(region, ',', region_i), region)
  }

  ## drop leading, doubled and trailing commas
  region <- gsub("^,*|(?<=,),|,*$", "", region, perl = TRUE)

  if (input_is_df == FALSE) {
    return(region)
  }

  ## add the "region" column to the dataframe
  data$region <- region
  return(data)
}


### test
# x = 'China and USA devoted the largest efforts on solar energy'
# detect_region(x)
#
# x = data.frame(txt_col = c(
#   'China and USA devoted the largest efforts on solar energy',
#   'Congo needs to improve SDG 1
#   and 2'
# ))
# detect_region(x, col = txt_col)

--------------------------------------------------------------------------------
/R/summarize_sdg.R:
--------------------------------------------------------------------------------

## load database -------------------------------------------------------------------------
## NOTE(review): `library()` and a relative `load()` at the top level of a file
## under R/ only work when the code is evaluated from the package root -
## confirm this is intended.
library(dplyr)
load('./data/list_of_un_goals_targets.rda')
## lookup table of all UN goals and targets, in the 'SDG<id>' naming used below
UN_SDGs <- list_of_un_goals_targets %>%
  dplyr::mutate(
    goalname = paste0('SDG', GoalID),
    target_id_un = paste0('SDG', target_id_un),
    id = row_number()
  ) %>%
  dplyr::select(goalname, target_id_un, id) %>%
  as.data.frame()
rm(list_of_un_goals_targets)



#' Summarize results from SDGdetector at either the Goal level or Target level.
#'
#' @param data Data frame with a column named `sdgs` holding comma-separated
#' SDG codes, as returned by `SDGdetector()`
#' @param sum_by The group level to be chosen for data summary. Default parameter is
#' "target", and can also set at "goal" level.
#' @param quiet Logical. Suppress info message (currently not used by this function)
#'
#' @return
#' Data frame with at least one column named "SDG" or "Target", and one column `Freq` that
#' represent the total hits. Goals and targets without any hit are listed with
#' a `Freq` of 0.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select filter rename row_number everything arrange
#' @importFrom tidyr separate
#' @importFrom utils data
#'
#' @export
#'
#' @examples
#' library(SDGdetector)
#' df <- data.frame(col = c(
#'   'our goal is to end poverty globally',
#'   'this product contributes to slowing down climate change'))
#' data <- SDGdetector(x = df, col = col)
#' summarize_sdg(data, sum_by = 'target', quiet = FALSE)
#'
summarize_sdg <- function(data, sum_by = 'target', quiet = FALSE) {

  ## validate the arguments first, so a wrong value fails with a clear message
  ## instead of an "object not found" error at the end
  if (!is.character(sum_by) || length(sum_by) != 1 ||
      !sum_by %in% c('target', 'goal')) {
    stop('Please specify `sum_by` with either "goal", or "target" for data summary.')
  }

  coded <- data %>% as.data.frame()

  if (!'sdgs' %in% names(coded)) {
    stop('`data` must contain a column named "sdgs", e.g., the output of `SDGdetector()`.')
  }

  ### --> pull out the column with coded results and glue all rows into one string
  coded_sdgs <- coded['sdgs']
  coded_sdgs <- unlist(coded_sdgs)
  coded_sdgs <- as.vector(coded_sdgs)
  coded_sdgs <- paste(coded_sdgs, sep = " ", collapse = ",")


  ### --> split into single codes and format the counts as a DF
  coded_sdgs <- unlist(strsplit(coded_sdgs, split = "\\,"))
  coded_sdgs <- trimws(coded_sdgs)

  coded_sdgs_df <- as.data.frame(table(coded_sdgs)) %>%
    dplyr::filter(coded_sdgs != '') %>%
    tidyr::separate(col = coded_sdgs, into = c('goal', 'target_id'), sep = '_', remove = F) %>%
    dplyr::mutate(
      goal_id = as.numeric(gsub("\\D", "", goal)),
      target_id = gsub('^.{3}', "", coded_sdgs),
      target_id = gsub('_', ".", target_id),
      coded_sdgs = gsub('_', ".", coded_sdgs)
    ) %>%
    arrange(goal_id, target_id) %>%
    dplyr::select(1:2, goal_id, everything())


  ### --> The above data only presents what were detected, but some SDGs without match won't be shown.
  ### --> Force to list all 17 SDGs and 169 Targets
  ### --> merge data
  coded_sdgs_df_format <-
    merge(x = coded_sdgs_df,          ## the coded data
          y = UN_SDGs,                ## the UN goal and target id for formatting
          by.x = c('goal', 'coded_sdgs'),
          by.y = c('goalname', 'target_id_un'), all = T) %>%
    dplyr::mutate(goal = factor(goal, levels = paste0('SDG', 1:17))) %>%
    arrange(goal, id) %>%
    dplyr::mutate(id = row_number()) %>%
    ## for the NA in `goal_id` and `target_id`, fill the info
    dplyr::mutate(
      goal_id = ifelse(is.na(goal_id), gsub('SDG|\\_.*$','', goal), goal_id),
      target_id = ifelse(is.na(target_id), gsub('SDG','', coded_sdgs), target_id) ## strip the 'SDG' prefix
    ) %>%
    dplyr::rename('target' = 'coded_sdgs') %>%
    dplyr::select(-id) %>%
    as.data.frame()


  ### summarize data by group ------------------------------------------------------------
  if (sum_by == 'target') {
    ### group and sum by `target`
    coded_sdgs_df_sum <- coded_sdgs_df_format %>%
      group_by(goal, target) %>%
      dplyr::summarise_at(c("Freq"), sum, na.rm = TRUE) %>%
      dplyr::rename('SDG' = 'goal', 'Target' = 'target')

  } else {
    ### `sum_by` was validated above, so this is the 'goal' level
    coded_sdgs_df_sum <- coded_sdgs_df_format %>%
      group_by(goal) %>%
      dplyr::summarise_at(c("Freq"), sum, na.rm = TRUE) %>%
      dplyr::rename('SDG' = 'goal')
  }


  ###
  return(coded_sdgs_df_sum)

}

--------------------------------------------------------------------------------
/R/SDGdetector.R:
--------------------------------------------------------------------------------
#' Identify SDGs in text
#'
#' @description
#' Identify 17 Sustainable Development Goals and associated 169 targets in text.
#'
#' @details
#' In 2015, leaders worldwide adopted 17 Sustainable Development Goals (SDGs) with 169
#' targets to be achieved by 2030 (https://sdgs.un.org). The framework of SDGs serves
#' as a blueprint for shared prosperity for both people and the earth. `SDGdetector`
#' identifies both direct and indirect expressions of SDGs and associated targets in
#' chunks of text. It takes a data frame with a specified column of text to process as
#' inputs and outputs a data frame with original columns plus matched SDGs and targets.
#'
#'
#' @param x Data frame or a string (a character vector is treated as one text per element)
#' @param col Column name for text to be assessed. Only used when `x` is a data frame.
#' @param quiet Logical. Suppress info message
#'
#' @return
#' Data frame with the input columns (the text column is named `col`), a row
#' number column `id`, and one extra column named "sdgs", which lists the
#' SDG goals or targets detected in each row, separated by commas. Rows are
#' sorted from the longest to the shortest "sdgs" string.
#' Users can further use our function `summarize_sdg()` to clean the result for visualization.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select relocate last_col desc
#' @importFrom stringr str_count str_length
#'
#' @export
#'
#' @examples
#' my_col <- c("our goal is to end poverty globally", "this product
#' contributes to slowing down climate change")
#' my_text <- data.frame(my_col)
#' SDGdetector(my_text, my_col)
SDGdetector <- function(x,
                        col,
                        quiet = FALSE) {
  ## silence "no visible binding" notes for the column names used in arrange()
  sdgs <- id <- NULL

  # data(SDG_keys, "SDG_keys")

  ## first, to check the input is a string or a dataframe --------------------------------
  if (!is.data.frame(x)) {
    ## --> if a string: put it into a dataframe ==========================================
    df <- data.frame(col = x) %>%
      dplyr::mutate(id = dplyr::row_number())

    ## check the number of characters in the sentence;
    ## any() keeps the check valid when `x` holds several strings
    if (any(nchar(x) > 750, na.rm = TRUE) && !quiet) {
      message(
        paste0(
          "The length of your input text reached the limit in PCRE, ",
          "please split your input text into shorts ones for another try.",
          "Idealy, `nchar(x)` should smaller than 750. "
        )
      )
    }
  } else {
    ## --> if a dataframe ================================================================
    df <- x %>%
      dplyr::rename(col = {{ col }}) %>%
      dplyr::mutate(id = dplyr::row_number())

    ## check the number of characters in the sentence
    long_rows <- df$id[which(stringr::str_length(df$col) > 750)]
    if (length(long_rows) > 0 && !quiet) {
      message(paste0(
        "The length of your input text reached the limit in PCRE, ",
        "please split your input text in rows ",
        paste(unique(long_rows), collapse = ", "),
        " into shorts ones for another try.",
        "Idealy, `nchar(x)` should smaller than 750. "
      ))
    }
  }

  ## one result string per row; ",<SDG id>" is appended for every matching key
  text <- df$col
  hits <- rep("", nrow(df))

  for (i in seq_len(nrow(SDG_keys))) { ## loop each SDG indicators
    sdg_i_str <- SDG_keys$SDG_id[i] %>% as.character() ## get the SDG id name
    sdg_i_obj <- SDG_keys$SDG_keywords[i] ## get the corresponding SDG search term list

    ## at the sentence level - count once if goals/targets are mentioned
    ## stringr::str_detect() uses stringi, which avoids common PCRE issues like
    ## match limit exceeded; `regex()` is called with its namespace because it
    ## is not among the functions imported from stringr
    matched <- stringr::str_detect(text, stringr::regex(sdg_i_obj, ignore_case = TRUE))
    hits <- ifelse(matched, paste0(hits, ",", sdg_i_str), hits)
  }

  ## drop leading, doubled and trailing commas
  hits <- gsub("^,*|(?<=,),|,*$", "", hits, perl = TRUE)

  code <- as.data.frame(df)
  code$sdgs <- hits

  ### sort from most SDG hits to least (or, none)
  coded <- code %>% dplyr::arrange(desc(nchar(sdgs)), id)

  return(coded)
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![DOI](https://joss.theoj.org/papers/10.21105/joss.05124/status.svg)](https://doi.org/10.21105/joss.05124)
[![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/SDGdetector)](https://CRAN.R-project.org/package=SDGdetector)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
[![CRAN
downloadcount](https://cranlogs.r-pkg.org/badges/grand-total/SDGdetector)](https://cranlogs.r-pkg.org/badges/grand-total/SDGdetector) 5 | [![CRAN downloadcount](https://cranlogs.r-pkg.org/badges/SDGdetector)](https://cranlogs.r-pkg.org/badges/SDGdetector) 6 | ![Visitor Badge](https://visitor-badge.laobi.icu/badge?page_id=yingjieli.visitor-badge) 7 | 8 | 9 |

10 | 11 |

12 | 13 | # SDG Detector 14 | 15 | In 2015, leaders worldwide adopted 17 Sustainable Development Goals (SDGs) with 169 targets to be achieved by 2030 (https://sdgs.un.org). The framework of SDGs serves as a blueprint for shared prosperity for both people and the earth. `SDGdetector` identifies both direct and indirect expressions of SDGs and associated targets in chunks of text. It takes a data frame with a specified column of text to process as inputs, and outputs a data frame with original columns plus matched SDGs and targets. 16 | 17 | For Python package, check `seesus`. 18 | 19 | ## Installation 20 | 21 | There are two ways to install the **SDGdetector** R package. 22 | 23 | ### 1. CRAN 24 | 25 | **SDGdetector** is now available on 26 | [CRAN](https://CRAN.R-project.org/package=SDGdetector), so you can install it with: 27 | 28 | ``` r 29 | install.packages("SDGdetector") 30 | ``` 31 | 32 | ### 2. GitHub 33 | 34 | You can alternatively install the development version of **SDGdetector** from [GitHub](https://github.com/Yingjie4Science/SDGdetector) as follows: 35 | 36 | ``` r 37 | if (!require("remotes")) { 38 | install.packages("remotes") 39 | } 40 | 41 | remotes::install_github("Yingjie4Science/SDGdetector") 42 | ``` 43 | 44 | ## Example Usage 45 | 46 | **To detect SDGs from text** 47 | ``` r 48 | library(SDGdetector) 49 | 50 | ### string as input data 51 | text <- 'our goal is to mitigate climate change, end poverty, and reduce inequality globally' 52 | SDGdetector(x = text) 53 | 54 | ### dataframe as input data 55 | df <- data.frame(col = c( 56 | 'our goal is to end poverty globally', 57 | 'this product contributes to slowing down climate change')) 58 | SDGdetector(x = df, col = col) 59 | ``` 60 | 61 | **To detect regions/countries in text** 62 | ``` r 63 | x = 'China and USA devoted the largest efforts on solar energy' 64 | detect_region(x) 65 | ``` 66 | 67 | **To use specific SDG colors** 68 | ``` r 69 | sdg_color(1) 70 | sdg_color(x = 1:17) 71 | ``` 72 | 
73 | 74 | **To use specific SDG icons** 75 | ``` r 76 | sdg_icon(x = 7, res = 300) 77 | ``` 78 | 79 | 80 | **To visualize SDG on a bar plot** 81 | ``` r 82 | data("sdgstat") 83 | df <- sdgstat 84 | 85 | # plot SDG on a bar plot 86 | plot_sdg_bar(data = df, sdg = SDG, value = Value) 87 | ``` 88 |

89 | 90 |

91 | 92 | **To visualize SDG on a map** 93 | ```r 94 | # plot SDG by country on a map 95 | plot_sdg_map(data = df, sdg = SDG, value = Value, country = Country, by_sdg = F) 96 | ``` 97 |

98 | 99 |

100 | 101 | ## Accuracy Evaluation 102 | 103 | This package has achieved high accuracy in detecting SDG-related statements within textual data (> 75%, measured by the alignment between the R package results and four experts' manually-coded results; see this [supplementary document](https://drive.google.com/file/d/14TV54YCQqKwO9j-abC2RuKpy9tKY7jXM/view?usp=sharing) for more information). The data and code for reproducing the performance metrics can be found in this repo under [`./docs/accuracy_evaluation/`](https://github.com/Yingjie4Science/SDGdetector/tree/main/docs/accuracy_evaluation). 104 | 105 |

106 | 107 |

108 | The overall accuracy (left) and the accuracy for each round of inspection (right). The red square dot in the left plot indicates the mean value and the hollow round dots represent the accuracy values reported by each expert. 109 | 110 | 111 | ## License 112 | 113 | The SDGdetector **R** package is distributed under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html). 114 | 115 | 116 | ## How to Cite 117 | 118 | Get citation information for *SDGdetector* in R by running 119 | `citation(package = 'SDGdetector')` 120 | 121 | 122 | To cite SDGdetector in publications, please use: 123 | 124 | ``` 125 | Li, Y., Frans, V.F., Song, Y., Cai, M., Zhang, Y., Liu, J. (2023). SDGdetector: an R-based text mining tool for quantifying efforts toward Sustainable Development Goals. Journal of Open Source Software 8(84), 5124. https://doi.org/10.21105/joss.05124. 126 | ``` 127 | 128 | A BibTeX entry for LaTeX users is 129 | 130 | ``` 131 | @Article{, 132 | title = {SDGdetector: an R-based text mining tool for quantifying efforts toward Sustainable Development Goals}, 133 | author = {Yingjie Li and Veronica F. Frans and Yongze Song and Meng Cai and Yuqian Zhang and Jianguo Liu}, 134 | journal = {Journal of Open Source Software}, 135 | year = {2023}, 136 | volume = {8}, 137 | number = {84}, 138 | pages = {5124}, 139 | doi = {10.21105/joss.05124}, 140 | url = {https://github.com/Yingjie4Science/SDGdetector} 141 | } 142 | ``` 143 | 144 | 145 | ## Reporting Bugs 146 | 147 | *SDGdetector* is distributed as is and without warranty of suitability for application. If you encounter flaws with the software (i.e. bugs) please report the issue. Providing a detailed description of the conditions under which the bug occurred will help to identify the bug. 
*Use the [Issues tracker](https://github.com/Yingjie4Science/SDGdetector/issues) on GitHub to report issues with the software and to request feature enhancements.* 148 | 149 | -------------------------------------------------------------------------------- /docs/accuracy_evaluation/validation_analysis.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Text_mining" 3 | author: "Yingjie" 4 | date: "01/23/2022" 5 | output: html_document 6 | editor_options: 7 | chunk_output_type: inline 8 | --- 9 | 10 | 11 | This script aims to use human intelligence to evaluate the accuracy of the text mining results. 12 | For details, please refer to [this report](https://drive.google.com/file/d/1EHUV6Jc3N4A-IshKU4dbxtIqlfj50mzi/view) 13 | 14 | 15 | # Directories and packages 16 | 17 | ```{r include=FALSE} 18 | ### To clear your environment 19 | remove(list = ls()) 20 | 21 | 22 | # directories 23 | dir.this <- "./docs/accuracy_evaluation/" 24 | dir.data <- paste0(dir.this, "data/") 25 | dir.figures <- dir.this 26 | 27 | # load packages 28 | library(dplyr) 29 | library(readxl) 30 | library(writexl) 31 | 32 | library(tidyverse) # piping and wrangling 33 | library(ggplot2) 34 | library(ggpubr) 35 | ``` 36 | 37 | 38 | 39 | # Data 40 | 41 | We validated the accuracy by conducting three rounds of tests. For each round, we asked 42 | four experts to identify if the machine-coded results are correct or not. 43 | 44 | The data are saved in XLSX files under each round's folder. 45 | 46 | ``` 47 | 48 | ./docs/accuracy_evaluation/ 49 | | 50 | |__ data/ 51 | | | 52 | | |__ task1_R1_done/ 53 | | | 54 | | |__ task1_R2_done/ 55 | | | 56 | | |__ task1_R3_done/ 57 | | 58 | | 59 | |__ (results and figures) 60 | 61 | ``` 62 | 63 | ## 1. 
Clean data from each round 64 | 65 | First, we need to 66 | - read in and clean four coders' data 67 | - calculate the accuracy based on each coder's evaluation 68 | - check alignment across coders 69 | - save the results in XLSX for visualization 70 | 71 | Please modify `nth_round` and run all the code in this section 72 | (i.e., Section ## 1. Clean data from each round) before running code in the 2nd section 73 | for visualization. That is, you will need to run the code in section 1 three times, once for each round. 74 | 75 | ```{r - read and clean} 76 | getwd() 77 | 78 | 79 | ## choose the round number and run all code in section 1. 80 | nth_round <- 1 81 | # nth_round <- 2 82 | # nth_round <- 3 83 | 84 | 85 | ## list data from this round of evaluation ----------------------------------------------- 86 | dir.task1 <- paste0(dir.data, "task1_R", nth_round, "_done") 87 | dir.task1 88 | f.ls <- list.files(path = dir.task1, pattern = "task1_", full.names = T, recursive = T) 89 | f.ls 90 | ## test line 91 | # f <- "./Data/data_TextMining/coded_validation_coder/planB_done/task1/task1_YL.xlsx" 92 | 93 | 94 | ## read in all the XLSX ------------------------------------------------------------------ 95 | ds <- data.frame() 96 | for (f in f.ls) { 97 | coder <- basename(f) %>% gsub("task1_|\\.xlsx", "", .) 
98 | coder 99 | d <- readxl::read_excel(path = f) %>% 100 | dplyr::mutate(coder = coder, index = as.numeric(row.names(.))) 101 | ds <- rbind(ds, d) 102 | } 103 | 104 | unique(ds$correct_or_not) 105 | unique(ds$coder) 106 | 107 | 108 | ## unify the checking result to "0, 1, NA" only ------------------------------------------ 109 | ds0 <- ds %>% 110 | dplyr::mutate(coder = gsub(pattern = ".*_", "", coder)) %>% ## clean coder name 111 | dplyr::mutate(correct_or_not = gsub(pattern = "\\*", "", correct_or_not)) %>% ## clean coder's response 112 | dplyr::mutate(correct = case_when( 113 | correct_or_not == "1" ~ 1, 114 | correct_or_not == "0" ~ 0, 115 | is.na(correct_or_not) ~ NA_real_, 116 | grepl("^Y", correct_or_not) ~ 1, 117 | TRUE ~ 0 118 | )) %>% 119 | dplyr::select(coder, correct, correct_or_not, everything()) 120 | 121 | 122 | ds1 <- ds0 %>% 123 | arrange(correct_or_not) 124 | 125 | 126 | 127 | ## to check the alignment across coders -------------------------------------------------- 128 | ds2 <- ds1 %>% 129 | dplyr::select(coder, correct, company, index) %>% 130 | ## only analyze rows being checked 131 | # dplyr::filter(!is.na(correct)) %>% 132 | spread(key = coder, value = correct) %>% 133 | dplyr::mutate( 134 | same12 = ifelse(MC == VF, 1, 0), 135 | same13 = ifelse(MC == YL, 1, 0), 136 | same14 = ifelse(MC == YZ, 1, 0), 137 | same23 = ifelse(VF == YL, 1, 0), 138 | same24 = ifelse(VF == YZ, 1, 0), 139 | same34 = ifelse(YL == YZ, 1, 0), 140 | same3 = ifelse(MC == YL & YZ == YL, 1, 0), 141 | same4 = ifelse(MC == YL & YZ == YL & VF == YL, 1, 0) 142 | ) %>% 143 | as.data.frame() 144 | ``` 145 | 146 | 147 | 148 | ```{r - accuracy} 149 | names(ds2) 150 | 151 | ### The accuracy of our SDGdetector judged by SDG experts 152 | ds_accuracy <- ds2 %>% 153 | dplyr::select(company, index, MC, VF, YL, YZ) %>% 154 | gather(key = "coder", value = "value", 3:ncol(.)) %>% 155 | dplyr::group_by(coder) %>% 156 | dplyr::summarise(accuracy = sum(value, na.rm = T) / nrow(ds2)) %>% 157 
| dplyr::mutate(id = row.names(.)) %>% 158 | dplyr::select(id, everything()) 159 | ds_accuracy 160 | 161 | 162 | ### test code 163 | # (sum(ds2$MC, na.rm = T)/nrow(ds2)) %>% 164 | # scales::percent() %>% 165 | # cat('accuracy by MC:', .) 166 | ``` 167 | 168 | 169 | 170 | 171 | ```{r - alignment across coders} 172 | names(ds2) 173 | 174 | ### The inter-expert reliability 175 | ds_intercoder <- ds2 %>% 176 | dplyr::select(company, index, same12, same13, same14, same23, same24, same34) %>% 177 | gather(key = "pair", value = "value", 3:ncol(.)) %>% 178 | dplyr::group_by(pair) %>% 179 | dplyr::summarise(alignment = sum(value, na.rm = T) / nrow(ds2)) 180 | 181 | ds_intercoder 182 | ``` 183 | 184 | 185 | 186 | ```{r - save result in xlsx} 187 | ## save to XLSX for easy read 188 | f <- paste0(dirname(dir.task1), "/", "results_accuracy_", basename(dir.task1), ".xlsx") 189 | f 190 | writexl::write_xlsx(ds_accuracy, path = f) 191 | 192 | f <- paste0(dirname(dir.task1), "/", "results_intercoder_", basename(dir.task1), ".xlsx") 193 | f 194 | writexl::write_xlsx(ds_intercoder, path = f) 195 | ``` 196 | 197 | 198 | 199 | ## 2. Visualization of the results 200 | 201 | ```{r - read in results} 202 | ## for accuracy 203 | ind <- "results_accuracy_" 204 | fs <- list.files(path = dirname(dir.task1), pattern = ind, full.names = T) 205 | # f <- fs[1] 206 | dfs_acc <- data.frame() 207 | 208 | for (f in fs) { 209 | # print(f) 210 | round_id <- f %>% 211 | basename(.) %>% 212 | gsub(ind, "", .) %>% 213 | gsub("task1_|_done.xlsx", "", .) 214 | # print(round_id) 215 | 216 | acc <- readxl::read_excel(f) %>% 217 | dplyr::mutate(round_id = round_id) 218 | dfs_acc <- rbind(dfs_acc, acc) 219 | } 220 | 221 | 222 | 223 | ## for inter-coder 224 | ind <- "results_intercoder_" 225 | fs <- list.files(path = dirname(dir.task1), pattern = ind, full.names = T) 226 | # f <- fs[1] 227 | dfs_int <- data.frame() 228 | 229 | for (f in fs) { 230 | # print(f) 231 | round_id <- f %>% 232 | basename(.) 
%>% 233 | gsub(ind, "", .) %>% 234 | gsub("task1_|_done.xlsx", "", .) 235 | # print(round_id) 236 | 237 | int <- readxl::read_excel(f) %>% 238 | dplyr::mutate(round_id = round_id) 239 | dfs_int <- rbind(dfs_int, int) 240 | } 241 | ``` 242 | 243 | 244 | 245 | 246 | ```{r - plot accuracy} 247 | ## plot ---------------------------------------------------------------------------------- 248 | 249 | mean(dfs_acc$accuracy) 250 | fun_mean <- function(x) { 251 | return(data.frame(y = mean(x), label = round(mean(x, na.rm = T), digits = 2))) 252 | } 253 | 254 | p1 <- dfs_acc %>% 255 | ggplot(aes(x = 1, y = accuracy)) + 256 | geom_boxplot(fill = "gray", show.legend = F, varwidth = .5) + # , fill = round_id 257 | stat_summary(fun = mean, geom = "point", shape = 15, size = 3, alpha = 0.6, color = "red", fill = "red") + 258 | stat_summary(fun.data = fun_mean, geom = "text", vjust = .5, hjust = -0.2, color = "red") + 259 | scale_y_continuous(limits = c(0.7, .85)) + 260 | geom_jitter(color = "black", shape = 1, size = 2, alpha = 0.6, width = .05) + 261 | xlab("Overall") + 262 | ylab("Accuracy") + 263 | theme_bw() + 264 | theme( 265 | panel.grid = element_blank(), 266 | axis.text.x = element_text(colour = "transparent"), 267 | axis.ticks = element_blank(), 268 | legend.position = "none" 269 | ) 270 | p2 <- dfs_acc %>% 271 | ggplot(aes(x = round_id, y = accuracy)) + # , fill = round_id 272 | geom_boxplot(alpha = .8) + # 273 | scale_y_continuous(limits = c(0.7, .85)) + 274 | geom_jitter(color = "black", shape = 1, size = 2, alpha = 0.6, width = .05) + 275 | xlab("Each Round of Inspection") + 276 | ylab("") + 277 | theme_bw() + 278 | theme(legend.position = "none") 279 | p <- ggarrange(p1, p2, widths = c(1, 3)) 280 | 281 | fname <- paste0(dir.figures, "Inspection_Accuracy.png") 282 | fname 283 | ggsave(fname, plot = p, width = 6.5, height = 3.2, units = "in", dpi = 300) 284 | ``` 285 | 286 | 287 | ```{r - plot inter-coder} 288 | p3 <- dfs_int %>% 289 | ggplot(aes(x = 1, y = alignment)) 
+ 290 | geom_boxplot(fill = "gray", show.legend = F, varwidth = .5) + # , fill = round_id 291 | stat_summary(fun = mean, geom = "point", shape = 15, size = 3, alpha = 0.6, color = "red", fill = "red") + 292 | stat_summary(fun.data = fun_mean, geom = "text", vjust = .7, hjust = -0.3, color = "red") + 293 | scale_y_continuous(limits = c(0.7, .9)) + 294 | geom_jitter(color = "black", shape = 1, size = 2, alpha = 0.6, width = .05) + 295 | xlab("Overall") + 296 | ylab("Inter-expert Reliability") + 297 | theme_bw() + 298 | theme( 299 | panel.grid = element_blank(), 300 | axis.text.x = element_text(colour = "transparent"), 301 | axis.ticks = element_blank(), 302 | legend.position = "none" 303 | ) 304 | 305 | 306 | p4 <- dfs_int %>% 307 | ggplot(aes(x = round_id, y = alignment)) + # , fill = round_id 308 | geom_boxplot(alpha = .8) + # 309 | scale_y_continuous(limits = c(0.7, .9)) + 310 | geom_jitter(color = "black", shape = 1, size = 2, alpha = 0.6, width = .05) + 311 | xlab("Each Round of Inspection") + 312 | ylab("") + 313 | theme_bw() + 314 | theme(legend.position = "none") 315 | p <- ggarrange(p3, p4, widths = c(1, 3)) 316 | 317 | fname <- paste0(dir.figures, "Inspection_InterExpert.png") 318 | fname 319 | ggsave(fname, plot = p, width = 6.5, height = 3.2, units = "in", dpi = 300) 320 | ``` 321 | 322 | 323 | 324 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDGdetector: an R-based text mining tool for quantifying efforts toward Sustainable Development Goals" 3 | tags: 4 | - Sustainability 5 | - Sustainable Development Goals (SDGs) 6 | - Text mining 7 | - Text analysis 8 | - R 9 | authors: 10 | - name: Yingjie Li 11 | orcid: 0000-0002-8401-0649 12 | affiliation: "1, 2, 3" 13 | corresponding: true 14 | - name: Veronica F. 
Frans 15 | orcid: 0000-0002-5634-3956 16 | affiliation: "1, 4, 5" 17 | - name: Yongze Song 18 | orcid: 0000-0003-3420-9622 19 | affiliation: 6 20 | - name: Meng Cai 21 | orcid: 0000-0002-8318-572X 22 | affiliation: "7, 8" 23 | - name: Yuqian Zhang 24 | orcid: 0000-0001-7576-2526 25 | affiliation: "1, 2" 26 | - name: Jianguo Liu 27 | orcid: 0000-0001-6344-0087 28 | affiliation: "1, 2" 29 | affiliations: 30 | - name: Center for Systems Integration and Sustainability, Department of Fisheries and Wildlife, Michigan State University, East Lansing, MI 48823, United States 31 | index: 1 32 | - name: Environmental Science and Policy Program, Michigan State University, East Lansing, MI 48823, United States 33 | index: 2 34 | - name: Natural Capital Project, Woods Institute for the Environment, Stanford University, Stanford, CA, 94305, United States 35 | index: 3 36 | - name: Ecology, Evolution, and Behavior Program, Michigan State University, East Lansing, MI 48824, United States 37 | index: 4 38 | - name: W.K. Kellogg Biological Station, Michigan State University, Hickory Corners, MI 49060, United States 39 | index: 5 40 | - name: School of Design and the Built Environment, Curtin University, Perth, WA, 6102, Australia 41 | index: 6 42 | - name: School of Planning, Design and Construction, Michigan State University, East Lansing, MI, 48824, United States 43 | index: 7 44 | - name: Department of Civil and Environmental Engineering, Technical University of Darmstadt, Darmstadt 64287, Germany 45 | index: 8 46 | date: 25 March 2023 47 | bibliography: paper.bib 48 | 49 | --- 50 | 51 | # Summary 52 | 53 | The global interest in moving towards a sustainable future has grown exponentially at all levels. The United Nations’ Sustainable Development Goals (SDGs), adopted by world leaders in 2015, provide an integrated framework to track progress toward sustainability [@un_global_2019]. 
Textual data, such as public statements posted on websites, organization reports, and scientific publications, is a rich source for evaluating the planned and ongoing efforts, as well as achievements towards sustainability. However, no computational tool exists to date that can accurately and efficiently identify SDG-related statements from these large amounts of text data. To fill this gap, we developed the ***SDGdetector*** package in R [@r_core_team_r_2021] to map textual data to specific goals and targets under the UN SDG framework for quantitative analysis. This is the first open-source, high-resolution, and high-accuracy analytical package that can identify which and how many SDG goals and targets are declared in any type of text-based data frame or corpus. This package thus enables a unique way to monitor individuals' and organizations' commitments and efforts towards advancing the 17 SDGs and 169 associated targets. 54 | 55 | # Statement of need 56 | 57 | The Sustainable Development Goals (SDGs) agenda, adopted by all United Nations Member States in 2015, provides a shared blueprint for nations, cities, corporations, research institutions, and individuals to track and plan their contributions to social, economic, and environmental transformations [@un_global_2019]. Although considerable efforts and contributions have been made to use existing statistical data for SDG assessments, half of the 231 indicators listed in the global indicator framework for SDGs lack either established methodologies or available data for measuring and implementing the goals ([https://unstats.un.org/sdgs/iaeg-sdgs/tier-classification](https://unstats.un.org/sdgs/iaeg-sdgs/tier-classification)). 
As a complement to the commonly used statistical data, textual data (e.g., websites, organization or government reports, and scientific publications) are rarely considered but show great potential for becoming a rich and important data source to narrow this existing SDG data gap [@cai_natural_2021; @chang_accelerating_2021]. For example, by identifying SDG commitments and contributions in text from legally-binding corporate annual reports, one can evaluate which SDGs are being mentioned (directly or indirectly) and to what extent corporations are moving towards them. Or, published research papers could also be evaluated to link research institutions’ commitments to SDG progress. Manually reviewing and matching text corpora to specific SDGs or targets can be extremely time-consuming and costly. In addition, though conventional manual coding may achieve high accuracy, it faces precision issues because of intercoder reliability challenges. This is especially an issue when attempting to objectively classify and map massive data into tens and hundreds of topic categories (e.g., the 169 SDG targets). To address these challenges, we developed the ***SDGdetector*** package, which automates the text analysis process via a text mining approach (Figure 1). 58 | 59 | ![](figure1.png) 60 | 61 | **Figure 1.** Flowchart for identifying SDG-related statements from textual data. 
62 | 63 | The SDGdetector package was developed by (1) compiling six existing databases on SDG search queries [@un_global_2019; @duran-silva_controlled_2019; @jayabalasingham_identifying_2019; @vanderfeesten_search_2020; @schubert_scientific_2020; @bautista-puig_unveiling_2019; @wulff_text2sdg_2021]; (2) reviewing all SDG targets and indicators [@un_global_2019] to manually refine and update the search terms to create query dictionaries at the levels of the 17 SDGs and the 169 SDG targets (which correspond to the 231 SDG indicators); (3) manually assessing and improving the accuracy of these queries using thousands of randomly-selected statements from real-world corporate annual reports across multiple iterations; and (4) turning these queries into a lexical database for text mining across large bodies of text and tabulating the matched SDGs and SDG targets. 64 | 65 | SDGdetector is a unique tool because it is by far the only one available that is equipped with a database for detecting SDG-relevant statements at the target level. We are aware of another useful R package (*text2sdg*)[@wulff_text2sdg_2021], which mostly uses single words as search terms and was designed to only map text to SDGs at the goal level (coarser resolution). Our search queries in the comprehensive database further considered sentence structure to reduce noise hits, and can capture hits at both goal and target levels. In combination with this database, the text mining approach, an artificial intelligence (AI) technology, enables us to use natural language processing to transform the unstructured text within documents into normalized and structured data suitable for analysis and visualization. 
After repeated validation and calibration, this package has achieved high accuracy in detecting SDG-related statements within textual data (> 75.5%, measured by the alignment between the R package results and four experts’ manually-coded results; see the "Accuracy Evaluation" section on [GitHub](https://github.com/Yingjie4Science/SDGdetector) for more information). Complete data and code necessary for reproducing this accuracy evaluation can be found on our GitHub repository under the [`./docs/accuracy_evaluation/`](https://github.com/Yingjie4Science/SDGdetector/tree/main/docs/accuracy_evaluation) folder. Other similar tools, such as the *text2sdg*, however, did not report any accuracy evaluations. 66 | 67 | This lightweight package has great potential to be useful in many disciplines with objectives to identify which SDGs and to what extent an entity is putting effort into them. This package can be used in large-scale research projects in the field of corporate sustainability and urban science. It can also be used in systematic reviews and syntheses of published literature and patents. The associated lexical database embedded within this R package can be also used for developing similar applications in Python or other programming languages. 68 | 69 | # Functionality 70 | 71 | ***SDGdetector*** is an R [@r_core_team_r_2021] package that provides functions for three main tasks: 72 | 73 | (1) detecting whether a reported action aligns with any specific Goals (among the 17 SDGs) and Targets (among the 169 targets) under the Global indicator framework for Sustainable Development Goals [@un_global_2019]. 74 | 75 | (2) estimating the priorities of sustainability contributions by counting how frequently a particular Goal or Target is mentioned in the text report. 76 | 77 | (3) detecting which countries or regions are mentioned along with the SDG statements. 
For global studies, this function provides a means to show where the SDG efforts could be possibly implemented or have been planned. 78 | 79 | The package is based on the tidyverse [@wickham_welcome_2019] framework and is available on GitHub [https://github.com/Yingjie4Science/SDGdetector](https://github.com/Yingjie4Science/SDGdetector). 80 | 81 | # Usage 82 | 83 | (1) Data preparation. Textual data can come from a variety of sources, such as PDF files, HTML webpages, TXT, or Microsoft Word documents. The unit of text can be a clause, a sentence, or a paragraph. For the best accuracy, we suggest users split a large chunk of text into sentence or clause levels for analysis. Users can use our function *pdf2text()* or self-defined functions to extract textual data from PDF files, clean the text, split the text into sentences, and format the data in a dataframe. 84 | 85 | (2) Detect SDG goals and targets. The input can be a single sentence, or a dataframe that contains many rows of sentences. If the input is a dataframe, users should designate which column to be used for SDG detection. 86 | 87 | ```r 88 | # load package 89 | library(SDGdetector) 90 | 91 | # a string as the input 92 | text <- 'our goal is to mitigate climate change, end poverty, and reduce 93 | inequality globally' 94 | SDGdetector(x = text) 95 | 96 | # a dataframe as the input 97 | df <- data.frame(col = c( 98 | 'our goal is to end poverty globally', 99 | 'this product contributes to slowing down climate change')) 100 | SDGdetector(x = df, col = col) 101 | ``` 102 | 103 | In addition to the lexical database included in the ***SDGdetector*** package, users can also add customized search queries to the lexical database. 
104 | 105 | ```r 106 | # A list of terms used to determine whether a sentence relates to SDG efforts 107 | terms_new <- c("improve", "farmer", "income") 108 | 109 | # Use *AND* operator to combine the terms and generate a customized search query 110 | # (or called a matching pattern); 111 | # then add the query to the existing lexical database 112 | add_sdg_pattern(sdg_id = 'SDG1_2', x = terms_new, operator = 'AND') 113 | ``` 114 | 115 | (3) Detect countries or regions. To understand where the SDG efforts are implemented or planned, users can use the function *detect_region()*. The result will return a list of country names in the ISO 3166-1 alpha-3 – three-letter country codes format. 116 | 117 | ```r 118 | text = 'China and USA devoted the largest efforts on solar energy' 119 | detect_region(x = text) 120 | ``` 121 | 122 | (4) Results and visualization. Users can summarize the detected SDG goals and targets by report (or organization), by Goal, and by region. The package provides methods for visualizing the SDG frequency, via its family of *plot_sdg_** functions. For instance, 123 | 124 | ```r 125 | data("sdgstat") 126 | df <- sdgstat 127 | 128 | # plot SDG on a bar plot 129 | plot_sdg_bar(data = df, sdg = SDG, value = Value) 130 | ``` 131 | ![](../docs/images/example_plots/plot_sdg_bar_example.png) 132 | 133 | ```r 134 | # plot SDG by country on a map 135 | plot_sdg_map(data = df, sdg = SDG, value = Value, country = Country, by_sdg = F) 136 | ``` 137 | ![](../docs/images/example_plots/plot_sdg_map_example.png) 138 | 139 | # Acknowledgements 140 | 141 | The authors acknowledge contributions from UN Global Sustainability Index Institute (UNGSII) Foundation during the genesis of this project. We thank Racheline Maltese for her input in developing the SDG search terms during the early stages of this project. This work was funded by the National Science Foundation (grant numbers: DEB-1924111, OAC-2118329). 
VFF was supported by the National Science Foundation Graduate Research Fellowship Program (Fellow ID: 2018253044) and the Michigan State University Enrichment Fellowship. 142 | 143 | # References 144 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | 2 | @techreport{un_global_2019, 3 | title = {Global indicator framework for the {Sustainable} {Development} {Goals} and targets of the 2030 {Agenda} for {Sustainable} {Development}}, 4 | url = {https://unstats.un.org/sdgs/indicators/indicators-list/}, 5 | urldate = {2020-02-24}, 6 | author = {UN}, 7 | year = {2019}, 8 | keywords = {SDGdetector}, 9 | file = {SDG Indicators — SDG Indicators:C\:\\Users\\pc\\Zotero\\storage\\295ZMZGW\\indicators-list.html:text/html}, 10 | } 11 | 12 | @article{jayabalasingham_identifying_2019, 13 | title = {Identifying research supporting the {United} {Nations} {Sustainable} {Development} {Goals}}, 14 | volume = {1}, 15 | url = {https://data.mendeley.com/datasets/87txkw7khs/1}, 16 | doi = {10.17632/87txkw7khs.1}, 17 | abstract = {In an effort to identify research that supports the UN SDGs, Elsevier has generated a set of Scopus queries related to each of the SDGs. 
In this dataset, you will find documentation describing how each of the Scopus queries were created along with a collated list of the queries.}, 18 | language = {en}, 19 | urldate = {2021-06-18}, 20 | author = {Jayabalasingham, Bamini and Boverhof, Roy and Agnew, Kevin and Klein, L.}, 21 | month = oct, 22 | year = {2019}, 23 | note = {ZSCC: 0000009 24 | Publisher: Mendeley}, 25 | keywords = {SDGdetector}, 26 | file = {Snapshot:C\:\\Users\\pc\\Zotero\\storage\\Y522Y4ZE\\1.html:text/html}, 27 | } 28 | 29 | @misc{r_core_team_r_2021, 30 | address = {Vienna, Austria}, 31 | title = {R: {A} language and environment for statistical computing}, 32 | shorttitle = {R}, 33 | url = {https://www.R-project.org/}, 34 | author = {R Core Team,}, 35 | year = {2021}, 36 | note = {ZSCC: NoCitationData[s1]}, 37 | keywords = {SDGdetector}, 38 | } 39 | 40 | @inproceedings{bautista-puig_unveiling_2019, 41 | address = {Rome, Italy}, 42 | title = {Unveiling the path towards sustainability: is there a research interest on sustainable goals?}, 43 | volume = {II}, 44 | isbn = {978-88-338-1118-5}, 45 | shorttitle = {Unveiling the path towards sustainability}, 46 | booktitle = {{ISSI}}, 47 | author = {Bautista-Puig, Núria and Mauleón, Elba}, 48 | year = {2019}, 49 | keywords = {SDGdetector}, 50 | pages = {2770--2771}, 51 | } 52 | 53 | @unpublished{cai_sustainable_2022, 54 | type = {in revision}, 55 | title = {Sustainable development goals in smart city implementation}, 56 | author = {Cai, Meng and Li and Huang, Huiqing and Decaminada, T and Kassens-noor, E}, 57 | year = {2022}, 58 | keywords = {SDGdetector}, 59 | } 60 | 61 | @misc{duran-silva_controlled_2019, 62 | title = {A controlled vocabulary defining the semantic perimeter of {Sustainable} {Development} {Goals}}, 63 | url = {https://zenodo.org/record/3567769}, 64 | abstract = {A set of controlled terms that define the scope and breadth of Sustainable Development Goals (SDGs) as defined by the United Nations.  
These terms may be used to tag and index textual records in accordance with SDGs. The vocabulary is constructed by means of the following steps: An initial set of terms per SDG target is built by extracting key terms from the UN official list of Goals, Targets and Indicators The list is manually enriched by performing a review of the literature produced around SDGs and by compiling lists of pertinent words per Target mentioned by the reviewed documents A reference textual corpus is downloaded by searching for the initial set terms defined at step 1. and 2. The corpus is used to train a Word2Vec word embedding model (a machine learning model based on neural networks). The terms’ list is then enriched by means of automatic methods, which are run in parallel: The trained Word2Vec model is used to select, among the indexed keywords of the reference corpus, all terms “semantically close” to the initial set of words. This step is carried out to select terms that might not appear in the texts themselves, but that were deemed pertinent to label the textual records. Further terms that are mentioned in the texts of the reference corpus and that are valued by the trained Word2Vec model as “semantically close” to the initial set of words are also retained. This step is performed to include in the controlled vocabulary a series of terms that are related to the focus of the SDGs and which are used by practitioners. An automated algorithm is used to retrieve, from the APIs of WikiPedia a series of terms that have some categorical relationships (i.e. those that are indexed as “a broader concept of”, or “equivalent to” in DBpedia) with the initial set of words. 
The final list produced by steps 1-4 s finally manually revised}, 65 | language = {eng}, 66 | urldate = {2022-10-24}, 67 | publisher = {Zenodo}, 68 | author = {Duran-Silva, Nicolau and Fuster, Enric and Massucci, Francesco Alessandro and Quinquillà, Arnau}, 69 | month = dec, 70 | year = {2019}, 71 | doi = {10.5281/zenodo.3567769}, 72 | note = {Type: dataset}, 73 | keywords = {SDGdetector}, 74 | file = {Zenodo Snapshot:C\:\\Users\\pc\\Zotero\\storage\\GMEWK9SM\\3567769.html:text/html}, 75 | } 76 | 77 | @unpublished{kassens-noor_urban_2022, 78 | type = {in revision}, 79 | title = {Urban {scAInce}: explaining why and how cities transform through artificial intelligence}, 80 | author = {Kassens-Noor, E}, 81 | year = {2022}, 82 | keywords = {SDGdetector}, 83 | } 84 | 85 | @unpublished{li_global_2022, 86 | type = {in revision}, 87 | title = {Global {Business} {Giants}’ {Commitment} to {Sustainable} {Development} {Goals}}, 88 | author = {Li, Yingjie and Frans, V and Zhang, Yuqian and Cai, Meng and Chen, Ruishan}, 89 | year = {2022}, 90 | keywords = {SDGdetector}, 91 | } 92 | 93 | @techreport{schubert_scientific_2020, 94 | address = {Stockholm, Sweden}, 95 | title = {Scientific publications on sustainable development}, 96 | url = {https://www.su.se/polopoly_fs/1.530251.1607009534!/menu/standard/file/sdg-publikationer-2010-2019_gabor_rev3.pdf}, 97 | language = {Swedish}, 98 | institution = {Stockholm University Library}, 99 | author = {Schubert, G}, 100 | year = {2020}, 101 | keywords = {SDGdetector}, 102 | } 103 | 104 | @misc{vanderfeesten_search_2020, 105 | title = {Search {Queries} for "{Mapping} {Research} {Output} to the {Sustainable} {Development} {Goals} ({SDGs})" v5.0}, 106 | url = {https://zenodo.org/record/3817445}, 107 | abstract = {This package contains machine readable (xml) search queries, for the Scopus publication database, to find domain specific research output that are related to the 17 Sustainable Development Goals (SDGs). 
[ SDG QUERIES PAGES ] [ PROJECT WEBSITE ] [ FORK ON GITHUB ] Sustainable Development Goals are the 17 global challenges set by the United Nations. Within each of the goals specific targets and indicators are mentioned to monitor the progress of reaching those goals by 2030. In an effort to capture how research is contributing to move the needle on those challenges, we earlier have made an initial classification model that enables to quickly identify what research output is related to what SDG. (This Aurora SDG dashboard is the initial outcome as proof of practice.) The initiative started from the Aurora Universities Network in 2017, in the working group "Societal Impact and Relevance of Research", to investigate and to make visible 1. what research is done that is relevant to topics or challenges that live in society (for the proof of practice this has been scoped down to the SDGs), and 2. what the effect or impact is of implementing those research outcomes to those societal challenges (this has also been scoped down to research output being cited in policy documents from national and local governments and NGOs). The classification model we have used consists of 17 different search queries on the Scopus database. The search queries are elegant constructions with keyword combinations and boolean operators, in the syntax specific to the Scopus Query Language. We have used Scopus because it covers more research areas that are relevant to the SDGs, and we could filter much easier the Aurora Institutions. Versions Different versions of the search queries have been made over the past years to improve the precision (soundness) and recall (completeness) of the results. The queries have been made in a team effort by several bibliometric experts from the Aurora Universities. Each one did two or three SDGs, and then reviewed each other's work. v1.0 January 2018 Initial 'strict' version. 
In this version only the terms were used that appear in the SDG policy text of the targets and indicators defined by the UN. At this point we have been aware of the SDSN Compiled list of keywords, and used them as inspiration. Rule of thumb was to use keyword-combination searches as much as possible rather than single-keyword searches, to be more precise rather than to yield large amounts of false positive papers. Also we did not use the inverse or 'NOT' operator, to prevent removing true positives from the result set. This version has not been reviewed by peers. Download from: GitHub / Zenodo v2.0 March 2018 Reviewed 'strict' version. Same as version 1, but now reviewed by peers. Download from: GitHub / Zenodo v3.0 May 2019 'echo chamber' version. We noticed that, when using strictly the terms that policy makers of the UN use in the targets and indicators, much of the research that did not use those specific terms was left out of the result set. (e.g. "mortality" vs "deaths") To increase the recall, without reducing precision of the papers in the results, we added keywords that were obvious synonyms and antonyms to the existing 'strict' keywords. This was done based on the keywords that appeared in papers in the result set of version 2. This creates an 'echo chamber', that results in more of the same papers. Download from: GitHub / Zenodo v4.0 August 2019 uniform 'split' version. Over the course of the years, the UN changed and added Targets and indicators. In order to keep track of if we missed a target, we have split the queries to match the targets within the goals. This gives much more control in maintenance of the queries. Also in this version the use of brackets, quotation marks, etc. has been made uniform, so it also works with API's, and not only with GUI's. This version has been evaluated using a survey, to get baseline measurements for the precision and recall. 
Published here: Survey data of "Mapping Research output to the SDGs" by Aurora Universities Network (AUR) doi:10.5281/zenodo.3798385. Download from: GitHub / Zenodo v5.0 June 2020 'improved' version. In order to better reflect academic representation of research output that relate to the SDG's, we have added more keyword combinations to the queries to increase the recall, to yield more research papers related to the SDG's, using academic terminology. We mainly used the input from the Survey data of "Mapping Research output to the SDGs" by Aurora Universities Network (AUR) doi:10.5281/zenodo.3798385. We ran several text analyses: Frequent term combination in title and abstracts from Suggested papers, and in selected (accepted) papers, suggested journals, etc.found in this data set Spielberg, Eike, \& Hasse, Linda. (2020). Text Analyses of Survey Data on "Mapping Research Output to the Sustainable Development Goals (SDGs)" (Version 1.0) [Data set]. Zenodo http://doi.org/10.5281/zenodo.3832090 .  Secondly we got inspiration out of the Elsevier SDG queries Jayabalasingham, Bamini; Boverhof, Roy; Agnew, Kevin; Klein, Lisette (2019), “Identifying research supporting the United Nations Sustainable Development Goals”, Mendeley Data, v1 https://dx.doi.org/10.17632/87txkw7khs.1. And thirdly we got inspiration from this controlled vocabulary containing closely related terms. Duran-Silva, Nicolau, Fuster, Enric, Massucci, Francesco Alessandro, \& Quinquillà, Arnau. (2019). A controlled vocabulary defining the semantic perimeter of Sustainable Development Goals (Version 1.2) [Data set]. Zenodo. doi.org/10.5281/zenodo.3567769 Download from: GitHub / Zenodo Contribute and improve the SDG Search Queries We welcome you to join the Github community and to fork, improve and make a pull request to add your improvements to the new version of the SDG queries. 
https://aurora-network-global.github.io/sdg-queries/}, 108 | urldate = {2022-10-24}, 109 | publisher = {Zenodo}, 110 | author = {Vanderfeesten, Maurice and Otten, René and Spielberg, Eike}, 111 | month = jul, 112 | year = {2020}, 113 | doi = {10.5281/zenodo.3817445}, 114 | keywords = {SDGdetector}, 115 | file = {Zenodo Snapshot:C\:\\Users\\pc\\Zotero\\storage\\7ZHIYT56\\3817445.html:text/html}, 116 | } 117 | 118 | @article{wickham_welcome_2019, 119 | title = {Welcome to the {Tidyverse}}, 120 | volume = {4}, 121 | issn = {2475-9066}, 122 | url = {https://joss.theoj.org/papers/10.21105/joss.01686}, 123 | doi = {10.21105/joss.01686}, 124 | abstract = {Wickham et al., (2019). Welcome to the Tidyverse. Journal of Open Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686}, 125 | language = {en}, 126 | number = {43}, 127 | urldate = {2022-10-24}, 128 | journal = {Journal of Open Source Software}, 129 | author = {Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and François, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and Kuhn, Max and Pedersen, Thomas Lin and Miller, Evan and Bache, Stephan Milton and Müller, Kirill and Ooms, Jeroen and Robinson, David and Seidel, Dana Paige and Spinu, Vitalie and Takahashi, Kohske and Vaughan, Davis and Wilke, Claus and Woo, Kara and Yutani, Hiroaki}, 130 | month = nov, 131 | year = {2019}, 132 | keywords = {SDGdetector}, 133 | pages = {1686}, 134 | file = {Snapshot:C\:\\Users\\pc\\Zotero\\storage\\RLB7UKZE\\joss.html:text/html;Wickham et al_2019_Welcome to the Tidyverse.pdf:G\:\\My Drive\\lib\\MEGAsync\\Wickham et al_2019_Welcome to the Tidyverse.pdf:application/pdf}, 135 | } 136 | 137 | @misc{wulff_text2sdg_2021, 138 | title = {text2sdg: {Detecting} {UN} {Sustainable} {Development} {Goals} in {Text}}, 139 | shorttitle = {text2sdg}, 140 | url = {https://zenodo.org/record/5553980}, 141 | abstract = {The United Nations’ Sustainable 
Development Goals (SDGs) have become an important guideline for organisations to monitor and plan their contributions to social, economic, and environmental transformations. The text2sdg package is an open-source analysis package that identifies SDGs in text using scientifically developed query systems, opening up the opportunity to monitor any type of text-based data, such as scientific output or corporate publications.}, 142 | urldate = {2022-10-24}, 143 | publisher = {Zenodo}, 144 | author = {Wulff, Dirk U. and Meier, Dominik S.}, 145 | month = oct, 146 | year = {2021}, 147 | doi = {10.5281/zenodo.5553980}, 148 | keywords = {SDGdetector}, 149 | file = {Zenodo Snapshot:C\:\\Users\\pc\\Zotero\\storage\\L5MAMSS6\\5553980.html:text/html}, 150 | } 151 | 152 | 153 | @article{oconnor_intercoder_2020, 154 | title = {Intercoder {Reliability} in {Qualitative} {Research}: {Debates} and {Practical} {Guidelines}}, 155 | volume = {19}, 156 | issn = {1609-4069}, 157 | shorttitle = {Intercoder {Reliability} in {Qualitative} {Research}}, 158 | url = {https://doi.org/10.1177/1609406919899220}, 159 | doi = {10.1177/1609406919899220}, 160 | abstract = {Evaluating the intercoder reliability (ICR) of a coding frame is frequently recommended as good practice in qualitative analysis. ICR is a somewhat controversial topic in the qualitative research community, with some arguing that it is an inappropriate or unnecessary step within the goals of qualitative analysis. Yet ICR assessment can yield numerous benefits for qualitative studies, which include improving the systematicity, communicability, and transparency of the coding process; promoting reflexivity and dialogue within research teams; and helping convince diverse audiences of the trustworthiness of the analysis. Few guidelines exist to help researchers negotiate the assessment of ICR in qualitative analysis. 
The current article explains what ICR is, reviews common arguments for and against its incorporation in qualitative analysis and offers guidance on the practical elements of performing an ICR assessment.}, 161 | language = {en}, 162 | urldate = {2022-11-17}, 163 | journal = {International Journal of Qualitative Methods}, 164 | author = {O{\textquoteright}Connor, Cliodhna and Joffe, Helene}, 165 | month = jan, 166 | year = {2020}, 167 | note = {Publisher: SAGE Publications Inc}, 168 | pages = {1609406919899220}, 169 | file = {O{\textquoteright}Connor et al_2020_Intercoder Reliability in Qualitative Research - Debates and Practical Guidelines.pdf:G\:\\My Drive\\lib\\MEGAsync\\O{\textquoteright}Connor et al_2020_Intercoder Reliability in Qualitative Research - Debates and Practical Guidelines.pdf:application/pdf}, 170 | } 171 | 172 | @article{cai_natural_2021, 173 | title = {Natural language processing for urban research: {A} systematic review}, 174 | volume = {7}, 175 | issn = {2405-8440}, 176 | shorttitle = {Natural language processing for urban research}, 177 | url = {https://www.sciencedirect.com/science/article/pii/S2405844021004278}, 178 | doi = {10.1016/j.heliyon.2021.e06322}, 179 | abstract = {Natural language processing (NLP) has shown potential as a promising tool to exploit under-utilized urban data sources. This paper presents a systematic review of urban studies published in peer-reviewed journals and conference proceedings that adopted NLP. The review suggests that the application of NLP in studying cities is still in its infancy. Current applications fell into five areas: urban governance and management, public health, land use and functional zones, mobility, and urban design. NLP demonstrates the advantages of improving the usability of urban big data sources, expanding study scales, and reducing research costs. 
On the other hand, to take advantage of NLP, urban researchers face challenges of raising good research questions, overcoming data incompleteness, inaccessibility, and non-representativeness, immature NLP techniques, and computational skill requirements. This review is among the first efforts intended to provide an overview of existing applications and challenges for advancing urban research through the adoption of NLP.}, 180 | language = {en}, 181 | number = {3}, 182 | urldate = {2021-05-18}, 183 | journal = {Heliyon}, 184 | author = {Cai, Meng}, 185 | month = mar, 186 | year = {2021}, 187 | note = {ZSCC: 0000001}, 188 | keywords = {Text mining, Natural language processing, Urban big data, Urban research}, 189 | pages = {e06322}, 190 | file = {Cai_2021_Heliyon_Natural language processing for urban research.pdf:G\:\\My Drive\\lib\\MEGAsync\\Cai_2021_Heliyon_Natural language processing for urban research.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\yingjiel\\Zotero\\storage\\DDPMF587\\S2405844021004278.html:text/html}, 191 | } 192 | 193 | @article{chang_accelerating_2021, 194 | title = {Accelerating {Mixed} {Methods} {Research} {With} {Natural} {Language} {Processing} of {Big} {Text} {Data}}, 195 | volume = {15}, 196 | issn = {1558-6898}, 197 | url = {https://doi.org/10.1177/15586898211021196}, 198 | doi = {10.1177/15586898211021196}, 199 | abstract = {Situations of catastrophic social change, such as COVID-19, raise complex, interdisciplinary research questions that intersect health, education, economics, psychology, and social behavior and require mixed methods research. The pandemic has been a quickly evolving phenomenon, which pressures the time necessary to perform mixed methods research. Natural language processing (NLP) is a promising solution that leverages computational approaches to analyze textual data in “natural language.” 
The aim of this article is to introduce NLP as an innovative technology to assist with the rapid mixed methods analysis of textual big data in times of catastrophic change. The contribution of this article is illustrating how NLP is a type of mixed methods analysis and making recommendations for its use in mixed methods research.}, 200 | language = {en}, 201 | number = {3}, 202 | urldate = {2022-12-09}, 203 | journal = {Journal of Mixed Methods Research}, 204 | author = {Chang, Tammy and DeJonckheere, Melissa and Vydiswaran, V. G. Vinod and Li, Jiazhao and Buis, Lorraine R. and Guetterman, Timothy C.}, 205 | month = jul, 206 | year = {2021}, 207 | note = {Publisher: SAGE Publications}, 208 | pages = {398--412}, 209 | } 210 | -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | # load database 2 | load('./R/SDG_keys.RData') 3 | View(SDG_keys) 4 | load("~/Documents/Projects/findSDGs/R/SDG_keys.RData") 5 | # load database 6 | load('./R/SDG_keys.RData') 7 | load("~/Documents/Projects/findSDGs/R/SDG_keys.RData") 8 | # load database 9 | load('./R/SDG_keys.RData') 10 | sdg_detector <- function(dataframe, company_name) { 11 | code <- dataframe %>% 12 | dplyr::mutate(#match = 0, 13 | sdgs = '', ## for later use, to append data to this column 14 | n_total = 0, 15 | sdgs_n = '') 16 | for (i in 1:nrow(SDG_keys)){ ## all SDG indicators 17 | sdg_i_str <- SDG_keys$SDG_id[i] ## the SDG id name 18 | sdg_i_obj <- SDG_keys$SDG_keywords[i] ## the corresponding SDG search term list 19 | print(sdg_i_str) 20 | # print(sdg_i_obj) 21 | code <- code %>% as.data.frame() %>% 22 | ## at the sentence level - count once ---------------------- 23 | dplyr::mutate( 24 | match = ifelse( 25 | grepl(pattern = sdg_i_obj, x = statement, ignore.case = T, perl = T), 1, 0)) %>% ## yes-1 or no-0 if they match 26 | dplyr::mutate(sdgs = ifelse(match > 0, paste0(sdgs, ',', sdg_i_str), 
sdgs)) %>% 27 | ## at the sentence level - count all matches --------------- 28 | dplyr::mutate( 29 | n = str_count(string = statement, regex(pattern = sdg_i_obj, ignore_case = T)), 30 | n_total = n_total + n, 31 | sdgs_n = ifelse(n > 0, paste0(sdgs_n, ',', sdg_i_str, '-', n), sdgs_n)) %>% 32 | as.data.frame() 33 | } 34 | ### sort from most SDG hits to least (or, none) 35 | coded <- code %>% arrange(desc(nchar(sdgs)), id) 36 | ### save all the hits to xlsx for easier inspection. 37 | # fname <- paste0(dirpath, 'DF_coded/', company_name, '_coded.xlsx'); fname 38 | # writexl::write_xlsx(x = coded, path = fname) 39 | fname <- paste0('./data/output/', company_name, '_coded.csv'); fname 40 | readr::write_csv(x = coded, file = fname) 41 | fname <- paste0('./data/output/', company_name, '_coded.RData'); fname 42 | save(coded, file = fname) 43 | return(coded) 44 | } 45 | # load database 46 | load('./R/SDG_keys.RData') 47 | # load database 48 | load('./R/SDG_keys.RData') 49 | View(SDG_keys) 50 | # identify 17 Sustainable Development Goals and associated targets in text 51 | findSDGs <- function(df) { 52 | text_df <- df %>% dplyr::select(where(is.character)) 53 | colnames(text_df) <- "statement" 54 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 55 | for (i in 1:nrow(SDG_keys)) { 56 | sdg_id <- SDG_keys$SDG_id[i] # to add later 57 | target_id <- SDG_keys$target_id[i] 58 | target_key <- SDG_keys$SDG_keywords[i] 59 | match <- ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0) 60 | coded_df <- coded_df %>% 61 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 62 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 63 | as.data.frame() 64 | return(coded_df) 65 | } 66 | test_df <- data.frame(boo = c("and conference calls Quarterly financial updates and guidance Annual integrated report, annual 
reports, and mandatory filings Ongoing website updates and press releases Annual CEMEX Day investor event Understanding of CEMEXs financial position, performance, business perspectives, and risks Understanding of management and corporate ESG practices ACADEMIC INSTITUTIONS & NGOs Development of communities and regions Collaboration in applied research projects Engaging students Develop employees talent and awareness of global trends Innovation efforts Yearly collaborative research portfolio Bi-weekly Lighthouses talks Quarterly whitepapers Yearly best practices and methodologies playbooks Annual event to share the projects outcomes Hackathons co-organized with Universities Impact through collaborative projects to develop knowledge and solutions Build strategic partnerships with top universities Employee awareness of global trends Attract new talent for CEMEX COMMUNITIES Education and capability development Sustainable and resilient infrastructure and mobility Social and environmental innovation and entrepreneurship Health and safety, environment protection culture Human Rights Accountability and transparent communication Contribution to community quality of life and wellbeing Local employment opportunities Economic empowerment for women in communities Ongoing dialogues with community stakeholders Quarterly community panels, sessions, and meetings Annual open house days at operating sites Ongoing educational programs and training Participation in local career events Development of community infrastructure, volunteering, and social investment initiatives Development of inclusive business programs Creation of social and inclusive businesses Generation of empowerment and capacity-building Improvement to community infrastructure and wellbeing Development of learning opportunities Open communication and feedback Community transformation SUPPLIERS Health and safety Companys economic performance in mid and long term Fair business conditions Sustainability management 
practices Daily procurement interactions Ongoing capacity building programs Supplier sustainability guidance CEMEX Supplier Sustainability Program Inclusion of human rights, labor, antitrust, and sustainability clauses in our contracts and purchase orders Promotion of local suppliers GOVERNMENTS AND POLICY MAKERS Environmental impact mitigation and management Companys economic performance in mid and long term Health and safety Climate change and CO2 emissions policy Infrastructure and housing solutions Local employment opportunities Annual integrated report and conservation books Ongoing public policy discussions Long-term partnerships Working groups Periodic plant visits Events and conferences Collaboration and communication with governments and regulatory bodies Successful adaptations to new local, national, and regional regulations BUSINESS ASSOCIATIONS Active engagement and guidance Periodic meetings Annual conferences Ongoing working groups Ongoing research studies Development of coordinated initiatives with trade associations Best practices sharing COMMUNICATION MECHANISMS WITH OUR STAKEHOLDERS We outline the key concerns brought up by our materiality assessment and how we engage with our key stakeholders in the table below", "200 1 Company Overview 2 How We Create Value 3 Our Performance 4 Governance 5 Results in Detail 6 About this ReportContents ENVIRONMENTAL AND QUALITY MANAGEMENT 2016 2017 2018 Sites with ISO 9001 Certification (%) 45 46 47 Cement 79 82 84 Ready-mix 44 45 46 Aggregates 41 43 41 Environmental investment (US million) 80 83 83 Environmental incidents (No.) Category 1 (Major) 0 0 1 Category 2 (Moderate) 64 37 37 Category 3 (Minor) 224 216 238 Complaints 77 81 114 Social incidents (No.)10 80 Environmental fines above US$10,000 (No.) 
1 2 2 Total Environmental fines (No.)11 51 56 37 Environmental fines above US$10,000 (US million) 0.02 0.08 0.44 Total Environmental fines (US million) 0.15 0.39 0.58 AIR QUALITY MANAGEMENT 2016 2017 2018 Clinker produced with continuous monitoring of major emissions (dust, NOX and SOX) (%) 84 92 98 Clinker produced with monitoring of major and minor emissions (dust, NOX, SOX, Hg, Cd, TI, VOC, PCDD/F) (%) 80 80 80 Absolute dust emissions (ton/year) 2,703 2,214 1,911 Specific dust emissions (g/ton clinker) 67 47 39 Absolute NOX emissions (ton/year) 76,552 57,973 56,228 Specific NOX emissions (g/ton clinker) 1,533 1,177 1,141 Absolute SOX emissions (ton/year) 13,089 10,399 11,543 Specific SOX emissions (g/ton clinker) 253 216 233 Annual reduction in dust emissions per ton of clinker from 2005 baseline (%) 78 84 87 Annual reduction in NOX emissions per ton of clinker from 2005 baseline (%) 26 43 45 Annual reduction in SOX emissions per ton of clinker from 2005 baseline (%) 61 67 64 WATER MANAGEMENT 2016 2017 2018 Total water withdrawals by source (million m3) 62.1 61.4 65.6 Surface water 22.5 22.3 18.9 Ground water 24.3 24.7 30.6 Municipal water 11.5 10.4 12.0 Rain water 0.4 0.4 0.3 Sea water 0.0 0.0 0.0 Other 3.4 3.7 3.8 WATER MANAGEMENT 2016 2017 2018 Total water discharge by destination (million m3) 18.6 22.6 24.7 Surface water 11.0 11.8 12.3 Ground water 6.8 10.2 11.1 Municipal water 0.8 0.5 1.0 Sea water 0.0 0.0 0.0 Other 0.0 0.2 0.2 Total water consumption (million m3) 43.5 38.7 40.9 Cement 15.5 14.3 14.6 Ready-mix 11.5 11.2 13.6 Aggregates 16.5 13.2 12.7 Specific water consumption Cement (l/ton) 247 235 232 Ready-mix (l/m3) 222 219 258 Aggregates (l/ton) 131 107 102 Sites with water recycling systems (%) 79 85 83 BIODIVERSITY MANAGEMENT 2016 2017 2018 Active sites with quarry rehabilitation plans (%) 94 97 96 Cement 90 94 92 Aggregates 96 98 97 Active quarries located within or adjacent to high biodiversity value areas (No.) 
63 64 58 Cement 9 11 10 Aggregates 54 53 48 Active quarries located within or adjacent to high biodiversity value areas where Biodiversity Action Plans (BAPs) are implemented (%) 63 69 91 Cement 89 73 90 Aggregates 59 68 92 Quarry rehabilitation plans, Biodiversity Action Plans (BAPs), and third party certification (% from target quarries)5 65 CUSTOMERS AND SUPPLIERS 2016 2017 2018 Purchases sourced from locally-based suppliers (%) 95 89 90 Sustainability assessment executed by an independent party for our critical suppliers (% spend evaluated)12 44 Countries that conduct regular customer satisfaction surveys (%) 100 100 100 Net Promoter Score (NPS) (%)13 44 ETHICS AND COMPLIANCE 2016 2017 2018 Executives and employees actively aware of our Code of Ethics (%)14 77 77 77 Reports of alleged breaches to the Code of Ethics received by Local Ethics Committees (No.) 453 568 630 Ethics and compliance cases reported during the year that were investigated and closed (%) 68 73 77 Disciplinary actions taken as a result of reports of non-compliance with the Code of Ethics, other policies or the law (No.) 115 185 166 Target countries that participated on the Global Compliance Program (antitrust, antri-bribery and insider trading) (%) 100 100 100 Countries with local mechanisms to promote employee awareness of procedures to identify and report incidences of internal fraud, kickbacks, among others (%) 100 100 100 Investigated incidents reported and found to be true related to fraud, kick-backs among others corruption incidents to government officials (No.) 
0 0 0 Implementation of Ethics and Compliance Continuous Improvement Program (%)5 50 GRI 102-48, GRI 102-49 Footnotes")) 67 | test_df <- data.frame(boo = c('and conference calls Quarterly financial updates and guidance Annual integrated report, annual reports, and mandatory filings Ongoing website updates and press releases Annual CEMEX Day investor event Understanding of CEMEXs financial position, performance, business perspectives, and risks Understanding of management and corporate ESG practices ACADEMIC INSTITUTIONS & NGOs Development of communities and regions Collaboration in applied research projects Engaging students Develop employees talent and awareness of global trends Innovation efforts Yearly collaborative research portfolio Bi-weekly Lighthouses talks Quarterly whitepapers Yearly best practices and methodologies playbooks Annual event to share the projects outcomes Hackathons co-organized with Universities Impact through collaborative projects to develop knowledge and solutions Build strategic partnerships with top universities Employee awareness of global trends Attract new talent for CEMEX COMMUNITIES Education and capability development Sustainable and resilient infrastructure and mobility Social and environmental innovation and entrepreneurship Health and safety, environment protection culture Human Rights Accountability and transparent communication Contribution to community quality of life and wellbeing Local employment opportunities Economic empowerment for women in communities Ongoing dialogues with community stakeholders Quarterly community panels, sessions, and meetings Annual open house days at operating sites Ongoing educational programs and training Participation in local career events Development of community infrastructure, volunteering, and social investment initiatives Development of inclusive business programs Creation of social and inclusive businesses Generation of empowerment and capacity-building Improvement to community 
infrastructure and wellbeing Development of learning opportunities Open communication and feedback Community transformation SUPPLIERS Health and safety Companys economic performance in mid and long term Fair business conditions Sustainability management practices Daily procurement interactions Ongoing capacity building programs Supplier sustainability guidance CEMEX Supplier Sustainability Program Inclusion of human rights, labor, antitrust, and sustainability clauses in our contracts and purchase orders Promotion of local suppliers GOVERNMENTS AND POLICY MAKERS Environmental impact mitigation and management Companys economic performance in mid and long term Health and safety Climate change and CO2 emissions policy Infrastructure and housing solutions Local employment opportunities Annual integrated report and conservation books Ongoing public policy discussions Long-term partnerships Working groups Periodic plant visits Events and conferences Collaboration and communication with governments and regulatory bodies Successful adaptations to new local, national, and regional regulations BUSINESS ASSOCIATIONS Active engagement and guidance Periodic meetings Annual conferences Ongoing working groups Ongoing research studies Development of coordinated initiatives with trade associations Best practices sharing COMMUNICATION MECHANISMS WITH OUR STAKEHOLDERS We outline the key concerns brought up by our materiality assessment and how we engage with our key stakeholders in the table below', '200 1 Company Overview 2 How We Create Value 3 Our Performance 4 Governance 5 Results in Detail 6 About this ReportContents ENVIRONMENTAL AND QUALITY MANAGEMENT 2016 2017 2018 Sites with ISO 9001 Certification (%) 45 46 47 Cement 79 82 84 Ready-mix 44 45 46 Aggregates 41 43 41 Environmental investment (US million) 80 83 83 Environmental incidents (No.) 
Category 1 (Major) 0 0 1 Category 2 (Moderate) 64 37 37 Category 3 (Minor) 224 216 238 Complaints 77 81 114 Social incidents (No.)10 80 Environmental fines above US$10,000 (No.) 1 2 2 Total Environmental fines (No.)11 51 56 37 Environmental fines above US$10,000 (US million) 0.02 0.08 0.44 Total Environmental fines (US million) 0.15 0.39 0.58 AIR QUALITY MANAGEMENT 2016 2017 2018 Clinker produced with continuous monitoring of major emissions (dust, NOX and SOX) (%) 84 92 98 Clinker produced with monitoring of major and minor emissions (dust, NOX, SOX, Hg, Cd, TI, VOC, PCDD/F) (%) 80 80 80 Absolute dust emissions (ton/year) 2,703 2,214 1,911 Specific dust emissions (g/ton clinker) 67 47 39 Absolute NOX emissions (ton/year) 76,552 57,973 56,228 Specific NOX emissions (g/ton clinker) 1,533 1,177 1,141 Absolute SOX emissions (ton/year) 13,089 10,399 11,543 Specific SOX emissions (g/ton clinker) 253 216 233 Annual reduction in dust emissions per ton of clinker from 2005 baseline (%) 78 84 87 Annual reduction in NOX emissions per ton of clinker from 2005 baseline (%) 26 43 45 Annual reduction in SOX emissions per ton of clinker from 2005 baseline (%) 61 67 64 WATER MANAGEMENT 2016 2017 2018 Total water withdrawals by source (million m3) 62.1 61.4 65.6 Surface water 22.5 22.3 18.9 Ground water 24.3 24.7 30.6 Municipal water 11.5 10.4 12.0 Rain water 0.4 0.4 0.3 Sea water 0.0 0.0 0.0 Other 3.4 3.7 3.8 WATER MANAGEMENT 2016 2017 2018 Total water discharge by destination (million m3) 18.6 22.6 24.7 Surface water 11.0 11.8 12.3 Ground water 6.8 10.2 11.1 Municipal water 0.8 0.5 1.0 Sea water 0.0 0.0 0.0 Other 0.0 0.2 0.2 Total water consumption (million m3) 43.5 38.7 40.9 Cement 15.5 14.3 14.6 Ready-mix 11.5 11.2 13.6 Aggregates 16.5 13.2 12.7 Specific water consumption Cement (l/ton) 247 235 232 Ready-mix (l/m3) 222 219 258 Aggregates (l/ton) 131 107 102 Sites with water recycling systems (%) 79 85 83 BIODIVERSITY MANAGEMENT 2016 2017 2018 Active sites with quarry 
rehabilitation plans (%) 94 97 96 Cement 90 94 92 Aggregates 96 98 97 Active quarries located within or adjacent to high biodiversity value areas (No.) 63 64 58 Cement 9 11 10 Aggregates 54 53 48 Active quarries located within or adjacent to high biodiversity value areas where Biodiversity Action Plans (BAPs) are implemented (%) 63 69 91 Cement 89 73 90 Aggregates 59 68 92 Quarry rehabilitation plans, Biodiversity Action Plans (BAPs), and third party certification (% from target quarries)5 65 CUSTOMERS AND SUPPLIERS 2016 2017 2018 Purchases sourced from locally-based suppliers (%) 95 89 90 Sustainability assessment executed by an independent party for our critical suppliers (% spend evaluated)12 44 Countries that conduct regular customer satisfaction surveys (%) 100 100 100 Net Promoter Score (NPS) (%)13 44 ETHICS AND COMPLIANCE 2016 2017 2018 Executives and employees actively aware of our Code of Ethics (%)14 77 77 77 Reports of alleged breaches to the Code of Ethics received by Local Ethics Committees (No.) 453 568 630 Ethics and compliance cases reported during the year that were investigated and closed (%) 68 73 77 Disciplinary actions taken as a result of reports of non-compliance with the Code of Ethics, other policies or the law (No.) 115 185 166 Target countries that participated on the Global Compliance Program (antitrust, antri-bribery and insider trading) (%) 100 100 100 Countries with local mechanisms to promote employee awareness of procedures to identify and report incidences of internal fraud, kickbacks, among others (%) 100 100 100 Investigated incidents reported and found to be true related to fraud, kick-backs among others corruption incidents to government officials (No.) 
0 0 0 Implementation of Ethics and Compliance Continuous Improvement Program (%)5 50 GRI 102-48, GRI 102-49 Footnotes')) 68 | test_df <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 69 | 'environmental and ecosystem conservation (SDG 15)', 70 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 71 | test_df <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 72 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 73 | View(test_df) 74 | findSDGs(test_df) 75 | library(dplyr) 76 | # identify 17 Sustainable Development Goals and associated targets in text 77 | findSDGs <- function(df) { 78 | text_df <- df %>% dplyr::select(where(is.character)) 79 | colnames(text_df) <- "statement" 80 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 81 | for (i in 1:nrow(SDG_keys)) { 82 | sdg_id <- SDG_keys$SDG_id[i] # to add later 83 | target_id <- SDG_keys$target_id[i] 84 | target_key <- SDG_keys$SDG_keywords[i] 85 | match <- ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0) 86 | coded_df <- coded_df %>% 87 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) 
%>% 88 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 89 | as.data.frame() 90 | return(coded_df) 91 | } 92 | test_df <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 93 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 94 | findSDGs(test_df) 95 | target_key <- SDG_keys$SDG_keywords[i] 96 | coded_df <- coded_df %>% 97 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 98 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 99 | as.data.frame() 100 | return(coded_df) 101 | for (i in 1:nrow(SDG_keys)) { 102 | sdg_id <- SDG_keys$SDG_id[i] # to add later 103 | target_id <- SDG_keys$target_id[i] 104 | target_key <- SDG_keys$SDG_keywords[i] 105 | coded_df <- coded_df %>% 106 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 107 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 108 | as.data.frame() 109 | return(coded_df) 110 | } 111 | # identify 17 Sustainable Development Goals and associated targets in text 112 | findSDGs <- function(df) { 113 | text_df <- df %>% dplyr::select(where(is.character)) 114 | colnames(text_df) <- "statement" 115 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 116 | for (i in 1:nrow(SDG_keys)) { 117 | sdg_id <- SDG_keys$SDG_id[i] # to add later 118 | target_id <- SDG_keys$target_id[i] 119 | 
target_key <- SDG_keys$SDG_keywords[i] 120 | coded_df <- coded_df %>% 121 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 122 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 123 | as.data.frame() 124 | return(coded_df) 125 | } 126 | test_df <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 127 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 128 | findSDGs(test_df) 129 | coded_test <- findSDGs(test) 130 | # load database 131 | load('./R/SDG_keys.RData') 132 | library(dplyr) 133 | # identify 17 Sustainable Development Goals and associated targets in text 134 | findSDGs <- function(df) { 135 | text_df <- df %>% dplyr::select(where(is.character)) 136 | colnames(text_df) <- "statement" 137 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 138 | for (i in 1:nrow(SDG_keys)) { 139 | sdg_id <- SDG_keys$SDG_id[i] # to add later 140 | target_id <- SDG_keys$target_id[i] 141 | target_key <- SDG_keys$SDG_keywords[i] 142 | coded_df <- coded_df %>% 143 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 144 | dplyr::mutate(matched_target = ifelse(match == 1, paste0(matched_target, target_id, ', '), matched_target)) %>% 145 | as.data.frame() 146 | return(coded_df) 147 | } 148 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, 
industrial, and marine/hydraulic applications', 149 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 150 | coded_test <- findSDGs(test) 151 | View(coded_test) 152 | # identify 17 Sustainable Development Goals and associated targets in text 153 | findSDGs <- function(df) { 154 | text_df <- df %>% dplyr::select(where(is.character)) 155 | colnames(text_df) <- "statement" 156 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 157 | for (i in 1:nrow(SDG_keys)) { 158 | sdg_id <- SDG_keys$SDG_id[i] # to add later 159 | target_id <- SDG_keys$target_id[i] 160 | target_key <- SDG_keys$SDG_keywords[i] 161 | coded_df <- coded_df %>% 162 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 163 | dplyr::mutate(matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 164 | as.data.frame() 165 | return(coded_df) 166 | } 167 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 168 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 169 | coded_test <- findSDGs(test) 170 | View(coded_test) 171 | test <- data.frame(boo=c("Our adherence to values evaluation invites direct supervisors, direct reports, and other employee stakeholders to share their perception of specific behaviors aligned with each one of our core 
values")) 172 | #test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 173 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 174 | test <- data.frame(boo=c("Our adherence to values evaluation invites direct supervisors, direct reports, and other employee stakeholders to share their perception of specific behaviors aligned with each one of our core values")) 175 | coded_test <- findSDGs(test) 176 | View(coded_test) 177 | # identify 17 Sustainable Development Goals and associated targets in text 178 | findSDGs <- function(df) { 179 | text_df <- df %>% dplyr::select(where(is.character)) # locate the column of text 180 | colnames(text_df) <- "statement" 181 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 182 | for (i in 1:nrow(SDG_keys)) { 183 | sdg_id <- SDG_keys$SDG_id[i] # to add later 184 | target_id <- SDG_keys$target_id[i] 185 | target_key <- SDG_keys$SDG_keywords[i] 186 | print(target_id) 187 | coded_df <- coded_df %>% 188 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 189 | dplyr::mutate(matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 190 | as.data.frame() 191 | return(coded_df) 192 | } 193 | test <- data.frame(boo=c("Our adherence to values evaluation invites direct supervisors, direct reports, and other employee stakeholders to share their perception of specific behaviors aligned with each one of our core values")) 194 | coded_test <- findSDGs(test) 195 | nrow(SDG_keys) 196 | # identify 17 Sustainable 
Development Goals and associated targets in text 197 | findSDGs <- function(df) { 198 | text_df <- df %>% dplyr::select(where(is.character)) # locate the column of text 199 | colnames(text_df) <- "statement" 200 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 201 | for (i in 1:nrow(SDG_keys)) { 202 | sdg_id <- SDG_keys$SDG_id[i] # to add later 203 | target_id <- SDG_keys$target_id[i] 204 | target_key <- SDG_keys$SDG_keywords[i] 205 | print(target_id) 206 | coded_df <- coded_df %>% 207 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 208 | dplyr::mutate(matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 209 | as.data.frame() 210 | } 211 | return(coded_df) 212 | } 213 | test <- data.frame(boo=c("Our adherence to values evaluation invites direct supervisors, direct reports, and other employee stakeholders to share their perception of specific behaviors aligned with each one of our core values")) 214 | coded_test <- findSDGs(test) 215 | View(coded_test) 216 | #test <- data.frame(boo=c("Our adherence to values evaluation invites direct supervisors, direct reports, and other employee stakeholders to share their perception of specific behaviors aligned with each one of our core values")) 217 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 218 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 219 | coded_test <- findSDGs(test) 220 | View(coded_test) 221 | # identify 17 Sustainable Development Goals and associated 
targets in text 222 | # input: a dataframe 223 | findSDGs <- function(df) { 224 | text_df <- df %>% dplyr::select(where(is.character)) # locate the column of text 225 | colnames(text_df) <- "statement" 226 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 227 | for (i in 1:nrow(SDG_keys)) { 228 | sdg_id <- SDG_keys$SDG_id[i] # to add later 229 | target_id <- SDG_keys$target_id[i] 230 | target_key <- SDG_keys$SDG_keywords[i] 231 | coded_df <- coded_df %>% 232 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 233 | dplyr::mutate(matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 234 | as.data.frame() 235 | } 236 | coded_df <- subset(coded_df, select = -match) 237 | return(coded_df) 238 | } 239 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 240 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 241 | coded_test <- findSDGs(test) 242 | View(coded_test) 243 | coded_df <- subset(coded_df, select = -match) 244 | # load database 245 | load('./R/SDG_keys.RData') 246 | library(dplyr) 247 | findSDGs <- function(df) { 248 | text_df <- df %>% dplyr::select(where(is.character)) # locate the column of text 249 | colnames(text_df) <- "statement" 250 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 251 | for (i in 1:nrow(SDG_keys)) { 252 | sdg_id <- SDG_keys$SDG_id[i] # to add later 253 | target_id <- SDG_keys$target_id[i] 254 | target_key <- SDG_keys$SDG_keywords[i] 255 | coded_df <- coded_df %>% 256 
| dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 257 | dplyr::mutate(matched_sdg = ifelse(match > 0, paste0(matched_sdg, sdg_id, ', '), matched_sdg), 258 | matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 259 | as.data.frame() 260 | } 261 | coded_df <- subset(coded_df, select = -match) 262 | return(coded_df) 263 | } 264 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 265 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate')) 266 | coded_test <- findSDGs(test) 267 | View(coded_test) 268 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 269 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate', 270 | 'We are further leading the UN Global compact in Mexico, encouraging more companies to partner to contribute and explore business opportunities while reducing negative impact, and creating shared value to society')) 271 | coded_test <- findSDGs(test) 272 | View(coded_test) 273 | View(SDG_keys) 274 | library(dplyr) 275 | findSDGs <- function(df) { 276 | text_df <- df %>% dplyr::select(where(is.character)) # locate the column of text 277 | 
colnames(text_df) <- "statement" 278 | coded_df <- text_df %>% dplyr::mutate(matched_sdg = '', matched_target = '') 279 | #sdg_count = '', target_count = '' # to add later 280 | for (i in 1:nrow(SDG_keys)) { 281 | sdg_id <- SDG_keys$SDG_id[i] 282 | target_id <- SDG_keys$target_id[i] 283 | target_key <- SDG_keys$SDG_keywords[i] 284 | coded_df <- coded_df %>% 285 | dplyr::mutate(match = ifelse(grepl(pattern = target_key, x = statement, ignore.case = T, perl = T), 1, 0)) %>% 286 | dplyr::mutate(matched_sdg = ifelse(match > 0, paste0(matched_sdg, sdg_id, ', '), matched_sdg), 287 | matched_target = ifelse(match > 0, paste0(matched_target, target_id, ', '), matched_target)) %>% 288 | as.data.frame() 289 | } 290 | coded_df <- subset(coded_df, select = -match) 291 | return(coded_df) 292 | } 293 | test <- data.frame(boo=c('To this end, we tailor our products and services to suit our customers specific needsfrom home construction, improvement, and renovation to agricultural, industrial, and marine/hydraulic applications', 294 | 'define and carry out a worldwide survey across Working with a diverse group of stakeholderslistening to their concerns and managing our relations in a proactive and fruitful wayis crucial to understanding our ecosystem and maximizing our positive impact in the places where we operate', 295 | 'We are further leading the UN Global compact in Mexico, encouraging more companies to partner to contribute and explore business opportunities while reducing negative impact, and creating shared value to society')) 296 | coded_test <- findSDGs(test) 297 | View(coded_test) 298 | library(findSDGs) 299 | my_text <- data.frame(my_col=c('our goal is to end poverty globally', 'this product contributes to slowing down climate change')) 300 | findSDGs(my_text, my_col) 301 | findSDGs(df = my_text, col = my_col) 302 | -------------------------------------------------------------------------------- /LICENSE.md: 
-------------------------------------------------------------------------------- 1 | GNU General Public License 2 | ========================== 3 | 4 | _Version 3, 29 June 2007_ 5 | _Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_ 6 | 7 | Everyone is permitted to copy and distribute verbatim copies of this license 8 | document, but changing it is not allowed. 9 | 10 | ## Preamble 11 | 12 | The GNU General Public License is a free, copyleft license for software and other 13 | kinds of works. 14 | 15 | The licenses for most software and other practical works are designed to take away 16 | your freedom to share and change the works. By contrast, the GNU General Public 17 | License is intended to guarantee your freedom to share and change all versions of a 18 | program--to make sure it remains free software for all its users. We, the Free 19 | Software Foundation, use the GNU General Public License for most of our software; it 20 | applies also to any other work released this way by its authors. You can apply it to 21 | your programs, too. 22 | 23 | When we speak of free software, we are referring to freedom, not price. Our General 24 | Public Licenses are designed to make sure that you have the freedom to distribute 25 | copies of free software (and charge for them if you wish), that you receive source 26 | code or can get it if you want it, that you can change the software or use pieces of 27 | it in new free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you these rights or 30 | asking you to surrender the rights. Therefore, you have certain responsibilities if 31 | you distribute copies of the software, or if you modify it: responsibilities to 32 | respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether gratis or for a fee, 35 | you must pass on to the recipients the same freedoms that you received.
You must make 36 | sure that they, too, receive or can get the source code. And you must show them these 37 | terms so they know their rights. 38 | 39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert 40 | copyright on the software, and **(2)** offer you this License giving you legal permission 41 | to copy, distribute and/or modify it. 42 | 43 | For the developers' and authors' protection, the GPL clearly explains that there is 44 | no warranty for this free software. For both users' and authors' sake, the GPL 45 | requires that modified versions be marked as changed, so that their problems will not 46 | be attributed erroneously to authors of previous versions. 47 | 48 | Some devices are designed to deny users access to install or run modified versions of 49 | the software inside them, although the manufacturer can do so. This is fundamentally 50 | incompatible with the aim of protecting users' freedom to change the software. The 51 | systematic pattern of such abuse occurs in the area of products for individuals to 52 | use, which is precisely where it is most unacceptable. Therefore, we have designed 53 | this version of the GPL to prohibit the practice for those products. If such problems 54 | arise substantially in other domains, we stand ready to extend this provision to 55 | those domains in future versions of the GPL, as needed to protect the freedom of 56 | users. 57 | 58 | Finally, every program is threatened constantly by software patents. States should 59 | not allow patents to restrict development and use of software on general-purpose 60 | computers, but in those that do, we wish to avoid the special danger that patents 61 | applied to a free program could make it effectively proprietary. To prevent this, the 62 | GPL assures that patents cannot be used to render the program non-free. 63 | 64 | The precise terms and conditions for copying, distribution and modification follow. 
65 | 66 | ## TERMS AND CONDITIONS 67 | 68 | ### 0. Definitions 69 | 70 | “This License” refers to version 3 of the GNU General Public License. 71 | 72 | “Copyright” also means copyright-like laws that apply to other kinds of 73 | works, such as semiconductor masks. 74 | 75 | “The Program” refers to any copyrightable work licensed under this 76 | License. Each licensee is addressed as “you”. “Licensees” and 77 | “recipients” may be individuals or organizations. 78 | 79 | To “modify” a work means to copy from or adapt all or part of the work in 80 | a fashion requiring copyright permission, other than the making of an exact copy. The 81 | resulting work is called a “modified version” of the earlier work or a 82 | work “based on” the earlier work. 83 | 84 | A “covered work” means either the unmodified Program or a work based on 85 | the Program. 86 | 87 | To “propagate” a work means to do anything with it that, without 88 | permission, would make you directly or secondarily liable for infringement under 89 | applicable copyright law, except executing it on a computer or modifying a private 90 | copy. Propagation includes copying, distribution (with or without modification), 91 | making available to the public, and in some countries other activities as well. 92 | 93 | To “convey” a work means any kind of propagation that enables other 94 | parties to make or receive copies. Mere interaction with a user through a computer 95 | network, with no transfer of a copy, is not conveying. 96 | 97 | An interactive user interface displays “Appropriate Legal Notices” to the 98 | extent that it includes a convenient and prominently visible feature that **(1)** 99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no 100 | warranty for the work (except to the extent that warranties are provided), that 101 | licensees may convey the work under this License, and how to view a copy of this 102 | License. 
If the interface presents a list of user commands or options, such as a 103 | menu, a prominent item in the list meets this criterion. 104 | 105 | ### 1. Source Code 106 | 107 | The “source code” for a work means the preferred form of the work for 108 | making modifications to it. “Object code” means any non-source form of a 109 | work. 110 | 111 | A “Standard Interface” means an interface that either is an official 112 | standard defined by a recognized standards body, or, in the case of interfaces 113 | specified for a particular programming language, one that is widely used among 114 | developers working in that language. 115 | 116 | The “System Libraries” of an executable work include anything, other than 117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major 118 | Component, but which is not part of that Major Component, and **(b)** serves only to 119 | enable use of the work with that Major Component, or to implement a Standard 120 | Interface for which an implementation is available to the public in source code form. 121 | A “Major Component”, in this context, means a major essential component 122 | (kernel, window system, and so on) of the specific operating system (if any) on which 123 | the executable work runs, or a compiler used to produce the work, or an object code 124 | interpreter used to run it. 125 | 126 | The “Corresponding Source” for a work in object code form means all the 127 | source code needed to generate, install, and (for an executable work) run the object 128 | code and to modify the work, including scripts to control those activities. However, 129 | it does not include the work's System Libraries, or general-purpose tools or 130 | generally available free programs which are used unmodified in performing those 131 | activities but which are not part of the work. 
For example, Corresponding Source 132 | includes interface definition files associated with source files for the work, and 133 | the source code for shared libraries and dynamically linked subprograms that the work 134 | is specifically designed to require, such as by intimate data communication or 135 | control flow between those subprograms and other parts of the work. 136 | 137 | The Corresponding Source need not include anything that users can regenerate 138 | automatically from other parts of the Corresponding Source. 139 | 140 | The Corresponding Source for a work in source code form is that same work. 141 | 142 | ### 2. Basic Permissions 143 | 144 | All rights granted under this License are granted for the term of copyright on the 145 | Program, and are irrevocable provided the stated conditions are met. This License 146 | explicitly affirms your unlimited permission to run the unmodified Program. The 147 | output from running a covered work is covered by this License only if the output, 148 | given its content, constitutes a covered work. This License acknowledges your rights 149 | of fair use or other equivalent, as provided by copyright law. 150 | 151 | You may make, run and propagate covered works that you do not convey, without 152 | conditions so long as your license otherwise remains in force. You may convey covered 153 | works to others for the sole purpose of having them make modifications exclusively 154 | for you, or provide you with facilities for running those works, provided that you 155 | comply with the terms of this License in conveying all material for which you do not 156 | control copyright. Those thus making or running the covered works for you must do so 157 | exclusively on your behalf, under your direction and control, on terms that prohibit 158 | them from making any copies of your copyrighted material outside their relationship 159 | with you. 
160 | 161 | Conveying under any other circumstances is permitted solely under the conditions 162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 163 | 164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law 165 | 166 | No covered work shall be deemed part of an effective technological measure under any 167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty 168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention 169 | of such measures. 170 | 171 | When you convey a covered work, you waive any legal power to forbid circumvention of 172 | technological measures to the extent such circumvention is effected by exercising 173 | rights under this License with respect to the covered work, and you disclaim any 174 | intention to limit operation or modification of the work as a means of enforcing, 175 | against the work's users, your or third parties' legal rights to forbid circumvention 176 | of technological measures. 177 | 178 | ### 4. Conveying Verbatim Copies 179 | 180 | You may convey verbatim copies of the Program's source code as you receive it, in any 181 | medium, provided that you conspicuously and appropriately publish on each copy an 182 | appropriate copyright notice; keep intact all notices stating that this License and 183 | any non-permissive terms added in accord with section 7 apply to the code; keep 184 | intact all notices of the absence of any warranty; and give all recipients a copy of 185 | this License along with the Program. 186 | 187 | You may charge any price or no price for each copy that you convey, and you may offer 188 | support or warranty protection for a fee. 189 | 190 | ### 5. 
Conveying Modified Source Versions 191 | 192 | You may convey a work based on the Program, or the modifications to produce it from 193 | the Program, in the form of source code under the terms of section 4, provided that 194 | you also meet all of these conditions: 195 | 196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a 197 | relevant date. 198 | * **b)** The work must carry prominent notices stating that it is released under this 199 | License and any conditions added under section 7. This requirement modifies the 200 | requirement in section 4 to “keep intact all notices”. 201 | * **c)** You must license the entire work, as a whole, under this License to anyone who 202 | comes into possession of a copy. This License will therefore apply, along with any 203 | applicable section 7 additional terms, to the whole of the work, and all its parts, 204 | regardless of how they are packaged. This License gives no permission to license the 205 | work in any other way, but it does not invalidate such permission if you have 206 | separately received it. 207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal 208 | Notices; however, if the Program has interactive interfaces that do not display 209 | Appropriate Legal Notices, your work need not make them do so. 210 | 211 | A compilation of a covered work with other separate and independent works, which are 212 | not by their nature extensions of the covered work, and which are not combined with 213 | it such as to form a larger program, in or on a volume of a storage or distribution 214 | medium, is called an “aggregate” if the compilation and its resulting 215 | copyright are not used to limit the access or legal rights of the compilation's users 216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate 217 | does not cause this License to apply to the other parts of the aggregate. 218 | 219 | ### 6. 
Conveying Non-Source Forms 220 | 221 | You may convey a covered work in object code form under the terms of sections 4 and 222 | 5, provided that you also convey the machine-readable Corresponding Source under the 223 | terms of this License, in one of these ways: 224 | 225 | * **a)** Convey the object code in, or embodied in, a physical product (including a 226 | physical distribution medium), accompanied by the Corresponding Source fixed on a 227 | durable physical medium customarily used for software interchange. 228 | * **b)** Convey the object code in, or embodied in, a physical product (including a 229 | physical distribution medium), accompanied by a written offer, valid for at least 230 | three years and valid for as long as you offer spare parts or customer support for 231 | that product model, to give anyone who possesses the object code either **(1)** a copy of 232 | the Corresponding Source for all the software in the product that is covered by this 233 | License, on a durable physical medium customarily used for software interchange, for 234 | a price no more than your reasonable cost of physically performing this conveying of 235 | source, or **(2)** access to copy the Corresponding Source from a network server at no 236 | charge. 237 | * **c)** Convey individual copies of the object code with a copy of the written offer to 238 | provide the Corresponding Source. This alternative is allowed only occasionally and 239 | noncommercially, and only if you received the object code with such an offer, in 240 | accord with subsection 6b. 241 | * **d)** Convey the object code by offering access from a designated place (gratis or for 242 | a charge), and offer equivalent access to the Corresponding Source in the same way 243 | through the same place at no further charge. You need not require recipients to copy 244 | the Corresponding Source along with the object code. 
If the place to copy the object 245 | code is a network server, the Corresponding Source may be on a different server 246 | (operated by you or a third party) that supports equivalent copying facilities, 247 | provided you maintain clear directions next to the object code saying where to find 248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source, 249 | you remain obligated to ensure that it is available for as long as needed to satisfy 250 | these requirements. 251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform 252 | other peers where the object code and Corresponding Source of the work are being 253 | offered to the general public at no charge under subsection 6d. 254 | 255 | A separable portion of the object code, whose source code is excluded from the 256 | Corresponding Source as a System Library, need not be included in conveying the 257 | object code work. 258 | 259 | A “User Product” is either **(1)** a “consumer product”, which 260 | means any tangible personal property which is normally used for personal, family, or 261 | household purposes, or **(2)** anything designed or sold for incorporation into a 262 | dwelling. In determining whether a product is a consumer product, doubtful cases 263 | shall be resolved in favor of coverage. For a particular product received by a 264 | particular user, “normally used” refers to a typical or common use of 265 | that class of product, regardless of the status of the particular user or of the way 266 | in which the particular user actually uses, or expects or is expected to use, the 267 | product. A product is a consumer product regardless of whether the product has 268 | substantial commercial, industrial or non-consumer uses, unless such uses represent 269 | the only significant mode of use of the product. 
270 | 271 | “Installation Information” for a User Product means any methods, 272 | procedures, authorization keys, or other information required to install and execute 273 | modified versions of a covered work in that User Product from a modified version of 274 | its Corresponding Source. The information must suffice to ensure that the continued 275 | functioning of the modified object code is in no case prevented or interfered with 276 | solely because modification has been made. 277 | 278 | If you convey an object code work under this section in, or with, or specifically for 279 | use in, a User Product, and the conveying occurs as part of a transaction in which 280 | the right of possession and use of the User Product is transferred to the recipient 281 | in perpetuity or for a fixed term (regardless of how the transaction is 282 | characterized), the Corresponding Source conveyed under this section must be 283 | accompanied by the Installation Information. But this requirement does not apply if 284 | neither you nor any third party retains the ability to install modified object code 285 | on the User Product (for example, the work has been installed in ROM). 286 | 287 | The requirement to provide Installation Information does not include a requirement to 288 | continue to provide support service, warranty, or updates for a work that has been 289 | modified or installed by the recipient, or for the User Product in which it has been 290 | modified or installed. Access to a network may be denied when the modification itself 291 | materially and adversely affects the operation of the network or violates the rules 292 | and protocols for communication across the network. 
293 | 294 | Corresponding Source conveyed, and Installation Information provided, in accord with 295 | this section must be in a format that is publicly documented (and with an 296 | implementation available to the public in source code form), and must require no 297 | special password or key for unpacking, reading or copying. 298 | 299 | ### 7. Additional Terms 300 | 301 | “Additional permissions” are terms that supplement the terms of this 302 | License by making exceptions from one or more of its conditions. Additional 303 | permissions that are applicable to the entire Program shall be treated as though they 304 | were included in this License, to the extent that they are valid under applicable 305 | law. If additional permissions apply only to part of the Program, that part may be 306 | used separately under those permissions, but the entire Program remains governed by 307 | this License without regard to the additional permissions. 308 | 309 | When you convey a copy of a covered work, you may at your option remove any 310 | additional permissions from that copy, or from any part of it. (Additional 311 | permissions may be written to require their own removal in certain cases when you 312 | modify the work.) You may place additional permissions on material, added by you to a 313 | covered work, for which you have or can give appropriate copyright permission. 
314 | 315 | Notwithstanding any other provision of this License, for material you add to a 316 | covered work, you may (if authorized by the copyright holders of that material) 317 | supplement the terms of this License with terms: 318 | 319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of 320 | sections 15 and 16 of this License; or 321 | * **b)** Requiring preservation of specified reasonable legal notices or author 322 | attributions in that material or in the Appropriate Legal Notices displayed by works 323 | containing it; or 324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that 325 | modified versions of such material be marked in reasonable ways as different from the 326 | original version; or 327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the 328 | material; or 329 | * **e)** Declining to grant rights under trademark law for use of some trade names, 330 | trademarks, or service marks; or 331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone 332 | who conveys the material (or modified versions of it) with contractual assumptions of 333 | liability to the recipient, for any liability that these contractual assumptions 334 | directly impose on those licensors and authors. 335 | 336 | All other non-permissive additional terms are considered “further 337 | restrictions” within the meaning of section 10. If the Program as you received 338 | it, or any part of it, contains a notice stating that it is governed by this License 339 | along with a term that is a further restriction, you may remove that term. If a 340 | license document contains a further restriction but permits relicensing or conveying 341 | under this License, you may add to a covered work material governed by the terms of 342 | that license document, provided that the further restriction does not survive such 343 | relicensing or conveying. 
344 | 345 | If you add terms to a covered work in accord with this section, you must place, in 346 | the relevant source files, a statement of the additional terms that apply to those 347 | files, or a notice indicating where to find the applicable terms. 348 | 349 | Additional terms, permissive or non-permissive, may be stated in the form of a 350 | separately written license, or stated as exceptions; the above requirements apply 351 | either way. 352 | 353 | ### 8. Termination 354 | 355 | You may not propagate or modify a covered work except as expressly provided under 356 | this License. Any attempt otherwise to propagate or modify it is void, and will 357 | automatically terminate your rights under this License (including any patent licenses 358 | granted under the third paragraph of section 11). 359 | 360 | However, if you cease all violation of this License, then your license from a 361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the 362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently, 363 | if the copyright holder fails to notify you of the violation by some reasonable means 364 | prior to 60 days after the cessation. 365 | 366 | Moreover, your license from a particular copyright holder is reinstated permanently 367 | if the copyright holder notifies you of the violation by some reasonable means, this 368 | is the first time you have received notice of violation of this License (for any 369 | work) from that copyright holder, and you cure the violation prior to 30 days after 370 | your receipt of the notice. 371 | 372 | Termination of your rights under this section does not terminate the licenses of 373 | parties who have received copies or rights from you under this License. If your 374 | rights have been terminated and not permanently reinstated, you do not qualify to 375 | receive new licenses for the same material under section 10. 376 | 377 | ### 9. 
Acceptance Not Required for Having Copies 378 | 379 | You are not required to accept this License in order to receive or run a copy of the 380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of 381 | using peer-to-peer transmission to receive a copy likewise does not require 382 | acceptance. However, nothing other than this License grants you permission to 383 | propagate or modify any covered work. These actions infringe copyright if you do not 384 | accept this License. Therefore, by modifying or propagating a covered work, you 385 | indicate your acceptance of this License to do so. 386 | 387 | ### 10. Automatic Licensing of Downstream Recipients 388 | 389 | Each time you convey a covered work, the recipient automatically receives a license 390 | from the original licensors, to run, modify and propagate that work, subject to this 391 | License. You are not responsible for enforcing compliance by third parties with this 392 | License. 393 | 394 | An “entity transaction” is a transaction transferring control of an 395 | organization, or substantially all assets of one, or subdividing an organization, or 396 | merging organizations. If propagation of a covered work results from an entity 397 | transaction, each party to that transaction who receives a copy of the work also 398 | receives whatever licenses to the work the party's predecessor in interest had or 399 | could give under the previous paragraph, plus a right to possession of the 400 | Corresponding Source of the work from the predecessor in interest, if the predecessor 401 | has it or can get it with reasonable efforts. 402 | 403 | You may not impose any further restrictions on the exercise of the rights granted or 404 | affirmed under this License. 
For example, you may not impose a license fee, royalty, 405 | or other charge for exercise of rights granted under this License, and you may not 406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging 407 | that any patent claim is infringed by making, using, selling, offering for sale, or 408 | importing the Program or any portion of it. 409 | 410 | ### 11. Patents 411 | 412 | A “contributor” is a copyright holder who authorizes use under this 413 | License of the Program or a work on which the Program is based. The work thus 414 | licensed is called the contributor's “contributor version”. 415 | 416 | A contributor's “essential patent claims” are all patent claims owned or 417 | controlled by the contributor, whether already acquired or hereafter acquired, that 418 | would be infringed by some manner, permitted by this License, of making, using, or 419 | selling its contributor version, but do not include claims that would be infringed 420 | only as a consequence of further modification of the contributor version. For 421 | purposes of this definition, “control” includes the right to grant patent 422 | sublicenses in a manner consistent with the requirements of this License. 423 | 424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license 425 | under the contributor's essential patent claims, to make, use, sell, offer for sale, 426 | import and otherwise run, modify and propagate the contents of its contributor 427 | version. 428 | 429 | In the following three paragraphs, a “patent license” is any express 430 | agreement or commitment, however denominated, not to enforce a patent (such as an 431 | express permission to practice a patent or covenant not to sue for patent 432 | infringement). To “grant” such a patent license to a party means to make 433 | such an agreement or commitment not to enforce a patent against the party. 
434 | 435 | If you convey a covered work, knowingly relying on a patent license, and the 436 | Corresponding Source of the work is not available for anyone to copy, free of charge 437 | and under the terms of this License, through a publicly available network server or 438 | other readily accessible means, then you must either **(1)** cause the Corresponding 439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the 440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with 441 | the requirements of this License, to extend the patent license to downstream 442 | recipients. “Knowingly relying” means you have actual knowledge that, but 443 | for the patent license, your conveying the covered work in a country, or your 444 | recipient's use of the covered work in a country, would infringe one or more 445 | identifiable patents in that country that you have reason to believe are valid. 446 | 447 | If, pursuant to or in connection with a single transaction or arrangement, you 448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent 449 | license to some of the parties receiving the covered work authorizing them to use, 450 | propagate, modify or convey a specific copy of the covered work, then the patent 451 | license you grant is automatically extended to all recipients of the covered work and 452 | works based on it. 453 | 454 | A patent license is “discriminatory” if it does not include within the 455 | scope of its coverage, prohibits the exercise of, or is conditioned on the 456 | non-exercise of one or more of the rights that are specifically granted under this 457 | License. 
You may not convey a covered work if you are a party to an arrangement with 458 | a third party that is in the business of distributing software, under which you make 459 | payment to the third party based on the extent of your activity of conveying the 460 | work, and under which the third party grants, to any of the parties who would receive 461 | the covered work from you, a discriminatory patent license **(a)** in connection with 462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)** 463 | primarily for and in connection with specific products or compilations that contain 464 | the covered work, unless you entered into that arrangement, or that patent license 465 | was granted, prior to 28 March 2007. 466 | 467 | Nothing in this License shall be construed as excluding or limiting any implied 468 | license or other defenses to infringement that may otherwise be available to you 469 | under applicable patent law. 470 | 471 | ### 12. No Surrender of Others' Freedom 472 | 473 | If conditions are imposed on you (whether by court order, agreement or otherwise) 474 | that contradict the conditions of this License, they do not excuse you from the 475 | conditions of this License. If you cannot convey a covered work so as to satisfy 476 | simultaneously your obligations under this License and any other pertinent 477 | obligations, then as a consequence you may not convey it at all. For example, if you 478 | agree to terms that obligate you to collect a royalty for further conveying from 479 | those to whom you convey the Program, the only way you could satisfy both those terms 480 | and this License would be to refrain entirely from conveying the Program. 481 | 482 | ### 13. 
Use with the GNU Affero General Public License 483 | 484 | Notwithstanding any other provision of this License, you have permission to link or 485 | combine any covered work with a work licensed under version 3 of the GNU Affero 486 | General Public License into a single combined work, and to convey the resulting work. 487 | The terms of this License will continue to apply to the part which is the covered 488 | work, but the special requirements of the GNU Affero General Public License, section 489 | 13, concerning interaction through a network will apply to the combination as such. 490 | 491 | ### 14. Revised Versions of this License 492 | 493 | The Free Software Foundation may publish revised and/or new versions of the GNU 494 | General Public License from time to time. Such new versions will be similar in spirit 495 | to the present version, but may differ in detail to address new problems or concerns. 496 | 497 | Each version is given a distinguishing version number. If the Program specifies that 498 | a certain numbered version of the GNU General Public License “or any later 499 | version” applies to it, you have the option of following the terms and 500 | conditions either of that numbered version or of any later version published by the 501 | Free Software Foundation. If the Program does not specify a version number of the GNU 502 | General Public License, you may choose any version ever published by the Free 503 | Software Foundation. 504 | 505 | If the Program specifies that a proxy can decide which future versions of the GNU 506 | General Public License can be used, that proxy's public statement of acceptance of a 507 | version permanently authorizes you to choose that version for the Program. 508 | 509 | Later license versions may give you additional or different permissions. However, no 510 | additional obligations are imposed on any author or copyright holder as a result of 511 | your choosing to follow a later version. 512 | 513 | ### 15. 
Disclaimer of Warranty 514 | 515 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 516 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 517 | PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER 518 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 519 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE 520 | QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE 521 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 522 | 523 | ### 16. Limitation of Liability 524 | 525 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY 526 | COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS 527 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, 528 | INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 529 | PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE 530 | OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE 531 | WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 532 | POSSIBILITY OF SUCH DAMAGES. 533 | 534 | ### 17. Interpretation of Sections 15 and 16 535 | 536 | If the disclaimer of warranty and limitation of liability provided above cannot be 537 | given local legal effect according to their terms, reviewing courts shall apply local 538 | law that most closely approximates an absolute waiver of all civil liability in 539 | connection with the Program, unless a warranty or assumption of liability accompanies 540 | a copy of the Program in return for a fee. 
541 | 542 | _END OF TERMS AND CONDITIONS_ 543 | 544 | ## How to Apply These Terms to Your New Programs 545 | 546 | If you develop a new program, and you want it to be of the greatest possible use to 547 | the public, the best way to achieve this is to make it free software which everyone 548 | can redistribute and change under these terms. 549 | 550 | To do so, attach the following notices to the program. It is safest to attach them 551 | to the start of each source file to most effectively state the exclusion of warranty; 552 | and each file should have at least the “copyright” line and a pointer to 553 | where the full notice is found. 554 | 555 | <one line to give the program's name and a brief idea of what it does.> 556 | Copyright (C) <year> <name of author> 557 | 558 | This program is free software: you can redistribute it and/or modify 559 | it under the terms of the GNU General Public License as published by 560 | the Free Software Foundation, either version 3 of the License, or 561 | (at your option) any later version. 562 | 563 | This program is distributed in the hope that it will be useful, 564 | but WITHOUT ANY WARRANTY; without even the implied warranty of 565 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 566 | GNU General Public License for more details. 567 | 568 | You should have received a copy of the GNU General Public License 569 | along with this program. If not, see <https://www.gnu.org/licenses/>. 570 | 571 | Also add information on how to contact you by electronic and paper mail. 572 | 573 | If the program does terminal interaction, make it output a short notice like this 574 | when it starts in an interactive mode: 575 | 576 | <program> Copyright (C) <year> <name of author> 577 | This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. 578 | This is free software, and you are welcome to redistribute it 579 | under certain conditions; type 'show c' for details. 580 | 581 | The hypothetical commands `show w` and `show c` should show the appropriate parts of 582 | the General Public License. 
Of course, your program's commands might be different; 583 | for a GUI interface, you would use an “about box”. 584 | 585 | You should also get your employer (if you work as a programmer) or school, if any, to 586 | sign a “copyright disclaimer” for the program, if necessary. For more 587 | information on this, and how to apply and follow the GNU GPL, see 588 | <https://www.gnu.org/licenses/>. 589 | 590 | The GNU General Public License does not permit incorporating your program into 591 | proprietary programs. If your program is a subroutine library, you may consider it 592 | more useful to permit linking proprietary applications with the library. If this is 593 | what you want to do, use the GNU Lesser General Public License instead of this 594 | License. But first, please read 595 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 596 | --------------------------------------------------------------------------------