├── .gitignore
├── FOSS4Spec.xlsx
├── FOSS4Spec_2025-05-12.xlsx
├── FOSS4Spectroscopy.Rmd
├── Include
├── Table1.png
└── sorttable.js
├── LICENSE
├── README.md
├── Render.R
├── Utilities
├── dereplicate_repos.R
├── final_manual_check.R
├── review_candidates.R
└── run_searches.R
├── docs
└── index.html
└── ~$FOSS4Spec.xlsx
/.gitignore:
--------------------------------------------------------------------------------
1 | *.Rhistory
2 | .DS_Store
3 | .Rapp.history
4 | .Rproj.user/
5 | *~
6 | Links404.csv
7 | DateReport.csv
8 | Searches/
9 |
--------------------------------------------------------------------------------
/FOSS4Spec.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bryanhanson/FOSS4Spectroscopy/a5dfd759e93b7ac2b59658169a9645e1a011a442/FOSS4Spec.xlsx
--------------------------------------------------------------------------------
/FOSS4Spec_2025-05-12.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bryanhanson/FOSS4Spectroscopy/a5dfd759e93b7ac2b59658169a9645e1a011a442/FOSS4Spec_2025-05-12.xlsx
--------------------------------------------------------------------------------
/FOSS4Spectroscopy.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: FOSS For Spectroscopy
3 | author: Bryan A. Hanson, DePauw University
4 | date: >-
5 | `r paste(format(Sys.time(),
6 | format = "%Y-%m-%d %H:%M", tz = "GMT"), "UTC")`
7 | output:
8 | html_document:
9 | theme: united
10 | ---
11 |
12 |
14 |
15 |
16 |
21 |
22 |
23 |
24 |
25 |
26 | The following table collects information about free and open source software ([FOSS](https://en.wikipedia.org/wiki/Free_and_open-source_software)) for spectroscopy. All information is taken from the respective websites and/or repositories. Some projects have been described in publications. If so, we try to provide a link, but the document may be behind a paywall. This [blog post](https://chemospec.org/posts/2021-04-19-Search-GH-Topics/2021-04-19-Search-GH-Topics.html) gives some details about how we find interesting repositories.
27 |
28 | Unless otherwise noted, the software mentioned here:
29 |
30 | * Is suitable for one or more of the following techniques: NMR, IR, Raman, ESR/EPR, fluorescence, XRF, LIBS and UV-Vis.
31 | * Mass *spectrometry* software is not included (see the work by Stanstrup just below).
32 | * Software for MRI is not included.
33 | * Software for remote sensing is generally not included, though some projects involving hyperspectral imaging are included.
34 | * While some folks publish open source add-ons for `Matlab`, `Matlab` is not open source and projects written in `Matlab` are not included.
35 |
36 | Some other places to look:
37 |
38 | * [Stanstrup *et al.*](https://www.mdpi.com/2218-1989/9/10/200) have published a comprehensive paper describing the `R` packages suitable for use in metabolomics, which partially overlaps with the information here. The authors have also created a dynamic [document](https://rformassspectrometry.github.io/metaRbolomics-book/) with the same information and more.
39 | * [awesome-spectra](https://github.com/erwanp/awesome-spectra) is a page somewhat in the spirit of the work here, but apparently depends on authors to add their own material, and is missing some key entries (e.g. no NMR packages).
40 | * [All Things Raman](https://github.com/allthingsraman) is a curated collection of software for Raman spectroscopy.
41 | * The [CRAN Task View for Chemometrics & Computational Physics](https://cran.r-project.org/web/views/ChemPhys.html) includes some `R` packages listed here as well as related software.
42 |
43 | #### How Does One Choose a Package?
44 |
45 | *The projects listed here have been lightly vetted and the process is imperfect!* If a project looks incomplete, is a class project, or I can't tell what it does, it's not included! Of course, if you feel I have omitted your package in error, feel free to request its inclusion. With that in mind, there are still many packages to consider. As a general guide, the excellent checklist provided by [Lortie *et al.*](https://onlinelibrary.wiley.com/doi/full/10.1002/ece3.5970) is included here for your consideration.
46 |
47 |
48 |
49 | ```{r setupR, echo = FALSE, results = "hide"}
50 | # Wipe the workspace before rendering, sparing only the local GitHub token
51 | # (if present), which the local build needs.
52 | keep <- "github_token"
53 | rm(list = ls()[!ls() %in% keep])
54 |
55 | # Attach the packages quietly, one after another in the order listed.
56 | invisible(lapply(
57 |   c(
58 |     "knitr", "gt", "readxl", "httr", "lubridate", "jsonlite",
59 |     "rvest", # ??? (original author's open question about this package)
60 |     "stringr", "xml2", "webu"
61 |   ),
62 |   function(p) suppressPackageStartupMessages(library(p, character.only = TRUE))
63 | ))
64 |
65 | # Chunks do not echo their code unless they ask to.
66 | opts_chunk$set(echo = FALSE)
67 |
68 | set_config(timeout(40)) # httr setting applied to the GET calls
69 | cnt <- 0L # running count of the GET calls made to Github
70 | username <- "bryanhanson"
71 | ```
72 |
73 |
74 | ```{r readDB}
75 | # FOSS4Spec.xlsx is the place to add entries or make corrections; please
76 | # keep to the conventions of the existing rows. The rendered table is
77 | # sortable, so consistent wording in the description and focus columns is
78 | # what lets users find useful information easily.
79 | DF <- read_excel("FOSS4Spec.xlsx", na = "NA") # cells reading "NA" become missing
80 | DF <- as.data.frame(DF)
81 | names(DF) <- c( # short names mean less typing in later chunks
82 |   "pkgname", "desc", "lang", "focus", "repo", "web", "pub", "maint", "maint_email", "author_email"
83 | )
84 | DF <- DF[order(DF[["pkgname"]]), ] # rows sorted by package name
85 | ```
86 |
87 | ```{r token}
88 | junk <- check_for_github_token(github_token) # NOTE(review): result is stored but not used in the visible chunks; presumably the helper validates the locally supplied token -- confirm against its definition (not shown here)
89 | ```
90 |
91 | ```{r verifyURLs}
92 | # Probe every web, repo and publication URL in the table. This takes some time!
93 | ne <- nrow(DF) # number of entries
94 |
95 | # A site may be down, or its URL may simply be missing from the table;
96 | # both cases are tracked so the rendered table always looks good.
97 | webLink <- rep(FALSE, ne) # If TRUE, there is a link in the input table
98 | repoLink <- rep(FALSE, ne)
99 | pubLink <- rep(FALSE, ne)
100 |
101 | webOK <- rep(FALSE, ne) # If TRUE, URL was reachable
102 | repoOK <- rep(FALSE, ne)
103 | pubOK <- rep(FALSE, ne)
104 |
105 | # These are used for internal reporting
106 | # If TRUE, URL was given but not reachable
107 | badWeb <- rep(FALSE, ne)
108 | badRepo <- rep(FALSE, ne)
109 | badPub <- rep(FALSE, ne)
110 |
111 | for (i in seq_len(ne)) { # seq_len(): 1:ne would visit c(1, 0) if ne == 0
112 |   if (!is.na(DF$web[i])) {
113 |     webOK[i] <- good_url(DF$web[i])
114 |     webLink[i] <- TRUE
115 |     if (webLink[i] != webOK[i]) badWeb[i] <- TRUE
116 |   }
117 |
118 |   if (!is.na(DF$repo[i])) {
119 |     repoOK[i] <- good_url(DF$repo[i])
120 |     repoLink[i] <- TRUE
121 |     if (repoLink[i] != repoOK[i]) badRepo[i] <- TRUE
122 |   }
123 |
124 |   if (!is.na(DF$pub[i])) {
125 |     pubOK[i] <- good_url(DF$pub[i])
126 |     pubLink[i] <- TRUE
127 |     if (pubLink[i] != pubOK[i]) badPub[i] <- TRUE
128 |   }
129 | }
130 | # If URLs are bad they will still be added to the table as hyperlinks, but
131 | # those links will give status 404.
132 | # Write a report so maintainers can check & fix if it's on our end
133 | LinkReport <- data.frame(name = DF$pkgname, webLink, webOK, repoLink, repoOK, pubLink, pubOK, stringsAsFactors = FALSE)
134 | keep <- badPub | badRepo | badWeb # rows with at least one unreachable URL
135 | LinkReport <- LinkReport[keep, ]
136 | if (nrow(LinkReport) > 0) write.csv(LinkReport, row.names = FALSE, file = "Reports/Links404.csv")
137 | ```
138 |
139 | ```{r checkUpdateDate, warning = FALSE}
140 | # Date of latest activity per entry; reuses the URL checks done above
141 | webDate <- as.POSIXct(rep(NA, ne)) # see stackoverflow.com/a/33002710/633251
142 | commitDate <- as.POSIXct(rep(NA, ne))
143 | issueDate <- as.POSIXct(rep(NA, ne))
144 | updateDate <- as.POSIXct(rep(NA, ne))
145 |
146 | repoType <- rep("xx", ne) # "xx" = anything that is not hosted on Github
147 | repoType[grepl("github\\.com", DF$repo)] <- "gh"
148 |
149 | for (i in seq_len(ne)) { # seq_len(): 1:ne would visit c(1, 0) if ne == 0
150 |   # Date found on the project's web page, only tried if the page was reachable
151 |   if (webOK[i]) {
152 |     ans <- find_page_date(flatten_web_page(DF$web[i]))
153 |     if (!is.na(ans)) webDate[i] <- ans
154 |   }
155 |
156 |   if (repoOK[i]) {
157 |     if (repoType[i] == "gh") {
158 |       # NA returned when repo path bad
159 |       tmp <- get_github_dates(DF$repo[i], "commits")
160 |       cnt <- cnt + 1 # one more GET call to Github
161 |       if (!is.na(tmp)) commitDate[i] <- ymd(tmp)
162 |       tmp <- get_github_dates(DF$repo[i], "issues")
163 |       cnt <- cnt + 1
164 |       if (!is.na(tmp)) issueDate[i] <- ymd(tmp)
165 |     }
166 |   }
167 |
168 |   # updateDate will be the most recent of webDate, issueDate, commitDate
169 |   # If all are NA, max() issues a warning, so skip such entries (stays NA)
170 |   if (is.na(webDate[i]) && is.na(commitDate[i]) && is.na(issueDate[i])) next
171 |   updateDate[i] <- max(webDate[i], commitDate[i], issueDate[i], na.rm = TRUE)
172 | }
173 | updateDate <- date(updateDate) # -> ymd
174 |
175 | # Write a report
176 | DateReport <- data.frame(name = DF$pkgname, webDate, commitDate, issueDate, updateDate, stringsAsFactors = FALSE)
177 | write.csv(DateReport, row.names = FALSE, file = "Reports/DateReport.csv")
178 | ```
179 |
180 | ```{r createNamelink}
181 | # Build the markdown for the Name column of the table.
182 | # The package name links to the website; if the website is missing the repo
183 | # is used instead (otherwise one must edit the original table more).
184 | namelink <- DF$pkgname # There must be at least a pkgname in the input table
185 | for (i in seq_len(ne)) { # seq_len(): 1:ne would visit c(1, 0) if ne == 0
186 |   # Preferred link target: the project website
187 |   if (!is.na(DF$web[i])) {
188 |     namelink[i] <- paste0("[", DF$pkgname[i], "](", DF$web[i], ")")
189 |   } else if (!is.na(DF$repo[i])) {
190 |     # No website given: fall back to the repository
191 |     namelink[i] <- paste0("[", DF$pkgname[i], "](", DF$repo[i], ")")
192 |   }
193 |   # A publication, when given, is appended as a separate "pub" link
194 |   if (!is.na(DF$pub[i])) {
195 |     namelink[i] <- paste0(namelink[i], " ([pub](", DF$pub[i], "))")
196 |   }
197 | }
198 | ```
199 |
200 | ```{r createTable}
201 | DF2 <- data.frame(Name = namelink, Description = DF$desc, Lang = DF$lang,
202 |   Focus = DF$focus, Status = updateDate, stringsAsFactors = FALSE) # display names set directly
203 | ```
204 |
205 |