├── NAMESPACE
├── .Rbuildignore
├── man
    └── hello.Rd
├── DESCRIPTION
├── readgedcom.Rproj
├── R
    ├── hello.R
    └── read_gedcom.R
├── .gitignore
└── README.md


/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[[:alpha:]]+")
2 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/man/hello.Rd:
--------------------------------------------------------------------------------
 1 | \name{hello}
 2 | \alias{hello}
 3 | \title{Hello, World!}
 4 | \usage{
 5 | hello()
 6 | }
 7 | \description{
 8 | Prints 'Hello, world!'.
 9 | }
10 | \examples{
11 | hello()
12 | }
13 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: readgedcom
 2 | Type: Package
 3 | Title: Read GEDCOM Files into a Tidy Format
 4 | Version: 0.1.0
 5 | Author: Who wrote it
 6 | Maintainer: The package maintainer <yourself@somewhere.net>
 7 | Description: Reads GEDCOM genealogy files and stores the individuals they
 8 |     describe in a tidy format for family history analysis in R.
 9 | License: What license is it under?
10 | Encoding: UTF-8
11 | LazyData: true
12 | 


--------------------------------------------------------------------------------
/readgedcom.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/R/hello.R:
--------------------------------------------------------------------------------
 1 | # hello(): example function from the RStudio package template.
 2 | #
 3 | # Prints the string 'Hello, world!' to the console. print() hands its
 4 | # argument back invisibly, so that string is also hello()'s return value.
 5 | #
 6 | # Package-authoring guide referenced by the template:
 7 | #
 8 | #   http://r-pkgs.had.co.nz/
 9 | #
10 | # RStudio keyboard shortcuts for package authoring:
11 | #
12 | #   Install Package:           'Cmd + Shift + B'
13 | #   Check Package:             'Cmd + Shift + E'
14 | #   Test Package:              'Cmd + Shift + T'
15 | 
16 | hello <- function() {
17 |   greeting <- "Hello, world!"
18 |   print(greeting)
19 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | .DS_Store
 5 | 
 6 | # Session Data files
 7 | .RData
 8 | 
 9 | # User-specific files
10 | .Ruserdata
11 | 
12 | # Example code in package build process
13 | *-Ex.R
14 | 
15 | # Output files from R CMD build
16 | /*.tar.gz
17 | 
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 | 
21 | # RStudio files
22 | .Rproj.user/
23 | 
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | 
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 | 
31 | # knitr and R markdown default cache directories
32 | *_cache/
33 | /cache/
34 | 
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 | 
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Reading GEDCOM
 2 | Reading GEDCOM files and doing analysis in R
 3 | 
 4 | I was surprised to find that, with family history being the second most popular hobby in the United States, there hasn't been a package built in R to read GEDCOM files. This is my stab at it. I am hoping that this repository can help others learn more about their ancestors using data analysis.
 5 | Right now, the `read_gedcom()` function lets you read GEDCOM files and store them in a tidy format. The package is still in development, but hopefully it can help fill your family history needs.
 6 | 
 7 | To download a GEDCOM file from Ancestry:
 8 | https://support.ancestry.com/s/article/Uploading-and-Downloading-Trees
 9 | 
10 | I also found this script for downloading a GEDCOM file from FamilySearch:
11 | https://github.com/Linekio/getmyancestors
12 | 
13 | There are also many sites that offer free GEDCOM files for celebrities/royalty/etc. 
14 | 
15 | You can install this package with:
16 | ```
17 | # install.packages("devtools")
18 | devtools::install_github("jjfitz/readgedcom")
19 | ```
20 | 
21 | Here is also an article that I wrote about exploring my family history:
22 | https://jjfitz.github.io/posts/reading-gedcom-files-r/
23 | 


--------------------------------------------------------------------------------
/R/read_gedcom.R:
--------------------------------------------------------------------------------
  1 | #' Read GEDCOM
  2 | #'
  3 | #' Reads a GEDCOM file and returns its individuals (`INDI` records) as a
  4 | #' tibble with one row per person.
  5 | #'
  6 | #' @param file_path Path to the GEDCOM file to read.
  7 | #' @return A tibble with the columns `id`, `firstname`, `lastname`,
  8 | #'   `birthdate`, `birthplace`, `birthlat`, `birthlong`, `deathdate`,
  9 | #'   `deathplace`, `deathlat`, `deathlong`, `sex`, `FAMC` and `FAMS`. The
 10 | #'   latitude/longitude columns are numeric, every other column is
 11 | #'   character. `FAMS` lists all spouse-family ids separated by spaces.
 12 | #' @keywords gedcom
 13 | #' @export
 14 | #' @examples
 15 | #' \dontrun{
 16 | #' people <- read_gedcom("family_tree.ged")
 17 | #' }
 18 | read_gedcom <- function(file_path) {
 19 |   # stringr, tibble and dplyr are called through `::`, so they must be
 20 |   # installed but do not have to be attached with library().
 21 |   gedcom <- readLines(file_path, warn = FALSE)
 22 | 
 23 |   # Every level-0 line opens a new record. Splitting on those boundaries
 24 |   # keeps HEAD/FAM/SOUR lines out of the individuals and guarantees that
 25 |   # the last individual in the file is emitted as well.
 26 |   record_no <- cumsum(stringr::str_detect(gedcom, "^\\s*0\\s"))
 27 |   records <- split(gedcom, record_no)
 28 |   is_individual <- vapply(
 29 |     records,
 30 |     function(record) stringr::str_detect(record[[1]], "^\\s*0\\s.*@ INDI"),
 31 |     logical(1)
 32 |   )
 33 | 
 34 |   columns <- c("id", "firstname", "lastname", "birthdate", "birthplace",
 35 |                "birthlat", "birthlong", "deathdate", "deathplace",
 36 |                "deathlat", "deathlong", "sex", "FAMC", "FAMS")
 37 |   # Zero-row template: fixes the column order and types even when the
 38 |   # file holds no individuals at all.
 39 |   template <- tibble::as_tibble(
 40 |     stats::setNames(rep(list(character(0)), length(columns)), columns)
 41 |   )
 42 | 
 43 |   # Parse each record once and bind a single time instead of growing the
 44 |   # result with rbind() inside a loop.
 45 |   rows <- lapply(unname(records[is_individual]), .parse_individual)
 46 |   individuals <- dplyr::bind_rows(c(list(template), rows))
 47 | 
 48 |   # Records whose id could not be extracted are dropped.
 49 |   individuals <- individuals[!is.na(individuals$id), ]
 50 | 
 51 |   coordinates <- c("birthlat", "birthlong", "deathlat", "deathlong")
 52 |   individuals[coordinates] <- lapply(individuals[coordinates], as.numeric)
 53 | 
 54 |   individuals
 55 | }
 56 | 
 57 | # Turns the lines of one INDI record into a one-row tibble of character
 58 | # values. Helper names start with a dot so that the package's
 59 | # exportPattern("^[[:alpha:]]+") does not export them.
 60 | .parse_individual <- function(record) {
 61 |   parts <- stringr::str_match(record, "^\\s*(\\d+)\\s+(\\S+)")
 62 |   level <- as.integer(parts[, 2])
 63 |   tag <- parts[, 3]
 64 |   id_pattern <- "(?<=@.)\\d*(?=@)"
 65 | 
 66 |   # When a level-1 tag is repeated the last line wins; this keeps the
 67 |   # output backward compatible with earlier versions of the parser.
 68 |   last_line <- function(wanted) {
 69 |     hits <- which(level == 1L & tag == wanted)
 70 |     if (length(hits) == 0L) NA_character_ else record[[hits[length(hits)]]]
 71 |   }
 72 | 
 73 |   name_line <- last_line("NAME")
 74 |   birth <- .event_details(record, level, tag, "BIRT")
 75 |   death <- .event_details(record, level, tag, "DEAT")
 76 | 
 77 |   # A person can be a spouse in several families: keep every id.
 78 |   spouse_families <- stringr::str_extract(
 79 |     record[which(level == 1L & tag == "FAMS")],
 80 |     id_pattern
 81 |   )
 82 |   spouse_families <- spouse_families[!is.na(spouse_families)]
 83 | 
 84 |   tibble::tibble(
 85 |     id = stringr::str_extract(record[[1]], id_pattern),
 86 |     firstname = stringr::str_extract(name_line, "(?<=NAME ).+(?= /+.)"),
 87 |     lastname = stringr::str_extract(name_line, "(?<=/).+(?=/)"),
 88 |     birthdate = birth[["DATE"]],
 89 |     birthplace = birth[["PLAC"]],
 90 |     birthlat = birth[["LATI"]],
 91 |     birthlong = birth[["LONG"]],
 92 |     deathdate = death[["DATE"]],
 93 |     deathplace = death[["PLAC"]],
 94 |     deathlat = death[["LATI"]],
 95 |     deathlong = death[["LONG"]],
 96 |     sex = stringr::str_extract(last_line("SEX"), "(?<=SEX ).+"),
 97 |     FAMC = stringr::str_extract(last_line("FAMC"), id_pattern),
 98 |     FAMS = if (length(spouse_families) > 0L) {
 99 |       paste(spouse_families, collapse = " ")
100 |     } else {
101 |       NA_character_
102 |     }
103 |   )
104 | }
105 | 
106 | # Extracts DATE, PLAC, LATI and LONG for the last level-1 `event` (for
107 | # example "BIRT") of a record. Returns a named character vector in which
108 | # missing details are NA.
109 | .event_details <- function(record, level, tag, event) {
110 |   details <- c(DATE = NA_character_, PLAC = NA_character_,
111 |                LATI = NA_character_, LONG = NA_character_)
112 | 
113 |   starts <- which(level == 1L & tag == event)
114 |   if (length(starts) == 0L) {
115 |     return(details)
116 |   }
117 |   start <- starts[length(starts)]
118 | 
119 |   # The event's own sub-lines run until the next line at level 1 or lower.
120 |   # Scanning that span, rather than fixed offsets from the event line,
121 |   # never indexes past the end of the file and copes with absent
122 |   # sub-lines.
123 |   closing <- which(seq_along(record) > start & !is.na(level) & level <= 1L)
124 |   stop_at <- if (length(closing) > 0L) closing[1] - 1L else length(record)
125 |   inside <- start + seq_len(stop_at - start)
126 | 
127 |   for (sub_tag in names(details)) {
128 |     wanted <- tag[inside] == sub_tag
129 |     if (sub_tag %in% c("DATE", "PLAC")) {
130 |       # DATE/PLAC of the event itself sit directly below it (level 2);
131 |       # deeper ones belong to nested structures such as citations.
132 |       wanted <- wanted & level[inside] == 2L
133 |     }
134 |     hit <- inside[which(wanted)]
135 |     if (length(hit) > 0L) {
136 |       details[[sub_tag]] <- stringr::str_extract(
137 |         record[[hit[1]]],
138 |         paste0("(?<=", sub_tag, " ).+")
139 |       )
140 |     }
141 |   }
142 | 
143 |   details
144 | }
145 | 


--------------------------------------------------------------------------------