├── lookups
│   └── .placeholder
├── results
│   └── .placeholder
├── convert_lookups.sh
├── .gitignore
├── schemas
│   ├── site_schema.csv
│   ├── orgs_schema.csv
│   └── launch_schema.csv
├── import_query.sql
├── convert.sh
├── convert_lookups.R
├── README.md
├── queries.sql
└── read_db.R

/lookups/.placeholder:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/results/.placeholder:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/convert_lookups.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Orgs"
in2csv -f fixed -s schemas/orgs_schema.csv lookups/orgs.txt > results/orgs.csv
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
launches
results
lookups
*.pyc
.DS_Store
launches.tar.gz
launches.db
.Rhistory
sample_analysis
--------------------------------------------------------------------------------
/schemas/site_schema.csv:
--------------------------------------------------------------------------------
column,start,length
site,1,9
code,10,13
ucode,23,13
type,36,5
state_code,41,9
tstart,50,13
tstop,63,13
short_name,76,18
name,94,81
location,175,53
longitude,228,13
latitude,241,11
error,252,7
parent,259,13
short_english_name,272,18
english_name,290,60
unicode_name,350,14
--------------------------------------------------------------------------------
/schemas/orgs_schema.csv:
--------------------------------------------------------------------------------
column,start,length
code,1,9
ucode,10,9
state_code,19,7
type,26,17
class,43,2
tstart,45,13
tstop,58,13
short_name,71,18
name,89,81
location,170,53
longitude,223,13
latitude,236,11
error,247,8
parent,255,13
short_english_name,268,17
english_name,285,61
unicode_name,346,229
--------------------------------------------------------------------------------
/schemas/launch_schema.csv:
--------------------------------------------------------------------------------
column,start,length
tag,1,15
julian_date,16,11
date,27,11
lv_type,48,25
variant,73,7
flight_id,80,21
flight,101,25
mission,126,25
flight_code,151,25
platform,176,10
launch_site,186,9
launch_pad,195,17
apogee_km,212,7
apoflag,219,2
range,221,5
range_flag,226,2
destination,228,13
agency,241,13
launch_code,254,5
launch_group,259,25
category,284,25
citation1,309,21
citation2,330,21
notes,351,31
--------------------------------------------------------------------------------
/import_query.sql:
--------------------------------------------------------------------------------
.mode csv
.import results/launches.csv launches
.import results/orgs.csv orgs

ALTER TABLE launches ADD COLUMN year INTEGER;
ALTER TABLE launches ADD COLUMN launch_type;
ALTER TABLE launches ADD COLUMN launch_status;

UPDATE launches SET year = CAST(SUBSTR(tag, 1, 4) AS INTEGER);
UPDATE launches SET launch_type = SUBSTR(launch_code, 1, 1);
UPDATE launches SET launch_status = SUBSTR(launch_code, 2, 1);

CREATE UNIQUE INDEX launch_tag_index ON launches(tag);
CREATE UNIQUE INDEX org_code ON orgs(code);
--------------------------------------------------------------------------------
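`import_query.sql` derives three convenience columns from the raw JSR fields: the launch year from the first four characters of the launch tag, and the launch type and status from the first and second characters of the launch code. Once the database has been built (see the README below), the derived columns can be spot-checked from R; the following is a minimal sketch, assuming the DBI/RSQLite packages referenced in the commented-out header of `read_db.R`:

```
library(DBI)
library(RSQLite)

# open the SQLite database produced by import_query.sql
db <- dbConnect(SQLite(), dbname = "launches.db")

# counts by year, launch type and status, using the derived columns
dbGetQuery(db, "SELECT year, launch_type, launch_status, COUNT(*) AS n
                FROM launches
                GROUP BY year, launch_type, launch_status
                LIMIT 10")

dbDisconnect(db)
```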
/convert.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Convert each fixed-width launch file to CSV using the launch schema
for filename in launches/*; do
    echo "$filename"
    in2csv -f fixed -s schemas/launch_schema.csv "$filename" > "$filename.csv"
done

# Drop the second row of each CSV (the original header line of the
# fixed-width file, which in2csv converts as if it were data)
for filename in launches/*.csv; do
    echo "$filename"
    sed '2 d' "$filename" > "${filename%.*}_stripped.csv"
done

mkdir -p clean_launch

for filename in launches/*_stripped.csv; do
    echo "$filename"
    mv "$filename" clean_launch/
done

# Stack the cleaned per-file CSVs into a single launches table
csvstack clean_launch/*.csv > results/launches.csv

rm launches/*.csv

rm clean_launch/*.csv

rmdir clean_launch
--------------------------------------------------------------------------------
/convert_lookups.R:
--------------------------------------------------------------------------------
# === A script to replace: ===

# #!/bin/bash
#
# echo "Orgs"
# in2csv -f fixed -s schemas/orgs_schema.csv lookups/orgs.txt > results/orgs.csv

library(tidyverse)
setwd('/Users/yzhou/src/space-launches')  # adjust to your local checkout


schema <- read_csv('schemas/orgs_schema.csv')
read_fwf('lookups/orgs.txt', fwf_widths(schema$length, schema$column)) %>%
  write_csv('results/orgs.csv')

schema_sites <- read_csv('schemas/site_schema.csv')
read_fwf('lookups/sites.txt', fwf_widths(schema_sites$length, schema_sites$column)) %>%
  write_csv('results/sites.csv')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# space-launches

Converts the [JSR Launch Vehicle database](http://www.planet4589.org/space/lvdb/index.html) into a database suitable for analysis.

## Setup

Install csvkit:

```
pip install csvkit
```

The R scripts (``convert_lookups.R``, ``read_db.R``) additionally require the tidyverse packages.

Download and unpack the launch files:

```
wget http://www.planet4589.org/space/lvdb/launches.tar.gz
mkdir -p launches
tar xvfz launches.tar.gz -C launches
```

The org and site lookup tables from the same database should be saved as ``lookups/orgs.txt`` and ``lookups/sites.txt`` before converting the lookups.

## Usage

Convert fixed-width files into CSVs:

```
./convert.sh
./convert_lookups.sh
```

The results can be found in the ``results`` folder.
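For a quick sanity check of the conversion before building the database, the stacked CSV can be read back into R. This is a minimal illustrative sketch, assuming the same tidyverse packages used by ``read_db.R``:

```
library(tidyverse)

launches <- read_csv('results/launches.csv')
nrow(launches)      # one row per launch
head(launches$tag)  # launch tags; the first four characters are the year
```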

To create a local SQLite database:

```
sqlite3 launches.db < import_query.sql
```

To run the boilerplate queries:

```
sqlite3 launches.db < queries.sql
```

## Credits

* [Tim Fernholz](https://qz.com/author/tfernholz/)
* [Christopher Groskopf](https://qz.com/author/chrisqz/)
--------------------------------------------------------------------------------
/queries.sql:
--------------------------------------------------------------------------------
.headers on
.mode csv

.output results/total_by_year.csv

SELECT
    year,
    COUNT(*) AS launch_count
FROM launches
WHERE launch_type = 'O'
GROUP BY year
ORDER BY year;

.output results/failures_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN launch_status = 'S' THEN tag END) AS success,
    COUNT(CASE WHEN launch_status = 'F' THEN tag END) AS failure,
    COUNT(CASE WHEN launch_status = 'U' THEN tag END) AS unknown
FROM launches
GROUP BY year
ORDER BY year;

.output results/success_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN launch_status = 'S' THEN tag END) AS success,
    COUNT(CASE WHEN launch_status = 'F' THEN tag END) AS failure,
    COUNT(CASE WHEN launch_status = 'U' THEN tag END) AS unknown
FROM launches
WHERE launch_type = 'O'
GROUP BY year
ORDER BY year;

.output results/class_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN class = 'A' THEN tag END) AS academic,
    COUNT(CASE WHEN class = 'B' THEN tag END) AS business,
    COUNT(CASE WHEN class = 'C' THEN tag END) AS military,
    COUNT(CASE WHEN class = 'D' THEN tag END) AS government
FROM launches
JOIN orgs ON launches.agency = orgs.code
GROUP BY year
ORDER BY year;

.output results/total_by_country_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN state_code = 'US' THEN tag END) AS united_states,
    COUNT(CASE WHEN state_code = 'SU' OR state_code = 'RU' THEN tag END) AS russia,
    COUNT(CASE WHEN state_code = 'CN' THEN tag END) AS china,
    COUNT(CASE WHEN state_code = 'IN' THEN tag END) AS india
FROM launches
JOIN orgs ON launches.agency = orgs.code
GROUP BY year
ORDER BY year;

.output stdout
--------------------------------------------------------------------------------
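The per-year CSVs that queries.sql writes into the results folder drop straight into R. As a minimal sketch (assuming queries.sql has been run and the tidyverse is installed), the orbital-launch totals can be plotted like this:

```
library(tidyverse)

totals <- read_csv('results/total_by_year.csv')

ggplot(totals, aes(x = year, y = launch_count)) +
  geom_line() +
  theme_minimal()
```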
/read_db.R:
--------------------------------------------------------------------------------
# library(RSQLite)
#
# sqlite.driver <- dbDriver("SQLite")
# db <- DBI::dbConnect(sqlite.driver, dbname = "launches.db")
#
# tables <- dbListTables(db)
#
# launches <- dbReadTable(db,tables[1])
# orgs <- dbReadTable(db,tables[2])
#

library(tidyverse)

launches <- read_csv('results/launches.csv')
orgs <- read_csv('results/orgs.csv')
sites <- read_csv('results/sites.csv')


# derive year, launch type and launch status as in import_query.sql:
# year = first four characters of the tag; type/status = first/second
# character of the launch code
launches$year <- as.integer(str_sub(launches$tag, 1, 4))
launches$launch_type <- str_sub(launches$launch_code, 1, 1)
launches$launch_status <- str_sub(launches$launch_code, 2, 2)

# exploratory check: sites with no recorded start date
sites %>%
  filter(tstart == '-') %>%
  View()

# process SITES to get unique ID for siteID <> location
sites$start_year <- as.integer(str_sub(sites$tstart, 1, 4))

sites_tomatch <- sites %>%
  filter(site != '#') %>%
  filter(!is.na(longitude)) %>%
  group_by(site) %>%
  mutate(latest_start_date = max(start_year, na.rm = T)) %>%
  filter(start_year == latest_start_date | is.na(start_year)) %>% # only take the site of the latest phase
  mutate(longitude = as.numeric(longitude), latitude = as.numeric(latitude)) %>%
  filter(!is.na(longitude)) %>% # take out 25 sites -- minor sites related to Zubov Expedition
  group_by(site, parent, location) %>%
  summarise(longitude = mean(longitude, na.rm = T),
            latitude = mean(latitude, na.rm = T)
            ) %>%
  ungroup() %>%
  mutate(launch_site = site) %>%
  select(-site)


tomap <- launches %>%
  left_join(sites_tomatch, by = 'launch_site') %>%
  select(year, launch_site, location, longitude, latitude, parent) %>%
  filter(!is.na(location))


library(mapdata)
world <- map_data('world')

sample_map <- ggplot() +
  geom_map(data = world, map = world,
           aes(x = long, y = lat, group = group, map_id = region),
           fill = "white", colour = "#7f7f7f", size = 0.3) +
  geom_point(data = tomap,
             aes(x = longitude, y = latitude), color = 'red', alpha = 0.1) +
  coord_fixed(1.3) +
  theme_minimal()

ggsave('sample_map.pdf', plot = sample_map, width = 12, height = 12/1.3)


# top_launchers = c('US','SU','RU','J','UK','IN','F','CA','D','AU')
# launches %>%
#   select(year, agency) %>%
#   mutate(ucode = agency) %>%
#   left_join(orgs) %>%
#   group_by(name) %>%
#   count() %>%
#   arrange(desc(n))
#   filter(!is.na(state_code)) %>%
#   filter(state_code %in% top_launchers) %>%
#   # group_by(state_code) %>%
#   # summarise(launch_count = sum(n, na.rm=T)) %>%
#   # arrange(desc(launch_count))
#   ggplot()+
#   geom_line(aes(x=year, y=n, group=state_code, color=state_code))
#
#
# launches %>%
#   select(year, agency) %>%
#   mutate(ucode = agency) %>%
#   left_join(orgs) %>%
#   group_by(year, class) %>%
#   count() %>%
#   filter(!is.na(class)) %>%
#   # filter(state_code %in% top_launchers) %>%
#   # group_by(class) %>%
#   # summarise(launch_count = sum(n, na.rm=T)) %>%
#   # arrange(desc(launch_count)) %>%
#   ggplot()+
#   # geom_bar(aes(x=class,y=launch_count),stat='identity') %>%
#
#   geom_line(aes(x=year, y=n, group=class, color=class))
#
#
#
# double_names <- (orgs %>%
#   group_by(ucode, state_code) %>%
#   count() %>%
#   group_by(ucode) %>%
#   summarise(count = n()) %>%
#   filter(count >1))$ucode
#
# orgs %>%
#   group_by(ucode, state_code) %>%
#   count() %>%
#   filter(ucode %in% double_names) %>%
#   View()
--------------------------------------------------------------------------------
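read_db.R recomputes year and launch_status directly from the CSVs, so the SQL aggregates in queries.sql have straightforward dplyr equivalents. As a minimal sketch (assuming the launches data frame built at the top of read_db.R), the failures-by-year query translates to:

```
launches %>%
  group_by(year) %>%
  summarise(total   = n(),
            success = sum(launch_status == 'S', na.rm = TRUE),
            failure = sum(launch_status == 'F', na.rm = TRUE),
            unknown = sum(launch_status == 'U', na.rm = TRUE))
```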