├── lookups
│   └── .placeholder
├── results
│   └── .placeholder
├── convert_lookups.sh
├── .gitignore
├── schemas
│   ├── site_schema.csv
│   ├── orgs_schema.csv
│   └── launch_schema.csv
├── import_query.sql
├── convert.sh
├── convert_lookups.R
├── README.md
├── queries.sql
└── read_db.R

/lookups/.placeholder:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/results/.placeholder:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/convert_lookups.sh:
--------------------------------------------------------------------------------
#!/bin/bash

echo "Orgs"
in2csv -f fixed -s schemas/orgs_schema.csv lookups/orgs.txt > results/orgs.csv
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
launches
results
lookups
*.pyc
.DS_Store
launches.tar.gz
launches.db
.Rhistory
sample_analysis
--------------------------------------------------------------------------------
/schemas/site_schema.csv:
--------------------------------------------------------------------------------
column,start,length
site,1,9
code,10,13
ucode,23,13
type,36,5
state_code,41,9
tstart,50,13
tstop,63,13
short_name,76,18
name,94,81
location,175,53
longitude,228,13
latitude,241,11
error,252,7
parent,259,13
short_english_name,272,18
english_name,290,60
unicode_name,350,14
--------------------------------------------------------------------------------
/schemas/orgs_schema.csv:
--------------------------------------------------------------------------------
column,start,length
code,1,9
ucode,10,9
state_code,19,7
type,26,17
class,43,2
tstart,45,13
tstop,58,13
short_name,71,18
name,89,81
location,170,53
longitude,223,13
latitude,236,11
error,247,8
parent,255,13
short_english_name,268,17
english_name,285,61
unicode_name,346,229
--------------------------------------------------------------------------------
/schemas/launch_schema.csv:
--------------------------------------------------------------------------------
column,start,length
tag,1,15
julian_date,16,11
date,27,11
lv_type,48,25
variant,73,7
flight_id,80,21
flight,101,25
mission,126,25
flight_code,151,25
platform,176,10
launch_site,186,9
launch_pad,195,17
apogee_km,212,7
apoflag,219,2
range,221,5
range_flag,226,2
destination,228,13
agency,241,13
launch_code,254,5
launch_group,259,25
category,284,25
citation1,309,21
citation2,330,21
notes,351,31
--------------------------------------------------------------------------------
/import_query.sql:
--------------------------------------------------------------------------------
.mode csv
.import results/launches.csv launches
.import results/orgs.csv orgs

ALTER TABLE launches ADD COLUMN year INTEGER;
ALTER TABLE launches ADD COLUMN launch_type;
ALTER TABLE launches ADD COLUMN launch_status;

UPDATE launches SET year = CAST(SUBSTR(tag, 1, 4) AS INTEGER);
UPDATE launches SET launch_type = SUBSTR(launch_code, 1, 1);
UPDATE launches SET launch_status = SUBSTR(launch_code, 2, 1);

CREATE UNIQUE INDEX launch_tag_index ON launches(tag);
CREATE UNIQUE INDEX org_code ON orgs(code);
--------------------------------------------------------------------------------
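`import_query.sql` derives three convenience columns from the raw JSR fields: the launch year from the first four characters of the launch tag, and the launch type and status from the first and second characters of the launch code. Once the database has been built (see the README below), the derived columns can be spot-checked from R; the following is a minimal sketch, assuming the DBI/RSQLite packages referenced in the commented-out header of `read_db.R`:

```
library(DBI)
library(RSQLite)

# open the SQLite database produced by import_query.sql
db <- dbConnect(SQLite(), dbname = "launches.db")

# counts by year, launch type and status, using the derived columns
dbGetQuery(db, "SELECT year, launch_type, launch_status, COUNT(*) AS n
                FROM launches
                GROUP BY year, launch_type, launch_status
                LIMIT 10")

dbDisconnect(db)
```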
/convert.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Convert each fixed-width launch file to CSV using the launch schema
for filename in launches/*; do
    echo "$filename"
    in2csv -f fixed -s schemas/launch_schema.csv "$filename" > "$filename.csv"
done

# Drop the second row of each CSV (the original header line of the
# fixed-width file, which in2csv converts as if it were data)
for filename in launches/*.csv; do
    echo "$filename"
    sed '2 d' "$filename" > "${filename%.*}_stripped.csv"
done

mkdir -p clean_launch

for filename in launches/*_stripped.csv; do
    echo "$filename"
    mv "$filename" clean_launch/
done

# Stack the cleaned per-file CSVs into a single launches table
csvstack clean_launch/*.csv > results/launches.csv

rm launches/*.csv

rm clean_launch/*.csv

rmdir clean_launch
--------------------------------------------------------------------------------
/convert_lookups.R:
--------------------------------------------------------------------------------
# === A script to replace: ===

# #!/bin/bash
#
# echo "Orgs"
# in2csv -f fixed -s schemas/orgs_schema.csv lookups/orgs.txt > results/orgs.csv

library(tidyverse)
setwd('/Users/yzhou/src/space-launches')  # adjust to your local checkout


schema <- read_csv('schemas/orgs_schema.csv')
read_fwf('lookups/orgs.txt', fwf_widths(schema$length, schema$column)) %>%
  write_csv('results/orgs.csv')

schema_sites <- read_csv('schemas/site_schema.csv')
read_fwf('lookups/sites.txt', fwf_widths(schema_sites$length, schema_sites$column)) %>%
  write_csv('results/sites.csv')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# space-launches

Converts the [JSR Launch Vehicle database](http://www.planet4589.org/space/lvdb/index.html) into a database suitable for analysis.

## Setup

Install csvkit:

```
pip install csvkit
```

The R scripts (``convert_lookups.R``, ``read_db.R``) additionally require the tidyverse packages.

Download and unpack the launch files:

```
wget http://www.planet4589.org/space/lvdb/launches.tar.gz
mkdir -p launches
tar xvfz launches.tar.gz -C launches
```

The org and site lookup tables from the same database should be saved as ``lookups/orgs.txt`` and ``lookups/sites.txt`` before converting the lookups.

## Usage

Convert fixed-width files into CSVs:

```
./convert.sh
./convert_lookups.sh
```

The results can be found in the ``results`` folder.
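For a quick sanity check of the conversion before building the database, the stacked CSV can be read back into R. This is a minimal illustrative sketch, assuming the same tidyverse packages used by ``read_db.R``:

```
library(tidyverse)

launches <- read_csv('results/launches.csv')
nrow(launches)      # one row per launch
head(launches$tag)  # launch tags; the first four characters are the year
```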

To create a local SQLite database:

```
sqlite3 launches.db < import_query.sql
```

To run the boilerplate queries:

```
sqlite3 launches.db < queries.sql
```

## Credits

* [Tim Fernholz](https://qz.com/author/tfernholz/)
* [Christopher Groskopf](https://qz.com/author/chrisqz/)
--------------------------------------------------------------------------------
/queries.sql:
--------------------------------------------------------------------------------
.headers on
.mode csv

.output results/total_by_year.csv

SELECT
    year,
    COUNT(*) AS launch_count
FROM launches
WHERE launch_type = 'O'
GROUP BY year
ORDER BY year;

.output results/failures_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN launch_status = 'S' THEN tag END) AS success,
    COUNT(CASE WHEN launch_status = 'F' THEN tag END) AS failure,
    COUNT(CASE WHEN launch_status = 'U' THEN tag END) AS unknown
FROM launches
GROUP BY year
ORDER BY year;

.output results/success_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN launch_status = 'S' THEN tag END) AS success,
    COUNT(CASE WHEN launch_status = 'F' THEN tag END) AS failure,
    COUNT(CASE WHEN launch_status = 'U' THEN tag END) AS unknown
FROM launches
WHERE launch_type = 'O'
GROUP BY year
ORDER BY year;

.output results/class_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN class = 'A' THEN tag END) AS academic,
    COUNT(CASE WHEN class = 'B' THEN tag END) AS business,
    COUNT(CASE WHEN class = 'C' THEN tag END) AS military,
    COUNT(CASE WHEN class = 'D' THEN tag END) AS government
FROM launches
JOIN orgs ON launches.agency = orgs.code
GROUP BY year
ORDER BY year;

.output results/total_by_country_by_year.csv

SELECT
    year,
    COUNT(*) AS total,
    COUNT(CASE WHEN state_code = 'US' THEN tag END) AS united_states,
    COUNT(CASE WHEN state_code = 'SU' OR state_code = 'RU' THEN tag END) AS russia,
    COUNT(CASE WHEN state_code = 'CN' THEN tag END) AS china,
    COUNT(CASE WHEN state_code = 'IN' THEN tag END) AS india
FROM launches
JOIN orgs ON launches.agency = orgs.code
GROUP BY year
ORDER BY year;

.output stdout
--------------------------------------------------------------------------------
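The per-year CSVs that queries.sql writes into the results folder drop straight into R. As a minimal sketch (assuming queries.sql has been run and the tidyverse is installed), the orbital-launch totals can be plotted like this:

```
library(tidyverse)

totals <- read_csv('results/total_by_year.csv')

ggplot(totals, aes(x = year, y = launch_count)) +
  geom_line() +
  theme_minimal()
```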
/read_db.R:
--------------------------------------------------------------------------------
# library(RSQLite)
#
# sqlite.driver <- dbDriver("SQLite")
# db <- DBI::dbConnect(sqlite.driver, dbname = "launches.db")
#
# tables <- dbListTables(db)
#
# launches <- dbReadTable(db,tables[1])
# orgs <- dbReadTable(db,tables[2])
#

library(tidyverse)

launches <- read_csv('results/launches.csv')
orgs <- read_csv('results/orgs.csv')
sites <- read_csv('results/sites.csv')


# derive year, launch type and launch status as in import_query.sql:
# year = first four characters of the tag; type/status = first/second
# character of the launch code
launches$year <- as.integer(str_sub(launches$tag, 1, 4))
launches$launch_type <- str_sub(launches$launch_code, 1, 1)
launches$launch_status <- str_sub(launches$launch_code, 2, 2)

# exploratory check: sites with no recorded start date
sites %>%
  filter(tstart == '-') %>%
  View()

# process SITES to get unique ID for siteID <> location
sites$start_year <- as.integer(str_sub(sites$tstart, 1, 4))

sites_tomatch <- sites %>%
  filter(site != '#') %>%
  filter(!is.na(longitude)) %>%
  group_by(site) %>%
  mutate(latest_start_date = max(start_year, na.rm = T)) %>%
  filter(start_year == latest_start_date | is.na(start_year)) %>% # only take the site of the latest phase
  mutate(longitude = as.numeric(longitude), latitude = as.numeric(latitude)) %>%
  filter(!is.na(longitude)) %>% # take out 25 sites -- minor sites related to Zubov Expedition
  group_by(site, parent, location) %>%
  summarise(longitude = mean(longitude, na.rm = T),
            latitude = mean(latitude, na.rm = T)
            ) %>%
  ungroup() %>%
  mutate(launch_site = site) %>%
  select(-site)


tomap <- launches %>%
  left_join(sites_tomatch, by = 'launch_site') %>%
  select(year, launch_site, location, longitude, latitude, parent) %>%
  filter(!is.na(location))


library(mapdata)
world <- map_data('world')

sample_map <- ggplot() +
  geom_map(data = world, map = world,
           aes(x = long, y = lat, group = group, map_id = region),
           fill = "white", colour = "#7f7f7f", size = 0.3) +
  geom_point(data = tomap,
             aes(x = longitude, y = latitude), color = 'red', alpha = 0.1) +
  coord_fixed(1.3) +
  theme_minimal()

ggsave('sample_map.pdf', plot = sample_map, width = 12, height = 12/1.3)


# top_launchers = c('US','SU','RU','J','UK','IN','F','CA','D','AU')
# launches %>%
#   select(year, agency) %>%
#   mutate(ucode = agency) %>%
#   left_join(orgs) %>%
#   group_by(name) %>%
#   count() %>%
#   arrange(desc(n))
#   filter(!is.na(state_code)) %>%
#   filter(state_code %in% top_launchers) %>%
#   # group_by(state_code) %>%
#   # summarise(launch_count = sum(n, na.rm=T)) %>%
#   # arrange(desc(launch_count))
#   ggplot()+
#   geom_line(aes(x=year, y=n, group=state_code, color=state_code))
#
#
# launches %>%
#   select(year, agency) %>%
#   mutate(ucode = agency) %>%
#   left_join(orgs) %>%
#   group_by(year, class) %>%
#   count() %>%
#   filter(!is.na(class)) %>%
#   # filter(state_code %in% top_launchers) %>%
#   # group_by(class) %>%
#   # summarise(launch_count = sum(n, na.rm=T)) %>%
#   # arrange(desc(launch_count)) %>%
#   ggplot()+
#   # geom_bar(aes(x=class,y=launch_count),stat='identity') %>%
#
#   geom_line(aes(x=year, y=n, group=class, color=class))
#
#
#
# double_names <- (orgs %>%
#   group_by(ucode, state_code) %>%
#   count() %>%
#   group_by(ucode) %>%
#   summarise(count = n()) %>%
#   filter(count >1))$ucode
#
# orgs %>%
#   group_by(ucode, state_code) %>%
#   count() %>%
#   filter(ucode %in% double_names) %>%
#   View()
--------------------------------------------------------------------------------
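read_db.R recomputes year and launch_status directly from the CSVs, so the SQL aggregates in queries.sql have straightforward dplyr equivalents. As a minimal sketch (assuming the launches data frame built at the top of read_db.R), the failures-by-year query translates to:

```
launches %>%
  group_by(year) %>%
  summarise(total   = n(),
            success = sum(launch_status == 'S', na.rm = TRUE),
            failure = sum(launch_status == 'F', na.rm = TRUE),
            unknown = sum(launch_status == 'U', na.rm = TRUE))
```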