├── .gitignore ├── Cargo.toml ├── Makefile ├── NOTES.md ├── README.md ├── cli ├── Cargo.toml └── src │ ├── datasette.rs │ ├── main.rs │ └── markdown.rs ├── core ├── Cargo.toml └── src │ └── lib.rs ├── examples ├── congress │ ├── Makefile │ ├── congress.sql │ └── docs.md ├── nyc-taxi │ └── schema.sql └── simple │ ├── Makefile │ ├── docs.md │ └── schema.sql └── extension ├── Cargo.toml └── src ├── lib.rs └── tables.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | 4 | *.db 5 | dist/ 6 | *.parquet 7 | examples/**/metadata.json 8 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "cli", 5 | "core", 6 | "extension", 7 | ] 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION=$(shell cat VERSION) 2 | 3 | ifeq ($(shell uname -s),Darwin) 4 | CONFIG_DARWIN=y 5 | else ifeq ($(OS),Windows_NT) 6 | CONFIG_WINDOWS=y 7 | else 8 | CONFIG_LINUX=y 9 | endif 10 | 11 | LIBRARY_PREFIX=lib 12 | ifdef CONFIG_DARWIN 13 | LOADABLE_EXTENSION=dylib 14 | endif 15 | 16 | ifdef CONFIG_LINUX 17 | LOADABLE_EXTENSION=so 18 | endif 19 | 20 | 21 | ifdef CONFIG_WINDOWS 22 | LOADABLE_EXTENSION=dll 23 | LIBRARY_PREFIX= 24 | endif 25 | 26 | prefix=dist 27 | TARGET_LOADABLE=$(prefix)/debug/docs0.$(LOADABLE_EXTENSION) 28 | TARGET_LOADABLE_RELEASE=$(prefix)/release/docs0.$(LOADABLE_EXTENSION) 29 | 30 | TARGET_STATIC=$(prefix)/debug/docs0.a 31 | TARGET_STATIC_RELEASE=$(prefix)/release/docs0.a 32 | 33 | TARGET_CLI=$(prefix)/debug/sqlite-docs 34 | TARGET_CLI_RELEASE=$(prefix)/release/sqlite-docs 35 | 36 | TARGET_WHEELS=$(prefix)/debug/wheels 37 | TARGET_WHEELS_RELEASE=$(prefix)/release/wheels 38 | 39 | INTERMEDIATE_PYPACKAGE_EXTENSION=python/sqlite_docs/sqlite_docs/docs0.$(LOADABLE_EXTENSION) 40 | 41 | ifdef target 42 | CARGO_TARGET=--target=$(target) 43 | BUILT_LOCATION=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_docs.$(LOADABLE_EXTENSION) 44 | BUILT_LOCATION_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_docs.$(LOADABLE_EXTENSION) 45 | BUILT_CLI=target/$(target)/debug/sqlite-docs-cli 46 | BUILT_CLI_RELEASE=target/$(target)/release/sqlite-docs-cli 47 | else 48 | CARGO_TARGET= 49 | BUILT_LOCATION=target/debug/$(LIBRARY_PREFIX)sqlite_docs.$(LOADABLE_EXTENSION) 50 | BUILT_LOCATION_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_docs.$(LOADABLE_EXTENSION) 51 | BUILT_CLI=target/debug/sqlite-docs-cli 52 | BUILT_CLI_RELEASE=target/release/sqlite-docs-cli 53 | endif 54 | 55 | ifdef python 56 | PYTHON=$(python) 57 | else 58 | PYTHON=python3 59 | endif 60 | 61 | ifdef IS_MACOS_ARM 62 | RENAME_WHEELS_ARGS=--is-macos-arm 63 | else 64 | RENAME_WHEELS_ARGS= 65 | endif 66 | 67 | $(prefix): 68 | mkdir -p $(prefix)/debug 69 | mkdir -p $(prefix)/release 70 | 71 | $(TARGET_WHEELS): $(prefix) 72 | mkdir -p $(TARGET_WHEELS) 73 | 74 | $(TARGET_WHEELS_RELEASE): $(prefix) 75 | mkdir -p $(TARGET_WHEELS_RELEASE) 76 | 77 | $(TARGET_LOADABLE): $(prefix) $(shell find . -type f -name '*.rs') 78 | cargo build $(CARGO_TARGET) 79 | cp $(BUILT_LOCATION) $@ 80 | 81 | $(TARGET_LOADABLE_RELEASE): $(prefix) $(shell find . -type f -name '*.rs') 82 | cargo build --release $(CARGO_TARGET) 83 | cp $(BUILT_LOCATION_RELEASE) $@ 84 | 85 | $(TARGET_CLI): $(prefix) $(shell find . -type f -name '*.rs') 86 | cargo build $(CARGO_TARGET) 87 | cp $(BUILT_CLI) $@ 88 | 89 | $(TARGET_CLI_RELEASE): $(prefix) $(shell find . -type f -name '*.rs') 90 | cargo build --release $(CARGO_TARGET) 91 | cp $(BUILT_CLI_RELEASE) $@ 92 | 93 | python: $(TARGET_WHEELS) $(TARGET_LOADABLE) python/sqlite_docs/setup.py python/sqlite_docs/sqlite_docs/__init__.py .github/workflows/rename-wheels.py 94 | cp $(TARGET_LOADABLE) $(INTERMEDIATE_PYPACKAGE_EXTENSION) 95 | rm $(TARGET_WHEELS)/sqlite_docs* || true 96 | pip3 wheel python/sqlite_docs/ -w $(TARGET_WHEELS) 97 | python3 .github/workflows/rename-wheels.py $(TARGET_WHEELS) $(RENAME_WHEELS_ARGS) 98 | 99 | python-release: $(TARGET_LOADABLE_RELEASE) $(TARGET_WHEELS_RELEASE) python/sqlite_docs/setup.py python/sqlite_docs/sqlite_docs/__init__.py .github/workflows/rename-wheels.py 100 | cp $(TARGET_LOADABLE_RELEASE) $(INTERMEDIATE_PYPACKAGE_EXTENSION) 101 | rm $(TARGET_WHEELS_RELEASE)/sqlite_docs* || true 102 | pip3 wheel python/sqlite_docs/ -w $(TARGET_WHEELS_RELEASE) 103 | python3 .github/workflows/rename-wheels.py $(TARGET_WHEELS_RELEASE) $(RENAME_WHEELS_ARGS) 104 | 105 | datasette: $(TARGET_WHEELS) python/datasette_sqlite_docs/setup.py python/datasette_sqlite_docs/datasette_sqlite_docs/__init__.py 106 | rm $(TARGET_WHEELS)/datasette* || true 107 | pip3 wheel python/datasette_sqlite_docs/ --no-deps -w $(TARGET_WHEELS) 108 | 109 | datasette-release: $(TARGET_WHEELS_RELEASE) python/datasette_sqlite_docs/setup.py python/datasette_sqlite_docs/datasette_sqlite_docs/__init__.py 110 | rm $(TARGET_WHEELS_RELEASE)/datasette* || true 111 | pip3 wheel python/datasette_sqlite_docs/ --no-deps -w $(TARGET_WHEELS_RELEASE) 112 | 113 | npm: VERSION npm/platform-package.README.md.tmpl npm/platform-package.package.json.tmpl npm/sqlite-docs/package.json.tmpl scripts/npm_generate_platform_packages.sh 114 | scripts/npm_generate_platform_packages.sh 115 | 116 | deno: VERSION deno/deno.json.tmpl 117 | scripts/deno_generate_package.sh 118 | 119 | Cargo.toml: VERSION 120 | cargo set-version `cat VERSION` 121 | 122 | python/sqlite_docs/sqlite_docs/version.py: VERSION 123 | printf '__version__ = "%s"\n__version_info__ = tuple(__version__.split("."))\n' `cat VERSION` > $@ 124 | 125 | python/datasette_sqlite_docs/datasette_sqlite_docs/version.py: VERSION 126 | printf '__version__ = "%s"\n__version_info__ = tuple(__version__.split("."))\n' `cat VERSION` > $@ 127 | 128 | bindings/ruby/lib/version.rb: bindings/ruby/lib/version.rb.tmpl VERSION 129 | VERSION=$(VERSION) envsubst < $< > $@ 130 | 131 | ruby: bindings/ruby/lib/version.rb 132 | 133 | version: 134 | make Cargo.toml 135 | make python/sqlite_docs/sqlite_docs/version.py 136 | make python/datasette_sqlite_docs/datasette_sqlite_docs/version.py 137 | make npm 138 | make deno 139 | make ruby 140 | 141 | 142 | format: 143 | cargo fmt 144 | 145 | sqlite-docs.h: cbindgen.toml 146 | rustup run nightly cbindgen --config $< -o $@ 147 | 148 | release: $(TARGET_LOADABLE_RELEASE) $(TARGET_STATIC_RELEASE) 149 | 150 | loadable: $(TARGET_LOADABLE) 151 | loadable-release: $(TARGET_LOADABLE_RELEASE) 152 | 153 | static: $(TARGET_STATIC) 154 | static-release: $(TARGET_STATIC_RELEASE) 155 | 156 | loadable: $(TARGET_CLI) 157 | loadable-release: $(TARGET_CLI_RELEASE) 158 | 159 | debug: loadable static python datasette 160 | release: loadable-release static-release python-release datasette-release 161 | 162 | clean: 163 | rm dist/* 164 | cargo clean 165 | 166 | test-loadable: 167 | $(PYTHON) tests/test-loadable.py 168 | 169 | test-python: 170 | $(PYTHON) tests/test-python.py 171 | 172 | test-npm: 173 | node npm/sqlite-docs/test.js 174 | 175 | test-deno: 176 | deno task --config deno/deno.json test 177 | 178 | test: 179 | make test-loadable 180 | make test-python 181 | make test-npm 182 | make test-deno 183 | 184 | publish-release: 185 | ./scripts/publish_release.sh 186 | 187 | .PHONY: clean \ 188 | test test-loadable test-python test-npm test-deno \ 189 | loadable loadable-release \ 190 | python python-release \ 191 | datasette datasette-release \ 192 | static static-release \ 193 | debug release \ 194 | format version publish-release \ 195 | npm deno ruby 196 | -------------------------------------------------------------------------------- /NOTES.md: -------------------------------------------------------------------------------- 1 | - doc query: table with all 2 | - xinfo: type, notnull, default, primary key, hidden, generated 3 | - foreign_key_list: FK outbound 4 | - foreign_key_list: FK inboud 5 | - doc: description, examples, source? 6 | - generated markdown 7 | - primary key distiction 8 | - other metadata 9 | - tables or headings? 10 | - examples 11 | - core: lexer+parser 12 | 13 | FKS: 14 | 15 | ```sql 16 | .mode box 17 | .header on 18 | 19 | select name from pragma_table_list; 20 | select * from pragma_foreign_key_list('offices'); 21 | select 22 | pragma_table_list.name, 23 | fk.[from], 24 | fk.[table], 25 | fk.[to] 26 | from pragma_table_list 27 | join pragma_foreign_key_list(pragma_table_list.name) as fk; 28 | select 29 | pragma_table_list.name, 30 | fk.[from], 31 | fk.[table], 32 | fk.[to] 33 | from pragma_table_list 34 | full outer join pragma_foreign_key_list(pragma_table_list.name) as fk; 35 | 36 | with tables as ( 37 | select 38 | pragma_table_list.name, 39 | json_group_array(fk.[table]) filter (where fk.[table] is not null) as dep_tables 40 | from pragma_table_list 41 | full outer join pragma_foreign_key_list(pragma_table_list.name) as fk 42 | group by 1 43 | ) 44 | select * 45 | from tables 46 | order by json_array_length(dep_tables), 1; 47 | 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `sqlite-docs` 2 | 3 | A SQLite extension, CLI, and library for documentating SQLite tables, columns, and extensions. 4 | 5 | **Warning** 6 | 7 | > `sqlite-docs` is still young and not fully developed. Watch this repo or [subscribe to my newsletter](https://buttondown.email/alexgarcia) for updates. Also consider [sponsoring my work](https://github.com/sponsors/asg017) if you want this project to release earlier! 8 | 9 | `sqlite-docs` work with "SQLite doc comments" inside `CREATE TABLE` statements, similar to [Rust doc comments](https://doc.rust-lang.org/reference/comments.html#doc-comments), [Go Doc Comments](https://tip.golang.org/doc/comment), and [JSDoc](https://jsdoc.app/). 10 | 11 | Single-line comments that start with an exclamation point (`--!`) are comments and descriptions for the entire table. Single line comments that start with a dash (`---`) are comments and description for the following column. 12 | 13 | ```sql 14 | create table students( 15 | --! All students that attend Foo University. One row per enrolled 16 | --! student, active and historial. 17 | 18 | --- ID assigned to the student at orientation. 19 | --- @details https://foo.edu/students/id-format.html 20 | --- @example 'S10483' 21 | student_id text primary key, 22 | 23 | --- Full name of the student, includes first and last name. 24 | --- @example 'Alex Garcia' 25 | name text, 26 | 27 | --- Birthday of the student, in YYYY-MM-DD format. 28 | --- @example '1970-01-01' 29 | birthdate date, 30 | 31 | --- Number of course units the student has completed, since 32 | -- the last completed academic quarter. 33 | -- @example 62.5 34 | completed_units float 35 | ); 36 | ``` 37 | 38 | Once the tables in your database are documented, you can use the `sqlite-docs` CLI to generate documentation in different formats. Here's an example of generating a markdown data dictionary file: 39 | 40 | ```bash 41 | sqlite-docs generate-markdown my.db -o README.md 42 | ``` 43 | 44 | Or a [Datasette `metadata.json`](https://docs.datasette.io/en/stable/metadata.html): 45 | 46 | ```bash 47 | sqlite-docs generate-datasette my.db -o metadata.json 48 | datasette my.db --metadata metadata.json 49 | ``` 50 | 51 | You'll then find the table and column descriptions in a few places in Datasette's UI. 52 | 53 | | | | 54 | | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | 55 | | | | 56 | 57 | ## Specification 58 | 59 | SQLite doc comments support a few "tags", as denoted by the `@` sign at the beginning of the comment. These include: 60 | 61 | | Tag | Supported for | Description | 62 | | ---------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------- | 63 | | `@example` | Columns | A SQLite literal of an example non-null value the column can contain. | 64 | | `@value` | Columns | A possible non-null value that a column could be. Meant to exhaustively list all enum options a column has | 65 | | `@details` | Columns, Tables | A link or instructions to find more information about the format/contents of a column, such as ID formats, datetime formats, etc. | 66 | | `@source` | Tables | A link or instructions to the raw source of the data backing at able. | 67 | | `@schema` | Columns | | 68 | 69 | ## Limitations 70 | 71 | ### No `ALTER TABLE` support 72 | 73 | SQLite will by persist comments in `CREATE TABLE` statements, in the `sqlite_master` table. 74 | 75 | However, some `ALTER TABLE` statements will erase or modify these comments, such as with `ALTER TABLE x ADD COLUMN`, `ALTER TABLE x REMOVE COLUMN`, and more. 76 | 77 | If you think your table will need `ALTER TABLE` statements in the future, then know that `sqlite-docs` comments may break. You can always create a new table with your new schema, import data from the old table to the new one, then delete the old table. 78 | 79 | ### `sqlite-docs` tools use a weak parser 80 | 81 | `sqlite-docs` uses heuristics and simple line-by-line analysis to extract doc comments. It does not fully parse `CREATE TABLE` statements or use an AST. This means that certain valid SQL will not work with `sqlite-docs` including: 82 | 83 | - Column names with spaces or newline characters 84 | - Column definitions that span multiple lines 85 | - Multi-line comments `/* ... */` 86 | - `FOREIGN KEY` and other constraints 87 | 88 | In the far future `sqlite-docs` will be able to handle all of these cases. If you'd like to see this happen sooner, consider [sponsoring my work](https://github.com/sponsors/asg017). 89 | 90 | ### `sqlite-docs` is early in development 91 | 92 | It'll get better and easier to use! `sqlite-docs` will most likely be distributed as: 93 | 94 | - A CLI for major platforms (MacOS, Linux, Windows etc.) 95 | - A SQLite extension in the [`sqlite-ecosystem`](https://github.com/asg017/sqlite-ecosystem) 96 | - A Datasette plugin 97 | - A high-level Python library for parsing indivudal statements 98 | - A low-level Rust library for parsing individual statements 99 | -------------------------------------------------------------------------------- /cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-docs-cli" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rusqlite = { version = "0.29.0", features=["bundled"] } 8 | sqlite-docs-extension = { path = "../extension" } 9 | sqlite-docs-core = { path = "../core" } 10 | serde_json = "1" 11 | serde = "1" 12 | clap = {version="4.1.8", features=["derive"]} 13 | anyhow = "1.0" 14 | indexmap = { version = "2.0.0", features=["serde"] } 15 | -------------------------------------------------------------------------------- /cli/src/datasette.rs: -------------------------------------------------------------------------------- 1 | use indexmap::IndexMap; 2 | use serde::{Deserialize, Serialize}; 3 | use sqlite_docs_core::TableDocs; 4 | 5 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] 6 | pub struct DatasetteMetadata { 7 | pub databases: IndexMap, 8 | } 9 | 10 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] 11 | pub struct DatasetteMetadataDatabase { 12 | tables: IndexMap, 13 | } 14 | 15 | #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] 16 | pub struct DatasetteMetadataTable { 17 | description: Option, 18 | columns: IndexMap, 19 | } 20 | 21 | pub fn generate_metadata(database_name: &str, tables: Vec) -> DatasetteMetadata { 22 | let mut databases = IndexMap::new(); 23 | let database = DatasetteMetadataDatabase { 24 | tables: tables 25 | .into_iter() 26 | .map(|table| { 27 | let columns = table 28 | .columns 29 | .into_iter() 30 | .map(|column| { 31 | ( 32 | column.name, 33 | column.contents.unwrap_or_else(|| "".to_owned()), 34 | ) 35 | }) 36 | .collect(); 37 | 38 | ( 39 | table.name, 40 | DatasetteMetadataTable { 41 | description: table.table_level, 42 | columns, 43 | }, 44 | ) 45 | }) 46 | .collect(), 47 | }; 48 | databases.insert(database_name.to_owned(), database); 49 | DatasetteMetadata { databases } 50 | } 51 | -------------------------------------------------------------------------------- /cli/src/main.rs: -------------------------------------------------------------------------------- 1 | mod datasette; 2 | mod markdown; 3 | 4 | use std::io::Write; 5 | 6 | use anyhow::{anyhow, Context, Result}; 7 | use clap::{Arg, ArgMatches, Command}; 8 | use rusqlite::{ffi::sqlite3_auto_extension, Connection}; 9 | use sqlite_docs_core::TableDocs; 10 | use sqlite_docs_extension::sqlite3_docs_init; 11 | 12 | fn command() -> Command { 13 | Command::new("sqlite-docs") 14 | .version(env!("CARGO_PKG_VERSION")) 15 | .author("Alex Garcia") 16 | .about("Documentation for SQLite tables, comments, and extensions") 17 | .subcommand( 18 | Command::new("generate-datasette") 19 | .about("Generate a metadata.json for Datasette") 20 | .arg(Arg::new("db").required(true)) 21 | .arg(Arg::new("output").alias("o").required(false)), 22 | ) 23 | .subcommand( 24 | Command::new("generate-markdown") 25 | .about("Generate a markdown-formatted data dictionary") 26 | .arg(Arg::new("db").required(true)) 27 | .arg(Arg::new("output").alias("o").required(false)), 28 | ) 29 | } 30 | 31 | fn execute_matches(matches: &ArgMatches) -> Result<()> { 32 | match matches.subcommand() { 33 | Some(("generate-datasette", matches)) => { 34 | let db_path = matches 35 | .get_one::("db") 36 | .context("db is a required argument")?; 37 | let db = Connection::open(db_path)?; 38 | let tables = table_docs_from(&db)?; 39 | let metadata = datasette::generate_metadata( 40 | &std::path::Path::new(db_path) 41 | .file_stem() 42 | .unwrap() 43 | .to_string_lossy(), 44 | tables, 45 | ); 46 | let mut w: Box = match matches.get_one::("output") { 47 | Some(output_path) => Box::new(std::fs::File::open(output_path)?), 48 | None => Box::new(std::io::stdout()), 49 | }; 50 | 51 | writeln!(w, "{}", serde_json::to_string_pretty(&metadata)?)?; 52 | Ok(()) 53 | } 54 | Some(("generate-markdown", matches)) => { 55 | let db_path = matches 56 | .get_one::("db") 57 | .context("db is a required argument")?; 58 | let db = Connection::open(db_path)?; 59 | let tables = table_docs_from(&db)?; 60 | let markdown = markdown::generate_markdown(tables)?; 61 | 62 | let mut w: Box = match matches.get_one::("output") { 63 | Some(output_path) => Box::new(std::fs::File::open(output_path)?), 64 | None => Box::new(std::io::stdout()), 65 | }; 66 | 67 | writeln!(w, "{}", markdown)?; 68 | Ok(()) 69 | } 70 | Some((cmd, _matches)) => Err(anyhow!("unknown subcommand {cmd}")), 71 | None => Ok(()), 72 | } 73 | } 74 | 75 | fn table_docs_from(db: &Connection) -> Result> { 76 | let mut stmt = db 77 | .prepare("select name, docs, columns from sqlite_docs_tables;") 78 | .unwrap(); 79 | let tables: Vec = stmt 80 | .query_map([], |row| { 81 | Ok(TableDocs { 82 | name: row.get(0)?, 83 | table_level: row.get(1)?, 84 | columns: serde_json::from_str(&row.get::(2)?).unwrap(), 85 | }) 86 | })? 87 | .collect::, _>>()?; 88 | Ok(tables) 89 | } 90 | 91 | fn main() -> Result<()> { 92 | unsafe { 93 | sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_docs_init as *const ()))); 94 | } 95 | let command = command(); 96 | let result = execute_matches(&command.get_matches()); 97 | if result.is_err() { 98 | println!("{:?}", result); 99 | std::process::exit(1); 100 | } 101 | Ok(()) 102 | } 103 | -------------------------------------------------------------------------------- /cli/src/markdown.rs: -------------------------------------------------------------------------------- 1 | use sqlite_docs_core::TableDocs; 2 | //use std::io::{Result, Write}; 3 | use anyhow::Result; 4 | use std::fmt::Write; 5 | 6 | pub fn generate_markdown(tables: Vec) -> Result { 7 | let mut md = String::new(); 8 | for table in tables { 9 | writeln!(md, "## {}", table.name)?; 10 | writeln!(md,)?; 11 | writeln!(md, "{}", table.table_level.unwrap_or_else(|| "".to_owned()))?; 12 | writeln!(md,)?; 13 | writeln!(md, "| Column | Description")?; 14 | writeln!(md, "|-|-")?; 15 | for column in table.columns { 16 | write!(md, "|{}", column.name)?; 17 | write!(md, "|{}", column.contents.unwrap_or_else(|| "".to_owned()))?; 18 | writeln!(md,)?; 19 | } 20 | writeln!(md,)?; 21 | } 22 | Ok(md) 23 | } 24 | -------------------------------------------------------------------------------- /core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-docs-core" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | serde = "1.0" 8 | -------------------------------------------------------------------------------- /core/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::vec; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] 6 | pub struct TableDocs { 7 | pub name: String, 8 | pub table_level: Option, 9 | pub columns: Vec, 10 | } 11 | impl TableDocs { 12 | pub fn from_sql(sql: &str, name: &str) -> Self { 13 | let mut table_level = String::new(); 14 | let mut current_column_doc = String::new(); 15 | let mut current_column_example = String::new(); 16 | let mut columns = vec![]; 17 | 18 | for (line_idx, line) in sql.lines().enumerate() { 19 | // skip first "CREATE TABLE" line 20 | if line_idx == 0 { 21 | continue; 22 | } 23 | let line = line.trim_start(); 24 | 25 | // table-level doc comment 26 | if let Some(doc_line) = line.strip_prefix("--!") { 27 | // empty "--! " lines should act as a line break 28 | if doc_line.trim().is_empty() { 29 | table_level.push('\n'); 30 | } else { 31 | table_level.push_str(doc_line); 32 | if !doc_line.ends_with(' ') { 33 | table_level.push(' ') 34 | } 35 | } 36 | } 37 | // column-level doc comment 38 | else if let Some(column_doc_line) = line.strip_prefix("--- ") { 39 | // empty "--- " lines should act as a line break 40 | if column_doc_line.trim().is_empty() { 41 | current_column_doc.push('\n'); 42 | } 43 | // column doc comments with "@example" tags should act as a comment 44 | else if let Some(example) = column_doc_line.strip_prefix("@example") { 45 | current_column_example = example.to_string(); 46 | } else if let Some(_details) = column_doc_line.strip_prefix("@details") { 47 | // TODO 48 | } else { 49 | if !current_column_doc.is_empty() && !current_column_doc.ends_with(' ') { 50 | current_column_doc.push(' ') 51 | } 52 | current_column_doc.push_str(column_doc_line); 53 | } 54 | } 55 | // skip other kind of comments 56 | else if line.starts_with("-- ") { 57 | continue; 58 | } 59 | // assume this line is a SQL column definition, so just get the first "word" 60 | // and assume it's the column name 61 | else if let Some((column_name, _)) = line.split_once(' ') { 62 | columns.push(ColumnDocs { 63 | name: column_name.to_owned(), 64 | contents: if current_column_doc.is_empty() { 65 | None 66 | } else { 67 | Some(current_column_doc.trim().to_owned()) 68 | }, 69 | example: if current_column_example.is_empty() { 70 | None 71 | } else { 72 | Some(current_column_example.trim().to_owned()) 73 | }, 74 | }); 75 | current_column_doc.clear(); 76 | current_column_example.clear(); 77 | } 78 | } 79 | let table_level = if table_level.is_empty() { 80 | None 81 | } else { 82 | Some(table_level.trim().to_owned()) 83 | }; 84 | Self { 85 | name: name.to_owned(), 86 | table_level, 87 | columns, 88 | } 89 | } 90 | } 91 | 92 | #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] 93 | pub struct ColumnDocs { 94 | pub name: String, 95 | pub contents: Option, 96 | pub example: Option, 97 | //pub extra_tags: HashMap, 98 | } 99 | 100 | #[cfg(test)] 101 | mod tests { 102 | use super::*; 103 | #[test] 104 | fn yo() { 105 | assert_eq!( 106 | TableDocs::from_sql( 107 | "create table x( 108 | a int 109 | )", 110 | "x" 111 | ), 112 | TableDocs { 113 | name: "x".to_owned(), 114 | table_level: None, 115 | columns: vec![ColumnDocs { 116 | name: "a".to_string(), 117 | contents: None, 118 | example: None, 119 | }], 120 | } 121 | ); 122 | assert_eq!( 123 | TableDocs::from_sql( 124 | "create table x( 125 | --! doc! 126 | --! doc2! 127 | a int 128 | )", 129 | "x" 130 | ), 131 | TableDocs { 132 | name: "x".to_owned(), 133 | table_level: Some("doc! doc2!".to_string()), 134 | columns: vec![ColumnDocs { 135 | name: "a".to_string(), 136 | contents: None, 137 | example: None, 138 | }], 139 | } 140 | ); 141 | assert_eq!( 142 | TableDocs::from_sql( 143 | "create table x( 144 | --- a random integer 145 | --- @example 1234 146 | -- not included 147 | a int 148 | )", 149 | "x" 150 | ), 151 | TableDocs { 152 | name: "x".to_owned(), 153 | table_level: None, 154 | columns: vec![ColumnDocs { 155 | name: "a".to_string(), 156 | contents: Some("a random integer".to_string()), 157 | example: Some("1234".to_owned()), 158 | }], 159 | } 160 | ); 161 | assert_eq!( 162 | TableDocs::from_sql( 163 | "create table x( 164 | --- a random integer 165 | --- 2nd line 166 | a int 167 | )", 168 | "x" 169 | ), 170 | TableDocs { 171 | name: "x".to_owned(), 172 | table_level: None, 173 | columns: vec![ColumnDocs { 174 | name: "a".to_string(), 175 | contents: Some("a random integer 2nd line".to_string()), 176 | example: None, 177 | }], 178 | } 179 | ); 180 | assert_eq!( 181 | TableDocs::from_sql( 182 | "create table x( 183 | --- a random integer 184 | --- 185 | --- 2nd line, with line break 186 | a int 187 | )", 188 | "x" 189 | ), 190 | TableDocs { 191 | name: "x".to_owned(), 192 | table_level: None, 193 | columns: vec![ColumnDocs { 194 | name: "a".to_string(), 195 | contents: Some("a random integer 2nd line, with line break".to_string()), 196 | example: None, 197 | }], 198 | } 199 | ); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /examples/congress/Makefile: -------------------------------------------------------------------------------- 1 | congress.db: congress.sql 2 | rm $@ 3 | sqlite3 $@ < $< 4 | 5 | metadata.json: congress.db 6 | ../../target/debug/sqlite-docs-cli generate-datasette $< > $@ 7 | 8 | docs.md: congress.db 9 | ../../target/debug/sqlite-docs-cli generate-markdown $< > $@ 10 | 11 | all: metadata.json docs.md 12 | 13 | clean: 14 | rm metadata.json docs.md 15 | 16 | datasette: congress.db metadata.json 17 | datasette serve congress.db -m metadata.json 18 | 19 | .PHONY: datasette all clean 20 | -------------------------------------------------------------------------------- /examples/congress/congress.sql: -------------------------------------------------------------------------------- 1 | .bail on 2 | 3 | begin; 4 | 5 | /* 6 | insert into sqlite_docs(table, docs) 7 | */ 8 | 9 | CREATE TABLE legislators ( 10 | --! All Members of Congress that have ever served in Congress, since 1789, as 11 | --! well as cross-walks into other databases. 12 | --! @source https://github.com/unitedstates/congress-legislators 13 | --! @license CC0-1.0 https://github.com/unitedstates/congress-legislators/blob/main/LICENSE 14 | 15 | --- Primary text ID for a legislator. An alias of the id_bioguide column. 16 | id TEXT PRIMARY KEY, 17 | 18 | --- The full name of the legislator. 19 | name TEXT, 20 | 21 | --- The alphanumeric ID for this legislator in http://bioguide.congress.gov. 22 | --- Note that at one time some legislators (women who had changed their name 23 | --- when they got married) had two entries on the bioguide website. Only one 24 | --- bioguide ID is included here. 25 | --- @example "O000172" 26 | id_bioguide TEXT, 27 | 28 | --- TODO The numeric ID for this legislator on https://www.govtrack.us 29 | --- @example 412804 30 | id_govtrack INTEGER, 31 | 32 | --- The numeric ID for this legislator in Keith Poole's VoteView.com website, 33 | --- originally based on an ID system by the Interuniversity Consortium for 34 | --- Political and Social Research (stored as an integer). 35 | --- @example 21949 36 | id_icpsr INTEGER, 37 | 38 | --- The Wikipedia page name for the person (spaces are given as spaces, not underscores). 39 | --- @example "Alexandria Ocasio-Cortez" 40 | id_wikipedia TEXT, 41 | 42 | --- The Wikidata ID for the person. 43 | --- @details https://www.wikidata.org/wiki/Wikidata:Identifiers 44 | --- @example "Q55223040" 45 | id_wikidata TEXT, 46 | 47 | --- Google's Knowledge Search Graph ID 48 | --- @details https://developers.google.com/knowledge-graph/ 49 | --- @example "kg:/g/11f6y3nqg6" 50 | id_google_entity_id TEXT, 51 | 52 | --- The legislator's recognizable first name. Many people go by a different name than 53 | --- their legal first name, often their legal middle name, and our approach is to ensure 54 | --- that our first + last name fields combine to a recognizable name of the legislator. 55 | --- Normally we'll follow the name as it appears on House.gov or Senate.gov (and bioguide.congress.gov), 56 | --- which follows the legislator's own preference for how they want to be named in official places. 57 | --- However, in some cases the legislator goes by a first name that is merely a common short or informal 58 | --- form of their legal first name (e.g. Chris vs Christopher), and while they may prefer the informal 59 | --- name, we may use their longer legal first name because they would be recognizable by their legal name. 60 | --- If they sign official documents (e.g. letters to agencies, FEC filings) using their longer legal first 61 | --- name, we would use their legal first name and put their preferred shorter name in the nickname field. 62 | --- When legislators go by a first initial and middle name, we set the first name field to the initial 63 | --- (one character plus a period). 64 | --- @example "Alexandria" 65 | name_first TEXT, 66 | 67 | --- The legislator's last name. Some names include non-ASCII characters. 68 | --- When building search systems, it is advised to index both the raw value as 69 | --- well as a value with extended characters replaced with their ASCII equivalents 70 | --- @example "Ocasio-Cortez" 71 | name_last TEXT, 72 | 73 | --- The legislator's birthday, in YYYY-MM-DD format. 74 | --- @example "1989-10-13" 75 | bio_birthday TEXT, 76 | 77 | --- The legislator's gender, either "M" or "F". 78 | --- @example "F" 79 | bio_gender TEXT, 80 | 81 | --- The numeric ID for this legislator on http://history.house.gov/People/Search/. 82 | --- The ID is present only for members who have served in the U.S. House. 83 | id_house_history INTEGER, 84 | 85 | --- The legislator's middle name or middle initial (with period). It is not 86 | --- recommended to display this field, unless the first name field is an 87 | -- initial (one character plus a period). 88 | name_middle TEXT, 89 | 90 | --- The legislator's nick name when used as a common alternative to their first name. 91 | --- Usually displayed within quotes after the first name. If they are generally only 92 | --- known by a nickname, we would likely place the name in the first name field instead. 93 | name_nickname TEXT, 94 | 95 | --- The ballotpedia.org page name for the person (spaces are given as spaces, not underscores). 96 | id_ballotpedia TEXT, 97 | 98 | --- A suffix on the legislator's name, such as "Jr." or "III", but only if they use it in 99 | --- official contexts, such as if it appears on House.gov or Senate.gov. 100 | --- @example "III" 101 | name_suffix TEXT, 102 | 103 | --- When bioguide.congress.gov mistakenly listed a legislator under multiple IDs, this field is 104 | --- a list of alternative IDs. (This often ocurred for women who changed their name.) The IDs in 105 | -- this list probably were removed from bioguide.congress.gov but might still be in use in the wild. 106 | --- @example '["F000246"]' 107 | id_bioguide_previous TEXT, 108 | 109 | --- TODO 110 | --- @example 13283 111 | id_house_history_alternate INTEGER, 112 | 113 | --- TODO 114 | -- @example ' [{"end": "1846-01-12","middle": null,"last": "Levy"}]' 115 | other_names TEXT, 116 | 117 | --- The numeric ID for this legislator on http://thomas.gov and 118 | --- http://beta.congress.gov. The ID is stored as a string with leading zeros preserved. 119 | --- @example 02263 120 | id_thomas TEXT, 121 | 122 | --- The numeric ID for this legislator on C-SPAN's video website, 123 | --- e.g. http://www.c-spanvideo.org/person/1745 124 | --- @example 76364 125 | id_cspan INTEGER, 126 | 127 | --- The numeric ID for this legislator on VoteSmart.org 128 | --- @example 180416 129 | id_votesmart INTEGER, 130 | 131 | --- lis: The alphanumeric ID for this legislator found in Senate roll call votes 132 | --- http://www.senate.gov/pagelayout/legislative/a_three_sections_with_teasers/votes.htm 133 | --- @example TODO 134 | id_lis TEXT, 135 | 136 | --- The legislator's nick name when used as a common alternative to their first name. 137 | --- Usually displayed within quotes after the first name. If they are generally only 138 | --- known by a nickname, we would likely place the name in the first name field instead. 139 | --- @example "Alexandria Ocasio-Cortez" 140 | name_official_full TEXT, 141 | 142 | --- The alphanumeric ID for this legislator on OpenSecrets.org 143 | --- @example "N00041162" 144 | id_opensecrets TEXT, 145 | 146 | --- A list of IDs for this legislator in Federal Election Commission data. 147 | --- @example '["H8NY15148"]' 148 | id_fec TEXT, 149 | 150 | --- The numeric ID for this legislator on maplight.org (stored as an integer). 151 | --- @example 2104 152 | id_maplight INTEGER, 153 | 154 | --- TODO 155 | --- @schema ??? 156 | --- @example '[{ "title": "House Republican Conference Chair", "chamber": "house", "start": "2021-05-14"}]' 157 | leadership_roles TEXT, 158 | 159 | --- TODO 160 | --- @example '[{"name": "Joseph Patrick Kennedy II","relation": "son"}, ...]' 161 | family TEXT 162 | ); 163 | 164 | 165 | CREATE TABLE legislator_terms ( 166 | --! Individual terms that legislators served for. One entry for each election. 167 | 168 | --- The legislator that served the term. 169 | legislator_id TEXT REFERENCES legislators(id), 170 | 171 | --- The type of the term. Either "sen" for senators or "rep" for representatives 172 | --- and delegates to the House. 173 | --- @values 'rep' OR 'sen' 174 | --- @example 'rep' 175 | type TEXT, 176 | 177 | --- The two-letter, uppercase USPS abbreviation for the state that the 178 | --- legislator is serving from. 179 | --- @example 'NY' 180 | state TEXT, 181 | 182 | --- The date legislative service began: the date the legislator was sworn in, 183 | --- if known, or else the beginning of the legislator's term. Since 1935 184 | --- regularly elected terms begin on January 3 at noon on odd-numbered years, 185 | --- but when Congress does not first meet on January 3, term start dates might 186 | --- reflect that swearing-in occurred on a later date. (Prior to 1935, terms 187 | --- began on March 4 of odd-numbered years, see here.) 188 | --- In YYYY-MM-DD format. 189 | --- @example '2019-01-03' 190 | start TEXT, 191 | 192 | --- End date of the legislative term. In YYYY-MM-DD format. 193 | --- @example '2021-01-03' 194 | end TEXT, 195 | 196 | --- TODO 197 | class INTEGER, 198 | 199 | --- The political party of the legislator. If the legislator changed parties, this 200 | --- is the most recent party held during the term and party_affiliations will be set. 201 | --- Values are typically "Democrat", "Independent", or "Republican". 202 | --- The value typically matches the political party of the legislator on the ballot 203 | --- in his or her last election, although for state affiliate parties such as 204 | --- "Democratic Farmer Labor" we will use the national party name ("Democrat") instead 205 | --- to keep the values of this field normalized. 206 | --- @example 'Democrat' 207 | party TEXT, 208 | 209 | --- TODO 210 | --- 0 for At-Large 211 | --- @example 14 212 | district INTEGER, 213 | 214 | --- TODO 215 | --- @example 'appointment' 216 | --- @possible 'appointment' OR 'special-election' 217 | how TEXT, 218 | 219 | --- TODO 220 | --- @example [{"start": "2019-01-03", "end": "2019-07-03", "party": "Republican" }, ... ] 221 | party_affiliations TEXT, 222 | 223 | --- TODO 224 | --- @example 'https://ocasio-cortez.house.gov' 225 | url TEXT, 226 | 227 | --- For independents, the party that the legislator caucuses with, using the same 228 | --- values as the party field--although not required for independents with no 229 | --- caucus. Omitted if the legislator caucuses with the party indicated in the 230 | --- party field. When in doubt about the difference between the party and caucus 231 | --- fields, the party field is what displays after the legislator's name 232 | --- (i.e. "(D)") but the caucus field is what normally determines committee seniority. 233 | --- This field was added starting with terms for the 113th Congress. 234 | --- @example 'Democrat' 235 | caucus TEXT, 236 | 237 | --- TODO 238 | --- @example '229 Cannon House Office Building Washington DC 20515-3214' 239 | address TEXT, 240 | 241 | --- TODO 242 | --- @example '202-225-3965' 243 | phone TEXT, 244 | 245 | --- TODO 246 | --- @example '202-228-5097' 247 | fax TEXT, 248 | 249 | --- TODO 250 | --- @example 'https://www.collins.senate.gov/contact' 251 | contact_form TEXT, 252 | 253 | --- TODO 254 | --- @example '229 Cannon House Office Building' 255 | office TEXT, 256 | 257 | --- TODO 258 | --- @example 'senior' OR 'junior' 259 | state_rank TEXT, 260 | 261 | --- The URL to the official website's RSS feed (only valid if the term is 262 | -- current, otherwise the last known URL). 263 | --- @example 'http://www.collins.senate.gov/public/?a=rss.feed' 264 | rss_url TEXT, 265 | 266 | --- TODO 267 | --- @example 'special-election' 268 | [end-type] TEXT 269 | ); 270 | CREATE TABLE offices ( 271 | --! offices 272 | 273 | --- TODO 274 | id TEXT PRIMARY KEY, 275 | --- TODO 276 | legislator_id TEXT REFERENCES legislators(id), 277 | --- TODO 278 | address TEXT, 279 | --- TODO 280 | suite TEXT, 281 | --- TODO 282 | city TEXT, 283 | --- TODO 284 | state TEXT, 285 | --- TODO 286 | zip TEXT, 287 | --- TODO 288 | latitude FLOAT, 289 | --- TODO 290 | longitude FLOAT, 291 | --- TODO 292 | phone TEXT, 293 | --- TODO 294 | fax TEXT, 295 | --- TODO 296 | building TEXT, 297 | --- TODO 298 | hours TEXT 299 | ); 300 | 301 | CREATE TABLE social_media ( 302 | --! socy meddy 303 | 304 | --- TODO 305 | id TEXT PRIMARY KEY, 306 | --- TODO 307 | legislator_id TEXT REFERENCES legislators(id), 308 | --- TODO 309 | twitter TEXT, 310 | --- TODO 311 | facebook TEXT, 312 | --- TODO 313 | youtube_id TEXT, 314 | --- TODO 315 | twitter_id INTEGER, 316 | --- TODO 317 | youtube TEXT, 318 | --- TODO 319 | instagram TEXT, 320 | --- TODO 321 | instagram_id INTEGER 322 | ); 323 | 324 | CREATE TABLE executives ( 325 | --! execies 326 | 327 | --- TODO 328 | id INTEGER PRIMARY KEY, 329 | --- TODO 330 | name TEXT, 331 | --- TODO 332 | id_bioguide TEXT, 333 | --- TODO 334 | id_govtrack INTEGER, 335 | --- TODO 336 | id_icpsr_prez INTEGER, 337 | --- TODO 338 | name_first TEXT, 339 | --- TODO 340 | name_last TEXT, 341 | --- TODO 342 | bio_birthday TEXT, 343 | --- TODO 344 | bio_gender TEXT, 345 | --- TODO 346 | id_icpsr INTEGER, 347 | --- TODO 348 | name_suffix TEXT, 349 | --- TODO 350 | name_middle TEXT, 351 | --- TODO 352 | id_thomas TEXT, 353 | --- TODO 354 | name_nickname TEXT, 355 | --- TODO 356 | id_lis TEXT, 357 | --- TODO 358 | id_opensecrets TEXT, 359 | --- TODO 360 | id_votesmart INTEGER, 361 | --- TODO 362 | id_fec TEXT, 363 | --- TODO 364 | id_cspan INTEGER, 365 | --- TODO 366 | id_wikipedia TEXT, 367 | --- TODO 368 | id_wikidata TEXT, 369 | --- TODO 370 | id_google_entity_id TEXT, 371 | --- TODO 372 | id_ballotpedia TEXT, 373 | --- TODO 374 | id_house_history INTEGER, 375 | --- TODO 376 | id_maplight INTEGER, 377 | --- TODO 378 | name_official_full TEXT 379 | ); 380 | 381 | CREATE TABLE executive_terms ( 382 | --! execy terms 383 | 384 | --- TODO 385 | type TEXT, 386 | --- TODO 387 | start TEXT, 388 | --- TODO 389 | end TEXT, 390 | --- TODO 391 | party TEXT, 392 | --- TODO 393 | how TEXT, 394 | --- TODO 395 | executive_id INTEGER REFERENCES executives(id) 396 | ); 397 | 398 | commit; 399 | 400 | attach database 'legislators.db' as legislators; 401 | 402 | insert into legislators 403 | select * from legislators.legislators; 404 | -------------------------------------------------------------------------------- /examples/congress/docs.md: -------------------------------------------------------------------------------- 1 | ## legislators 2 | 3 | All Members of Congress that have ever served in Congress, since 1789, as well as cross-walks into other databases. @source https://github.com/unitedstates/congress-legislators @license CC0-1.0 https://github.com/unitedstates/congress-legislators/blob/main/LICENSE 4 | 5 | | Column | Description 6 | |-|- 7 | |id|Primary text ID for a legislator. An alias of the id_bioguide column. 8 | |name|The full name of the legislator. 9 | |id_bioguide|The alphanumeric ID for this legislator in http://bioguide.congress.gov. Note that at one time some legislators (women who had changed their name when they got married) had two entries on the bioguide website. Only one bioguide ID is included here. 10 | |id_govtrack|TODO The numeric ID for this legislator on https://www.govtrack.us 11 | |id_icpsr|The numeric ID for this legislator in Keith Poole's VoteView.com website, originally based on an ID system by the Interuniversity Consortium for Political and Social Research (stored as an integer). 12 | |id_wikipedia|The Wikipedia page name for the person (spaces are given as spaces, not underscores). 13 | |id_wikidata|The Wikidata ID for the person. @details https://www.wikidata.org/wiki/Wikidata:Identifiers 14 | |id_google_entity_id|TODO 15 | |name_first|The legislator's recognizable first name. Many people go by a different name than their legal first name, often their legal middle name, and our approach is to ensure that our first + last name fields combine to a recognizable name of the legislator. Normally we'll follow the name as it appears on House.gov or Senate.gov (and bioguide.congress.gov), which follows the legislator's own preference for how they want to be named in official places. However, in some cases the legislator goes by a first name that is merely a common short or informal form of their legal first name (e.g. Chris vs Christopher), and while they may prefer the informal name, we may use their longer legal first name because they would be recognizable by their legal name. If they sign official documents (e.g. letters to agencies, FEC filings) using their longer legal first name, we would use their legal first name and put their preferred shorter name in the nickname field. When legislators go by a first initial and middle name, we set the first name field to the initial (one character plus a period). 16 | |name_last|The legislator's last name. Some names include non-ASCII characters. When building search systems, it is advised to index both the raw value as well as a value with extended characters replaced with their ASCII equivalents 17 | |bio_birthday|The legislator's birthday, in YYYY-MM-DD format. 18 | |bio_gender|The legislator's gender, either "M" or "F". 19 | |id_house_history|The numeric ID for this legislator on http://history.house.gov/People/Search/. The ID is present only for members who have served in the U.S. House. 20 | |name_middle|The legislator's middle name or middle initial (with period). It is not recommended to display this field, unless the first name field is an 21 | |name_nickname|The legislator's nick name when used as a common alternative to their first name. Usually displayed within quotes after the first name. If they are generally only known by a nickname, we would likely place the name in the first name field instead. 22 | |id_ballotpedia|The ballotpedia.org page name for the person (spaces are given as spaces, not underscores). 23 | |name_suffix|A suffix on the legislator's name, such as "Jr." or "III", but only if they use it in official contexts, such as if it appears on House.gov or Senate.gov. 24 | |id_bioguide_previous|When bioguide.congress.gov mistakenly listed a legislator under multiple IDs, this field is a list of alternative IDs. (This often ocurred for women who changed their name.) The IDs in 25 | |id_house_history_alternate|TODO 26 | |other_names|TODO 27 | |id_thomas|The numeric ID for this legislator on http://thomas.gov and http://beta.congress.gov. The ID is stored as a string with leading zeros preserved. 28 | |id_cspan|The numeric ID for this legislator on C-SPAN's video website, e.g. http://www.c-spanvideo.org/person/1745 29 | |id_votesmart|The numeric ID for this legislator on VoteSmart.org 30 | |id_lis|lis: The alphanumeric ID for this legislator found in Senate roll call votes http://www.senate.gov/pagelayout/legislative/a_three_sections_with_teasers/votes.htm 31 | |name_official_full|The legislator's nick name when used as a common alternative to their first name. Usually displayed within quotes after the first name. If they are generally only known by a nickname, we would likely place the name in the first name field instead. 32 | |id_opensecrets|The alphanumeric ID for this legislator on OpenSecrets.org 33 | |id_fec|A list of IDs for this legislator in Federal Election Commission data. 34 | |id_maplight|The numeric ID for this legislator on maplight.org (stored as an integer). 35 | |leadership_roles|TODO @schema ??? 36 | |family|TODO 37 | 38 | ## legislator_terms 39 | 40 | legos terms 41 | 42 | | Column | Description 43 | |-|- 44 | |legislator_id|TODO 45 | |type|TODO 46 | |state|TODO 47 | |start|TODO 48 | |end|TODO 49 | |class|TODO 50 | |party|TODO 51 | |district|TODO 52 | |how|TODO 53 | |party_affiliations|TODO 54 | |url|TODO 55 | |caucus|TODO 56 | |address|TODO 57 | |phone|TODO 58 | |fax|TODO 59 | |contact_form|TODO 60 | |office|TODO 61 | |state_rank|TODO 62 | |rss_url|TODO 63 | |[end-type]|TODO 64 | 65 | ## offices 66 | 67 | offices 68 | 69 | | Column | Description 70 | |-|- 71 | |id|TODO 72 | |legislator_id|TODO 73 | |address|TODO 74 | |suite|TODO 75 | |city|TODO 76 | |state|TODO 77 | |zip|TODO 78 | |latitude|TODO 79 | |longitude|TODO 80 | |phone|TODO 81 | |fax|TODO 82 | |building|TODO 83 | |hours|TODO 84 | 85 | ## social_media 86 | 87 | socy meddy 88 | 89 | | Column | Description 90 | |-|- 91 | |id|TODO 92 | |legislator_id|TODO 93 | |twitter|TODO 94 | |facebook|TODO 95 | |youtube_id|TODO 96 | |twitter_id|TODO 97 | |youtube|TODO 98 | |instagram|TODO 99 | |instagram_id|TODO 100 | 101 | ## executives 102 | 103 | execies 104 | 105 | | Column | Description 106 | |-|- 107 | |id|TODO 108 | |name|TODO 109 | |id_bioguide|TODO 110 | |id_govtrack|TODO 111 | |id_icpsr_prez|TODO 112 | |name_first|TODO 113 | |name_last|TODO 114 | |bio_birthday|TODO 115 | |bio_gender|TODO 116 | |id_icpsr|TODO 117 | |name_suffix|TODO 118 | |name_middle|TODO 119 | |id_thomas|TODO 120 | |name_nickname|TODO 121 | |id_lis|TODO 122 | |id_opensecrets|TODO 123 | |id_votesmart|TODO 124 | |id_fec|TODO 125 | |id_cspan|TODO 126 | |id_wikipedia|TODO 127 | |id_wikidata|TODO 128 | |id_google_entity_id|TODO 129 | |id_ballotpedia|TODO 130 | |id_house_history|TODO 131 | |id_maplight|TODO 132 | |name_official_full|TODO 133 | 134 | ## executive_terms 135 | 136 | execy terms 137 | 138 | | Column | Description 139 | |-|- 140 | |type|TODO 141 | |start|TODO 142 | |end|TODO 143 | |party|TODO 144 | |how|TODO 145 | |executive_id|TODO 146 | 147 | 148 | -------------------------------------------------------------------------------- /examples/nyc-taxi/schema.sql: -------------------------------------------------------------------------------- 1 | 2 | create table nyc_yellow_taxi_trips( 3 | --! Yellow Taxi trips made in New York City. Each row is a specific trip, with a set start and 4 | --! end time. This table is specifically for yellow taxi cab rides, NOT green or FHV trips. 5 | --! 6 | --! This data is publishecad by the NYC Taxi & Limousine Commission. 7 | --! 8 | --! @source https://www.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_yellow.pdf 9 | 10 | --- A code indicating the TPEP provider that provided the record. 11 | --- @value 1, Creative Mobile Technologies, LLC 12 | --- @value 2, VeriFone Inc. 13 | --- @example 1 14 | vendor_id int, 15 | 16 | --- Datetime when the meter was engaged. In YYYY-MM-DD HH:MM:SS format. 17 | --- @example "2023-01-01 00:32:10" 18 | pickup_datetime datetime, 19 | 20 | --- @example "2023-01-01 00:40:36" 21 | dropoff_datetime datetime, 22 | 23 | --- The number of passengers in the vehicle, as reported by the driver. 24 | --- @example 1 25 | passenger_count int, 26 | 27 | --- The elapsed trip distance in miles reported by the taximeter. 28 | --- @example 0.97 29 | trip_distance float, 30 | 31 | --- TLC Taxi Zone in which the taximeter was engaged 32 | --- @example 161 33 | pickup_location_id int, 34 | 35 | --- TLC Taxi Zone in which the taximeter was disengaged 36 | --- @example 141 37 | dropoff_location_id int, 38 | 39 | --- A numeric code signifying how the passenger paid for the trip. 40 | --- 1: Credit card 41 | --- 2: Cash 42 | --- 3: No charge 43 | --- 4: Dispute 44 | --- 5: Unknown 45 | --- 6: Voided trip 46 | --- @example 4 47 | payment_type int, 48 | 49 | --- The time-and-distance fare calculated by the meter 50 | --- @example 9.3 51 | fare_amount float, 52 | 53 | --- Miscellaneous extras and surcharges. Currently, this only includes the 54 | --- $0.50 and $1 rush hour and overnight charges. 55 | --- @example 1.0 56 | extra_amount float, 57 | 58 | --- $0.50 MTA tax that is automatically triggered based on the metered rate 59 | -- in use. 60 | --- @example 0.5 61 | mta_tax float, 62 | 63 | --- This field is automatically populated for credit card tips. 64 | --- Cash tips are not included. 65 | --- @example 4.0 66 | tip_amount float, 67 | 68 | 69 | --- Total amount of all tolls paid in trip. 70 | --- @example 3.0 71 | tolls_amount float, 72 | 73 | --- The total amount charged to passengers. Does not include cash tips 74 | --- @example 16.9 75 | total_amount float 76 | ); 77 | -------------------------------------------------------------------------------- /examples/simple/Makefile: -------------------------------------------------------------------------------- 1 | students.db: schema.sql 2 | rm $@ || true 3 | sqlite3 $@ < $< 4 | sqlite3 $@ 'insert into students select "S10483", "Alex Garcia", "1970-01-01", 62.5;' 5 | 6 | metadata.json: students.db 7 | ../../target/debug/sqlite-docs-cli generate-datasette $< > $@ 8 | 9 | docs.md: students.db 10 | ../../target/debug/sqlite-docs-cli generate-markdown $< > $@ 11 | 12 | all: metadata.json docs.md 13 | 14 | clean: 15 | rm metadata.json docs.md 16 | 17 | datasette: students.db metadata.json 18 | datasette serve students.db -m metadata.json 19 | 20 | .PHONY: datasette all clean 21 | -------------------------------------------------------------------------------- /examples/simple/docs.md: -------------------------------------------------------------------------------- 1 | ## students 2 | 3 | All students that attend Foo University. One row per enrolled student, active and historial. 4 | 5 | | Column | Description 6 | |-|- 7 | |student_id|ID assigned to the student at orientation. @details https://foo.edu/students/id-format.html 8 | |name|Full name of the student, includes first and last name. 9 | |birthdate|Birthday of the student, in "YYY-MM-DD" format. 10 | |completed_units|Number of course units the student has completed, since 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/simple/schema.sql: -------------------------------------------------------------------------------- 1 | create table students( 2 | --! All students that attend Foo University. One row per enrolled 3 | --! student, active and historial. 4 | 5 | --- ID assigned to the student at orientation. 6 | --- @details https://foo.edu/students/id-format.html 7 | --- @example S10483 8 | student_id text primary key, 9 | 10 | --- Full name of the student, includes first and last name. 11 | --- @example "Alex Garcia" 12 | name text, 13 | 14 | --- Birthday of the student, in "YYY-MM-DD" format. 15 | --- @example "1970-01-01" 16 | birthdate date, 17 | 18 | --- Number of course units the student has completed, since 19 | -- the last completed academic quarter. 20 | -- @example 62.5 21 | completed_units float 22 | ); 23 | -------------------------------------------------------------------------------- /extension/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sqlite-docs-extension" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | sqlite-loadable = {path="../../sqlite-loadable-rs"} 8 | sqlite-docs-core = {path="../core"} 9 | serde_json = "1.0" 10 | serde = "1.0" 11 | 12 | [lib] 13 | crate-type = ["lib", "cdylib", "staticlib"] 14 | -------------------------------------------------------------------------------- /extension/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod tables; 2 | 3 | use sqlite_docs_core::TableDocs; 4 | use sqlite_loadable::prelude::*; 5 | use sqlite_loadable::*; 6 | use tables::DocsTablesTable; 7 | 8 | fn all_tables(db: *mut sqlite3) -> Result> { 9 | let mut tables = vec![]; 10 | let mut stmt = exec::Statement::prepare( 11 | db, 12 | "select name, sql from sqlite_master where type = 'table'", 13 | ) 14 | .unwrap(); 15 | for row in stmt.execute() { 16 | let row = row.unwrap(); 17 | let name = row.get::(0).unwrap(); 18 | let sql = row.get::(1).unwrap(); 19 | tables.push((name, sql)); 20 | } 21 | Ok(tables) 22 | } 23 | fn all_tables_from_context(context: *mut sqlite3_context) -> Result> { 24 | all_tables(api::context_db_handle(context)) 25 | } 26 | fn columns_for(context: *mut sqlite3_context, table: &str) -> Result> { 27 | let mut tables = vec![]; 28 | let mut stmt = exec::Statement::prepare( 29 | api::context_db_handle(context), 30 | "select cid, name from pragma_table_xinfo(?)", 31 | // type, notnull, dflt_value, ok, hidden 32 | ) 33 | .unwrap(); 34 | stmt.bind_text(1, table).unwrap(); 35 | for row in stmt.execute() { 36 | let row = row.unwrap(); 37 | let name = row.get::(0).unwrap(); 38 | let sql = row.get::(1).unwrap(); 39 | tables.push((name, sql)); 40 | } 41 | Ok(tables) 42 | } 43 | 44 | pub fn docs(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 45 | let tables = all_tables_from_context(context)?; 46 | let docs: Vec = tables 47 | .iter() 48 | .map(|(name, sql)| TableDocs::from_sql(sql, name)) 49 | .collect(); 50 | api::result_text(context, serde_json::to_string(&docs).unwrap())?; 51 | Ok(()) 52 | } 53 | pub fn all_tables_(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { 54 | let tables = all_tables_from_context(context)?; 55 | api::result_json(context, serde_json::json!(tables))?; 56 | Ok(()) 57 | } 58 | pub fn columns_for_(context: *mut sqlite3_context, values: &[*mut sqlite3_value]) -> Result<()> { 59 | let columns = columns_for(context, api::value_text(&values[0]).unwrap())?; 60 | api::result_json(context, serde_json::json!(columns))?; 61 | Ok(()) 62 | } 63 | 64 | #[sqlite_entrypoint] 65 | pub fn sqlite3_docs_init(db: *mut sqlite3) -> Result<()> { 66 | let flags = FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC; 67 | define_scalar_function(db, "all_tables", 0, all_tables_, flags)?; 68 | define_scalar_function(db, "columns_for", 1, columns_for_, flags)?; 69 | define_scalar_function(db, "docs", 0, docs, flags)?; 70 | define_table_function::(db, "sqlite_docs_tables", None)?; 71 | Ok(()) 72 | } 73 | -------------------------------------------------------------------------------- /extension/src/tables.rs: -------------------------------------------------------------------------------- 1 | use sqlite_docs_core::TableDocs; 2 | use sqlite_loadable::prelude::*; 3 | use sqlite_loadable::{ 4 | api, 5 | table::{BestIndexError, IndexInfo, VTab, VTabArguments, VTabCursor}, 6 | Result, 7 | }; 8 | 9 | use std::{mem, os::raw::c_int}; 10 | 11 | static CREATE_SQL: &str = "CREATE TABLE x(name, docs, columns)"; 12 | enum Columns { 13 | Name, 14 | Docs, 15 | Columns, 16 | } 17 | fn column(index: i32) -> Option { 18 | match index { 19 | 0 => Some(Columns::Name), 20 | 1 => Some(Columns::Docs), 21 | 2 => Some(Columns::Columns), 22 | _ => None, 23 | } 24 | } 25 | 26 | #[repr(C)] 27 | pub struct DocsTablesTable { 28 | /// must be first 29 | base: sqlite3_vtab, 30 | db: *mut sqlite3, 31 | } 32 | 33 | impl<'vtab> VTab<'vtab> for DocsTablesTable { 34 | type Aux = (); 35 | type Cursor = DocsTablesCursor; 36 | 37 | fn connect( 38 | db: *mut sqlite3, 39 | _aux: Option<&Self::Aux>, 40 | _args: VTabArguments, 41 | ) -> Result<(String, DocsTablesTable)> { 42 | let base: sqlite3_vtab = unsafe { mem::zeroed() }; 43 | let vtab = DocsTablesTable { base, db }; 44 | // TODO db.config(VTabConfig::Innocuous)?; 45 | Ok((CREATE_SQL.to_owned(), vtab)) 46 | } 47 | fn destroy(&self) -> Result<()> { 48 | Ok(()) 49 | } 50 | 51 | fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { 52 | info.set_estimated_cost(100000.0); 53 | info.set_estimated_rows(100000); 54 | info.set_idxnum(1); 55 | Ok(()) 56 | } 57 | 58 | fn open(&mut self) -> Result { 59 | Ok(DocsTablesCursor::new(self.db)) 60 | } 61 | } 62 | 63 | #[repr(C)] 64 | pub struct DocsTablesCursor { 65 | /// Base class. Must be first 66 | base: sqlite3_vtab_cursor, 67 | db: *mut sqlite3, 68 | tables: Option>, 69 | rowid: i64, 70 | } 71 | impl DocsTablesCursor { 72 | fn new(db: *mut sqlite3) -> DocsTablesCursor { 73 | let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; 74 | DocsTablesCursor { 75 | base, 76 | db, 77 | tables: None, 78 | rowid: 0, 79 | } 80 | } 81 | } 82 | 83 | impl VTabCursor for DocsTablesCursor { 84 | fn filter( 85 | &mut self, 86 | _idx_num: c_int, 87 | _idx_str: Option<&str>, 88 | _values: &[*mut sqlite3_value], 89 | ) -> Result<()> { 90 | let tables = crate::all_tables(self.db)?; 91 | let docs: Vec = tables 92 | .iter() 93 | .map(|(name, sql)| TableDocs::from_sql(sql, name)) 94 | .collect(); 95 | self.tables = Some(docs); 96 | self.rowid = 0; 97 | Ok(()) 98 | } 99 | 100 | fn next(&mut self) -> Result<()> { 101 | self.rowid += 1; 102 | Ok(()) 103 | } 104 | 105 | fn eof(&self) -> bool { 106 | self.tables 107 | .as_ref() 108 | .unwrap() 109 | .get(self.rowid as usize) 110 | .is_none() 111 | } 112 | 113 | fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { 114 | let table = self 115 | .tables 116 | .as_ref() 117 | .unwrap() 118 | .get(self.rowid as usize) 119 | .unwrap(); 120 | match column(i) { 121 | Some(Columns::Name) => { 122 | api::result_text(context, table.name.clone())?; 123 | } 124 | Some(Columns::Docs) => match &table.table_level { 125 | Some(docs) => api::result_text(context, docs)?, 126 | None => api::result_null(context), 127 | }, 128 | Some(Columns::Columns) => { 129 | api::result_json(context, serde_json::to_value(&table.columns).unwrap())? 130 | } 131 | _ => (), 132 | } 133 | Ok(()) 134 | } 135 | 136 | fn rowid(&self) -> Result { 137 | Ok(self.rowid) 138 | } 139 | } 140 | --------------------------------------------------------------------------------