├── .gitignore ├── Dockerfile ├── README.md ├── index.ts ├── package-lock.json ├── package.json └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/* 2 | node_modules/* 3 | *.js 4 | *.js.map 5 | terraform-provider-aws/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:8.2-alpine 2 | COPY . /app 3 | WORKDIR /app 4 | RUN npm i -g nodemon typescript \ 5 | && npm i \ 6 | && tsc \ 7 | && apk --update add git \ 8 | && git clone https://github.com/terraform-providers/terraform-provider-aws.git 9 | CMD node index.js -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Terraform Scrape 2 | Scraping tool used to try to extract data from the terraform docs for the https://github.com/erd0s/terraform-autocomplete Visual Studio Code extension. 3 | 4 | ## NOTE 5 | This is only attempting to scrape from the AWS provider code. I haven't tried on any other providers but please feel free to contribute! 6 | 7 | ## Running 8 | `docker run -ti dirkdirk/terraform-scrape` 9 | 10 | This will output any notices to stderr and output the json that needs to go in terraform-autocomplete/aws-resources.json to stdout. So you could do something like `docker run -ti dirkdirk/terraform-scrape > ../terraform-autocomplete/aws-resources.json` 11 | 12 | ## TODO 13 | * Args reference is wrong in aws_iam_role (https://www.terraform.io/docs/providers/aws/r/iam_role.html) because of the `div.alert-warning` in the middle of the list. 
import * as fs from "fs";
import * as marked from "marked";
import * as cheerio from "cheerio";
import * as _ from "lodash";

// ================================== INTERFACES ==================================

/**
 * Nodes collected from a page's "Argument Reference" section.
 *
 * NOTE(review): the values stored here are the raw nodes reached by walking
 * `nextSibling` from the section's h2, so `CheerioElement[]` may be the more
 * accurate element type - confirm before tightening.
 */
interface ArgumentNodes {
    argumentNodes: Cheerio[]
}

/** Nodes collected from a page's "Attribute(s) Reference" section (same caveat as above). */
interface AttributeNodes {
    attributeNodes: Cheerio[]
}

/** One scraped resource page: its name plus the parsed arguments and attributes. */
interface Resource {
    name: string;
    args: Variable[];
    attrs: Variable[];
}

/** A single argument or attribute taken from one list item of a page. */
interface Variable {
    name: string;
    description: string;
}

// ================================== FUNCTIONS ==================================

/**
 * Reads one markdown file from the AWS provider docs directory, renders it to
 * HTML with marked and loads the HTML into cheerio.
 *
 * @param filename - File name relative to terraform-provider-aws/website/docs/r/
 * @returns A promise for the loaded page; it rejects if the file cannot be read
 *          or marked reports an error.
 */
function getParsed(filename: string): Promise<CheerioStatic> {
    return new Promise<CheerioStatic>((resolve, reject) => {
        // A throw from readFileSync inside the executor rejects the promise.
        const markdown = fs.readFileSync("terraform-provider-aws/website/docs/r/" + filename, "utf8");
        marked(markdown, (err, html) => {
            if (err) {
                return reject(err);
            }
            resolve(cheerio.load(html));
        });
    });
}

/**
 * Starts parsing every given file.
 *
 * @param files - File names as accepted by getParsed
 * @returns One pending promise per file, in the same order as `files`.
 */
function getAllParsed(files: string[]): Promise<CheerioStatic>[] {
    // Explicit arrow so that Array#map's extra (index, array) arguments are not forwarded.
    return files.map(file => getParsed(file));
}

/**
 * Counts the pages that contain at least one h2 whose text is one of `headings`.
 */
function countPagesWithHeading($s: CheerioStatic[], headings: string[]): number {
    return $s.filter($ => {
        const matching = $("h2").filter((index, el) => headings.indexOf($(el).text()) !== -1);
        return matching.length > 0;
    }).length;
}

/**
 * Number of pages that have an "Argument Reference" h2.
 *
 * Previously this returned the length of the per-page result array, i.e. the
 * number of pages passed in, regardless of whether the heading was present.
 */
function getNumWithArgumentReference($s: CheerioStatic[]): number {
    return countPagesWithHeading($s, ["Argument Reference"]);
}

/**
 * Number of pages that have an "Attribute Reference" / "Attributes Reference" h2.
 * Both spellings are accepted, matching extractAttributesContent.
 *
 * Previously this returned the number of pages passed in (same defect as
 * getNumWithArgumentReference).
 */
function getNumWithAttributesReference($s: CheerioStatic[]): number {
    return countPagesWithHeading($s, ["Attribute Reference", "Attributes Reference"]);
}
/**
 * Pattern used to split a list item's text into name and description: the
 * first run of [a-zA-Z0-9_] that is followed by a space, then the rest of that
 * line.
 *
 * NOTE(review): `.` does not match a newline, so a description that wraps onto
 * several lines is cut at the first line break. Left unchanged because it
 * determines the emitted JSON - confirm whether that truncation is wanted.
 */
const VARIABLE_PATTERN = /([a-zA-Z0-9_]+) (.+)/;

/** Returns the h2 elements of the page whose text is exactly one of `titles`. */
function findSectionHeadings($: CheerioStatic, titles: string[]): Cheerio {
    return $("h2").filter((index, el) => titles.indexOf($(el).text()) !== -1);
}

/**
 * Collects `heading` and the siblings that follow it, stopping before the next
 * h2 (or at the last sibling). Text nodes that are exactly "\n" are dropped.
 */
function collectSectionNodes(heading: any): any[] {
    const nodes: any[] = [];
    let node: any = heading;
    do {
        if (!(node.type === "text" && node.data === "\n")) {
            nodes.push(node);
        }
        node = node.nextSibling;
    } while (node && node.name !== "h2");
    return nodes;
}

/**
 * Turns every <li> found under `list` into a Variable. Items that do not match
 * VARIABLE_PATTERN are reported on stderr and keep their whole text as the
 * name; their description stays undefined, so it is omitted by JSON.stringify.
 */
function parseListItems(list: any, $: CheerioStatic): Variable[] {
    return $(list).find("li").toArray().map(li => {
        const text = $(li).text();
        const match = text.match(VARIABLE_PATTERN);
        if (!match) {
            console.error(`Didn't find a description for ${text} on ${extractResourceName($)}`);
            return { name: text, description: undefined };
        }
        return { name: match[1], description: match[2] };
    });
}

/**
 * Returns the "Argument Reference" h2 together with the nodes that follow it,
 * up to (not including) the next h2.
 *
 * @param $ - The full page as a cheerio object
 * @throws Error when the page does not contain exactly one "Argument Reference" h2
 */
function extractArgumentsContent($: CheerioStatic): ArgumentNodes {
    const headings = findSectionHeadings($, ["Argument Reference"]);
    if (headings.length != 1) {
        throw new Error("Didn't find correct number of h2 > Arguments Reference");
    }
    return {argumentNodes: collectSectionNodes(headings[0])};
}

/**
 * Returns the "Attribute Reference" / "Attributes Reference" h2 together with
 * the nodes that follow it, up to (not including) the next h2.
 *
 * Unlike the arguments section, a page without exactly one such heading is not
 * an error: the problem is reported on stderr and an empty list is returned.
 *
 * @param $ - The full page as a cheerio object
 */
function extractAttributesContent($: CheerioStatic): AttributeNodes {
    const headings = findSectionHeadings($, ["Attribute Reference", "Attributes Reference"]);
    if (headings.length != 1) {
        console.error(`Didn't find any attributes on ${extractResourceName($)}`);
        return {attributeNodes: []};
    }
    return {attributeNodes: collectSectionNodes(headings[0])};
}

/**
 * Parses the items of the first <ul> of the arguments section.
 *
 * @param argNodes - Result of extractArgumentsContent for the same page
 * @param $ - The full page as a cheerio object
 * @throws Error when the section contains no <ul>
 */
function extractArguments(argNodes: ArgumentNodes, $: CheerioStatic): Variable[] {
    // Only the first list is read; any later lists in the section are ignored.
    const firstUl = argNodes.argumentNodes.find((node: any) => node.name === "ul");
    if (!firstUl) {
        throw new Error("Didn't find a UL when searching through arguments");
    }
    return parseListItems(firstUl, $);
}

/**
 * Parses the items of the first <ul> of the attributes section.
 *
 * @param argNodes - Result of extractAttributesContent for the same page
 * @param $ - The full page as a cheerio object
 * @returns The parsed attributes; empty when the section is missing or has no <ul>
 */
function extractAttributes(argNodes: AttributeNodes, $: CheerioStatic): Variable[] {
    const nodes = argNodes.attributeNodes;
    if (nodes.length == 0) return [];

    const firstUl = nodes.find((node: any) => node.name === "ul");
    if (!firstUl) {
        console.error(`Didn't find a UL when searching through attributes on ${extractResourceName($)}`);
        // Return explicitly instead of relying on $(undefined) yielding an empty selection.
        return [];
    }
    return parseListItems(firstUl, $);
}

/**
 * Returns the text of the page's h1, which is used as the resource name.
 *
 * @param $ - The full page as a cheerio object
 * @throws Error when the page has no h1 text
 */
function extractResourceName($: CheerioStatic): string {
    const name = $("h1").text();
    if (!name) throw new Error("Couldn't extract name");
    return name;
}
// ================================== CODE ==================================

// Parse every resource page of the AWS provider docs and print a single JSON
// object to stdout, keyed by resource name:
//   { "<resource name>": { args: Variable[], attrs: Variable[] }, ... }
// Notices from the extract* functions go to stderr.
var files = fs.readdirSync("terraform-provider-aws/website/docs/r");
Promise.all(getAllParsed(files))
    .then($s => {
        const transformed: { [name: string]: { args: Variable[]; attrs: Variable[] } } = {};
        for (const $ of $s) {
            // Same evaluation order as before: name, then arguments, then attributes.
            const name = extractResourceName($);
            const args = extractArguments(extractArgumentsContent($), $);
            const attrs = extractAttributes(extractAttributesContent($), $);
            // A later page with the same name replaces the earlier entry.
            transformed[name] = { args, attrs };
        }
        console.log(JSON.stringify(transformed));
    })
    .catch(err => {
        // Without this handler a single bad page surfaced only as an unhandled
        // rejection: nothing on stdout and, on Node 8, a zero exit status.
        console.error(err);
        process.exitCode = 1;
    });
"lodash.map": "4.6.0", 43 | "lodash.merge": "4.6.0", 44 | "lodash.pick": "4.4.0", 45 | "lodash.reduce": "4.6.0", 46 | "lodash.reject": "4.6.0", 47 | "lodash.some": "4.6.0" 48 | } 49 | }, 50 | "core-util-is": { 51 | "version": "1.0.2", 52 | "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", 53 | "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" 54 | }, 55 | "css-select": { 56 | "version": "1.2.0", 57 | "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", 58 | "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", 59 | "requires": { 60 | "boolbase": "1.0.0", 61 | "css-what": "2.1.0", 62 | "domutils": "1.5.1", 63 | "nth-check": "1.0.1" 64 | } 65 | }, 66 | "css-what": { 67 | "version": "2.1.0", 68 | "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz", 69 | "integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0=" 70 | }, 71 | "dom-serializer": { 72 | "version": "0.1.0", 73 | "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", 74 | "integrity": "sha1-BzxpdUbOB4DOI75KKOKT5AvDDII=", 75 | "requires": { 76 | "domelementtype": "1.1.3", 77 | "entities": "1.1.1" 78 | }, 79 | "dependencies": { 80 | "domelementtype": { 81 | "version": "1.1.3", 82 | "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", 83 | "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" 84 | } 85 | } 86 | }, 87 | "domelementtype": { 88 | "version": "1.3.0", 89 | "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz", 90 | "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=" 91 | }, 92 | "domhandler": { 93 | "version": "2.4.1", 94 | "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.1.tgz", 95 | "integrity": "sha1-iS5HAAqZvlW783dP/qBWHYh5wlk=", 96 | "requires": { 97 | "domelementtype": "1.3.0" 98 | } 99 | }, 100 | "domutils": { 101 | "version": "1.5.1", 102 | "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", 103 | "integrity": 
"sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", 104 | "requires": { 105 | "dom-serializer": "0.1.0", 106 | "domelementtype": "1.3.0" 107 | } 108 | }, 109 | "entities": { 110 | "version": "1.1.1", 111 | "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", 112 | "integrity": "sha1-blwtClYhtdra7O+AuQ7ftc13cvA=" 113 | }, 114 | "htmlparser2": { 115 | "version": "3.9.2", 116 | "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.9.2.tgz", 117 | "integrity": "sha1-G9+HrMoPP55T+k/M6w9LTLsAszg=", 118 | "requires": { 119 | "domelementtype": "1.3.0", 120 | "domhandler": "2.4.1", 121 | "domutils": "1.5.1", 122 | "entities": "1.1.1", 123 | "inherits": "2.0.3", 124 | "readable-stream": "2.3.3" 125 | } 126 | }, 127 | "inherits": { 128 | "version": "2.0.3", 129 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", 130 | "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" 131 | }, 132 | "isarray": { 133 | "version": "1.0.0", 134 | "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", 135 | "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" 136 | }, 137 | "lodash": { 138 | "version": "4.17.4", 139 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz", 140 | "integrity": "sha1-eCA6TRwyiuHYbcpkYONptX9AVa4=" 141 | }, 142 | "lodash.assignin": { 143 | "version": "4.2.0", 144 | "resolved": "https://registry.npmjs.org/lodash.assignin/-/lodash.assignin-4.2.0.tgz", 145 | "integrity": "sha1-uo31+4QesKPoBEIysOJjqNxqKKI=" 146 | }, 147 | "lodash.bind": { 148 | "version": "4.2.1", 149 | "resolved": "https://registry.npmjs.org/lodash.bind/-/lodash.bind-4.2.1.tgz", 150 | "integrity": "sha1-euMBfpOWIqwxt9fX3LGzTbFpDTU=" 151 | }, 152 | "lodash.defaults": { 153 | "version": "4.2.0", 154 | "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", 155 | "integrity": "sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw=" 156 | }, 157 | "lodash.filter": { 158 | "version": "4.6.0", 159 | "resolved": 
"https://registry.npmjs.org/lodash.filter/-/lodash.filter-4.6.0.tgz", 160 | "integrity": "sha1-ZosdSYFgOuHMWm+nYBQ+SAtMSs4=" 161 | }, 162 | "lodash.flatten": { 163 | "version": "4.4.0", 164 | "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", 165 | "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=" 166 | }, 167 | "lodash.foreach": { 168 | "version": "4.5.0", 169 | "resolved": "https://registry.npmjs.org/lodash.foreach/-/lodash.foreach-4.5.0.tgz", 170 | "integrity": "sha1-Gmo16s5AEoDH8G3d7DUWWrJ+PlM=" 171 | }, 172 | "lodash.map": { 173 | "version": "4.6.0", 174 | "resolved": "https://registry.npmjs.org/lodash.map/-/lodash.map-4.6.0.tgz", 175 | "integrity": "sha1-dx7Hg540c9nEzeKLGTlMNWL09tM=" 176 | }, 177 | "lodash.merge": { 178 | "version": "4.6.0", 179 | "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.0.tgz", 180 | "integrity": "sha1-aYhLoUSsM/5plzemCG3v+t0PicU=" 181 | }, 182 | "lodash.pick": { 183 | "version": "4.4.0", 184 | "resolved": "https://registry.npmjs.org/lodash.pick/-/lodash.pick-4.4.0.tgz", 185 | "integrity": "sha1-UvBWEP/53tQiYRRB7R/BI6AwAbM=" 186 | }, 187 | "lodash.reduce": { 188 | "version": "4.6.0", 189 | "resolved": "https://registry.npmjs.org/lodash.reduce/-/lodash.reduce-4.6.0.tgz", 190 | "integrity": "sha1-8atrg5KZrUj3hKu/R2WW8DuRTTs=" 191 | }, 192 | "lodash.reject": { 193 | "version": "4.6.0", 194 | "resolved": "https://registry.npmjs.org/lodash.reject/-/lodash.reject-4.6.0.tgz", 195 | "integrity": "sha1-gNZJLcFHCGS79YNTO2UfQqn1JBU=" 196 | }, 197 | "lodash.some": { 198 | "version": "4.6.0", 199 | "resolved": "https://registry.npmjs.org/lodash.some/-/lodash.some-4.6.0.tgz", 200 | "integrity": "sha1-G7nzFO9ri63tE7VJFpsqlF62jk0=" 201 | }, 202 | "marked": { 203 | "version": "0.3.6", 204 | "resolved": "https://registry.npmjs.org/marked/-/marked-0.3.6.tgz", 205 | "integrity": "sha1-ssbGGPzOzk74bE/Gy4p8v1rtqNc=" 206 | }, 207 | "nth-check": { 208 | "version": "1.0.1", 209 | "resolved": 
"https://registry.npmjs.org/nth-check/-/nth-check-1.0.1.tgz", 210 | "integrity": "sha1-mSms32KPwsQQmN6rgqxYDPFJquQ=", 211 | "requires": { 212 | "boolbase": "1.0.0" 213 | } 214 | }, 215 | "process-nextick-args": { 216 | "version": "1.0.7", 217 | "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", 218 | "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=" 219 | }, 220 | "readable-stream": { 221 | "version": "2.3.3", 222 | "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.3.tgz", 223 | "integrity": "sha512-m+qzzcn7KUxEmd1gMbchF+Y2eIUbieUaxkWtptyHywrX0rE8QEYqPC07Vuy4Wm32/xE16NcdBctb8S0Xe/5IeQ==", 224 | "requires": { 225 | "core-util-is": "1.0.2", 226 | "inherits": "2.0.3", 227 | "isarray": "1.0.0", 228 | "process-nextick-args": "1.0.7", 229 | "safe-buffer": "5.1.1", 230 | "string_decoder": "1.0.3", 231 | "util-deprecate": "1.0.2" 232 | } 233 | }, 234 | "safe-buffer": { 235 | "version": "5.1.1", 236 | "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", 237 | "integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg==" 238 | }, 239 | "string_decoder": { 240 | "version": "1.0.3", 241 | "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.3.tgz", 242 | "integrity": "sha512-4AH6Z5fzNNBcH+6XDMfA/BTt87skxqJlO0lAh3Dker5zThcAxG6mKz+iGu308UKoPPQ8Dcqx/4JhujzltRa+hQ==", 243 | "requires": { 244 | "safe-buffer": "5.1.1" 245 | } 246 | }, 247 | "util-deprecate": { 248 | "version": "1.0.2", 249 | "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", 250 | "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" 251 | } 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "terraform-scrape", 3 | "version": "1.0.0", 4 | "description": "", 5 
| "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "@types/cheerio": "^0.22.5", 13 | "@types/lodash": "^4.14.85", 14 | "@types/node": "^8.0.53", 15 | "cheerio": "^0.22.0", 16 | "lodash": "^4.17.4", 17 | "marked": "^0.3.6" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2017", 4 | "module": "commonjs", 5 | "sourceMap": true 6 | } 7 | } --------------------------------------------------------------------------------