├── .tool-versions ├── NOTICE ├── templates ├── _.gitignore ├── aws.env ├── emr_tools_demo.py ├── pyspark-emr-7_x.dockerfile ├── pyspark-emr-6_x.dockerfile └── devcontainer.json ├── .gitignore ├── resources ├── emr-icon.png ├── Arch_Amazon-EMR_64.png ├── dark │ ├── alert-circle.svg │ ├── refresh.svg │ └── emr.svg └── light │ ├── alert-circle.svg │ ├── refresh.svg │ └── emr.svg ├── images ├── emr-explorer.png ├── glue-explorer.png ├── pyspark-shell.png └── glue-table-details.png ├── .vscodeignore ├── CODE_OF_CONDUCT.md ├── src ├── explorer │ ├── commands.ts │ ├── glueCatalog.ts │ ├── emrServerless.ts │ ├── emrContainers.ts │ └── emrEC2.ts ├── test │ ├── suite │ │ ├── extension.test.ts │ │ └── index.ts │ └── runTest.ts ├── clients │ ├── s3Client.ts │ ├── emrContainersClient.ts │ ├── glueClient.ts │ ├── emrServerlessClient.ts │ └── emrClient.ts ├── emr_connect.ts ├── utils │ └── quickPickItem.ts ├── emr_deploy.ts ├── emr_containers.ts ├── aws_context.ts ├── panels │ └── glueTablePanel.ts ├── emr_serverless.ts ├── helpers.ts ├── extension.ts ├── commands │ ├── deploy │ │ └── emrEC2Deploy.ts │ └── emrDeploy.ts ├── emr_explorer.ts └── emr_local.ts ├── .vscode ├── tasks.json └── launch.json ├── tsconfig.json ├── .eslintrc.json ├── media ├── glue.css └── emr.svg ├── CHANGELOG.md ├── CONTRIBUTING.md ├── .github └── workflows │ └── publish-extension.yml ├── README.md ├── package.json └── LICENSE /.tool-versions: -------------------------------------------------------------------------------- 1 | nodejs 18.15.0 2 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /templates/_.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pytest_cache/ 3 | .devcontainer/aws.env 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | dist 3 | node_modules 4 | .vscode-test/ 5 | *.vsix 6 | 7 | # macOS detritus 8 | .DS_Store 9 | -------------------------------------------------------------------------------- /resources/emr-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/resources/emr-icon.png -------------------------------------------------------------------------------- /images/emr-explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/images/emr-explorer.png -------------------------------------------------------------------------------- /images/glue-explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/images/glue-explorer.png -------------------------------------------------------------------------------- /images/pyspark-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/images/pyspark-shell.png -------------------------------------------------------------------------------- /images/glue-table-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/images/glue-table-details.png 
-------------------------------------------------------------------------------- /templates/aws.env: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE 2 | AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 3 | -------------------------------------------------------------------------------- /resources/Arch_Amazon-EMR_64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-emr-vscode-toolkit/main/resources/Arch_Amazon-EMR_64.png -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | src/** 4 | .gitignore 5 | .yarnrc 6 | vsc-extension-quickstart.md 7 | **/tsconfig.json 8 | **/.eslintrc.json 9 | **/*.map 10 | **/*.ts 11 | node_modules 12 | !node_modules/@vscode/codicons/dist/codicon.css 13 | !node_modules/@vscode/codicons/dist/codicon.ttf -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /resources/dark/alert-circle.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /resources/dark/refresh.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resources/light/alert-circle.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /resources/light/refresh.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/explorer/commands.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | 6 | /** 7 | * Copies the id of the resource represented by the given node. 
8 | */ 9 | export async function copyIdCommand( 10 | node: vscode.TreeItem, 11 | ): Promise { 12 | await vscode.env.clipboard.writeText(node.id!); 13 | } 14 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // See https://go.microsoft.com/fwlink/?LinkId=733558 2 | // for the documentation about the tasks.json format 3 | { 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "type": "npm", 8 | "script": "esbuild", 9 | "problemMatcher": "$esbuild-watch", 10 | "isBackground": true, 11 | "presentation": { 12 | "reveal": "never" 13 | }, 14 | "group": { 15 | "kind": "build", 16 | "isDefault": true 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/test/suite/extension.test.ts: -------------------------------------------------------------------------------- 1 | import * as assert from 'assert'; 2 | 3 | // You can import and use all API from the 'vscode' module 4 | // as well as import your extension to test it 5 | import * as vscode from 'vscode'; 6 | // import * as myExtension from '../../extension'; 7 | 8 | suite('Extension Test Suite', () => { 9 | vscode.window.showInformationMessage('Start all tests.'); 10 | 11 | test('Sample test', () => { 12 | assert.strictEqual(-1, [1, 2, 3].indexOf(5)); 13 | assert.strictEqual(-1, [1, 2, 3].indexOf(0)); 14 | }); 15 | }); 16 | -------------------------------------------------------------------------------- /templates/emr_tools_demo.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | 3 | spark = ( 4 | SparkSession.builder.appName("EMRLocal") 5 | # Uncomment these lines to enable the Glue Data Catalog 6 | # .config("spark.sql.catalogImplementation", "hive") 7 | # .config( 8 | # "spark.hadoop.hive.metastore.client.factory.class", 9 | # 
"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", 10 | # ) 11 | .getOrCreate() 12 | ) 13 | df = spark.read.csv("s3://noaa-gsod-pds/2022/01001099999.csv", header=True) 14 | print(df.head()) -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "ES2020", 5 | "outDir": "out", 6 | "lib": [ 7 | "ES2020" 8 | ], 9 | "sourceMap": true, 10 | "rootDir": "src", 11 | "strict": true, /* enable all strict type-checking options */ 12 | /* Additional Checks */ 13 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 14 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 15 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "parser": "@typescript-eslint/parser", 4 | "parserOptions": { 5 | "ecmaVersion": 6, 6 | "sourceType": "module" 7 | }, 8 | "plugins": [ 9 | "@typescript-eslint" 10 | ], 11 | "rules": { 12 | "@typescript-eslint/naming-convention": "warn", 13 | "@typescript-eslint/semi": "warn", 14 | "curly": "warn", 15 | "eqeqeq": "warn", 16 | "no-throw-literal": "warn", 17 | "semi": "off" 18 | }, 19 | "ignorePatterns": [ 20 | "out", 21 | "dist", 22 | "**/*.d.ts" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Run Extension", 9 | "type": "extensionHost", 10 | "request": "launch", 11 | "args": [ 12 | "--extensionDevelopmentPath=${workspaceFolder}" 13 | ], 14 | "outFiles": [ 15 | "${workspaceFolder}/out/**/*.js" 16 | ], 17 | "preLaunchTask": "${defaultBuildTask}", 18 | "env": { 19 | "xxAWS_PROFILE": "emr-da-Admin" 20 | } 21 | }, 22 | ] 23 | } -------------------------------------------------------------------------------- /src/test/runTest.ts: -------------------------------------------------------------------------------- 1 | import * as path from 'path'; 2 | 3 | import { runTests } from '@vscode/test-electron'; 4 | 5 | async function main() { 6 | try { 7 | // The folder containing the Extension Manifest package.json 8 | // Passed to `--extensionDevelopmentPath` 9 | const extensionDevelopmentPath = path.resolve(__dirname, '../../'); 10 | 11 | // The path to test runner 12 | // Passed to --extensionTestsPath 13 | const extensionTestsPath = path.resolve(__dirname, './suite/index'); 14 | 15 | // Download VS Code, unzip it and run the integration test 16 | await runTests({ extensionDevelopmentPath, extensionTestsPath }); 17 | } catch (err) { 18 | console.error('Failed to run tests'); 19 | process.exit(1); 20 | } 21 | } 22 | 23 | main(); 24 | -------------------------------------------------------------------------------- /src/test/suite/index.ts: -------------------------------------------------------------------------------- 1 | import * as path from 'path'; 2 | import * as Mocha from 'mocha'; 3 | import * as glob from 'glob'; 4 | 5 | export function run(): Promise { 6 | // Create the mocha test 7 | const mocha = new Mocha({ 8 | ui: 'tdd', 9 | color: true 10 | }); 11 | 12 | const testsRoot = path.resolve(__dirname, '..'); 13 | 14 | return new Promise((c, e) => { 15 | glob('**/**.test.js', { cwd: testsRoot }, (err, files) => { 
16 | if (err) { 17 | return e(err); 18 | } 19 | 20 | // Add files to the test suite 21 | files.forEach(f => mocha.addFile(path.resolve(testsRoot, f))); 22 | 23 | try { 24 | // Run the mocha test 25 | mocha.run(failures => { 26 | if (failures > 0) { 27 | e(new Error(`${failures} tests failed.`)); 28 | } else { 29 | c(); 30 | } 31 | }); 32 | } catch (err) { 33 | console.error(err); 34 | e(err); 35 | } 36 | }); 37 | }); 38 | } 39 | -------------------------------------------------------------------------------- /src/clients/s3Client.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { Globals } from "../extension"; 6 | import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"; 7 | 8 | export class DefaultS3Client { 9 | public constructor(private readonly globals: Globals) {} 10 | 11 | private async createS3(): Promise { 12 | return new S3Client(this.globals.awsContext.getClientConfig()); 13 | } 14 | 15 | public async uploadFile(bucket: string, key: string, body: Buffer): Promise { 16 | this.globals.outputChannel.appendLine(`S3: Uploading file to ${bucket}/${key}`); 17 | const s3 = await this.createS3(); 18 | 19 | const params = { 20 | Bucket: bucket, 21 | Key: key, 22 | Body: body, 23 | }; 24 | 25 | try { 26 | const results = await s3.send(new PutObjectCommand(params)); 27 | this.globals.outputChannel.appendLine(`S3: Upload complete.`); 28 | } catch (error) { 29 | vscode.window.showErrorMessage("Error uploading file to S3!" + error); 30 | } 31 | 32 | return undefined; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/emr_connect.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { SSMClient, StartSessionCommand, StartSessionCommandInput } from "@aws-sdk/client-ssm"; 6 | import { EMRCluster } from './emr_explorer'; 7 | 8 | export async function connectToClusterCommand( 9 | cluster: EMRCluster 10 | ): Promise { 11 | // For now, we'll just forward some default ports 12 | // TODO: Get the active primary node of the passed cluster 13 | const targetNode = "i-0ce9287cb94b72a8b"; 14 | console.log("Connecting to ", targetNode); 15 | const client = new SSMClient({ region: "us-west-2" }); 16 | const params = { 17 | DocumentName: "AWS-StartPortForwardingSession", 18 | Target: targetNode, 19 | Parameters: {"portNumber":["22"],"localPortNumber":["2222"]} 20 | 21 | }; 22 | const command = new StartSessionCommand(params); 23 | const response = await client.send(command); 24 | console.log("Response is: ", response); 25 | // Unfortunately, we *ALSO* Need to start a shell session with the actual 26 | // SSM utility. aws-toolkit does this and has a method for installing it as well. 
27 | // (see installSsmCli https://github.com/aws/aws-toolkit-vscode/search?q=installSsmCli) 28 | } -------------------------------------------------------------------------------- /media/glue.css: -------------------------------------------------------------------------------- 1 | .container { 2 | width: 80%; 3 | margin: 0 auto; 4 | } 5 | 6 | body { 7 | width: 100%; 8 | min-height: 100vh; 9 | /* background: #c850c0; 10 | background: -webkit-linear-gradient(45deg, #4158d0, #c850c0); 11 | background: -o-linear-gradient(45deg, #4158d0, #c850c0); 12 | background: -moz-linear-gradient(45deg, #4158d0, #c850c0); 13 | background: linear-gradient(45deg, #4158d0, #c850c0); */ 14 | display: -webkit-box; 15 | display: -webkit-flex; 16 | display: -moz-box; 17 | display: -ms-flexbox; 18 | display: flex; 19 | justify-content: center; 20 | flex-wrap: wrap; 21 | padding: 33px 30px; 22 | } 23 | 24 | .meta { 25 | list-style: none; 26 | font-size: 15px; 27 | margin: 0; 28 | padding: 0; 29 | } 30 | 31 | .meta:not(.meta-vertical) li { 32 | display: inline-flex; 33 | align-items: center; 34 | margin-right: var(--wpex-meta-gutter, 20px); 35 | } 36 | 37 | table { 38 | border-spacing: 1; 39 | border-collapse: collapse; 40 | /* background: #fff; */ 41 | border-radius: 10px; 42 | overflow: hidden; 43 | /* width: 100%; */ 44 | margin: 0px; 45 | position: relative; 46 | } 47 | 48 | table thead tr { 49 | height: 40px; 50 | /* background: #36304a; */ 51 | background: var(--vscode-button-background); 52 | font-weight: bold; 53 | } 54 | 55 | table tbody tr { 56 | height: 30px; 57 | } 58 | 59 | th { 60 | font-family: OpenSans-Regular; 61 | font-size: 18px; 62 | /* color: #fff; */ 63 | line-height: 1.2; 64 | font-weight: unset; 65 | text-align: left; 66 | } 67 | 68 | tbody tr:nth-child(even) { 69 | background-color: var(--vscode-tree-tableOddRowsBackground); /* #f5f5f5; */ 70 | } 71 | 72 | tbody tr { 73 | font-family: OpenSans-Regular; 74 | /* font-size: 15px; */ 75 | /* color: gray; */ 76 | 
line-height: 1.2; 77 | font-weight: unset; 78 | } 79 | 80 | tbody tr:hover { 81 | /* color: #555; 82 | background-color: #f5f5f5; 83 | color: var(--vscode-editor-background); */ 84 | background-color: var(--vscode-list-hoverBackground); 85 | } 86 | 87 | .column1 { 88 | width: 260px; 89 | padding-left: 40px; 90 | padding-right: 10px; 91 | } 92 | 93 | .column2 { 94 | width: 300px; 95 | } 96 | 97 | i.codicon { 98 | font-size: 20px !important; 99 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | ## 0.6.2 - 2024-05-17 5 | 6 | ### Bug Fixes 7 | 8 | - Fix duplicate entry for EMR 6.2 [#43](https://github.com/awslabs/amazon-emr-vscode-toolkit/pull/43) 9 | 10 | ## 0.6.1 - 2024-05-10 11 | 12 | ### Features 13 | 14 | - Add new EMR releases EMR 6.11 to EMR 6.15 [#41](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/41) 15 | 16 | ## 0.6.0 - 2023-03-24 17 | 18 | ### Features 19 | 20 | - Add EMR 6.10.0 [#24](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/24) 21 | - Add support for logging in EMR Serverless jobs [#20](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/20) 22 | - Allow search in Glue Table details window [#19](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/19) 23 | - Add support to run jobs on EMR on EC2 [#11](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/11) 24 | 25 | ## 0.5.2 - 2023-02-01 26 | 27 | ### Bug Fixes 28 | 29 | - Fix fallback if region selection fails 30 | 31 | ## 0.5.1 (Developer Preview) - 2023-01-25 32 | 33 | ### Features 34 | 35 | - Infer region from selected profile 36 | 37 | ## 0.5.0 (Developer Preview) - 2023-01-24 38 | 39 | ### Features 40 | 41 | - Add EMR 6.9.0 [#16](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/16) 42 | - Add ability to select AWS profile from within extension 43 | 44 | ## 0.4.0 (Developer 
Preview) - 2022-11-28 45 | 46 | ### Features 47 | 48 | - Add EMR 6.8.0 [#13](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/13) 49 | 50 | ### Bug Fixes 51 | 52 | - Install arm64 version of AWS CLI [#15](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/15) 53 | - Fix icons in Glue table views [#7](https://github.com/awslabs/amazon-emr-vscode-toolkit/issues/7) 54 | 55 | ## 0.3.0 (Developer Preview) - 2022-10-18 56 | 57 | ### Bug Fixes 58 | 59 | - Fix container build on M1 Macs 60 | 61 | ## 0.2.0 (Developer Preview) - 2022-10-10 62 | 63 | ### Features 64 | 65 | - Add support for deploying single-file PySpark jobs 66 | - Enable the Glue Data Catalog by default for devcontainer 67 | 68 | ### Bug Fixes 69 | 70 | - Paginate job runs for EMR Serverless 71 | 72 | ## 0.1.0 (Developer Preview) - 2022-09-28 73 | 74 | ### Features 75 | 76 | - Add ipykernel to support Jupyter notebooks 77 | 78 | ## 0.0.1 (Developer Preview) - 2022-07-27 79 | 80 | - Initial release 81 | -------------------------------------------------------------------------------- /src/clients/emrContainersClient.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | EMRContainersClient, 7 | ListJobRunsCommand, 8 | ListVirtualClustersCommand, 9 | ListVirtualClustersResponse, 10 | } from "@aws-sdk/client-emr-containers"; 11 | import { Globals } from "../extension"; 12 | 13 | export interface VirtualCluster { 14 | readonly id?: string; 15 | readonly name?: string; 16 | } 17 | 18 | export interface JobRun { 19 | readonly virtualClusterId?: string; 20 | readonly id?: string; 21 | readonly name?: string; 22 | readonly state?: string; 23 | readonly stateDetails?: string; 24 | } 25 | 26 | export class DefaultEMRContainersClient { 27 | public constructor(private readonly globals: Globals) {} 28 | 29 | private async createEMRContainers(): Promise { 30 | return new EMRContainersClient(this.globals.awsContext.getClientConfig()); 31 | } 32 | 33 | public async listVirtualClusters(): Promise { 34 | this.globals.outputChannel.appendLine( 35 | `EMR Containers: Fetching virtual clusters from ${this.globals.awsContext.getRegion()} region.` 36 | ); 37 | const emr = await this.createEMRContainers(); 38 | let virtualClusters: VirtualCluster[]; 39 | 40 | try { 41 | // Note that this requires aws-sdk<=v3.30.0 42 | // due to https://github.com/aws/aws-sdk-js-v3/issues/3511 43 | const result = await emr.send(new ListVirtualClustersCommand({})); 44 | 45 | virtualClusters = result.virtualClusters ?? []; 46 | } catch (error) { 47 | vscode.window.showErrorMessage( 48 | "Error fetching EMR virtual clusters!" 
+ error 49 | ); 50 | return []; 51 | } 52 | 53 | return virtualClusters; 54 | } 55 | 56 | public async listJobRuns(virtualClusterId: string): Promise { 57 | this.globals.outputChannel.appendLine( 58 | `EMR Containers: Fetching job runs for virtual cluster ${virtualClusterId}.` 59 | ); 60 | const emr = await this.createEMRContainers(); 61 | let jobRuns: JobRun[] = []; 62 | 63 | try { 64 | const result = await emr.send( 65 | new ListJobRunsCommand({ 66 | virtualClusterId: virtualClusterId, 67 | }) 68 | ); 69 | jobRuns = result.jobRuns ?? []; 70 | } catch (error) { 71 | vscode.window.showErrorMessage( 72 | "Error fetching EMR virtual cluster job runs!" + error 73 | ); 74 | } 75 | 76 | return jobRuns; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/explorer/glueCatalog.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { DefaultGlueClient } from "../clients/glueClient"; 6 | import { Database, Table } from "@aws-sdk/client-glue"; 7 | 8 | export class GlueCatalogNode 9 | implements vscode.TreeDataProvider 10 | { 11 | private _onDidChangeTreeData: vscode.EventEmitter< 12 | GlueCatalogDatabaseNode | undefined | null | void 13 | > = new vscode.EventEmitter< 14 | GlueCatalogDatabaseNode | undefined | null | void 15 | >(); 16 | readonly onDidChangeTreeData: vscode.Event< 17 | GlueCatalogDatabaseNode | undefined | null | void 18 | > = this._onDidChangeTreeData.event; 19 | 20 | refresh(): void { 21 | this._onDidChangeTreeData.fire(); 22 | } 23 | 24 | public constructor(private readonly glue: DefaultGlueClient) {} 25 | 26 | getTreeItem(element: GlueCatalogDatabaseNode): vscode.TreeItem { 27 | return element; 28 | } 29 | 30 | async getChildren( 31 | element?: GlueCatalogDatabaseNode 32 | ): Promise { 33 | if (element) { 34 
| return Promise.resolve(element.getChildren()); 35 | } else { 36 | const glueDatabases = await this.glue.listDatabases(); 37 | if (glueDatabases.length === 0) { 38 | return Promise.resolve([new vscode.TreeItem("No databases found")]);; 39 | } else { 40 | return Promise.resolve( 41 | glueDatabases.map( 42 | (glueDatabase: Database) => 43 | new GlueCatalogDatabaseNode(glueDatabase.Name!, this.glue) 44 | ) 45 | ); 46 | } 47 | 48 | } 49 | } 50 | } 51 | 52 | export class GlueCatalogDatabaseNode extends vscode.TreeItem { 53 | constructor( 54 | private readonly databaseName: string, 55 | private readonly glue: DefaultGlueClient 56 | ) { 57 | super(databaseName, vscode.TreeItemCollapsibleState.Collapsed); 58 | this.contextValue = "Glue Database"; 59 | } 60 | 61 | getTreeItem(element: GlueCatalogTable): vscode.TreeItem { 62 | return element; 63 | } 64 | 65 | getChildren( 66 | element?: GlueCatalogTable 67 | ): Thenable { 68 | return Promise.resolve( 69 | this.glue 70 | .listTables(this.databaseName) 71 | .then((tables) => 72 | tables.map((table) => new GlueCatalogTable(table)) 73 | ) 74 | ); 75 | } 76 | 77 | } 78 | 79 | export class GlueCatalogTable extends vscode.TreeItem { 80 | constructor( 81 | private readonly gluetable: Table 82 | ) { 83 | super(`${gluetable.Name}`); 84 | this.description = `v${gluetable.VersionId}`; 85 | this.contextValue = "GlueCatalogTable"; 86 | this.id = `${gluetable.Name} ${gluetable.DatabaseName}`; 87 | this.tooltip = gluetable.VersionId; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /templates/pyspark-emr-7_x.dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image details: https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images-steps.html 2 | # Arguments used to build the image URI - update to your desired region/EMR release version per the link above 3 | ARG RELEASE="emr-7.2.0" 4 | ARG 
RELEASE_TAG="latest" 5 | ARG REGION="us-west-2" 6 | ARG EMR_ACCOUNT_ID="895885662937" 7 | ARG TARGETARCH 8 | 9 | FROM ${EMR_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/spark/${RELEASE}:${RELEASE_TAG} 10 | 11 | # Switch to the root user to do some customization 12 | USER root 13 | 14 | # Update Spark config for local development 15 | RUN echo -e "\nspark.submit.deployMode\tclient\nspark.master\tlocal[*]\nspark.hadoop.fs.s3.customAWSCredentialsProvider\tcom.amazonaws.auth.EnvironmentVariableCredentialsProvider\n" >> /etc/spark/conf/spark-defaults.conf 16 | 17 | # Use the Glue Data Catalog 18 | RUN echo -e "\n# Enable Glue Data Catalog\nspark.sql.catalogImplementation\thive\nspark.hadoop.hive.metastore.client.factory.class\tcom.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory\n" >> /etc/spark/conf/spark-defaults.conf 19 | 20 | # Configure log4j to ignore EC2 metadata access failure-related error messages 21 | RUN if [ -f /etc/spark/conf/log4j.properties ]; then echo -e "\n\nlog4j.logger.com.amazonaws.internal.InstanceMetadataServiceResourceFetcher=FATAL\nlog4j.logger.com.amazonaws.util.EC2MetadataUtils=FATAL" >> /etc/spark/conf/log4j.properties; fi 22 | RUN if [ -f /etc/spark/conf/log4j2.properties ]; then echo -e "\n\nlogger.metadata.name = com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.internal.InstanceMetadataServiceResourceFetcher\nlogger.metadata.level = fatal\nlogger.ec2meta.name = com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.util.EC2MetadataUtils\nlogger.ec2meta.level = fatal\n" >> /etc/spark/conf/log4j2.properties; fi 23 | 24 | # Don't log INFO messages to the console 25 | RUN if [ -f /etc/spark/conf/log4j.properties ]; then sed -i s/log4j.rootCategory=.*/log4j.rootCategory=WARN,console/ /etc/spark/conf/log4j.properties; fi 26 | RUN if [ -f /etc/spark/conf/log4j2.properties ]; then sed -i 's/rootLogger.level = info/rootLogger.level = warn/' /etc/spark/conf/log4j2.properties; fi 27 | 28 | # Allow hadoop user to sudo for admin tasks 29 
| RUN dnf install -y sudo && \ 30 | echo "hadoop ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 31 | 32 | # Upgrade to AWS CLI v2 33 | RUN dnf install -y git unzip 34 | RUN if [ "$TARGETARCH" = "arm64" ]; then curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"; else curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; fi && \ 35 | unzip awscliv2.zip && \ 36 | ./aws/install && \ 37 | rm -rf aws awscliv2.zip 38 | 39 | # ipykernel depends on psutil, which does not publish wheels for aarch64 40 | RUN if [ "$TARGETARCH" != "amd64" ]; then dnf install -y gcc python3-devel; fi 41 | 42 | # Upgrade pip first 43 | RUN dnf update pip 44 | 45 | # Enable Jupyter notebooks and pytest 46 | RUN python3 -m pip install \ 47 | ipykernel \ 48 | pytest 49 | 50 | # Switch back to the default user 51 | USER hadoop:hadoop 52 | -------------------------------------------------------------------------------- /templates/pyspark-emr-6_x.dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image details: https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/docker-custom-images-steps.html 2 | # Arguments used to build the image URI - update to your desired region/EMR release version per the link above 3 | ARG RELEASE="emr-6.10.0" 4 | ARG RELEASE_TAG="latest" 5 | ARG REGION="us-west-2" 6 | ARG EMR_ACCOUNT_ID="895885662937" 7 | ARG TARGETARCH 8 | 9 | FROM ${EMR_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/spark/${RELEASE}:${RELEASE_TAG} 10 | 11 | # Switch to the root user to do some customization 12 | USER root 13 | 14 | # Update Spark config for local development 15 | RUN echo -e "\nspark.submit.deployMode\tclient\nspark.master\tlocal[*]\nspark.hadoop.fs.s3.customAWSCredentialsProvider\tcom.amazonaws.auth.EnvironmentVariableCredentialsProvider\n" >> /etc/spark/conf/spark-defaults.conf 16 | 17 | # Use the Glue Data Catalog 18 | RUN echo -e "\n# Enable Glue Data 
Catalog\nspark.sql.catalogImplementation\thive\nspark.hadoop.hive.metastore.client.factory.class\tcom.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory\n" >> /etc/spark/conf/spark-defaults.conf 19 | 20 | # Configure log4j to ignore EC2 metadata access failure-related error messages 21 | RUN if [ -f /etc/spark/conf/log4j.properties ]; then echo -e "\n\nlog4j.logger.com.amazonaws.internal.InstanceMetadataServiceResourceFetcher=FATAL\nlog4j.logger.com.amazonaws.util.EC2MetadataUtils=FATAL" >> /etc/spark/conf/log4j.properties; fi 22 | RUN if [ -f /etc/spark/conf/log4j2.properties ]; then echo -e "\n\nlogger.metadata.name = com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.internal.InstanceMetadataServiceResourceFetcher\nlogger.metadata.level = fatal\nlogger.ec2meta.name = com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.util.EC2MetadataUtils\nlogger.ec2meta.level = fatal\n" >> /etc/spark/conf/log4j2.properties; fi 23 | 24 | # Don't log INFO messages to the console 25 | RUN if [ -f /etc/spark/conf/log4j.properties ]; then sed -i s/log4j.rootCategory=.*/log4j.rootCategory=WARN,console/ /etc/spark/conf/log4j.properties; fi 26 | RUN if [ -f /etc/spark/conf/log4j2.properties ]; then sed -i 's/rootLogger.level = info/rootLogger.level = warn/' /etc/spark/conf/log4j2.properties; fi 27 | 28 | # Allow hadoop user to sudo for admin tasks 29 | RUN yum install -y sudo && \ 30 | echo "hadoop ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 31 | 32 | # Upgrade to AWS CLI v2 33 | RUN yum install -y git unzip 34 | RUN if [ "$TARGETARCH" = "arm64" ]; then curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"; else curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; fi && \ 35 | unzip awscliv2.zip && \ 36 | ./aws/install && \ 37 | rm -rf aws awscliv2.zip 38 | 39 | # ipykernel depends on psutil, which does not publish wheels for aarch64 40 | RUN if [ "$TARGETARCH" != "amd64" ]; then yum install -y gcc python3-devel; fi 
41 | 42 | # Upgrade pip first 43 | RUN python3 -m pip install -U pip 44 | 45 | # Enable Jupyter notebooks and pytest 46 | RUN python3 -m pip install \ 47 | ipykernel \ 48 | pytest 49 | 50 | # Switch back to the default user 51 | USER hadoop:hadoop 52 | -------------------------------------------------------------------------------- /media/emr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon-Resource/Analytics/Res_Amazon-EMR_HDFS-Cluster_48_Light 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /resources/dark/emr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon-Resource/Analytics/Res_Amazon-EMR_HDFS-Cluster_48_Dark 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /resources/light/emr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Icon-Resource/Analytics/Res_Amazon-EMR_HDFS-Cluster_48_Light 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. 
Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 
45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /templates/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.234.0/containers/debian 3 | { 4 | "name": "EMR", 5 | "build": { 6 | "dockerfile": "Dockerfile", 7 | // Update 'VARIANT' to pick a Debian version: bullseye, buster 8 | // Use bullseye on local arm64/Apple Silicon. 9 | "args": { 10 | "RELEASE": "emr-6.10.0", 11 | "RELEASE_TAG": "latest", 12 | "REGION": "us-west-2", 13 | "EMR_ACCOUNT_ID": "895885662937" 14 | } 15 | }, 16 | 17 | // Variables defined here will override those in runArgs below. 18 | // So users should have one _OR_ the other. But not both. 19 | // Ideally, --env-file would be optional and override these, but it's the opposite. 
20 | "containerEnv": { 21 | // "AWS_ACCESS_KEY_ID": "${localEnv:AWS_ACCESS_KEY_ID}", 22 | // "AWS_SECRET_ACCESS_KEY": "${localEnv:AWS_SECRET_ACCESS_KEY}", 23 | // "AWS_SESSION_TOKEN": "${localEnv:AWS_SESSION_TOKEN}", 24 | // "AWS_REGION": "${localEnv:AWS_REGION}", 25 | "AWS_EC2_METADATA_DISABLED": "true" 26 | }, 27 | 28 | "remoteEnv": { 29 | "PYTHONPATH": "/usr/lib/spark/python/lib/py4j-src.zip:/usr/lib/spark/python", 30 | "PATH": "${containerEnv:PATH}:/home/hadoop/.local/bin" 31 | }, 32 | 33 | // "runArgs": ["--env-file", "${localWorkspaceFolder}/.env"], 34 | 35 | // "postStartCommand": "if [ ! -v AWS_REGION ]; then export AWS_REGION=us-east-1; fi", 36 | // This unfortunately runs as the hadoop user, so doesn't quite work 37 | // "postCreateCommand": "echo alias pyspark=\\'pyspark --deploy-mode client --master local[*] --conf spark.hadoop.fs.s3.customAWSCredentialsProvider=com.amazonaws.auth.EnvironmentVariableCredentialsProvider\\' >> /etc/bash.bashrc", 38 | // "initializeCommand": "AWS_PROFILE=${env:AWS_PROFILE} aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 895885662937.dkr.ecr.us-west-2.amazonaws.com", 39 | "postStartCommand": "sudo /usr/lib/spark/sbin/start-history-server.sh", 40 | 41 | // Set *default* container specific settings.json values on container create. 42 | "settings": { 43 | "python.defaultInterpreterPath": "python3" 44 | }, 45 | 46 | // Add the IDs of extensions you want installed when the container is created. 47 | "extensions": ["ms-python.python", "ms-python.black-formatter", "AmazonEMR.emr-tools"], 48 | 49 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 50 | "forwardPorts": [18080] 51 | 52 | // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. 
53 | // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], 54 | // This is probably the *RIGHT* way to do typical aws creds, but it doesn't work with credential_process 55 | // “mounts”: [ “source=${localEnv:HOME}/.aws,target=/root/.aws,type=bind,consistency=cached” ] 56 | 57 | // Uncomment when using a ptrace-based debugger like C++, Go, and Rust 58 | // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], 59 | 60 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 61 | // "remoteUser": "vscode", 62 | } 63 | -------------------------------------------------------------------------------- /src/explorer/emrServerless.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | DefaultEMRServerlessClient, 7 | JobRun, 8 | } from "../clients/emrServerlessClient"; 9 | import { globals } from "../extension"; 10 | 11 | export class EMRServerlessNode 12 | implements vscode.TreeDataProvider 13 | { 14 | private _onDidChangeTreeData: vscode.EventEmitter< 15 | EMRApplicationNode | undefined | null | void 16 | > = new vscode.EventEmitter(); 17 | readonly onDidChangeTreeData: vscode.Event< 18 | EMRApplicationNode | undefined | null | void 19 | > = this._onDidChangeTreeData.event; 20 | 21 | refresh(): void { 22 | this._onDidChangeTreeData.fire(); 23 | } 24 | 25 | public constructor(private readonly emr: DefaultEMRServerlessClient) {} 26 | 27 | getTreeItem(element: EMRApplicationNode): vscode.TreeItem { 28 | return element; 29 | } 30 | 31 | async getChildren(element?: EMRApplicationNode): Promise { 32 | if (element) { 33 | return Promise.resolve(element.getChildren()); 34 | } else { 35 | const applications = await this.emr.listApplications(); 36 | if (applications.length === 
0) { 37 | return Promise.resolve([new vscode.TreeItem("No applications found")]);; 38 | } else { 39 | return Promise.resolve( 40 | applications.map( 41 | (app) => new EMRApplicationNode(app.id!, app.name!, this.emr) 42 | ) 43 | ); 44 | } 45 | } 46 | } 47 | } 48 | 49 | export class EMRApplicationNode extends vscode.TreeItem { 50 | constructor( 51 | public readonly id: string, 52 | private readonly name: string, 53 | private readonly emr: DefaultEMRServerlessClient 54 | ) { 55 | super(name, vscode.TreeItemCollapsibleState.Collapsed); 56 | this.tooltip = `${this.name} (${this.id})`; 57 | this.description = this.id; 58 | this.contextValue = "EMRServerlessApplication"; 59 | } 60 | 61 | getTreeItem(element: EMRServerlessJob): vscode.TreeItem { 62 | return element; 63 | } 64 | 65 | getChildren(element?: EMRServerlessJob): Thenable { 66 | return Promise.resolve( 67 | this.emr 68 | .listJobRuns(this.id) 69 | .then((jobruns) => 70 | jobruns.map((jobRun) => new EMRServerlessJob(this.id, jobRun)) 71 | ) 72 | ); 73 | } 74 | } 75 | 76 | export class EMRServerlessJob extends vscode.TreeItem { 77 | constructor( 78 | private readonly virtualClusterId: string, 79 | private readonly jobRun: JobRun 80 | ) { 81 | super(`${jobRun.name || jobRun.id} [${jobRun.state}]`); 82 | this.id = jobRun.id; 83 | this.description = jobRun.id; 84 | this.tooltip = jobRun.stateDetails; 85 | this.contextValue = "EMRServerlessJob"; 86 | 87 | if (jobRun.state === "FAILED") { 88 | this.iconPath = { 89 | dark: vscode.Uri.joinPath( 90 | globals.context.extensionUri, 91 | "resources", 92 | "dark", 93 | "alert-circle.svg" 94 | ), 95 | light: vscode.Uri.joinPath( 96 | globals.context.extensionUri, 97 | "resources", 98 | "light", 99 | "alert-circle.svg" 100 | ), 101 | }; 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/explorer/emrContainers.ts: -------------------------------------------------------------------------------- 1 | // Copyright 
Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | DefaultEMRContainersClient, 7 | JobRun, 8 | } from "../clients/emrContainersClient"; 9 | import { globals } from "../extension"; 10 | 11 | export class EMRContainersNode 12 | implements vscode.TreeDataProvider 13 | { 14 | private _onDidChangeTreeData: vscode.EventEmitter< 15 | EMRVirtualClusterNode | undefined | null | void 16 | > = new vscode.EventEmitter< 17 | EMRVirtualClusterNode | undefined | null | void 18 | >(); 19 | readonly onDidChangeTreeData: vscode.Event< 20 | EMRVirtualClusterNode | undefined | null | void 21 | > = this._onDidChangeTreeData.event; 22 | 23 | refresh(): void { 24 | this._onDidChangeTreeData.fire(); 25 | } 26 | 27 | public constructor(private readonly emr: DefaultEMRContainersClient) {} 28 | 29 | getTreeItem(element: EMRVirtualClusterNode): vscode.TreeItem { 30 | return element; 31 | } 32 | 33 | async getChildren( 34 | element?: EMRVirtualClusterNode 35 | ): Promise { 36 | if (element) { 37 | return Promise.resolve(element.getChildren()); 38 | } else { 39 | const virtualClusters = await this.emr.listVirtualClusters(); 40 | if (virtualClusters.length ===0) { 41 | return Promise.resolve([new vscode.TreeItem("No virtual clusters found")]);; 42 | } else { 43 | return Promise.resolve( 44 | virtualClusters.map( 45 | (cluster) => 46 | new EMRVirtualClusterNode(cluster.id!, cluster.name!, this.emr) 47 | ) 48 | ); 49 | } 50 | } 51 | } 52 | } 53 | 54 | export class EMRVirtualClusterNode extends vscode.TreeItem { 55 | constructor( 56 | public readonly id: string, 57 | private readonly name: string, 58 | private readonly emr: DefaultEMRContainersClient 59 | ) { 60 | super(name, vscode.TreeItemCollapsibleState.Collapsed); 61 | this.tooltip = `${this.name} (${this.id})`; 62 | this.description = this.id; 63 | this.contextValue = "EMRVirtualCluster"; 64 | } 65 | 66 | getTreeItem(element: 
EMRVirtualClusterJob): vscode.TreeItem { 67 | return element; 68 | } 69 | 70 | getChildren( 71 | element?: EMRVirtualClusterJob 72 | ): Thenable { 73 | return Promise.resolve( 74 | this.emr 75 | .listJobRuns(this.id) 76 | .then((jobruns) => 77 | jobruns.map((jobRun) => new EMRVirtualClusterJob(this.id, jobRun)) 78 | ) 79 | ); 80 | } 81 | } 82 | 83 | export class EMRVirtualClusterJob extends vscode.TreeItem { 84 | constructor( 85 | private readonly virtualClusterId: string, 86 | private readonly jobRun: JobRun 87 | ) { 88 | super(`${jobRun.name || jobRun.id} [${jobRun.state}]`); 89 | this.id = jobRun.id; 90 | this.description = jobRun.id; 91 | this.tooltip = jobRun.stateDetails; 92 | this.contextValue = "EMRVirtualClusterJob"; 93 | 94 | if (jobRun.state === "FAILED") { 95 | this.iconPath = { 96 | dark: vscode.Uri.joinPath( 97 | globals.context.extensionUri, 98 | "resources", 99 | "dark", 100 | "alert-circle.svg" 101 | ), 102 | light: vscode.Uri.joinPath( 103 | globals.context.extensionUri, 104 | "resources", 105 | "light", 106 | "alert-circle.svg" 107 | ), 108 | }; 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/clients/glueClient.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | GlueClient, 7 | GetDatabasesCommand, 8 | Database, 9 | Table, 10 | GetTablesRequest, 11 | GetTablesCommand, 12 | GetTableCommand, 13 | GetTableRequest 14 | } from "@aws-sdk/client-glue"; 15 | import { Globals } from "../extension"; 16 | 17 | export class DefaultGlueClient { 18 | public constructor(private readonly globals: Globals) { } 19 | 20 | private async createGlueClient(): Promise { 21 | return new GlueClient(this.globals.awsContext.getClientConfig()); 22 | } 23 | 24 | public async listDatabases(): Promise { 25 | this.globals.outputChannel.appendLine( 26 | `Glue Catalog: Fetching databases from ${this.globals.awsContext.getRegion()} region.` 27 | ); 28 | const glue = await this.createGlueClient(); 29 | let databases; 30 | try { 31 | const result = await glue.send(new GetDatabasesCommand({})); 32 | 33 | databases = result.DatabaseList ?? []; 34 | } catch (error) { 35 | vscode.window.showErrorMessage( 36 | "Error fetching Glue Databases!" + error 37 | ); 38 | return []; 39 | } 40 | 41 | return databases; 42 | } 43 | 44 | 45 | public async listTables(databaseName: string): Promise { 46 | 47 | this.globals.outputChannel.appendLine( 48 | `Glue Catalog: Fetching tables of database ${databaseName} from ${this.globals.awsContext.getRegion()} region.` 49 | ); 50 | const glue = await this.createGlueClient(); 51 | let tables: Table[]; 52 | 53 | try { 54 | 55 | let input: GetTablesRequest = { 56 | // eslint-disable-next-line @typescript-eslint/naming-convention 57 | DatabaseName: databaseName, 58 | }; 59 | 60 | const result = await glue.send(new GetTablesCommand(input)); 61 | 62 | tables = result.TableList ?? []; 63 | 64 | } catch (error) { 65 | vscode.window.showErrorMessage( 66 | "Error fetching Glue Tables!" 
+ error 67 | ); 68 | return []; 69 | 70 | } 71 | 72 | return tables; 73 | 74 | } 75 | 76 | public async getTable(tableName: string, databaseName: string): Promise{ 77 | 78 | this.globals.outputChannel.appendLine( 79 | `Glue Catalog: Fetching table of database ${databaseName} from ${this.globals.awsContext.getRegion()} region.` 80 | ); 81 | const glue = await this.createGlueClient(); 82 | let table: Table | undefined; 83 | 84 | try { 85 | 86 | let input: GetTableRequest = { 87 | // eslint-disable-next-line @typescript-eslint/naming-convention 88 | DatabaseName: databaseName, 89 | // eslint-disable-next-line @typescript-eslint/naming-convention 90 | Name: tableName, 91 | }; 92 | 93 | const result = await glue.send(new GetTableCommand(input)); 94 | 95 | table = result.Table || undefined; 96 | 97 | } catch (error) { 98 | vscode.window.showErrorMessage( 99 | "Error fetching Glue Tables!" + error 100 | ); 101 | return undefined; 102 | 103 | } 104 | 105 | return table; 106 | 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /src/utils/quickPickItem.ts: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------------------------- 2 | * Copyright (c) Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See License.txt in the project root for license information. 4 | *--------------------------------------------------------------------------------------------*/ 5 | 6 | import * as path from "path"; 7 | import { Uri, window, Disposable } from "vscode"; 8 | import { QuickPickItem } from "vscode"; 9 | import { workspace } from "vscode"; 10 | import glob = require("glob"); 11 | 12 | /** 13 | * A file opener using window.createQuickPick(). 14 | * 15 | * It shows how the list of items can be dynamically updated based on 16 | * the user's input in the filter field. 
17 | */ 18 | export async function quickOpen() { 19 | const uri = await pickFile(); 20 | if (uri) { 21 | const document = await workspace.openTextDocument(uri); 22 | await window.showTextDocument(document); 23 | } 24 | } 25 | 26 | class FileItem implements QuickPickItem { 27 | label: string; 28 | description: string; 29 | 30 | constructor(public base: Uri, public uri: Uri) { 31 | this.label = path.basename(uri.fsPath); 32 | this.description = path.dirname(path.relative(base.fsPath, uri.fsPath)); 33 | } 34 | } 35 | 36 | class MessageItem implements QuickPickItem { 37 | label: string; 38 | description = ""; 39 | detail: string; 40 | 41 | constructor(public base: Uri, public message: string) { 42 | this.label = message.replace(/\r?\n/g, " "); 43 | this.detail = base.fsPath; 44 | } 45 | } 46 | 47 | export async function pickFile(placeHolder?: string) { 48 | const disposables: Disposable[] = []; 49 | try { 50 | return await new Promise((resolve, reject) => { 51 | const input = window.createQuickPick(); 52 | input.placeholder = placeHolder 53 | ? placeHolder 54 | : "Type to search for files"; 55 | disposables.push( 56 | input.onDidChangeValue((value) => { 57 | input.items = []; 58 | if (!value) { 59 | return; 60 | } 61 | input.busy = true; 62 | const cwds = workspace.workspaceFolders 63 | ? 
workspace.workspaceFolders.map((f) => f.uri.fsPath) 64 | : [process.cwd()]; 65 | cwds.map((cwd) => { 66 | glob(`**/${value}*`, {cwd, nodir: true}, function (err, filenames) { 67 | if (!err) { 68 | input.items = input.items.concat( 69 | filenames.map( 70 | (filename) => 71 | new FileItem( 72 | Uri.file(cwd), 73 | Uri.file(path.join(cwd, filename)) 74 | ) 75 | ) 76 | ); 77 | } 78 | if (err) { 79 | input.items = input.items.concat([ 80 | new MessageItem(Uri.file(cwd), err.message), 81 | ]); 82 | } 83 | input.busy = false; 84 | }); 85 | }); 86 | }), 87 | input.onDidChangeSelection((items) => { 88 | const item = items[0]; 89 | if (item instanceof FileItem) { 90 | resolve(item.uri); 91 | input.hide(); 92 | } 93 | }), 94 | input.onDidHide(() => { 95 | resolve(undefined); 96 | input.dispose(); 97 | }) 98 | ); 99 | input.show(); 100 | }); 101 | } finally { 102 | disposables.forEach((d) => d.dispose()); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /.github/workflows/publish-extension.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: 5 | - published 6 | workflow_dispatch: 7 | inputs: 8 | publishMS: 9 | description: "Publish to the Microsoft Marketplace" 10 | type: boolean 11 | required: true 12 | default: "true" 13 | publishOVSX: 14 | description: "Publish to Open VSX" 15 | type: boolean 16 | required: true 17 | default: "true" 18 | publishGH: 19 | description: "Publish to GitHub Releases" 20 | type: boolean 21 | required: true 22 | default: "true" 23 | 24 | jobs: 25 | package: 26 | name: Package 27 | runs-on: ubuntu-latest 28 | outputs: 29 | packageName: ${{ steps.setup.outputs.packageName }} 30 | tag: ${{ steps.setup-tag.outputs.tag }} 31 | version: ${{ steps.setup-tag.outputs.version }} 32 | steps: 33 | - uses: actions/checkout@v3 34 | - uses: actions/setup-node@v3 35 | with: 36 | node-version: 18 37 | registry-url: 
https://registry.npmjs.org/ 38 | 39 | - name: Install dependencies 40 | run: npm i 41 | 42 | - name: Setup package path 43 | id: setup 44 | run: echo "packageName=$(node -e "console.log(require('./package.json').name + '-' + require('./package.json').version + '.vsix')")" >> "$GITHUB_OUTPUT" 45 | 46 | - name: Package 47 | run: | 48 | npx vsce package --out ${{ steps.setup.outputs.packageName }} 49 | 50 | - uses: actions/upload-artifact@v3 51 | with: 52 | name: ${{ steps.setup.outputs.packageName }} 53 | path: ./${{ steps.setup.outputs.packageName }} 54 | if-no-files-found: error 55 | 56 | - name: Setup tag # exports 'tag' and 'version' step outputs; in pwsh the output file path is $env:GITHUB_OUTPUT 57 | id: setup-tag 58 | run: | 59 | $version = (Get-Content ./package.json -Raw | ConvertFrom-Json).version 60 | Write-Host "tag: v$version" 61 | "tag=v$version" >> $env:GITHUB_OUTPUT 62 | "version=$version" >> $env:GITHUB_OUTPUT 63 | shell: pwsh 64 | 65 | publishMS: 66 | name: Publish to VS marketplace 67 | runs-on: ubuntu-latest 68 | needs: package 69 | if: github.event.inputs.publishMS == 'true' 70 | steps: 71 | - uses: actions/checkout@v3 72 | - uses: actions/download-artifact@v3 73 | with: 74 | name: ${{ needs.package.outputs.packageName }} 75 | - name: Publish to VS marketplace 76 | run: npx vsce publish --packagePath ./${{ needs.package.outputs.packageName }} -p ${{ secrets.VSCE_PAT }} 77 | 78 | publishOVSX: 79 | name: Publish to Open VSX 80 | runs-on: ubuntu-latest 81 | needs: package 82 | if: github.event.inputs.publishOVSX == 'true' 83 | steps: 84 | - uses: actions/checkout@v3 85 | - uses: actions/download-artifact@v3 86 | with: 87 | name: ${{ needs.package.outputs.packageName }} 88 | - name: Publish to Open VSX 89 | run: npx ovsx publish ./${{ needs.package.outputs.packageName }} -p ${{ secrets.OVSX_PAT }} 90 | 91 | publishGH: 92 | name: Publish to GitHub releases 93 | runs-on: ubuntu-latest 94 | permissions: 95 | contents: write 96 | needs: package 97 | if: github.event.inputs.publishGH == 'true' 98 | steps: 99 | - uses: 
actions/download-artifact@v3 100 | with: 101 | name: ${{ needs.package.outputs.packageName }} 102 | 103 | - name: Create Release 104 | id: create-release 105 | uses: softprops/action-gh-release@v1 106 | with: 107 | files: ${{ needs.package.outputs.packageName }} 108 | tag_name: ${{ needs.package.outputs.tag }} 109 | name: Release ${{ needs.package.outputs.version }} 110 | draft: false 111 | prerelease: false 112 | generate_release_notes: true 113 | -------------------------------------------------------------------------------- /src/clients/emrServerlessClient.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | EMRServerlessClient, 7 | JobDriver, 8 | JobRunSummary, 9 | ListApplicationsCommand, 10 | ListJobRunsCommand, 11 | ListJobRunsRequest, 12 | StartJobRunCommand, 13 | StartJobRunCommandInput, 14 | StartJobRunRequest, 15 | } from "@aws-sdk/client-emr-serverless"; 16 | import { Globals } from "../extension"; 17 | 18 | export interface Application { 19 | readonly id?: string; 20 | readonly name?: string; 21 | } 22 | 23 | export interface JobRun { 24 | readonly applicationId?: string; 25 | readonly id?: string; 26 | readonly name?: string; 27 | readonly state?: string; 28 | readonly stateDetails?: string; 29 | } 30 | 31 | export class DefaultEMRServerlessClient { 32 | public constructor(private readonly globals: Globals) {} 33 | 34 | private async createEMRServerless(): Promise { 35 | return new EMRServerlessClient(this.globals.awsContext.getClientConfig()); 36 | } 37 | 38 | public async listApplications(): Promise { 39 | this.globals.outputChannel.appendLine( 40 | `EMR Serverless: Fetching applications from ${this.globals.awsContext.getRegion()} region.` 41 | ); 42 | const emr = await this.createEMRServerless(); 43 | let applications: Application[]; 44 | 
45 | try { 46 | const result = await emr.send(new ListApplicationsCommand({})); 47 | 48 | applications = result.applications ?? []; 49 | } catch (error) { 50 | vscode.window.showErrorMessage( 51 | "Error fetching EMR Serverless applications!" + error 52 | ); 53 | return []; 54 | } 55 | 56 | return applications; 57 | } 58 | 59 | public async listJobRuns(applicationId: string): Promise { 60 | this.globals.outputChannel.appendLine( 61 | `EMR Serverless: Fetching job runs for application ${applicationId}.` 62 | ); 63 | const emr = await this.createEMRServerless(); 64 | let jobRuns: JobRun[] = []; 65 | let request: ListJobRunsRequest = { 66 | applicationId: applicationId, 67 | }; 68 | 69 | try { 70 | do { 71 | const result = await emr.send(new ListJobRunsCommand(request)); 72 | jobRuns = jobRuns.concat(result.jobRuns ?? []); 73 | if (!result.nextToken || jobRuns.length >= 100) { 74 | break; 75 | } 76 | request['nextToken'] = result.nextToken; 77 | } while (request['nextToken']); 78 | } catch (error) { 79 | vscode.window.showErrorMessage( 80 | "Error fetching EMR application job runs!" 
+ error 81 | ); 82 | } 83 | 84 | return jobRuns; 85 | } 86 | /** Starts a Spark job run on the given EMR Serverless application, optionally logging to the S3 prefix `logPrefix`. Returns the new run's application ID and job run ID, or an empty object if the request failed (the error is shown to the user). */ 87 | public async startJobRun(applicationId: string, executionRoleARN: string, entryPoint: string, logPrefix: string): Promise { 88 | this.globals.outputChannel.appendLine( 89 | `EMR Serverless: Starting job run (${applicationId}).` 90 | ); 91 | 92 | const emr = await this.createEMRServerless(); 93 | let jobRun: JobRun = {}; 94 | 95 | let jobRunParams: StartJobRunCommandInput = { 96 | applicationId, 97 | executionRoleArn: executionRoleARN, 98 | jobDriver: { 99 | sparkSubmit: {entryPoint: entryPoint} 100 | } 101 | }; 102 | 103 | if (logPrefix) { 104 | jobRunParams.configurationOverrides = { 105 | monitoringConfiguration: { 106 | s3MonitoringConfiguration: {logUri: logPrefix} 107 | } 108 | }; 109 | } 110 | 111 | try { 112 | const result = await emr.send(new StartJobRunCommand(jobRunParams)); 113 | // Surface the identifiers of the started run; previously the response was discarded and {} was always returned. 114 | jobRun = { applicationId: result.applicationId, id: result.jobRunId }; 115 | } catch (error) { 116 | vscode.window.showErrorMessage( 117 | "There was an error running the EMR Serverless job:" + error 118 | ); 119 | } 120 | 121 | 122 | return jobRun; 123 | } 124 | } -------------------------------------------------------------------------------- /src/emr_deploy.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | // We can package and deploy EMR jobs 5 | // EMR on EC2 6 | // - Optionally package into (virtualenv|container) 7 | // - Upload script to S3 (ask for deployment bucket once) 8 | // - Call EMR Add Step 9 | // - Show window with driver logs? 
10 | // EMR on EKS 11 | // - Optionally package into container image / push to ECR 12 | // - Upload script to S3 (ask for deployment bucket once) 13 | // - Ask for jobRole 14 | // - Call startJobRun 15 | // EMR Serverless 16 | // - Optionally package into virtualenv 17 | // - Upload script to S3 (ask for deployment bucket once) 18 | // - Ask for jobRole 19 | // - Ask for S3 log bucket/prefix 20 | // - Call startJobRun 21 | 22 | import { QuickPickOptions, window } from "vscode"; 23 | import { EMRContainersProvider } from "./emr_containers"; 24 | import { EMREC2Provider } from "./emr_explorer"; 25 | import { EMRServerlessProvider } from "./emr_serverless"; 26 | 27 | export class EMRDeployer { 28 | emrOnEC2: EMREC2Provider; 29 | emrOnEKS: EMRContainersProvider; 30 | emrServerless: EMRServerlessProvider; 31 | 32 | constructor(emrEC2: EMREC2Provider, emrEks: EMRContainersProvider, emrServerless: EMRServerlessProvider) { 33 | this.emrOnEC2 = emrEC2; 34 | this.emrOnEKS = emrEks; 35 | this.emrServerless = emrServerless; 36 | } 37 | 38 | public async run() { 39 | const deployType = await this.pickDeploymentType(); 40 | console.log(deployType); 41 | if (deployType === "EMR on EC2") { 42 | const clusterID = await this.pickEMRCluster(); 43 | console.log(clusterID); 44 | const bucket = await this.pickS3Bucket(); 45 | console.log(bucket); 46 | } else if (deployType === "EMR on EKS") { 47 | const clusterID = await this.pickVirtualEMRCluster(); 48 | console.log(clusterID); 49 | const bucket = await this.pickS3Bucket(); 50 | console.log(bucket); 51 | const role = await this.pickEMRContainersRole(); 52 | } else if (deployType === "EMR Serverless") { 53 | // TODO (2022-06-15): We need a way for the user to interrupt this workflow 54 | const applicationID = await this.pickApplicationID(); 55 | const bucket = await this.pickS3Bucket(); 56 | const role = await this.pickEMRServerlessRole(); 57 | 58 | const jobId = this.emrServerless.triggerServerlessJob( 59 | applicationID!, 60 | 
`s3://${bucket}/code/pyspark/extreme_weather.py`, 61 | `s3://${bucket}/logs/`, 62 | role!, 63 | ); 64 | console.log(jobId); 65 | } 66 | } 67 | 68 | async pickDeploymentType() { 69 | const pick = await window.showQuickPick([ 70 | "EMR on EC2", 71 | "EMR on EKS", 72 | "EMR Serverless", 73 | ]); 74 | return pick; 75 | } 76 | 77 | // Should probably move these into the providers themselves. 78 | async pickEMRCluster() { 79 | const pick = await window.showQuickPick(["j-123", "j-456"]); 80 | return pick; 81 | } 82 | 83 | async pickVirtualEMRCluster() { 84 | const pick = await window.showQuickPick(["1234567890"]); 85 | return pick; 86 | } 87 | 88 | async pickApplicationID() { 89 | return await window.showInputBox({ 90 | title: "Select an EMR Serverless application", 91 | placeHolder: "00f1d2h27340f60l", 92 | ignoreFocusOut: true, 93 | }); 94 | } 95 | 96 | async pickS3Bucket() { 97 | return await window.showQuickPick(["dacort-demo-code"]); 98 | } 99 | 100 | async pickEMRContainersRole() { 101 | return await window.showInputBox({ 102 | title: "Select EMR on EKS execution role", 103 | placeHolder: "iam:xxx:role/emr-containers", 104 | // prompt: "This role must have access to ", 105 | ignoreFocusOut: true, // The user might switch to another window to get the role arn 106 | }); 107 | } 108 | 109 | async pickEMRServerlessRole() { 110 | return await window.showInputBox({ 111 | title: "Select EMR Serverless execution role", 112 | placeHolder: "iam:xxx:role/emr-serverless", 113 | // prompt: "This role must have access to ", 114 | ignoreFocusOut: true, // The user might switch to another window to get the role arn 115 | }); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/emr_containers.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

import * as vscode from "vscode";
import {
  EMRContainersClient,
  JobRun,
  ListJobRunsCommand,
  ListVirtualClustersCommand,
} from "@aws-sdk/client-emr-containers";
import { Globals } from "./extension";

/**
 * Tree data provider that lists EMR on EKS virtual clusters at the root and,
 * beneath each cluster, its job runs.
 */
export class EMRContainersProvider
  implements vscode.TreeDataProvider<vscode.TreeItem>
{
  emrContainersClient: EMRContainersClient;
  globals: Globals;
  private _onDidChangeTreeData: vscode.EventEmitter<
    EMRVirtualCluster | undefined | null | void
  > = new vscode.EventEmitter<EMRVirtualCluster | undefined | null | void>();
  readonly onDidChangeTreeData: vscode.Event<
    EMRVirtualCluster | undefined | null | void
  > = this._onDidChangeTreeData.event;

  /** Signals that the whole tree should be reloaded. */
  refresh(): void {
    this._onDidChangeTreeData.fire();
  }

  /**
   * @param globals Shared extension state; the selected region and the output
   *   channel are read from it.
   */
  constructor(globals: Globals) {
    this.globals = globals;
    this.globals.outputChannel.appendLine(
      "Profile is " +
        process.env.AWS_PROFILE +
        " and region is " +
        this.globals.selectedRegion
    );
    this.emrContainersClient = new EMRContainersClient({
      region: this.globals.selectedRegion,
    });
  }

  getTreeItem(element: EMRVirtualCluster): vscode.TreeItem {
    return element;
  }

  /**
   * Returns the job runs of `element`, or the list of virtual clusters when
   * called for the root (no element).
   */
  getChildren(element?: EMRVirtualCluster): Thenable<vscode.TreeItem[]> {
    if (element) {
      return element.getChildren();
    }
    return this.listEMRVirtualClusters(this.emrContainersClient);
  }

  /**
   * Fetches the virtual clusters with a single ListVirtualClusters call.
   * On failure the error is shown to the user and an empty list is returned.
   */
  private async listEMRVirtualClusters(
    client: EMRContainersClient
  ): Promise<EMRVirtualCluster[]> {
    // Announce the fetch before it starts rather than after it has finished.
    vscode.window.showInformationMessage("Fetching EMR Virtual clusters");
    this.globals.outputChannel.appendLine("Fetching EMR Virtual clusters");

    try {
      // Note that this requires aws-sdk<=v3.30.0
      // due to https://github.com/aws/aws-sdk-js-v3/issues/3511
      const result = await client.send(new ListVirtualClustersCommand({}));

      return (result.virtualClusters || []).map(
        (cluster) =>
          new EMRVirtualCluster(client, cluster.name || "", cluster.id || "")
      );
    } catch (error) {
      vscode.window.showErrorMessage(
        "Error fetching EMR virtual clusters: " + error
      );
      console.log("There was an error fetching clusters", error);
      return [];
    }
  }
}

// EOD: Pass cluster to EMRVIrtualCluster
//      Update listJobRuns

/** Tree item for one virtual cluster; its children are the cluster's job runs. */
class EMRVirtualCluster extends vscode.TreeItem {
  constructor(
    private readonly client: EMRContainersClient,
    public readonly name: string,
    public readonly id: string
  ) {
    super(name, vscode.TreeItemCollapsibleState.Collapsed);
    this.tooltip = `${this.name} (${this.id})`;
    this.description = this.id;
  }

  getTreeItem(element: EMRVirtualClusterJob): vscode.TreeItem {
    return element;
  }

  getChildren(
    element?: EMRVirtualClusterJob
  ): Thenable<EMRVirtualClusterJob[]> {
    return this.listJobRuns();
  }

  /**
   * Fetches this cluster's job runs with a single ListJobRuns call.
   * On failure the error is shown to the user and an empty list is returned.
   */
  private async listJobRuns(): Promise<EMRVirtualClusterJob[]> {
    try {
      const result = await this.client.send(
        new ListJobRunsCommand({ virtualClusterId: this.id })
      );
      return (result.jobRuns || []).map(
        (jobRun) => new EMRVirtualClusterJob(this.client, this.id, jobRun)
      );
    } catch (error) {
      vscode.window.showErrorMessage("Error fetching job runs!" + error);
      return [];
    }
  }
}

/** Leaf tree item for one job run of a virtual cluster. */
class EMRVirtualClusterJob extends vscode.TreeItem {
  constructor(
    private readonly client: EMRContainersClient,
    private readonly virtualClusterId: string,
    private readonly jobRun: JobRun
  ) {
    // A job run may have no name; fall back to its id so the label is never undefined.
    super(jobRun.name || jobRun.id || "");
    this.id = jobRun.id;
    this.description = jobRun.state;
    this.tooltip = jobRun.stateDetails;
  }
}

--------------------------------------------------------------------------------
/src/aws_context.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

import { DescribeRegionsCommand, EC2Client } from "@aws-sdk/client-ec2";
import { fromIni } from "@aws-sdk/credential-providers";
import { loadSharedConfigFiles } from "@aws-sdk/shared-ini-file-loader";
import { AwsCredentialIdentityProvider } from "@aws-sdk/types";
import { Event, EventEmitter, window } from "vscode";

interface ClientConfig {
  region: string;
  credentials?: AwsCredentialIdentityProvider;
}

const DEFAULT_PROFILE_NAME = "default";
const DEFAULT_REGION = "us-east-1";

/**
 * Holds the AWS profile and region chosen by the user and builds the client
 * configuration derived from them.
 */
export class AwsContextCommands {
  private _profileName?: string;
  private _selectedRegion?: string;
  // NOTE(review): the event payload type is reconstructed as `void` because
  // the emitters are only ever fired without an argument here; confirm against
  // subscribers elsewhere in the project.
  private _onDidRegionChange = new EventEmitter<void>();
  private _onDidConfigChange = new EventEmitter<void>();

  public constructor() {}

  /**
   * Builds the configuration for a new AWS SDK client: the effective region,
   * plus profile-based credentials when a profile name is known.
   */
  public getClientConfig(): ClientConfig {
    const clientConfig: ClientConfig = { region: this.getRegion() };

    // If the user has an AWS_PROFILE environment set, or they've specified a profile name, use that.
    const profile = this.getProfileName();
    if (profile) {
      clientConfig.credentials = fromIni({ profile });
    }

    return clientConfig;
  }

  /** Fires after the profile or the region has been changed through a command. */
  public get onDidConfigChange(): Event<void> {
    return this._onDidConfigChange.event;
  }

  /** Returns the effective region; never empty. */
  public getRegion(): string {
    // Determine the region in this order
    // 1. Region set explicitly by user or derived from their profile
    // 2. Region defined in environment variables
    // 3. Default region
    return (
      this._selectedRegion ||
      process.env.AWS_REGION ||
      process.env.AWS_DEFAULT_REGION ||
      DEFAULT_REGION
    );
  }

  public get onDidRegionChange(): Event<void> {
    return this._onDidRegionChange.event;
  }

  /** Returns the profile name, or `undefined` when none is set anywhere. */
  public getProfileName(): string | undefined {
    // Prefer locally-set profile over AWS_PROFILE environment variable
    return this._profileName || process.env.AWS_PROFILE;
  }

  /**
   * Command handler: lets the user pick a profile, adopts the region defined
   * in that profile (if any) and notifies config listeners. Does nothing when
   * the pick is dismissed.
   */
  public async onCommandSetProfile(): Promise<void> {
    const profileName = await this.getProfileNameFromUser();
    if (!profileName) {
      return;
    }

    // See if a region is defined in their profile
    const region = (await loadSharedConfigFiles()).configFile?.[profileName]
      ?.region;
    if (region) {
      console.log("Region found in profile: " + region);
      this._selectedRegion = region;
    }

    this._profileName = profileName;
    console.log("Setting profile to", profileName);
    this._onDidConfigChange.fire();
  }

  /** Shows the profiles found in the shared config file as a quick pick. */
  public async getProfileNameFromUser(): Promise<string | undefined> {
    const profiles = await loadSharedConfigFiles();
    const profileNames = Object.keys(profiles.configFile);

    return window.showQuickPick(profileNames, {
      placeHolder: "Select AWS Profile",
    });
  }

  /**
   * Command handler: lets the user pick a region and notifies config
   * listeners. Does nothing when the prompt is dismissed.
   */
  public async onCommandSetRegion(): Promise<void> {
    const region = await this.getRegionFromUser();
    if (!region) {
      return;
    }

    this._selectedRegion = region;
    this._onDidConfigChange.fire();
  }

  /**
   * Asks the user for a region.
   *
   * The region list comes from EC2 DescribeRegions, requested with the same
   * profile and region as every other client so a profile selected in the
   * extension is honoured. When the list cannot be fetched or is empty, a
   * free-text input box is shown instead.
   */
  public async getRegionFromUser(): Promise<string | undefined> {
    const client = new EC2Client(this.getClientConfig());
    try {
      const response = await client.send(new DescribeRegionsCommand({}));
      const regionNames = (response.Regions || [])
        .map((r) => r.RegionName)
        .filter((name): name is string => !!name);

      if (regionNames.length > 0) {
        return await window.showQuickPick(regionNames, {
          placeHolder: "Select AWS Region",
        });
      }
    } catch (err) {
      console.log("Error fetching regions: ", err);
    }
    return this.promptForRegion();
  }

  /** Free-text fallback used when no region list is available. */
  private async promptForRegion(): Promise<string | undefined> {
    return window.showInputBox({
      title: "Set your desired AWS Region",
      placeHolder: "us-east-1",
    });
  }
}

--------------------------------------------------------------------------------
/src/explorer/emrEC2.ts:
--------------------------------------------------------------------------------
1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | Cluster, 7 | ClusterApp, 8 | ClusterStep, 9 | DefaultEMRClient, 10 | } from "../clients/emrClient"; 11 | import { EMREC2Filter } from "../emr_explorer"; 12 | import { globals } from "../extension"; 13 | 14 | export class EMRNode implements vscode.TreeDataProvider { 15 | private _onDidChangeTreeData: vscode.EventEmitter< 16 | EMRClusterNode | undefined | null | void 17 | > = new vscode.EventEmitter(); 18 | readonly onDidChangeTreeData: vscode.Event< 19 | EMRClusterNode | undefined | null | void 20 | > = this._onDidChangeTreeData.event; 21 | 22 | refresh(): void { 23 | this._onDidChangeTreeData.fire(); 24 | } 25 | 26 | public constructor( 27 | private readonly emr: DefaultEMRClient, 28 | private stateFilter: EMREC2Filter 29 | ) { 30 | stateFilter.onDidChange((message) => { 31 | this.refresh(); 32 | }); 33 | } 34 | 35 | getTreeItem(element: EMRClusterNode): vscode.TreeItem { 36 | return element; 37 | } 38 | 39 | async getChildren(element?: EMRClusterNode): Promise { 40 | if (element) { 41 | return Promise.resolve(element.getChildren()); 42 | } else { 43 | const clusters = await this.emr.listClusters(this.stateFilter); 44 | if (clusters.length === 0) { 45 | return Promise.resolve([new vscode.TreeItem("No clusters found")]); 46 | } else { 47 | return Promise.resolve( 48 | clusters.map( 49 | (cluster) => 50 | new EMRClusterNode(cluster.id!, cluster.name!, this.emr) 51 | ) 52 | ); 53 | } 54 | } 55 | } 56 | } 57 | 58 | export class EMRClusterNode extends vscode.TreeItem { 59 | constructor( 60 | public readonly id: string, 61 | private readonly name: string, 62 | private readonly emr: DefaultEMRClient 63 | ) { 64 | super(name, vscode.TreeItemCollapsibleState.Collapsed); 65 | this.tooltip = `${name} (${id})`; 66 | this.description = id; 67 | this.contextValue = "EMRCluster"; 68 | } 69 | 70 | public async getChildren( 71 | element?: EMRClusterNode 72 | ): Promise { 73 | const 
response = await this.emr.describeCluster(this.id); 74 | 75 | return [ 76 | new EMRClusterAppsNode(response?.apps), 77 | response 78 | ? new EMRClusterStepsNode(response, this.emr) 79 | : new vscode.TreeItem("Steps"), 80 | ]; 81 | } 82 | } 83 | 84 | class EMRClusterAppsNode extends vscode.TreeItem { 85 | constructor(private readonly apps: ClusterApp[] | undefined) { 86 | super("Apps", vscode.TreeItemCollapsibleState.Collapsed); 87 | } 88 | 89 | getTreeItem(element: EMRClusterAppsNode): vscode.TreeItem { 90 | return element; 91 | } 92 | 93 | getChildren(): vscode.TreeItem[] { 94 | return (this.apps || []).map((item) => new EMRAppNode(item)); 95 | } 96 | } 97 | 98 | class EMRAppNode extends vscode.TreeItem { 99 | constructor(private readonly app: ClusterApp) { 100 | super(app.name || "Unknown"); 101 | this.description = app.version; 102 | } 103 | } 104 | 105 | class EMRClusterStepsNode extends vscode.TreeItem { 106 | constructor( 107 | private readonly cluster: Cluster, 108 | private readonly emr: DefaultEMRClient 109 | ) { 110 | super("Steps", vscode.TreeItemCollapsibleState.Collapsed); 111 | } 112 | 113 | getTreeItem(element: EMRClusterStepsNode): vscode.TreeItem { 114 | return element; 115 | } 116 | 117 | async getChildren( 118 | element?: EMRClusterStepsNode | undefined 119 | ): Promise { 120 | const response = await this.emr.listSteps(this.cluster.id!); 121 | const emptyStep = new vscode.TreeItem("[No Steps found]"); 122 | if (response.length === 0) { 123 | return [emptyStep]; 124 | } else { 125 | return response.map((item) => new EMRStepNode(item)); 126 | } 127 | } 128 | } 129 | 130 | class EMRStepNode extends vscode.TreeItem { 131 | constructor(private readonly step: ClusterStep) { 132 | super(`${step.name || step.id} [${step.state}]`); 133 | this.id = step.id; 134 | this.description = step.id; 135 | this.tooltip = step.stateDetails; 136 | this.contextValue = "EMRClusterStep"; 137 | 138 | if (step.state === "FAILED") { 139 | this.iconPath = { 140 | dark: 
vscode.Uri.joinPath( 141 | globals.context.extensionUri, 142 | "resources", 143 | "dark", 144 | "alert-circle.svg" 145 | ), 146 | light: vscode.Uri.joinPath( 147 | globals.context.extensionUri, 148 | "resources", 149 | "light", 150 | "alert-circle.svg" 151 | ), 152 | }; 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/panels/glueTablePanel.ts: -------------------------------------------------------------------------------- 1 | import { Table } from "@aws-sdk/client-glue"; 2 | import * as vscode from "vscode"; 3 | import { DefaultGlueClient } from "../clients/glueClient"; 4 | import * as crypto from "crypto"; 5 | 6 | 7 | 8 | export async function getWebviewContent(node: vscode.TreeItem, glueClient: DefaultGlueClient, extensionUri: vscode.Uri, webview: vscode.Webview) { 9 | 10 | const databaseName: string = node.id!.split(" ")[1]; 11 | const tableName: string = node.label!.toString().split(" ")[0]; 12 | 13 | const table: Table | undefined = await glueClient.getTable(tableName, databaseName); 14 | let nonce = crypto.randomBytes(16).toString('base64'); 15 | let jsScript: string = ` 16 | const items = {array-json-objects}; 17 | 18 | function parseSchema(s) { 19 | if (s.startsWith("array<")) { 20 | return new Array(parseSchema(s.slice(6, -1))); 21 | } else if (s.startsWith("struct<")) { 22 | return parseStructFields(s.slice(7, -1)); 23 | } else if (s.indexOf(":") > -1) { 24 | return parseStructFields(s); 25 | } else { 26 | return s; 27 | } 28 | } 29 | 30 | reCommaSplit = RegExp(",(?![^<]*>)"); 31 | reColonSplit = RegExp(":(?![^<]*>)"); 32 | 33 | // Ref: https://spark.apache.org/docs/2.1.2/api/python/_modules/pyspark/sql/types.html 34 | function parseStructFields(s) { 35 | const parts = s.split(reCommaSplit); 36 | const fields = {}; 37 | 38 | parts.forEach((part) => { 39 | const name_and_type = part.split(reColonSplit); 40 | const field_name = name_and_type[0]; 41 | field_type = 
parseSchema(name_and_type[1]); 42 | fields[field_name] = field_type; 43 | }); 44 | return fields; 45 | } 46 | 47 | function loadTableData(tableColumns) { 48 | const table = document.getElementById("glueTableColumn"); 49 | 50 | tableColumns.forEach( tableColumn => { 51 | let typeValue = tableColumn.Type; 52 | if (typeValue && typeValue.indexOf('<') > -1) { 53 | typeValue = "struct: " + JSON.stringify(parseSchema(typeValue), null, 2); 54 | } 55 | let row = table.insertRow(); 56 | let name = row.insertCell(0); 57 | p = document.createElement("pre") 58 | p.appendChild(document.createTextNode(tableColumn.Name)); 59 | name.appendChild(p); 60 | name.className = "column1"; 61 | let dataType = row.insertCell(1); 62 | p2 = document.createElement("pre") 63 | p2.appendChild(document.createTextNode(typeValue)); 64 | dataType.appendChild(p2); 65 | }); 66 | } 67 | 68 | loadTableData(items);`; 69 | 70 | jsScript = jsScript.replace("{array-json-objects}", JSON.stringify(table?.StorageDescriptor?.Columns)); 71 | const codiconsUri = webview.asWebviewUri(vscode.Uri.joinPath(extensionUri, 'node_modules', '@vscode/codicons', 'dist', 'codicon.css')); 72 | const styleUri = webview.asWebviewUri(vscode.Uri.joinPath( extensionUri, 'media', 'glue.css')); 73 | 74 | // const stylePath = vscode.Uri.file(path.join(context.extensionPath, 'media', 'glue.css')); 75 | // const styleSrc = panel.webview.asWebviewUri(stylePath); 76 | 77 | // Tip: Install the es6-string-html VS Code extension to enable code highlighting below 78 | return /*html*/ ` 79 | 80 | 81 | 82 | 83 | 84 | 88 | 89 | 90 | 91 | 92 | 93 |
94 |

Glue Table details

95 |
    96 |
  • ${table!.DatabaseName}
  • 97 |
  • ${table!.Name}
  • 98 |
  • version: ${table!.VersionId}
  • 99 |
100 | 101 |

Columns

102 |
103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 |
Column NameData Type
111 | 112 | 113 | 114 | 117 | 118 | 119 | `; 120 | } -------------------------------------------------------------------------------- /src/emr_serverless.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as vscode from "vscode"; 5 | import { 6 | EMRServerlessClient, 7 | JobRunSummary, 8 | ListApplicationsCommand, 9 | ListJobRunsCommand, 10 | StartJobRunCommand, 11 | } from "@aws-sdk/client-emr-serverless"; 12 | 13 | export class EMRServerlessProvider 14 | implements vscode.TreeDataProvider 15 | { 16 | emrServerlessClient: EMRServerlessClient; 17 | private _ondidChangeTreeData: vscode.EventEmitter< 18 | EMRServerlessApplication | undefined | null | void 19 | > = new vscode.EventEmitter< 20 | EMRServerlessApplication | undefined | null | void 21 | >(); 22 | readonly onDidChangeTreeData: vscode.Event< 23 | EMRServerlessApplication | undefined | null | void 24 | > = this._ondidChangeTreeData.event; 25 | 26 | refresh(): void { 27 | this._ondidChangeTreeData.fire(); 28 | } 29 | 30 | constructor() { 31 | this.emrServerlessClient = new EMRServerlessClient({ region: "us-west-2" }); 32 | } 33 | 34 | getTreeItem(element: EMRServerlessApplication): vscode.TreeItem { 35 | return element; 36 | } 37 | 38 | getChildren(element?: EMRServerlessApplication): Thenable { 39 | if (element) { 40 | return Promise.resolve(element.getChildren()); 41 | } else { 42 | return Promise.resolve( 43 | this.listEMRServerlessApplications(this.emrServerlessClient) 44 | ); 45 | } 46 | } 47 | 48 | public async triggerServerlessJob( 49 | applicationID: string, 50 | entryPoint: string, 51 | logPath: string, 52 | jobRole: string, 53 | ): Promise { 54 | const params = { 55 | applicationId: applicationID, 56 | executionRoleArn: jobRole, 57 | name: "test-deploy", 58 | jobDriver: { 59 | sparkSubmit: { 60 | entryPoint: entryPoint 61 | } 62 | }, 
63 | configurationOverrides: { 64 | monitoringConfiguration: { 65 | s3MonitoringConfiguration: { 66 | logUri: logPath 67 | } 68 | } 69 | } 70 | }; 71 | 72 | const startJobRunRequest = new StartJobRunCommand(params); 73 | const result = await this.emrServerlessClient.send(startJobRunRequest); 74 | return result.jobRunId!; 75 | } 76 | 77 | private async listEMRServerlessApplications( 78 | client: EMRServerlessClient 79 | ): Promise { 80 | const params = {}; 81 | try { 82 | const result = await client.send(new ListApplicationsCommand(params)); 83 | vscode.window.showInformationMessage("Fetching EMR Serverless applications"); 84 | return (result.applications || []).map(app => { 85 | return new EMRServerlessApplication( 86 | this.emrServerlessClient, 87 | app.name || "", 88 | app.id || "", 89 | ); 90 | }); 91 | } catch (error) { 92 | vscode.window.showErrorMessage("Error fetching EMR Serverless applications" + error); 93 | console.log(error); 94 | return []; 95 | } 96 | } 97 | } 98 | 99 | class EMRServerlessApplication extends vscode.TreeItem { 100 | constructor( 101 | private readonly client: EMRServerlessClient, 102 | public readonly name: string, 103 | public readonly id: string 104 | ) { 105 | super(name, vscode.TreeItemCollapsibleState.Collapsed); 106 | this.tooltip = `${this.name} (${this.id})`; 107 | this.description = this.id; 108 | this.client = client; 109 | } 110 | 111 | getTreeItem(element: EMRServerlessJobRun): vscode.TreeItem { 112 | return element; 113 | } 114 | 115 | getChildren(element?: EMRServerlessJobRun): Thenable { 116 | return Promise.resolve( 117 | this.listJobRuns() 118 | ); 119 | } 120 | 121 | private async listJobRuns(): Promise { 122 | const params = {}; 123 | try { 124 | const result = await this.client.send(new ListJobRunsCommand({ applicationId: this.id })); 125 | return result.jobRuns?.map(jobRun => { 126 | return new EMRServerlessJobRun(this.client, this.id, jobRun); 127 | }) || []; 128 | } catch (error) { 129 | 
vscode.window.showErrorMessage("Error fetching job runs!" + error); 130 | return []; 131 | } 132 | } 133 | } 134 | 135 | class EMRServerlessJobRun extends vscode.TreeItem { 136 | constructor( 137 | private readonly client: EMRServerlessClient, 138 | private readonly applicationId: string, 139 | private readonly jobRun: JobRunSummary, 140 | ) { 141 | super(jobRun.name? `${jobRun.name} (${jobRun.id}` : jobRun.id!); 142 | this.id = jobRun.id; 143 | this.description = jobRun.state; 144 | this.tooltip = jobRun.stateDetails; 145 | } 146 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon EMR Toolkit for VS Code (Developer Preview) 2 | 3 | EMR Toolkit is a VS Code Extension to make it easier to develop Spark jobs on EMR. 4 | 5 | ## Requirements 6 | 7 | - A local [AWS profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html) 8 | - Access to the AWS API to list EMR and Glue resources 9 | - [Docker](https://docs.docker.com/install/) (if you want to use the devcontainer) 10 | 11 | ## Features 12 | 13 | - [Amazon EMR Explorer](#amazon-emr-explorer) 14 | - [Glue Data Catalog Explorer](#glue-catalog-explorer) 15 | - [EMR Development Container](#pyspark-emr-development-container) 16 | - [Spark shell support](#spark-shell-support) 17 | - [Jupyter Notebook support](#jupyter-notebook-support) 18 | - [EMR Serverless Deployment](#emr-serverless-deployment) 19 | 20 | ### Amazon EMR Explorer 21 | 22 | The Amazon EMR Explorer allows you to browse job runs and steps across EMR on EC2, EMR on EKS, and EMR Serverless. To see the Explorer, choose the EMR icon in the Activity bar. 23 | 24 | > **Note**: If you do not have default AWS credentials or `AWS_PROFILE` environment variable, use the `EMR: Select AWS Profile` command to select your profile. 
25 | 26 | ![](images/emr-explorer.png) 27 | 28 | ### Glue Catalog Explorer 29 | 30 | The Glue Catalog Explorer displays databases and tables in the Glue Data Catalog. By right-clicking on a table, you can select `View Glue Table` to show the table columns. 31 | 32 | ![](images/glue-explorer.png) 33 | 34 | ![](images/glue-table-details.png) 35 | 36 | ### PySpark EMR Development Container 37 | 38 | The toolkit provides an `EMR: Create local Spark environment` command that creates a [development container](https://code.visualstudio.com/docs/remote/containers) based off of an EMR on EKS image for the EMR version you choose. This container can be used to develop Spark and PySpark code locally that is fully compatible with your remote EMR environment. 39 | 40 | You choose a region and EMR version you want to use, and the extension creates the relevant `Dockerfile` and `devcontainer.json`. 41 | 42 | Once the container is created, follow the instructions in the `emr-local.md` file to authenticate to ECR and use the `Dev Containers: Reopen in Container` command to build and open your local Spark environment. 43 | 44 | You can choose to configure AWS authentication in the container in 1 of 3 ways: 45 | 46 | - **Use existing ~/.aws config** - This mounts your ~/.aws directory to the container. 47 | - **Environment variables** - If you already have [AWS environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html) configured in your shell, the container will reference those variables. 48 | - **.env file** - Creates a `.devcontainer/aws.env` file that you can populate with AWS credentials. 49 | 50 | #### Spark Shell Support 51 | 52 | The EMR Development Container is configured to run Spark in local mode. You can use it like any Spark-enabled environment. Inside the VS Code Terminal, you can use the `pyspark` or `spark-shell` commands to start a local Spark session.
53 | 54 | ![](images/pyspark-shell.png) 55 | 56 | #### Jupyter Notebook Support 57 | 58 | By default, the EMR Development Container also supports Jupyter. Use the **Create: New Jupyter Notebook** command to create a new Jupyter notebook. The following code snippet shows how to initialize a Spark Session inside the notebook. By default, the Container environment is also configured to use the Glue Data Catalog so you can use `spark.sql` commands against Glue tables. 59 | 60 | ```python 61 | from pyspark.sql import SparkSession 62 | 63 | spark = ( 64 | SparkSession.builder.appName("EMRLocal") 65 | .getOrCreate() 66 | ) 67 | ``` 68 | 69 | #### EMR Serverless Deployment 70 | 71 | You can deploy and run a single PySpark file on EMR Serverless with the **EMR Serverless: Run job** command. You'll be prompted for the following information: 72 | 73 | - **S3 URI** - Your PySpark file will be copied here 74 | - **IAM Role** - A [job runtime role](https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/getting-started.html#gs-prerequisites) that can be used to run your EMR Serverless job 75 | - **EMR Serverless Application ID** - The ID of an existing EMR Serverless Spark application 76 | - **Filename** - The name of the local PySpark file you want to run on EMR Serverless 77 | 78 | https://user-images.githubusercontent.com/1512/195953681-4e7e7102-4974-45b1-a695-195e91d45124.mp4 79 | 80 | 81 | ## Future Considerations 82 | 83 | - Allow for the ability to select different profiles 84 | - Persist state (region selection) 85 | - Create a Java environment 86 | - Automate deployments to EMR 87 | - Create virtualenv and upload to S3 88 | - Pack pom into jar file 89 | - Link to open logs in S3 or CloudWatch 90 | - Testing :) https://vscode.rocks/testing/ 91 | 92 | 93 | ## Feedback Notes 94 | 95 | I'm looking for feedback in a few different areas: 96 | 97 | - How do you use Spark on EMR today?
98 | - EMR on EC2, EMR on EKS, or EMR Serverless
99 | - PySpark, Scala Spark, or SparkSQL
100 | - Does the tool work as expected for browsing your EMR resources
101 | - Do you find the devcontainer useful for local development
102 | - What functionality is missing that you would like to see
103 | 
104 | ## Security
105 | 
106 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
107 | 
108 | ## License
109 | 
110 | This project is licensed under the Apache-2.0 License.
111 | 
--------------------------------------------------------------------------------
/src/clients/emrClient.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

import * as vscode from "vscode";
import { Globals } from "../extension";
import {
  DescribeClusterCommand,
  EMR,
  ListClustersCommand,
  ListStepsCommand,
  AddJobFlowStepsCommand,
  AddJobFlowStepsCommandInput,
} from "@aws-sdk/client-emr";
import { EMREC2Filter } from "../emr_explorer";

/** Subset of an EMR cluster description used by the extension. */
export interface Cluster {
  readonly id?: string;
  readonly name?: string;
  readonly apps?: ClusterApp[];
  readonly instanceCollectionType?: string;
}

/** An application installed on a cluster. */
export interface ClusterApp {
  readonly name: string;
  readonly version: string;
}

/** Subset of an EMR step description used by the extension. */
export interface ClusterStep {
  readonly id?: string;
  readonly name?: string;
  readonly state?: string;
  readonly stateDetails?: string;
}

/**
 * Thin wrapper around the EMR API.
 *
 * A new SDK client is created for every call from the current AWS context.
 * The read methods report failures to the user with an error message and
 * return an empty result instead of throwing.
 */
export class DefaultEMRClient {
  public constructor(private readonly globals: Globals) {}

  private async createEMR(): Promise<EMR> {
    return new EMR(this.globals.awsContext.getClientConfig());
  }

  /**
   * Lists clusters in the given states, or in `EMREC2Filter.defaultStates`
   * when no filter is passed.
   *
   * NOTE(review): a single ListClusters request is sent and no pagination
   * marker is followed, so presumably only the first page is returned —
   * confirm against the API's page size.
   *
   * @returns The clusters found; an empty list on error.
   */
  public async listClusters(stateFilter?: EMREC2Filter): Promise<Cluster[]> {
    const emr = await this.createEMR();
    const showStates = stateFilter
      ? stateFilter.getStates()
      : EMREC2Filter.defaultStates;
    const params = {
      // eslint-disable-next-line @typescript-eslint/naming-convention
      ClusterStates: showStates,
    };

    this.globals.outputChannel.appendLine(
      `EMR: Fetching clusters from ${this.globals.awsContext.getRegion()} region.`
    );

    try {
      const result = await emr.send(new ListClustersCommand(params));
      return (result.Clusters || []).map(({ Id: id, Name: name }) => ({
        id,
        name,
      }));
    } catch (error) {
      vscode.window.showErrorMessage("Error fetching EMR clusters!" + error);
      return [];
    }
  }

  /**
   * Fetches id, name, installed applications and instance collection type of
   * one cluster.
   *
   * @returns The cluster details; `undefined` on error.
   */
  public async describeCluster(
    clusterId: string
  ): Promise<Cluster | undefined> {
    const emr = await this.createEMR();

    try {
      const result = await emr.send(
        // eslint-disable-next-line @typescript-eslint/naming-convention
        new DescribeClusterCommand({ ClusterId: clusterId })
      );
      return {
        id: result.Cluster?.Id,
        name: result.Cluster?.Name,
        apps: result.Cluster?.Applications?.map(
          ({ Name: name, Version: version }) => ({ name, version })
        ),
        instanceCollectionType: result.Cluster?.InstanceCollectionType,
      };
    } catch (error) {
      vscode.window.showErrorMessage(
        "Error fetching EMR cluster details!" + error
      );
      return undefined;
    }
  }

  /**
   * Lists the steps of one cluster with a single ListSteps request.
   *
   * @returns The steps found; an empty list on error.
   */
  public async listSteps(clusterId: string): Promise<ClusterStep[]> {
    const emr = await this.createEMR();

    this.globals.outputChannel.appendLine(
      `EMR: Fetching cluster steps from ${clusterId} cluster.`
    );

    try {
      const result = await emr.send(
        // eslint-disable-next-line @typescript-eslint/naming-convention
        new ListStepsCommand({ ClusterId: clusterId })
      );
      return (result.Steps || []).map((s) => ({
        id: s.Id,
        name: s.Name,
        state: s.Status?.State,
        stateDetails: s.Status?.StateChangeReason?.Message,
      }));
    } catch (error) {
      vscode.window.showErrorMessage(
        "Error fetching EMR cluster steps!" + error
      );
      return [];
    }
  }

  /**
   * Adds a `spark-submit` step (cluster deploy mode, run through
   * `command-runner.jar`) for `entryPoint` to the given cluster.
   *
   * @param clusterId Job flow (cluster) id to add the step to.
   * @param entryPoint Script location passed to `spark-submit`; the part after
   *   the last `/` is used in the step name.
   * @returns A step carrying the new step id; an empty step when the request
   *   fails (the error is shown to the user).
   */
  public async startJobRun(
    clusterId: string,
    entryPoint: string
  ): Promise<ClusterStep> {
    this.globals.outputChannel.appendLine(
      `EMR on EC2: Starting job run (${clusterId}).`
    );

    const emr = await this.createEMR();
    const scriptName = entryPoint.split("/").pop();

    const jobRunParams: AddJobFlowStepsCommandInput = {
      JobFlowId: clusterId,
      Steps: [
        {
          Name: `vs-code: ${scriptName}`,
          HadoopJarStep: {
            Jar: "command-runner.jar",
            Args: ["spark-submit", "--deploy-mode", "cluster", entryPoint],
          },
        },
      ],
    };

    try {
      const result = await emr.send(new AddJobFlowStepsCommand(jobRunParams));
      return { id: result.StepIds?.[0] };
    } catch (error) {
      vscode.window.showErrorMessage(
        "There was an error running the EMR on EC2 job:" + error
      );
      return {};
    }
  }
}
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "emr-tools", 3 | "displayName": "Amazon EMR Toolkit", 4 | "publisher": "AmazonEMR", 5 | "description": "The extension for developers building Spark applications to run in EMR clusters.", 6 | "version": "0.7.0", 7 | "preview": false, 8 | "icon": "resources/emr-icon.png", 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/awslabs/amazon-emr-vscode-toolkit" 12 | }, 13 | "engines": { 14 | "vscode": "^1.66.0" 15 | }, 16 | "categories": [ 17 | "Other" 18 | ], 19 | "activationEvents": [ 20 | "onCommand:emr-tools-v2.localEnvironmentMagic", 21 | "onCommand:emr-tools-v2.deployEMRServerless", 22 | "onCommand:emr-tools-v2.deployEMREC2", 23 | "onCommand:emr-tools-v2.selectRegion", 24 | "onCommand:emr-tools-v2.selectProfile", 25 | "onView:emrExplorer", 26 | "onView:emrContainersExplorer", 27 | "onView:emrServerlessExplorer", 28 | "onView:glueCatalogExplorer" 29 | ], 30 | "main": "./out/main.js", 31 | "contributes": { 32 | "commands": [ 33 | { 34 | "command": "emr-tools-v2.selectRegion", 35 | "title": "EMR: Select AWS Region" 36 | }, 37 | { 38 | "command": "emr-tools-v2.selectProfile", 39 | "title": "EMR: Select AWS Profile" 40 | }, 41 | { 42 | "command": "emr-tools-v2.localEnvironmentMagic", 43 | "title": "EMR: Create local Spark environment" 44 | }, 45 | { 46 | "command": "emr-tools-v2.deployEMRServerless", 47 | "title": "EMR Serverless: Run job" 48 | }, 49 | { 50 | "command": "emr-tools-v2.deployEMREC2", 51 | "title": "EMR on EC2: Run job" 52 | }, 53 | { 54 | "command": "emr-tools-v2.refreshEntry", 55 | "title": "EMR: Refresh", 56 | "icon": { 57 | "light": "resources/light/refresh.svg", 58 | "dark": "resources/dark/refresh.svg" 59 | } 60 | }, 61 | { 62 | "command": "emr-tools-v2.refreshContainerEntry", 63 | "title": "EMR Containers: Refresh", 64 | "icon": { 65 | 
"light": "resources/light/refresh.svg", 66 | "dark": "resources/dark/refresh.svg" 67 | } 68 | }, 69 | { 70 | "command": "emr-tools-v2.refreshServerlessEntry", 71 | "title": "EMR Serverless: Refresh", 72 | "icon": { 73 | "light": "resources/light/refresh.svg", 74 | "dark": "resources/dark/refresh.svg" 75 | } 76 | }, 77 | { 78 | "command": "emr-tools-v2.refreshGlueCatalog", 79 | "title": "Glue Catalog: Refresh", 80 | "icon": { 81 | "light": "resources/light/refresh.svg", 82 | "dark": "resources/dark/refresh.svg" 83 | } 84 | }, 85 | { 86 | "command": "emr-tools-v2.filterClusters", 87 | "title": "Include Cluster States" 88 | }, 89 | { 90 | "command": "emr-tools-v2.copyId", 91 | "title": "Copy ID" 92 | }, 93 | { 94 | "command": "emr-tools-v2.viewGlueTable", 95 | "title": "View Glue Table" 96 | } 97 | ], 98 | "views": { 99 | "emr-explorer": [ 100 | { 101 | "id": "emrExplorer", 102 | "name": "EMR Explorer" 103 | }, 104 | { 105 | "id": "emrContainersExplorer", 106 | "name": "EMR Containers" 107 | }, 108 | { 109 | "id": "emrServerlessExplorer", 110 | "name": "EMR Serverless" 111 | }, 112 | { 113 | "id": "glueCatalogExplorer", 114 | "name": "Glue Catalog Explorer" 115 | } 116 | ] 117 | }, 118 | "menus": { 119 | "view/title": [ 120 | { 121 | "command": "emr-tools-v2.refreshEntry", 122 | "when": "view == emrExplorer", 123 | "group": "navigation" 124 | }, 125 | { 126 | "command": "emr-tools-v2.refreshContainerEntry", 127 | "when": "view == emrContainersExplorer", 128 | "group": "navigation" 129 | }, 130 | { 131 | "command": "emr-tools-v2.refreshServerlessEntry", 132 | "when": "view == emrServerlessExplorer", 133 | "group": "navigation" 134 | }, 135 | { 136 | "command": "emr-tools-v2.refreshGlueCatalog", 137 | "when": "view == glueCatalogExplorer", 138 | "group": "navigation" 139 | }, 140 | { 141 | "command": "emr-tools-v2.filterClusters", 142 | "when": "view == emrExplorer" 143 | } 144 | ], 145 | "view/item/context": [ 146 | { 147 | "command": "emr-tools-v2.copyId", 148 | 
"when": "viewItem =~ /^(EMRCluster|EMRClusterStep|EMRVirtualCluster|EMRVirtualClusterJob|EMRServerlessApplication|EMRServerlessJob)/", 149 | "group": "0@1" 150 | }, 151 | { 152 | "command": "emr-tools-v2.viewGlueTable", 153 | "when": "viewItem =~ /^GlueCatalogTable/", 154 | "group": "0@2" 155 | } 156 | ] 157 | }, 158 | "viewsContainers": { 159 | "activitybar": [ 160 | { 161 | "id": "emr-explorer", 162 | "title": "Amazon EMR", 163 | "icon": "media/emr.svg" 164 | } 165 | ] 166 | } 167 | }, 168 | "scripts": { 169 | "vscode:prepublish": "npm run esbuild-base -- --minify", 170 | "esbuild-base": "esbuild ./src/extension.ts --bundle --outfile=out/main.js --external:vscode --format=cjs --platform=node", 171 | "esbuild": "npm run esbuild-base -- --sourcemap", 172 | "esbuild-watch": "npm run esbuild-base -- --sourcemap --watch", 173 | "pretest": "npm run compile && npm run lint", 174 | "lint": "eslint src --ext ts", 175 | "test": "node ./out/test/runTest.js" 176 | }, 177 | "devDependencies": { 178 | "@types/glob": "^7.2.0", 179 | "@types/mocha": "^9.1.0", 180 | "@types/node": "14.x", 181 | "@types/vscode": "^1.66.0", 182 | "@typescript-eslint/eslint-plugin": "^5.16.0", 183 | "@typescript-eslint/parser": "^5.16.0", 184 | "@vscode/test-electron": "^2.1.3", 185 | "esbuild": "^0.14.50", 186 | "eslint": "^8.11.0", 187 | "glob": "^7.2.3", 188 | "mocha": "^9.2.2", 189 | "typescript": "^4.5.5" 190 | }, 191 | "dependencies": { 192 | "@aws-sdk/client-ec2": "^3.130.0", 193 | "@aws-sdk/client-emr": "3.30.0", 194 | "@aws-sdk/client-emr-containers": "3.30.0", 195 | "@aws-sdk/client-emr-serverless": "3.105.0", 196 | "@aws-sdk/client-glue": "^3.181.0", 197 | "@aws-sdk/client-s3": "^3.168.0", 198 | "@aws-sdk/client-ssm": "^3.95.0", 199 | "@aws-sdk/credential-providers": "^3.256.0", 200 | "@aws-sdk/shared-ini-file-loader": "3.127.0", 201 | "@aws-sdk/types": "^3.254.0", 202 | "@vscode/codicons": "^0.0.32" 203 | } 204 | } 205 | 
--------------------------------------------------------------------------------
/src/helpers.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// -------------------------------------------------------
// Helper code that wraps the API for the multi-step case.
// -------------------------------------------------------

import { QuickPickItem, QuickInputButton, QuickInput, Disposable, window, QuickInputButtons } from "vscode";

/** Sentinel values thrown/rejected by input steps to steer the wizard. */
class InputFlowAction {
  static back = new InputFlowAction();
  static cancel = new InputFlowAction();
  static resume = new InputFlowAction();
}

/** A wizard step; it resolves to the next step, or to nothing when done. */
type InputStep = (input: MultiStepInput) => Thenable<InputStep | void>;

interface QuickPickParameters<T extends QuickPickItem> {
  title: string;
  step: number;
  totalSteps: number;
  items: T[];
  activeItem?: T;
  placeholder: string;
  buttons?: QuickInputButton[];
  shouldResume: () => Thenable<boolean>;
}

interface InputBoxParameters {
  title: string;
  step: number;
  totalSteps: number;
  value: string;
  prompt: string;
  validate: (value: string) => Promise<string | undefined>;
  buttons?: QuickInputButton[];
  shouldResume: () => Thenable<boolean>;
  ignoreFocusOut?: boolean;
}

/**
 * Drives a sequence of quick-pick / input-box steps with back navigation.
 *
 * Each step returns the next step to run. Steps signal "back", "resume" or
 * "cancel" by rejecting with the matching `InputFlowAction` sentinel.
 */
export class MultiStepInput {
  /** Runs the wizard beginning at `start` until a step returns nothing. */
  static async run(start: InputStep) {
    const input = new MultiStepInput();
    return input.stepThrough(start);
  }

  private current?: QuickInput;
  private steps: InputStep[] = [];

  private async stepThrough(start: InputStep) {
    let step: InputStep | void = start;
    try {
      while (step) {
        this.steps.push(step);
        if (this.current) {
          // Freeze the previous input while the next step is prepared.
          this.current.enabled = false;
          this.current.busy = true;
        }
        try {
          step = await step(this);
        } catch (err) {
          if (err === InputFlowAction.back) {
            // Drop the current step, then re-run the one before it.
            this.steps.pop();
            step = this.steps.pop();
          } else if (err === InputFlowAction.resume) {
            step = this.steps.pop();
          } else if (err === InputFlowAction.cancel) {
            step = undefined;
          } else {
            throw err;
          }
        }
      }
    } finally {
      // Dispose the visible input even when a step fails unexpectedly.
      if (this.current) {
        this.current.dispose();
      }
    }
  }

  /**
   * Shows a quick pick for one wizard step.
   *
   * @returns The selected item, or the custom button that was pressed.
   *   Rejects with an `InputFlowAction` on back / hide.
   */
  async showQuickPick<
    T extends QuickPickItem,
    P extends QuickPickParameters<T>
  >({
    title,
    step,
    totalSteps,
    items,
    activeItem,
    placeholder,
    buttons,
    shouldResume,
  }: P) {
    const disposables: Disposable[] = [];
    try {
      return await new Promise<
        T | (P extends { buttons: (infer I)[] } ? I : never)
      >((resolve, reject) => {
        const input = window.createQuickPick<T>();
        input.title = title;
        input.step = step;
        input.totalSteps = totalSteps;
        input.placeholder = placeholder;
        input.items = items;
        if (activeItem) {
          input.activeItems = [activeItem];
        }
        input.buttons = [
          // A Back button only makes sense after the first step.
          ...(this.steps.length > 1 ? [QuickInputButtons.Back] : []),
          ...(buttons || []),
        ];
        disposables.push(
          input.onDidTriggerButton((item) => {
            if (item === QuickInputButtons.Back) {
              reject(InputFlowAction.back);
            } else {
              // The conditional result type cannot be proven here.
              // eslint-disable-next-line @typescript-eslint/no-explicit-any
              resolve(item as any);
            }
          }),
          input.onDidChangeSelection((items) => resolve(items[0])),
          input.onDidHide(() => {
            (async () => {
              reject(
                shouldResume && (await shouldResume())
                  ? InputFlowAction.resume
                  : InputFlowAction.cancel
              );
            })().catch(reject);
          })
        );
        if (this.current) {
          this.current.dispose();
        }
        this.current = input;
        this.current.show();
      });
    } finally {
      disposables.forEach((d) => d.dispose());
    }
  }

  /**
   * Shows an input box for one wizard step.
   *
   * `validate` returns an error message, or `undefined` when the value is
   * acceptable; the box only resolves on an acceptable value.
   *
   * @returns The accepted text, or the custom button that was pressed.
   *   Rejects with an `InputFlowAction` on back / hide.
   */
  async showInputBox<P extends InputBoxParameters>({
    title,
    step,
    totalSteps,
    value,
    prompt,
    validate,
    buttons,
    shouldResume,
    ignoreFocusOut,
  }: P) {
    const disposables: Disposable[] = [];
    try {
      return await new Promise<
        string | (P extends { buttons: (infer I)[] } ? I : never)
      >((resolve, reject) => {
        const input = window.createInputBox();
        input.title = title;
        input.step = step;
        input.totalSteps = totalSteps;
        input.value = value || "";
        input.prompt = prompt;
        input.buttons = [
          ...(this.steps.length > 1 ? [QuickInputButtons.Back] : []),
          ...(buttons || []),
        ];
        input.ignoreFocusOut = ignoreFocusOut ?? false;
        // Tracks the most recent validation so stale results are ignored.
        let validating = validate("");
        disposables.push(
          input.onDidTriggerButton((item) => {
            if (item === QuickInputButtons.Back) {
              reject(InputFlowAction.back);
            } else {
              // The conditional result type cannot be proven here.
              // eslint-disable-next-line @typescript-eslint/no-explicit-any
              resolve(item as any);
            }
          }),
          input.onDidAccept(async () => {
            const value = input.value;
            input.enabled = false;
            input.busy = true;
            try {
              if (!(await validate(value))) {
                resolve(value);
              }
            } finally {
              // Re-enable the box even if the validator rejects.
              input.enabled = true;
              input.busy = false;
            }
          }),
          input.onDidChangeValue(async (text) => {
            const current = validate(text);
            validating = current;
            const validationMessage = await current;
            if (current === validating) {
              input.validationMessage = validationMessage;
            }
          }),
          input.onDidHide(() => {
            (async () => {
              reject(
                shouldResume && (await shouldResume())
                  ? InputFlowAction.resume
                  : InputFlowAction.cancel
              );
            })().catch(reject);
          })
        );
        if (this.current) {
          this.current.dispose();
        }
        this.current = input;
        this.current.show();
      });
    } finally {
      disposables.forEach((d) => d.dispose());
    }
  }
}
--------------------------------------------------------------------------------
/src/extension.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// The module 'vscode' contains the VS Code extensibility API
// Import the module and reference it with the alias vscode in your code below
import path = require("path");
import * as vscode from "vscode";
import { AwsContextCommands } from "./aws_context";
import { DefaultEMRClient } from "./clients/emrClient";
import { DefaultEMRContainersClient } from "./clients/emrContainersClient";
import { DefaultEMRServerlessClient } from "./clients/emrServerlessClient";
import { DefaultGlueClient } from "./clients/glueClient";
import { DefaultS3Client } from "./clients/s3Client";
import { EMREC2Deploy } from "./commands/deploy/emrEC2Deploy";
import { EMRServerlessDeploy } from "./commands/emrDeploy";
import { EMREC2Filter } from "./emr_explorer";
import { EMRLocalEnvironment } from "./emr_local";
import { copyIdCommand } from "./explorer/commands";
import { EMRContainersNode } from "./explorer/emrContainers";
import { EMRNode } from "./explorer/emrEC2";
import { EMRServerlessNode } from "./explorer/emrServerless";
import { GlueCatalogNode } from "./explorer/glueCatalog";
import { getWebviewContent } from "./panels/glueTablePanel";


// Workaround for https://github.com/aws/aws-sdk-js-v3/issues/3807
declare global {
  interface
ReadableStream {}
}

// We create a global namespace for common variables
export interface Globals {
  context: vscode.ExtensionContext;
  outputChannel: vscode.OutputChannel;
  awsContext: AwsContextCommands;
  selectedRegion: string;
  selectedProfile: string;
}
const globals = {} as Globals;
export { globals };

/**
 * Extension entry point: wires up the shared globals, the four explorer
 * views and every `emr-tools-v2.*` command.
 *
 * Disposables returned by VS Code registrations are added to
 * `context.subscriptions` so they are released on deactivation.
 *
 * @param context Extension context supplied by VS Code.
 */
export function activate(context: vscode.ExtensionContext) {
  const logger = vscode.window.createOutputChannel("Amazon EMR");
  globals.outputChannel = logger;
  context.subscriptions.push(logger);

  // Allow users to set profile and region
  const awsContext = new AwsContextCommands();
  globals.awsContext = awsContext;

  // Allow other modules to access vscode context
  globals.context = context;

  context.subscriptions.push(
    vscode.commands.registerCommand("emr-tools-v2.selectProfile", async () => {
      await awsContext.onCommandSetProfile();
    })
  );

  context.subscriptions.push(
    vscode.commands.registerCommand("emr-tools-v2.selectRegion", async () => {
      await awsContext.onCommandSetRegion();
    })
  );

  const treeFilter = new EMREC2Filter();
  context.subscriptions.push(
    vscode.commands.registerCommand("emr-tools-v2.filterClusters", async () => {
      await treeFilter.run();
    })
  );

  // EMR on EC2 support
  const emrEC2Client = new DefaultEMRClient(globals);
  const emrExplorer = new EMRNode(emrEC2Client, treeFilter);
  context.subscriptions.push(
    vscode.window.registerTreeDataProvider("emrExplorer", emrExplorer),
    vscode.commands.registerCommand("emr-tools-v2.refreshEntry", () =>
      emrExplorer.refresh()
    )
  );

  // Tree data providers
  // const emrTools = new EMREC2Provider(
  //   vscode.workspace.rootPath + "",
  //   treeFilter,
  //   logger
  // );
  // vscode.window.registerTreeDataProvider("emrExplorer", emrTools);
  // vscode.commands.registerCommand("emr-tools-v2.refreshEntry", () =>
  //   emrTools.refresh()
  // );
  // vscode.commands.registerCommand(
  //   "emr-tools-v2.connectToCluster",
  //   async (cluster: EMRCluster) => {
  //     await connectToClusterCommand(cluster);
  //   }
  // );
  context.subscriptions.push(
    vscode.commands.registerCommand(
      "emr-tools-v2.copyId",
      async (node: vscode.TreeItem) => await copyIdCommand(node)
    )
  );

  context.subscriptions.push(
    vscode.commands.registerCommand(
      "emr-tools-v2.viewGlueTable",
      async (node: vscode.TreeItem) => {
        // The panel title is the node id's space-separated parts, reversed
        // and joined with dots.
        const panel = vscode.window.createWebviewPanel(
          "glue-table",
          node.id!.split(" ").reverse().join("."),
          vscode.ViewColumn.One,
          {
            enableScripts: true,
            enableFindWidget: true,
          }
        );

        panel.webview.html = await getWebviewContent(
          node,
          new DefaultGlueClient(globals),
          context.extensionUri,
          panel.webview
        );
      }
    )
  );

  // EMR on EKS support
  // const emrContainerTools = new EMRContainersProvider(globals);
  const emrContainerExplorer = new EMRContainersNode(
    new DefaultEMRContainersClient(globals)
  );
  context.subscriptions.push(
    vscode.window.registerTreeDataProvider(
      "emrContainersExplorer",
      emrContainerExplorer
    ),
    vscode.commands.registerCommand("emr-tools-v2.refreshContainerEntry", () =>
      emrContainerExplorer.refresh()
    )
  );

  // Glue support
  const glueCatalogExplorer = new GlueCatalogNode(
    new DefaultGlueClient(globals)
  );
  context.subscriptions.push(
    vscode.window.registerTreeDataProvider(
      "glueCatalogExplorer",
      glueCatalogExplorer
    ),
    vscode.commands.registerCommand("emr-tools-v2.refreshGlueCatalog", () =>
      glueCatalogExplorer.refresh()
    )
  );

  // EMR Serverless support
  // const emrServerlessTools = new EMRServerlessProvider();
  const emrServerlessClient = new DefaultEMRServerlessClient(globals);
  const emrServerlessTools = new EMRServerlessNode(emrServerlessClient);
  context.subscriptions.push(
    vscode.window.registerTreeDataProvider(
      "emrServerlessExplorer",
      emrServerlessTools
    ),
    vscode.commands.registerCommand("emr-tools-v2.refreshServerlessEntry", () =>
      emrServerlessTools.refresh()
    )
  );

  // When the region changes, refresh all our explorers
  // NOTE(review): if onDidConfigChange returns a Disposable it should be
  // added to context.subscriptions as well - confirm in aws_context.ts.
  globals.awsContext.onDidConfigChange(() => {
    emrExplorer.refresh();
    emrContainerExplorer.refresh();
    emrServerlessTools.refresh();
    glueCatalogExplorer.refresh();
  });

  const s3Client = new DefaultS3Client(globals);
  const emrServerlessDeployer = new EMRServerlessDeploy(context, emrServerlessClient, s3Client);
  context.subscriptions.push(
    vscode.commands.registerCommand(
      "emr-tools-v2.deployEMRServerless",
      async () => {
        await emrServerlessDeployer.run();
      }
    )
  );

  const emrEC2Deployer = new EMREC2Deploy(context, emrEC2Client, s3Client);
  context.subscriptions.push(
    vscode.commands.registerCommand(
      "emr-tools-v2.deployEMREC2",
      async () => {
        await emrEC2Deployer.run();
      }
    )
  );

  // Deployment support for all our available options
  // Removing until future release :)
  // context.subscriptions.push(
  //   vscode.commands.registerCommand(
  //     "emr-tools-v2.deploy", async () => {
  //       await new EMRDeployer(emrTools, emrContainerTools, emrServerlessTools).run();
  //     }
  //   )
  // );

  // Local environment support
  const emrLocalCreator = new EMRLocalEnvironment(context);
  context.subscriptions.push(
    vscode.commands.registerCommand(
      "emr-tools-v2.localEnvironmentMagic",
      async () => {
        await emrLocalCreator.run();
      }
    )
  );

  // Use the console to output diagnostic information (console.log) and errors (console.error)
  // This line of code will only be executed once when your extension is activated
  console.log('Congratulations, your extension "emr-tools-v2" is now active!');
}
// this method is called when your extension is deactivated
export function deactivate() {}
--------------------------------------------------------------------------------
/src/commands/deploy/emrEC2Deploy.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// We want folks to be able to develop EMR jobs locally.
// We give them an option to create an EMR environment
// They select:
// - Type of job (pyspark, scala, SQL)
// - EMR Release (only those supported by EMR on EKS)
// - Region (used to build the local Image URI)

import { QuickPickItem } from "vscode";
import { MultiStepInput } from "./../../helpers";
import * as fs from "fs";
import * as vscode from "vscode";
import {
  DefaultEMRClient,
  ClusterStep,
} from "../../clients/emrClient";
import { DefaultS3Client } from "../../clients/s3Client";
import { pickFile } from "../../utils/quickPickItem";
import { basename } from "path";

// Step 1, add EMR Deploy option for EMR on EC2
// Command: "EMR on EC2: Deploy and start step"
// Process:
// - Ask for (and save):
//   - S3 bucket/prefix for code location
//   - IAM Job Role ARN (todo)
// - Copy main entry script to S3
// - call StartJobRunCommand


interface State {
  title: string;
  step: number;
  totalSteps: number;
  resourceGroup: QuickPickItem | string;

  s3TargetURI: string;
  clusterID: string;
  jobRoleARN: string;
  s3LogTargetURI: string;
  srcScriptURI: string;
}

const TOTAL_STEPS = 3;

/**
 * Wizard behind the "EMR on EC2: Run job" command: asks for an S3 code
 * location, a cluster ID and a local script, uploads the script and submits
 * it to the cluster as a step.
 *
 * Answers are remembered on the instance and offered as defaults next time.
 */
export class EMREC2Deploy {
  context: vscode.ExtensionContext;
  title: string;
  previousClusterID: string | undefined;
  previousS3TargetURI: string | undefined;
  previousS3LogTargetURI: string | undefined;
  previousJobRoleARN: string | undefined;

  constructor(
    context: vscode.ExtensionContext,
    private readonly emr: DefaultEMRClient,
    private readonly s3: DefaultS3Client
  ) {
    this.context = context;
    this.title = "Deploy to EMR on EC2";

    this.previousClusterID = undefined;
    this.previousS3TargetURI = undefined;
    this.previousS3LogTargetURI = undefined;
    this.previousJobRoleARN = undefined;
  }

  /**
   * Runs the input wizard.
   *
   * @returns The collected state. Fields are missing at runtime when the
   *   user cancels part-way, so callers must check them before use.
   */
  async collectInputs() {
    const state = {} as Partial<State>;
    await MultiStepInput.run((input) => this.insertS3TargetURI(input, state));
    return state as State;
  }

  /** Step 1: asks for the S3 URI the script is uploaded to. */
  async insertS3TargetURI(
    input: MultiStepInput,
    state: Partial<State>
  ) {
    const defaultTarget = this.previousS3TargetURI
      ? this.previousS3TargetURI
      : "s3://bucket-name/prefix/";
    const pick = await input.showInputBox({
      title: this.title,
      step: 1,
      totalSteps: TOTAL_STEPS,
      value: defaultTarget,
      prompt: "Provide an S3 URI where you want to upload your code.",
      validate: this.validateBucketURI,
      shouldResume: this.shouldResume,
      ignoreFocusOut: true,
    });

    state.s3TargetURI = pick.valueOf();
    this.previousS3TargetURI = state.s3TargetURI;
    return (input: MultiStepInput) => this.selectClusterID(input, state);
  }

  /**
   * Asks for an optional S3 URI for Spark logs.
   *
   * No step in this class chains to this method at present.
   */
  async insertS3LogTargetURI(
    input: MultiStepInput,
    state: Partial<State>
  ) {
    let defaultTarget = "s3://bucket-name/logs/";
    if (this.previousS3LogTargetURI !== undefined) {
      defaultTarget = this.previousS3LogTargetURI;
    } else if (state.s3TargetURI) {
      // Default to a logs/ prefix in the same bucket as the code.
      const codeBucket = this.extractBucketName(state.s3TargetURI);
      defaultTarget = `s3://${codeBucket}/logs/`;
    }
    const pick = await input.showInputBox({
      title: this.title,
      step: 2,
      totalSteps: TOTAL_STEPS,
      value: defaultTarget,
      prompt: "Provide an S3 URI for Spark logs (leave blank to disable).",
      validate: this.validateOptionalBucketURI.bind(this),
      shouldResume: this.shouldResume,
      ignoreFocusOut: true,
    });

    state.s3LogTargetURI = pick.valueOf();
    this.previousS3LogTargetURI = state.s3LogTargetURI;
    return (input: MultiStepInput) => this.selectClusterID(input, state);
  }

  /**
   * Asks for the IAM job role ARN.
   *
   * No step in this class chains to this method at present.
   */
  async insertJobRoleARN(
    input: MultiStepInput,
    state: Partial<State>
  ) {
    const defaultJobRole = this.previousJobRoleARN
      ? this.previousJobRoleARN
      : "arn:aws:iam::xxx:role/job-role";
    const pick = await input.showInputBox({
      title: this.title,
      step: 3,
      totalSteps: TOTAL_STEPS,
      value: defaultJobRole,
      prompt:
        "Provide an IAM Role that has access to the resources for your job.",
      validate: this.validateJobRole,
      shouldResume: this.shouldResume,
      ignoreFocusOut: true,
    });

    state.jobRoleARN = pick.valueOf();
    this.previousJobRoleARN = state.jobRoleARN;
    return (input: MultiStepInput) => this.selectClusterID(input, state);
  }

  /** Step 2: asks for the ID of the target EMR cluster. */
  async selectClusterID(
    input: MultiStepInput,
    state: Partial<State>
  ) {
    const defaultClusterId = this.previousClusterID
      ? this.previousClusterID
      : "j-AABBCCDD00112";
    // TODO: Populate the list of cluster IDs automatically
    const pick = await input.showInputBox({
      title: this.title,
      step: 2,
      totalSteps: TOTAL_STEPS,
      value: defaultClusterId,
      prompt: "Provide the EMR Cluster ID.",
      validate: this.validateClusterID,
      shouldResume: this.shouldResume,
      ignoreFocusOut: true,
    });

    state.clusterID = pick.valueOf();
    this.previousClusterID = state.clusterID;
    return (input: MultiStepInput) => this.selectSourceFile(input, state);
  }

  /**
   * Final step: lets the user pick the local script to upload. Leaves
   * `state.srcScriptURI` unset when the picker returns nothing.
   */
  async selectSourceFile(
    input: MultiStepInput,
    state: Partial<State>
  ) {
    const uri = await pickFile("Type the filename with your source code.");
    if (uri) {
      state.srcScriptURI = uri.fsPath;
    }
  }

  /** Like `validateBucketURI`, but accepts an empty value. */
  async validateOptionalBucketURI(uri: string): Promise<string | undefined> {
    if (uri === "" || uri === undefined) {
      return undefined;
    }

    return this.validateBucketURI(uri);
  }

  /** @returns An error message unless `uri` starts with `s3://`. */
  async validateBucketURI(uri: string): Promise<string | undefined> {
    if (!uri.startsWith("s3://")) {
      return "S3 location must start with s3://";
    }
    return undefined;
  }

  /** @returns The bucket segment of an `s3://bucket/...` URI. */
  extractBucketName(uri: string): string {
    return uri.split("/")[2];
  }

  /** @returns An error message unless `uri` starts with `arn:aws:iam::`. */
  async validateJobRole(uri: string): Promise<string | undefined> {
    if (!uri.startsWith("arn:aws:iam::")) {
      // NOTE(review): placeholder text in this message looks truncated -
      // confirm the intended wording.
      return "Job role must be a full ARN: arn:aws:iam:::role/";
    }
    return undefined;
  }

  /** @returns An error message unless the ID is `j-` plus 13 characters. */
  async validateClusterID(
    clusterId: string
  ): Promise<string | undefined> {
    if (!clusterId.startsWith("j-")) {
      return "Cluster must begin with 'j-'";
    }
    if (clusterId.length !== 15) {
      return "Provide the Cluster ID, like j-AABBCCDD00112";
    }
    return undefined;
  }

  /**
   * Decides whether a hidden input should be resumed.
   *
   * Always answers "no", so dismissing an input cancels the wizard instead
   * of leaving `collectInputs` pending forever.
   */
  shouldResume() {
    // Could show a notification with the option to resume.
    return Promise.resolve(false);
  }

  /**
   * Runs the wizard, asks for confirmation and deploys. Does nothing when
   * the wizard was cancelled or no source file was chosen.
   */
  public async run() {
    const state = await this.collectInputs();

    // A cancelled wizard leaves required fields unset; bail out quietly
    // rather than failing on basename(undefined).
    if (!state.s3TargetURI || !state.clusterID || !state.srcScriptURI) {
      return;
    }

    const { uri } = this.resolveUploadTarget(
      state.s3TargetURI,
      state.srcScriptURI
    );
    const detail = `Entry point: ${uri}\nCluster ID: ${state.clusterID}`;

    const answer = await vscode.window.showInformationMessage(
      "Confirm EMR on EC2 deployment",
      { modal: true, detail },
      "Yes"
    );

    if (answer === "Yes") {
      await this.deploy(
        state.clusterID,
        state.jobRoleARN,
        state.srcScriptURI,
        state.s3TargetURI,
        state.s3LogTargetURI,
      );
    }
  }

  /**
   * Splits the target URI into bucket and key for the uploaded script.
   * An empty prefix yields a key without a leading slash.
   */
  private resolveUploadTarget(s3TargetURI: string, sourceFile: string) {
    const parts = s3TargetURI.split("/");
    const bucketName = parts[2];
    const prefix = parts.slice(3).join("/").replace(/\/$/, "");
    const fileName = basename(sourceFile);
    const key = prefix === "" ? fileName : `${prefix}/${fileName}`;
    return { bucketName, key, uri: `s3://${bucketName}/${key}` };
  }

  /**
   * Uploads the script to S3 and submits it to the cluster.
   *
   * `executionRoleARN` and `s3LogTargetURI` are accepted but not used by
   * the EMR on EC2 submission.
   */
  private async deploy(
    clusterID: string,
    executionRoleARN: string,
    sourceFile: string,
    s3TargetURI: string,
    s3LogTargetURI: string,
  ) {
    const data = await fs.promises.readFile(sourceFile);
    const { bucketName, key, uri } = this.resolveUploadTarget(
      s3TargetURI,
      sourceFile
    );

    await this.s3.uploadFile(bucketName, key, data);

    // Wait for the submission so success is only reported when a step ID
    // came back.
    const step = await this.emr.startJobRun(clusterID, uri);
    if (step.id === undefined) {
      return;
    }

    vscode.window.showInformationMessage("Your job has been submitted, refresh the EMR view to keep an eye on it.");
  }
}
--------------------------------------------------------------------------------
/src/commands/emrDeploy.ts:
--------------------------------------------------------------------------------
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// We want folks to be able to develop EMR jobs locally.
5 | // We give them an option to create an EMR environment 6 | // They select: 7 | // - Type of job (pyspark, scala, SQL) 8 | // - EMR Release (only those supported by EMR on EKS) 9 | // - Region (used to build the local Image URI) 10 | 11 | import { QuickPickItem, window } from "vscode"; 12 | import { MultiStepInput } from "./../helpers"; 13 | import * as fs from "fs"; 14 | import * as vscode from "vscode"; 15 | import { 16 | DefaultEMRServerlessClient, 17 | JobRun, 18 | } from "../clients/emrServerlessClient"; 19 | import { DefaultS3Client } from "../clients/s3Client"; 20 | import { pickFile } from "../utils/quickPickItem"; 21 | import { basename } from "path"; 22 | 23 | // Step 1, add EMR Deploy option for EMR Serverless 24 | // Command: "EMR Serverless: Deploy and start job" 25 | // Process: 26 | // - Ask for (and save): 27 | // - S3 bucket/prefix for code location 28 | // - IAM Job Role ARN 29 | // - S3 log bucket (optional) 30 | // - Copy main entry script to S3 31 | // - Copy any additional .py files to s3 32 | // - call StartJobRunCommand 33 | 34 | // - open explorer view ;) 35 | 36 | 37 | interface State { 38 | title: string; 39 | step: number; 40 | totalSteps: number; 41 | resourceGroup: QuickPickItem | string; 42 | 43 | s3TargetURI: string; 44 | applicationID: string; 45 | jobRoleARN: string; 46 | s3LogTargetURI: string; 47 | srcScriptURI: string; 48 | } 49 | 50 | const TOTAL_STEPS = 5; 51 | 52 | export class EMRServerlessDeploy { 53 | context: vscode.ExtensionContext; 54 | title: string; 55 | previousAppID: string | undefined; 56 | previousS3TargetURI: string | undefined; 57 | previousS3LogTargetURI: string | undefined; 58 | previousJobRoleARN: string | undefined; 59 | 60 | 61 | 62 | constructor( 63 | context: vscode.ExtensionContext, 64 | private readonly emr: DefaultEMRServerlessClient, 65 | private readonly s3: DefaultS3Client 66 | ) { 67 | this.context = context; 68 | this.title = "Deploy to EMR Serverless"; 69 | 70 | this.previousAppID = undefined; 
71 | this.previousS3TargetURI = undefined; 72 | this.previousS3LogTargetURI = undefined; 73 | this.previousJobRoleARN = undefined; 74 | } 75 | 76 | 77 | 78 | async collectInputs() { 79 | const state = {} as Partial; 80 | await MultiStepInput.run((input) => this.insertS3TargetURI(input, state)); 81 | return state as State; 82 | } 83 | 84 | 85 | 86 | async insertS3TargetURI( 87 | input: MultiStepInput, 88 | state: Partial 89 | ) { 90 | let defaultTarget = "s3://bucket-name/prefix/"; 91 | if (this.previousS3TargetURI) { 92 | defaultTarget = this.previousS3TargetURI; 93 | } 94 | const pick = await input.showInputBox({ 95 | title: this.title, 96 | step: 1, 97 | totalSteps: TOTAL_STEPS, 98 | value: defaultTarget, 99 | prompt: "Provide an S3 URI where you want to upload your code.", 100 | validate: this.validateBucketURI, 101 | shouldResume: this.shouldResume, 102 | ignoreFocusOut: true, 103 | }); 104 | 105 | state.s3TargetURI = pick.valueOf(); 106 | this.previousS3TargetURI = state.s3TargetURI; 107 | return (input: MultiStepInput) => this.insertS3LogTargetURI(input, state); 108 | } 109 | 110 | 111 | async insertS3LogTargetURI( 112 | input: MultiStepInput, 113 | state: Partial 114 | ) { 115 | let defaultTarget = "s3://bucket-name/logs/"; 116 | if (this.previousS3LogTargetURI !== undefined) { 117 | defaultTarget = this.previousS3LogTargetURI; 118 | } else if (state.s3TargetURI) { 119 | let codeBucket = this.extractBucketName(state.s3TargetURI!); 120 | defaultTarget = `s3://${codeBucket}/logs/`; 121 | } 122 | const pick = await input.showInputBox({ 123 | title: this.title, 124 | step: 2, 125 | totalSteps: TOTAL_STEPS, 126 | value: defaultTarget, 127 | prompt: "Provide an S3 URI for Spark logs (leave blank to disable).", 128 | validate: this.validateOptionalBucketURI.bind(this), 129 | shouldResume: this.shouldResume, 130 | ignoreFocusOut: true, 131 | }); 132 | 133 | state.s3LogTargetURI = pick.valueOf(); 134 | this.previousS3LogTargetURI = state.s3LogTargetURI; 135 | return 
(input: MultiStepInput) => this.insertJobRoleARN(input, state); 136 | } 137 | 138 | async insertJobRoleARN( 139 | input: MultiStepInput, 140 | state: Partial 141 | ) { 142 | let defaultJobRole = this.previousJobRoleARN ? this.previousJobRoleARN : "arn:aws:iam::xxx:role/job-role"; 143 | const pick = await input.showInputBox({ 144 | title: this.title, 145 | step: 3, 146 | totalSteps: TOTAL_STEPS, 147 | value: defaultJobRole, 148 | prompt: 149 | "Provide an IAM Role that has access to the resources for your job.", 150 | validate: this.validateJobRole, 151 | shouldResume: this.shouldResume, 152 | ignoreFocusOut: true, 153 | }); 154 | 155 | state.jobRoleARN = pick.valueOf(); 156 | this.previousJobRoleARN = state.jobRoleARN; 157 | return (input: MultiStepInput) => this.selectApplicationID(input, state); 158 | } 159 | 160 | async selectApplicationID( 161 | input: MultiStepInput, 162 | state: Partial 163 | ) { 164 | let defaultAppId = this.previousAppID ? this.previousAppID : "00f3aabbccdd1234"; 165 | // TODO: Populate the list of application IDs automatically 166 | const pick = await input.showInputBox({ 167 | title: this.title, 168 | step: 4, 169 | totalSteps: TOTAL_STEPS, 170 | value: defaultAppId, 171 | prompt: "Provide the EMR Serverless Application ID.", 172 | validate: this.validateApplicationID, 173 | shouldResume: this.shouldResume, 174 | ignoreFocusOut: true, 175 | }); 176 | 177 | state.applicationID = pick.valueOf(); 178 | this.previousAppID = state.applicationID; 179 | return (input: MultiStepInput) => this.selectSourceFile(input, state); 180 | } 181 | 182 | async selectSourceFile( 183 | input: MultiStepInput, 184 | state: Partial 185 | ) { 186 | const uri = await pickFile("Type the filename with your source code."); 187 | if (uri) { 188 | state.srcScriptURI = uri.fsPath; 189 | } 190 | } 191 | 192 | async validateOptionalBucketURI(uri: string): Promise { 193 | if (uri === "" || uri === undefined) { 194 | return undefined; 195 | } 196 | 197 | return 
/**
 * Uploads the local source file to S3 and starts a job run that uses it as
 * the entry point.
 *
 * @param applicationID Application the job run is started on.
 * @param executionRoleARN Role ARN passed to `startJobRun`.
 * @param sourceFile Path of the local file to upload.
 * @param s3TargetURI `s3://bucket/prefix/` location the file is uploaded under.
 * @param s3LogTargetURI Log location passed through to `startJobRun`.
 */
private async deploy(
  applicationID: string,
  executionRoleARN: string,
  sourceFile: string,
  s3TargetURI: string,
  s3LogTargetURI: string,
) {
  const data = fs.readFileSync(sourceFile);

  // "s3://bucket/some/prefix/".split("/") -> ["s3:", "", "bucket", "some", "prefix", ""]
  const uriParts = s3TargetURI.split("/");
  const bucketName = uriParts[2];
  const prefix = uriParts.slice(3).join("/").replace(/\/$/, "");
  const fileName = basename(sourceFile);

  // With no prefix ("s3://bucket" or "s3://bucket/") the key is just the file
  // name; joining unconditionally would produce "/file" and "s3://bucket//file".
  const fullS3Key = prefix === "" ? fileName : `${prefix}/${fileName}`;
  const fullS3Path = `s3://${bucketName}/${fullS3Key}`;

  await this.s3.uploadFile(bucketName, fullS3Key, data);

  // Wait for the submission so a failure propagates to the caller instead of
  // becoming an unhandled rejection after the success message is shown.
  await this.emr.startJobRun(applicationID, executionRoleARN, fullS3Path, s3LogTargetURI);

  vscode.window.showInformationMessage("Your job has been submitted, refresh the EMR Serverless view to keep an eye on it.");
}
/**
 * Supplies the child nodes for `element`, or the top-level nodes when no
 * element is passed.
 *
 * @param element Node whose children are requested.
 * @returns The result of `element.getChildren()` when an element is given;
 *   otherwise the clusters returned by `listEMRClusters` for this provider's
 *   `emrClient`.
 */
getChildren(element?: EMRCluster): Thenable {
  if (element) {
    return Promise.resolve(element.getChildren());
  } else {
    return Promise.resolve(this.listEMRClusters(this.emrClient));
  }
}
/**
 * Tree node that lists a cluster's instances under three child nodes:
 * "Primary", "Core" and "Task".
 */
class EMRClusterInstances extends vscode.TreeItem {
  constructor(
    private readonly emr: EMR,
    private readonly cluster: Cluster,
  ) {
    // The parameter properties above already store `emr` and `cluster`.
    super("Instances", vscode.TreeItemCollapsibleState.Collapsed);
  }

  getTreeItem(element: EMRClusterInstances): vscode.TreeItem {
    return element;
  }

  /**
   * Builds the "Primary", "Core" and "Task" nodes for this cluster.
   *
   * The instance groups or instance fleets are listed first to learn which
   * collection IDs belong to which role. The RUNNING, BOOTSTRAPPING and
   * PROVISIONING instances are then bucketed by the collection they belong to.
   *
   * @param element Unused; kept for the tree-node calling convention.
   * @returns Three role nodes, each holding one child per matching instance.
   */
  public async getChildren(element?: EMRClusterInstances | undefined): Promise<vscode.TreeItem[]> {
    // TODO (2022-04-13): Pagination
    const clusterId = this.cluster.Id;
    const collectionType = this.cluster.InstanceCollectionType;
    const usesFleets = collectionType === InstanceCollectionType.INSTANCE_FLEET;

    // Role ("master" | "core" | "task") -> IDs of the groups/fleets with that role.
    const collectionIds = new Map<string, string[]>();
    const definedIds = (ids: (string | undefined)[]): string[] =>
      ids.filter((id): id is string => id !== undefined);

    if (collectionType === InstanceCollectionType.INSTANCE_GROUP) {
      const response = await this.emr.send(
        new ListInstanceGroupsCommand({ ClusterId: clusterId })
      );
      const groups = response.InstanceGroups ?? [];
      const groupIdsOf = (type: InstanceGroupType): string[] =>
        definedIds(groups.filter((group) => group.InstanceGroupType === type).map((group) => group.Id));

      collectionIds.set("master", groupIdsOf(InstanceGroupType.MASTER));
      collectionIds.set("core", groupIdsOf(InstanceGroupType.CORE));
      collectionIds.set("task", groupIdsOf(InstanceGroupType.TASK));
    } else if (usesFleets) {
      const response = await this.emr.send(
        new ListInstanceFleetsCommand({ ClusterId: clusterId })
      );
      const fleets = response.InstanceFleets ?? [];
      const fleetIdsOf = (type: InstanceFleetType): string[] =>
        definedIds(fleets.filter((fleet) => fleet.InstanceFleetType === type).map((fleet) => fleet.Id));

      collectionIds.set("master", fleetIdsOf(InstanceFleetType.MASTER));
      collectionIds.set("core", fleetIdsOf(InstanceFleetType.CORE));
      collectionIds.set("task", fleetIdsOf(InstanceFleetType.TASK));
    }

    const instances = await this.emr.send(
      new ListInstancesCommand({
        ClusterId: clusterId,
        InstanceStates: [InstanceState.RUNNING, InstanceState.BOOTSTRAPPING, InstanceState.PROVISIONING],
      })
    );

    // Stays `undefined` (a leaf role node) when the response carries no
    // instance list, matching how the role nodes were built before.
    const nodesFor = (role: string): InstanceNodeTree[] | undefined => {
      const ids = collectionIds.get(role) ?? [];
      return instances.Instances
        ?.filter((item) => {
          // Fleet IDs must be matched against the instance's fleet ID; the
          // group ID only identifies membership on instance-group clusters.
          const ownerId = usesFleets ? item.InstanceFleetId : item.InstanceGroupId;
          return ownerId !== undefined && ids.includes(ownerId);
        })
        .map((item) => new InstanceNodeTree(item.Ec2InstanceId ?? "Unknown instance", undefined, item.InstanceType));
    };

    return [
      new InstanceNodeTree("Primary", nodesFor("master")),
      new InstanceNodeTree("Core", nodesFor("core")),
      new InstanceNodeTree("Task", nodesFor("task")),
    ];
  }
}
/**
 * Holds the set of cluster states to show and lets the user change it through
 * a multi-select quick pick.
 */
export class EMREC2Filter {
  static defaultStates = [ClusterState.RUNNING, ClusterState.WAITING];
  private _showStates: Set<ClusterState>;
  // NOTE(review): payload type inferred from the string passed to fire() below — confirm.
  private _onDidChange = new vscode.EventEmitter<string>();

  constructor() {
    // Default set of states
    this._showStates = new Set(EMREC2Filter.defaultStates);
  }

  /** Event fired after the user confirms a new selection in `run`. */
  public get onDidChange(): vscode.Event<string> {
    return this._onDidChange.event;
  }

  /**
   * Shows the quick pick with the current selection pre-checked.
   *
   * @returns `false` when the quick pick yields no result (selection is left
   *   unchanged), `true` after the selection was replaced and the change
   *   event fired.
   */
  public async run() {
    // TODO (2022-06-13): Refactor this to All / Active / Terminated / Failed
    const allStates = [
      { name: "Running", state: ClusterState.RUNNING },
      { name: "Waiting", state: ClusterState.WAITING },
      { name: "Terminated", state: ClusterState.TERMINATED },
      { name: "Terminating", state: ClusterState.TERMINATING },
      { name: "Failed", state: ClusterState.TERMINATED_WITH_ERRORS },
    ];

    // `_showStates` is always initialised by the constructor, so no null check is needed.
    const items = allStates.map(({ name, state }) => ({
      label: name,
      picked: this._showStates.has(state),
      state,
    }));

    const result = await window.showQuickPick(items, {
      placeHolder: "Show or hide cluster states",
      canPickMany: true,
    });

    if (!result) { return false; }

    this._showStates = new Set(result.map((picked) => picked.state));
    this._onDidChange.fire("yolo");

    return true;
  }

  /** Returns a fresh array holding the currently selected states. */
  public getStates() {
    return [...this._showStates];
  }
}
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /src/emr_local.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | // We want folks to be able to developer EMR jobs locally. 
5 | // We give them an option to create an EMR environment 6 | // They select: 7 | // - Type of job (pyspark, scala, SQL) 8 | // - EMR Release (only those supported by EMR on EKS) 9 | // - Region (used to build the local Image URI) 10 | 11 | import { QuickPickItem, window } from "vscode"; 12 | import { MultiStepInput } from "./helpers"; 13 | import * as fs from "fs"; 14 | import * as vscode from "vscode"; 15 | import path = require("path"); 16 | 17 | function welcomeText(region: string, accountId: string, authType: string) { 18 | const envUpdate = (authType === "ENV_FILE") ? "- Update .devcontainer/aws.env with your AWS credentials.\n": ""; 19 | return `# EMR Local Container 20 | 21 | ## Getting Started 22 | 23 | Thanks for installing your local EMR environment. To get started, there are a few steps. 24 | 25 | ${envUpdate}- Login to ECR with the following command: 26 | 27 | aws ecr get-login-password --region ${region} \\ 28 | | docker login \\ 29 | --username AWS \\ 30 | --password-stdin \\ 31 | ${accountId}.dkr.ecr.${region}.amazonaws.com 32 | 33 | - Use the \`Remote-Containers: Reopen in Container\` command to build your new environment. 34 | 35 | ## Usage tips 36 | 37 | - You can start a new shell with the \`pyspark\` command in a terminal. 38 | - If you've configured your AWS credentials in \`.env\`, you should have access to everything you need. 39 | - A sample PySpark script has been created for you in the \`emr_tools_demo.py\` file. 
/**
 * Extracts the major version from an EMR release label.
 *
 * Only labels of the exact form `emr-5.x[.y]`, `emr-6.x[.y]` or `emr-7.x[.y]`
 * are recognised; anything else (other majors, suffixes, empty input) yields
 * `null`.
 *
 * @param emrRelease Release label such as `"emr-6.15.0"`.
 * @returns `5`, `6` or `7` for a recognised label, otherwise `null`.
 */
function getEmrMajorVersion(emrRelease: string): number | null {
  // Anchored at both ends so labels with extra text are rejected.
  const match = /^emr-(5|6|7)\.\d+(\.\d+)?$/.exec(emrRelease);
  if (match === null) {
    return null;
  }
  // Explicit radix: never let parseInt guess the base.
  return Number.parseInt(match[1], 10);
}
label: "EMR 6.10.0", releaseVersion: "emr-6.10.0" }, 110 | { label: "EMR 6.9.0", releaseVersion: "emr-6.9.0" }, 111 | { label: "EMR 6.8.0", releaseVersion: "emr-6.8.0" }, 112 | { label: "EMR 6.7.0", releaseVersion: "emr-6.7.0" }, 113 | { label: "EMR 6.6.0", releaseVersion: "emr-6.6.0" }, 114 | { label: "EMR 6.5.0", releaseVersion: "emr-6.5.0" }, 115 | { label: "EMR 6.4.0", releaseVersion: "emr-6.4.0" }, 116 | { label: "EMR 6.3.0", releaseVersion: "emr-6.3.0" }, 117 | { label: "EMR 6.2.0", releaseVersion: "emr-6.2.0" }, 118 | { label: "EMR 5.35.0", releaseVersion: "emr-5.35.0" }, 119 | { label: "EMR 5.34.0", releaseVersion: "emr-5.34.0" }, 120 | { label: "EMR 5.33.0", releaseVersion: "emr-5.33.0" }, 121 | { label: "EMR 5.32.0", releaseVersion: "emr-5.32.0" }, 122 | ]; 123 | 124 | async function collectInputs() { 125 | const state = {} as Partial; 126 | await MultiStepInput.run((input) => pickJobType(input, state)); 127 | return state as State; 128 | } 129 | 130 | async function pickJobType(input: MultiStepInput, state: Partial) { 131 | const pick = await input.showQuickPick({ 132 | title, 133 | step: 1, 134 | totalSteps: 4, 135 | placeholder: "Pick a sample job type", 136 | items: [{ label: "PySpark" }], 137 | activeItem: 138 | typeof state.resourceGroup !== "string" 139 | ? 
state.resourceGroup 140 | : undefined, 141 | shouldResume: shouldResume, 142 | }); 143 | 144 | state.jobType = pick.label; 145 | return (input: MultiStepInput) => pickEMRRelease(input, state); 146 | } 147 | 148 | async function pickEMRRelease( 149 | input: MultiStepInput, 150 | state: Partial 151 | ) { 152 | const pick = await input.showQuickPick({ 153 | title, 154 | step: 2, 155 | totalSteps: 4, 156 | placeholder: "Pick an EMR release version", 157 | items: emrReleases, 158 | shouldResume: shouldResume, 159 | }); 160 | 161 | state.emrRelease = (pick as EMRContainerEntry).releaseVersion; 162 | return (input: MultiStepInput) => pickImageRegion(input, state); 163 | } 164 | 165 | async function pickImageRegion( 166 | input: MultiStepInput, 167 | state: Partial 168 | ) { 169 | const regionMapping = [ 170 | { label: "ap-east-1", accountId: "736135916053" }, 171 | { label: "ap-northeast-1", accountId: "059004520145" }, 172 | { label: "ap-northeast-2", accountId: "996579266876" }, 173 | { label: "ap-northeast-3", accountId: "705689932349" }, 174 | { label: "ap-southeast-3", accountId: "946962994502" }, 175 | { label: "ap-south-1", accountId: "235914868574" }, 176 | { label: "ap-south-2", accountId: "691480105545" }, 177 | { label: "ap-southeast-1", accountId: "671219180197" }, 178 | { label: "ap-southeast-2", accountId: "038297999601" }, 179 | { label: "ca-central-1", accountId: "351826393999" }, 180 | { label: "eu-central-1", accountId: "107292555468" }, 181 | { label: "eu-central-2", accountId: "314408114945" }, 182 | { label: "eu-north-1", accountId: "830386416364" }, 183 | { label: "eu-west-1", accountId: "483788554619" }, 184 | { label: "eu-west-2", accountId: "118780647275" }, 185 | { label: "eu-west-3", accountId: "307523725174" }, 186 | { label: "eu-south-1", accountId: "238014973495" }, 187 | { label: "eu-south-2", accountId: "350796622945" }, 188 | { label: "il-central-1", accountId: "395734710648" }, 189 | { label: "me-south-1", accountId: "008085056818" }, 190 
/**
 * Step 4 of the wizard: asks how AWS credentials should reach the container
 * and stores the chosen option's `code` in `state.authType`.
 *
 * This is the last step, so no follow-up step is returned.
 *
 * @param input Multi-step input helper used to show the quick pick.
 * @param state Answers collected so far; updated in place.
 */
async function pickAuthenticationType(
  input: MultiStepInput,
  state: Partial<State>
) {
  const authOptions = [
    {
      label: "Use existing ~/.aws config",
      code: "AWS_CONFIG",
      description: "Mount your ~/.aws directory to the container.",
    },
    {
      label: "Environment Variables",
      code: "ENV_VAR",
      description: `If you already have AWS_* environment variables defined.`,
    },
    {
      label: ".env file",
      code: "ENV_FILE",
      description: "A sample file will be created for you.",
    },
    {
      label: "None",
      code: "NONE",
      description:
        "Requires you to define credentials yourself in the container",
    },
  ];

  const pick = await input.showQuickPick({
    title,
    step: 4,
    totalSteps: 4,
    placeholder: "Select an authentication mechanism for your container",
    items: authOptions,
    shouldResume: shouldResume,
  });

  // The quick pick hands back one of `authOptions`; read its machine-readable code.
  state.authType = (pick as AuthOption).code;
}
290 | ); 291 | return; 292 | } 293 | 294 | const wsPath = vscode.workspace.workspaceFolders[0].uri.fsPath; 295 | if (!fs.existsSync(wsPath + "/.devcontainer")) { 296 | fs.mkdirSync(wsPath + "/.devcontainer"); 297 | } 298 | const targetDcPath = vscode.Uri.file(wsPath + "/.devcontainer"); 299 | 300 | const demoFileName = "emr_tools_demo.py"; 301 | const samplePyspark = this.context.asAbsolutePath( 302 | path.join("templates", demoFileName) 303 | ); 304 | 305 | const dcPath = this.context.asAbsolutePath( 306 | path.join("templates", "devcontainer.json") 307 | ); 308 | const envPath = this.context.asAbsolutePath( 309 | path.join("templates", "aws.env") 310 | ); 311 | // TODO: Don't implement this yet - we wouldn't want to overwrite a .gitignore 312 | const gitIgnorePath = this.context.asAbsolutePath( 313 | path.join("templates", "_.gitignore") 314 | ); 315 | const devContainerConfig = JSON.parse( 316 | stripJSONComments(fs.readFileSync(dcPath).toString()) 317 | ); 318 | // Update the devcontainer with the requisite release and Image URI details 319 | devContainerConfig["build"]["args"]["RELEASE"] = release; 320 | devContainerConfig["build"]["args"]["REGION"] = region; 321 | devContainerConfig["build"]["args"]["EMR_ACCOUNT_ID"] = account; 322 | 323 | // This is useful to prevent EC2 Metadata errors as well as allows pyspark in Jupyter to work 324 | devContainerConfig["containerEnv"]["AWS_REGION"] = region; 325 | 326 | // Depending on auth type, set the corresponding section in the devcontainer 327 | if (authType === "AWS_CONFIG") { 328 | devContainerConfig["mounts"] = [ 329 | "source=${localEnv:HOME}${localEnv:USERPROFILE}/.aws,target=/home/hadoop/.aws,type=bind" 330 | ]; 331 | } else if (authType === "ENV_VAR") { 332 | devContainerConfig['containerEnv'] = { 333 | ...devContainerConfig['containerEnv'], 334 | ...{ 335 | /* eslint-disable @typescript-eslint/naming-convention */ 336 | "AWS_ACCESS_KEY_ID": "${localEnv:AWS_ACCESS_KEY_ID}", 337 | "AWS_SECRET_ACCESS_KEY": 
"${localEnv:AWS_SECRET_ACCESS_KEY}", 338 | "AWS_SESSION_TOKEN": "${localEnv:AWS_SESSION_TOKEN}", 339 | /* eslint-enable @typescript-eslint/naming-convention */ 340 | } 341 | }; 342 | } else if (authType === "ENV_FILE") { 343 | devContainerConfig['runArgs'] = [ 344 | "--env-file", "${localWorkspaceFolder}/.devcontainer/aws.env" 345 | ]; 346 | fs.copyFileSync(envPath, targetDcPath.fsPath + "/aws.env"); 347 | } 348 | 349 | // TODO (2022-07-22): Optionally, add mounts of ~/.aws exists 350 | // "source=${env:HOME}${env:USERPROFILE}/.aws,target=/home/hadoop/.aws,type=bind" 351 | // Also make adding environment credentials optional...they could get exposed in logs 352 | 353 | let dockerfilePath; 354 | 355 | if (getEmrMajorVersion(release) === 5 || getEmrMajorVersion(release) === 6) { 356 | dockerfilePath = this.context.asAbsolutePath( 357 | path.join("templates", "pyspark-emr-6_x.dockerfile") 358 | ); 359 | } else if (getEmrMajorVersion(release) === 7) { 360 | dockerfilePath = this.context.asAbsolutePath( 361 | path.join("templates", "pyspark-emr-7_x.dockerfile") 362 | ); 363 | } else { 364 | throw new Error(`EMR version ${release} not supported`); 365 | } 366 | 367 | 368 | let dockerfile; 369 | 370 | if (dockerfilePath) { 371 | dockerfile = fs.readFileSync(dockerfilePath).toString(); 372 | } 373 | 374 | 375 | fs.writeFileSync( 376 | targetDcPath.fsPath + "/devcontainer.json", 377 | JSON.stringify(devContainerConfig, null, " ") 378 | ); 379 | fs.writeFileSync(targetDcPath.fsPath + "/Dockerfile", dockerfile!); 380 | fs.copyFileSync(samplePyspark, wsPath + `/${demoFileName}`); 381 | 382 | const howtoPath = vscode.Uri.file(wsPath).fsPath + "/emr-local.md"; 383 | fs.writeFileSync(howtoPath, welcomeText(region, account, authType)); 384 | vscode.workspace 385 | .openTextDocument(howtoPath) 386 | .then((a: vscode.TextDocument) => { 387 | vscode.window.showTextDocument(a, 1, false); 388 | }); 389 | 390 | // var setting: vscode.Uri = vscode.Uri.parse("untitled:" + "emr-local.md"); 
391 | // vscode.workspace 392 | // .openTextDocument(setting) 393 | // .then((a: vscode.TextDocument) => { 394 | // vscode.window.showTextDocument(a, 1, false).then((e) => { 395 | // e.edit((edit) => { 396 | // edit.insert( 397 | // new vscode.Position(0, 0), 398 | // welcomeText(region, account) 399 | // ); 400 | // }); 401 | // }); 402 | // }); 403 | } 404 | } 405 | --------------------------------------------------------------------------------