├── .github
│   └── workflows
│       └── python.yml
├── .gitignore
├── LICENSE.txt
├── Makefile
├── README.md
├── clickhouse_cli
│   ├── __init__.py
│   ├── __main__.py
│   ├── cli.py
│   ├── clickhouse-cli.rc.sample
│   ├── clickhouse
│   │   ├── __init__.py
│   │   ├── client.py
│   │   ├── definitions.py
│   │   ├── exceptions.py
│   │   └── sqlparse_patch.py
│   ├── config.py
│   ├── helpers.py
│   └── ui
│       ├── __init__.py
│       ├── completer.py
│       ├── lexer.py
│       ├── parseutils
│       │   ├── __init__.py
│       │   ├── ctes.py
│       │   ├── helpers.py
│       │   ├── meta.py
│       │   ├── tables.py
│       │   └── utils.py
│       ├── prompt.py
│       └── style.py
├── pyproject.toml
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
│   ├── __init__.py
│   └── test_cli.py
└── tox.ini

/.github/workflows/python.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: Python application
5 | 
6 | on:
7 |   push:
8 |     branches:
9 |       - master
10 |   pull_request:
11 |     branches: [ master ]
12 | 
13 | jobs:
14 |   test:
15 |     name: Run tests on ${{ matrix.py }}
16 |     runs-on: ubuntu-22.04
17 |     strategy:
18 |       matrix:
19 |         py:
20 |           - "3.11"
21 |           - "3.10"
22 |           - "3.9"
23 |           - "3.8"
24 |           - "3.7"
25 |           - "pypy-3.9"
26 |           - "pypy-3.8"
27 |           - "pypy-3.7"
28 | 
29 |     steps:
30 |       - uses: actions/checkout@v3
31 |       - name: Set up Python ${{ matrix.py }}
32 |         uses: actions/setup-python@v4
33 |         with:
34 |           python-version: ${{ matrix.py }}
35 |           check-latest: true
36 |       - name: Install dependencies
37 |         run: |
38 |           python -m pip install --upgrade pip flit
39 |           flit install --deps=develop
40 |       - name: Lint with flake8
41 |         run: flake8 clickhouse_cli --count --max-complexity=31 --show-source --statistics
42 |       - name: Test with pytest
43 |         run: pytest --cov=clickhouse_cli
44 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 | *.egg
3 | *.egg-info
4 | dist
5 | build
6 | .coverage
7 | .tox/*
8 | .python-version
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | clickhouse-cli
2 | 
3 | The MIT License (MIT)
4 | Copyright (c) 2016-2017 Igor Hatarist
5 | 
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | this software and associated documentation files (the "Software"), to deal in
8 | the Software without restriction, including without limitation the rights to
9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
10 | the Software, and to permit persons to whom the Software is furnished to do so,
11 | subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 
23 | --------------------------------
24 | 
25 | pgcli (autocompleter + parseutils module with modifications)
26 | 
27 | Copyright (c) 2015, Amjith Ramanujam
28 | All rights reserved.
29 | 
30 | Redistribution and use in source and binary forms, with or without modification,
31 | are permitted provided that the following conditions are met:
32 | 
33 | * Redistributions of source code must retain the above copyright notice, this
34 | list of conditions and the following disclaimer.
35 | 
36 | * Redistributions in binary form must reproduce the above copyright notice, this
37 | list of conditions and the following disclaimer in the documentation and/or
38 | other materials provided with the distribution.
39 | 
40 | * Neither the name of the {organization} nor the names of its
41 | contributors may be used to endorse or promote products derived from
42 | this software without specific prior written permission.
43 | 
44 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
45 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
46 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
47 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
48 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
49 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
50 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
51 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | PYTHON=`which python`
2 | 
3 | dev:
4 | 	$(PYTHON) -m pip install --upgrade pip flit
5 | 	$(PYTHON) -m flit install --deps=develop
6 | 
7 | build: clean
8 | 	$(PYTHON) -m flit build
9 | 
10 | install: build
11 | 	$(PYTHON) -m flit install --deps=production
12 | 
13 | clean:
14 | 	$(PYTHON) setup.py clean
15 | 	find . -name '*.pyc' -delete
16 | 	find . -name '*~' -delete
17 | 	rm -rf clickhouse_cli.egg-info build dist
18 | 
19 | format:
20 | 	black clickhouse_cli
21 | 
22 | lint:
23 | 	flake8 clickhouse_cli
24 | 
25 | test:
26 | 	tox
27 | 
28 | register:
29 | 	$(PYTHON) setup.py register -r pypi
30 | 
31 | upload:
32 | 	$(PYTHON) setup.py sdist upload -r pypi
33 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # clickhouse-cli
2 | 
3 | An unofficial command-line client for the [ClickHouse](https://clickhouse.yandex/) DBMS.
4 | It implements some common and awesome things, such as:
5 | 
6 | - Autocompletion (work in progress)
7 | - Syntax highlighting for the queries & data output (Pretty* formats)
8 | - Multiquery & multiline modes by default - paste as many queries as you want!
9 | - Pager support (`less`) for the data output
10 | - Custom, PostgreSQL-like commands like `\d+ table_name` or `\ps`. See `\?`
11 | - [User-defined functions](#user-defined-functions)
12 | 
13 | But it works over the HTTP port, so there are some limitations for now:
14 | 
15 | - Doesn't fully support sessions. `SET` options are stored locally and are sent with every request.
16 | 
17 | ## Install
18 | 
19 | Python 3.7+ is required.
20 | 
21 |     $ pip3 install clickhouse-cli
22 | 
23 | 
24 | ## Options
25 | 
26 |     $ clickhouse-cli --help
27 |     Usage: clickhouse-cli [OPTIONS] [SQLFILE]
28 | 
29 |       A third-party client for the ClickHouse DBMS.
30 | 
31 |     Options:
32 |       -h, --host TEXT          Server host (hostname, or URL)
33 |       -p, --port INTEGER       Server HTTP port
34 |       -u, --user TEXT          User
35 |       -P, --password           Password
36 |       -d, --database TEXT      Database
37 |       -s, --settings TEXT      Query string to be appended to every query
38 |       -c, --cookie TEXT        Cookie header to be sent with every query
39 |       -f, --format TEXT        Data format for the interactive mode
40 |       -F, --format-stdin TEXT  Data format for stdin/file queries
41 |       -m, --multiline          Enable multiline shell
42 |       -k, --insecure           Allow insecure server connections when using SSL
43 |       --stacktrace             Print stacktraces received from the server.
44 |       --version                Show the version and exit.
45 |       --help                   Show this message and exit.
46 | 
47 | 
48 | ## Configuration file
49 | 
50 | `~/.clickhouse-cli.rc` is at your service!
51 | 
52 |     [defaults]
53 |     # Default connection options that will be used if the relevant argument was omitted.
54 | 
55 |     host = 127.0.0.1
56 |     port = 8123
57 |     db = default
58 |     user = default
59 |     password =
60 |     cookie =
61 | 
62 |     # It's not secure to store the password here in plain text.
63 | 
64 | 
65 |     [main]
66 |     # Allow insecure server connections when using SSL
67 |     insecure = False
68 |     # Disable multiline mode by default
69 |     multiline = False
70 | 
71 |     # Show SQL statements timing
72 |     timing = True
73 | 
74 |     # Preferred data format for the interactive mode
75 |     format = PrettyCompact
76 | 
77 |     # Preferred data format for the non-interactive mode (file/stdin)
78 |     format_stdin = TabSeparated
79 | 
80 |     # Show the reformatted query after its execution
81 |     show_formatted_query = True
82 | 
83 |     # Syntax highlighting
84 |     highlight = True
85 | 
86 |     # Syntax highlight certain output in the interactive mode:
87 |     highlight_output = True
88 | 
89 |     # Syntax highlighting in TrueColor (if supported, see https://gist.github.com/XVilka/8346728)
90 |     highlight_truecolor = True
91 | 
92 |     # Pygments highlight theme (check out https://help.farbox.com/pygments.html for available themes)
93 |     highlight_theme = default
94 | 
95 |     # Show the output via pager (if defined)
96 |     pager = False
97 | 
98 | 
99 |     [settings]
100 |     # You can place the server-side settings here!
101 | 
102 |     # max_memory_usage = 20000000000
103 | 
104 | 
105 | ## Quickstart
106 | 
107 |     $ clickhouse-cli
108 |     clickhouse-cli version: 0.1.6
109 |     Connecting to localhost:8123
110 |     Connected to ClickHouse server.
111 | 
112 |     :) help
113 | 
114 |     clickhouse-cli's custom commands:
115 |     ---------------------------------
116 |     USE      Change the current database.
117 |     SET      Set an option for the current CLI session.
118 |     QUIT     Exit clickhouse-cli.
119 |     HELP     Show this help message.
120 | 
121 |     PostgreSQL-like custom commands:
122 |     --------------------------------
123 |     \l       Show databases.
124 |     \c       Change the current database.
125 |     \d, \dt  Show tables in the current database.
126 |     \d+      Show table's schema.
127 |     \ps      Show current queries.
128 |     \kill    Kill query by its ID.
129 | 
130 |     Query suffixes:
131 |     ---------------
132 |     \g, \G   Use the Vertical format.
133 |     \p       Enable the pager.
134 | 
135 |     :) \l
136 | 
137 |     ┌─name───────┐
138 |     │ default    │
139 |     │ quickstart │
140 |     │ system     │
141 |     └────────────┘
142 | 
143 |     Ok. 3 rows in set. Elapsed: 0.022 sec.
144 | 
145 |     :) USE quickstart
146 | 
147 |     Changed the current database to quickstart.
148 | 
149 |     Ok.
150 | 
151 |     :) \dt
152 | 
153 |     ┌─name───┐
154 |     │ ontime │
155 |     └────────┘
156 | 
157 |     Ok. 1 row in set. Elapsed: 0.012 sec.
158 | 
159 |     :) SELECT OriginCityName, count(*) AS flights
160 |        FROM ontime GROUP BY OriginCityName ORDER BY flights DESC LIMIT 5
161 | 
162 |     ┌─OriginCityName────────┬──flights─┐
163 |     │ Chicago, IL           │ 10536203 │
164 |     │ Atlanta, GA           │  8867847 │
165 |     │ Dallas/Fort Worth, TX │  7601863 │
166 |     │ Houston, TX           │  5714988 │
167 |     │ Los Angeles, CA       │  5575119 │
168 |     └───────────────────────┴──────────┘
169 | 
170 |     Ok. 5 rows in set. Elapsed: 1.317 sec.
171 | 
172 | 
173 | ## Advanced usage
174 | 
175 | ### Environment variables
176 | 
177 | The available environment variables are:
178 | 
179 | - `CLICKHOUSE_HOST`
180 | - `CLICKHOUSE_PORT`
181 | - `CLICKHOUSE_USER`
182 | - `CLICKHOUSE_PASSWORD`
183 | - `CLICKHOUSE_DATABASE`
184 | - `CLICKHOUSE_COOKIE`
185 | 
186 | The order of precedence is:
187 | 
188 | - command argument
189 | - environment variable
190 | - default value from `~/.clickhouse-cli.rc`
191 | 
192 | ### Reading from file / stdin
193 | 
194 |     $ echo 'SELECT 1, 2, 3; SELECT 4, 5, 6;' | clickhouse-cli
195 |     1 2 3
196 | 
197 |     4 5 6
198 | 
199 |     $ cat test.sql
200 |     SELECT 1, 2, 3;
201 |     SELECT 4, 5, 6;
202 | 
203 |     $ clickhouse-cli test.sql
204 |     1 2 3
205 | 
206 |     4 5 6
207 | 
208 |     $ clickhouse-cli -F CSV <<< 'SELECT 1, 2, 3 UNION ALL SELECT 4, 5, 6'
209 |     1,2,3
210 |     4,5,6
211 | 
212 | ### Inserting data from a file
213 | 
214 |     $ clickhouse-cli -q 'CREATE TABLE test (date Date, s String, i UInt64) ENGINE = TinyLog'
215 | 
216 |     $ cat data.csv
217 |     2017-01-01,hello,1
218 |     2017-02-02,world,2
219 | 
220 |     $ clickhouse-cli -q 'INSERT INTO test (date, s, i)' -F CSV data.csv
221 | 
222 |     Ok. Elapsed: 0.037 sec.
223 | 
224 |     $ clickhouse-cli -q 'SELECT * FROM test'
225 |     2017-01-01 hello 1
226 |     2017-02-02 world 2
227 | 
228 | ### Custom settings
229 | 
230 |     $ clickhouse-cli -h 10.1.1.14 -s 'max_memory_usage=20000000000&enable_http_compression=1'
231 | 
232 | ### User-defined functions
233 | 
234 | Oh boy. It's a **very dirty** (and **very untested**) hack that lets you define your own functions (or, actually, whatever you want)
235 | by running a find & replace operation over each query before it is sent to the server.
236 | 
237 | Say, you often run queries that parse some JSON, so you use `visitParamExtractString` all the time:
238 | 
239 |     :) SELECT date, ip, visitParamExtractString(headers, 'User-Agent') AS ua FROM visits LIMIT 1;
240 | 
241 | Even with autocompletion, queries like this are tedious to type.
242 | With this feature, you can define custom find & replace pairs that make things a little bit easier (or harder; it depends).
243 | Put this in your `.clickhouse-cli.rc`:
244 | 
245 |     udf = {
246 |         r'header\((.*?)\)': r'visitParamExtractString(headers, \1)',
247 |     }
248 | 
249 | And rejoice!
250 | 
251 |     :) SELECT date, ip, header('User-Agent') AS ua FROM visits LIMIT 1;
252 | 
253 | The client replaces each matching expression with its substitute, and the query executes correctly.
254 | See [.clickhouse-cli.rc](https://github.com/hatarist/clickhouse-cli/blob/master/clickhouse_cli/clickhouse-cli.rc.sample) for a full example.
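
Under the hood, this is a plain regex substitution applied to every query right before it is sent (see `handle_query` in `cli.py`).
A minimal sketch of the mechanism, assuming the `udf` mapping above (`apply_udf` is an illustrative helper, not part of the client):

    import re

    udf = {
        r'header\((.*?)\)': r'visitParamExtractString(headers, \1)',
    }

    def apply_udf(query):
        # Run every find & replace pair over the query text, in order.
        for regex, replacement in udf.items():
            query = re.sub(regex, replacement, query)
        return query

    print(apply_udf("SELECT date, ip, header('User-Agent') AS ua FROM visits LIMIT 1"))
    # SELECT date, ip, visitParamExtractString(headers, 'User-Agent') AS ua FROM visits LIMIT 1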
255 | -------------------------------------------------------------------------------- /clickhouse_cli/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.9" 2 | -------------------------------------------------------------------------------- /clickhouse_cli/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | """Entrypoint module for `python -m clickhouse_cli`. 4 | 5 | Why does this file exist, and why __main__? For more info, read: 6 | - https://www.python.org/dev/peps/pep-0338/ 7 | - https://docs.python.org/2/using/cmdline.html#cmdoption-m 8 | - https://docs.python.org/3/using/cmdline.html#cmdoption-m 9 | """ 10 | 11 | import sys 12 | 13 | from clickhouse_cli.cli import run_cli 14 | 15 | if __name__ == "__main__": 16 | sys.exit(run_cli()) 17 | -------------------------------------------------------------------------------- /clickhouse_cli/cli.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import http.client 3 | import json 4 | import os 5 | import re 6 | import shutil 7 | import sys 8 | from configparser import NoOptionError 9 | from datetime import datetime 10 | from urllib.parse import parse_qs, urlparse 11 | from uuid import uuid4 12 | 13 | import click 14 | import pygments 15 | import sqlparse 16 | from prompt_toolkit import Application, PromptSession 17 | from prompt_toolkit.completion import DynamicCompleter, ThreadedCompleter 18 | from prompt_toolkit.history import FileHistory 19 | from prompt_toolkit.layout.containers import Window 20 | from prompt_toolkit.layout.controls import BufferControl 21 | from prompt_toolkit.layout.layout import Layout 22 | from prompt_toolkit.lexers import PygmentsLexer 23 | from pygments.formatters import TerminalFormatter, TerminalTrueColorFormatter 24 | 25 | import clickhouse_cli.helpers 26 | from clickhouse_cli import __version__ 27 | from clickhouse_cli.clickhouse.client import Client, ConnectionError, DBException, TimeoutError 28 | from clickhouse_cli.clickhouse.definitions import EXIT_COMMANDS, PRETTY_FORMATS 29 | from clickhouse_cli.clickhouse.sqlparse_patch import KEYWORDS 30 | from clickhouse_cli.config import read_config 31 | from clickhouse_cli.helpers import numberunit_fmt, parse_headers_stream, sizeof_fmt 32 | from clickhouse_cli.ui.completer import CHCompleter 33 | from clickhouse_cli.ui.lexer import CHLexer, CHPrettyFormatLexer 34 | from clickhouse_cli.ui.prompt import CLIBuffer, get_continuation_tokens, get_prompt_tokens, is_multiline, kb 35 | from clickhouse_cli.ui.style import Echo, get_ch_pygments_style, get_ch_style 36 | 37 | # monkey-patch sqlparse 38 | sqlparse.keywords.SQL_REGEX = CHLexer.tokens 39 | sqlparse.keywords.KEYWORDS = KEYWORDS 40 | sqlparse.keywords.KEYWORDS_COMMON = {} 41 | sqlparse.keywords.KEYWORDS_ORACLE = {} 42 | 43 | # monkey-patch http.client 44 | http.client.parse_headers = parse_headers_stream 45 | 46 | 47 | def show_version(): 48 | print("clickhouse-cli version: {version}".format(version=__version__)) 49 | 50 | 51 | class CLI: 52 | def __init__( 53 | self, 54 | host, 55 | port, 56 | user, 57 | password, 58 | database, 59 | settings, 60 | format, 61 | format_stdin, 62 | multiline, 63 | stacktrace, 64 | vi_mode, 65 | cookie, 66 | insecure, 67 | ): 68 | self.config = None 69 | 70 | self.host = host 71 | self.port = port 72 | self.user = user 73 | self.password = password 74 | self.database = database 75 | 
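        # The Cookie header is passed through as-is; --settings is a query string
        # parsed into a dict (parse_qs yields lists, so only the first value of each key is kept).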
self.cookie = cookie 76 | self.settings = {k: v[0] for k, v in parse_qs(settings).items()} 77 | self.format = format 78 | self.format_stdin = format_stdin 79 | self.multiline = multiline 80 | self.stacktrace = stacktrace 81 | self.vi_mode = vi_mode 82 | self.server_version = None 83 | self.insecure = insecure 84 | 85 | self.query_ids = [] 86 | self.client = None 87 | self.echo = Echo(verbose=True, colors=True) 88 | self.progress = None 89 | 90 | self.metadata = {} 91 | 92 | def connect(self): 93 | self.scheme = "http" 94 | if "://" in self.host: 95 | u = urlparse(self.host, allow_fragments=False) 96 | self.host = u.hostname 97 | self.port = u.port or self.port 98 | self.scheme = u.scheme 99 | self.url = "{scheme}://{host}:{port}/".format(scheme=self.scheme, host=self.host, port=self.port) 100 | self.client = Client( 101 | self.url, 102 | self.user, 103 | self.password, 104 | self.database, 105 | self.cookie, 106 | self.stacktrace, 107 | self.conn_timeout, 108 | self.conn_timeout_retry, 109 | self.conn_timeout_retry_delay, 110 | not self.insecure, 111 | ) 112 | 113 | self.echo.print("Connecting to {host}:{port}".format(host=self.host, port=self.port)) 114 | 115 | try: 116 | for key, value in self.settings.items(): 117 | self.client.query("SET {}={}".format(key, value), fmt="Null") 118 | 119 | response = self.client.query("SELECT version();", fmt="TabSeparated") 120 | except TimeoutError: 121 | self.echo.error("Error: Connection timeout.") 122 | return False 123 | except ConnectionError as e: 124 | self.echo.error("Error: Failed to connect. (%s)" % e) 125 | return False 126 | except DBException as e: 127 | self.echo.error("Error:") 128 | self.echo.error(e.error) 129 | 130 | if self.stacktrace and e.stacktrace: 131 | self.echo.print("Stack trace:") 132 | self.echo.print(e.stacktrace) 133 | 134 | return False 135 | 136 | if not response.data.endswith("\n"): 137 | self.echo.error( 138 | "Error: Request failed: `SELECT version();` query failed with status code: {}.".format( 139 | response.status_code 140 | ) 141 | ) 142 | self.echo.error(response.data) 143 | 144 | return False 145 | 146 | version = response.data.strip().split(".") 147 | self.server_version = (int(version[0]), int(version[1]), version[2]) 148 | 149 | self.echo.success("Connected to ClickHouse server v{0}.{1}.{2}.\n".format(*self.server_version)) 150 | return True 151 | 152 | def load_config(self): 153 | self.config = read_config() 154 | 155 | self.insecure = self.insecure or self.config.getboolean("main", "insecure") 156 | self.multiline = self.multiline or self.config.getboolean("main", "multiline") 157 | self.vi_mode = self.vi_mode or self.config.getboolean("main", "vi_mode") 158 | self.format = self.format or self.config.get("main", "format") 159 | self.format_stdin = self.format_stdin or self.config.get("main", "format_stdin") 160 | self.show_formatted_query = self.config.getboolean("main", "show_formatted_query") 161 | self.highlight = self.config.getboolean("main", "highlight") 162 | # forcefully disable `highlight_output` in (u)rxvt (https://github.com/hatarist/clickhouse-cli/issues/20) 163 | self.highlight_output = ( 164 | False 165 | if os.environ.get("TERM", "").startswith("rxvt") 166 | else self.config.getboolean("main", "highlight_output") 167 | ) 168 | self.highlight_truecolor = self.config.getboolean("main", "highlight_truecolor") and os.environ.get( 169 | "COLORTERM" 170 | ) 171 | self.highlight_theme = self.config.get("main", "highlight_theme", fallback=None) 172 | self.complete_while_typing = 
self.config.getboolean("main", "complete_while_typing") 173 | 174 | try: 175 | udf = self.config.get("main", "udf") 176 | except NoOptionError: 177 | udf = "" 178 | 179 | if udf: 180 | self.udf = ast.literal_eval(udf.strip()) or {} 181 | else: 182 | self.udf = {} 183 | 184 | self.refresh_metadata_on_start = self.config.getboolean("main", "refresh_metadata_on_start") 185 | self.refresh_metadata_on_query = self.config.getboolean("main", "refresh_metadata_on_query") 186 | 187 | self.conn_timeout = self.config.getfloat("http", "conn_timeout") 188 | self.conn_timeout_retry = self.config.getint("http", "conn_timeout_retry") 189 | self.conn_timeout_retry_delay = self.config.getfloat("http", "conn_timeout_retry_delay") 190 | 191 | self.host = ( 192 | self.host or os.environ.get("CLICKHOUSE_HOST", "") or self.config.get("defaults", "host") or "127.0.0.1" 193 | ) 194 | self.port = ( 195 | self.port or int(os.environ.get("CLICKHOUSE_PORT", "0")) or self.config.get("defaults", "port") or 8123 196 | ) 197 | self.user = ( 198 | self.user or os.environ.get("CLICKHOUSE_USER", "") or self.config.get("defaults", "user") or "default" 199 | ) 200 | self.password = ( 201 | self.password or os.environ.get("CLICKHOUSE_PASSWORD", "") or self.config.get("defaults", "password") 202 | ) 203 | self.database = ( 204 | self.database 205 | or os.environ.get("CLICKHOUSE_DATABASE", "") 206 | or self.config.get("defaults", "db") 207 | or "default" 208 | ) 209 | self.cookie = self.cookie or os.environ.get("CLICKHOUSE_COOKIE", "") or self.config.get("defaults", "cookie") 210 | 211 | config_settings = dict(self.config.items("settings")) 212 | arg_settings = self.settings 213 | config_settings.update(arg_settings) 214 | self.settings = config_settings 215 | 216 | self.echo.colors = self.highlight 217 | 218 | def run(self, query, data): 219 | self.load_config() 220 | 221 | if data or query is not None: 222 | self.format = self.format_stdin 223 | self.echo.verbose = False 224 | 225 | if self.echo.verbose: 226 | show_version() 227 | 228 | if not self.connect(): 229 | return 230 | 231 | if self.client: 232 | self.client.settings = self.settings 233 | self.client.cli_settings = { 234 | "multiline": self.multiline, 235 | "vi_mode": self.vi_mode, 236 | "format": self.format, 237 | "format_stdin": self.format_stdin, 238 | "show_formatted_query": self.show_formatted_query, 239 | "highlight": self.highlight, 240 | "highlight_output": self.highlight_output, 241 | "refresh_metadata_on_start": self.refresh_metadata_on_start, 242 | "refresh_metadata_on_query": self.refresh_metadata_on_query, 243 | } 244 | 245 | if data and query is None: 246 | # cat stuff.sql | clickhouse-cli 247 | # clickhouse-cli stuff.sql 248 | for subdata in data: 249 | self.handle_input(subdata.read(), verbose=False, refresh_metadata=False) 250 | 251 | return 252 | 253 | if not data and query is not None: 254 | # clickhouse-cli -q 'SELECT 1' 255 | return self.handle_query(query, stream=False) 256 | 257 | if data and query is not None: 258 | # cat stuff.csv | clickhouse-cli -q 'INSERT INTO stuff' 259 | # clickhouse-cli -q 'INSERT INTO stuff' stuff.csv 260 | for subdata in data: 261 | compress = "gzip" if os.path.splitext(subdata.name)[1] == ".gz" else False 262 | 263 | self.handle_query(query, data=subdata, stream=True, compress=compress) 264 | 265 | return 266 | 267 | buffer = CLIBuffer( 268 | client=self.client, 269 | multiline=self.multiline, 270 | metadata=self.metadata, 271 | ) 272 | 273 | root_container = Window(content=BufferControl(buffer=buffer)) 274 | 275 | 
layout = Layout(root_container) 276 | 277 | hist = FileHistory(filename=os.path.expanduser("~/.clickhouse-cli_history")) 278 | self.completer = CHCompleter(self.client, self.metadata) 279 | 280 | self.session = PromptSession( 281 | style=get_ch_style(self.highlight_theme) if self.highlight else None, 282 | lexer=PygmentsLexer(CHLexer) if self.highlight else None, 283 | message=get_prompt_tokens()[0][1], 284 | prompt_continuation=get_continuation_tokens()[0][1], 285 | multiline=is_multiline(self.multiline), 286 | vi_mode=self.vi_mode, 287 | history=hist, 288 | key_bindings=kb, 289 | complete_while_typing=self.complete_while_typing, 290 | completer=ThreadedCompleter(DynamicCompleter(lambda: self.completer)), 291 | ) 292 | 293 | self.app = Application( 294 | layout=layout, 295 | # buffer=buffer, 296 | ) 297 | 298 | if self.refresh_metadata_on_start: 299 | self.app.current_buffer.completer.refresh_metadata() 300 | 301 | try: 302 | while True: 303 | try: 304 | cli_input = self.session.prompt() 305 | self.handle_input(cli_input) 306 | except KeyboardInterrupt: 307 | # Attempt to terminate queries 308 | for query_id in self.query_ids: 309 | self.client.kill_query(query_id) 310 | 311 | self.echo.error("\nQuery was terminated.") 312 | finally: 313 | self.query_ids = [] 314 | except EOFError: 315 | self.echo.success("Bye.") 316 | 317 | def handle_input(self, input_data, verbose=True, refresh_metadata=True): 318 | force_pager = False 319 | if input_data.endswith(r"\p" if isinstance(input_data, str) else rb"\p"): 320 | input_data = input_data[:-2] 321 | force_pager = True 322 | 323 | # FIXME: A dirty dirty hack to make multiple queries (per one paste) work. 324 | self.query_ids = [] 325 | for query in sqlparse.split(input_data): 326 | query_id = str(uuid4()) 327 | self.query_ids.append(query_id) 328 | self.handle_query(query, verbose=verbose, query_id=query_id, force_pager=force_pager) 329 | 330 | if refresh_metadata and input_data: 331 | self.app.current_buffer.completer.refresh_metadata() 332 | 333 | def handle_query( 334 | self, 335 | query, 336 | data=None, 337 | stream=False, 338 | verbose=False, 339 | query_id=None, 340 | compress=False, 341 | **kwargs, 342 | ): 343 | if query.rstrip(";") == "": 344 | return 345 | 346 | elif query.lower() in EXIT_COMMANDS: 347 | raise EOFError 348 | 349 | elif query.lower() in (r"\?", "help"): 350 | rows = [ 351 | ["", ""], 352 | ["clickhouse-cli's custom commands:", ""], 353 | ["---------------------------------", ""], 354 | ["USE", "Change the current database."], 355 | ["SET", "Set an option for the current CLI session."], 356 | ["QUIT", "Exit clickhouse-cli."], 357 | ["HELP", "Show this help message."], 358 | ["", ""], 359 | ["PostgreSQL-like custom commands:", ""], 360 | ["--------------------------------", ""], 361 | [r"\l", "Show databases."], 362 | [r"\c", "Change the current database."], 363 | [r"\d, \dt", "Show tables in the current database."], 364 | [r"\d+", "Show table's schema."], 365 | [r"\ps", "Show current queries."], 366 | [r"\kill", "Kill query by its ID."], 367 | ["", ""], 368 | ["Query suffixes:", ""], 369 | ["---------------", ""], 370 | [r"\g, \G", "Use the Vertical format."], 371 | [r"\p", "Enable the pager."], 372 | ] 373 | 374 | for row in rows: 375 | self.echo.success("{:<8s}".format(row[0]), nl=False) 376 | self.echo.info(row[1]) 377 | return 378 | 379 | elif query in (r"\d", r"\dt"): 380 | query = "SHOW TABLES" 381 | 382 | elif query.startswith(r"\d+ "): 383 | query = "DESCRIBE TABLE " + query[4:] 384 | 385 | elif query == r"\l": 386 
| query = "SHOW DATABASES" 387 | 388 | elif query.startswith(r"\c "): 389 | query = "USE " + query[3:] 390 | 391 | elif query.startswith(r"\ps"): 392 | query = ( 393 | "SELECT query_id, user, address, elapsed, read_rows, memory_usage " 394 | "FROM system.processes WHERE query_id != '{}'" 395 | ).format(query_id) 396 | 397 | elif query.startswith(r"\kill "): 398 | self.client.kill_query(query[6:]) 399 | return 400 | 401 | response = "" 402 | 403 | self.progress_reset() 404 | 405 | if self.udf: 406 | for regex, replacement in self.udf.items(): 407 | query = re.sub(regex, replacement, query) 408 | 409 | try: 410 | response = self.client.query( 411 | query, 412 | fmt=self.format, 413 | data=data, 414 | stream=stream, 415 | verbose=verbose, 416 | query_id=query_id, 417 | compress=compress, 418 | ) 419 | except TimeoutError: 420 | self.echo.error("Error: Connection timeout.") 421 | return 422 | except ConnectionError as e: 423 | self.echo.error("Error: Failed to connect. (%s)" % e) 424 | return 425 | except DBException as e: 426 | self.progress_reset() 427 | self.echo.error("\nQuery:") 428 | self.echo.error(query) 429 | self.echo.error("\n\nReceived exception from server:") 430 | self.echo.error(e.error) 431 | 432 | if self.stacktrace and e.stacktrace: 433 | self.echo.print("\nStack trace:") 434 | self.echo.print(e.stacktrace) 435 | 436 | self.echo.print("\nElapsed: {elapsed:.3f} sec.\n".format(elapsed=e.response.elapsed.total_seconds())) 437 | 438 | return 439 | 440 | total_rows, total_bytes = self.progress_reset() 441 | 442 | self.echo.print() 443 | 444 | if stream: 445 | data = response.iter_lines() if hasattr(response, "iter_lines") else response.data 446 | for line in data: 447 | print(line.decode("utf-8", "ignore")) 448 | 449 | else: 450 | if response.data != "": 451 | print_func = print 452 | 453 | if self.config.getboolean("main", "pager") or kwargs.pop("force_pager", False): 454 | print_func = self.echo.pager 455 | 456 | should_highlight_output = ( 457 | verbose and self.highlight and self.highlight_output and response.format in PRETTY_FORMATS 458 | ) 459 | 460 | formatter = TerminalFormatter() 461 | 462 | if self.highlight and self.highlight_output and self.highlight_truecolor: 463 | formatter = TerminalTrueColorFormatter(style=get_ch_pygments_style(self.highlight_theme)) 464 | 465 | if should_highlight_output: 466 | print_func(pygments.highlight(response.data, CHPrettyFormatLexer(), formatter)) 467 | else: 468 | print_func(response.data, end="") 469 | 470 | if response.message != "": 471 | self.echo.print(response.message) 472 | self.echo.print() 473 | 474 | self.echo.success("Ok. ", nl=False) 475 | 476 | if response.rows is not None: 477 | self.echo.print( 478 | "{rows_count} row{rows_plural} in set.".format( 479 | rows_count=response.rows, 480 | rows_plural="s" if response.rows != 1 else "", 481 | ), 482 | end=" ", 483 | ) 484 | 485 | if self.config.getboolean("main", "timing") and response.time_elapsed is not None: 486 | self.echo.print( 487 | "Elapsed: {elapsed:.3f} sec. 
Processed: {rows} rows, {bytes} ({avg_rps} rows/s, {avg_bps}/s)".format( 488 | elapsed=response.time_elapsed, 489 | rows=numberunit_fmt(total_rows), 490 | bytes=sizeof_fmt(total_bytes), 491 | avg_rps=numberunit_fmt(total_rows / max(response.time_elapsed, 0.001)), 492 | avg_bps=sizeof_fmt(total_bytes / max(response.time_elapsed, 0.001)), 493 | ), 494 | end="", 495 | ) 496 | 497 | self.echo.print("\n") 498 | 499 | def progress_update(self, line): 500 | if not self.config.getboolean("main", "timing") and not self.echo.verbose: 501 | return 502 | # Parse X-ClickHouse-Progress header 503 | now = datetime.now() 504 | progress = json.loads(line[23:].decode().strip()) 505 | progress = { 506 | "timestamp": now, 507 | "read_rows": int(progress["read_rows"]), 508 | "total_rows": int(progress["total_rows"] if "total_rows" in progress else progress["total_rows_to_read"]), 509 | "read_bytes": int(progress["read_bytes"]), 510 | } 511 | # Calculate percentage completed and format initial message 512 | progress["percents"] = ( 513 | int((progress["read_rows"] / progress["total_rows"]) * 100) if progress["total_rows"] > 0 else 0 514 | ) 515 | message = "Progress: {} rows, {}".format( 516 | numberunit_fmt(progress["read_rows"]), sizeof_fmt(progress["read_bytes"]) 517 | ) 518 | # Calculate row and byte read velocity 519 | if self.progress: 520 | delta = (now - self.progress["timestamp"]).total_seconds() 521 | if delta > 0: 522 | rps = (progress["read_rows"] - self.progress["read_rows"]) / delta 523 | bps = (progress["read_bytes"] - self.progress["read_bytes"]) / delta 524 | message += " ({} rows/s, {}/s)".format(numberunit_fmt(rps), sizeof_fmt(bps)) 525 | self.progress = progress 526 | self.progress_print(message, progress["percents"]) 527 | 528 | def progress_reset(self): 529 | if not self.echo.verbose: 530 | return (0, 0) 531 | 532 | progress = self.progress 533 | self.progress = None 534 | clickhouse_cli.helpers.trace_headers_stream = self.progress_update 535 | # Clear printed progress (if any) 536 | columns = shutil.get_terminal_size((80, 0)).columns 537 | sys.stdout.write("\u001b[%dD" % columns + " " * columns) 538 | sys.stdout.flush() 539 | # Report totals 540 | if progress: 541 | return (progress["read_rows"], progress["read_bytes"]) 542 | return (0, 0) 543 | 544 | def progress_print(self, message, percents): 545 | suffix = "%3d%%" % percents 546 | columns = shutil.get_terminal_size((80, 0)).columns 547 | bars_max = columns - (len(message) + len(suffix) + 3) 548 | bars = int(percents * (bars_max / 100)) if (bars_max > 0) else 0 549 | message = "{} \033[42m{}\033[0m{} {}".format(message, " " * bars, " " * (bars_max - bars), suffix) 550 | sys.stdout.write("\u001b[%dD" % columns + message) 551 | sys.stdout.flush() 552 | 553 | 554 | @click.command(context_settings={"ignore_unknown_options": True}) 555 | @click.option( 556 | "--host", 557 | "-h", 558 | help="Server host, set to https://: if you want to use HTTPS", 559 | ) 560 | @click.option("--port", "-p", type=click.INT, help="Server HTTP/HTTPS port") 561 | @click.option("--user", "-u", help="User") 562 | @click.option("--password", "-P", is_flag=True, help="Ask for a password in STDIN") 563 | @click.option("--arg-password", "-B", help="Argument as a password") 564 | @click.option("--database", "-d", help="Database") 565 | @click.option("--settings", "-s", help="Query string to be sent with every query") 566 | @click.option("--cookie", "-c", help="Cookie header to be sent with every query") 567 | @click.option("--query", "-q", help="Query to execute") 568 
| @click.option( 569 | "--insecure", 570 | "-k", 571 | is_flag=True, 572 | help="Allow insecure server connections when using SSL", 573 | ) 574 | @click.option("--format", "-f", help="Data format for the interactive mode") 575 | @click.option("--format-stdin", "-F", help="Data format for stdin/file queries") 576 | @click.option("--multiline", "-m", is_flag=True, help="Enable multiline shell") 577 | @click.option("--stacktrace", is_flag=True, help="Print stacktraces received from the server.") 578 | @click.option("--vi-mode", is_flag=True, help="Enable Vi input mode") 579 | @click.option("--version", is_flag=True, help="Show the version and exit.") 580 | @click.argument("files", nargs=-1, type=click.File("rb")) 581 | def run_cli( 582 | host, 583 | port, 584 | user, 585 | password, 586 | arg_password, 587 | database, 588 | settings, 589 | query, 590 | format, 591 | format_stdin, 592 | multiline, 593 | stacktrace, 594 | vi_mode, 595 | cookie, 596 | version, 597 | files, 598 | insecure, 599 | ): 600 | """ 601 | A third-party client for the ClickHouse DBMS. 602 | """ 603 | if version: 604 | return show_version() 605 | 606 | if arg_password: 607 | password = arg_password 608 | elif password: 609 | password = click.prompt("Password", hide_input=True, show_default=False, type=str) 610 | 611 | data_input = () 612 | 613 | # Read from STDIN if non-interactive mode 614 | stdin = click.get_binary_stream("stdin") 615 | if not stdin.isatty(): 616 | data_input += (stdin,) 617 | 618 | # Read the given file 619 | if files: 620 | data_input += files 621 | 622 | # TODO: Rename the CLI's instance into something more feasible 623 | cli = CLI( 624 | host, 625 | port, 626 | user, 627 | password, 628 | database, 629 | settings, 630 | format, 631 | format_stdin, 632 | multiline, 633 | stacktrace, 634 | vi_mode, 635 | cookie, 636 | insecure, 637 | ) 638 | cli.run(query, data_input) 639 | return 0 640 | 641 | 642 | if __name__ == "__main__": 643 | run_cli() 644 | -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse-cli.rc.sample: -------------------------------------------------------------------------------- 1 | [defaults] 2 | # Default connection options that will be used if the relevant argument was omitted. 3 | 4 | host = 127.0.0.1 5 | port = 8123 6 | db = default 7 | user = default 8 | password = 9 | cookie = 10 | 11 | # It's not secure to store the password here in plain text. 12 | 13 | [main] 14 | # Allow insecure server connections when using SSL 15 | insecure = False 16 | # Disable multiline mode by default 17 | multiline = False 18 | 19 | # Disable Vi mode by default 20 | # 21 | # Ideally, we want to read this value from `inputrc`, but this isn't possible 22 | # until https://github.com/prompt-toolkit/python-prompt-toolkit/issues/56 is 23 | # resolved. 
24 | vi_mode = False
25 | 
26 | # Show SQL statements timing
27 | timing = True
28 | 
29 | # Preferred data format for the interactive mode
30 | format = PrettyCompact
31 | 
32 | # Preferred data format for the non-interactive mode (file/stdin)
33 | format_stdin = TabSeparated
34 | 
35 | # Show the reformatted query after its execution
36 | show_formatted_query = True
37 | 
38 | # Syntax highlighting
39 | highlight = True
40 | 
41 | # Syntax highlight certain output in the interactive mode:
42 | highlight_output = True
43 | 
44 | # Syntax highlighting in TrueColor (if supported, see https://gist.github.com/XVilka/8346728)
45 | highlight_truecolor = True
46 | 
47 | # Highlight theme (uncomment the line below & check out https://help.farbox.com/pygments.html for available themes)
48 | #highlight_theme = default
49 | 
50 | # If True, enables completion on every typed character (e.g. a space)
51 | complete_while_typing = False
52 | 
53 | # Show the output via pager (if defined)
54 | pager = False
55 | 
56 | # Refresh metadata (databases, tables, column names) for autocompletion...
57 | # ...on application start
58 | refresh_metadata_on_start = True
59 | 
60 | # ...after each query (if set to True, may slow down usage)
61 | refresh_metadata_on_query = False
62 | 
63 | 
64 | # A horrible "user-defined functions" hack, powered by regexps and a little bit of insanity!
65 | # It makes the client find & replace parts of your queries, to spare (or fray; it depends) your nerves.
66 | # It's commented out by default so as not to enrage anyone. An example is provided below, though.
67 | 
68 | #udf = {
69 | #    r'header\((.*?)\)': r'visitParamExtractString(headers, \1)',
70 | # }
71 | 
72 | # If you uncomment the block above, please make sure that the trailing bracket is indented.
73 | # The example above will let you run such a query:
74 | # SELECT date, ip, visitParamExtractString(headers, 'User-Agent') FROM visits LIMIT 3;
75 | # with much less typing:
76 | # SELECT date, ip, header('User-Agent') FROM visits LIMIT 3;
77 | 
78 | 
79 | [http]
80 | 
81 | # Timeout for the connection to the ClickHouse HTTP server (in seconds)
82 | conn_timeout = 10.0
83 | 
84 | # Number of retries if the connection times out
85 | conn_timeout_retry = 0
86 | 
87 | # A dynamic delay between retries (see "urllib3 Retry backoff_factor")
88 | conn_timeout_retry_delay = 0.5
89 | 
90 | 
91 | [settings]
92 | # You can place the server-side settings here!
93 | 94 | # max_memory_usage = 20000000000 95 | -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hatarist/clickhouse-cli/81450224395407e101bb0a219c5b85c2e6ad7594/clickhouse_cli/clickhouse/__init__.py -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse/client.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | import uuid 4 | 5 | import pygments 6 | import requests 7 | import sqlparse 8 | from pygments.formatters import TerminalFormatter, TerminalTrueColorFormatter 9 | from requests.packages.urllib3.util.retry import Retry 10 | from sqlparse.tokens import Keyword, Newline, Whitespace 11 | 12 | from clickhouse_cli import __version__ 13 | from clickhouse_cli.clickhouse.definitions import FORMATTABLE_QUERIES 14 | from clickhouse_cli.clickhouse.exceptions import ConnectionError, DBException, TimeoutError 15 | from clickhouse_cli.helpers import chain_streams 16 | from clickhouse_cli.ui.lexer import CHLexer 17 | from clickhouse_cli.ui.style import Echo, get_ch_pygments_style 18 | 19 | USER_AGENT = "clickhouse-cli/{0}".format(__version__) 20 | 21 | logger = logging.getLogger("main") 22 | echo = Echo() 23 | 24 | 25 | class Response(object): 26 | def __init__(self, query, fmt, response="", message="", stream=False): 27 | self.query = query 28 | self.message = message 29 | self.format = fmt 30 | self.stream = stream 31 | self.time_elapsed = None 32 | self.rows = None 33 | self.status_code = None 34 | 35 | if isinstance(response, requests.Response): 36 | self.time_elapsed = response.elapsed.total_seconds() 37 | self.status_code = response.status_code 38 | 39 | if stream: 40 | self.data = response.iter_lines() 41 | self.rows = None 42 | return 43 | 44 | self.data = response.text 45 | 46 | lines = self.data.split("\n") 47 | 48 | if self.data == "" or not lines: 49 | self.rows = 0 50 | elif fmt.startswith("Pretty"): 51 | self.rows = sum(1 for line in lines if line.startswith("│")) 52 | elif fmt in ("TabSeparated", "TSV", "CSV"): 53 | self.rows = len(lines) - 1 54 | elif fmt in ("TabSeparatedWithNames", "TSVWithNames", "CSVWithNames"): 55 | self.rows = len(lines) - 2 56 | elif fmt in ("TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"): 57 | self.rows = len(lines) - 3 58 | else: 59 | self.data = response 60 | 61 | 62 | class Client(object): 63 | def __init__( 64 | self, 65 | url, 66 | user, 67 | password, 68 | database, 69 | cookie, 70 | stacktrace=False, 71 | timeout=10.0, 72 | timeout_retry=0, 73 | timeout_retry_delay=0.0, 74 | verify=True, 75 | ): 76 | self.url = url 77 | self.user = user 78 | self.password = password or "" 79 | self.database = database 80 | self.cookie = cookie 81 | self.session_id = str(uuid.uuid4()) 82 | self.cli_settings = {} 83 | self.stacktrace = stacktrace 84 | self.timeout = timeout 85 | self.session = requests.Session() 86 | self.verify = verify 87 | 88 | retries = Retry( 89 | connect=timeout_retry, 90 | # method_whitelist={'GET', 'POST'}, # enabling retries for POST may be a bad idea 91 | backoff_factor=timeout_retry_delay, 92 | ) 93 | self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries)) 94 | 95 | def _query( 96 | self, 97 | method, 98 | query, 99 | extra_params, 100 | fmt, 101 | stream, 102 | data=None, 103 | compress=False, 104 | 
**kwargs, 105 | ): 106 | params = {"session_id": self.session_id} 107 | params.update(extra_params) 108 | 109 | headers = {"Accept-Encoding": "identity", "User-Agent": USER_AGENT} 110 | if compress: 111 | headers["Content-Encoding"] = "gzip" 112 | 113 | if self.cookie: 114 | headers["Cookie"] = self.cookie 115 | 116 | response = None 117 | 118 | if not query.endswith("\n"): 119 | query += "\n" 120 | 121 | streams = [io.BytesIO(query.encode())] 122 | if data is not None: 123 | streams.append(data) 124 | data_stream = chain_streams(streams) 125 | 126 | try: 127 | response = self.session.request( 128 | method, 129 | self.url, 130 | data=data_stream, 131 | params=params, 132 | auth=(self.user, self.password), 133 | stream=stream, 134 | headers=headers, 135 | timeout=(self.timeout, None), 136 | verify=self.verify, 137 | **kwargs, 138 | ) 139 | except requests.exceptions.ConnectTimeout as e: 140 | raise TimeoutError(*e.args) from e 141 | except ( 142 | requests.exceptions.ConnectionError, 143 | requests.packages.urllib3.exceptions.NewConnectionError, 144 | ) as e: 145 | raise ConnectionError(*e.args) from e 146 | 147 | if response is not None and response.status_code != 200: 148 | raise DBException(response, query=query) 149 | 150 | return Response(query, fmt, response, stream=stream) 151 | 152 | def test_query(self): 153 | params = {"database": self.database} 154 | return self._query( 155 | "GET", 156 | "SELECT 1", 157 | params, 158 | fmt="Null", 159 | stream=False, 160 | ) 161 | 162 | def kill_query(self, query_id): 163 | return self._query( 164 | "GET", 165 | "SELECT 1", 166 | {"replace_running_query": 1, "query_id": query_id}, 167 | fmt="Null", 168 | stream=False, 169 | ) 170 | 171 | def query( 172 | self, 173 | query, 174 | data=None, 175 | fmt="PrettyCompact", 176 | stream=False, 177 | verbose=False, 178 | query_id=None, 179 | compress=False, 180 | **kwargs, 181 | ): 182 | if query.lstrip()[:6].upper().startswith("INSERT"): 183 | query_split = query.split() 184 | else: 185 | query = sqlparse.format(query, strip_comments=True).rstrip(";") 186 | 187 | if verbose and self.cli_settings.get("show_formatted_query"): 188 | # Highlight & reformat the SQL query 189 | formatted_query = sqlparse.format( 190 | query, 191 | reindent_aligned=True, 192 | indent_width=2, 193 | # keyword_case='upper' # works poorly in a few cases 194 | ) 195 | 196 | formatter = TerminalFormatter() 197 | 198 | if self.cli_settings.get("highlight") and self.cli_settings.get("highlight_truecolor"): 199 | formatter = TerminalTrueColorFormatter( 200 | style=get_ch_pygments_style(self.cli_settings.get("highlight_theme")) 201 | ) 202 | 203 | print("\n" + pygments.highlight(formatted_query, CHLexer(), formatter)) 204 | 205 | # TODO: use sqlparse's parser instead 206 | query_split = query.split() 207 | 208 | if not query_split: 209 | return Response(query, fmt) 210 | 211 | # Since sessions aren't supported over HTTP, we have to make some quirks: 212 | # USE database; 213 | if query_split[0].upper() == "USE" and len(query_split) == 2: 214 | old_database = self.database 215 | self.database = query_split[1] 216 | try: 217 | self.test_query() 218 | except DBException as e: 219 | self.database = old_database 220 | raise e 221 | 222 | return Response( 223 | query, 224 | fmt, 225 | message="Changed the current database to {0}.".format(self.database), 226 | ) 227 | 228 | # Set response format 229 | if query_split[0].upper() in FORMATTABLE_QUERIES and len(query_split) >= 2: 230 | if query_split[-2].upper() == "FORMAT": 231 | fmt = 
query_split[-1] 232 | elif query_split[-2].upper() != "FORMAT": 233 | if query_split[0].upper() != "INSERT" or data is not None: 234 | if query[-2:] in (r"\g", r"\G"): 235 | query = query[:-2] + " FORMAT Vertical" 236 | else: 237 | query = query + " FORMAT {fmt}".format(fmt=fmt) 238 | 239 | params = {"database": self.database, "stacktrace": int(self.stacktrace)} 240 | if query_id: 241 | params["query_id"] = query_id 242 | 243 | has_outfile = False 244 | if query_split[0].upper() == "SELECT": 245 | # Detect INTO OUTFILE at the end of the query 246 | t_query = [ 247 | t.value.upper() if t.ttype == Keyword else t.value 248 | for t in sqlparse.parse(query)[0] 249 | if t.ttype not in (Whitespace, Newline) 250 | ] 251 | 252 | try: 253 | last_tokens = t_query[-5:] 254 | into_pos = last_tokens.index("INTO") 255 | has_outfile = into_pos >= 0 and last_tokens.index("OUTFILE") == into_pos + 1 256 | 257 | if has_outfile: 258 | path = last_tokens[into_pos + 2].strip("'") 259 | # Remove `INTO OUTFILE '/path/to/file.out'` 260 | last_tokens.pop(into_pos) 261 | last_tokens.pop(into_pos) 262 | last_tokens.pop(into_pos) 263 | query = " ".join(t_query[:-5] + last_tokens) 264 | except ValueError: 265 | has_outfile = False 266 | 267 | method = "POST" 268 | response = self._query( 269 | method, 270 | query, 271 | params, 272 | fmt=fmt, 273 | stream=stream, 274 | data=data, 275 | compress=compress, 276 | **kwargs, 277 | ) 278 | 279 | if has_outfile: 280 | try: 281 | with open(path, "wb") as f: 282 | if not f: 283 | return response 284 | 285 | if stream: 286 | for line in response.iter_lines(): 287 | f.write(line) 288 | else: 289 | f.write(response.data.encode()) 290 | except Exception as e: 291 | echo.warning("Caught an exception when writing to file: {0}".format(e)) 292 | 293 | return response 294 | -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse/definitions.py: -------------------------------------------------------------------------------- 1 | from clickhouse_cli.clickhouse.sqlparse_patch import KEYWORDS as sqlparse_keywords 2 | 3 | # TODO: Run `SELECT name FROM system.functions` instead of this hardcoding 4 | FUNCTIONS = ( 5 | "abs", 6 | "acos", 7 | "addDays", 8 | "addHours", 9 | "addMinutes", 10 | "addMonths", 11 | "addQuarters", 12 | "addSeconds", 13 | "addWeeks", 14 | "addYears", 15 | "alphaTokens", 16 | "and", 17 | "any", 18 | "anyHeavy", 19 | "anyLast", 20 | "appendTrailingCharIfAbsent", 21 | "argMax", 22 | "argMin", 23 | "array", 24 | "arrayAll", 25 | "arrayConcat", 26 | "arrayCount", 27 | "arrayCumSum", 28 | "arrayCumSumNonNegative", 29 | "arrayDifference", 30 | "arrayDistinct", 31 | "arrayElement", 32 | "arrayEnumerate", 33 | "arrayEnumerateDense", 34 | "arrayEnumerateDenseRanked", 35 | "arrayEnumerateUniq", 36 | "arrayEnumerateUniqRanked", 37 | "arrayExists", 38 | "arrayFilter", 39 | "arrayFirst", 40 | "arrayFirstIndex", 41 | "arrayFlatten", 42 | "arrayIntersect", 43 | "arrayJoin", 44 | "arrayMap", 45 | "arrayPopBack", 46 | "arrayPopFront", 47 | "arrayPushBack", 48 | "arrayPushFront", 49 | "arrayReduce", 50 | "arrayResize", 51 | "arrayReverse", 52 | "arrayReverseSort", 53 | "arraySlice", 54 | "arraySort", 55 | "arrayStringConcat", 56 | "arraySum", 57 | "arrayUniq", 58 | "arrayWithConstant", 59 | "asin", 60 | "assumeNotNull", 61 | "atan", 62 | "avg", 63 | "bar", 64 | "base64Decode", 65 | "base64Encode", 66 | "basename", 67 | "BIT_AND", 68 | "BIT_OR", 69 | "BIT_XOR", 70 | "bitAnd", 71 | "bitmapAnd", 72 | "bitmapAndCardinality", 73 | 
"bitmapAndnot", 74 | "bitmapAndnotCardinality", 75 | "bitmapBuild", 76 | "bitmapCardinality", 77 | "bitmapContains", 78 | "bitmapHasAll", 79 | "bitmapHasAny", 80 | "bitmapOr", 81 | "bitmapOrCardinality", 82 | "bitmapToArray", 83 | "bitmapXor", 84 | "bitmapXorCardinality", 85 | "bitmaskToArray", 86 | "bitmaskToList", 87 | "bitNot", 88 | "bitOr", 89 | "bitRotateLeft", 90 | "bitRotateRight", 91 | "bitShiftLeft", 92 | "bitShiftRight", 93 | "bitTest", 94 | "bitTestAll", 95 | "bitTestAny", 96 | "bitXor", 97 | "blockNumber", 98 | "blockSize", 99 | "boundingRatio", 100 | "caseWithExpr", 101 | "caseWithExpression", 102 | "caseWithoutExpr", 103 | "caseWithoutExpression", 104 | "CAST", 105 | "cbrt", 106 | "ceil", 107 | "ceiling", 108 | "CHAR_LENGTH", 109 | "CHARACTER_LENGTH", 110 | "cityHash64", 111 | "coalesce", 112 | "concat", 113 | "concatAssumeInjective", 114 | "convertCharset", 115 | "corr", 116 | "corrStable", 117 | "cos", 118 | "count", 119 | "countEqual", 120 | "COVAR_POP", 121 | "COVAR_SAMP", 122 | "covarPop", 123 | "covarPopStable", 124 | "covarSamp", 125 | "covarSampStable", 126 | "CRC32", 127 | "currentDatabase", 128 | "cutFragment", 129 | "cutIPv6", 130 | "cutQueryString", 131 | "cutQueryStringAndFragment", 132 | "cutToFirstSignificantSubdomain", 133 | "cutURLParameter", 134 | "cutWWW", 135 | "dateDiff", 136 | "decodeURLComponent", 137 | "defaultValueOfArgumentType", 138 | "dictGet", 139 | "dictGetDate", 140 | "dictGetDateOrDefault", 141 | "dictGetDateTime", 142 | "dictGetDateTimeOrDefault", 143 | "dictGetFloat32", 144 | "dictGetFloat32OrDefault", 145 | "dictGetFloat64", 146 | "dictGetFloat64OrDefault", 147 | "dictGetHierarchy", 148 | "dictGetInt16", 149 | "dictGetInt16OrDefault", 150 | "dictGetInt32", 151 | "dictGetInt32OrDefault", 152 | "dictGetInt64", 153 | "dictGetInt64OrDefault", 154 | "dictGetInt8", 155 | "dictGetInt8OrDefault", 156 | "dictGetOrDefault", 157 | "dictGetString", 158 | "dictGetStringOrDefault", 159 | "dictGetUInt16", 160 | "dictGetUInt16OrDefault", 161 | "dictGetUInt32", 162 | "dictGetUInt32OrDefault", 163 | "dictGetUInt64", 164 | "dictGetUInt64OrDefault", 165 | "dictGetUInt8", 166 | "dictGetUInt8OrDefault", 167 | "dictGetUUID", 168 | "dictGetUUIDOrDefault", 169 | "dictHas", 170 | "dictIsIn", 171 | "divide", 172 | "domain", 173 | "domainWithoutWWW", 174 | "dumpColumnStructure", 175 | "e", 176 | "empty", 177 | "emptyArrayDate", 178 | "emptyArrayDateTime", 179 | "emptyArrayFloat32", 180 | "emptyArrayFloat64", 181 | "emptyArrayInt16", 182 | "emptyArrayInt32", 183 | "emptyArrayInt64", 184 | "emptyArrayInt8", 185 | "emptyArrayString", 186 | "emptyArrayToSingle", 187 | "emptyArrayUInt16", 188 | "emptyArrayUInt32", 189 | "emptyArrayUInt64", 190 | "emptyArrayUInt8", 191 | "endsWith", 192 | "entropy", 193 | "equals", 194 | "erf", 195 | "erfc", 196 | "evalMLMethod", 197 | "exp", 198 | "exp10", 199 | "exp2", 200 | "extract", 201 | "extractAll", 202 | "extractURLParameter", 203 | "extractURLParameterNames", 204 | "extractURLParameters", 205 | "farmHash64", 206 | "filesystemAvailable", 207 | "filesystemCapacity", 208 | "filesystemFree", 209 | "finalizeAggregation", 210 | "findClusterIndex", 211 | "findClusterValue", 212 | "firstSignificantSubdomain", 213 | "flatten", 214 | "floor", 215 | "format", 216 | "formatDateTime", 217 | "formatReadableSize", 218 | "fragment", 219 | "gccMurmurHash", 220 | "gcd", 221 | "generateUUIDv4", 222 | "geohashDecode", 223 | "geohashEncode", 224 | "geoToH3", 225 | "getSizeOfEnumType", 226 | "globalIn", 227 | "globalNotIn", 228 | "greatCircleDistance", 
229 | "greater", 230 | "greaterOrEquals", 231 | "greatest", 232 | "groupArray", 233 | "groupArrayInsertAt", 234 | "groupArrayMovingAvg", 235 | "groupArrayMovingSum", 236 | "groupBitAnd", 237 | "groupBitmap", 238 | "groupBitOr", 239 | "groupBitXor", 240 | "groupUniqArray", 241 | "halfMD5", 242 | "has", 243 | "hasAll", 244 | "hasAny", 245 | "hasColumnInTable", 246 | "hex", 247 | "histogram", 248 | "hiveHash", 249 | "hostName", 250 | "identity", 251 | "if", 252 | "ifNull", 253 | "ignore", 254 | "ignoreExceptNull", 255 | "in", 256 | "indexHint", 257 | "indexOf", 258 | "intDiv", 259 | "intDivOrZero", 260 | "intExp10", 261 | "intExp2", 262 | "intHash32", 263 | "intHash64", 264 | "IPv4CIDRToRange", 265 | "IPv4NumToString", 266 | "IPv4NumToStringClassC", 267 | "IPv4StringToNum", 268 | "IPv4ToIPv6", 269 | "IPv6CIDRToRange", 270 | "IPv6NumToString", 271 | "IPv6StringToNum", 272 | "isFinite", 273 | "isInfinite", 274 | "isNaN", 275 | "isNotNull", 276 | "isNull", 277 | "isValidUTF8", 278 | "javaHash", 279 | "joinGet", 280 | "JSONExtract", 281 | "JSONExtractBool", 282 | "JSONExtractFloat", 283 | "JSONExtractInt", 284 | "JSONExtractKeysAndValues", 285 | "JSONExtractRaw", 286 | "JSONExtractString", 287 | "JSONExtractUInt", 288 | "JSONHas", 289 | "JSONKey", 290 | "JSONLength", 291 | "JSONType", 292 | "jumpConsistentHash", 293 | "kurtPop", 294 | "kurtSamp", 295 | "lcase", 296 | "lcm", 297 | "least", 298 | "length", 299 | "lengthUTF8", 300 | "less", 301 | "lessOrEquals", 302 | "lgamma", 303 | "like", 304 | "ln", 305 | "locate", 306 | "log", 307 | "log10", 308 | "log2", 309 | "lowCardinalityIndices", 310 | "lowCardinalityKeys", 311 | "lower", 312 | "lowerUTF8", 313 | "MACNumToString", 314 | "MACStringToNum", 315 | "MACStringToOUI", 316 | "match", 317 | "materialize", 318 | "max", 319 | "maxIntersections", 320 | "maxIntersectionsPosition", 321 | "MD5", 322 | "median", 323 | "medianDeterministic", 324 | "medianExact", 325 | "medianExactWeighted", 326 | "medianTDigest", 327 | "medianTDigestWeighted", 328 | "medianTiming", 329 | "medianTimingWeighted", 330 | "metroHash64", 331 | "mid", 332 | "min", 333 | "minus", 334 | "modelEvaluate", 335 | "modulo", 336 | "multiFuzzyMatchAny", 337 | "multiFuzzyMatchAnyIndex", 338 | "multiIf", 339 | "multiMatchAny", 340 | "multiMatchAnyIndex", 341 | "multiply", 342 | "multiSearchAllPositions", 343 | "multiSearchAllPositionsCaseInsensitive", 344 | "multiSearchAllPositionsCaseInsensitiveUTF8", 345 | "multiSearchAllPositionsUTF8", 346 | "multiSearchAny", 347 | "multiSearchAnyCaseInsensitive", 348 | "multiSearchAnyCaseInsensitiveUTF8", 349 | "multiSearchAnyUTF8", 350 | "multiSearchFirstIndex", 351 | "multiSearchFirstIndexCaseInsensitive", 352 | "multiSearchFirstIndexCaseInsensitiveUTF8", 353 | "multiSearchFirstIndexUTF8", 354 | "multiSearchFirstPosition", 355 | "multiSearchFirstPositionCaseInsensitive", 356 | "multiSearchFirstPositionCaseInsensitiveUTF8", 357 | "multiSearchFirstPositionUTF8", 358 | "murmurHash2_32", 359 | "murmurHash2_64", 360 | "murmurHash3_128", 361 | "murmurHash3_32", 362 | "murmurHash3_64", 363 | "negate", 364 | "ngramDistance", 365 | "ngramDistanceCaseInsensitive", 366 | "ngramDistanceCaseInsensitiveUTF8", 367 | "ngramDistanceUTF8", 368 | "ngramSearch", 369 | "ngramSearchCaseInsensitive", 370 | "ngramSearchCaseInsensitiveUTF8", 371 | "ngramSearchUTF8", 372 | "not", 373 | "notEmpty", 374 | "notEquals", 375 | "notIn", 376 | "notLike", 377 | "now", 378 | "nullIf", 379 | "or", 380 | "OSHierarchy", 381 | "OSIn", 382 | "OSToRoot", 383 | "parseDateTimeBestEffort", 384 
| "parseDateTimeBestEffortOrNull", 385 | "parseDateTimeBestEffortOrZero", 386 | "path", 387 | "pathFull", 388 | "pi", 389 | "plus", 390 | "pointInEllipses", 391 | "pointInPolygon", 392 | "position", 393 | "positionCaseInsensitive", 394 | "positionCaseInsensitiveUTF8", 395 | "positionUTF8", 396 | "pow", 397 | "power", 398 | "protocol", 399 | "quantile", 400 | "quantileDeterministic", 401 | "quantileExact", 402 | "quantileExactWeighted", 403 | "quantiles", 404 | "quantilesDeterministic", 405 | "quantilesExact", 406 | "quantilesExactWeighted", 407 | "quantilesTDigest", 408 | "quantilesTDigestWeighted", 409 | "quantilesTiming", 410 | "quantilesTimingWeighted", 411 | "quantileTDigest", 412 | "quantileTDigestWeighted", 413 | "quantileTiming", 414 | "quantileTimingWeighted", 415 | "queryString", 416 | "queryStringAndFragment", 417 | "rand", 418 | "rand64", 419 | "randConstant", 420 | "range", 421 | "regexpQuoteMeta", 422 | "regionHierarchy", 423 | "regionIn", 424 | "regionToArea", 425 | "regionToCity", 426 | "regionToContinent", 427 | "regionToCountry", 428 | "regionToDistrict", 429 | "regionToName", 430 | "regionToPopulation", 431 | "regionToTopContinent", 432 | "reinterpretAsDate", 433 | "reinterpretAsDateTime", 434 | "reinterpretAsFixedString", 435 | "reinterpretAsFloat32", 436 | "reinterpretAsFloat64", 437 | "reinterpretAsInt16", 438 | "reinterpretAsInt32", 439 | "reinterpretAsInt64", 440 | "reinterpretAsInt8", 441 | "reinterpretAsString", 442 | "reinterpretAsUInt16", 443 | "reinterpretAsUInt32", 444 | "reinterpretAsUInt64", 445 | "reinterpretAsUInt8", 446 | "replace", 447 | "replaceAll", 448 | "replaceOne", 449 | "replaceRegexpAll", 450 | "replaceRegexpOne", 451 | "replicate", 452 | "retention", 453 | "reverse", 454 | "reverseUTF8", 455 | "round", 456 | "roundAge", 457 | "roundDown", 458 | "roundDuration", 459 | "roundToExp2", 460 | "rowNumberInAllBlocks", 461 | "rowNumberInBlock", 462 | "runningAccumulate", 463 | "runningDifference", 464 | "runningDifferenceStartingWithFirstValue", 465 | "SEHierarchy", 466 | "SEIn", 467 | "sequenceCount", 468 | "sequenceMatch", 469 | "SEToRoot", 470 | "SHA1", 471 | "SHA224", 472 | "SHA256", 473 | "simpleLinearRegression", 474 | "sin", 475 | "sipHash128", 476 | "sipHash64", 477 | "skewPop", 478 | "skewSamp", 479 | "sleep", 480 | "sleepEachRow", 481 | "splitByChar", 482 | "splitByString", 483 | "sqrt", 484 | "startsWith", 485 | "STDDEV_POP", 486 | "STDDEV_SAMP", 487 | "stddevPop", 488 | "stddevPopStable", 489 | "stddevSamp", 490 | "stddevSampStable", 491 | "stochasticLinearRegression", 492 | "stochasticLogisticRegression", 493 | "substr", 494 | "substring", 495 | "substringUTF8", 496 | "subtractDays", 497 | "subtractHours", 498 | "subtractMinutes", 499 | "subtractMonths", 500 | "subtractQuarters", 501 | "subtractSeconds", 502 | "subtractWeeks", 503 | "subtractYears", 504 | "sum", 505 | "sumburConsistentHash", 506 | "sumKahan", 507 | "sumMap", 508 | "sumMapFiltered", 509 | "sumMapFilteredWithOverflow", 510 | "sumMapWithOverflow", 511 | "sumWithOverflow", 512 | "tan", 513 | "tgamma", 514 | "throwIf", 515 | "timeSeriesGroupRateSum", 516 | "timeSeriesGroupSum", 517 | "timeSlot", 518 | "timeSlots", 519 | "timezone", 520 | "toColumnTypeName", 521 | "toDate", 522 | "toDateOrNull", 523 | "toDateOrZero", 524 | "toDateTime", 525 | "toDateTimeOrNull", 526 | "toDateTimeOrZero", 527 | "today", 528 | "toDayOfMonth", 529 | "toDayOfWeek", 530 | "toDayOfYear", 531 | "toDecimal128", 532 | "toDecimal128OrNull", 533 | "toDecimal128OrZero", 534 | "toDecimal32", 535 | 
"toDecimal32OrNull", 536 | "toDecimal32OrZero", 537 | "toDecimal64", 538 | "toDecimal64OrNull", 539 | "toDecimal64OrZero", 540 | "toFixedString", 541 | "toFloat32", 542 | "toFloat32OrNull", 543 | "toFloat32OrZero", 544 | "toFloat64", 545 | "toFloat64OrNull", 546 | "toFloat64OrZero", 547 | "toHour", 548 | "toInt16", 549 | "toInt16OrNull", 550 | "toInt16OrZero", 551 | "toInt32", 552 | "toInt32OrNull", 553 | "toInt32OrZero", 554 | "toInt64", 555 | "toInt64OrNull", 556 | "toInt64OrZero", 557 | "toInt8", 558 | "toInt8OrNull", 559 | "toInt8OrZero", 560 | "toIntervalDay", 561 | "toIntervalHour", 562 | "toIntervalMinute", 563 | "toIntervalMonth", 564 | "toIntervalQuarter", 565 | "toIntervalSecond", 566 | "toIntervalWeek", 567 | "toIntervalYear", 568 | "toIPv4", 569 | "toIPv6", 570 | "toISOWeek", 571 | "toISOYear", 572 | "toLowCardinality", 573 | "toMinute", 574 | "toMonday", 575 | "toMonth", 576 | "toNullable", 577 | "topK", 578 | "topKWeighted", 579 | "topLevelDomain", 580 | "toQuarter", 581 | "toRelativeDayNum", 582 | "toRelativeHourNum", 583 | "toRelativeMinuteNum", 584 | "toRelativeMonthNum", 585 | "toRelativeQuarterNum", 586 | "toRelativeSecondNum", 587 | "toRelativeWeekNum", 588 | "toRelativeYearNum", 589 | "toSecond", 590 | "toStartOfDay", 591 | "toStartOfFifteenMinutes", 592 | "toStartOfFiveMinute", 593 | "toStartOfHour", 594 | "toStartOfInterval", 595 | "toStartOfISOYear", 596 | "toStartOfMinute", 597 | "toStartOfMonth", 598 | "toStartOfQuarter", 599 | "toStartOfTenMinutes", 600 | "toStartOfYear", 601 | "toString", 602 | "toStringCutToZero", 603 | "toTime", 604 | "toTimeZone", 605 | "toTypeName", 606 | "toUInt16", 607 | "toUInt16OrNull", 608 | "toUInt16OrZero", 609 | "toUInt32", 610 | "toUInt32OrNull", 611 | "toUInt32OrZero", 612 | "toUInt64", 613 | "toUInt64OrNull", 614 | "toUInt64OrZero", 615 | "toUInt8", 616 | "toUInt8OrNull", 617 | "toUInt8OrZero", 618 | "toUnixTimestamp", 619 | "toUUID", 620 | "toValidUTF8", 621 | "toYear", 622 | "toYYYYMM", 623 | "toYYYYMMDD", 624 | "toYYYYMMDDhhmmss", 625 | "transform", 626 | "trimBoth", 627 | "trimLeft", 628 | "trimRight", 629 | "trunc", 630 | "truncate", 631 | "tryBase64Decode", 632 | "tuple", 633 | "tupleElement", 634 | "ucase", 635 | "unhex", 636 | "uniq", 637 | "uniqCombined", 638 | "uniqExact", 639 | "uniqHLL12", 640 | "uniqUpTo", 641 | "upper", 642 | "upperUTF8", 643 | "uptime", 644 | "URLHash", 645 | "URLHierarchy", 646 | "URLPathHierarchy", 647 | "UUIDNumToString", 648 | "UUIDStringToNum", 649 | "VAR_POP", 650 | "VAR_SAMP", 651 | "varPop", 652 | "varPopStable", 653 | "varSamp", 654 | "varSampStable", 655 | "version", 656 | "visibleWidth", 657 | "visitParamExtractBool", 658 | "visitParamExtractFloat", 659 | "visitParamExtractInt", 660 | "visitParamExtractRaw", 661 | "visitParamExtractString", 662 | "visitParamExtractUInt", 663 | "visitParamHas", 664 | "windowFunnel", 665 | "xor", 666 | "xxHash32", 667 | "xxHash64", 668 | "yandexConsistentHash", 669 | "yesterday", 670 | ) 671 | 672 | CASE_INSENSITIVE_FUNCTIONS = ( 673 | "COUNT", 674 | "CORR", 675 | "VAR_SAMP", 676 | "VAR_POP", 677 | "STDDEV_SAMP", 678 | "STDDEV_POP", 679 | "COVAR_SAMP", 680 | "COVAR_POP", 681 | "AVG", 682 | "SUM", 683 | "MIN", 684 | "MAX", 685 | ) 686 | 687 | AGGREGATION_FUNCTIONS_BASE = ( 688 | "any", 689 | "anyHeavy", 690 | "anyLast", 691 | "argMax", 692 | "argMin", 693 | "avg", 694 | "corr", 695 | "count", 696 | "covarPop", 697 | "covarSamp", 698 | "groupArray", 699 | "groupArrayInsertAt", 700 | "groupBitAnd", 701 | "groupBitmap", 702 | "groupBitOr", 703 | "groupBitXor", 
704 | "groupUniqArray", 705 | "kurtPop", 706 | "kurtSamp", 707 | "max", 708 | "median", 709 | "min", 710 | "quantile", 711 | "quantileDeterministic", 712 | "quantileExact", 713 | "quantileExactWeighted", 714 | "quantiles", 715 | "quantileTDigest", 716 | "quantileTiming", 717 | "quantileTimingWeighted", 718 | "simpleLinearRegression", 719 | "skewPop", 720 | "skewSamp", 721 | "stddevPop", 722 | "stddevSamp", 723 | "stochasticLinearRegression", 724 | "stochasticLogisticRegression", 725 | "sum", 726 | "sumMap", 727 | "sumWithOverflow", 728 | "timeSeriesGroupRateSum", 729 | "timeSeriesGroupSum", 730 | "topK", 731 | "uniq", 732 | "uniqCombined", 733 | "uniqExact", 734 | "uniqHLL12", 735 | "varPop", 736 | "varSamp", 737 | ) 738 | 739 | AGGREGATION_FUNCTIONS = ( 740 | AGGREGATION_FUNCTIONS_BASE 741 | + tuple(name + "If" for name in AGGREGATION_FUNCTIONS_BASE) 742 | + tuple(name + "Array" for name in AGGREGATION_FUNCTIONS_BASE) 743 | + tuple(name + "Merge" for name in AGGREGATION_FUNCTIONS_BASE) 744 | + tuple(name + "State" for name in AGGREGATION_FUNCTIONS_BASE) 745 | + tuple(name + "MergeState" for name in AGGREGATION_FUNCTIONS_BASE) 746 | + tuple(name + "forEach" for name in AGGREGATION_FUNCTIONS_BASE) 747 | ) 748 | 749 | DATATYPES = ( 750 | "UInt8", 751 | "UInt16", 752 | "UInt32", 753 | "UInt64", 754 | "Int8", 755 | "Int16", 756 | "Int32", 757 | "Int64", 758 | "Float32", 759 | "Float64", 760 | "Decimal", 761 | "Boolean", 762 | "UUID", 763 | "String", 764 | "FixedString", 765 | "Date", 766 | "DateTime", 767 | "Enum", 768 | "Array", 769 | "Nullable", 770 | "Tuple", 771 | "Nested", 772 | ) 773 | 774 | OPERATORS = ( 775 | "LIKE", 776 | "NOT LIKE", 777 | "IN", 778 | "NOT IN", 779 | "GLOBAL IN", 780 | "GLOBAL NOT IN", 781 | "AND", 782 | "OR", 783 | "NOT", 784 | "BETWEEN", 785 | ) 786 | 787 | PRETTY_FORMATS = ( 788 | "Pretty", 789 | "PrettyCompact", 790 | "PrettyCompactMonoBlock", 791 | "PrettyCompactNoEscapes", 792 | "PrettyNoEscapes", 793 | "PrettySpace", 794 | "PrettySpaceNoEscapes", 795 | ) 796 | 797 | FORMATS = PRETTY_FORMATS + ( 798 | "TabSeparated", 799 | "TabSeparatedRaw", 800 | "TabSeparatedWithNames", 801 | "TabSeparatedWithNamesAndTypes", 802 | "CSV", 803 | "CSVWithNames", 804 | "Values", 805 | "Vertical", 806 | "JSON", 807 | "JSONCompact", 808 | "JSONEachRow", 809 | "TSKV", 810 | "Protobuf", 811 | "Parquet", 812 | "RowBinary", 813 | "Native", 814 | "Null", 815 | "XML", 816 | "CapnProto", 817 | ) 818 | 819 | READ_QUERIES = ( 820 | "SELECT", 821 | "SHOW", 822 | "DESC", 823 | "DESCRIBE", 824 | "USE", 825 | "EXISTS", 826 | "WITH", 827 | ) 828 | 829 | WRITE_QUERIES = ( 830 | "INSERT", 831 | "CREATE", 832 | "ATTACH", 833 | "DETACH", 834 | "DROP", 835 | "RENAME", 836 | "ALTER", 837 | "SET", 838 | "OPTIMIZE", 839 | ) 840 | 841 | FORMATTABLE_QUERIES = ( 842 | "INSERT", 843 | "SELECT", 844 | "WITH", 845 | "SHOW", 846 | "DESC", 847 | "DESCRIBE", 848 | "EXISTS", 849 | ) 850 | 851 | KEYWORDS = tuple(sqlparse_keywords.keys()) 852 | 853 | EXIT_COMMANDS = ( 854 | "exit", 855 | "quit", 856 | "logout", 857 | "учше", 858 | "йгше", 859 | "дщпщге", 860 | "exit;", 861 | "quit;", 862 | "logout;", 863 | "учшеж", 864 | "йгшеж", 865 | "дщпщгеж", 866 | "q", 867 | "й", 868 | r"\q", 869 | r"\Q", 870 | ":q", 871 | r"\й", 872 | r"\Й", 873 | "Жй", 874 | ) 875 | 876 | CREATE_SUBCOMMANDS = ( 877 | "DATABASE", 878 | "TABLE", 879 | "VIEW", 880 | ) 881 | 882 | DROP_SUBCOMMANDS = ("DATABASE", "TABLE") 883 | 884 | SHOW_SUBCOMMANDS = ( 885 | "DATABASES", 886 | "TABLES", 887 | "PROCESSLIST", 888 | "CREATE TABLE", 889 | ) 890 | 
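The AGGREGATION_FUNCTIONS tuple above is built by appending ClickHouse's aggregate-function combinator suffixes (-If, -Array, -Merge, -State, and their compounds) to every base name, so the completer can offer variants such as avgIf or uniqMerge without listing each one by hand. A minimal standalone sketch of that expansion, using a small illustrative subset of names rather than the real tuples:

```python
# Sketch of the combinator expansion performed by AGGREGATION_FUNCTIONS.
# BASE and SUFFIXES are illustrative subsets, not the full lists above.
BASE = ("avg", "sum", "uniq")
SUFFIXES = ("If", "Array", "Merge", "State", "MergeState")

EXPANDED = BASE + tuple(name + suffix for suffix in SUFFIXES for name in BASE)

print(EXPANDED[:9])
# ('avg', 'sum', 'uniq', 'avgIf', 'sumIf', 'uniqIf', 'avgArray', 'sumArray', 'uniqArray')
```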
891 | HELP_COMMANDS = ( 892 | "help", 893 | r"\?", 894 | ) 895 | 896 | REDIRECTION_COMMANDS = ( 897 | r"\d", 898 | r"\d+", 899 | r"\dt", 900 | r"\c", 901 | r"\l", 902 | r"\ps", 903 | r"\kill", 904 | ) 905 | 906 | INTERNAL_COMMANDS = EXIT_COMMANDS + HELP_COMMANDS + REDIRECTION_COMMANDS 907 | -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse/exceptions.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | class DBException(Exception): 5 | regex = ( 6 | r"Code: (?P<code>\d+), e\.displayText\(\) = ([\w:]+: )?(?P<text>[\w\W]+),\s+" 7 | r"e\.what\(\) = (?P<what>[\w:]+)(,\s+)?" 8 | r"(Stack trace:\n\n(?P<stacktrace>[\w\W]*)\n)?" 9 | ) 10 | 11 | def __init__(self, response, query): 12 | self.response = response 13 | self.query = query 14 | self.error_code = 0 15 | self.error = "" 16 | self.stacktrace = "" 17 | 18 | try: 19 | info = re.search(self.regex, response.text).groupdict() 20 | self.error_code = info["code"] 21 | self.error = info["text"] 22 | self.stacktrace = info["stacktrace"] or "" 23 | except Exception: 24 | self.error = self.response.text 25 | 26 | def __str__(self): 27 | return "Query:\n{0}\n\nResponse:\n{1}".format(self.query, self.response.text) 28 | 29 | 30 | class TimeoutError(Exception): 31 | pass 32 | 33 | 34 | class ConnectionError(Exception): 35 | pass 36 | -------------------------------------------------------------------------------- /clickhouse_cli/clickhouse/sqlparse_patch.py: -------------------------------------------------------------------------------- 1 | from sqlparse import tokens 2 | 3 | KEYWORDS = { 4 | "ADD": tokens.Keyword, 5 | "AFTER": tokens.Keyword, 6 | "ALIAS": tokens.Keyword, 7 | "ALL": tokens.Keyword, 8 | "ALTER": tokens.Keyword.DDL, 9 | "AND": tokens.Keyword, 10 | "ANY": tokens.Keyword, 11 | "ARRAY": tokens.Keyword, 12 | "AS": tokens.Keyword, 13 | "ASC": tokens.Keyword.Order, 14 | "ATTACH": tokens.Keyword.DDL, 15 | "BETWEEN": tokens.Keyword, 16 | "BY": tokens.Keyword, 17 | "CASE": tokens.Keyword, 18 | "CAST": tokens.Keyword, 19 | "CHECK": tokens.Keyword.DDL, 20 | "COLUMN": tokens.Keyword, 21 | "COPY": tokens.Keyword, 22 | "CREATE": tokens.Keyword.DDL, 23 | "DATABASE": tokens.Keyword, 24 | "DATABASES": tokens.Keyword, 25 | # 'DEFAULT': tokens.Keyword, 26 | "DELETE": tokens.Keyword.DML, 27 | "DESC": tokens.Keyword, 28 | "DESCRIBE": tokens.Keyword.DDL, 29 | "DETACH": tokens.Keyword.DDL, 30 | "DISTINCT": tokens.Keyword, 31 | "DROP": tokens.Keyword.DDL, 32 | "ELSE": tokens.Keyword, 33 | "END": tokens.Keyword, 34 | "ENGINE": tokens.Keyword, 35 | "EXISTS": tokens.Keyword, 36 | "FALSE": tokens.Keyword, 37 | "FETCH": tokens.Keyword, 38 | "FINAL": tokens.Keyword, 39 | "FIRST": tokens.Keyword, 40 | "FORMAT": tokens.Keyword, 41 | "FREEZE": tokens.Keyword, 42 | "FROM": tokens.Keyword, 43 | "FULL": tokens.Keyword, 44 | "GLOBAL": tokens.Keyword, 45 | "GROUP": tokens.Keyword, 46 | "HAVING": tokens.Keyword, 47 | "IF": tokens.Keyword, 48 | "IN": tokens.Keyword, 49 | "INNER": tokens.Keyword, 50 | "INSERT": tokens.Keyword.DML, 51 | "INTO": tokens.Keyword, 52 | "IS": tokens.Keyword, 53 | "JOIN": tokens.Keyword, 54 | "KEY": tokens.Keyword, 55 | "LEFT": tokens.Keyword, 56 | "LIKE": tokens.Keyword, 57 | "LIMIT": tokens.Keyword, 58 | "MATERIALIZED": tokens.Keyword, 59 | "MODIFY": tokens.Keyword, 60 | "NAME": tokens.Keyword, 61 | "NOT": tokens.Keyword, 62 | "OF": tokens.Keyword, 63 | "ON": tokens.Keyword, 64 | "OPTIMIZE": tokens.Keyword.DDL, 65 | "OR": tokens.Keyword, 66 | "ORDER":
tokens.Keyword, 67 | "OUTER": tokens.Keyword, 68 | "OUTFILE": tokens.Keyword, 69 | "PART": tokens.Keyword, 70 | "PARTITION": tokens.Keyword, 71 | "POPULATE": tokens.Keyword, 72 | "PREWHERE": tokens.Keyword, 73 | "PRIMARY": tokens.Keyword, 74 | "RENAME": tokens.Keyword.DDL, 75 | "RESHARD": tokens.Keyword, 76 | "RIGHT": tokens.Keyword, 77 | "SELECT": tokens.Keyword.DML, 78 | "SET": tokens.Keyword.DDL, 79 | "SETTINGS": tokens.Keyword, 80 | "SHOW": tokens.Keyword.DDL, 81 | "TABLE": tokens.Keyword, 82 | "TABLES": tokens.Keyword, 83 | "TEMPORARY": tokens.Keyword, 84 | "THEN": tokens.Keyword, 85 | "TO": tokens.Keyword, 86 | "TOTALS": tokens.Keyword, 87 | "UNION": tokens.Keyword, 88 | "UNREPLICATED": tokens.Keyword, 89 | "USE": tokens.Keyword.DDL, 90 | "USING": tokens.Keyword, 91 | "VALUES": tokens.Keyword, 92 | "VIEW": tokens.Keyword, 93 | "WHEN": tokens.Keyword, 94 | "WHERE": tokens.Keyword, 95 | "WITH": tokens.Keyword, 96 | } 97 | -------------------------------------------------------------------------------- /clickhouse_cli/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from configparser import ConfigParser 4 | 5 | from clickhouse_cli.ui.style import Echo 6 | 7 | PACKAGE_ROOT = os.path.dirname(__file__) 8 | DEFAULT_CONFIG = os.path.join(PACKAGE_ROOT, "clickhouse-cli.rc.sample") 9 | USER_CONFIG = "~/.clickhouse-cli.rc" 10 | 11 | 12 | echo = Echo() 13 | 14 | 15 | def read_config(): 16 | config = ConfigParser() 17 | try: 18 | config.read_file(open(DEFAULT_CONFIG)) 19 | config.read([os.path.expanduser(USER_CONFIG)]) 20 | except (IOError, OSError) as e: 21 | echo.warning("You don't have permission to read '{0}'.".format(e.filename)) 22 | return 23 | 24 | return config 25 | 26 | 27 | def write_default_config(source, destination, overwrite=False): 28 | destination = os.path.expanduser(destination) 29 | if not overwrite and os.path.exists(destination): 30 | return 31 | 32 | shutil.copyfile(source, destination) 33 | -------------------------------------------------------------------------------- /clickhouse_cli/helpers.py: -------------------------------------------------------------------------------- 1 | import email.parser 2 | import http.client 3 | import io 4 | 5 | 6 | def sizeof_fmt(num, suffix="B"): 7 | for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: 8 | if abs(num) < 1024.0: 9 | return "%3.1f%s%s" % (num, unit, suffix) 10 | num /= 1024.0 11 | return "%.1f%s%s" % (num, "Yi", suffix) 12 | 13 | 14 | def numberunit_fmt(num): 15 | if not num: 16 | return "0" 17 | 18 | for unit in ["", "thousand", "million", "billion", "trillion"]: 19 | if abs(num) < 1000.0: 20 | return ("%3.1f %s" % (num, unit)).strip() 21 | num /= 1000.0 22 | return "%.1f %s" % (num, "quadrillion") 23 | 24 | 25 | def trace_headers_stream(*args): 26 | pass 27 | 28 | 29 | def parse_headers_stream(fp, _class=http.client.HTTPMessage): 30 | """A modified version of http.client.parse_headers.""" 31 | headers = [] 32 | 33 | while True: 34 | line = fp.readline(http.client._MAXLINE + 1) 35 | if len(line) > http.client._MAXLINE: 36 | raise http.client.LineTooLong("header line") 37 | 38 | if line.startswith(b"X-ClickHouse-Progress: ") and trace_headers_stream: 39 | trace_headers_stream(line) 40 | else: 41 | headers.append(line) 42 | 43 | # _MAXHEADERS check was removed here since ClickHouse may send a lot of Progress headers. 
44 | 45 | if line in (b"\r\n", b"\n", b""): 46 | break 47 | hstring = b"".join(headers).decode("iso-8859-1") 48 | return email.parser.Parser(_class=_class).parsestr(hstring) 49 | 50 | 51 | def chain_streams(streams, buffer_size=io.DEFAULT_BUFFER_SIZE): 52 | """ 53 | https://stackoverflow.com/questions/24528278/stream-multiple-files-into-a-readable-object-in-python 54 | 55 | Chain an iterable of streams together into a single buffered stream. 56 | Usage: 57 | def generate_open_file_streams(): 58 | for file in filenames: 59 | yield open(file, 'rb') 60 | f = chain_streams(generate_open_file_streams()) 61 | f.read() 62 | """ 63 | 64 | class ChainStream(io.RawIOBase): 65 | def __init__(self): 66 | self.leftover = b"" 67 | self.stream_iter = iter(streams) 68 | try: 69 | self.stream = next(self.stream_iter) 70 | except StopIteration: 71 | self.stream = None 72 | 73 | def readable(self): 74 | return True 75 | 76 | def _read_next_chunk(self, max_length): 77 | # Return 0 or more bytes from the current stream, first returning all 78 | # leftover bytes. If the stream is closed returns b'' 79 | if self.leftover: 80 | return self.leftover 81 | elif self.stream is not None: 82 | return self.stream.read(max_length) 83 | else: 84 | return b"" 85 | 86 | def readinto(self, b): 87 | buffer_length = len(b) 88 | chunk = self._read_next_chunk(buffer_length) 89 | while len(chunk) == 0: 90 | # move to next stream 91 | if self.stream is not None: 92 | self.stream.close() 93 | try: 94 | self.stream = next(self.stream_iter) 95 | chunk = self._read_next_chunk(buffer_length) 96 | except StopIteration: 97 | # No more streams to chain together 98 | self.stream = None 99 | return 0 # indicate EOF 100 | output, self.leftover = chunk[:buffer_length], chunk[buffer_length:] 101 | b[: len(output)] = output 102 | return len(output) 103 | 104 | return io.BufferedReader(ChainStream(), buffer_size=buffer_size) 105 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hatarist/clickhouse-cli/81450224395407e101bb0a219c5b85c2e6ad7594/clickhouse_cli/ui/__init__.py -------------------------------------------------------------------------------- /clickhouse_cli/ui/completer.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import re 3 | from collections import OrderedDict, defaultdict, namedtuple 4 | from itertools import count 5 | 6 | from prompt_toolkit.completion import Completer, Completion, PathCompleter 7 | from prompt_toolkit.document import Document 8 | 9 | from clickhouse_cli.clickhouse.definitions import DATATYPES, FORMATS, FUNCTIONS, KEYWORDS 10 | from clickhouse_cli.ui.parseutils.helpers import ( 11 | Alias, 12 | Candidate, 13 | Column, 14 | Database, 15 | Datatype, 16 | Format, 17 | FromClauseItem, 18 | Function, 19 | Join, 20 | JoinCondition, 21 | Keyword, 22 | Match, 23 | Path, 24 | PrevalenceCounter, 25 | Schema, 26 | SchemaObject, 27 | Special, 28 | Table, 29 | View, 30 | _Candidate, 31 | generate_alias, 32 | normalize_ref, 33 | suggest_type, 34 | ) 35 | from clickhouse_cli.ui.parseutils.meta import ColumnMetadata, ForeignKey 36 | from clickhouse_cli.ui.parseutils.tables import TableReference 37 | from clickhouse_cli.ui.parseutils.utils import last_word 38 | 39 | 40 | class CHCompleter(Completer): 41 | def __init__(self, client, metadata): 42 | super(CHCompleter, self).__init__() 43 | 
self.client = client 44 | self.smart_completion = True 45 | self.prioritizer = PrevalenceCounter() 46 | self.qualify_columns = "always" # 'if_more_than_one_table' 47 | self.asterisk_column_order = "table_order" 48 | self.keyword_casing = "upper" 49 | 50 | self.search_path_filter = None 51 | self.generate_aliases = None 52 | self.casing_file = None 53 | self.generate_casing_file = None 54 | 55 | self.search_path = [] 56 | self.casing = {} 57 | self.reserved_words = set() 58 | for x in KEYWORDS: 59 | self.reserved_words.update(x.split()) 60 | self.name_pattern = re.compile(r"^[_a-z][_a-z0-9\$]*$") 61 | 62 | self.metadata = metadata 63 | self.metadata["all"] = set(KEYWORDS + FUNCTIONS) 64 | self.metadata["databases"] = {} 65 | self.metadata["tables"] = {} 66 | self.metadata["views"] = {} 67 | self.metadata["functions"] = {} 68 | self.metadata["datatypes"] = DATATYPES 69 | 70 | def _select(self, query, flatten=True, *args, **kwargs): 71 | data = self.client.query(query, fmt="TabSeparated").data 72 | if data is not None: 73 | return [row if flatten else row.split("\t") for row in data.rstrip("\n").split("\n")] 74 | 75 | def get_completion(self, word, keywords, ignore_case=False, suffix=""): 76 | for keyword in keywords: 77 | if ignore_case: 78 | k = self._match(word.lower(), keyword.lower()) 79 | else: 80 | k = self._match(word, keyword) 81 | 82 | if k is not None: 83 | yield Completion(keyword + suffix, -len(word)) 84 | 85 | def get_single_match(self, word, match): 86 | return [Completion(match, -len(word))] 87 | 88 | def refresh_metadata(self): 89 | try: 90 | self.metadata["databases"] = self.get_databases() 91 | self.metadata["tables"] = self.get_tables_and_columns() 92 | self.metadata["views"] = {} 93 | self.metadata["functions"] = {} 94 | self.metadata["datatypes"] = DATATYPES 95 | except Exception: 96 | pass # We don't want to brag about the broken autocompletion 97 | 98 | def get_tables_and_columns(self): 99 | data = self._select("SELECT database, table, name, type FROM system.columns;", flatten=False) 100 | result = defaultdict(dict) 101 | 102 | class Col(object): 103 | def __init__(self, name, datatype): 104 | self.name = name 105 | self.datatype = datatype 106 | 107 | def values(self): 108 | return [self] 109 | 110 | for database, table, name, datatype in data: 111 | if table not in result[database]: 112 | result[database][table] = {} 113 | 114 | result[database][table][name] = Col(name, datatype) 115 | 116 | return result 117 | 118 | def get_tables(self, database=None): 119 | if database is None: 120 | return self._select("SHOW TABLES") 121 | else: 122 | return self._select("SHOW TABLES FROM {}".format(database)) 123 | 124 | def get_databases(self): 125 | return self._select("SHOW DATABASES") 126 | 127 | def get_table_field_names(self, table, database=None): 128 | if database is None: 129 | result = self._select("DESCRIBE TABLE {}".format(table), flatten=False) 130 | else: 131 | result = self._select("DESCRIBE TABLE {}.{}".format(database, table), flatten=False) 132 | return [field[0] for field in result] 133 | 134 | def escape_name(self, name): 135 | if name and ( 136 | not self.name_pattern.match(name) or name.upper() in self.reserved_words or name.upper() in FUNCTIONS 137 | ): 138 | name = '"%s"' % name 139 | 140 | return name 141 | 142 | def unescape_name(self, name): 143 | """Unquote a string.""" 144 | if name and name[0] == '"' and name[-1] == '"': 145 | name = name[1:-1] 146 | 147 | return name 148 | 149 | def escaped_names(self, names): 150 | return [self.escape_name(name) 
for name in names] 151 | 152 | def extend_database_names(self, databases): 153 | databases = self.escaped_names(databases) 154 | self.metadata["databases"].extend(databases) 155 | 156 | def extend_keywords(self, additional_keywords): 157 | KEYWORDS.extend(additional_keywords) 158 | self.metadata["all"].update(additional_keywords) 159 | 160 | def extend_schemata(self, schemata): 161 | # FIXME 162 | 163 | # # schemata is a list of schema names 164 | # schemata = self.escaped_names(schemata) 165 | # metadata = self.metadata['tables'] 166 | # for schema in schemata: 167 | # metadata[schema] = {} 168 | 169 | # # dbmetadata.values() are the 'tables' and 'functions' dicts 170 | # for metadata in self.metadata.values(): 171 | # for schema in schemata: 172 | # metadata[schema] = {} 173 | 174 | # self.metadata['all'].update(schemata) 175 | 176 | return self.metadata["databases"] 177 | 178 | def extend_casing(self, words): 179 | """extend casing data 180 | 181 | :return: 182 | """ 183 | # casing should be a dict {lowercasename:PreferredCasingName} 184 | self.casing = dict((word.lower(), word) for word in words) 185 | 186 | def extend_relations(self, data, kind): 187 | """extend metadata for tables or views 188 | 189 | :param data: list of (schema_name, rel_name) tuples 190 | :param kind: either 'tables' or 'views' 191 | :return: 192 | """ 193 | 194 | data = [self.escaped_names(d) for d in data] 195 | 196 | # dbmetadata['tables']['schema_name']['table_name'] should be an 197 | # OrderedDict {column_name:ColumnMetaData}. 198 | metadata = self.metadata[kind] 199 | for schema, relname in data: 200 | try: 201 | metadata[schema][relname] = OrderedDict() 202 | except KeyError: 203 | pass 204 | self.metadata["all"].add(relname) 205 | 206 | def extend_columns(self, column_data, kind): 207 | """extend column metadata 208 | 209 | :param column_data: list of (schema_name, rel_name, column_name, column_type) tuples 210 | :param kind: either 'tables' or 'views' 211 | :return: 212 | """ 213 | metadata = self.metadata[kind] 214 | for schema, relname, colname, datatype in column_data: 215 | (schema, relname, colname) = self.escaped_names([schema, relname, colname]) 216 | column = ColumnMetadata(name=colname, datatype=datatype, foreignkeys=[]) 217 | metadata[schema][relname][colname] = column 218 | self.metadata["all"].add(colname) 219 | 220 | def extend_functions(self, func_data): 221 | # func_data is a list of function metadata namedtuples 222 | # with fields schema_name, func_name, arg_list, result, 223 | # is_aggregate, is_window, is_set_returning 224 | 225 | # dbmetadata['schema_name']['functions']['function_name'] should return 226 | # the function metadata namedtuple for the corresponding function 227 | metadata = self.metadata["functions"] 228 | 229 | for f in func_data: 230 | schema, func = self.escaped_names([f.schema_name, f.func_name]) 231 | 232 | if func in metadata[schema]: 233 | metadata[schema][func].append(f) 234 | else: 235 | metadata[schema][func] = [f] 236 | 237 | self.metadata["all"].add(func) 238 | 239 | def extend_foreignkeys(self, fk_data): 240 | # fk_data is a list of ForeignKey namedtuples, with fields 241 | # parentschema, childschema, parenttable, childtable, 242 | # parentcolumns, childcolumns 243 | 244 | # These are added as a list of ForeignKey namedtuples to the 245 | # ColumnMetadata namedtuple for both the child and parent 246 | meta = self.metadata["tables"] 247 | 248 | for fk in fk_data: 249 | e = self.escaped_names 250 | parentschema, childschema = e([fk.parentschema, 
fk.childschema]) 251 | parenttable, childtable = e([fk.parenttable, fk.childtable]) 252 | childcol, parcol = e([fk.childcolumn, fk.parentcolumn]) 253 | childcolmeta = meta[childschema][childtable][childcol] 254 | parcolmeta = meta[parentschema][parenttable][parcol] 255 | fk = ForeignKey(parentschema, parenttable, parcol, childschema, childtable, childcol) 256 | childcolmeta.foreignkeys.append((fk)) 257 | parcolmeta.foreignkeys.append((fk)) 258 | 259 | def extend_datatypes(self, type_data): 260 | # dbmetadata['datatypes'][schema_name][type_name] should store type 261 | # metadata, such as composite type field names. Currently, we're not 262 | # storing any metadata beyond typename, so just store None 263 | meta = self.metadata["datatypes"] 264 | 265 | for t in type_data: 266 | schema, type_name = self.escaped_names(t) 267 | meta[schema][type_name] = None 268 | self.metadata["all"].add(type_name) 269 | 270 | def extend_query_history(self, text, is_init=False): 271 | if is_init: 272 | # During completer initialization, only load keyword preferences, 273 | # not names 274 | self.prioritizer.update_keywords(text) 275 | else: 276 | self.prioritizer.update(text) 277 | 278 | def set_search_path(self, search_path): 279 | self.search_path = self.escaped_names(search_path) 280 | 281 | def reset_completions(self): 282 | self.special_commands = [] 283 | self.search_path = [] 284 | self.metadata["databases"] = [] 285 | self.metadata["tables"] = {} 286 | self.metadata["views"] = {} 287 | self.metadata["functions"] = {} 288 | self.metadata["datatypes"] = DATATYPES 289 | self.metadata["all"] = set(KEYWORDS + FUNCTIONS) 290 | 291 | def find_matches(self, text, collection, mode="fuzzy", meta=None): 292 | """Find completion matches for the given text. 293 | 294 | Given the user's input text and a collection of available 295 | completions, find completions matching the last word of the 296 | text. 297 | 298 | `collection` can be either a list of strings or a list of Candidate 299 | namedtuples. 300 | `mode` can be either 'fuzzy', or 'strict' 301 | 'fuzzy': fuzzy matching, ties broken by name prevalence 302 | 'strict': start-only matching, ties broken by keyword prevalence 303 | 304 | yields prompt_toolkit Completion instances for any matches found 305 | in the collection of available completions. 306 | 307 | """ 308 | if not collection: 309 | return [] 310 | prio_order = [ 311 | "keyword", 312 | "function", 313 | "view", 314 | "table", 315 | "datatype", 316 | "database", 317 | "schema", 318 | "column", 319 | "table alias", 320 | "join", 321 | "name join", 322 | "fk join", 323 | ] 324 | type_priority = prio_order.index(meta) if meta in prio_order else -1 325 | text = last_word(text, include="most_punctuations").lower() 326 | text_len = len(text) 327 | 328 | if text and text[0] == '"': 329 | # text starts with double quote; user is manually escaping a name 330 | # Match on everything that follows the double-quote.
Note that 331 | # text_len is calculated before removing the quote, so the 332 | # Completion.position value is correct 333 | text = text[1:] 334 | 335 | if mode == "fuzzy": 336 | fuzzy = True 337 | priority_func = self.prioritizer.name_count 338 | else: 339 | fuzzy = False 340 | priority_func = self.prioritizer.keyword_count 341 | 342 | # Construct a `_match` function for either fuzzy or non-fuzzy matching 343 | # The match function returns a 2-tuple used for sorting the matches, 344 | # or None if the item doesn't match 345 | # Note: higher priority values mean more important, so use negative 346 | # signs to flip the direction of the tuple 347 | if fuzzy: 348 | regex = ".*?".join(map(re.escape, text)) 349 | pat = re.compile("(%s)" % regex) 350 | 351 | def _match(item): 352 | if item.lower()[: len(text) + 1] in (text, text + " "): 353 | # Exact match of first word in suggestion 354 | # This is to get exact alias matches to the top 355 | # E.g. for input `e`, 'Entries E' should be on top 356 | # (before e.g. `EndUsers EU`) 357 | return float("Infinity"), -1 358 | r = pat.search(self.unescape_name(item.lower())) 359 | if r: 360 | return -len(r.group()), -r.start() 361 | 362 | else: 363 | match_end_limit = len(text) 364 | 365 | def _match(item): 366 | match_point = item.lower().find(text, 0, match_end_limit) 367 | if match_point >= 0: 368 | # Use negative infinity to force keywords to sort after all 369 | # fuzzy matches 370 | return -float("Infinity"), -match_point 371 | 372 | matches = [] 373 | for cand in collection: 374 | if isinstance(cand, _Candidate): 375 | item, prio, display_meta, synonyms, prio2 = cand 376 | if display_meta is None: 377 | display_meta = meta 378 | syn_matches = (_match(x) for x in synonyms) 379 | # Nones need to be removed to avoid max() crashing in Python 3 380 | syn_matches = [m for m in syn_matches if m] 381 | sort_key = max(syn_matches) if syn_matches else None 382 | else: 383 | item, display_meta, prio, prio2 = cand, meta, 0, 0 384 | sort_key = _match(cand) 385 | 386 | if sort_key: 387 | if display_meta and len(display_meta) > 50: 388 | # Truncate meta-text to 50 characters, if necessary 389 | display_meta = display_meta[:47] + "..." 390 | 391 | # Lexical order of items in the collection, used for 392 | # tiebreaking items with the same match group length and start 393 | # position. Since we use *higher* priority to mean "more 394 | # important," we use -ord(c) to prioritize "aa" > "ab" and end 395 | # with 1 to prioritize shorter strings (ie "user" > "users"). 396 | # We first do a case-insensitive sort and then a 397 | # case-sensitive one as a tie breaker. 398 | # We also use the unescape_name to make sure quoted names have 399 | # the same priority as unquoted names. 
400 | lexical_priority = ( 401 | tuple(0 if c in (" _") else -ord(c) for c in self.unescape_name(item.lower())) 402 | + (1,) 403 | + tuple(c for c in item) 404 | ) 405 | 406 | item = self.case(item) 407 | priority = ( 408 | sort_key, 409 | type_priority, 410 | prio, 411 | priority_func(item), 412 | prio2, 413 | lexical_priority, 414 | ) 415 | 416 | matches.append( 417 | Match( 418 | completion=Completion(item, -text_len, display_meta=display_meta), 419 | priority=priority, 420 | ) 421 | ) 422 | return matches 423 | 424 | def case(self, word): 425 | return self.casing.get(word, word) 426 | 427 | def get_completions(self, document, complete_event, smart_completion=None): 428 | word_before_cursor = document.get_word_before_cursor(WORD=True) 429 | 430 | if smart_completion is None: 431 | smart_completion = self.smart_completion 432 | 433 | # If smart_completion is off then match any word that starts with 434 | # 'word_before_cursor'. 435 | if not smart_completion: 436 | matches = self.find_matches(word_before_cursor, self.metadata["all"], mode="strict") 437 | completions = [m.completion for m in matches] 438 | return sorted(completions, key=operator.attrgetter("text")) 439 | 440 | matches = [] 441 | suggestions = suggest_type(document.text, document.text_before_cursor) 442 | 443 | for suggestion in suggestions: 444 | suggestion_type = type(suggestion) 445 | 446 | # Map suggestion type to method 447 | # e.g. 'table' -> self.get_table_matches 448 | matcher = self.suggestion_matchers[suggestion_type] 449 | matches.extend(matcher(self, suggestion, word_before_cursor)) 450 | 451 | # Sort matches so highest priorities are first 452 | 453 | # FIXME: Breaks the order of fields in table 454 | # matches = sorted(matches, key=operator.attrgetter('priority'), reverse=True) 455 | return [m.completion for m in matches] 456 | 457 | def get_column_matches(self, suggestion, word_before_cursor): 458 | tables = suggestion.table_refs 459 | do_qualify = ( 460 | suggestion.qualifiable 461 | and { 462 | "always": True, 463 | "never": False, 464 | "if_more_than_one_table": len(tables) > 1, 465 | }[self.qualify_columns] 466 | ) 467 | 468 | def qualify(col, tbl): 469 | return tbl + "." + self.case(col) if do_qualify else self.case(col) 470 | 471 | scoped_cols = self.populate_scoped_cols(tables, suggestion.local_tables) 472 | 473 | colit = scoped_cols.items 474 | 475 | def make_cand(name, ref): 476 | synonyms = (name, generate_alias(self.case(name))) 477 | return Candidate(qualify(name, ref), 0, "column", synonyms) 478 | 479 | flat_cols = [] 480 | for t, cols in colit(): 481 | for c in cols: 482 | flat_cols.append(make_cand(c.name, t.ref)) 483 | if suggestion.require_last_table: 484 | # require_last_table is used for 'tb11 JOIN tbl2 USING (...' which should 485 | # suggest only columns that appear in the last table and one more 486 | ltbl = tables[-1].ref 487 | flat_cols = list( 488 | set(c.name for t, cs in colit() if t.ref == ltbl for c in cs) 489 | & set(c.name for t, cs in colit() if t.ref != ltbl for c in cs) 490 | ) 491 | lastword = last_word(word_before_cursor, include="most_punctuations") 492 | if lastword == "*": 493 | if self.asterisk_column_order == "alphabetic": 494 | flat_cols.sort() 495 | for cols in scoped_cols.values(): 496 | cols.sort(key=operator.attrgetter("name")) 497 | if lastword != word_before_cursor and len(tables) == 1 and word_before_cursor[-len(lastword) - 1] == ".": 498 | # User typed x.*; replicate "x." 
for all columns except the 499 | # first, which gets the original (as we only replace the "*"") 500 | sep = ", " + word_before_cursor[:-1] 501 | collist = sep.join(self.case(c.completion) for c in flat_cols) 502 | else: 503 | collist = ", ".join(qualify(c.name, t.ref) for t, cs in colit() for c in cs) 504 | 505 | return [ 506 | Match( 507 | completion=Completion(collist, -1, display_meta="columns", display="*"), 508 | priority=(1, 1, 1), 509 | ) 510 | ] 511 | 512 | return self.find_matches(word_before_cursor, flat_cols, meta="column") 513 | 514 | def alias(self, tbl, tbls): 515 | """Generate a unique table alias 516 | tbl - name of the table to alias, quoted if it needs to be 517 | tbls - TableReference iterable of tables already in query 518 | """ 519 | tbl = self.case(tbl) 520 | tbls = set(normalize_ref(t.ref) for t in tbls) 521 | if self.generate_aliases: 522 | tbl = generate_alias(self.unescape_name(tbl)) 523 | if normalize_ref(tbl) not in tbls: 524 | return tbl 525 | elif tbl[0] == '"': 526 | aliases = ('"' + tbl[1:-1] + str(i) + '"' for i in count(2)) 527 | else: 528 | aliases = (tbl + str(i) for i in count(2)) 529 | return next(a for a in aliases if normalize_ref(a) not in tbls) 530 | 531 | def get_join_matches(self, suggestion, word_before_cursor): 532 | tbls = suggestion.table_refs 533 | cols = self.populate_scoped_cols(tbls) 534 | # Set up some data structures for efficient access 535 | qualified = dict((normalize_ref(t.ref), t.schema) for t in tbls) 536 | ref_prio = dict((normalize_ref(t.ref), n) for n, t in enumerate(tbls)) 537 | refs = set(normalize_ref(t.ref) for t in tbls) 538 | other_tbls = set((t.schema, t.name) for t in list(cols)[:-1]) 539 | joins = [] 540 | # Iterate over FKs in existing tables to find potential joins 541 | fks = ((fk, rtbl, rcol) for rtbl, rcols in cols.items() for rcol in rcols for fk in rcol.foreignkeys) 542 | col = namedtuple("col", "schema tbl col") 543 | for fk, rtbl, rcol in fks: 544 | right = col(rtbl.schema, rtbl.name, rcol.name) 545 | child = col(fk.childschema, fk.childtable, fk.childcolumn) 546 | parent = col(fk.parentschema, fk.parenttable, fk.parentcolumn) 547 | left = child if parent == right else parent 548 | if suggestion.schema and left.schema != suggestion.schema: 549 | continue 550 | c = self.case 551 | if self.generate_aliases or normalize_ref(left.tbl) in refs: 552 | lref = self.alias(left.tbl, suggestion.table_refs) 553 | join = "{0} {4} ON {4}.{1} = {2}.{3}".format(c(left.tbl), c(left.col), rtbl.ref, c(right.col), lref) 554 | else: 555 | join = "{0} ON {0}.{1} = {2}.{3}".format(c(left.tbl), c(left.col), rtbl.ref, c(right.col)) 556 | alias = generate_alias(self.case(left.tbl)) 557 | synonyms = [ 558 | join, 559 | "{0} ON {0}.{1} = {2}.{3}".format(alias, c(left.col), rtbl.ref, c(right.col)), 560 | ] 561 | # Schema-qualify if (1) new table in same schema as old, and old 562 | # is schema-qualified, or (2) new in other schema, except public 563 | if not suggestion.schema and ( 564 | qualified[normalize_ref(rtbl.ref)] 565 | and left.schema == right.schema 566 | or left.schema not in (right.schema, "default") 567 | ): 568 | join = left.schema + "." 
+ join 569 | prio = ref_prio[normalize_ref(rtbl.ref)] * 2 + (0 if (left.schema, left.tbl) in other_tbls else 1) 570 | joins.append(Candidate(join, prio, "join", synonyms=synonyms)) 571 | 572 | return self.find_matches(word_before_cursor, joins, meta="join") 573 | 574 | def get_join_condition_matches(self, suggestion, word_before_cursor): 575 | col = namedtuple("col", "schema tbl col") 576 | tbls = self.populate_scoped_cols(suggestion.table_refs).items 577 | cols = [(t, c) for t, cs in tbls() for c in cs] 578 | try: 579 | lref = (suggestion.parent or suggestion.table_refs[-1]).ref 580 | ltbl, lcols = [(t, cs) for (t, cs) in tbls() if t.ref == lref][-1] 581 | except IndexError: # The user typed an incorrect table qualifier 582 | return [] 583 | conds, found_conds = [], set() 584 | 585 | def add_cond(lcol, rcol, rref, prio, meta): 586 | prefix = "" if suggestion.parent else ltbl.ref + "." 587 | case = self.case 588 | cond = prefix + case(lcol) + " = " + rref + "." + case(rcol) 589 | if cond not in found_conds: 590 | found_conds.add(cond) 591 | conds.append(Candidate(cond, prio + ref_prio[rref], meta)) 592 | 593 | def list_dict(pairs): # Turns [(a, b), (a, c)] into {a: [b, c]} 594 | d = defaultdict(list) 595 | for pair in pairs: 596 | d[pair[0]].append(pair[1]) 597 | return d 598 | 599 | # Tables that are closer to the cursor get higher prio 600 | ref_prio = dict((tbl.ref, num) for num, tbl in enumerate(suggestion.table_refs)) 601 | # Map (schema, table, col) to tables 602 | coldict = list_dict(((t.schema, t.name, c.name), t) for t, c in cols if t.ref != lref) 603 | # For each fk from the left table, generate a join condition if 604 | # the other table is also in the scope 605 | fks = ((fk, lcol.name) for lcol in lcols for fk in lcol.foreignkeys) 606 | for fk, lcol in fks: 607 | left = col(ltbl.schema, ltbl.name, lcol) 608 | child = col(fk.childschema, fk.childtable, fk.childcolumn) 609 | par = col(fk.parentschema, fk.parenttable, fk.parentcolumn) 610 | left, right = (child, par) if left == child else (par, child) 611 | for rtbl in coldict[right]: 612 | add_cond(left.col, right.col, rtbl.ref, 2000, "fk join") 613 | # For name matching, use a {(colname, coltype): TableReference} dict 614 | coltyp = namedtuple("coltyp", "name datatype") 615 | col_table = list_dict((coltyp(c.name, c.datatype), t) for t, c in cols) 616 | # Find all name-match join conditions 617 | for c in (coltyp(c.name, c.datatype) for c in lcols): 618 | for rtbl in (t for t in col_table[c] if t.ref != ltbl.ref): 619 | prio = 1000 if c.datatype in ("integer", "bigint", "smallint") else 0 620 | add_cond(c.name, c.name, rtbl.ref, prio, "name join") 621 | 622 | return self.find_matches(word_before_cursor, conds, meta="join") 623 | 624 | def get_function_matches(self, suggestion, word_before_cursor, alias=False): 625 | def _cand(func, alias): 626 | return self._make_cand(func, alias, suggestion) 627 | 628 | if suggestion.filter == "for_from_clause": 629 | # Only suggest functions allowed in FROM clause 630 | def filt(f): 631 | return not f.is_aggregate and not f.is_window 632 | 633 | funcs = [_cand(f, alias) for f in self.populate_functions(suggestion.schema, filt)] 634 | else: 635 | fs = self.populate_schema_objects(suggestion.schema, "functions") 636 | funcs = [_cand(f, alias=False) for f in fs] 637 | 638 | # Function overloading means we may have multiple functions of the same 639 | # name at this point, so keep unique names only 640 | funcs = set(funcs) 641 | 642 | funcs = self.find_matches(word_before_cursor, funcs,
meta="function") 643 | 644 | if not suggestion.schema and not suggestion.filter: 645 | # also suggest hardcoded functions using startswith matching 646 | predefined_funcs = self.find_matches(word_before_cursor, FUNCTIONS, mode="strict", meta="function") 647 | funcs.extend(predefined_funcs) 648 | 649 | return funcs 650 | 651 | def get_from_clause_item_matches(self, suggestion, word_before_cursor): 652 | alias = self.generate_aliases 653 | s = suggestion 654 | t_sug = Table(s.schema, s.table_refs, s.local_tables) 655 | v_sug = View(s.schema, s.table_refs) 656 | f_sug = Function(s.schema, s.table_refs, filter="for_from_clause") 657 | return ( 658 | self.get_table_matches(t_sug, word_before_cursor, alias) 659 | + self.get_view_matches(v_sug, word_before_cursor, alias) 660 | + self.get_function_matches(f_sug, word_before_cursor, alias) 661 | ) 662 | 663 | # Note: tbl is a SchemaObject 664 | def _make_cand(self, tbl, do_alias, suggestion): 665 | cased_tbl = self.case(tbl.name) 666 | if do_alias: 667 | alias = self.alias(cased_tbl, suggestion.table_refs) 668 | synonyms = (cased_tbl, generate_alias(cased_tbl)) 669 | maybe_parens = "()" if tbl.function else "" 670 | maybe_alias = (" " + alias) if do_alias else "" 671 | maybe_schema = (self.case(tbl.schema) + ".") if tbl.schema else "" 672 | item = maybe_schema + cased_tbl + maybe_parens + maybe_alias 673 | prio2 = 0 if tbl.schema else 1 674 | return Candidate(item, synonyms=synonyms, prio2=prio2) 675 | 676 | def get_table_matches(self, suggestion, word_before_cursor, alias=False): 677 | tables = self.populate_schema_objects(suggestion.schema, "tables") 678 | tables.extend(SchemaObject(tbl.name) for tbl in suggestion.local_tables) 679 | tables = [self._make_cand(t, alias, suggestion) for t in tables] 680 | return self.find_matches(word_before_cursor, tables, meta="table") 681 | 682 | def get_view_matches(self, suggestion, word_before_cursor, alias=False): 683 | views = self.populate_schema_objects(suggestion.schema, "views") 684 | views = [self._make_cand(v, alias, suggestion) for v in views] 685 | return self.find_matches(word_before_cursor, views, meta="view") 686 | 687 | def get_alias_matches(self, suggestion, word_before_cursor): 688 | aliases = suggestion.aliases 689 | return self.find_matches(word_before_cursor, aliases, meta="table alias") 690 | 691 | def get_database_matches(self, _, word_before_cursor): 692 | return self.find_matches(word_before_cursor, self.metadata["databases"], meta="database") 693 | 694 | def get_keyword_matches(self, _, word_before_cursor): 695 | casing = self.keyword_casing 696 | if casing == "auto": 697 | if word_before_cursor and word_before_cursor[-1].islower(): 698 | casing = "lower" 699 | else: 700 | casing = "upper" 701 | 702 | if casing == "upper": 703 | keywords = [k.upper() for k in KEYWORDS] 704 | else: 705 | keywords = [k.lower() for k in KEYWORDS] 706 | 707 | return self.find_matches(word_before_cursor, keywords, mode="strict", meta="keyword") 708 | 709 | def get_path_matches(self, _, word_before_cursor): 710 | completer = PathCompleter(expanduser=True) 711 | document = Document(text=word_before_cursor, cursor_position=len(word_before_cursor)) 712 | for c in completer.get_completions(document, None): 713 | yield Match(completion=c, priority=(0,)) 714 | 715 | def get_special_matches(self, _, word_before_cursor): 716 | return [] 717 | 718 | def get_datatype_matches(self, suggestion, word_before_cursor): 719 | return self.find_matches(word_before_cursor, DATATYPES, mode="strict", meta="datatype") 720 | 721 | 
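The fuzzy mode of find_matches() above relies on a subsequence regex: the typed characters must appear in the candidate in order, and the sort key prefers the tightest, earliest match. A minimal standalone sketch of that scoring (the helper name fuzzy_sort_key is ours; the real method additionally handles Candidate tuples, quoted names, and the extra priority components):

```python
import re

# Build a lazy pattern like 'g.*?r.*?a.*?r' from the typed text, then rank
# candidates by negated match length and start offset, so shorter and
# earlier matches compare as "greater" and end up first.
def fuzzy_sort_key(text, candidate):
    pattern = re.compile("(%s)" % ".*?".join(map(re.escape, text.lower())))
    match = pattern.search(candidate.lower())
    return (-len(match.group()), -match.start()) if match else None

print(fuzzy_sort_key("grar", "groupArray"))      # (-7, 0): spans 'groupAr'
print(fuzzy_sort_key("grar", "groupUniqArray"))  # (-11, 0): looser match, ranks lower
```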
def get_format_matches(self, suggestion, word_before_cursor): 722 | return self.find_matches(word_before_cursor, FORMATS, mode="strict", meta="format") 723 | 724 | suggestion_matchers = { 725 | FromClauseItem: get_from_clause_item_matches, 726 | JoinCondition: get_join_condition_matches, 727 | Join: get_join_matches, 728 | Column: get_column_matches, 729 | Function: get_function_matches, 730 | Schema: get_database_matches, 731 | Table: get_table_matches, 732 | View: get_view_matches, 733 | Alias: get_alias_matches, 734 | Database: get_database_matches, 735 | Keyword: get_keyword_matches, 736 | Special: get_special_matches, 737 | Datatype: get_datatype_matches, 738 | Format: get_format_matches, 739 | Path: get_path_matches, 740 | } 741 | 742 | def populate_scoped_cols(self, scoped_tbls, local_tbls=()): 743 | """Find all columns in a set of scoped_tables 744 | :param scoped_tbls: list of TableReference namedtuples 745 | :param local_tbls: tuple(TableMetadata) 746 | :return: {TableReference:{colname:ColumnMetaData}} 747 | """ 748 | ctes = dict((normalize_ref(t.name), t.columns) for t in local_tbls) 749 | columns = OrderedDict() 750 | meta = self.metadata 751 | 752 | def addcols(schema, rel, alias, reltype, cols): 753 | tbl = TableReference(schema, rel, alias, reltype == "functions") 754 | if tbl not in columns: 755 | columns[tbl] = [] 756 | columns[tbl].extend(cols) 757 | 758 | for tbl in scoped_tbls: 759 | # Local tables should shadow database tables 760 | if tbl.schema is None and normalize_ref(tbl.name) in ctes: 761 | cols = ctes[normalize_ref(tbl.name)] 762 | addcols(None, tbl.name, "CTE", tbl.alias, cols) 763 | continue 764 | schemas = [tbl.schema] if tbl.schema else self.search_path 765 | for schema in schemas: 766 | relname = self.escape_name(tbl.name) 767 | schema = self.escape_name(schema) 768 | if tbl.is_function: 769 | # Return column names from a set-returning function 770 | # Get an array of FunctionMetadata objects 771 | functions = meta["functions"].get(schema, {}).get(relname) 772 | for func in functions or []: 773 | # func is a FunctionMetadata object 774 | cols = func.fields() 775 | addcols(schema, relname, tbl.alias, "functions", cols) 776 | else: 777 | for reltype in ("tables", "views"): 778 | cols = meta[reltype].get(schema, {}).get(relname) 779 | if cols: 780 | cols = cols.values() 781 | addcols(schema, relname, tbl.alias, reltype, cols) 782 | break 783 | 784 | return columns 785 | 786 | def _get_schemas(self, obj_typ, schema): 787 | """Returns a list of schemas from which to suggest objects 788 | schema is the schema qualification input by the user (if any) 789 | """ 790 | metadata = self.metadata[obj_typ] 791 | if schema: 792 | schema = self.escape_name(schema) 793 | return [schema] if schema in metadata else [] 794 | return self.search_path if self.search_path_filter else metadata.keys() 795 | 796 | def _maybe_schema(self, schema, parent): 797 | return None if parent or schema in self.search_path else schema 798 | 799 | def populate_schema_objects(self, schema, obj_type): 800 | """Returns a list of SchemaObjects representing tables, views, funcs 801 | schema is the schema qualification input by the user (if any) 802 | """ 803 | 804 | return [ 805 | SchemaObject( 806 | name=obj, 807 | schema=(self._maybe_schema(schema=sch, parent=schema)), 808 | function=(obj_type == "functions"), 809 | ) 810 | for sch in self._get_schemas(obj_type, schema) 811 | for obj in self.metadata[obj_type][sch].keys() 812 | ] 813 | 814 | def populate_functions(self, schema, filter_func): 815 | 
"""Returns a list of function names 816 | 817 | filter_func is a function that accepts a FunctionMetadata namedtuple 818 | and returns a boolean indicating whether that function should be 819 | kept or discarded 820 | """ 821 | 822 | # Because of multiple dispatch, we can have multiple functions 823 | # with the same name, which is why `for meta in metas` is necessary 824 | # in the comprehensions below 825 | return [ 826 | SchemaObject( 827 | name=func, 828 | schema=(self._maybe_schema(schema=sch, parent=schema)), 829 | function=True, 830 | ) 831 | for sch in self._get_schemas("functions", schema) 832 | for (func, metas) in self.metadata["functions"][sch].items() 833 | for meta in metas 834 | if filter_func(meta) 835 | ] 836 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/lexer.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pygments.lexer import RegexLexer, bygroups, words 4 | from pygments.token import Comment, Generic, Keyword, Name, Number, Operator, Punctuation, String, Text, Whitespace 5 | 6 | from clickhouse_cli.clickhouse.definitions import ( 7 | AGGREGATION_FUNCTIONS, 8 | CASE_INSENSITIVE_FUNCTIONS, 9 | DATATYPES, 10 | FORMATS, 11 | FUNCTIONS, 12 | KEYWORDS, 13 | OPERATORS, 14 | ) 15 | 16 | line_re = re.compile(".*?\n") 17 | 18 | 19 | class CHLexer(RegexLexer): 20 | name = "Clickhouse" 21 | aliases = ["clickhouse"] 22 | filenames = ["*.sql"] 23 | mimetypes = ["text/x-clickhouse-sql"] 24 | 25 | tokens = { 26 | "root": [ 27 | (r"\s+", Text), 28 | (r"(--\s*).*?\n", Comment), 29 | (r"/\*", Comment.Multiline, "multiline-comments"), 30 | (r"[0-9]+", Number), 31 | (r"[0-9]*\.[0-9]+(e[+-][0-9]+)", Number), 32 | (r"'(\\\\|\\'|''|[^'])*'", String), 33 | (r'"(\\\\|\\"|""|[^"])*"', String), 34 | (r"`(\\\\|\\`|``|[^`])*`", String), 35 | (r"[+*/<>=~!@#%^&|`?-]", Operator), 36 | (words(OPERATORS, prefix=r"(?i)", suffix=r"\b"), Keyword), 37 | (words(DATATYPES, suffix=r"\b"), Keyword.Type), 38 | (words(FORMATS), Name.Label), 39 | ( 40 | words(AGGREGATION_FUNCTIONS, suffix=r"(\s*)(\()"), 41 | bygroups(Name.Function, Text, Punctuation), 42 | ), 43 | ( 44 | words(CASE_INSENSITIVE_FUNCTIONS, prefix=r"(?i)", suffix=r"\b"), 45 | Name.Function, 46 | ), 47 | ( 48 | words(FUNCTIONS, suffix=r"(\s*)(\()"), 49 | bygroups(Name.Function, Text, Punctuation), 50 | ), 51 | (words(KEYWORDS, prefix=r"(?i)", suffix=r"\b"), Keyword), 52 | (r"^\\(\?|\w+)", Text), 53 | (r"(?i)[a-z_]\w*", Text), 54 | (r"(?i)[;:()\[\],.]", Punctuation), 55 | (r"'", String.Single, "string"), 56 | (r"[a-z_]\w*", Name), 57 | (r"[;:()\[\]{},.]", Punctuation), 58 | ], 59 | "multiline-comments": [ 60 | (r"/\*", Comment.Multiline, "multiline-comments"), 61 | (r"\*/", Comment.Multiline, "#pop"), 62 | (r"[^/*]+", Comment.Multiline), 63 | (r"[/*]", Comment.Multiline), 64 | ], 65 | "string": [ 66 | (r"[^']+", String.Single), 67 | (r"''", String.Single), 68 | (r"'", String.Single, "#pop"), 69 | ], 70 | "quoted-ident": [ 71 | (r'[^"]+', String.Name), 72 | (r'""', String.Name), 73 | (r'"', String.Name, "#pop"), 74 | ], 75 | } 76 | 77 | 78 | class CHPrettyFormatLexer(RegexLexer): 79 | tokens = { 80 | "root": [ 81 | (r"([^┌─┬┐│││└─┴┘├─┼┤]+)", Generic.Output), 82 | (r"([┌─┬┐│││└─┴┘├─┼┤]+)", Whitespace), 83 | ] 84 | } 85 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/parseutils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hatarist/clickhouse-cli/81450224395407e101bb0a219c5b85c2e6ad7594/clickhouse_cli/ui/parseutils/__init__.py -------------------------------------------------------------------------------- /clickhouse_cli/ui/parseutils/ctes.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | from sqlparse import parse 4 | from sqlparse.sql import Identifier, IdentifierList, Parenthesis 5 | from sqlparse.tokens import CTE, DML, Keyword 6 | 7 | from .meta import ColumnMetadata, TableMetadata 8 | 9 | # TableExpression is a namedtuple representing a CTE, used internally 10 | # name: cte alias assigned in the query 11 | # columns: list of column names 12 | # start: index into the original string of the left parens starting the CTE 13 | # stop: index into the original string of the right parens ending the CTE 14 | TableExpression = namedtuple("TableExpression", "name columns start stop") 15 | 16 | 17 | def isolate_query_ctes(full_text, text_before_cursor): 18 | """Simplify a query by converting CTEs into table metadata objects""" 19 | 20 | if not full_text: 21 | return full_text, text_before_cursor, tuple() 22 | 23 | ctes = extract_ctes(full_text)[0] 24 | if not ctes: 25 | return full_text, text_before_cursor, () 26 | 27 | current_position = len(text_before_cursor) 28 | meta = [] 29 | 30 | for cte in ctes: 31 | if cte.start < current_position < cte.stop: 32 | # Currently editing a cte - treat its body as the current full_text 33 | text_before_cursor = full_text[cte.start : current_position] 34 | full_text = full_text[cte.start : cte.stop] 35 | return full_text, text_before_cursor, meta 36 | 37 | # Append this cte to the list of available table metadata 38 | cols = (ColumnMetadata(name, None, ()) for name in cte.columns) 39 | meta.append(TableMetadata(cte.name, cols)) 40 | 41 | # Editing past the last cte (ie the main body of the query) 42 | full_text = full_text[ctes[-1].stop :] 43 | text_before_cursor = text_before_cursor[ctes[-1].stop : current_position] 44 | 45 | return full_text, text_before_cursor, tuple(meta) 46 | 47 | 48 | def extract_ctes(sql): 49 | """Extract common table expressions from a query 50 | 51 | Returns tuple (ctes, remainder_sql) 52 | 53 | ctes is a list of TableExpression namedtuples 54 | remainder_sql is the text from the original query after the CTEs have 55 | been stripped.
56 | """ 57 | 58 | p = parse(sql)[0] 59 | 60 | # Make sure the first meaningful token is "WITH" which is necessary to 61 | # define CTEs 62 | idx, tok = p.token_next(-1, skip_ws=True, skip_cm=True) 63 | if not (tok and tok.ttype == CTE): 64 | return [], sql 65 | 66 | # Get the next (meaningful) token, which should be the first CTE 67 | idx, tok = p.token_next(idx) 68 | if not tok: 69 | return ([], "") 70 | start_pos = token_start_pos(p.tokens, idx) 71 | ctes = [] 72 | 73 | if isinstance(tok, IdentifierList): 74 | # Multiple ctes 75 | for t in tok.get_identifiers(): 76 | cte_start_offset = token_start_pos(tok.tokens, tok.token_index(t)) 77 | cte = get_cte_from_token(t, start_pos + cte_start_offset) 78 | if not cte: 79 | continue 80 | ctes.append(cte) 81 | elif isinstance(tok, Identifier): 82 | # A single CTE 83 | cte = get_cte_from_token(tok, start_pos) 84 | if cte: 85 | ctes.append(cte) 86 | 87 | idx = p.token_index(tok) + 1 88 | 89 | # Collapse everything after the ctes into a remainder query 90 | remainder = "".join(str(tok) for tok in p.tokens[idx:]) 91 | 92 | return ctes, remainder 93 | 94 | 95 | def get_cte_from_token(tok, pos0): 96 | cte_name = tok.get_real_name() 97 | if not cte_name: 98 | return None 99 | 100 | # Find the start position of the opening parens enclosing the cte body 101 | idx, parens = tok.token_next_by(Parenthesis) 102 | if not parens: 103 | return None 104 | 105 | start_pos = pos0 + token_start_pos(tok.tokens, idx) 106 | cte_len = len(str(parens)) # includes parens 107 | stop_pos = start_pos + cte_len 108 | 109 | column_names = extract_column_names(parens) 110 | 111 | return TableExpression(cte_name, column_names, start_pos, stop_pos) 112 | 113 | 114 | def extract_column_names(parsed): 115 | # Find the first DML token to check if it's a SELECT or INSERT/UPDATE/DELETE 116 | idx, tok = parsed.token_next_by(t=DML) 117 | tok_val = tok and tok.value.lower() 118 | 119 | if tok_val in ("insert", "update", "delete"): 120 | # Jump ahead to the RETURNING clause where the list of column names is 121 | idx, tok = parsed.token_next_by(idx, (Keyword, "returning")) 122 | elif not tok_val == "select": 123 | # Must be invalid CTE 124 | return () 125 | 126 | # The next token should be either a column name, or a list of column names 127 | idx, tok = parsed.token_next(idx, skip_ws=True, skip_cm=True) 128 | return tuple(t.get_name() for t in _identifiers(tok)) 129 | 130 | 131 | def token_start_pos(tokens, idx): 132 | return sum(len(str(t)) for t in tokens[:idx]) 133 | 134 | 135 | def _identifiers(tok): 136 | if isinstance(tok, IdentifierList): 137 | for t in tok.get_identifiers(): 138 | # NB: IdentifierList.get_identifiers() can return non-identifiers! 
139 |             if isinstance(t, Identifier):
140 |                 yield t
141 |     elif isinstance(tok, Identifier):
142 |         yield tok
143 | 
--------------------------------------------------------------------------------
/clickhouse_cli/ui/parseutils/helpers.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict, namedtuple
3 | 
4 | import sqlparse
5 | from sqlparse.sql import Comparison, Identifier, Where
6 | from sqlparse.tokens import Name
7 | 
8 | from clickhouse_cli.clickhouse.definitions import KEYWORDS
9 | from clickhouse_cli.ui.parseutils.ctes import isolate_query_ctes
10 | from clickhouse_cli.ui.parseutils.tables import TableReference, extract_tables
11 | from clickhouse_cli.ui.parseutils.utils import find_prev_keyword, last_word, parse_partial_identifier
12 | 
13 | Special = namedtuple("Special", [])
14 | Database = namedtuple("Database", [])
15 | Schema = Database
16 | # FromClauseItem is a table/view/function used in the FROM clause
17 | # `table_refs` contains the list of tables/... already in the statement,
18 | # used to ensure that the alias we suggest is unique
19 | FromClauseItem = namedtuple("FromClauseItem", "schema table_refs local_tables")
20 | Table = namedtuple("Table", ["schema", "table_refs", "local_tables"])
21 | View = namedtuple("View", ["schema", "table_refs"])
22 | # JoinConditions are suggested after ON, e.g. 'foo.barid = bar.barid'
23 | JoinCondition = namedtuple("JoinCondition", ["table_refs", "parent"])
24 | # Joins are suggested after JOIN, e.g. 'foo ON foo.barid = bar.barid'
25 | Join = namedtuple("Join", ["table_refs", "schema"])
26 | 
27 | Function = namedtuple("Function", ["schema", "table_refs", "filter"])
28 | # For convenience, don't require the `filter` argument in Function constructor
29 | Function.__new__.__defaults__ = (None, tuple(), None)
30 | Table.__new__.__defaults__ = (None, tuple(), tuple())
31 | View.__new__.__defaults__ = (None, tuple())
32 | FromClauseItem.__new__.__defaults__ = (None, tuple(), tuple())
33 | 
34 | Column = namedtuple("Column", ["table_refs", "require_last_table", "local_tables", "qualifiable"])
35 | Column.__new__.__defaults__ = (None, None, tuple(), False)
36 | 
37 | Keyword = namedtuple("Keyword", [])
38 | Datatype = namedtuple("Datatype", ["schema"])
39 | Format = namedtuple("Format", [])
40 | Alias = namedtuple("Alias", ["aliases"])
41 | 
42 | Path = namedtuple("Path", [])
43 | 
44 | 
45 | white_space_regex = re.compile("\\s+", re.MULTILINE)
46 | 
47 | 
48 | def _compile_regex(keyword):
49 |     # Surround the keyword with word boundaries and replace interior whitespace
50 |     # with whitespace wildcards
51 |     pattern = "\\b" + white_space_regex.sub(r"\\s+", keyword) + "\\b"
52 |     return re.compile(pattern, re.MULTILINE | re.IGNORECASE)
53 | 
54 | 
55 | keyword_regexs = dict((kw, _compile_regex(kw)) for kw in KEYWORDS)
56 | 
57 | 
58 | class PrevalenceCounter(object):
59 |     def __init__(self):
60 |         self.keyword_counts = defaultdict(int)
61 |         self.name_counts = defaultdict(int)
62 | 
63 |     def update(self, text):
64 |         self.update_keywords(text)
65 |         self.update_names(text)
66 | 
67 |     def update_names(self, text):
68 |         for parsed in sqlparse.parse(text):
69 |             for token in parsed.flatten():
70 |                 if token.ttype in Name:
71 |                     self.name_counts[token.value] += 1
72 | 
73 |     def clear_names(self):
74 |         self.name_counts = defaultdict(int)
75 | 
76 |     def update_keywords(self, text):
77 |         # Count keywords. Can't rely on sqlparse for this, because it's
78 |         # database agnostic
79 |         for keyword, regex in keyword_regexs.items():
80 |             for _ in regex.finditer(text):
81 |                 self.keyword_counts[keyword] += 1
82 | 
83 |     def keyword_count(self, keyword):
84 |         return self.keyword_counts[keyword]
85 | 
86 |     def name_count(self, name):
87 |         return self.name_counts[name]
88 | 
89 | 
90 | class SqlStatement(object):
91 |     def __init__(self, full_text, text_before_cursor):
92 |         self.identifier = None
93 |         self.word_before_cursor = word_before_cursor = last_word(text_before_cursor, include="many_punctuations")
94 | 
95 |         full_text, text_before_cursor, self.local_tables = isolate_query_ctes(full_text, text_before_cursor)
96 | 
97 |         self.text_before_cursor_including_last_word = text_before_cursor
98 | 
99 |         # If we've partially typed a word then word_before_cursor won't be an
100 |         # empty string. In that case we want to remove the partially typed
101 |         # string before sending it to the sqlparser. Otherwise the last token
102 |         # will always be the partially typed string which renders the smart
103 |         # completion useless because it will always return the list of
104 |         # keywords as completion.
105 |         if self.word_before_cursor:
106 |             if word_before_cursor[-1] == "(" or word_before_cursor[0] == "\\":
107 |                 parsed = sqlparse.parse(text_before_cursor)
108 |             else:
109 |                 text_before_cursor = text_before_cursor[: -len(word_before_cursor)]
110 |                 parsed = sqlparse.parse(text_before_cursor)
111 |                 self.identifier = parse_partial_identifier(word_before_cursor)
112 |         else:
113 |             parsed = sqlparse.parse(text_before_cursor)
114 | 
115 |         full_text, text_before_cursor, parsed = _split_multiple_statements(full_text, text_before_cursor, parsed)
116 | 
117 |         self.full_text = full_text
118 |         self.text_before_cursor = text_before_cursor
119 |         self.parsed = parsed
120 | 
121 |         self.last_token = parsed and parsed.token_prev(len(parsed.tokens))[1] or ""
122 | 
123 |     def is_insert(self):
124 |         return self.parsed.token_first().value.lower() == "insert"
125 | 
126 |     def get_tables(self, scope="full"):
127 |         """Gets the tables available in the statement.
128 |         param `scope`: possible values: 'full', 'insert', 'before'
129 |         If 'insert', only the first table is returned.
130 |         If 'before', only tables before the cursor are returned.
131 |         If not 'insert' and the stmt is an insert, the first table is skipped.
132 |         """
133 |         tables = extract_tables(self.full_text if scope == "full" else self.text_before_cursor)
134 |         if scope == "insert":
135 |             tables = tables[:1]
136 |         elif self.is_insert():
137 |             tables = tables[1:]
138 |         return tables
139 | 
140 |     def get_identifier_schema(self):
141 |         schema = (self.identifier and self.identifier.get_parent_name()) or None
142 |         # If schema name is unquoted, lower-case it
143 |         if schema and self.identifier.value[0] != '"':
144 |             schema = schema.lower()
145 | 
146 |         return schema
147 | 
148 |     def reduce_to_prev_keyword(self, n_skip=0):
149 |         prev_keyword, self.text_before_cursor = find_prev_keyword(self.text_before_cursor, n_skip=n_skip)
150 |         return prev_keyword
151 | 
152 | 
153 | def suggest_type(full_text, text_before_cursor):
154 |     """Takes the full_text that is typed so far and also the text before the
155 |     cursor to suggest completion type and scope.
156 | 
157 |     Returns a tuple with a type of entity ('table', 'column' etc) and a scope.
158 |     A scope for a column category will be a list of tables.
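
    For example (an illustrative sketch), suggest_type('SELECT ', 'SELECT ')
    resolves the last keyword to SELECT and returns roughly
    (Column(table_refs=()), Function(schema=None), Keyword()).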
159 | """ 160 | 161 | if full_text.startswith("\\i "): 162 | return (Path(),) 163 | 164 | # This is a temporary hack; the exception handling 165 | # here should be removed once sqlparse has been fixed 166 | try: 167 | stmt = SqlStatement(full_text, text_before_cursor) 168 | except (TypeError, AttributeError): 169 | return [] 170 | 171 | # # Check for special commands and handle those separately 172 | # if stmt.parsed: 173 | # # Be careful here because trivial whitespace is parsed as a 174 | # # statement, but the statement won't have a first token 175 | 176 | # tok1 = stmt.parsed.token_first() 177 | # if tok1 and tok1.value == '\\': 178 | # text = stmt.text_before_cursor + stmt.word_before_cursor 179 | # return suggest_special(text) 180 | 181 | return suggest_based_on_last_token(stmt.last_token, stmt) 182 | 183 | 184 | function_body_pattern = re.compile(r"(\\$.*?\\$)([\s\S]*?)\\1", re.M) 185 | 186 | 187 | def _find_function_body(text): 188 | split = function_body_pattern.search(text) 189 | return (split.start(2), split.end(2)) if split else (None, None) 190 | 191 | 192 | def _statement_from_function(full_text, text_before_cursor, statement): 193 | current_pos = len(text_before_cursor) 194 | body_start, body_end = _find_function_body(full_text) 195 | if body_start is None: 196 | return full_text, text_before_cursor, statement 197 | if not body_start <= current_pos < body_end: 198 | return full_text, text_before_cursor, statement 199 | full_text = full_text[body_start:body_end] 200 | text_before_cursor = text_before_cursor[body_start:] 201 | parsed = sqlparse.parse(text_before_cursor) 202 | return _split_multiple_statements(full_text, text_before_cursor, parsed) 203 | 204 | 205 | def _split_multiple_statements(full_text, text_before_cursor, parsed): 206 | if len(parsed) > 1: 207 | # Multiple statements being edited -- isolate the current one by 208 | # cumulatively summing statement lengths to find the one that bounds 209 | # the current position 210 | current_pos = len(text_before_cursor) 211 | stmt_start, stmt_end = 0, 0 212 | 213 | for statement in parsed: 214 | stmt_len = len(str(statement)) 215 | stmt_start, stmt_end = stmt_end, stmt_end + stmt_len 216 | 217 | if stmt_end >= current_pos: 218 | text_before_cursor = full_text[stmt_start:current_pos] 219 | full_text = full_text[stmt_start:] 220 | break 221 | 222 | elif parsed: 223 | # A single statement 224 | statement = parsed[0] 225 | else: 226 | # The empty string 227 | return full_text, text_before_cursor, None 228 | 229 | token2 = None 230 | if statement.get_type() in ("CREATE", "CREATE OR REPLACE"): 231 | token1 = statement.token_first() 232 | if token1: 233 | token1_idx = statement.token_index(token1) 234 | token2 = statement.token_next(token1_idx)[1] 235 | if token2 and token2.value.upper() == "FUNCTION": 236 | full_text, text_before_cursor, statement = _statement_from_function(full_text, text_before_cursor, statement) 237 | return full_text, text_before_cursor, statement 238 | 239 | 240 | def suggest_based_on_last_token(token, stmt): 241 | if isinstance(token, str): 242 | token_v = token.lower() 243 | elif isinstance(token, Comparison): 244 | # If 'token' is a Comparison type such as 245 | # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling 246 | # token.value on the comparison type will only return the lhs of the 247 | # comparison. In this case a.id. So we need to do token.tokens to get 248 | # both sides of the comparison and pick the last token out of that 249 | # list. 
250 |         token_v = token.tokens[-1].value.lower()
251 |     elif isinstance(token, Where):
252 |         # sqlparse groups all tokens from the where clause into a single token
253 |         # list. This means that token.value may be something like
254 |         # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
255 |         # suggestions in complicated where clauses correctly
256 |         prev_keyword = stmt.reduce_to_prev_keyword()
257 |         return suggest_based_on_last_token(prev_keyword, stmt)
258 |     elif isinstance(token, Identifier):
259 |         # If the previous token is an identifier, we can suggest datatypes if
260 |         # we're in a parenthesized column/field list, e.g.:
261 |         #     CREATE TABLE foo (Identifier
262 |         #     CREATE FUNCTION foo (Identifier
263 |         # If we're not in a parenthesized list, the most likely scenario is the
264 |         # user is about to specify an alias, e.g.:
265 |         #     SELECT Identifier
266 |         #     SELECT foo FROM Identifier
267 |         prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
268 |         if prev_keyword and prev_keyword.value == "(":
269 |             # Suggest datatypes
270 |             return suggest_based_on_last_token("type", stmt)
271 |         else:
272 |             return (Keyword(),)
273 |     else:
274 |         token_v = token.value.lower()
275 | 
276 |     if not token:
277 |         return (Keyword(), Special())
278 |     elif token_v.endswith("("):
279 |         p = sqlparse.parse(stmt.text_before_cursor)[0]
280 | 
281 |         if p.tokens and isinstance(p.tokens[-1], Where):
282 |             # Four possibilities:
283 |             #   1 - Parenthesized clause like "WHERE foo AND ("
284 |             #       Suggest columns/functions
285 |             #   2 - Function call like "WHERE foo("
286 |             #       Suggest columns/functions
287 |             #   3 - Subquery expression like "WHERE EXISTS ("
288 |             #       Suggest keywords, in order to do a subquery
289 |             #   4 - Subquery OR array comparison like "WHERE foo = ANY("
290 |             #       Suggest columns/functions AND keywords. (If we wanted to be
291 |             #       really fancy, we could suggest only array-typed columns)
292 | 
293 |             column_suggestions = suggest_based_on_last_token("where", stmt)
294 | 
295 |             # Check for a subquery expression (cases 3 & 4)
296 |             where = p.tokens[-1]
297 |             prev_tok = where.token_prev(len(where.tokens) - 1)[1]
298 | 
299 |             if isinstance(prev_tok, Comparison):
300 |                 # e.g. "SELECT foo FROM bar WHERE foo = ANY("
301 |                 prev_tok = prev_tok.tokens[-1]
302 | 
303 |             prev_tok = prev_tok.value.lower()
304 |             if prev_tok == "exists":
305 |                 return (Keyword(),)
306 |             else:
307 |                 return column_suggestions
308 | 
309 |         # Get the token before the parens
310 |         prev_tok = p.token_prev(len(p.tokens) - 1)[1]
311 | 
312 |         if prev_tok and prev_tok.value and prev_tok.value.lower().split(" ")[-1] == "using":
313 |             # tbl1 INNER JOIN tbl2 USING (col1, col2)
314 |             tables = stmt.get_tables("before")
315 |             # suggest columns that are present in more than one table
316 |             return (
317 |                 Column(
318 |                     table_refs=tables,
319 |                     require_last_table=True,
320 |                     local_tables=stmt.local_tables,
321 |                 ),
322 |             )
323 | 
324 |         elif p.token_first().value.lower() == "select":
325 |             # If the lparen is preceded by a space chances are we're about to
326 |             # do a sub-select.
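            # (e.g. "SELECT (" suggests keywords for a subquery, while
            # "SELECT foo(" falls through to column suggestions for the
            # function's argument list)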
327 |             if last_word(stmt.text_before_cursor, "all_punctuations").startswith("("):
328 |                 return (Keyword(),)
329 |             prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
330 |             if prev_prev_tok and prev_prev_tok.normalized == "INTO":
331 |                 return (Column(table_refs=stmt.get_tables("insert")),)
332 |             # We're probably in a function argument list
333 |             return (
334 |                 Column(
335 |                     table_refs=extract_tables(stmt.full_text),
336 |                     local_tables=stmt.local_tables,
337 |                     qualifiable=True,
338 |                 ),
339 |             )
340 |     elif token_v == "set":
341 |         return (Column(table_refs=stmt.get_tables(), local_tables=stmt.local_tables),)
342 |     elif token_v in ("select", "where", "having", "by", "distinct"):
343 |         # Check for a table alias or schema qualification
344 |         parent = (stmt.identifier and stmt.identifier.get_parent_name()) or None
345 |         tables = stmt.get_tables()
346 |         if parent:
347 |             tables = tuple(t for t in tables if identifies(parent, t))
348 |             return (
349 |                 Column(table_refs=tables, local_tables=stmt.local_tables),
350 |                 Table(schema=parent),
351 |                 View(schema=parent),
352 |                 Function(schema=parent),
353 |             )
354 |         else:
355 |             tables = tuple(
356 |                 TableReference(
357 |                     schema=table.schema or "default",
358 |                     name=table.name,
359 |                     alias=table.alias,
360 |                     is_function=table.is_function,
361 |                 )
362 |                 for table in tables
363 |             )
364 |             return (
365 |                 Column(table_refs=tables, local_tables=stmt.local_tables),
366 |                 Function(schema=None),
367 |                 Keyword(),
368 |             )
369 |     elif token_v == "as":
370 |         # Don't suggest anything for aliases
371 |         return ()
372 |     elif (token_v.endswith("join") and token.is_keyword) or (
373 |         token_v in ("copy", "from", "update", "into", "describe", "truncate")
374 |     ):
375 |         schema = stmt.get_identifier_schema()
376 |         tables = extract_tables(stmt.text_before_cursor)
377 |         is_join = token_v.endswith("join") and token.is_keyword
378 | 
379 |         # Suggest tables from either the currently-selected schema or the
380 |         # public schema if no schema has been specified
381 |         suggest = []
382 | 
383 |         if not schema:
384 |             # Suggest schemas
385 |             suggest.insert(0, Schema())
386 | 
387 |         if token_v == "from" or is_join:
388 |             suggest.append(FromClauseItem(schema=schema, table_refs=tables, local_tables=stmt.local_tables))
389 |         elif token_v == "truncate":
390 |             suggest.append(Table(schema))
391 |         else:
392 |             suggest.extend((Table(schema), View(schema)))
393 | 
394 |         if is_join and _allow_join(stmt.parsed):
395 |             tables = stmt.get_tables("before")
396 |             suggest.append(Join(table_refs=tables, schema=schema))
397 | 
398 |         return tuple(suggest)
399 | 
400 |     elif token_v in ("table", "view", "function"):
401 |         # E.g. 'DROP FUNCTION ', 'ALTER TABLE '
402 |         rel_type = {"table": Table, "view": View, "function": Function}[token_v]
403 |         schema = stmt.get_identifier_schema()
404 |         if schema:
405 |             return (rel_type(schema=schema),)
406 |         else:
407 |             return (Schema(), rel_type(schema=schema))
408 | 
409 |     elif token_v == "column":
410 |         # E.g. 'ALTER TABLE foo ALTER COLUMN bar'
411 |         return (Column(table_refs=stmt.get_tables()),)
412 | 
413 |     elif token_v == "on":
414 |         tables = stmt.get_tables("before")
415 |         parent = (stmt.identifier and stmt.identifier.get_parent_name()) or None
416 |         if parent:
417 |             # "ON parent."
418 |             # parent can be either a schema name or table alias
419 |             filteredtables = tuple(t for t in tables if identifies(parent, t))
420 |             sugs = [
421 |                 Column(table_refs=filteredtables, local_tables=stmt.local_tables),
422 |                 Table(schema=parent),
423 |                 View(schema=parent),
424 |                 Function(schema=parent),
425 |             ]
426 |             if filteredtables and _allow_join_condition(stmt.parsed):
427 |                 sugs.append(JoinCondition(table_refs=tables, parent=filteredtables[-1]))
428 |             return tuple(sugs)
429 |         else:
430 |             # ON
431 |             # Use table alias if there is one, otherwise the table name
432 |             aliases = tuple(t.ref for t in tables)
433 |             if _allow_join_condition(stmt.parsed):
434 |                 return (
435 |                     Alias(aliases=aliases),
436 |                     JoinCondition(table_refs=tables, parent=None),
437 |                 )
438 |             else:
439 |                 return (Alias(aliases=aliases),)
440 | 
441 |     elif token_v in ("c", "use", "database", "template"):
442 |         # "\c ", "DROP DATABASE ",
443 |         # "CREATE DATABASE WITH TEMPLATE "
444 |         return (Database(),)
445 |     elif token_v == "schema":
446 |         # DROP SCHEMA schema_name
447 |         return (Schema(),)
448 |     elif token_v.endswith(",") or token_v in ("=", "and", "or"):
449 |         prev_keyword = stmt.reduce_to_prev_keyword()
450 |         if prev_keyword:
451 |             return suggest_based_on_last_token(prev_keyword, stmt)
452 |         else:
453 |             return ()
454 |     elif token_v == "format":
455 |         return (Format(),)
456 |     elif token_v in ("type", "::"):
457 |         # ALTER TABLE foo SET DATA TYPE bar
458 |         # SELECT foo::bar
459 |         # Note that tables are a form of composite type in postgresql, so
460 |         # they're suggested here as well
461 |         schema = stmt.get_identifier_schema()
462 |         suggestions = [Datatype(schema=schema), Table(schema=schema)]
463 |         if not schema:
464 |             suggestions.append(Schema())
465 |         return tuple(suggestions)
466 |     elif token_v == "alter":
467 |         return (Keyword(),)
468 |     elif token.is_keyword:
469 |         # token is a keyword we haven't implemented any special handling for
470 |         # go backwards in the query until we find one we do recognize
471 |         prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
472 |         if prev_keyword:
473 |             return suggest_based_on_last_token(prev_keyword, stmt)
474 |         else:
475 |             return (Keyword(),)
476 |     else:
477 |         return (Keyword(),)
478 | 
479 | 
480 | def identifies(id, ref):
481 |     """Returns true if string `id` matches TableReference `ref`"""
482 |     return id == ref.alias or id == ref.name or (ref.schema and (id == ref.schema + "." + ref.name))
483 | 
484 | 
485 | def _allow_join_condition(statement):
486 |     """
487 |     Tests if a join condition should be suggested
488 | 
489 |     We need this to avoid bad suggestions when entering e.g.
490 |         select * from tbl1 a join tbl2 b on a.id =
491 |     So check that the preceding token is an ON, AND, or OR keyword, instead of
492 |     e.g. an equals sign.
493 | 
494 |     :param statement: an sqlparse.sql.Statement
495 |     :return: boolean
496 |     """
497 | 
498 |     if not statement or not statement.tokens:
499 |         return False
500 | 
501 |     last_tok = statement.token_prev(len(statement.tokens))[1]
502 |     return last_tok.value.lower() in ("on", "and", "or")
503 | 
504 | 
505 | def _allow_join(statement):
506 |     """
507 |     Tests if a join should be suggested
508 | 
509 |     We need this to avoid bad suggestions when entering e.g.
510 |         select * from tbl1 a join tbl2 b
511 |     So check that the preceding token is a JOIN keyword
512 | 
513 |     :param statement: an sqlparse.sql.Statement
514 |     :return: boolean
515 |     """
516 | 
517 |     if not statement or not statement.tokens:
518 |         return False
519 | 
520 |     last_tok = statement.token_prev(len(statement.tokens))[1]
521 |     return last_tok.value.lower().endswith("join") and last_tok.value.lower() not in (
522 |         "cross join",
523 |         "natural join",
524 |     )
525 | 
526 | 
527 | Match = namedtuple("Match", ["completion", "priority"])
528 | _SchemaObject = namedtuple("SchemaObject", ["name", "schema", "function"])
529 | 
530 | 
531 | def SchemaObject(name, schema=None, function=False):
532 |     return _SchemaObject(name, schema, function)
533 | 
534 | 
535 | _Candidate = namedtuple("Candidate", ["completion", "prio", "meta", "synonyms", "prio2"])
536 | 
537 | 
538 | def Candidate(completion, prio=None, meta=None, synonyms=None, prio2=None):
539 |     return _Candidate(completion, prio, meta, synonyms or [completion], prio2)
540 | 
541 | 
542 | def normalize_ref(ref):
543 |     return ref if ref[0] == '"' else '"' + ref.lower() + '"'
544 | 
545 | 
546 | def generate_alias(tbl):
547 |     """Generate a table alias, consisting of all upper-case letters in
548 |     the table name, or, if there are no upper-case letters, the first letter +
549 |     all letters preceded by _
550 |     param tbl - unescaped name of the table to alias
551 |     """
552 |     return "".join(
553 |         [c for c in tbl if c.isupper()] or [c for c, prev in zip(tbl, "_" + tbl) if prev == "_" and c != "_"]
554 |     )
555 | 
--------------------------------------------------------------------------------
/clickhouse_cli/ui/parseutils/meta.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | 
3 | ColumnMetadata = namedtuple("ColumnMetadata", ["name", "datatype", "foreignkeys"])
4 | ForeignKey = namedtuple(
5 |     "ForeignKey",
6 |     [
7 |         "parentschema",
8 |         "parenttable",
9 |         "parentcolumn",
10 |         "childschema",
11 |         "childtable",
12 |         "childcolumn",
13 |     ],
14 | )
15 | TableMetadata = namedtuple("TableMetadata", "name columns")
16 | 
17 | 
18 | class FunctionMetadata(object):
19 |     def __init__(
20 |         self,
21 |         schema_name,
22 |         func_name,
23 |         arg_names,
24 |         arg_types,
25 |         arg_modes,
26 |         return_type,
27 |         is_aggregate,
28 |         is_window,
29 |         is_set_returning,
30 |     ):
31 |         """Class for describing a postgresql function"""
32 | 
33 |         self.schema_name = schema_name
34 |         self.func_name = func_name
35 | 
36 |         self.arg_modes = tuple(arg_modes) if arg_modes else None
37 |         self.arg_names = tuple(arg_names) if arg_names else None
38 | 
39 |         # Be flexible in not requiring arg_types -- use None as a placeholder
40 |         # for each arg. (Used for compatibility with old versions of postgresql
41 |         # where such info is hard to get.)
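        # (e.g. arg_names=('x', 'y') with arg_types=None and arg_modes=None
        # yields arg_types == (None, None) below)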
42 |         if arg_types:
43 |             self.arg_types = tuple(arg_types)
44 |         elif arg_modes:
45 |             self.arg_types = tuple([None] * len(arg_modes))
46 |         elif arg_names:
47 |             self.arg_types = tuple([None] * len(arg_names))
48 |         else:
49 |             self.arg_types = None
50 | 
51 |         self.return_type = return_type.strip()
52 |         self.is_aggregate = is_aggregate
53 |         self.is_window = is_window
54 |         self.is_set_returning = is_set_returning
55 | 
56 |     def __eq__(self, other):
57 |         return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
58 | 
59 |     def __ne__(self, other):
60 |         return not self.__eq__(other)
61 | 
62 |     def __hash__(self):
63 |         return hash(
64 |             (
65 |                 self.schema_name,
66 |                 self.func_name,
67 |                 self.arg_names,
68 |                 self.arg_types,
69 |                 self.arg_modes,
70 |                 self.return_type,
71 |                 self.is_aggregate,
72 |                 self.is_window,
73 |                 self.is_set_returning,
74 |             )
75 |         )
76 | 
77 |     def __repr__(self):
78 |         return (
79 |             "%s(schema_name=%r, func_name=%r, arg_names=%r, arg_types=%r, arg_modes=%r, "
80 |             "return_type=%r, is_aggregate=%r, is_window=%r, is_set_returning=%r)"
81 |         ) % (
82 |             self.__class__.__name__,
83 |             self.schema_name,
84 |             self.func_name,
85 |             self.arg_names,
86 |             self.arg_types,
87 |             self.arg_modes,
88 |             self.return_type,
89 |             self.is_aggregate,
90 |             self.is_window,
91 |             self.is_set_returning,
92 |         )
93 | 
94 |     def fields(self):
95 |         """Returns a list of output-field ColumnMetadata namedtuples"""
96 | 
97 |         if self.return_type.lower() == "void":
98 |             return []
99 |         elif not self.arg_modes:
100 |             # For functions without output parameters, the function name
101 |             # is used as the name of the output column.
102 |             # E.g. 'SELECT unnest FROM unnest(...);'
103 |             return [ColumnMetadata(self.func_name, self.return_type, [])]
104 | 
105 |         return [
106 |             ColumnMetadata(n, t, [])
107 |             for n, t, m in zip(self.arg_names, self.arg_types, self.arg_modes)
108 |             if m in ("o", "b", "t")
109 |         ]  # OUT, INOUT, TABLE
110 | 
--------------------------------------------------------------------------------
/clickhouse_cli/ui/parseutils/tables.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | from collections import namedtuple
4 | 
5 | import sqlparse
6 | from sqlparse.sql import Function, Identifier, IdentifierList
7 | from sqlparse.tokens import DML, Keyword, Punctuation
8 | 
9 | TableReference = namedtuple("TableReference", ["schema", "name", "alias", "is_function"])
10 | TableReference.ref = property(
11 |     lambda self: self.alias or (self.name if self.name.islower() or self.name[0] == '"' else '"' + self.name + '"')
12 | )
13 | 
14 | 
15 | # This code is borrowed from sqlparse example script.
16 | #
17 | def is_subselect(parsed):
18 |     if not parsed.is_group:
19 |         return False
20 |     for item in parsed.tokens:
21 |         if item.ttype is DML and item.value.upper() in ("SELECT", "INSERT", "CREATE"):
22 |             return True
23 |     return False
24 | 
25 | 
26 | def _identifier_is_function(identifier):
27 |     return any(isinstance(t, Function) for t in identifier.tokens)
28 | 
29 | 
30 | def extract_from_part(parsed, stop_at_punctuation=True):
31 |     tbl_prefix_seen = False
32 |     for item in parsed.tokens:
33 |         if tbl_prefix_seen:
34 |             if is_subselect(item):
35 |                 for x in extract_from_part(item, stop_at_punctuation):
36 |                     yield x
37 |             elif stop_at_punctuation and item.ttype is Punctuation:
38 |                 return
39 |             # An incomplete nested select won't be recognized correctly as a
40 |             # sub-select. eg: 'SELECT * FROM (SELECT id FROM user'. This causes
41 |             # the second FROM to trigger this elif condition, resulting in an
42 |             # early return. So we need to ignore the keyword if it is
43 |             # FROM.
44 |             # Also 'SELECT * FROM abc JOIN def' will trigger this elif
45 |             # condition. So we need to ignore the keyword JOIN and its variants
46 |             # INNER JOIN, FULL OUTER JOIN, etc.
47 |             elif (
48 |                 item.ttype is Keyword
49 |                 and (not item.value.upper() == "FROM")
50 |                 and (not item.value.upper().endswith("JOIN"))
51 |             ):
52 |                 tbl_prefix_seen = False
53 |             else:
54 |                 yield item
55 |         elif item.ttype is Keyword or item.ttype is Keyword.DML:
56 |             item_val = item.value.upper()
57 |             if item_val in (
58 |                 "COPY",
59 |                 "FROM",
60 |                 "INTO",
61 |                 "UPDATE",
62 |                 "TABLE",
63 |             ) or item_val.endswith("JOIN"):
64 |                 tbl_prefix_seen = True
65 |         # 'SELECT a, FROM abc' will detect FROM as part of the column list.
66 |         # So this check here is necessary.
67 |         elif isinstance(item, IdentifierList):
68 |             for identifier in item.get_identifiers():
69 |                 if identifier.ttype is Keyword and identifier.value.upper() == "FROM":
70 |                     tbl_prefix_seen = True
71 |                     break
72 | 
73 | 
74 | def extract_table_identifiers(token_stream, allow_functions=True):
75 |     """Yields tuples of TableReference namedtuples."""
76 | 
77 |     # We need to do some massaging of the names because postgres is case-
78 |     # insensitive and '"Foo"' is not the same table as 'Foo' (while 'foo' is)
79 |     def parse_identifier(item):
80 |         name = item.get_real_name()
81 |         schema_name = item.get_parent_name()
82 |         alias = item.get_alias()
83 |         if not name:
84 |             schema_name = None
85 |             name = item.get_name()
86 |             alias = alias or name
87 |         schema_quoted = schema_name and item.value[0] == '"'
88 |         if schema_name and not schema_quoted:
89 |             schema_name = schema_name.lower()
90 |         quote_count = item.value.count('"')
91 |         name_quoted = quote_count > 2 or (quote_count and not schema_quoted)
92 |         alias_quoted = alias and item.value[-1] == '"'
93 |         if alias_quoted or name_quoted and not alias and name.islower():
94 |             alias = '"' + (alias or name) + '"'
95 |         if name and not name_quoted and not name.islower():
96 |             if not alias:
97 |                 alias = name
98 |             name = name.lower()
99 |         return schema_name, name, alias
100 | 
101 |     for item in token_stream:
102 |         if isinstance(item, IdentifierList):
103 |             for identifier in item.get_identifiers():
104 |                 # Sometimes Keywords (such as FROM ) are classified as
105 |                 # identifiers which don't have the get_real_name() method.
106 |                 try:
107 |                     schema_name = identifier.get_parent_name()
108 |                     real_name = identifier.get_real_name()
109 |                     is_function = allow_functions and _identifier_is_function(identifier)
110 |                 except AttributeError:
111 |                     continue
112 |                 if real_name:
113 |                     yield TableReference(schema_name, real_name, identifier.get_alias(), is_function)
114 |         elif isinstance(item, Identifier):
115 |             schema_name, real_name, alias = parse_identifier(item)
116 |             is_function = allow_functions and _identifier_is_function(item)
117 | 
118 |             yield TableReference(schema_name, real_name, alias, is_function)
119 |         elif isinstance(item, Function):
120 |             schema_name, real_name, alias = parse_identifier(item)
121 |             yield TableReference(None, real_name, alias, allow_functions)
122 | 
123 | 
124 | # extract_tables is inspired from examples in the sqlparse lib.
125 | def extract_tables(sql):
126 |     """
127 |     Extract the table names from an SQL statement.
128 |     Returns a list of TableReference namedtuples.
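
    For example (an illustrative sketch),
    extract_tables('select * from abc a join def d') returns roughly
    (TableReference(None, 'abc', 'a', False),
    TableReference(None, 'def', 'd', False)).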
129 | """ 130 | parsed = sqlparse.parse(sql) 131 | if not parsed: 132 | return () 133 | 134 | # INSERT statements must stop looking for tables at the sign of first 135 | # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2) 136 | # abc is the table name, but if we don't stop at the first lparen, then 137 | # we'll identify abc, col1 and col2 as table names. 138 | insert_stmt = parsed[0].token_first().value.lower() == "insert" 139 | stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt) 140 | 141 | # Kludge: sqlparse mistakenly identifies insert statements as 142 | # function calls due to the parenthesized column list, e.g. interprets 143 | # "insert into foo (bar, baz)" as a function call to foo with arguments 144 | # (bar, baz). So don't allow any identifiers in insert statements 145 | # to have is_function=True 146 | identifiers = extract_table_identifiers(stream, allow_functions=not insert_stmt) 147 | # In the case 'sche.', we get an empty TableReference; remove that 148 | return tuple(i for i in identifiers if i.name) 149 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/parseutils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import re 4 | 5 | import sqlparse 6 | from sqlparse.sql import Identifier 7 | from sqlparse.tokens import Error, Token 8 | 9 | cleanup_regex = { 10 | # This matches only alphanumerics and underscores. 11 | "alphanum_underscore": re.compile(r"(\w+)$"), 12 | # This matches everything except spaces, parens, colon, and comma 13 | "many_punctuations": re.compile(r"([^():,\s]+)$"), 14 | # This matches everything except spaces, parens, colon, comma, and period 15 | "most_punctuations": re.compile(r"([^\.():,\s]+)$"), 16 | # This matches everything except a space. 17 | "all_punctuations": re.compile(r"([^\s]+)$"), 18 | } 19 | 20 | 21 | def last_word(text, include="alphanum_underscore"): 22 | """ 23 | Find the last word in a sentence. 24 | """ 25 | 26 | if not text: # Empty string 27 | return "" 28 | 29 | if text[-1].isspace(): 30 | return "" 31 | else: 32 | regex = cleanup_regex[include] 33 | matches = regex.search(text) 34 | if matches: 35 | return matches.group(0) 36 | else: 37 | return "" 38 | 39 | 40 | def find_prev_keyword(sql, n_skip=0): 41 | """ 42 | Find the last sql keyword in an SQL statement. 43 | 44 | Returns the value of the last keyword, and the text of the query with 45 | everything after the last keyword stripped. 
46 | """ 47 | if not sql.strip(): 48 | return None, "" 49 | 50 | parsed = sqlparse.parse(sql)[0] 51 | flattened = list(parsed.flatten()) 52 | flattened = flattened[: len(flattened) - n_skip] 53 | 54 | logical_operators = ("AND", "OR", "NOT", "BETWEEN") 55 | 56 | for t in reversed(flattened): 57 | if t.value == "(" or (t.is_keyword and (t.value.upper() not in logical_operators)): 58 | # Find the location of token t in the original parsed statement 59 | # We can't use parsed.token_index(t) because t may be a child token 60 | # inside a TokenList, in which case token_index thows an error 61 | # Minimal example: 62 | # p = sqlparse.parse('select * from foo where bar') 63 | # t = list(p.flatten())[-3] # The "Where" token 64 | # p.token_index(t) # Throws ValueError: not in list 65 | idx = flattened.index(t) 66 | 67 | # Combine the string values of all tokens in the original list 68 | # up to and including the target keyword token t, to produce a 69 | # query string with everything after the keyword token removed 70 | text = "".join(tok.value for tok in flattened[: idx + 1]) 71 | return t, text 72 | 73 | return None, "" 74 | 75 | 76 | # Postgresql dollar quote signs look like `$$` or `$tag$` 77 | dollar_quote_regex = re.compile(r"^\$[^$]*\$$") 78 | 79 | 80 | def is_open_quote(sql): 81 | """Returns true if the query contains an unclosed quote.""" 82 | 83 | # parsed can contain one or more semi-colon separated commands 84 | parsed = sqlparse.parse(sql) 85 | return any(_parsed_is_open_quote(p) for p in parsed) 86 | 87 | 88 | def _parsed_is_open_quote(parsed): 89 | # Look for unmatched single quotes, or unmatched dollar sign quotes 90 | return any(tok.match(Token.Error, ("'", "$")) for tok in parsed.flatten()) 91 | 92 | 93 | def parse_partial_identifier(word): 94 | """ 95 | Attempt to parse a (partially typed) word as an identifier. 96 | 97 | word may include a schema qualification, like `schema_name.partial_name` 98 | or `schema_name.` There may also be unclosed quotation marks, like 99 | `"schema` or `schema."partial_name`. 100 | 101 | :param word: string representing a (partially complete) identifier 102 | :return: sqlparse.sql.Identifier, or None 103 | """ 104 | 105 | p = sqlparse.parse(word)[0] 106 | n_tok = len(p.tokens) 107 | if n_tok == 1 and isinstance(p.tokens[0], Identifier): 108 | return p.tokens[0] 109 | elif p.token_next_by(m=(Error, '"'))[1]: 110 | # An unmatched double quote, e.g. 
'"foo', 'foo."', or 'foo."bar' 111 | # Close the double quote, then reparse 112 | return parse_partial_identifier(word + '"') 113 | else: 114 | return None 115 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/prompt.py: -------------------------------------------------------------------------------- 1 | from prompt_toolkit.application import get_app 2 | from prompt_toolkit.buffer import Buffer 3 | from prompt_toolkit.enums import DEFAULT_BUFFER, SEARCH_BUFFER 4 | from prompt_toolkit.filters import Condition, HasFocus 5 | from prompt_toolkit.key_binding import KeyBindings 6 | from prompt_toolkit.keys import Keys 7 | from pygments.token import Token 8 | 9 | from clickhouse_cli.clickhouse.definitions import INTERNAL_COMMANDS 10 | from clickhouse_cli.ui.completer import CHCompleter 11 | 12 | # from prompt_toolkit.formatted_text import PygmentsTokens 13 | 14 | 15 | kb = KeyBindings() 16 | 17 | 18 | def is_multiline(multiline): 19 | @Condition 20 | def cond(): 21 | doc = get_app().layout.get_buffer_by_name(DEFAULT_BUFFER).document 22 | if not multiline: 23 | return False 24 | else: 25 | return not query_is_finished(doc.text) 26 | 27 | return cond 28 | 29 | 30 | class CLIBuffer(Buffer): 31 | def __init__(self, client, multiline, metadata, *args, **kwargs): 32 | super(CLIBuffer, self).__init__( 33 | *args, 34 | completer=CHCompleter(client, metadata), 35 | enable_history_search=True, 36 | # doesn't seem to have any effect on prompt_toolkit 2.x's PromptSession 37 | # multiline=is_multiline(multiline), 38 | **kwargs, 39 | ) 40 | 41 | 42 | def query_is_finished(text, multiline=False): 43 | text = text.strip() 44 | return (not multiline and text == "") or text.endswith(";") or text in INTERNAL_COMMANDS 45 | 46 | 47 | def get_prompt_tokens(*args): 48 | return [ 49 | (Token.Prompt, " :) "), 50 | ] 51 | 52 | 53 | def get_continuation_tokens(*args): 54 | return [(Token.Prompt, " ] ")] 55 | 56 | 57 | @kb.add(Keys.ControlC, filter=HasFocus(DEFAULT_BUFFER)) 58 | def reset_buffer(event): 59 | buffer = event.app.current_buffer 60 | if buffer.complete_state: 61 | buffer.cancel_completion() 62 | else: 63 | buffer.reset() 64 | 65 | 66 | @kb.add(Keys.ControlC, filter=HasFocus(SEARCH_BUFFER)) 67 | def reset_search_buffer(event): 68 | buffer = event.app.current_buffer 69 | if buffer.document.text: 70 | buffer.reset() 71 | else: 72 | event.cli.push_focus(DEFAULT_BUFFER) 73 | 74 | 75 | @kb.add("tab") 76 | def autocomplete(event): 77 | """Force autocompletion at cursor.""" 78 | buffer = event.app.current_buffer 79 | if buffer.complete_state: 80 | buffer.complete_next() 81 | else: 82 | buffer.start_completion(select_first=True) 83 | 84 | 85 | @kb.add("c-space") 86 | def autocomplete_ctrl_space(event): 87 | buffer = event.app.current_buffer 88 | if buffer.complete_state: 89 | buffer.complete_next() 90 | else: 91 | buffer.start_completion(select_first=False) 92 | -------------------------------------------------------------------------------- /clickhouse_cli/ui/style.py: -------------------------------------------------------------------------------- 1 | from click import echo_via_pager, secho 2 | from prompt_toolkit.styles.pygments import style_from_pygments_cls 3 | from pygments.style import Style 4 | from pygments.styles import get_style_by_name 5 | from pygments.token import Comment, Error, Generic, Keyword, Name, Number, Operator, String, Token, Whitespace 6 | 7 | RED = "#cb0f1e" 8 | ORANGE = "#de9014" 9 | YELLOW = "#e6cd09" 10 | GREEN = "#21aa52" 11 | AQUA = 
"#41c2b7" 12 | BLUE = "#387be8" 13 | PURPLE = "#860093" 14 | 15 | 16 | class CHPygmentsStyleDefault(Style): 17 | background_color = "#202020" 18 | highlight_color = "#404040" 19 | 20 | styles = { 21 | Token: "#d0d0d0", 22 | Whitespace: "#666666", 23 | Comment: "italic #999999", 24 | Comment.Preproc: "noitalic bold #cd2828", 25 | Comment.Special: "noitalic bold #e50808 bg:#520000", 26 | Keyword: "bold #6ab825", 27 | Keyword.Pseudo: "nobold", 28 | Operator.Word: "bold #6ab825", 29 | String: "#ed9d13", 30 | String.Other: "#ffa500", 31 | Number: "#3677a9", 32 | Name.Builtin: "#24909d", 33 | Name.Variable: "#40ffff", 34 | Name.Constant: "#40ffff", 35 | Name.Class: "underline #447fcf", 36 | Name.Function: "#447fcf", 37 | Name.Namespace: "underline #447fcf", 38 | Name.Exception: "#bbbbbb", 39 | Name.Tag: "bold #6ab825", 40 | Name.Attribute: "#bbbbbb", 41 | Name.Decorator: "#ffa500", 42 | Generic.Heading: "bold #ffffff", 43 | Generic.Subheading: "underline #ffffff", 44 | Generic.Deleted: "#d22323", 45 | Generic.Inserted: "#589819", 46 | Generic.Error: "#d22323", 47 | Generic.Emph: "italic", 48 | Generic.Strong: "bold", 49 | Generic.Prompt: "#eeeeee", 50 | Generic.Output: "#ffffff", 51 | Generic.Traceback: "#d22323", 52 | Error: "bg:#e3d2d2 #a61717", 53 | } 54 | 55 | 56 | def get_ch_pygments_style(theme=None): 57 | if theme is not None: 58 | return get_style_by_name(theme) 59 | return CHPygmentsStyleDefault 60 | 61 | 62 | def get_ch_style(theme=None): 63 | return style_from_pygments_cls(get_ch_pygments_style(theme)) 64 | 65 | 66 | class Echo(object): 67 | def __init__(self, verbose=True, colors=True): 68 | self.verbose = verbose 69 | self.colors = colors 70 | 71 | def _echo(self, *args, **kwargs): 72 | if not self.colors: 73 | kwargs.pop("fg", None) 74 | if self.verbose: 75 | return secho(*args, **kwargs) 76 | 77 | def info(self, text, *args, **kwargs): 78 | self._echo(text, *args, **kwargs) 79 | 80 | def success(self, text, *args, **kwargs): 81 | self._echo(text, fg="green", *args, **kwargs) 82 | 83 | def warning(self, text, *args, **kwargs): 84 | self._echo(text, fg="yellow", *args, **kwargs) 85 | 86 | def error(self, text, *args, **kwargs): 87 | secho(text, fg="red", *args, **kwargs) 88 | 89 | def print(self, *args, **kwargs): 90 | if self.verbose: 91 | return print(*args, **kwargs) 92 | 93 | def pager(self, text, end=None): 94 | return echo_via_pager(text) 95 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "clickhouse-cli" 7 | description = "A third-party client for the Clickhouse DBMS server." 
8 | authors = [{name = "Igor Hatarist", email = "igor@hatari.st"}]
9 | readme = "README.md"
10 | dynamic = ["version"]
11 | classifiers = [
12 |     "Development Status :: 5 - Production/Stable",
13 |     "Intended Audience :: Developers",
14 |     "License :: OSI Approved :: MIT License",
15 |     "Operating System :: OS Independent",
16 |     "Programming Language :: Python",
17 |     "Programming Language :: Python :: 3",
18 |     "Programming Language :: Python :: 3 :: Only",
19 |     "Programming Language :: Python :: 3.7",
20 |     "Programming Language :: Python :: 3.8",
21 |     "Programming Language :: Python :: 3.9",
22 |     "Programming Language :: Python :: 3.10",
23 |     "Programming Language :: Python :: 3.11",
24 |     "Programming Language :: Python :: Implementation :: PyPy",
25 |     "Topic :: Database",
26 |     "Topic :: Software Development",
27 | ]
28 | requires-python = ">=3.7"
29 | dependencies = [
30 |     "click>=6.6",
31 |     "prompt-toolkit>=2.0",
32 |     "pygments>=2.1.3",
33 |     "requests>=2.11.1",
34 |     "sqlparse>=0.2.2,<0.4.4",
35 | ]
36 | 
37 | [project.urls]
38 | Home = "https://github.com/hatarist/clickhouse-cli"
39 | Documentation = "https://github.com/hatarist/clickhouse-cli"
40 | Source = "https://github.com/hatarist/clickhouse-cli"
41 | Tracker = "https://github.com/hatarist/clickhouse-cli/issues"
42 | 
43 | [project.scripts]
44 | clickhouse-cli = "clickhouse_cli.cli:run_cli"
45 | 
46 | [project.optional-dependencies]
47 | dev = [
48 |     "flake8",
49 |     "build",
50 | ]
51 | test = [
52 |     "pytest",
53 |     "pytest-cov",
54 | ]
55 | doc = [
56 |     "sphinx",
57 | ]
58 | tox = [
59 |     "virtualenv",
60 |     "tox",
61 | ]
62 | 
63 | [tool.flit.sdist]
64 | include = [
65 |     "tests/*.py",
66 |     "LICENSE.txt",
67 |     "Makefile",
68 |     "tox.ini",
69 | ]
70 | 
71 | [tool.isort]
72 | profile = "black"
73 | # line_length=119
74 | # indent='    '
75 | # multi_line_output=5
76 | 
77 | [tool.black]
78 | line-length = 119
79 | target-version = ['py37']
80 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | click>=6.6
2 | prompt-toolkit>=3.0
3 | pygments>=2.1.3
4 | requests>=2.11.1
5 | sqlparse>=0.2.2,<0.4.4
6 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description_file=README.md
3 | 
4 | [flake8]
5 | extend-ignore = E203
6 | max-line-length = 120
7 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from clickhouse_cli import __version__
3 | 
4 | setup(
5 |     version=__version__,
6 |     long_description='',
7 |     package_data={'clickhouse_cli': ['clickhouse-cli.rc.sample']},
8 |     packages=[
9 |         'clickhouse_cli',
10 |         'clickhouse_cli.clickhouse',
11 |         'clickhouse_cli.ui',
12 |         'clickhouse_cli.ui.parseutils',
13 |     ],
14 |     zip_safe=False,
15 | )
16 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hatarist/clickhouse-cli/81450224395407e101bb0a219c5b85c2e6ad7594/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from clickhouse_cli.cli import run_cli
4 | 
5 | 
6 | def test_main_help():
7 |     # Call with the --help option as a basic sanity check.
8 |     with pytest.raises(SystemExit) as exinfo:
9 |         run_cli(["--help", ])
10 |     assert exinfo.value.code == 0
11 | 
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | skip_missing_interpreters = True
3 | envlist =
4 |     py37
5 |     py38
6 |     py39
7 |     py310
8 |     py311
9 |     flake8
10 | 
11 | [testenv]
12 | deps =
13 |     pytest
14 |     pytest-cov
15 | commands =
16 |     pytest --cov=clickhouse_cli {posargs}
17 | 
18 | [testenv:flake8]
19 | deps =
20 |     flake8
21 | commands =
22 |     flake8 clickhouse_cli tests setup.py
23 | 
24 | [flake8]
25 | extend-ignore = E501
26 | 
--------------------------------------------------------------------------------
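A usage note (illustrative, not part of the repository): with the tox.ini
above, a single environment can be run on its own, and extra pytest
arguments are forwarded through {posargs}:

    tox -e py311 -- -k test_main_help
    # runs: pytest --cov=clickhouse_cli -k test_main_help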