├── .github ├── FUNDING.yml └── workflows │ └── build-and-package.yaml ├── .gitignore ├── LICENSE ├── QSQL-NOTES.md ├── README.markdown ├── benchmark-config.sh ├── bin ├── .qrc ├── __init__.py ├── q.bat └── q.py ├── conftest.py ├── dist ├── fpm-config ├── test-rpm-inside-container.sh ├── test-using-deb.sh └── test-using-rpm.sh ├── doc ├── AUTHORS ├── IMPLEMENTATION.markdown ├── LICENSE ├── RATIONALE.markdown ├── THANKS └── USAGE.markdown ├── examples ├── EXAMPLES.markdown ├── exampledatafile └── group-emails-example ├── mkdocs ├── README.md ├── docs │ ├── .DS_Store │ ├── about.md │ ├── fsg9b9b1.txt │ ├── google0efeb4ff0a886e81.html │ ├── img │ │ ├── bg_hr.png │ │ ├── blacktocat.png │ │ ├── icon_download.png │ │ ├── q-logo.png │ │ ├── q-logo1.ico │ │ ├── q-logo1.png │ │ ├── sprite_download.png │ │ ├── sprite_download3.png │ │ ├── sprite_download4.png │ │ └── torii-favicon.ico │ ├── index.md │ ├── index_cn.md │ ├── js │ │ └── google-analytics.js │ └── stylesheets │ │ └── extra.css ├── generate-web-site.sh ├── mkdocs.yml ├── requirements.txt └── theme │ └── main.html ├── prepare-benchmark-env ├── pyoxidizer.bzl ├── pytest.ini ├── requirements.txt ├── run-benchmark ├── run-coverage.sh ├── run-tests.sh ├── setup.py ├── test-requirements.txt └── test ├── BENCHMARK.md ├── __init__.py ├── benchmark-results └── source-files-1443b7418b46594ad256abd9db4a7671cb251e6a │ └── 2020-09-17-v2.0.17 │ ├── octosql_v0.3.0.benchmark-results │ ├── q-benchmark-2.7.18.benchmark-results │ ├── q-benchmark-3.6.4.benchmark-results │ ├── q-benchmark-3.7.9.benchmark-results │ ├── q-benchmark-3.8.5.benchmark-results │ ├── summary.benchmark-results │ └── textql_2.0.3.benchmark-results └── test_suite.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: harelba 4 | -------------------------------------------------------------------------------- 
/.github/workflows/build-and-package.yaml: -------------------------------------------------------------------------------- 1 | name: BuildAndPackage 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | branches: master 8 | pull_request: 9 | branches: master 10 | paths-ignore: 11 | - "*.md" 12 | - "*.markdown" 13 | - "mkdocs/**/*" 14 | tags-ignore: 15 | - "*" 16 | 17 | jobs: 18 | version_info: 19 | runs-on: ubuntu-18.04 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v2 23 | - id: vars 24 | run: | 25 | set -x -e 26 | 27 | echo "github event ref is ${{ github.ref }}" 28 | 29 | if [ "x${{ startsWith(github.ref, 'refs/tags/v') }}" == "xtrue" ] 30 | then 31 | echo "Trigger was a version tag - ${{ github.ref }}" 32 | echo ::set-output name=q_version::${GITHUB_REF#refs/tags/v} 33 | echo ::set-output name=is_release::true 34 | else 35 | # For testing version propagation inside the PR 36 | echo "Either branch of a non-version tag - setting version to 0.0.0" 37 | echo ::set-output name=q_version::0.0.0 38 | echo ::set-output name=is_release::false 39 | fi 40 | 41 | outputs: 42 | q_version: ${{ steps.vars.outputs.q_version }} 43 | is_release: ${{ steps.vars.outputs.is_release }} 44 | 45 | check_version_info: 46 | runs-on: ubuntu-18.04 47 | needs: version_info 48 | steps: 49 | - name: test q_version 50 | run: | 51 | set -e -x 52 | 53 | echo "outputs: ${{ toJson(needs.version_info) }}" 54 | 55 | create-man: 56 | runs-on: ubuntu-18.04 57 | steps: 58 | - name: Checkout 59 | uses: actions/checkout@v2 60 | - name: Install Ruby 61 | uses: ruby/setup-ruby@v1 62 | with: 63 | ruby-version: '2.6' 64 | - name: Create man page 65 | run: | 66 | set -x -e 67 | gem install ronn 68 | 69 | ronn doc/USAGE.markdown 70 | # Must be gzipped, otherwise debian does not install it 71 | gzip doc/USAGE 72 | - name: Upload man page 73 | uses: actions/upload-artifact@v1.0.0 74 | with: 75 | name: q-man-page 76 | path: doc/USAGE.gz 77 | 78 | build-linux: 79 | runs-on: ubuntu-18.04 80 | steps: 81 | 
- name: Checkout 82 | uses: actions/checkout@v2 83 | - name: Cache pyox 84 | uses: actions/cache@v2 85 | with: 86 | path: | 87 | ~/.cache/pyoxidizer 88 | key: ${{ runner.os }}-pyox 89 | - name: Install pyoxidizer 90 | run: | 91 | set -e -x 92 | 93 | sudo apt-get update 94 | sudo apt-get install -y zip sqlite3 rpm 95 | 96 | curl -o pyoxidizer.zip -L "https://github.com/indygreg/PyOxidizer/releases/download/pyoxidizer%2F0.17/pyoxidizer-0.17.0-linux_x86_64.zip" 97 | unzip pyoxidizer.zip 98 | chmod +x ./pyoxidizer 99 | - name: Create Q Executable - Linux 100 | run: | 101 | set -e -x 102 | 103 | ./pyoxidizer build --release 104 | 105 | export Q_EXECUTABLE=./build/x86_64-unknown-linux-gnu/release/install/q 106 | chmod 755 $Q_EXECUTABLE 107 | 108 | seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite 109 | 110 | mkdir -p packages/linux/ 111 | cp $Q_EXECUTABLE packages/linux/linux-q 112 | - name: Upload Linux Executable 113 | uses: actions/upload-artifact@v1.0.0 114 | with: 115 | name: linux-q 116 | path: packages/linux/linux-q 117 | 118 | test-linux: 119 | needs: build-linux 120 | runs-on: ubuntu-18.04 121 | steps: 122 | - name: Checkout 123 | uses: actions/checkout@v2 124 | - name: Install Python for Testing 125 | uses: actions/setup-python@v2 126 | with: 127 | python-version: '3.8.12' 128 | architecture: 'x64' 129 | - name: Prepare Testing 130 | run: | 131 | set -e -x 132 | 133 | pip3 install -r test-requirements.txt 134 | - name: Download Linux Executable 135 | uses: actions/download-artifact@v2 136 | with: 137 | name: linux-q 138 | - name: Run Tests on Linux Executable 139 | run: | 140 | set -x -e 141 | 142 | find ./ -ls 143 | 144 | chmod 755 ./linux-q 145 | 146 | Q_EXECUTABLE=`pwd`/linux-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v 147 | 148 | package-linux-deb: 149 | needs: [test-linux, create-man, version_info] 150 | runs-on: ubuntu-18.04 151 | steps: 152 | - name: Checkout 153 | uses: actions/checkout@v2 154 | - name: Install 
Ruby 155 | uses: ruby/setup-ruby@v1 156 | with: 157 | ruby-version: '2.6' 158 | - name: Downoad man page 159 | uses: actions/download-artifact@v2 160 | with: 161 | name: q-man-page 162 | - name: Download Linux Executable 163 | uses: actions/download-artifact@v2 164 | with: 165 | name: linux-q 166 | - name: Build DEB Package 167 | run: | 168 | set -e -x 169 | 170 | mkdir -p packages/linux/ 171 | 172 | find ./ -ls 173 | 174 | chmod 755 ./linux-q 175 | 176 | export q_version=${{ needs.version_info.outputs.q_version }} 177 | 178 | gem install fpm 179 | cp dist/fpm-config ~/.fpm 180 | fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-${q_version}-1.x86_64.deb --version ${q_version} ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz 181 | - name: Upload DEB Package 182 | uses: actions/upload-artifact@v1.0.0 183 | with: 184 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb 185 | path: packages/linux/q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb 186 | 187 | test-deb-packaging: 188 | runs-on: ubuntu-18.04 189 | needs: [package-linux-deb, version_info] 190 | steps: 191 | - name: Checkout 192 | uses: actions/checkout@v2 193 | - name: Download DEB 194 | uses: actions/download-artifact@v2 195 | with: 196 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb 197 | - name: Install Python for Testing 198 | uses: actions/setup-python@v2 199 | with: 200 | python-version: '3.8.12' 201 | architecture: 'x64' 202 | - name: Prepare Testing 203 | run: | 204 | set -e -x 205 | 206 | pip3 install -r test-requirements.txt 207 | - name: Test DEB Package Installation 208 | run: ./dist/test-using-deb.sh ./q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb 209 | 210 | package-linux-rpm: 211 | needs: [test-linux, create-man, version_info] 212 | runs-on: ubuntu-18.04 213 | steps: 214 | - name: Checkout 215 | uses: actions/checkout@v2 216 | - name: Install Ruby 217 
| uses: ruby/setup-ruby@v1 218 | with: 219 | ruby-version: '2.6' 220 | - name: Download man page 221 | uses: actions/download-artifact@v2 222 | with: 223 | name: q-man-page 224 | - name: Download Linux Executable 225 | uses: actions/download-artifact@v2 226 | with: 227 | name: linux-q 228 | - name: Build RPM Package 229 | run: | 230 | set -e -x 231 | 232 | mkdir -p packages/linux 233 | 234 | 235 | chmod 755 ./linux-q 236 | 237 | export q_version=${{ needs.version_info.outputs.q_version }} 238 | 239 | gem install fpm 240 | cp dist/fpm-config ~/.fpm 241 | fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-${q_version}.x86_64.rpm --version ${q_version} ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz 242 | - name: Upload RPM Package 243 | uses: actions/upload-artifact@v1.0.0 244 | with: 245 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm 246 | path: packages/linux/q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm 247 | 248 | test-rpm-packaging: 249 | runs-on: ubuntu-18.04 250 | needs: [package-linux-rpm, version_info] 251 | steps: 252 | - name: Checkout 253 | uses: actions/checkout@v2 254 | - name: Download RPM 255 | uses: actions/download-artifact@v2 256 | with: 257 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm 258 | - name: Retest using RPM 259 | run: ./dist/test-using-rpm.sh ./q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm 260 | 261 | build-mac: 262 | runs-on: macos-11 263 | steps: 264 | - name: Checkout 265 | uses: actions/checkout@v2 266 | - name: Cache pyox 267 | uses: actions/cache@v2 268 | with: 269 | path: | 270 | ~/.cache/pyoxidizer 271 | key: ${{ runner.os }}-pyox 272 | - name: Install pyoxidizer 273 | run: | 274 | set -e -x 275 | 276 | curl -o pyoxidizer.zip -L "https://github.com/indygreg/PyOxidizer/releases/download/pyoxidizer%2F0.17/pyoxidizer-0.17.0-macos-universal.zip" 277 | unzip pyoxidizer.zip 278 | mv 
macos-universal/pyoxidizer ./pyoxidizer 279 | 280 | chmod +x ./pyoxidizer 281 | - name: Create Q Executable - Mac 282 | run: | 283 | set -e -x 284 | 285 | ./pyoxidizer build --release 286 | 287 | export Q_EXECUTABLE=./build/x86_64-apple-darwin/release/install/q 288 | chmod 755 $Q_EXECUTABLE 289 | 290 | seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite 291 | 292 | mkdir -p packages/macos/ 293 | cp $Q_EXECUTABLE packages/macos/macos-q 294 | - name: Upload MacOS Executable 295 | uses: actions/upload-artifact@v1.0.0 296 | with: 297 | name: macos-q 298 | path: packages/macos/macos-q 299 | 300 | test-mac: 301 | needs: build-mac 302 | runs-on: macos-11 303 | steps: 304 | - name: Checkout 305 | uses: actions/checkout@v2 306 | - name: Install Python for Testing 307 | uses: actions/setup-python@v2 308 | with: 309 | python-version: '3.8.12' 310 | architecture: 'x64' 311 | - name: Prepare Testing 312 | run: | 313 | set -e -x 314 | 315 | pip3 install wheel 316 | 317 | pip3 install -r test-requirements.txt 318 | - name: Download MacOS Executable 319 | uses: actions/download-artifact@v2 320 | with: 321 | name: macos-q 322 | - name: Run Tests on MacOS Executable 323 | run: | 324 | set -e -x 325 | 326 | chmod 755 ./macos-q 327 | 328 | Q_EXECUTABLE=`pwd`/macos-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v 329 | 330 | not-package-mac: 331 | # create-man is not needed, as it's generated inside the brew formula independently 332 | needs: [test-mac] 333 | runs-on: macos-11 334 | steps: 335 | - name: Checkout 336 | uses: actions/checkout@v2 337 | - name: Not Packaging Mac 338 | run: | 339 | echo "homebrew mac cannot be packaged from the source code itself, due to the package build process of homebrew. 
See https://github.com/harelba/homebrew-q" 340 | 341 | not-test-mac-packaging: 342 | needs: not-package-mac 343 | runs-on: macos-11 344 | steps: 345 | - name: Checkout 346 | uses: actions/checkout@v2 347 | - name: Not Testing Mac Packaging 348 | run: | 349 | echo "homebrew mac packaging cannot be tested here, due to the package build process of homebrew. See https://github.com/harelba/homebrew-q" 350 | 351 | build-windows: 352 | runs-on: windows-latest 353 | needs: version_info 354 | steps: 355 | - name: Checkout 356 | uses: actions/checkout@v2 357 | - name: Install MSVC build tools 358 | uses: ilammy/msvc-dev-cmd@v1 359 | - name: Install Python 360 | uses: actions/setup-python@v2 361 | with: 362 | python-version: '3.8.10' 363 | architecture: 'x64' 364 | - name: Install pyoxidizer 365 | shell: bash 366 | run: | 367 | set -x -e 368 | 369 | python3 -V 370 | pip3 -V 371 | 372 | pip3 install pyoxidizer 373 | - name: Create Q Executable - Windows 374 | shell: bash 375 | run: | 376 | set -e -x 377 | 378 | pyoxidizer build --release --var Q_VERSION ${{ needs.version_info.outputs.q_version }} 379 | 380 | export Q_EXECUTABLE=./build/x86_64-pc-windows-msvc/release/install/q 381 | chmod 755 $Q_EXECUTABLE 382 | 383 | seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite 384 | 385 | mkdir -p packages/windows/ 386 | cp $Q_EXECUTABLE packages/windows/win-q.exe 387 | 388 | find ./ -ls 389 | - name: Upload Linux Executable 390 | uses: actions/upload-artifact@v1.0.0 391 | with: 392 | name: win-q.exe 393 | path: packages/windows/win-q.exe 394 | 395 | not-really-test-windows: 396 | needs: build-windows 397 | runs-on: windows-latest 398 | steps: 399 | - name: Checkout 400 | uses: actions/checkout@v2 401 | - name: Install Python for Testing 402 | uses: actions/setup-python@v2 403 | with: 404 | python-version: '3.8' 405 | architecture: 'x64' 406 | - name: Download Windows Executable 407 | uses: actions/download-artifact@v2 408 | with: 409 | name: win-q.exe 410 | 
- name: Not-Really-Test Windows 411 | shell: bash 412 | continue-on-error: true 413 | run: | 414 | echo "Tests are not compatible with Windows (path separators, tmp folder names etc.). Only a sanity wil be tested" 415 | 416 | chmod +x ./win-q.exe 417 | 418 | seq 1 10000 | ./win-q.exe -c 1 "select sum(c1),count(*) from -" -S some-db.sqlite 419 | 420 | package-windows: 421 | needs: [create-man, not-really-test-windows, version_info] 422 | runs-on: windows-latest 423 | steps: 424 | - name: Checkout 425 | uses: actions/checkout@v2 426 | - name: Install MSVC build tools 427 | uses: ilammy/msvc-dev-cmd@v1 428 | - name: Install Python 429 | uses: actions/setup-python@v2 430 | with: 431 | python-version: '3.8.10' 432 | architecture: 'x64' 433 | - name: Install pyoxidizer 434 | shell: bash 435 | run: | 436 | set -x -e 437 | 438 | python3 -V 439 | pip3 -V 440 | 441 | pip3 install pyoxidizer 442 | - name: Create Q MSI - Windows 443 | shell: bash 444 | run: | 445 | set -e -x 446 | 447 | pyoxidizer build --release msi_installer --var Q_VERSION ${{ needs.version_info.outputs.q_version }} 448 | 449 | export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi 450 | chmod 755 $Q_MSI 451 | 452 | mkdir -p packages/windows/ 453 | cp $Q_MSI packages/windows/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi 454 | 455 | - name: Upload Windows MSI 456 | uses: actions/upload-artifact@v1.0.0 457 | with: 458 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi 459 | path: packages/windows/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi 460 | 461 | test-windows-packaging: 462 | needs: [package-windows, version_info] 463 | runs-on: windows-latest 464 | steps: 465 | - name: Checkout 466 | uses: actions/checkout@v2 467 | - name: Download Windows Package 468 | uses: actions/download-artifact@v2 469 | with: 470 | name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi 
471 | - name: Test Install of MSI 472 | continue-on-error: true 473 | shell: powershell 474 | run: | 475 | $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait 476 | $process.ExitCode 477 | gc msi-install.log 478 | 479 | exit $process.ExitCode 480 | - name: Test Uninstall of MSI 481 | continue-on-error: true 482 | shell: powershell 483 | run: | 484 | $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi /norestart /quiet" -PassThru -Wait 485 | $process.ExitCode 486 | exit $process.ExitCode 487 | 488 | perform-prerelease: 489 | # We'd like artifacts to be uploaded regardless of tests succeeded or not, 490 | # this is why the dependency here is not on test-X-packaging jobs 491 | needs: [package-linux-deb, package-linux-rpm, not-package-mac, package-windows, version_info] 492 | runs-on: ubuntu-latest 493 | if: needs.version_info.outputs.is_release == 'false' 494 | steps: 495 | - name: Download All Artifacts 496 | uses: actions/download-artifact@v2 497 | with: 498 | path: artifacts/ 499 | - name: Timestamp pre-release 500 | run: | 501 | set -e -x 502 | 503 | echo "Workflow finished at $(date)" >> artifacts/workflow-finish-time.txt 504 | - name: Create pre-release 505 | uses: "marvinpinto/action-automatic-releases@v1.2.1" 506 | with: 507 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 508 | automatic_release_tag: "latest" 509 | prerelease: true 510 | title: "Next Release Development Build" 511 | files: | 512 | artifacts/**/* 513 | 514 | perform-release: 515 | needs: [not-test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging, version_info] 516 | runs-on: ubuntu-latest 517 | if: needs.version_info.outputs.is_release == 'true' 518 | steps: 519 | - name: Download All Artifacts 520 | uses: actions/download-artifact@v2 521 | with: 522 | path: artifacts/ 523 | - uses: 
"marvinpinto/action-automatic-releases@v1.2.1" 524 | with: 525 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 526 | prerelease: false 527 | files: | 528 | artifacts/**/* 529 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | q.spec 3 | q.1 4 | *.pyc 5 | .vagrant 6 | rpm_build_area 7 | *.deb 8 | setup.exe 9 | win_output 10 | win_build 11 | packages 12 | .idea/ 13 | dist/windows/ 14 | generated-site/ 15 | benchmark_data.tar.gz 16 | _benchmark_data/ 17 | q.egg-info/ 18 | .pytest_cache/ 19 | *.qsql 20 | htmlcov/ 21 | *.sqlite 22 | *.tar.gz 23 | .coverage 24 | .DS_Store 25 | *.egg 26 | -------------------------------------------------------------------------------- /QSQL-NOTES.md: -------------------------------------------------------------------------------- 1 | 2 | ## Major changes and additions in the new `3.x` version 3 | This is the list of new/changed functionality in this version. Large changes, please make sure to read the details if you're already using q. 4 | 5 | * **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` 6 | * **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. 7 | * **Direct querying of the `qsql` cache files** - The user can query directly from the `qsql` files, removing the need for the original files. Just use `select ... from .qsql`. Please wait until the non-beta version is out before thinking about deleting any of your original files... 8 | * **Revamped `.qrc` mechanism** - allows opting-in to caching without specifying it in every query. 
By default, caching is **disabled**, for backward compatibility and for finding usability issues. 9 | * **Save-to-db is now reusable for queries** - `--save-db-to-disk` option (`-S`) has been enhanced to match the new capabilities. You can query the resulting file directly through q, using the method mentioned above (it's just a standard sqlite database). 10 | * **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it. Internally, q is now packaged with Python 3.8. After everything cools down, I'll probably bump this to 3.9/3.10. 11 | * **Minimal Linux Version Bumped** - Works with CentOS 8, Ubuntu 18.04+, Debian 10+. Currently only for x86_64. Depends on glibc version 2.25+. Haven't tested it on other architectures. Issuing other architectures will be possible later on 12 | * **Completely revamped binary packaging** - Using [pyoxidizer](https://github.com/indygreg/PyOxidizer) 13 | 14 | The following sections provide the details of each of the new functionalities in this major version. 15 | 16 | ## Automatic caching of data files 17 | Speeding up subsequent reads from the same file by several orders of magnitude by automatically creating an immutable cache file for each tabular text file. 18 | 19 | For example, reading a 0.9GB file with 1M rows and 100 columns without caching takes ~50 seconds. When the cache exists, querying the same file will take around ~1-2 seconds. Obviously, the cache can be used in order to perform any query and not just the original query that was used for creating the cache. 20 | 21 | When caching is enabled, the cache is created on the first read of a file, and used automatically when reading it in other queries. A separate cache is being created for each file that is being used, allowing reuse in multiple use-cases. 
For example, if two csv files each have their own cache file from previous queries, then running a query that JOINs these two files would use the caches as well (without loading the data into memory), speeding it up considerably. 22 | 23 | The tradeoff for using cache files is disk space - A new file with the postfix `.qsql` is created and automatically detected and used in queries as needed. This file is essentially a standard sqlite file (with some additional metadata tables), and can be used directly by any standard sqlite tool later on. 24 | 25 | For backward compatibility, the caching option is not turned on by default. You'd need to use the new `-C ` to determine the caching mode. Available options are as follows: 26 | * `none` - The default, provides the original q's behaviour without caching 27 | * `read` - Only reads cache files if they exist, but doesn't create any new ones 28 | * `readwrite` - Uses cache files if they exist, or creates new ones if they don't. Writing new cache files doesn't interfere with the actual run of the query, so this option can be used in order to dynamically create the cache files if they don't exist 29 | 30 | Content signatures are being stored in the caches, allowing detection of a state where the original file has been modified after the cache has been created. q will issue an error if this happens. For now, just delete the `.qsql` file in order to recreate the cache. In the future, another `-C` option would be added to automatically recreate the updated cache in such a case. Notice that the content signature contains various q flags which affect parsing, so make sure to use the same parameters to q when performing the queries, otherwise q will issue an error. 31 | 32 | Notice that when running with `-A`, the cache is not written, even when `-C` is set to `readwrite`. This is due to the fact that `-A` does not really read the entire content of the files. 
For now, if you'd like to just prepare the cache without running the actual query, you can run it with a `select 1` query or something, although in terms of speed it will mostly not matter. If there's demand for adding an explicit `prepare caches only` option, I'll consider adding it. 33 | 34 | ## Revamped `.qrc` mechanism 35 | Adding `-C ` for each query can be cumbersome at some point, so the `.qrc` file has been revamped for easy addition of default parameters. 36 | 37 | For example, if you want the caching behaviour to be `read` all the time, then just add a `~/.qrc` file, and set the following in it: 38 | ``` 39 | [options] 40 | caching_mode=read 41 | ``` 42 | 43 | All other flags and parameters to q can be controlled by the `.qrc` file. To see the proper names for each parameter, run `q --dump-defaults` and it will dump a default `.qrc` file that contains all parameters to `stdout`. 44 | 45 | ## Direct querying of standard sqlite databases 46 | q now supports direct querying of standard sqlite databases. The syntax for accessing a table inside an sqlite database is `:::`. A query can contain any mix of sqlite files, qsql files or regular delimited files. 47 | 48 | For example, this command joins two tables from two separate sqlite databases: 49 | ``` 50 | $ q "select count(*) from mydatabase1.sqlite:::mytable1 a left join mydatabase2.sqlite:::mytable2 b on (a.c1 = b.c1)" 51 | ``` 52 | 53 | Running queries on sqlite databases does not usually entail loading the data into memory. Databases are attached to a virtual database and queried directly from disk. This means that querying speed is practically identical to standard sqlite access. This is also true when multiple sqlite databases are used in a single query. The same mechanism is being used by q whenever it uses a qsql file (either directly or as a cache of a delimited file). 54 | 55 | sqlite itself does have a pre-compiled limit of the number of databases that can be attached simultaneously. 
If this limit is reached, then q will attach as many databases as possible, and then continue processing by loading additional tables into memory in order to execute the query. The standard limit in sqlite3 (unless compiled specifically with another limit) is 10 databases. This allows q to access as many as 8 user databases without having to load any data into memory (2 databases are always used for q's internal logic). Using more databases in a single query than this pre-compiled sqlite limit would slow things down, since some of the data would go into memory, but the query should still provide correct results. 56 | 57 | Whenever the sqlite database file contains only one table, the table name part can be omitted, and the user can specify only the sqlite-filename as the table name. For example, querying an sqlite database `mydatabase.sqlite` that only has one table `mytable` is possible with `q "SELECT ... FROM mydatabase.sqlite"`. There's no need to specify the table name in this case. 58 | 59 | Since `.qsql` files are also standard sqlite files, they can be queried directly as well. This allows the user to actually delete the original CSV file and use the caches as if they were the original files. For example: 60 | 61 | ``` 62 | $ q "select count(*) from myfile.csv.qsql" 63 | ``` 64 | 65 | Notice that there's no need to write the `:::` as part of the table name, since `qsql` files that are created as caches contain only one table (e.g. the table matching the original file). 66 | 67 | Running a query that uses an sqlite/qsql database without specifying a table name will fail if there is more than one table in the database, showing the list of existing tables. This can be used in order to detect which tables exist in the database without resorting to other tools. For example: 68 | ``` 69 | $ q "select * from chinook.db:::blah" 70 | Table blah could not be found in sqlite file chinook.db . 
Existing table names: albums,sqlite_sequence,artists,customers,employees,genres,invoices,invoice_items,media_types,playlists,playlist_track,tracks,sqlite_stat1 71 | ``` 72 | 73 | ## Storing source data into a disk database 74 | The `-S` option (`--save-db-to-disk`) has been modified to match the new capabilities. It works with all types of input tables/files, and writes the output database as a standard sqlite database. I've considered making the output a multi-table `qsql` file (e.g. with the additional metadata that q uses), but some things still need to be ironed out in order to make these qsql files work seamlessly with all other aspects of q. This will probably happen in the next version. 75 | 76 | This database can be accessed directly by q later on, by providing `:::` as the table name in the query. The table names that are chosen match the original file names, but go through the following process: 77 | * The names are normalised in order to be compatible with sqlite restrictions (e.g. `x.csv` is normalised to `x_dot_csv`) 78 | * duplicate table names are de-duped by adding `_` to their names (e.g. two different csv files in separate folders which both have the name `companies` will be written to the file as `companies` and `companies_2`) 79 | 80 | This table-name normalisation happens also inside `.qsql` cache files, but in most cases there won't be any need to know these table names, since q automatically detects table names for databases which have a single-table. 81 | 82 | ## File-concatenation and wildcard-matching features - Breaking change 83 | File concatenation using '+' has been removed in this version, which is a breaking change. 84 | 85 | This was a controversial feature anyway, and can be done using standard SQL relatively easily. It also complicated the caching implementation significantly, and it seemed that it was not worth it. If there's demand for bringing this feature back, please write to me and I'll consider re-adding it. 
86 | 87 | If you have a case of using file concatenation, you can use the following SQL instead: 88 | ``` 89 | # Instead of writing 90 | $ q "select * from myfile1+myfile2" 91 | # Use the following: 92 | $ q "select * from (select * from myfile1 UNION ALL select * from myfile2)" 93 | ``` 94 | 95 | This will provide the same results, but the error checking is a bit less robust, so be mindful of whether you're performing the right query on the right files. 96 | 97 | Conceptually, this is similar to wildcard matching (e.g. `select * from myfolder/myfile*`), but I have decided to leave wildcard-matching intact, since it seems to be a more common use-case. Cache creation and use is limited for now when using wildcards. Use the same method as described above for file concatenation if you want to make sure that caches are being used. 98 | 99 | After this version is fully stabilised, I'll make more efforts to consolidate wildcard (and perhaps concatenation) to fully utilise caching seamlessly. 100 | 101 | ## Code runs only on python 3 102 | Removed the dual py2/py3 support. Since q is packaged as a self-contained executable, along with python 3.8 itself, then this is not needed anymore. 103 | 104 | Users who for some reason still use q's main source code file directly and use python 2 would need to stay with the latest 2.0.19 release. In some next version, q's code structure is going to change significantly anyway in order to become a standard python module, so using the main source code file directly would not be possible. 105 | 106 | If you are such a user, and this decision hurts you considerably, please ping me. 
107 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | [![Build and Package](https://github.com/harelba/q/workflows/BuildAndPackage/badge.svg?branch=master)](https://github.com/harelba/q/actions?query=branch%3Amaster) 2 | 3 | # q - Text as Data 4 | q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. 5 | 6 | q allows the following: 7 | 8 | * Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file. 9 | * Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory 10 | 11 | The following table shows the impact of using caching: 12 | 13 | | Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | 14 | |:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| 15 | | 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | 16 | | 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | 17 | | 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | 18 | | 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | 19 | | 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | 20 | 21 | Notice that for the current version, caching is **not enabled** by default, since the caches take disk space. Use `-C readwrite` or `-C read` to enable it for a query, or add `caching_mode` to `.qrc` to set a new default. 22 | 23 | q's web site is [https://harelba.github.io/q/](https://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. 
24 | 25 | 26 | ## Usage Examples 27 | q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and provides full support for multiple character encodings. 28 | 29 | Here are some example commands to get the idea: 30 | 31 | ```bash 32 | $ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" 33 | 34 | $ ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" 35 | 36 | $ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite3:::tracks t on (a.album_id = t.album_id)" 37 | ``` 38 | 39 | Detailed examples are [here](https://harelba.github.io/q/#examples) 40 | 41 | ## Installation 42 | **New Major Version `3.1.6` is out with a lot of significant additions.** 43 | 44 | Instructions for all OSs are [here](https://harelba.github.io/q/#installation). 45 | 46 | The previous version `2.0.19` can still be downloaded from [here](https://github.com/harelba/q/releases/tag/2.0.19) 47 | 48 | ## Contact 49 | Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. 50 | 51 | LinkedIn: [Harel Ben Attia](https://www.linkedin.com/in/harelba/) 52 | 53 | Twitter: [@harelba](https://twitter.com/harelba) 54 | 55 | Email: [harelba@gmail.com](mailto:harelba@gmail.com) 56 | 57 | q on twitter: [#qtextasdata](https://twitter.com/hashtag/qtextasdata?src=hashtag_click) 58 | 59 | Patreon: [harelba](https://www.patreon.com/harelba) - All the money received is donated to the [Center for the Prevention and Treatment of Domestic Violence](https://www.gov.il/he/departments/bureaus/molsa-almab-ramla) in my hometown - Ramla, Israel.
60 | 61 | 62 | -------------------------------------------------------------------------------- /benchmark-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BENCHMARK_PYTHON_VERSIONS=(3.8.5) 4 | -------------------------------------------------------------------------------- /bin/.qrc: -------------------------------------------------------------------------------- 1 | # 2 | # q options ini file. Put either in your home folder as .qrc or in the working directory 3 | # (both will be merged in that order) 4 | # 5 | # All options should reside in an [options] section 6 | # 7 | # Available options: 8 | # * delimiter - escaped string (e.g. use \t for tab or \x20 for space) 9 | # * output_delimiter - escaped string (e.g. use \t for tab or \x20 for space) 10 | # * gzipped - boolean True or False 11 | # * beautify - boolean True or False 12 | # * skip_header - boolean True or False 13 | # * formatting - regular string - post-query formatting - see docs for details 14 | # * encoding - regular string - required encoding. 15 | # 16 | # All options have a matching command line option.
See --help for details on defaults 17 | 18 | [options] 19 | #delimiter: \t 20 | #output_delimiter: \t 21 | #gzipped: False 22 | #beautify: True 23 | #skip_header: False 24 | #formatting: 1=%4.3f,2=%4.3f 25 | #encoding: UTF-8 26 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | -------------------------------------------------------------------------------- /bin/q.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | setlocal 4 | if exist "%~dp0..\python.exe" ( "%~dp0..\python" "%~dp0q" %* ) else ( python "%~dp0q" %* ) 5 | endlocal 6 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Required so pytest can find files properly 4 | 5 | 6 | -------------------------------------------------------------------------------- /dist/fpm-config: -------------------------------------------------------------------------------- 1 | -s dir 2 | --name q-text-as-data 3 | --license GPLv3 4 | --architecture x86_64 5 | --description "q allows performing SQL-like statements on tabular text data."
6 | --url https://github.com/harelba/q 7 | --maintainer "Harel Ben-Attia " 8 | -------------------------------------------------------------------------------- /dist/test-rpm-inside-container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | yum install -y python38 sqlite perl gcc python3-devel sqlite-devel 6 | pip3 install -r test-requirements.txt 7 | 8 | rpm -i $1 9 | Q_EXECUTABLE=q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v 10 | -------------------------------------------------------------------------------- /dist/test-using-deb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | sudo dpkg -i $1 7 | Q_EXECUTABLE=q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v 8 | 9 | -------------------------------------------------------------------------------- /dist/test-using-rpm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | RPM_LOCATION=$1 7 | 8 | docker run -i -v `pwd`:/q-sources -w /q-sources centos:8 /bin/bash -e -x ./dist/test-rpm-inside-container.sh ${RPM_LOCATION} 9 | -------------------------------------------------------------------------------- /doc/AUTHORS: -------------------------------------------------------------------------------- 1 | Copyright (C) 2012-2014 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter) 2 | 3 | Harel Ben-Attia wrote the main program 4 | 5 | -------------------------------------------------------------------------------- /doc/IMPLEMENTATION.markdown: -------------------------------------------------------------------------------- 1 | # q - Treating Text as a Database 2 | 3 | ## Implementation 4 | 5 | The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. 
The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). 6 | 7 | Please note that there are currently no checks or bounds on data size - It's up to the user to make sure things don't get too big. 8 | 9 | Please make sure to read the limitations section as well. 10 | 11 | Code-wise, I'm planning for a big refactoring, and I have added a full test suite in the latest version, so it'll be easier to do properly. 12 | 13 | ## Tests 14 | 15 | The code includes a test suite runnable through `test/test-all`. If you're planning on sending a pull request, I'd appreciate it if you could make sure that it doesn't fail. Additional ideas related to testing are most welcome. 16 | 17 | ## Contact 18 | Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. 19 | 20 | Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter 21 | 22 | -------------------------------------------------------------------------------- /doc/LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users.
We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 
49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. 
The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /doc/RATIONALE.markdown: -------------------------------------------------------------------------------- 1 | # q - Treating Text as a Database 2 | 3 | ## Why aren't other Linux tools enough? 4 | The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of job. This tool adds the declarative power of SQL to the Linux toolset, without loosing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time. 
5 | 6 | One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.). 7 | 8 | ## Philosophy 9 | This tool has been designed with general Linux/Unix design principles in mind. If you're interested in these general design principles, read the amazing book http://catb.org/~esr/writings/taoup/ and specifically http://catb.org/~esr/writings/taoup/html/ch01s06.html. If you believe that the way this tool works goes strongly against any of the principles, I would love to hear your view about it. 10 | 11 | ## Contact 12 | Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. 13 | 14 | Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter 15 | 16 | -------------------------------------------------------------------------------- /doc/THANKS: -------------------------------------------------------------------------------- 1 | Copyright (C) 2012-2014 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter) 2 | 3 | Jens Neu (jens@zeeroos.de) - For writing the initial RPM package spec 4 | barsnick (https://github.com/barsnick) - Thanks for additional RPM help 5 | StreakyCobra (https://github.com/StreakyCobra) - For providing Arch Linux RPMs 6 | -------------------------------------------------------------------------------- /doc/USAGE.markdown: -------------------------------------------------------------------------------- 1 | # q - Text as Data 2 | 3 | ## SYNOPSIS 4 | `q ` 5 | 6 | Example Execution for a delimited file: 7 | 8 | q "select * from myfile.csv" 9 | 10 | Example Execution for an sqlite3 database: 11 | 12 | q "select * from mydatabase.sqlite:::my_table_name" 13 | 14 | or 15 | 16 | q "select * from mydatabase.sqlite" 17 | 18 | if the database file contains 
only one table 19 | 20 | Auto-caching of delimited files can be activated through `-C readwrite` (writes new caches if needed) or `-C read` (only reads existing cache files) 21 | 22 | Setting the default caching mode (`-C`) can be done by writing a `~/.qrc` file. See docs for more info. 23 | 24 | ## DESCRIPTION 25 | q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. 26 | 27 | q allows the following: 28 | 29 | * Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file 30 | * Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory 31 | 32 | Query should be an SQL-like query which contains filenames instead of table names (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). 33 | 34 | The following filename types are supported: 35 | 36 | * Delimited-file filenames, including relative/absolute paths 37 | * sqlite3 database filenames, with an additional `:::` for accessing a specific table. If a database contains only one table, then denoting the table name is not needed. Examples: `mydatabase.sqlite3:::users_table` or `my_single_table_database.sqlite`. 38 | 39 | Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. q "SELECT c3,c8 from ..."). 40 | 41 | Use `-d` to specify the input delimiter. 42 | 43 | Column types are auto detected by the tool, no casting is needed. 44 | 45 | Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard. 46 | 47 | Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). 
Please use `-e`,`-E` and `-Q` to control encoding if needed. 48 | 49 | All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table), with the exception of CTE (for now). 50 | 51 | See https://github.com/harelba/q for more details. 52 | 53 | ## QUERY 54 | q gets one parameter - An SQL-like query. 55 | 56 | Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed. 57 | 58 | JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. 59 | 60 | The SQL syntax itself is sqlite's syntax. For details look at https://www.sqlite.org/lang.html or search the net for examples. 61 | 62 | **NOTE:** Full type detection is implemented, so there is no need for any casting or anything. 63 | 64 | **NOTE2:** When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. 65 | 66 | ## RUNTIME OPTIONS 67 | q can also get some runtime flags. The following parameters can be used, all optional: 68 | 69 | ```` 70 | Options: 71 | -h, --help show this help message and exit 72 | -v, --version Print version 73 | -V, --verbose Print debug info in case of problems 74 | -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME 75 | Save database to an sqlite database file 76 | -C CACHING_MODE, --caching-mode=CACHING_MODE 77 | Choose the autocaching mode (none/read/readwrite). 78 | Autocaches files to disk db so further queries will be 79 | faster. Caching is done to a side-file with the same 80 | name of the table, but with an added extension .qsql 81 | --dump-defaults Dump all default values for parameters and exit. Can 82 | be used in order to make sure .qrc file content is 83 | being read properly. 
84 | --max-attached-sqlite-databases=MAX_ATTACHED_SQLITE_DATABASES 85 | Set the maximum number of concurrently-attached sqlite 86 | dbs. This is a compile time definition of sqlite. q's 87 | performance will slow down once this limit is reached 88 | for a query, since it will perform table copies in 89 | order to avoid that limit. 90 | --overwrite-qsql=OVERWRITE_QSQL 91 | When used, qsql files (both caches and store-to-db) 92 | will be overwritten if they already exist. Use with 93 | care. 94 | 95 | Input Data Options: 96 | -H, --skip-header Skip header row. This has been changed from earlier 97 | version - Only one header row is supported, and the 98 | header row is used for column naming 99 | -d DELIMITER, --delimiter=DELIMITER 100 | Field delimiter. If none specified, then space is used 101 | as the delimiter. 102 | -p, --pipe-delimited 103 | Same as -d '|'. Added for convenience and readability 104 | -t, --tab-delimited 105 | Same as -d . Just a shorthand for handling 106 | standard tab delimited file You can use $'\t' if you 107 | want (this is how Linux expects to provide tabs in the 108 | command line 109 | -e ENCODING, --encoding=ENCODING 110 | Input file encoding. Defaults to UTF-8. set to none 111 | for not setting any encoding - faster, but at your own 112 | risk... 113 | -z, --gzipped Data is gzipped. Useful for reading from stdin. For 114 | files, .gz means automatic gunzipping 115 | -A, --analyze-only Analyze sample input and provide information about 116 | data types 117 | -m MODE, --mode=MODE 118 | Data parsing mode. fluffy, relaxed and strict. In 119 | strict mode, the -c column-count parameter must be 120 | supplied as well 121 | -c COLUMN_COUNT, --column-count=COLUMN_COUNT 122 | Specific column count when using relaxed or strict 123 | mode 124 | -k, --keep-leading-whitespace 125 | Keep leading whitespace in values. Default behavior 126 | strips leading whitespace off values, in order to 127 | provide out-of-the-box usability for simple use cases. 
128 | If you need to preserve whitespace, use this flag. 129 | --disable-double-double-quoting 130 | Disable support for double double-quoting for escaping 131 | the double quote character. By default, you can use "" 132 | inside double quoted fields to escape double quotes. 133 | Mainly for backward compatibility. 134 | --disable-escaped-double-quoting 135 | Disable support for escaped double-quoting for 136 | escaping the double quote character. By default, you 137 | can use \" inside double quoted fields to escape 138 | double quotes. Mainly for backward compatibility. 139 | --as-text Don't detect column types - All columns will be 140 | treated as text columns 141 | -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE 142 | Input quoting mode. Possible values are all, minimal 143 | and none. Note the slightly misleading parameter name, 144 | and see the matching -W parameter for output quoting. 145 | -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT 146 | Sets the maximum column length. 147 | -U, --with-universal-newlines 148 | Expect universal newlines in the data. Limitation: -U 149 | works only with regular files for now, stdin or .gz 150 | files are not supported yet. 151 | 152 | Output Options: 153 | -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER 154 | Field delimiter for output. If none specified, then 155 | the -d delimiter is used if present, or space if no 156 | delimiter is specified 157 | -P, --pipe-delimited-output 158 | Same as -D '|'. Added for convenience and readability. 159 | -T, --tab-delimited-output 160 | Same as -D . Just a shorthand for outputting tab 161 | delimited output. You can use -D $'\t' if you want. 162 | -O, --output-header 163 | Output header line. Output column-names are determined 164 | from the query itself. Use column aliases in order to 165 | set your column names in the query. For example, 166 | 'select name FirstName,value1/value2 MyCalculation 167 | from ...'. 
This can be used even if there was no 168 | header in the input. 169 | -b, --beautify Beautify output according to actual values. Might be 170 | slow... 171 | -f FORMATTING, --formatting=FORMATTING 172 | Output-level formatting, in the format X=fmt,Y=fmt 173 | etc, where X,Y are output column numbers (e.g. 1 for 174 | first SELECT column etc. 175 | -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING 176 | Output encoding. Defaults to 'none', leading to 177 | selecting the system/terminal encoding 178 | -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE 179 | Output quoting mode. Possible values are all, minimal, 180 | nonnumeric and none. Note the slightly misleading 181 | parameter name, and see the matching -w parameter for 182 | input quoting. 183 | -L, --list-user-functions 184 | List all user functions 185 | 186 | Query Related Options: 187 | -q QUERY_FILENAME, --query-filename=QUERY_FILENAME 188 | Read query from the provided filename instead of the 189 | command line, possibly using the provided query 190 | encoding (using -Q). 191 | -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING 192 | query text encoding. Experimental. Please send your 193 | feedback on this 194 | ``` 195 | 196 | ### Table names 197 | The table names are the actual file names that you want to read from. Path names are allowed. Use "-" if you want to read from stdin (e.g. `q "SELECT * FROM -"`) 198 | 199 | Wildcard matches are supported - For example: `SELECT ... FROM ... mydata*.dat` 200 | 201 | Files with .gz extension are considered to be gzipped and decompressed on the fly. 202 | 203 | ### Parsing Modes 204 | q supports two parsing modes: 205 | 206 | * `relaxed` - This is the default mode. It tries to lean towards simplicity of use. When a row doesn't contains enough columns, they'll be filled with nulls, and when there are too many, the extra values will be merged to the last column. 
Defining the number of expected columns in this mode is done using the `-c` parameter. If it is not provided, then the number of columns is detected automatically (In most use cases, there is no need to specify `-c`) 207 | * `strict` - Strict mode is for hardcore csv/tsv parsing. Whenever a row doesn't contain the proper number of columns, processing will stop. `-c` must be provided when using this mode 208 | 209 | ### Output formatting option 210 | The format of F is as a list of X=f separated by commas, where X is a column number and f is a python format: 211 | 212 | * X - column number - This is the SELECTed column (or expression) number, not the one from the original table. E.g, 1 is the first SELECTed column, 3 is the third SELECTed column. 213 | * f - A python formatting string such as {} - See https://www.w3schools.com/python/ref_string_format.asp for details if needed. 214 | 215 | ## EXAMPLES 216 | Example 1: `ls -ltrd * | q "select c1,count(1) from - group by c1"` 217 | 218 | This example would print a count of each unique permission string in the current folder. 219 | 220 | Example 2: `seq 1 1000 | q "select avg(c1),sum(c1) from -"` 221 | 222 | This example would provide the average and the sum of the numbers in the range 1 to 1000 223 | 224 | Example 3: `sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc"` 225 | 226 | This example will output the total size in MB per user+group in the /tmp subtree 227 | 228 | Example 4: `ps -ef | q -H "select UID,count(*) cnt from - group by UID order by cnt desc limit 3"` 229 | 230 | This example will show process counts per UID, calculated from ps data. 
Note that the column names provided by ps are being used as column name in the query (The -H flag activates that option) 231 | 232 | ## AUTHOR 233 | Harel Ben-Attia (harelba@gmail.com) 234 | 235 | [@harelba](https://twitter.com/harelba) on Twitter 236 | 237 | Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. 238 | 239 | ## COPYRIGHT 240 | Copyright (C) 2012--2021 Harel Ben Attia 241 | 242 | This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. 243 | 244 | This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA 245 | 246 | 247 | -------------------------------------------------------------------------------- /examples/EXAMPLES.markdown: -------------------------------------------------------------------------------- 1 | # q - Treating Text as a Database 2 | 3 | See below for a JOIN example. 4 | 5 | ## Tutorial 6 | This is a tutorial for beginners. If you're familiar with the concept and just wanna see some full fledged examples, take a look [here](README.markdown#examples) in the main page. 7 | 8 | Tutorial steps: 9 | 10 | 1. We'll start with a simple example and work from there. The file `exampledatafile` contains the output of an `ls -l` command, a list of files in some directory. In this example we'll do some calculations on this file list. 
11 | * The following commands will count the lines in the file *exampledatafile*, effectively getting the number of files in the directory. The output will be exactly as if we ran the `wc -l` command. 12 | 13 | q "SELECT COUNT(1) FROM exampledatafile" 14 | 15 | cat exampledatafile | q "SELECT COUNT(1) FROM -" 16 | 17 | * Now, let's assume we want to know the number of files per date in the directory. Notice that the date is in column 6. 18 | 19 | q "SELECT c6,COUNT(1) FROM exampledatafile GROUP BY c6" 20 | 21 | * The results will show the number of files per date. However, there's a lot of "noise" - dates in which there is only one file. Let's leave only the ones which have 3 files or more: 22 | 23 | q "SELECT c6,COUNT(1) AS cnt FROM exampledatafile GROUP BY c6 HAVING cnt >= 3" 24 | 25 | * Now, let's see if we can get something more interesting. The following command will provide the **total size** of the files for each date. Notice that the file size is in c5. 26 | 27 | q "SELECT c6,SUM(c5) AS size FROM exampledatafile GROUP BY c6" 28 | 29 | * We can see the results. However, the sums are in bytes. Let's show the same results but in KB: 30 | 31 | q "SELECT c6,SUM(c5)/1024.0 AS size FROM exampledatafile GROUP BY c6" 32 | 33 | * The last command provided us with a list of results, but there is no order and the list is too long. Let's get the Top 5 dates: 34 | 35 | q "SELECT c6,SUM(c5)/1024.0 AS size FROM exampledatafile GROUP BY c6 ORDER BY size DESC LIMIT 5" 36 | 37 | * Now we'll see how we can format the output itself, so it looks better: 38 | 39 | q -f "2=%4.2f" "SELECT c6,SUM(c5)/1024.0 AS size FROM exampledatafile GROUP BY c6 ORDER BY size DESC LIMIT 5" 40 | 41 | * (An example of using JOIN will be added here - In the mean time just remember you have to use table alias for JOINed "tables") 42 | 43 | 2. A more complicated example, showing time manipulation. Let's assume that we have a file with a timestamp as its first column. 
We'll show how it's possible to get the number of rows per full minute: 44 | 45 | q "SELECT DATETIME(ROUND(c1/60000)*60000/1000,'unixepoch','-05:00') as min, COUNT(1) FROM datafile*.gz GROUP BY min" 46 | 47 | There are several things to notice here: 48 | 49 | * The timestamp value is in the first column, hence c1. 50 | * The timestamp is assumed to be a unix epoch timestamp, but in ms, and DATETIME accepts seconds, so we need to divide by 1000 51 | * The full-minute rounding is done by dividing by 60000 (ms), rounding and then multiplying by the same amount. Rounding to an hour, for example, would be the same except for having 3600000 instead of 60000. 52 | * We use DATETIME's capability in order to output the time in localtime format. In that case, it's converted to New York time (hence the -5 hours) 53 | * The filename is actually all files matching `datafile*.gz` - Multiple files can be read, and since they have a .gz extension, they are decompressed on the fly. 54 | * **NOTE:** For non-SQL people, the date manipulation may seem odd at first, but this is standard SQL processing for timestamps and it's easy to get used to. 55 | 56 | ## JOIN example 57 | 58 | __Command 1 (Join data from two files):__ 59 | 60 | The following command _joins_ an ls output (`exampledatafile`) and a file containing rows of **group-name,email** (`group-emails-example`) and provides a row of **filename,email** for each of the emails of the group. For brevity of output, there is also a filter for a specific filename called `ppp` which is achieved using a WHERE clause. 
61 | ```bash 62 | q "select myfiles.c8,emails.c2 from exampledatafile myfiles join group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = 'ppp'" 63 | ``` 64 | 65 | __Output 1: (rows of filename,email):__ 66 | ```bash 67 | ppp dip.1@otherdomain.com 68 | ppp dip.2@otherdomain.com 69 | ``` 70 | 71 | You can see that the ppp filename appears twice, each time matched to one of the emails of the group `dip` to which it belongs. Take a look at the files [`exampledatafile`](exampledatafile) and [`group-emails-example`](group-emails-example) for the data. 72 | 73 | ## Writing the data into an sqlite3 database 74 | q now supports writing its data into a disk-based sqlite3 database file. In order to write the data to a disk database, use the `-S` parameter (`--save-db-to-disk`) with a filename as a parameter. Note that you still need to provide a query as a parameter, even though it will not be executed. The tool will provide the proper sqlite3 query to run after writing the data to the database, allowing you to copy-paste it into the sqlite3 command line. If you don't care about running any query, just use "select 1" as the query. 75 | 76 | Here's an example that will write the output into `some.db` for further processing. Note that we've added the `-c 1` parameter to prevent q warning us about having only one column. 77 | ``` 78 | $ seq 1 100 | ./q "select count(*) from -" -S some.db -c 1 79 | Going to save data into a disk database: some.db 80 | Data has been loaded in 0.002 seconds 81 | Saving data to db file some.db 82 | Data has been saved into some.db . Saving has taken 0.018 seconds 83 | Query to run on the database: select count(*) from `-`; 84 | 85 | $ sqlite3 some.db 86 | SQLite version 3.19.3 2017-06-27 16:48:08 87 | Enter ".help" for usage hints.
88 | sqlite> .tables 89 | - 90 | sqlite> .schema 91 | CREATE TABLE IF NOT EXISTS "-" ("c1" INT); 92 | sqlite> select count(*) from `-`; 93 | 100 94 | sqlite> 95 | ``` 96 | 97 | Note that table names are explicitly set to the filenames in the original query (e.g. filenames), which means that in many cases you'd need to escape the table names in sqlite3 with backticks. For example, the name of the table above is `-`, and in order to use it in an sqlite3 query, it is backticked, otherwise it won't conform to a proper table name. I've decided to emphasize consistency and simplicity in this case, instead of trying to provide some normalization/sanitation of filenames, since I believe that doing it would cause much confusion and will be less effective. Any ideas and comments on this are most welcome, obviously. 98 | 99 | ### Choosing the method of writing the sqlite3 database 100 | There's another parameter that controls the method of writing to the sqlite3 database - `--save-db-to-disk-method`. The value can either be `standard` or `fast`. The fast method requires changes in the packaging of q, since it's dependent on another python module (https://github.com/husio/python-sqlite3-backup by @husio - Thanks!). However, there are some complications with seamlessly packaging it without possibly causing some backward compatibility issues (see PR #159 for some details), so it's not the standard method as of yet. If you're an advanced user, and in need for the faster method due to very large files etc., you'd need to manually install this python package for the fast method to work - Run `pip install sqlitebck` on your python installation. Obviously, I'm considering this as a bug that I need to fix. 101 | 102 | ## Installation 103 | Installation instructions can be found [here](../doc/INSTALL.markdown) 104 | 105 | ## Contact 106 | Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course.
107 | 108 | Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter 109 | 110 | -------------------------------------------------------------------------------- /examples/exampledatafile: -------------------------------------------------------------------------------- 1 | -rw-r--r-- 1 root root 2064 2006-11-23 21:33 netscsid.conf 2 | -rw-r--r-- 1 root root 1343 2007-01-09 20:39 wodim.conf 3 | -rw-r--r-- 1 root root 112 2007-06-22 18:08 apg.conf 4 | -rw-r--r-- 1 root root 15752 2009-07-25 18:13 ltrace.conf 5 | -rw-r--r-- 1 root root 624 2010-05-16 14:18 mtools.conf 6 | -rw-r--r-- 1 root root 395 2010-06-20 11:11 anacrontab 7 | -rw-r--r-- 1 root root 18673 2010-10-18 06:49 globash.rc 8 | -rw-r--r-- 1 root root 23958 2010-11-15 10:07 mime.types 9 | -rw-r--r-- 1 root root 449 2010-11-15 10:07 mailcap.order 10 | -rw-r--r-- 1 root root 8453 2010-12-03 22:32 nanorc 11 | -rwxr-xr-x 1 root root 268 2010-12-07 12:10 rmt 12 | -rw-r--r-- 1 root root 1147 2011-01-04 16:27 rarfiles.lst 13 | -rw-r--r-- 1 root root 600 2011-03-09 13:22 deluser.conf 14 | drwxr-xr-x 2 root root 4096 2011-03-15 23:05 ODBCDataSources 15 | -rw-r--r-- 1 root root 0 2011-03-15 23:05 odbc.ini 16 | -rw-r--r-- 1 root root 801 2011-03-17 20:09 mke2fs.conf 17 | drwxr-xr-x 2 root root 4096 2011-04-30 19:12 insserv.conf.d 18 | -rw-r--r-- 1 root root 839 2011-04-30 19:12 insserv.conf 19 | drwxr-xr-x 3 root root 4096 2011-04-30 19:12 insserv 20 | -rw-r--r-- 1 root root 373 2011-05-01 02:15 rearj.cfg 21 | -rw-r--r-- 1 root root 1260 2011-05-02 15:19 ucf.conf 22 | -rw-r----- 1 root daemon 144 2011-05-16 13:32 at.deny 23 | -rw-r--r-- 1 root root 4496 2011-05-17 23:21 wgetrc 24 | drwxr-xr-x 2 root root 4096 2011-05-18 12:01 libpaper.d 25 | -rw-r--r-- 1 root root 1975 2011-05-18 13:00 bash.bashrc 26 | -rw-r----- 1 root fuse 216 2011-05-18 13:12 fuse.conf 27 | -rw-r--r-- 1 root root 19666 2011-05-24 18:26 services 28 | -rw-r--r-- 1 root root 887 2011-05-24 18:26 rpc 29 | -rw-r--r-- 1 root 
root 2859 2011-05-24 18:26 protocols 30 | -rw-r--r-- 1 root root 4728 2011-06-07 14:10 hdparm.conf 31 | -rw-r--r-- 1 root root 2083 2011-06-10 19:58 sysctl.conf 32 | -rw-r--r-- 1 root root 2290 2011-06-14 18:51 libuser.conf 33 | -rw-r--r-- 1 root root 1195 2011-06-17 20:13 rsyslog.conf 34 | -rw-r--r-- 1 root root 2570 2011-06-22 13:39 locale.alias 35 | -rw-r--r-- 1 root root 2969 2011-06-23 10:01 debconf.conf 36 | -rw-r--r-- 1 root root 3828 2011-06-24 12:28 securetty 37 | -rw-r--r-- 1 root root 10551 2011-06-24 12:28 login.defs 38 | -rw-r--r-- 1 root root 91 2011-07-08 20:13 networks 39 | -rw-r--r-- 1 root root 267 2011-07-08 20:13 legal 40 | -rw-r--r-- 1 root root 92 2011-07-08 20:13 host.conf 41 | -rw-r--r-- 1 root root 11 2011-07-08 20:13 debian_version 42 | -rw-r--r-- 1 root root 10183 2011-07-18 23:45 sensors3.conf 43 | -rw-r--r-- 1 root root 3587 2011-07-27 14:14 lftp.conf 44 | -rw-r--r-- 1 root root 5173 2011-07-27 14:32 manpath.config 45 | -rw-r--r-- 1 root root 645 2011-07-27 14:36 ts.conf 46 | -rw-r--r-- 1 root root 1586 2011-07-27 14:57 request-key.conf 47 | -rw-r--r-- 1 root root 111 2011-08-08 23:52 magic.mime 48 | -rw-r--r-- 1 root root 111 2011-08-08 23:52 magic 49 | -rw-r--r-- 1 root root 321 2011-08-09 19:16 blkid.conf 50 | drwxr-xr-x 2 root root 4096 2011-08-09 19:19 usb_modeswitch.d 51 | -rw-r--r-- 1 root root 3279 2011-08-11 15:59 lsb-base-logging.sh 52 | -rw-r--r-- 1 root root 326 2011-08-17 16:15 updatedb.conf 53 | -rw-r--r-- 1 root root 552 2011-08-19 04:05 pam.conf 54 | -rw-r--r-- 1 root root 652 2011-08-25 16:14 zsh_command_not_found 55 | -rw-r--r-- 1 root root 592 2011-08-26 11:58 usb_modeswitch.conf 56 | -rw-r--r-- 1 root root 1721 2011-09-01 19:49 inputrc 57 | -r--r----- 1 root root 574 2011-09-11 22:09 sudoers 58 | drwxr-xr-x 2 root root 4096 2011-09-19 12:51 lsb-base 59 | -rw-r--r-- 1 root root 724 2011-09-20 03:04 crontab 60 | -rw-r--r-- 1 root root 643 2011-09-20 08:04 colord.conf 61 | -rw-r--r-- 1 root root 599 2011-10-04 18:19 
logrotate.conf 62 | -rw-r--r-- 1 root root 344 2011-10-04 21:56 bindresvport.blacklist 63 | -rw-r--r-- 1 root root 3343 2011-10-04 21:56 gai.conf 64 | -rw-r--r-- 1 root root 58753 2011-10-04 22:53 bash_completion 65 | drwxr-xr-x 2 root root 4096 2011-10-05 22:05 update-notifier 66 | -rw-r--r-- 1 root root 100 2011-10-08 01:45 lsb-release 67 | -rw-r--r-- 1 root root 13 2011-10-09 09:31 issue.net 68 | -rw-r--r-- 1 root root 20 2011-10-09 09:31 issue 69 | -rw-r--r-- 1 root root 1309 2011-10-09 09:41 kerneloops.conf 70 | drwxr-xr-x 2 root root 4096 2011-10-12 16:26 opt 71 | -rw-r--r-- 1 root root 34 2011-10-12 16:26 ld.so.conf 72 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 terminfo 73 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 python2.7 74 | -rw-r--r-- 1 root root 547 2011-10-12 16:27 profile 75 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 iproute2 76 | -rw-r--r-- 1 root root 79 2011-10-12 16:27 environment 77 | -rw-r--r-- 1 root root 165 2011-10-12 16:27 shells 78 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 depmod.d 79 | -rw-r--r-- 1 root root 2981 2011-10-12 16:27 adduser.conf 80 | drwxr-xr-x 3 root root 4096 2011-10-12 16:27 udev 81 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 sysctl.d 82 | -rwxr-xr-x 1 root root 306 2011-10-12 16:27 rc.local 83 | drwxr-xr-x 6 root root 4096 2011-10-12 16:27 network 84 | drwxr-xr-x 5 root root 4096 2011-10-12 16:27 initramfs-tools 85 | drwxr-xr-x 3 root root 4096 2011-10-12 16:27 systemd 86 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 sudoers.d 87 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 vim 88 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 newt 89 | drwxr-xr-x 4 root root 4096 2011-10-12 16:27 dhcp 90 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 cron.hourly 91 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 python 92 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 kbd 93 | drwxr-xr-x 2 root root 4096 2011-10-12 16:27 console-setup 94 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 ca-certificates 95 | 
drwxr-xr-x 4 root root 4096 2011-10-12 16:28 perl 96 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 pkcs11 97 | drwxr-xr-x 5 root root 4096 2011-10-12 16:28 pm 98 | drwxr-xr-x 6 root root 4096 2011-10-12 16:28 gconf 99 | drwxr-xr-x 6 root root 4096 2011-10-12 16:28 apm 100 | drwxr-xr-x 5 root root 4096 2011-10-12 16:28 polkit-1 101 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 emacs 102 | drwxr-xr-x 5 root root 4096 2011-10-12 16:28 ConsoleKit 103 | drwxr-xr-x 4 root root 4096 2011-10-12 16:28 ghostscript 104 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 doc-base 105 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 gnome-settings-daemon 106 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 etc 107 | drwxr-xr-x 3 root root 4096 2011-10-12 16:28 sound 108 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 gnome-vfs-2.0 109 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 ifplugd 110 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 dhcp3 111 | drwxr-xr-x 4 root root 4096 2011-10-12 16:29 fonts 112 | drwxr-xr-x 4 root root 4096 2011-10-12 16:29 ssl 113 | -rw-r--r-- 1 root root 7014 2011-10-12 16:29 ca-certificates.conf 114 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 foomatic 115 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 gtk-3.0 116 | -rw-r--r-- 1 root root 880 2011-10-12 16:29 hosts.deny 117 | -rw-r--r-- 1 root root 580 2011-10-12 16:29 hosts.allow 118 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 sensors.d 119 | drwxr-xr-x 4 root root 4096 2011-10-12 16:29 dbus-1 120 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 groff 121 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 calendar 122 | drwxr-xr-x 4 root root 4096 2011-10-12 16:29 security 123 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 apparmor 124 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 profile.d 125 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 grub.d 126 | drwxr-s--- 2 root dip 4096 2011-10-12 16:29 chatscripts 127 | drwxr-xr-x 3 root root 4096 2011-10-12 16:29 update-manager 128 | drwxr-xr-x 3 root root 
4096 2011-10-12 16:29 ufw 129 | drwxr-xr-x 2 root root 4096 2011-10-12 16:29 rsyslog.d 130 | drwxr-xr-x 3 root root 4096 2011-10-12 16:30 acpi 131 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 gnome-app-install 132 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 cron.monthly 133 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 cron.d 134 | drwxr-xr-x 5 root root 4096 2011-10-12 16:30 apport 135 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 cron.weekly 136 | drwxr-xr-x 3 root root 4096 2011-10-12 16:30 avahi 137 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 at-spi2 138 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 bluetooth 139 | drwxr-xr-x 3 root root 4096 2011-10-12 16:30 sgml 140 | drwxr-xr-x 4 root root 4096 2011-10-12 16:30 defoma 141 | drwxr-xr-x 3 root root 4096 2011-10-12 16:30 compizconfig 142 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 checkbox.d 143 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 skel 144 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 gdb 145 | drwxr-xr-x 3 root root 4096 2011-10-12 16:30 firefox 146 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 obex-data-server 147 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 UPower 148 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 snmp 149 | -rw-r--r-- 1 root root 513 2011-10-12 16:30 nsswitch.conf 150 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 wpa_supplicant 151 | drwxr-xr-x 8 root dip 4096 2011-10-12 16:30 ppp 152 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 pcmcia 153 | drwxr-xr-x 5 root root 4096 2011-10-12 16:30 NetworkManager 154 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 cupshelpers 155 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 xml 156 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 thunderbird 157 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 update-motd.d 158 | drwxr-xr-x 4 root root 4096 2011-10-12 16:30 speech-dispatcher 159 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 ginn 160 | drwxr-xr-x 2 root root 12288 2011-10-12 16:30 brltty 161 | -rw-r--r-- 1 root root 33 
2011-10-12 16:30 brlapi.key 162 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 gamin 163 | -rw-r--r-- 1 root root 7649 2011-10-12 16:30 pnm2ppa.conf 164 | drwxr-xr-x 2 root root 4096 2011-10-12 16:30 hp 165 | drwxr-xr-x 4 root root 4096 2011-10-12 16:30 mono 166 | drwxr-xr-x 2 root root 4096 2011-10-12 16:31 xul-ext 167 | drwxr-xr-x 3 root root 4096 2011-10-12 16:31 sane.d 168 | -rw-r--r-- 1 root root 54 2011-10-12 16:31 crypttab 169 | -rw-r--r-- 1 root root 227 2011-12-18 11:43 hosts 170 | -rw-r--r-- 1 root root 13 2011-12-18 11:43 hostname 171 | -rw-r--r-- 1 root root 10 2011-12-18 11:45 adjtime 172 | drwxr-xr-x 2 root root 4096 2011-12-18 11:51 libreoffice 173 | drwxr-xr-x 2 root root 4096 2011-12-18 11:52 dictionaries-common 174 | -rw-r--r-- 1 root root 350 2011-12-18 11:52 popularity-contest.conf 175 | -rw-r--r-- 1 root root 7 2011-12-18 11:52 papersize 176 | -rw-r--r-- 1 root root 91 2011-12-18 11:52 kernel-img.conf 177 | -rw-r--r-- 1 root root 15 2011-12-18 12:02 timezone 178 | -rw-r--r-- 1 root root 2197 2011-12-18 12:02 localtime 179 | drwxr-xr-x 2 root root 4096 2011-12-18 12:04 ldap 180 | drwxr-xr-x 2 root root 4096 2011-12-18 12:04 pulse 181 | drwxr-xr-x 2 root root 4096 2011-12-18 12:04 timidity 182 | drwxr-xr-x 2 root root 4096 2011-12-18 12:04 wildmidi 183 | drwxr-xr-x 2 root root 4096 2011-12-18 12:04 gtk-2.0 184 | drwxr-xr-x 5 root root 4096 2011-12-18 12:05 java-6-openjdk 185 | drwxr-xr-x 2 root root 4096 2011-12-18 12:05 icedtea-web 186 | drwxr-xr-x 6 root root 4096 2011-12-18 12:08 kernel 187 | drwxr-xr-x 3 root root 4096 2011-12-18 12:09 OpenCL 188 | drwxr-xr-x 3 root root 4096 2011-12-18 12:09 dkms 189 | drwxr-xr-x 2 root root 4096 2011-12-18 12:09 modprobe.d 190 | -rw------- 1 root harel 0 2011-12-18 13:21 mtab.fuselock 191 | drwxr-xr-x 2 root root 4096 2011-12-18 13:30 gnome 192 | drwxr-xr-x 4 root root 4096 2011-12-18 14:44 java-6-sun 193 | drwxr-xr-x 2 root root 4096 2011-12-18 15:06 subversion 194 | drwxr-xr-x 2 root root 4096 2011-12-18 
15:37 bonobo-activation 195 | drwxr-xr-x 2 root root 4096 2011-12-19 10:13 purple 196 | drwxr-xr-x 2 root root 4096 2011-12-19 14:27 lightdm 197 | drwxr-xr-x 2 root root 4096 2011-12-19 22:49 ld.so.conf.d 198 | drwxr-xr-x 5 root root 4096 2011-12-19 22:50 xdg 199 | drwxr-xr-x 6 root root 4096 2011-12-19 23:19 resolvconf 200 | drwxr-xr-x 2 root root 4096 2011-12-19 23:19 rcS.d 201 | drwxr-xr-x 2 root root 4096 2011-12-22 18:57 ssh 202 | drwxr-xr-x 2 root root 4096 2011-12-23 12:05 qt3 203 | drwxr-xr-x 2 root root 4096 2011-12-23 16:09 openvpn 204 | drwxr-xr-x 4 root root 4096 2011-12-23 17:02 vlc 205 | drwxr-xr-x 4 root root 4096 2011-12-23 17:17 dconf 206 | drwxr-xr-x 6 root root 4096 2011-12-23 17:17 gdm 207 | drwxr-xr-x 3 root root 4096 2011-12-24 18:47 samba 208 | drwxr-xr-x 2 root root 4096 2011-12-25 10:39 gtags 209 | drwxr-xr-x 2 root root 4096 2012-01-03 16:01 cron.daily 210 | drwxr-xr-x 7 root root 4096 2012-01-03 16:01 apache2 211 | -rw-r--r-- 1 root root 664 2012-01-06 11:11 fstab.bak 212 | -rw-r--r-- 1 root root 211 2012-01-10 09:40 modules 213 | -rw------- 1 root root 789 2012-01-11 17:49 gshadow- 214 | -rw------- 1 root root 951 2012-01-11 17:49 group- 215 | -rw------- 1 root root 1343 2012-01-11 17:49 shadow- 216 | -rw------- 1 root root 1863 2012-01-11 17:49 passwd- 217 | -rw-r----- 1 root shadow 1343 2012-01-11 17:49 shadow 218 | -rw-r--r-- 1 root root 1878 2012-01-11 17:49 passwd 219 | drwxr-xr-x 5 root root 4096 2012-01-11 17:49 logcheck 220 | drwxr-xr-x 8 root root 4096 2012-01-11 17:49 apparmor.d 221 | drwxr-xr-x 2 root root 4096 2012-01-11 17:49 init 222 | drwxr-xr-x 3 root root 4096 2012-01-11 17:49 mysql 223 | drwxr-xr-x 4 root root 4096 2012-01-13 12:47 dpkg 224 | drwxr-xr-x 3 root root 4096 2012-01-13 12:47 bash_completion.d 225 | drwxr-xr-x 2 root root 4096 2012-01-13 12:48 R 226 | drwxr-xr-x 10 root root 4096 2012-01-16 16:08 X11 227 | drwxr-xr-x 2 root root 12288 2012-01-21 19:44 alternatives 228 | -rw-r--r-- 1 root root 773 2012-01-22 
14:03 fstab 229 | drwxr-xr-x 3 root root 4096 2012-01-27 10:53 java 230 | drwxr-xr-x 3 root root 4096 2012-01-28 17:24 gimp 231 | drwxr-xr-x 6 root root 4096 2012-01-28 17:27 apt 232 | -rw-r--r-- 1 root root 23432 2012-01-28 17:35 mailcap 233 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 logrotate.d 234 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 default 235 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 init.d 236 | -rw-r--r-- 1 root root 972 2012-01-28 17:35 group 237 | -rw-r----- 1 root shadow 807 2012-01-28 17:35 gshadow 238 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 pam.d 239 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc6.d 240 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc5.d 241 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc4.d 242 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc3.d 243 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc2.d 244 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc1.d 245 | drwxr-xr-x 2 root root 4096 2012-01-28 17:35 rc0.d 246 | -rw-r--r-- 1 root root 136548 2012-01-28 17:35 ld.so.cache 247 | -rw-r--r-- 1 root root 697 2012-01-31 00:40 mtab 248 | drwxr-xr-x 4 root lp 4096 2012-01-31 00:48 cups 249 | -------------------------------------------------------------------------------- /examples/group-emails-example: -------------------------------------------------------------------------------- 1 | root root.1@mydomain.com 2 | harel harel.1@mydomain.com 3 | root root.2@mydomain.com 4 | root root.3@mydomain.com 5 | daemon daemon.1@otherdomain.com 6 | dip dip.1@otherdomain.com 7 | dip dip.2@otherdomain.com 8 | fuse fuse.A@mydomain.com 9 | fuse fuse.B@mydomain.com 10 | fuse fuse.C@mydomain.com 11 | lpa lpa.1@mydomain.com 12 | shadow forsaken.1@mydomain.com 13 | -------------------------------------------------------------------------------- /mkdocs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Generate web site 3 | 4 | # mkdocs folder under project root 5 | $ `cd 
mkdocs` 6 | 7 | * create a pyenv virtual environment 8 | 9 | $ `pip install -r requirements.txt` 10 | 11 | $ `./generate-web-site.sh` (static files will be generated into `./generated-site`) 12 | 13 | $ `git checkout gh-pages` 14 | 15 | $ `cd ../` # back to project root 16 | 17 | $ `scp -r mkdocs/generated-site/* ./` 18 | 19 | $ `git add` all modified files 20 | 21 | * commit to git 22 | 23 | $ `git push origin gh-pages` 24 | 25 | -------------------------------------------------------------------------------- /mkdocs/docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/.DS_Store -------------------------------------------------------------------------------- /mkdocs/docs/about.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | ### Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/) 4 | 5 | ### Twitter [@harelba](https://twitter.com/harelba) 6 | 7 | ### Email [harelba@gmail.com](mailto:harelba@gmail.com) 8 | 9 | ### Patreon [harelba](https://www.patreon.com/harelba) 10 | All the money received is donated to the [Center for the Prevention and Treatment of Domestic Violence](https://www.gov.il/he/departments/bureaus/molsa-almab-ramla) in my hometown - Ramla, Israel. 11 | 12 | Become a Patron! 
13 | 14 | ### Chinese translation [jinzhencheng@outlook.com](mailto:jinzhencheng@outlook.com) 15 | 16 | -------------------------------------------------------------------------------- /mkdocs/docs/fsg9b9b1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/fsg9b9b1.txt -------------------------------------------------------------------------------- /mkdocs/docs/google0efeb4ff0a886e81.html: -------------------------------------------------------------------------------- 1 | google-site-verification: google0efeb4ff0a886e81.html -------------------------------------------------------------------------------- /mkdocs/docs/img/bg_hr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/bg_hr.png -------------------------------------------------------------------------------- /mkdocs/docs/img/blacktocat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/blacktocat.png -------------------------------------------------------------------------------- /mkdocs/docs/img/icon_download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/icon_download.png -------------------------------------------------------------------------------- /mkdocs/docs/img/q-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/q-logo.png -------------------------------------------------------------------------------- 
/mkdocs/docs/img/q-logo1.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/q-logo1.ico -------------------------------------------------------------------------------- /mkdocs/docs/img/q-logo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/q-logo1.png -------------------------------------------------------------------------------- /mkdocs/docs/img/sprite_download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/sprite_download.png -------------------------------------------------------------------------------- /mkdocs/docs/img/sprite_download3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/sprite_download3.png -------------------------------------------------------------------------------- /mkdocs/docs/img/sprite_download4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/sprite_download4.png -------------------------------------------------------------------------------- /mkdocs/docs/img/torii-favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/mkdocs/docs/img/torii-favicon.ico -------------------------------------------------------------------------------- /mkdocs/docs/index.md: -------------------------------------------------------------------------------- 1 
| # q - Run SQL directly on CSV or TSV files 2 | 3 | [![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) 4 | [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) 5 | 6 | ## Overview 7 | q's purpose is to bring SQL expressive power to the Linux command line by providing easy access to text as actual data, and allowing direct access to multi-file sqlite3 databases. 8 | 9 | ```bash 10 | q 11 | ``` 12 | 13 | q allows the following: 14 | 15 | * Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file 16 | 17 | ```bash 18 | # Simple query from a file, columns are named c1...cN 19 | q "select c1,c5 from myfile.csv" 20 | 21 | # -d '|' sets the input delimiter, -H says there's a header 22 | q -d '|' -H "select my_field from myfile.delimited-file-with-pipes" 23 | 24 | # -C readwrite writes a cache for the csv file 25 | q -d , -H "select my_field from myfile.csv" -C readwrite 26 | 27 | # -C read tells q to use the cache 28 | q -d , -H "select my_field from myfile.csv" -C read 29 | 30 | # Setting the default caching mode (`-C`) can be done by writing a `~/.qrc` file 31 | ``` 32 | 33 | * Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory 34 | 35 | ```bash 36 | q "select * from mydatabase.sqlite:::my_table_name" 37 | 38 | or 39 | 40 | q "select * from mydatabase.sqlite" 41 | 42 | if the database file contains only one table 43 | 44 | # sqlite files are autodetected, no need for any special filename extension 45 | ``` 46 | 47 | The following table shows the impact of using caching: 48 | 49 | | Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | 50 | 
|:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| 51 | | 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | 52 | | 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | 53 | | 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | 54 | | 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | 55 | | 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | 56 | 57 | Notice that for the current version, caching is **not enabled** by default, since the caches take disk space. Use `-C readwrite` or `-C read` to enable it for a query, or add `caching_mode` to `.qrc` to set a new default. 58 | 59 | q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and provides full support for multiple character encodings. 60 | 61 | The new features - autocaching, direct querying of sqlite database and the use of `~/.qrc` file are described in detail in [here](https://github.com/harelba/q/blob/master/QSQL-NOTES.md). 62 | 63 | Download the tool using the links in the [installation](#installation) below and play with it. 64 | 65 | ### Encodings 66 | | | | 67 | |:--------------------------------------:|:-----------------------------------------------:| 68 | | 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート | 69 | | 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются | 70 | 71 | **Non-english users:** q fully supports all types of encoding. Use `-e data-encoding` to set the input data encoding, `-Q query-encoding` to set the query encoding, and use `-E output-encoding` to set the output encoding. Sensible defaults are in place for all three parameters. Please contact me if you encounter any issues and I'd be glad to help. 
72 | 73 | **Files with BOM:** Files which contain a BOM ([Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark)) are not properly supported inside python's csv module. q contains a workaround that allows reading UTF8 files which contain a BOM - Use `-e utf-8-sig` for this. I plan to separate the BOM handling from the encoding itself, which would allow to support BOMs for all encodings. 74 | 75 | ## Installation 76 | 77 | | Format | Instructions | Comments | 78 | :---|:---|:---| 79 | |[OSX](https://github.com/harelba/q/releases/download/v3.1.6/macos-q)|Run `brew install harelba/q/q` in order to install q (moved it to its own tap), or download the standalone executable directly from the link on the left|A man page is available, just run `man q`|| 80 | |[RPM Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| 81 | |[DEB Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| 82 | |[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| 83 | |[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.6.tar.gz)|Full source file tree for latest stable version. 
Note that q.py cannot be used directly anymore, as it requires python dependencies|| 84 | |[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.6.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| 85 | 86 | I will add packages for additional Linux Distributions if there's demand for it. If you're interested in another Linux distribution, please ping me. It's relatively easy to add new ones with the new packaging flow. 87 | 88 | The previous version `2.0.19` can be downloaded directly from [here](https://github.com/harelba/q/releases/tag/2.0.19). Please let me know if for some reason the new version is not suitable for your needs, and you're planning on using the previous one. 89 | 90 | ## Requirements 91 | q is packaged as a compiled standalone-executable that has no dependencies, not even python itself. This was done by using the awesome [pyoxidizer](https://github.com/indygreg/PyOxidizer) project. 92 | 93 | 94 | ## Examples 95 | 96 | This section shows example flows that highlight the main features. For more basic examples, see [here](#getting-started-examples). 
97 | 98 | ### Basic Examples: 99 | 100 | ```bash 101 | # Prepare some data 102 | $ seq 1 1000000 > myfile.csv 103 | 104 | # Query it 105 | $ q "select sum(c1),count(*) from myfile.csv where c1 % 3 = 0" 106 | 166666833333 333333 107 | 108 | # Use q to query from stdin 109 | $ ps -ef | q -b -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" 110 | 501 288 111 | 0 115 112 | 270 17 113 | ``` 114 | 115 | ### Auto-caching Examples 116 | 117 | ```bash 118 | # (time command output has been shortened for brevity) 119 | 120 | # Prepare some data 121 | $ seq 1 1000000 > myfile.csv 122 | 123 | # Read from the resulting file 124 | $ time q "select sum(c1),count(*) from myfile.csv" 125 | 500000500000 1000000 126 | total_time=4.108 seconds 127 | 128 | # Running with `-C readwrite` auto-creates a cache file if there is none. The cache filename would be myfile.csv.qsql. The query runs as usual 129 | $ time q "select sum(c1),count(*) from myfile.csv" -C readwrite 130 | 500000500000 1000000 131 | total_time=4.057 seconds 132 | 133 | # Now run with `-C read`. The query will run from the cache file and not the original. As the file gets bigger, the difference will be much more noticeable 134 | $ time q "select sum(c1),count(*) from myfile.csv" -C read 135 | 500000500000 1000000 136 | total_time=0.229 seconds 137 | 138 | # Now let's try another query on that file. Notice the short query duration. The cache is being used for any query that uses this file, and queries on multiple files that contain caches will reuse the cache as well.
139 | $ time q "select avg(c1) from myfile.csv" -C read 140 | 500000.5 141 | total_time=0.217 seconds 142 | 143 | # You can also query the qsql file directly, as it's just a standard sqlite3 DB file (see next section for q's support of reading directly from sqlite DBs) 144 | $ time q "select sum(c1),count(*) from myfile.csv.qsql" 145 | 500000500000 1000000 146 | total_time=0.226 seconds 147 | 148 | # Now let's delete the original csv file (be careful when deleting original data) 149 | $ rm -vf myfile.csv 150 | 151 | # Running another query directly on the qsql file just works 152 | $ time q "select sum(c1),count(*) from myfile.csv.qsql" 153 | 500000500000 1000000 154 | total_time=0.226 seconds 155 | 156 | # See the `.qrc` section below if you want to set the default `-C` (`--caching-mode`) to something other than `none` (the default) 157 | ``` 158 | 159 | ### Direct sqlite Querying Examples 160 | 161 | ```bash 162 | # Download example sqlite3 database from https://www.sqlitetutorial.net/sqlite-sample-database/ and unzip it. The resulting file will be chinook.db 163 | $ curl -L https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip | tar -xvf - 164 | 165 | # Now we can query the database directly, specifying the name of the table in the query (:::) 166 | $ q "select count(*) from chinook.db:::albums" 167 | 347 168 | 169 | # Let's take the top 5 longest tracks of album id 34. 
The -b option just beautifies the output, and -O tells q to output the column names as headers 170 | $ q "select * from chinook.db:::tracks where albumid = '34' order by milliseconds desc limit 5" -b -O 171 | TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice 172 | 407 "Só Tinha De Ser Com Você" 34 1 7 Vários 389642 13085596 0.99 173 | 398 "Only A Dream In Rio" 34 1 7 Vários 371356 12192989 0.99 174 | 393 "Tarde Em Itapoã" 34 1 7 Vários 313704 10344491 0.99 175 | 401 "Momentos Que Marcam" 34 1 7 Vários 280137 9313740 0.99 176 | 391 "Garota De Ipanema" 34 1 7 Vários 279536 9141343 0.99 177 | 178 | # Let's now copy the chinook database to another file, as if it's just another different database 179 | $ cp chinook.db another_db.db 180 | 181 | # Now we can run a join query between the two databases. They could have been any two different databases, using the copy of chinook is just for simplicity 182 | # Let's get the top-5 longest albums, using albums from the first database and tracks from the second database. The track times are converted to minutes, and rounded to two digits after the decimal point. 183 | $ q -b -O "select a.title,round(sum(t.milliseconds)/1000.0/60,2) total_album_time_minutes from chinook.db:::albums a left join another_db.db:::tracks t on (a.albumid = t.albumid) group by a.albumid order by total_album_time_minutes desc limit 5" 184 | Title total_album_time_minutes 185 | "Lost, Season 3" 1177.76 186 | "Battlestar Galactica (Classic), Season 1" 1170.23 187 | "Lost, Season 1" 1080.92 188 | "Lost, Season 2" 1054.83 189 | "Heroes, Season 1" 996.34 190 | ``` 191 | 192 | ### Analysis Examples 193 | 194 | ```bash 195 | # Let's create a simple CSV file without a header. Make sure to copy only the three lines, press enter, and 196 | # then press Ctrl-D to exit so the file will be written. 
197 | $ cat > some-data-without-header.csv 198 | harel,1,2 199 | ben,3,4 200 | attia,5,6 201 | 202 | 203 | # Let's run q on it with -A, to see the detected structure of the file. `-d ,` sets the delimiter to a comma 204 | $ q -d , "select * from some-data-without-header.csv" -A 205 | Table: /Users/harelben-attia/dev/harelba/q/some-data-without-header.csv 206 | Sources: 207 | source_type: file source: /Users/harelben-attia/dev/harelba/q/some-data-without-header.csv 208 | Fields: 209 | `c1` - text 210 | `c2` - int 211 | `c3` - int 212 | 213 | # Now let's create another simple CSV file, this time with a header (-H tells q to expect a header in the file) 214 | $ cat > some-data.csv 215 | planet_id,name,diameter_km,length_of_day_hours 216 | 1000,Earth,12756,24 217 | 2000,Mars,6792,24.7 218 | 3000,Jupiter,142984,9.9 219 | 220 | 221 | # Let's run q with -A to see the analysis results. 222 | $ q -b -O -H -d , "select * from some-data.csv" -A 223 | Table: /Users/harelben-attia/dev/harelba/q/some-data.csv 224 | Sources: 225 | source_type: file source: /Users/harelben-attia/dev/harelba/q/some-data.csv 226 | Fields: 227 | `planet_id` - int 228 | `name` - text 229 | `diameter_km` - int 230 | `length_of_day_hours` - real 231 | 232 | # Let's run it with `-C readwrite` so a cache will be created 233 | $ q -b -O -H -d , "select * from some-data.csv" -C readwrite 234 | planet_id,name ,diameter_km,length_of_day_hours 235 | 1000 ,Earth ,12756 ,24.0 236 | 2000 ,Mars ,6792 ,24.7 237 | 3000 ,Jupiter,142984 ,9.9 238 | 239 | # Running another query that uses some-data.csv with -A will now show that a qsql exists for that file. The source-type 240 | # will be "file-with-unused-qsql". 
The qsql cache is not being used, since by default, q does not activate caching 241 | # so backward compatibility is maintained 242 | $ q -b -O -H -d , "select * from some-data.csv" -A 243 | Table: /Users/harelben-attia/dev/harelba/q/some-data.csv 244 | Sources: 245 | source_type: file-with-unused-qsql source: /Users/harelben-attia/dev/harelba/q/some-data.csv 246 | Fields: 247 | `planet_id` - int 248 | `name` - text 249 | `diameter_km` - int 250 | `length_of_day_hours` - real 251 | 252 | # Now let's run another query, this time with `-C read`, telling q to use the qsql caches. This time source-type will 253 | # be "qsql-file-with-original", and the cache will be used when querying: 254 | $ q -b -O -H -d , "select * from some-data.csv" -A -C read 255 | Table: /Users/harelben-attia/dev/harelba/q/some-data.csv 256 | Sources: 257 | source_type: qsql-file-with-original source: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql 258 | Fields: 259 | `planet_id` - int 260 | `name` - text 261 | `diameter_km` - int 262 | `length_of_day_hours` - real 263 | 264 | # Let's now read directly from the qsql file. Notice the change in the table name inside the query. `-C read` is not needed 265 | # here. The source-type will be "qsql-file" 266 | $ q -b -O -H -d , "select * from some-data.csv.qsql" -A 267 | Table: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql 268 | Sources: 269 | source_type: qsql-file source: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql 270 | Fields: 271 | `planet_id` - int 272 | `name` - text 273 | `diameter_km` - int 274 | `length_of_day_hours` - real 275 | ``` 276 | 277 | ## Usage 278 | Query should be an SQL-like query which contains filenames instead of table names (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). 279 | 280 | All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). 
Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported. 281 | 282 | q gets a full SQL query as a parameter. Remember to double-quote the query. 283 | 284 | Historically, q supports multiple queries on the same command-line, loading each data file only once, even if it is used by multiple queries on the same q invocation. This is still supported. However, due to the new automatic-caching capabilities, this is not really required. Activate caching, and a cache file will be automatically created for each file. q will use the cache behind the scenes in order to speed up queries. The speed up is extremely significant, so consider using caching for large files. 285 | 286 | The following filename types are supported: 287 | 288 | * **Delimited-file filenames** - including relative/absolute paths. E.g. `./my_folder/my_file.csv` or `/var/tmp/my_file.csv` 289 | * **sqlite3 database filenames** 290 | * **With Multiple Tables** - Add an additional `:::` for accessing a specific table. For example `mydatabase.sqlite3:::users_table`. 291 | * **With One Table Only** - Just specify the database filename, no need for a table name postfix. For example `my_single_table_database.sqlite`. 292 | * **`.qsql` cache files** - q can auto-generate cache files for delimited files, and they can be queried directly as a table, since they contain only one table, as they are essentially standard sqlite databases 293 | 294 | Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`). 295 | 296 | Use `-d` to specify the input delimiter. 297 | 298 | Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns. 
299 | 300 | Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard. Make sure to use single-quotes around the query, so bash/zsh won't interpret the backticks. 301 | 302 | Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. 303 | 304 | JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. 305 | 306 | The SQL syntax itself is sqlite's syntax. For details look at https://www.sqlite.org/lang.html or search the net for examples. 307 | 308 | NOTE: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. 309 | 310 | ``` bash 311 | Options: 312 | -h, --help show this help message and exit 313 | -v, --version Print version 314 | -V, --verbose Print debug info in case of problems 315 | -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME 316 | Save database to an sqlite database file 317 | -C CACHING_MODE, --caching-mode=CACHING_MODE 318 | Choose the autocaching mode (none/read/readwrite). 319 | Autocaches files to disk db so further queries will be 320 | faster. Caching is done to a side-file with the same 321 | name of the table, but with an added extension .qsql 322 | --dump-defaults Dump all default values for parameters and exit. Can 323 | be used in order to make sure .qrc file content is 324 | being read properly. 325 | --max-attached-sqlite-databases=MAX_ATTACHED_SQLITE_DATABASES 326 | Set the maximum number of concurrently-attached sqlite 327 | dbs. This is a compile time definition of sqlite. 
q's 328 | performance will slow down once this limit is reached 329 | for a query, since it will perform table copies in 330 | order to avoid that limit. 331 | --overwrite-qsql=OVERWRITE_QSQL 332 | When used, qsql files (both caches and store-to-db) 333 | will be overwritten if they already exist. Use with 334 | care. 335 | 336 | Input Data Options: 337 | -H, --skip-header Skip header row. This has been changed from earlier 338 | version - Only one header row is supported, and the 339 | header row is used for column naming 340 | -d DELIMITER, --delimiter=DELIMITER 341 | Field delimiter. If none specified, then space is used 342 | as the delimiter. 343 | -p, --pipe-delimited 344 | Same as -d '|'. Added for convenience and readability 345 | -t, --tab-delimited 346 | Same as -d . Just a shorthand for handling 347 | standard tab delimited file You can use $'\t' if you 348 | want (this is how Linux expects to provide tabs in the 349 | command line 350 | -e ENCODING, --encoding=ENCODING 351 | Input file encoding. Defaults to UTF-8. set to none 352 | for not setting any encoding - faster, but at your own 353 | risk... 354 | -z, --gzipped Data is gzipped. Useful for reading from stdin. For 355 | files, .gz means automatic gunzipping 356 | -A, --analyze-only Analyze sample input and provide information about 357 | data types 358 | -m MODE, --mode=MODE 359 | Data parsing mode. fluffy, relaxed and strict. In 360 | strict mode, the -c column-count parameter must be 361 | supplied as well 362 | -c COLUMN_COUNT, --column-count=COLUMN_COUNT 363 | Specific column count when using relaxed or strict 364 | mode 365 | -k, --keep-leading-whitespace 366 | Keep leading whitespace in values. Default behavior 367 | strips leading whitespace off values, in order to 368 | provide out-of-the-box usability for simple use cases. 369 | If you need to preserve whitespace, use this flag. 
370 | --disable-double-double-quoting 371 | Disable support for double double-quoting for escaping 372 | the double quote character. By default, you can use "" 373 | inside double quoted fields to escape double quotes. 374 | Mainly for backward compatibility. 375 | --disable-escaped-double-quoting 376 | Disable support for escaped double-quoting for 377 | escaping the double quote character. By default, you 378 | can use \" inside double quoted fields to escape 379 | double quotes. Mainly for backward compatibility. 380 | --as-text Don't detect column types - All columns will be 381 | treated as text columns 382 | -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE 383 | Input quoting mode. Possible values are all, minimal 384 | and none. Note the slightly misleading parameter name, 385 | and see the matching -W parameter for output quoting. 386 | -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT 387 | Sets the maximum column length. 388 | -U, --with-universal-newlines 389 | Expect universal newlines in the data. Limitation: -U 390 | works only with regular files for now, stdin or .gz 391 | files are not supported yet. 392 | 393 | Output Options: 394 | -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER 395 | Field delimiter for output. If none specified, then 396 | the -d delimiter is used if present, or space if no 397 | delimiter is specified 398 | -P, --pipe-delimited-output 399 | Same as -D '|'. Added for convenience and readability. 400 | -T, --tab-delimited-output 401 | Same as -D . Just a shorthand for outputting tab 402 | delimited output. You can use -D $'\t' if you want. 403 | -O, --output-header 404 | Output header line. Output column-names are determined 405 | from the query itself. Use column aliases in order to 406 | set your column names in the query. For example, 407 | 'select name FirstName,value1/value2 MyCalculation 408 | from ...'. This can be used even if there was no 409 | header in the input. 
410 | -b, --beautify Beautify output according to actual values. Might be 411 | slow... 412 | -f FORMATTING, --formatting=FORMATTING 413 | Output-level formatting, in the format X=fmt,Y=fmt 414 | etc, where X,Y are output column numbers (e.g. 1 for 415 | first SELECT column etc. 416 | -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING 417 | Output encoding. Defaults to 'none', leading to 418 | selecting the system/terminal encoding 419 | -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE 420 | Output quoting mode. Possible values are all, minimal, 421 | nonnumeric and none. Note the slightly misleading 422 | parameter name, and see the matching -w parameter for 423 | input quoting. 424 | -L, --list-user-functions 425 | List all user functions 426 | 427 | Query Related Options: 428 | -q QUERY_FILENAME, --query-filename=QUERY_FILENAME 429 | Read query from the provided filename instead of the 430 | command line, possibly using the provided query 431 | encoding (using -Q). 432 | -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING 433 | query text encoding. Experimental. Please send your 434 | feedback on this 435 | ``` 436 | 437 | ### Setting the default values for parameters 438 | It's possible to set default values for parameters which are used often by configuring them in the file `~/.qrc`. 439 | 440 | The file format is as follows: 441 | ```bash 442 | [options] 443 | = 444 | ``` 445 | 446 | It's possible to generate a default `.qrc` file by running `q --dump-defaults` and write the output into the `.qrc` file. 447 | 448 | One valuable use-case for this could be setting the caching-mode to `read`. This will make q automatically use generated `.qsql` cache files if they exist. Whenever you want a cache file to be generated, just use `-C readwrite` and a `.qsql` file will be generated if it doesn't exist. 
449 | 450 | Here's the content of the `~/.qrc` file for enabling cache reads by default: 451 | ```bash 452 | [options] 453 | caching_mode=read 454 | ``` 455 | 456 | ## Getting Started Examples 457 | This section shows some more basic examples of simple SQL constructs. 458 | 459 | For some more complex use-cases, see the [examples](#examples) at the beginning of the documentation. 460 | 461 | NOTES: 462 | 463 | * The `-H` flag in the examples below signifies that the file has a header row which is used for naming columns. 464 | * The `-t` flag is just a shortcut for saying that the file is a tab-separated file (any delimiter is supported - Use the `-d` flag). 465 | * Queries are given using upper case for clarity, but actual query keywords such as SELECT and WHERE are not really case sensitive. 466 | 467 | Basic Example List: 468 | 469 | * [Example 1 - COUNT DISTINCT values of specific field (uuid of clicks data)](#example-1) 470 | * [Example 2 - Filter numeric data, controlling ORDERing and LIMITing output](#example-2) 471 | * [Example 3 - Illustrate GROUP BY](#example-3) 472 | * [Example 4 - More complex GROUP BY (group by time expression)](#example-4) 473 | * [Example 5 - Read input from standard input](#example-5) 474 | * [Example 6 - Use column names from header row](#example-6) 475 | * [Example 7 - JOIN two files](#example-7) 476 | 477 | ### Example 1 478 | Perform a COUNT DISTINCT values of specific field (uuid of clicks data). 479 | 480 | ``` bash 481 | q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv" 482 | ``` 483 | Output 484 | ``` bash 485 | 229 486 | ``` 487 | ### Example 2 488 | Filter numeric data, controlling ORDERing and LIMITing output 489 | 490 | Note that q understands that the column is numeric and filters according to its numeric value (real numeric value comparison, not string comparison). 
491 | 492 | ``` bash 493 | q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5" 494 | ``` 495 | Output: 496 | ``` bash 497 | 2cfab5ceca922a1a2179dc4687a3b26e 1.0 498 | f6de737b5aa2c46a3db3208413a54d64 0.986665809568 499 | 766025d25479b95a224bd614141feee5 0.977105183282 500 | 2c09058a1b82c6dbcf9dc463e73eddd2 0.703255121794 501 | ``` 502 | 503 | ### Example 3 504 | Illustrate GROUP BY 505 | 506 | ``` bash 507 | q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine" 508 | ``` 509 | Output: 510 | ``` bash 511 | 47d9087db433b9ba.domain.com 400000 512 | ``` 513 | 514 | ### Example 4 515 | More complex GROUP BY (group by time expression) 516 | 517 | ``` bash 518 | q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute" 519 | ``` 520 | Output: 521 | ``` bash 522 | 07:00 138148 523 | 07:01 140026 524 | 07:02 121826 525 | ``` 526 | 527 | ### Example 5 528 | Read input from standard input 529 | 530 | Calculates the total size per user/group in the /tmp subtree. 531 | 532 | ``` bash 533 | sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc" 534 | ``` 535 | Output: 536 | ``` bash 537 | mapred hadoop 304.00390625 538 | root root 8.0431451797485 539 | smith smith 4.34389972687 540 | ``` 541 | 542 | ### Example 6 543 | Use column names from header row 544 | 545 | Calculate the top 3 user ids with the largest number of owned processes, sorted in descending order. 546 | 547 | Note the usage of the autodetected column name UID in the query. 
548 | 549 | ``` bash 550 | ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" 551 | ``` 552 | Output: 553 | ``` bash 554 | root 152 555 | harel 119 556 | avahi 2 557 | ``` 558 | 559 | ### Example 7 560 | JOIN two files 561 | 562 | The following command joins an ls output (exampledatafile) and a file containing rows of group-name,email (group-emails-example) and provides a row of filename,email for each of the emails of the group. For brevity of output, there is also a filter for a specific filename called ppp which is achieved using a WHERE clause. 563 | 564 | ``` bash 565 | q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'" 566 | ``` 567 | Output: 568 | ``` bash 569 | ppp dip.1@otherdomain.com 570 | ppp dip.2@otherdomain.com 571 | ``` 572 | 573 | You can see that the ppp filename appears twice, each time matched to one of the emails of the group dip to which it belongs. Take a look at the files `exampledatafile` and `group-emails-example` for the data. 574 | 575 | Column name detection is supported for JOIN scenarios as well. Just specify `-H` in the command line and make sure that the source files contain the header rows. 576 | 577 | ## Implementation 578 | Behind the scenes q creates a "virtual" sqlite3 database that does not contain data of its own, but attaches to multiple other databases as follows: 579 | 580 | * When reading delimited files or data from `stdin`, it will analyze the data and construct an in-memory "adhoc database" that contains it. This adhoc database will be attached to the virtual database 581 | * When a delimited file has a `.qsql` cache, it will attach to that file directly, without having to read it into memory 582 | * When querying a standard sqlite3 file, it will be attached to the virtual database as well, without reading it into memory. 
sqlite3 files are auto-detected, no need for any special filename extension 583 | 584 | The user query will be executed directly on the virtual database, using the attached databases. 585 | 586 | sqlite3 itself has a limit on the number of attached databases (usually 10). q will automatically attach databases until that limit is reached, and will load any additional tables into the adhoc database's in-memory database. 587 | 588 | Please make sure to read the [limitations](#limitations) section as well. 589 | 590 | ## Development 591 | 592 | ### Tests 593 | The code includes a test suite runnable through `run-tests.sh`. By default, it uses the python source code for running the tests. However, it is possible to provide a path to an actual executable to the tests using the `Q_EXECUTABLE` env var. This is actually being used during the build and packaging process, in order to test the resulting binary. 594 | 595 | ## Limitations 596 | Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities. 597 | 598 | * Common Table Expressions (CTE) are not supported for now. Will be implemented soon - See [here](https://github.com/harelba/q/issues/67) and [here](https://github.com/harelba/q/issues/124) for details. 599 | * `FROM <subquery>` is not supported 600 | * Spaces in file names are not supported. Use stdin for piping the data into q, or rename the file 601 | * Some rare cases of subqueries are not supported yet. 602 | * Queries with more than 10 different sqlite3 databases will load some data into memory 603 | * up to 500 tables are supported in a single query 604 | 605 | ## Rationale 606 | Have you ever stared at a text file on the screen, hoping it would have been a database so you could ask anything you want about it? I had that feeling many times, and I've finally understood that it's not the database that I want. It's the language - SQL. 
607 | 608 | SQL is a declarative language for data, and as such it allows me to define what I want without caring about how exactly it's done. This is the reason SQL is so powerful, because it treats data as data and not as bits and bytes (and chars). 609 | 610 | The goal of this tool is to provide a bridge between the world of text files and of SQL. 611 | 612 | ### Why aren't other Linux tools enough? 613 | The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of the job. This tool adds the declarative power of SQL to the Linux toolset, without losing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time. 614 | 615 | One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.). 616 | 617 | ### Philosophy 618 | This tool has been designed with general Linux/Unix design principles in mind. If you're interested in these general design principles, read this amazing [book](http://catb.org/~esr/writings/taoup/) and specifically [this part](http://catb.org/~esr/writings/taoup/html/ch01s06.html). If you believe that the way this tool works goes strongly against any of the principles, I would love to hear your view about it. 
619 | 620 | ## Future 621 | 622 | * Expose q as a python module - Planned as a goal after the new version `3.x` is out 623 | 624 | 625 | -------------------------------------------------------------------------------- /mkdocs/docs/index_cn.md: -------------------------------------------------------------------------------- 1 | # q - 直接在CSV或TSV文件上运行SQL 2 | 3 | [![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) 4 | [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) 5 | 6 | 7 | ## 概述 8 | q 是一个可以运行在 CSV / TSV 文件(或其他表格式的文本文件)上运行类SQL命令的命令行工具。 9 | 10 | q 将普通文本(如上述)作为数据库表,且支持所有的SQL语法如:WHERE、GROUP BY、各种JOIN等。此外,还拥有自动识别列名和列类型及广泛支持多种编码的特性。 11 | 12 | ``` bash 13 | q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" 14 | ``` 15 | 16 | ``` bash 17 | ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" 18 | ``` 19 | 20 | 查看[示例](#示例)或[安装](#安装)体验. 21 | 22 | | | | 23 | |:--------------------------------------:|:-----------------------------------------------:| 24 | | 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート | 25 | | 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются | 26 | 27 | 28 | **非英语用户:** q 完全支持所有类型的字符编码。 使用 `-e data-encoding` 设置输入编码; 使用 `-Q query-encoding` 设置查询编码; 使用 `-E output-encoding` 设置输出编码; 29 | 如上三个参数均设有合理的默认值。
30 | 31 | > 如果遇到问题请与我联系,期待与你交流。 32 | 33 | **含有BOM的文件:** python的csv模块并不能很好的支持含有[Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark) 的文件。针对该种情况,使用 `-e utf-8-sig` 命令参数可读取包含BOM的UTF8编码文件。 34 | 35 | > 我们计划将BOM相关处理与编码'解耦', 这样就可以支持所有编码的BOM文件了。 36 | 37 | ## 安装 38 | 39 | | 格式 | 说明 | 备注 | 40 | |:---|:---|:---| 41 | |[OSX](https://github.com/harelba/q/releases/download/2.0.19/q-x86_64-Darwin)|运行 `brew install q`| 该方式暂不支持MAN手册, 可以使用 `q --help` 查看帮助|| 42 | |[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| 运行 `rpm -ivh ` 如果安装过旧版则运行 `rpm -U ` | 该方式支持MAN手册,可运行`man q`查看| 43 | |[DEB Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data_2.0.19-2_amd64.deb)| 运行 `sudo dpkg -i `|该方式支持MAN手册,可运行`man q`查看| 44 | |[Windows Installer](https://github.com/harelba/q/releases/download/2.0.19/q-AMD64-Windows-installer.exe)|运行安装可执行文件,一直点击下一步、下一步... q.exe 将被添加至PATH,以便于随处运行|PATH更新后并不会即时生效,重新打开cmd命令窗口便可| 45 | |[tar.gz](https://github.com/harelba/q/archive/2.0.19.tar.gz)|最新稳定版的所有源码文件。提示,q.py 文件不能直接使用,因为它需要python依赖|| 46 | |[zip](https://github.com/harelba/q/archive/2.0.19.zip)|最新稳定版的所有源码文件。提示,q.py 文件不能直接使用,因为它需要python依赖|| 47 | 48 | **旧版本可以在这儿[下载](https://github.com/harelba/packages-for-q) 。按理说不会有人愿意用旧版本,要是你计划使用旧版,希望能与你交流。** 49 | 50 | ## 须知 51 | 从`2.0.9`版本开始,不需要任何外部依赖。Python(3.7)和其他所需的库包含在了安装文件中且与系统隔离。 52 | 53 | ## 使用 54 | 55 | ``` bash 56 | q "" 57 | 58 | 最简单的执行语句:q "SELECT * FROM myfile" 该语句会输出文件内容 59 | ``` 60 | 61 | q 支持在表格式的文本上执行类SQL命令。它的初衷是为Linux命令行附加SQL的表达力且实现对文本数据的轻松访问。 62 | 63 | 类SQL的查询将*文件名(或标准输入流)看作表名*。查询语句会作为命令输入的一个参数(使用引号包裹),如果将多个文件看作一张表,可以这样写 `文件名1+文件名2....`或者使用通配符(比如:`my_files*.csv`)。 64 | 65 | 使用 `-H` 表示输入内容中包含表头。该情况下列名会被自动识别,如果没有指定该参数,列名将会被以`cX`命名,`X`从1开始(比如: `q "SELECT c3,c8 from ..."`) 。 66 | 67 | 使用 `-d` 声明输入的分隔符。 68 | 69 | 列类型可由工具自动识别,无需强制转换。 提示,使用`--as-text` 可以强制将所有列类型转换为文本类型。 70 | 71 | 依据sqlite规范,如果列名中含有空格,需要使用反引号 (即:`) 引起来。 72 | 73 | 完全支持查询/输入/输出的编码设置(q 力争提供一种开箱即用的方法), 可以分别使用`-Q`,`-e` 和 
`-E`来指定编码设置类型。 74 | 75 | 支持所有的sqlite3 SQL方法,包括文件之间的 JOIN(可以为文件设置别名)操作。在下面的[限制](#限制)小节可以看到一些少有使用的、欠支持的说明。 76 | 77 | ### 查询 78 | 79 | q 的每一个参数都是由双引号包裹的一条完整的SQL语句。所有的查询语句会依次执行,最终结果以标准输出流形式输出。 提示,在同一命令行中执行多条查询语句时,仅在执行第一条查询语句时需要耗时载入数据,其他查询语句即时执行。 80 | 81 | 支持所有标准SQL语法,条件(WHERE 和 HAVING)、GROUP BY、ORDER BY等。 82 | 83 | 在WHERE条件查询中,支持JOIN操作和子查询,但在FROM子句中并不支持。JOIN操作时,可以为文件起别名。 84 | 85 | SQL语法同sqlite的语法,详情见 https://www.sqlite.org/lang.html 或上网找一些示例。 86 | 87 | **注意**: 88 | 89 | * 支持所有类型的自动识别,无需强制转换或其他操作。 90 | 91 | * 如果重命名输出列,则需要为列指定别名并使用 `-O` 声明。如: `q -O -H "select count(*) cnt,sum(*) as mysum from -"` 便会将`cnt`和`mysum`作为列名输出。 92 | 93 | ### 指令 94 | 95 | ``` bash 96 | 使用: 97 | q 支持在表格式的文本数据上执行类SQL查询。 98 | 99 | 它的初衷是为Linux命令行附加SQL的表达力且实现对文本数据的轻松访问。 100 | 101 | 基本操作是 q "SQL查询语句" 表名便是文件名(使用 - 从标注输入中读取数据)。若输入内容包含表头时,可以使用 -H 指定列名。若无表头,则列将会自动命名为 c1...cN。 102 | 103 | 列类型可被自动识别。可以使用 -A 命令查看每列的名称及其类型。 104 | 105 | 可以使用 -d (或 -t) 指定分隔符,使用 -D 指定输出分割符。 106 | 107 | 支持所有的sqlite3 SQL方法。 108 | 109 | 示例: 110 | 111 | 例子1: ls -ltrd * | q "select c1,count(1) from - group by c1" 112 | 上例将会输出当前目录下,所有文件的权限表达式分组及每组数量。 113 | 114 | 例子2: seq 1 1000 | q "select avg(c1),sum(c1) from -" 115 | 上例将会输出1到1000的平均数与和数。 116 | 117 | 例子3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" 118 | 上例将会输出在/tmp目录下,相同'用户+组'的文件所占用的MB磁盘空间。 119 | 120 | 更多详情见 https://github.com/harelba/q/ 或查看帮助 121 | 122 | 选项: 123 | -h, --help 显示此帮助信息并退出 124 | -v, --version 显示版本号 125 | -V, --verbose 出现问题时显示调试信息 126 | -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME 127 | 将数据库保存为一个 sqlite 数据库文件 128 | --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD 129 | 保存数据库到磁盘的方法 130 | 'standard' 不需要任何设置 131 | 'fast'需要手动在python的安装目录下执行`pip install sqlitebck` 132 | 打包的问题解决后,'fast'即被作为默认方式 133 | 数据相关的选项: 134 | 135 | -H, --skip-header 忽略表头,在早期的版本中已修改为:仅支持用于标明列名的一行表头 136 | -d DELIMITER, --delimiter=DELIMITER 137 | 列分隔符,若无特别指定,默认为空格符 138 | -p, --pipe-delimited 139 | 作用同 -d 
'|',为了方便和可读性提供该参数 140 | -t, --tab-delimited 141 | 作用同 -d ,这仅是一种简写,也可以在Linux命令行中使用$'\t' 142 | -e ENCODING, --encoding=ENCODING 143 | 输入文件的编码,默认是UTF-8 144 | -z, --gzipped 压缩数据,对于从输入流读取文件非常高效 .gz 是自动压缩后文件扩展名 145 | -A, --analyze-only 简单分析:各列的数据类型 146 | -m MODE, --mode=MODE 147 | 数据解析模式: 松散, 宽松和严格。在严格模式下必须指定 -c 148 | --column-count 参数。 149 | -c COLUMN_COUNT, --column-count=COLUMN_COUNT 150 | 当使用宽松或严格模式时,用于指定列的数量 151 | -k, --keep-leading-whitespace 152 | 保留每列前的空格。为了使其开箱即用,默认去除了列前的空格 153 | 如果有需要,可以指定该参数 154 | --disable-double-double-quoting 155 | 禁止一对双引号的转义。默认可以使用 "" 转义双引号 156 | 主要为了向后兼容 157 | --disable-escaped-double-quoting 158 | 禁止转义双引号 159 | 默认可以在双引号字段中使用 \" 进行转义 160 | 主要为了向后兼容 161 | --as-text 不识别列类型(所有列被当作文本类型) 162 | -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE 163 | 输入内容的转义模式,可选值 all、minimal、none 164 | 该参数稍有误导性,-W 指定输出内容的转义模式 165 | -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT 166 | 设置列的最大长度 167 | -U, --with-universal-newlines 168 | 设置通用换行符 169 | -U 参数当前仅适用于常规文件,输入流或.gz类文件暂不支持 170 | 171 | 输出相关的选项: 172 | -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER 173 | 输出列间的分隔符 174 | 若未指定,则与 -d 指定的分隔符相同;若均为指定,则默认为空格符 175 | -P, --pipe-delimited-output 176 | 同 -D '|' 为了方便和可读性提供该参数 177 | -T, --tab-delimited-output 178 | 同 -D 这仅是一种简写,也可以在Linux命令行中使用$'\t' 179 | -O, --output-header 180 | 输出表头,输出的列名是由查询中指定的别名 181 | 如: 'select name FirstName, value1/value2 MyCalculation 182 | from ...' 即使输入时未指定表头仍可使用该参数。 183 | -b, --beautify 美化输出结果,可能较慢... 
184 | -f FORMATTING, --formatting=FORMATTING 185 | 格式化输出列 186 | 如格式X=fmt,Y=fmt等,上述中的X、Y是指第几列(如:1 表示 SELECT 187 | 的第一列) 188 | -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING 189 | 输出内容的编码,默认是 'none',跟随系统或终端的编码 190 | -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE 191 | 输出内容的转义模式,可选值 all、minimal、none 192 | 该参数稍有误导性,-w 指定输入内容的转义模式 193 | -L, --list-user-functions 194 | 列出所有内置函数 195 | 196 | 查询相关的参数: 197 | -q QUERY_FILENAME, --query-filename=QUERY_FILENAME 198 | 指定文件名,由文件中读取查询语句。 199 | 该操作常与查询编码(使用 -Q)一同使用 200 | -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING 201 | 查询编码(包含查询语句的文件编码) 202 | 实验性参数,对该参数的意见可反馈 203 | ``` 204 | 205 | ## 示例 206 | 下述 `-H` 参数的例子,表示文件中含有表头时使用该参数。 207 | 208 | `-t` 参数是指定文件以 tab 作为分隔符的缩写(可以使用 `-d` 参数指定任意分隔符)。 209 | 210 | 为了清楚起见,查询关键字均使用大写,实际上关键字(如 SELECT、WHERE等)对大小写并不敏感。 211 | 212 | 示例目录: 213 | 214 | * [例1 - 统计指定列唯一值的数量](#1) 215 | * [例2 - 数值条件过滤、排序并限制输出数](#2) 216 | * [例3 - GROUP BY简单示例](#3) 217 | * [例4 - GROUP BY进阶示例 (以时间格式分组)](#4) 218 | * [例5 - 标准输入流作为输入](#5) 219 | * [例6 - 使用表头中列名](#6) 220 | * [例7 - JOIN 两个文件](#7) 221 | 222 | ### 例1 223 | 对指定字段(点击数据中的uuid)执行 COUNT DISTINCT 224 | 225 | ``` bash 226 | q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv" 227 | ``` 228 | 输出: 229 | ``` bash 230 | 229 231 | ``` 232 | 233 | ### 例2 234 | 过滤数值数据、排序并限制输出数量 235 | 236 | 注意:q 将其看作数值类型并对其进行数值过滤(数值比较而不是字符串比较) 237 | 238 | ``` bash 239 | q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5" 240 | ``` 241 | 输出: 242 | ``` bash 243 | 2cfab5ceca922a1a2179dc4687a3b26e 1.0 244 | f6de737b5aa2c46a3db3208413a54d64 0.986665809568 245 | 766025d25479b95a224bd614141feee5 0.977105183282 246 | 2c09058a1b82c6dbcf9dc463e73eddd2 0.703255121794 247 | ``` 248 | 249 | ### 例3 250 | GROUP BY 简单示例 251 | 252 | ``` bash 253 | q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine" 254 | ``` 255 | 输出: 256 | ``` bash 257 | 47d9087db433b9ba.domain.com 400000 258 | ``` 259 | 260 | 
### 例4 261 | GROUP BY进阶示例 (以时间格式分组) 262 | 263 | ``` bash 264 | q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute" 265 | ``` 266 | 输出: 267 | ``` bash 268 | 07:00 138148 269 | 07:01 140026 270 | 07:02 121826 271 | ``` 272 | 273 | ### 例5 274 | 标准输入流作为输入 275 | 276 | 计算 /tmp 目录下各 user/group 的占用空间大小 277 | 278 | ``` bash 279 | sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc" 280 | ``` 281 | 输出: 282 | ``` bash 283 | mapred hadoop 304.00390625 284 | root root 8.0431451797485 285 | smith smith 4.34389972687 286 | ``` 287 | 288 | ### 例6 289 | 使用表头中列名 290 | 291 | 计算拥有进程数最多的前3位用户名及其数量 292 | 293 | 注意: 该查询中自动识别了列名 294 | 295 | ``` bash 296 | ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" 297 | ``` 298 | 输出: 299 | ``` bash 300 | root 152 301 | harel 119 302 | avahi 2 303 | ``` 304 | 305 | ### 例7 306 | JOIN 两个文件 307 | 308 | 如下命令中JOIN一个ls命令输出内容文件(exampledatafile) 和一个包含group_name、email两列字段的文件(group-emails-example),每一邮件组均包含filename、email列, 为了输出简便,使用WHERE条件过滤出名为 ppp 的文件 309 | 310 | ``` bash 311 | q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'" 312 | ``` 313 | 输出: 314 | ``` bash 315 | ppp dip.1@otherdomain.com 316 | ppp dip.2@otherdomain.com 317 | ``` 318 | 可以看出 ppp 文件出现了两次,每次都匹配到了它所属的dip邮件组(如例中 dip.1@... 
/ dip2@...),可以在 `exampledatafile` 和 `group-emails-example` 文件中查看数据。 319 | 320 | JOIN 的应用场景中也支持列名识别,在查询包含表头的文件时,只需指定 `-H` 参数即可。 321 | 322 | ## 声明 323 | 为了避免引用外部依赖,当前是使用由Python编写的内存数据库实现的。当前是支持 SELECT 语句及 各种JOIN ( 目前仅在 WHERE 语句中支持子查询)。 324 | 若想对数据进一步分析,可以使用 `--save-db-to-disk` 参数,以将结果输出为 sqlite 数据库文件,然后使用 `sqlite3` 语句来执行查询操作。 325 | 326 | 需要提示的是,当前并没有对数据量的大小进行检测和限制 - 也就是说,需要用户自己掌控文件大小。 327 | 328 | 请务必阅读[限制](#限制)小节。 329 | 330 | ## 开发 331 | 332 | ### 测试 333 | 源码中包含了测试用例,可以通过 `test/test-all` 来执行。若想要提交 PR的话,一定先确保其均执行成功。 334 | 335 | ## 限制 336 | 如下罗列了一些已知的限制,若你的使用场景中需要用到以下标明的限制,请联系我。 337 | 338 | * 不支持 `FROM ` 339 | * 不支持公用表表达式(CTE) 340 | * 不支持文件名中包含空格 (可以将文件以标准输入流的方式输入 q 或重命名文件) 341 | * 不支持较少用到的子查询 342 | 343 | ## 原理 344 | 你是否曾经盯着屏幕上的文本文件发呆,希望它要是数据库就好了,这样就可以找出自己想要的内容?我曾有过很多次,最终顿悟。我想要的不是数据库,而是 SQL。 345 | 346 | SQL 是一种面向数据声明的语言,它允许自定义数据内容而无需关心其执行过程。这也正是SQL强大之处,因为它对于数据'所见即所得',而不是将数据看作字节码。 347 | 348 | 本工具的目的是:在文本文件和SQL之间搭建一座桥梁。 349 | 350 | ### 为什么其他Linux工具不能满足需求? 351 | 传统的Linux工具库也很酷,我也经常使用它们, 但Linux的整体理念是为任一部分搭配最好的工具。本工具为传统Linux工具集新添了 SQL 族类工具,其他工具并不会失去本来优势。 352 | 事实上,我也经常将 q 和其他Linux工具搭配使用,就如同使用管道将 awk/sed 和 grep 搭配使用一样。 353 | 354 | 另外需要注意的是,许多Linux工具就将文本看作文本,而不是数据。从这个意义上来讲,可以将 q 看作提供了 SQL 功能(如:表达式、排序、分组、聚合等)的元工具。 355 | 356 | ### 理念 357 | 358 | 本工具的设计遵从了 Linux/Unix 的传统设计原则。若你对这些设计原则感兴趣,可以阅读 [这本书](http://catb.org/~esr/writings/taoup/) ,尤其是书中 [这部分](http://catb.org/~esr/writings/taoup/html/ch01s06.html) 359 | 若你认为本工具工作方式与之背道而驰,愿洗耳恭听你的建议。 360 | 361 | ## 展望 362 | 363 | * 主要方向:将其作为python的模块公开。 在公开之前,需要对处理标准输入流做一些内部API的完善。 364 | * 支持分布式以提高算力。 365 | 366 | 367 | 368 | -------------------------------------------------------------------------------- /mkdocs/docs/js/google-analytics.js: -------------------------------------------------------------------------------- 1 | // Monitor all download links in GA 2 | 3 | var dlCnt = 0; 4 | var tocCnt = 0; 5 | 6 | function GAizeDownloadLink(a) { 7 | var url = a.href; 8 | var x = url.indexOf("?"); 9 | if (x != -1) { 10 | url = url.substr(0, x); 11 
| } 12 | var url_test = url.match(/^http.*(archive\/|releases\/)(?.*)/); 13 | if (url_test) { 14 | a.event_action = url_test.groups.path; 15 | console.log("Converting download link to be GA aware: " + url + " . download path is " + a.event_action); 16 | dlCnt = dlCnt + 1; 17 | a.onclick = function() { 18 | console.log("Sending GA event for link" + url); 19 | var that = this; 20 | gtag('event','perform download', { 'event_category': 'Downloads', 'event_label': 'Download ' + this.event_action , 'value': 1 }); 21 | setTimeout(function() { 22 | location.href = that.href; 23 | }, 500); 24 | return false; 25 | }; 26 | } 27 | } 28 | 29 | function GAizeTOCLink(l) { 30 | tocCnt = tocCnt + 1; 31 | l.onclick = function() { 32 | url_test = l.href.match(/^https?:\/\/.+(#.*)$/i); 33 | toc_name = url_test[1]; 34 | var that = this; 35 | console.log("Sending GA event for toc link " + this.href); 36 | 37 | gtag('event','navigate', { 'event_category': 'Navigation', 'event_label': 'go to ' + toc_name, 'value': 1 }); 38 | setTimeout(function() { 39 | location.href = that.href; 40 | }, 250); 41 | return false; 42 | }; 43 | 44 | } 45 | 46 | window.onload = function() { 47 | var anchors = document.getElementsByTagName('a'); 48 | for (i = 0; i < anchors.length; i++) { 49 | GAizeDownloadLink(anchors[i]); 50 | } 51 | var toc_links = document.querySelectorAll('div.md-sidebar[data-md-component=toc] a.md-nav__link'); 52 | for (i = 0; i < toc_links.length; i++) { 53 | GAizeTOCLink(toc_links[i]); 54 | } 55 | console.log("Converted " + dlCnt + " download links and " + tocCnt + " TOC links to be GA aware"); 56 | } 57 | -------------------------------------------------------------------------------- /mkdocs/docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | 2 | div.md-content pre { 3 | background-color: black; 4 | color: #41FF00; 5 | } 6 | 7 | .md-typeset code pre { 8 | background-color: black; 9 | color: #41FF00; 10 | } 11 | 12 | 
.md-typeset p code { 13 | color: rgba(0,0,0,.87); 14 | } 15 | 16 | .md-typeset code.bash { 17 | color: #41FF00; 18 | } 19 | 20 | .md-typeset__scrollwrap { 21 | text-align: center; 22 | } 23 | 24 | .md-typeset .headerlink { 25 | opacity: 50%; 26 | } 27 | 28 | article.md-content__inner.md-typeset>p { 29 | text-align: left; 30 | } 31 | 32 | .md-nav__link[data-md-state=blur] { 33 | color: rgba(0.3,0.5,0.4,.4) 34 | } 35 | 36 | .md-nav__link[data-md-state=current] { 37 | font-weight: 700; 38 | } 39 | -------------------------------------------------------------------------------- /mkdocs/generate-web-site.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdocs build -c -s -d ./generated-site 4 | -------------------------------------------------------------------------------- /mkdocs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: q - Text as Data 2 | site_url: https://harelba.github.io/q/ 3 | repo_url: https://github.com/harelba/q 4 | edit_uri: "" 5 | site_description: Text as Data - q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). 
6 | site_author: Harel Ben-Attia 7 | copyright: 'Copyright © 2012-2019 Harel Ben-Attia' 8 | google_analytics: 9 | - "UA-48316355-1" 10 | - "auto" 11 | nav: 12 | - Home: index.md 13 | - 首页: index_cn.md 14 | - About: about.md 15 | theme: 16 | name: material 17 | language: 'en' 18 | palette: 19 | primary: purple 20 | accent: amber 21 | fonts: 22 | text: 'Roboto' 23 | code: 'Roboto Mono' 24 | favicon: 'img/q-logo1.ico' 25 | logo: 'img/q-logo1.ico' 26 | custom_dir: 'theme' 27 | extra: 28 | social: 29 | - type: 'github' 30 | link: 'https://github.com/harelba' 31 | - type: 'twitter' 32 | link: 'https://twitter.com/harelba' 33 | - type: 'linkedin' 34 | link: 'https://www.linkedin.com/in/harelba' 35 | extra_css: 36 | - 'stylesheets/extra.css' 37 | extra_javascript: 38 | - 'js/google-analytics.js' 39 | markdown_extensions: 40 | - meta 41 | - toc: 42 | permalink: true 43 | - tables 44 | - fenced_code 45 | - admonition 46 | # - codehilite 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /mkdocs/requirements.txt: -------------------------------------------------------------------------------- 1 | Click==7.0 2 | Deprecated==1.2.7 3 | Jinja2==2.10.3 4 | Markdown==3.1.1 5 | MarkupSafe==1.1.1 6 | PyGithub==1.45 7 | PyJWT==1.7.1 8 | PyYAML==5.3 9 | Pygments==2.5.2 10 | certifi==2019.11.28 11 | chardet==3.0.4 12 | htmlmin==0.1.12 13 | idna==2.8 14 | jsmin==2.2.2 15 | livereload==2.6.1 16 | mkdocs-bootstrap4==0.1.2 17 | mkdocs-bootswatch==1.0 18 | mkdocs-git-committers-plugin==0.1.8 19 | mkdocs-material==4.6.0 20 | mkdocs-minify-plugin==0.2.1 21 | mkdocs==1.0.4 22 | pep562==1.0 23 | pymdown-extensions==6.2.1 24 | requests==2.22.0 25 | six==1.14.0 26 | tornado==6.0.3 27 | urllib3==1.25.8 28 | wrapt==1.11.2 29 | -------------------------------------------------------------------------------- /mkdocs/theme/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% 
block analytics %} 4 | 5 | {% set analytics = config.google_analytics %} 6 | 7 | 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /prepare-benchmark-env: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | eval "$(pyenv init -)" 6 | eval "$(pyenv virtualenv-init -)" 7 | 8 | source benchmark-config.sh 9 | 10 | if [ ! -f ./benchmark_data.tar.gz ]; 11 | then 12 | echo benchmark data not found. downloading it 13 | curl "https://s3.amazonaws.com/harelba-q-public/benchmark_data.tar.gz" -o ./benchmark_data.tar.gz 14 | else 15 | echo no need to download benchmark data 16 | fi 17 | 18 | if [ ! -d ./_benchmark_data ]; 19 | then 20 | echo extracting benchmark data 21 | tar xvfz benchmark_data.tar.gz 22 | echo benchmark data is ready 23 | else 24 | echo no need to extract benchmark data 25 | fi 26 | 27 | for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" 28 | do 29 | echo installing $ver 30 | pyenv install -s $ver 31 | 32 | venv_name=q-benchmark-$ver 33 | echo create venv $venv_name 34 | pyenv virtualenv -f $ver $venv_name 35 | echo activate venv $venv_name 36 | pyenv activate $venv_name 37 | pyenv version 38 | echo installing requirements $venv_name 39 | pip install -r ./requirements.txt 40 | echo deactivating $venv_name 41 | pyenv deactivate 42 | done 43 | 44 | 45 | -------------------------------------------------------------------------------- /pyoxidizer.bzl: -------------------------------------------------------------------------------- 1 | # This file defines how PyOxidizer application building and packaging is 2 | # performed. See PyOxidizer's documentation at 3 | # https://pyoxidizer.readthedocs.io/en/stable/ for details of this 4 | # configuration file format. 5 | 6 | PYTHON_VERSION = VARS.get("PYTHON_VERSION","3.8") 7 | Q_VERSION = VARS.get("Q_VERSION","0.0.1") 8 | 9 | # Configuration files consist of functions which define build "targets." 
10 | # This function creates a Python executable and installs it in a destination 11 | # directory. 12 | def make_exe(): 13 | dist = default_python_distribution(python_version=PYTHON_VERSION) 14 | 15 | policy = dist.make_python_packaging_policy() 16 | policy.set_resource_handling_mode("classify") 17 | policy.resources_location = "in-memory" 18 | policy.resources_location_fallback = "filesystem-relative:Lib" 19 | policy.allow_in_memory_shared_library_loading = False 20 | 21 | python_config = dist.make_python_interpreter_config() 22 | 23 | python_config.run_module = "bin.q" 24 | 25 | exe = dist.to_python_executable( 26 | name="q", 27 | 28 | packaging_policy=policy, 29 | 30 | config=python_config, 31 | ) 32 | 33 | exe.pip_install(["wheel"]) 34 | 35 | exe.add_python_resources(exe.pip_install(["-r", "requirements.txt"])) 36 | exe.add_python_resources(exe.pip_install(["-e", "."])) 37 | 38 | exe.add_python_resources(exe.read_package_root( 39 | path="./", 40 | packages=["bin"], 41 | )) 42 | 43 | return exe 44 | 45 | def make_embedded_resources(exe): 46 | return exe.to_embedded_resources() 47 | 48 | def make_install(exe): 49 | # Create an object that represents our installed application file layout. 50 | files = FileManifest() 51 | 52 | # Add the generated executable to our install layout in the root directory. 53 | files.add_python_resource(".", exe) 54 | 55 | return files 56 | 57 | def make_msi(exe): 58 | # See the full docs for more. But this will convert your Python executable 59 | # into a `WiXMSIBuilder` Starlark type, which will be converted to a Windows 60 | # .msi installer when it is built. 61 | builder = exe.to_wix_msi_builder( 62 | # Simple identifier of your app. 63 | "q", 64 | # The name of your application. 65 | "q-text-as-data", 66 | # The version of your application. 67 | Q_VERSION, 68 | # The author/manufacturer of your application. 69 | "Harel Ben-Attia" 70 | ) 71 | return builder 72 | 73 | 74 | # Dynamically enable automatic code signing. 
75 | def register_code_signers(): 76 | # You will need to run with `pyoxidizer build --var ENABLE_CODE_SIGNING 1` for 77 | # this if block to be evaluated. 78 | if not VARS.get("ENABLE_CODE_SIGNING"): 79 | return 80 | 81 | # Use a code signing certificate in a .pfx/.p12 file, prompting the 82 | # user for its path and password to open. 83 | # pfx_path = prompt_input("path to code signing certificate file") 84 | # pfx_password = prompt_password( 85 | # "password for code signing certificate file", 86 | # confirm = True 87 | # ) 88 | # signer = code_signer_from_pfx_file(pfx_path, pfx_password) 89 | 90 | # Use a code signing certificate in the Windows certificate store, specified 91 | # by its SHA-1 thumbprint. (This allows you to use YubiKeys and other 92 | # hardware tokens if they speak to the Windows certificate APIs.) 93 | # sha1_thumbprint = prompt_input( 94 | # "SHA-1 thumbprint of code signing certificate in Windows store" 95 | # ) 96 | # signer = code_signer_from_windows_store_sha1_thumbprint(sha1_thumbprint) 97 | 98 | # Choose a code signing certificate automatically from the Windows 99 | # certificate store. 100 | # signer = code_signer_from_windows_store_auto() 101 | 102 | # Activate your signer so it gets called automatically. 103 | # signer.activate() 104 | 105 | 106 | # Call our function to set up automatic code signers. 107 | register_code_signers() 108 | 109 | # Tell PyOxidizer about the build targets defined above. 110 | register_target("exe", make_exe) 111 | register_target("resources", make_embedded_resources, depends=["exe"], default_build_script=True) 112 | register_target("install", make_install, depends=["exe"], default=True) 113 | register_target("msi_installer", make_msi, depends=["exe"]) 114 | 115 | # Resolve whatever targets the invoker of this configuration file is requesting 116 | # be resolved. 
117 | resolve_targets() 118 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | benchmark: Benchmark tests 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | six==1.11.0 2 | flake8==3.6.0 3 | setuptools<45.0.0 4 | -------------------------------------------------------------------------------- /run-benchmark: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: ./run-benchmark.sh 4 | set -e 5 | 6 | get_abs_filename() { 7 | # $1 : relative filename 8 | echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" 9 | } 10 | 11 | eval "$(pyenv init -)" 12 | eval "$(pyenv virtualenv-init -)" 13 | 14 | if [ "x$1" == "x" ]; 15 | then 16 | echo Benchmark id must be provided as a parameter 17 | exit 1 18 | fi 19 | Q_BENCHMARK_ID=$1 20 | shift 21 | 22 | if [ "x$1" == "x" ]; 23 | then 24 | EFFECTIVE_Q_EXECUTABLE="source-files-$(git rev-parse HEAD)" 25 | else 26 | ABS_Q_EXECUTABLE="$(get_abs_filename $1)" 27 | export Q_EXECUTABLE=$ABS_Q_EXECUTABLE 28 | if [ ! 
-f $ABS_Q_EXECUTABLE ] 29 | then 30 | echo "q executable must exist ($ABS_Q_EXECUTABLE)" 31 | exit 1 32 | fi 33 | EFFECTIVE_Q_EXECUTABLE="${ABS_Q_EXECUTABLE//\//__}" 34 | shift 35 | fi 36 | 37 | echo "Q executable to use is $EFFECTIVE_Q_EXECUTABLE" 38 | 39 | PYTEST_OPTIONS="$@" 40 | echo "pytest options are $PYTEST_OPTIONS" 41 | 42 | mkdir -p ./test/benchmark-results 43 | 44 | # Must be provided to the benchmark code so it knows where to write the results to 45 | export Q_BENCHMARK_RESULTS_FOLDER="./test/benchmark-results/${EFFECTIVE_Q_EXECUTABLE}/${Q_BENCHMARK_ID}/" 46 | echo Benchmark results folder is $Q_BENCHMARK_RESULTS_FOLDER 47 | mkdir -p $Q_BENCHMARK_RESULTS_FOLDER 48 | 49 | source benchmark-config.sh 50 | LATEST_PYTHON_VERSION=${BENCHMARK_PYTHON_VERSIONS[${#BENCHMARK_PYTHON_VERSIONS[@]}-1]} 51 | 52 | ALL_FILES=() 53 | 54 | for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" 55 | do 56 | venv_name=q-benchmark-$ver 57 | echo activating $venv_name 58 | pyenv activate $venv_name 59 | echo "==== testing inside $venv_name ===" 60 | if [[ -f $Q_BENCHMARK_RESULTS_FOLDER/${venv_name}.benchmark-results ]] 61 | then 62 | echo "Results files for version $ver already exists skipping benchmark for this version" 63 | continue 64 | fi 65 | 66 | export Q_BENCHMARK_NAME=${venv_name} 67 | export Q_BENCHMARK_ADDITIONAL_PARAMS="-C read" 68 | 69 | Q_BENCHMARK_NAME=${venv_name}-with-caching Q_BENCHMARK_DATA_DIR=./_benchmark_data_with_qsql_caches pytest -m benchmark -k test_q_matrix -v -s $PYTEST_OPTIONS 70 | Q_BENCHMARK_NAME=${venv_name} Q_BENCHMARK_DATA_DIR=./_benchmark_data pytest -m benchmark -k test_q_matrix -v -s $PYTEST_OPTIONS 71 | 72 | RESULT_FILE="${Q_BENCHMARK_RESULTS_FOLDER}/$venv_name.benchmark-results" 73 | echo "==== Done. 
Results are in $RESULT_FILE" 74 | ALL_FILES[${#ALL_FILES[@]}]="$RESULT_FILE" 75 | echo "Deactivating" 76 | pyenv deactivate 77 | done 78 | 79 | exit 0 80 | 81 | pyenv activate q-benchmark-${LATEST_PYTHON_VERSION} 82 | echo "==== testing textql ===" 83 | if [[ -f `ls $Q_BENCHMARK_RESULTS_FOLDER/textql*.benchmark-results` ]] 84 | then 85 | echo "Results files for textql already exist. Skipping benchmark for textql" 86 | else 87 | pytest -m benchmark -k test_textql_matrix -v -s $PYTEST_OPTIONS 88 | RESULT_FILE="textql*.benchmark-results" 89 | ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" 90 | echo "Done. Results are in textql.benchmark-results" 91 | fi 92 | 93 | echo "==== testing octosql ===" 94 | if [[ -f $Q_BENCHMARK_RESULTS_FOLDER/octosql.benchmark-results ]] 95 | then 96 | echo "Results files for octosql aready exist. Skipping benchmark for octosql" 97 | else 98 | pytest -m benchmark -k test_octosql_matrix -v -s $PYTEST_OPTIONS 99 | RESULT_FILE="octosql*.benchmark-results" 100 | ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" 101 | echo "Done. Results are in octosql.benchmark-results" 102 | fi 103 | 104 | summary_file="$Q_BENCHMARK_RESULTS_FOLDER/summary.benchmark-results" 105 | 106 | rm -vf $summary_file 107 | 108 | paste ${ALL_FILES[*]} > $summary_file 109 | echo "Done. 
final results file is $summary_file" 110 | pyenv deactivate 111 | -------------------------------------------------------------------------------- /run-coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | rm -vf ./htmlcov/* 6 | 7 | pytest -m "not benchmark" --cov --cov-report html "$@" 8 | 9 | function cleanup() { 10 | kill %1 11 | } 12 | 13 | # TODO Fix 14 | 15 | # python -m http.server 8000 16 | # open http://localhost:8000/htmlcov/ 17 | 18 | 19 | -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pytest -m 'not benchmark' "$@" 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import setuptools 5 | 6 | q_version = '3.1.6' 7 | 8 | with open("README.markdown", "r", encoding="utf-8") as fh: 9 | long_description = fh.read() 10 | 11 | setup( 12 | name='q', 13 | url='https://github.com/harelba/q', 14 | license='LICENSE', 15 | version=q_version, 16 | author='Harel Ben-Attia', 17 | description="Run SQL directly on CSV or TSV files", 18 | long_description=long_description, 19 | long_description_content_type="text/markdown", 20 | author_email='harelba@gmail.com', 21 | install_requires=[ 22 | 'six==1.11.0' 23 | ], 24 | package_dir={"": "bin"}, 25 | packages=setuptools.find_packages(where="bin"), 26 | entry_points={ 27 | 'console_scripts': [ 28 | 'q = bin.q:run_standalone' 29 | ] 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.2 2 | flake8==3.6.0 3 | six==1.11.0 4 | 
-------------------------------------------------------------------------------- /test/BENCHMARK.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | NOTE: *Please don't use or publish this benchmark data yet. See below for details* 4 | 5 | # Update 6 | q now provides inherent automatic caching capabilities, writing the CSV/TSV file to a `.qsql` file that sits beside the original file. After the cache exists (created as part of an initial query on a file), q knows to use it behind the scenes without changing the query itself, speeding up performance significantly. 7 | 8 | The following table shows the impact of using caching in q: 9 | 10 | | Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | 11 | |:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| 12 | | 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | 13 | | 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | 14 | | 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | 15 | | 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | 16 | | 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | 17 | 18 | 19 | Effectively, `.qsql` files are just standard sqlite3 files, with an additional metadata table that is used for detecting changes in the original delimited file. This means that any tool that can read sqlite3 files can use these files directly. The tradeoff is of course the additional disk usage that the cache files take. 20 | 21 | A good side-effect to this addition, is that q now knows how to directly query multi-file sqlite3 databases. This means that the user can query any sqlite3 database file, or the `.qsql` file itself, even when the original file doesn't exist anymore. 
For example: 22 | 23 | ```bash 24 | q "select a.*,b.* from my_file.csv.qsql a left join some-sqlite3-database:::some_table_name b on (a.id = b.id)" 25 | ``` 26 | 27 | NOTE: In the current version, caching is not enabled by default - Use `-C readwrite` to enable reading+writing cache files, or `-C read` to just read any existing cache files. A `~/.qrc` file can be added in order to make these options the default if you want. 28 | 29 | The benchmark results below reflect the peformance without the caching, e.g. directly reading the delimited files, parsing them and performing the query. 30 | 31 | I'll update benchmark results later on to provide cached results as well. 32 | 33 | # Overview 34 | This just a preliminary benchmark, originally created for validating performance optimizations and suggestions from users, and analyzing q's move to python3. After writing it, I thought it might be interesting to test its speed against textql and octosql as well. 35 | 36 | The results I'm getting are somewhat surprising, to the point of me questioning them a bit, so it would be great to validate the further before finalizing the benchmark results. 37 | 38 | The most surprising results are as follows: 39 | * python3 vs python2 - A huge improvement (for large files, execution times with python 3 are around 40% of the times for python 2) 40 | * python3 vs textql (written in golang) - Seems that textql becomes slower than the python3 q version as the data sizes grows (both rows and columns) 41 | 42 | I would love to validate these results by having other people run the benchmark as well and send me their results. 43 | 44 | If you're interested, follow the instructions and run the benchmark on your machine. After the benchmark is finished, send me the final results file, along with some details about your hardware, and i'll add it to the spreadsheet. 45 | 46 | I've tried to make running the benchmark as seamless as possible, but there obviously might be errors/issues. 
Please contact me if you encounter any issue, or just open a ticket. 47 | 48 | # Benchmark 49 | This is an initial version of the benchmark, along with some results. The following is compared: 50 | * q running on multiple python versions 51 | * textql 2.0.3 52 | * octosql v0.3.0 53 | 54 | The specific python versions which are being tested are specified in `benchmark-config.sh`. 55 | 56 | This is by no means a scientific benchmark, and it only focuses on the data loading time which is the only significant factor for comparison (e.g. the query itself is a very simple count query). Also, it does not try to provide any usability comparison between q and textql/octosql, an interesting topic on its own. 57 | 58 | ## Methodology 59 | The idea was to compare the time sensitivity of row and column count. 60 | 61 | * Row counts: 1,10,100,1000,10000,100000,1000000 62 | * Column counts: 1,5,10,20,50,100 63 | * Iterations for each combination: 10 64 | 65 | File sizes: 66 | * 1M rows by 100 columns - 976MB (~1GB) - Largest file 67 | * 1M rows by 50 columns - 477MB 68 | 69 | The benchmark executes simple `select count(*) from ` queries for each combination, calculating the mean and stddev of each set of iterations. The stddev is used in order to measure the validity of the results. 70 | 71 | The graphs below only compare the means of the results, the standard deviations are written into the google sheet itself, and can be viewed there if needed. 72 | 73 | Instructions on how to run the benchmark are at the bottom section of this document, after the results section. 74 | 75 | ## Hardware 76 | OSX Catalina on a 15" Macbook Pro from Mid 2015, with 16GB of RAM, and an internal Flash Drive of 256GB. 77 | 78 | ## Results 79 | (Results are automatically updated from the baseline tab in the google spreadsheet). 80 | 81 | Detailed results below. 82 | 83 | Summary: 84 | * All python 3 versions (3.6/3.7/3.8) provide similar results across all scales. 
85 | * python 3.x provides significantly better results than python2. Improvement grows as the file size grows (20% improvement for small files, up to ~70% improvement for the largest file) 86 | * textql seems to provide faster results than q (py3) for smaller files, up to around 30MB of data. As the size grows further, it becomes slower than q, up to 80% (74 seconds vs 41 seconds) for the largest file 87 | * The larger the files, textql becomes slower than q-py3 (up to 80% more time than q for the largest file) 88 | * octosql is significantly slower than both q and textql, even for small files with a low number of rows and columns 89 | 90 | ### Data for 1M rows 91 | 92 | #### Run time durations for 1M rows and different column counts: 93 | | rows | columns | File Size | python 2.7 | python 3.6 | python 3.7 | python 3.8 | textql | octosql | 94 | |:-------: |:-------: |:---------: |:----------: |:----------: |:----------: |:----------: |:------: |:-------: | 95 | | 1000000 | 1 | 17M | 5.15 | 4.24 | 4.08 | 3.98 | 2.90 | 49.95 | 96 | | 1000000 | 5 | 37M | 10.68 | 5.37 | 5.26 | 5.14 | 5.88 | 54.69 | 97 | | 1000000 | 10 | 89M | 17.56 | 7.25 | 7.15 | 7.01 | 9.69 | 65.32 | 98 | | 1000000 | 20 | 192M | 30.28 | 10.96 | 10.78 | 10.64 | 17.34 | 83.94 | 99 | | 1000000 | 50 | 477M | 71.56 | 21.98 | 21.59 | 21.70 | 38.57 | 158.26 | 100 | | 1000000 | 100 | 986M | 131.86 | 41.71 | 40.82 | 41.02 | 74.62 | 289.58 | 101 | 102 | #### Comparison between python 3.x and python 2 run times (1M rows): 103 | (>100% is slower than q-py2, <100% is faster than q-py2) 104 | 105 | | rows | columns | file size | q-py2 runtime | q-py3.6 vs q-py2 runtime | q-py3.7 vs q-py2 runtime | q-py3.8 vs q-py2 runtime | 106 | |:-------: |:-------: |:---------: |:-------------: |:------------------------: |:------------------------: |:------------------------: | 107 | | 1000000 | 1 | 17M | 100.00% | 82.34% | 79.34% | 77.36% | 108 | | 1000000 | 5 | 37M | 100.00% | 50.25% | 49.22% | 48.08% | 109 | | 1000000 | 10 
| 89M | 100.00% | 41.30% | 40.69% | 39.93% | 110 | | 1000000 | 20 | 192M | 100.00% | 36.18% | 35.59% | 35.14% | 111 | | 1000000 | 50 | 477M | 100.00% | 30.71% | 30.17% | 30.32% | 112 | | 1000000 | 100 | 986M | 100.00% | 31.63% | 30.96% | 31.11% | 113 | 114 | #### textql and octosql comparison against q-py3 run time (1M rows): 115 | (>100% is slower than q-py3, <100% is faster than q-py3) 116 | 117 | | rows | columns | file size | avg q-py3 runtime | textql vs q-py3 runtime | octosql vs q-py3 runtime | 118 | |:-------: |:-------: |:---------: |:-----------------: |:-----------------------: |:------------------------: | 119 | | 1000000 | 1 | 17M | 100.00% | 70.67% | 1217.76% | 120 | | 1000000 | 5 | 37M | 100.00% | 111.86% | 1040.70% | 121 | | 1000000 | 10 | 89M | 100.00% | 135.80% | 915.28% | 122 | | 1000000 | 20 | 192M | 100.00% | 160.67% | 777.92% | 123 | | 1000000 | 50 | 477M | 100.00% | 177.26% | 727.40% | 124 | | 1000000 | 100 | 986M | 100.00% | 181.19% | 703.15% | 125 | 126 | ### Sensitivity to column count 127 | Based on a the largest file size of 1,000,000 rows. 
128 | 129 | ![Sensitivity to column count](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1585602598&format=image) 130 | 131 | ### Sensitivity to line count (per column count) 132 | 133 | #### 1 Column Table 134 | ![1 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1119350798&format=image) 135 | 136 | #### 5 Column Table 137 | ![5 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=599223098&format=image) 138 | 139 | #### 10 Column Table 140 | ![10 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=82695414&format=image) 141 | 142 | #### 20 Column Table 143 | ![20 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1573199483&format=image) 144 | 145 | #### 50 Column Table 146 | ![50 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=448568670&format=image) 147 | 148 | #### 100 Column Table 149 | ![100 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=2101488258&format=image) 150 | 151 | ## Running the benchmark 152 | Please note that the initial run generates large files, so you'd need more than 3GB of free space available. All the generated files reside in the `_benchmark_data/` folder. 153 | 154 | Part of the preparation flow will download the benchmark data as needed. 
155 | 156 | ### Preparations 157 | * Prerequisites: 158 | * pyenv installed 159 | * pyenv-virtualenv installed 160 | * [`textql`](https://github.com/dinedal/textql#install) 161 | * [`octosql`](https://github.com/cube2222/octosql#installation) 162 | 163 | Run `./prepare-benchmark-env` 164 | 165 | ### Execution 166 | Run `./run-benchmark <benchmark-id>`. 167 | 168 | Benchmark output files will be written to `./benchmark-results/<q-executable>/<benchmark-id>/`. 169 | 170 | * `benchmark-id` is the id you want to give the benchmark. 171 | * `q-executable` is the name of the q executable being used for the benchmark. If none has been provided through Q_EXECUTABLE, then the value will be the last commit hash. Note that there is no checking of whether the working tree is clean. 172 | 173 | The summary of the benchmark will be written to `./benchmark-results/<q-executable>/<benchmark-id>/summary.benchmark-results`. 174 | 175 | By default, the benchmark will use the source python files inside the project. If you want to run it on one of the standalone binary executables, then set Q_EXECUTABLE to the full path of the q binary. 176 | 177 | For anyone helping with running the benchmark, don't use this parameter for now, just test against a clean checkout of the code using `./run-benchmark <benchmark-id>`. 178 | 179 | ## Benchmark Development info 180 | ### Running against the standalone binary 181 | * `./run-benchmark` can accept a second parameter with the q executable. If it gets this parameter, it will use this path for running q. This provides a way to test the standalone q binaries in the new packaging format. When this parameter does not exist, the benchmark is executed directly from the source code. 182 | 183 | ### Updating the benchmark markdown document file 184 | The results should reside in the following [google sheet](https://docs.google.com/spreadsheets/d/1Ljr8YIJwUQ5F4wr6ATga5Aajpu1CvQp1pe52KGrLkbY/edit?usp=sharing). 185 | 186 | Add a new tab to the google sheet, and paste the content of `summary.benchmark-results` to the new sheet. 
187 | 188 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harelba/q/03e8b395055747a45f8c12480fd4ed95c2b4e906/test/__init__.py -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/octosql_v0.3.0.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 2 | 1 1 0.582091641426 0.0235290239617 3 | 10 1 0.596219730377 0.0320124029461 4 | 100 1 0.575977492332 0.0199296245316 5 | 1000 1 0.56785056591 0.00846389017466 6 | 10000 1 1.1466334343 0.00760108698846 7 | 100000 1 5.49565172195 0.131791932977 8 | 1000000 1 49.9513648033 0.443430523063 9 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 10 | 1 5 0.582160949707 0.0274409391571 11 | 10 5 0.57046456337 0.0199413000359 12 | 100 5 0.585747480392 0.0372543971623 13 | 1000 5 0.572268772125 0.00384300349763 14 | 10000 5 1.15530762672 0.0117990775856 15 | 100000 5 6.10629923344 0.146711842919 16 | 1000000 5 54.6851765394 0.315486399525 17 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 18 | 1 10 0.586222410202 0.0232479065914 19 | 10 10 0.59000480175 0.0186508192447 20 | 100 10 0.581873703003 0.0331332482772 21 | 1000 10 0.569027900696 0.0103675493106 22 | 10000 10 1.40067322254 0.00583352224401 23 | 100000 10 7.30705575943 0.0165839217599 24 | 1000000 10 65.3242264032 0.512552576414 25 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 26 | 1 20 0.571048212051 0.0166919396871 27 | 10 20 0.594776701927 0.0368900941023 28 | 100 20 0.561370825768 0.00907051791451 29 | 1000 20 0.577527880669 0.00983965108957 30 | 10000 20 1.90710241795 0.00757011452155 31 | 100000 20 9.8267291069 0.127844155326 32 | 1000000 20 
83.9448960066 0.46121344046 33 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 34 | 1 50 0.572030115128 0.0253648479103 35 | 10 50 0.56993534565 0.0230474303306 36 | 100 50 0.563336873055 0.00964411866903 37 | 1000 50 0.826378440857 0.00941629472813 38 | 10000 50 3.27872717381 0.126592845956 39 | 100000 50 17.890055728 0.116794666005 40 | 1000000 50 158.262442636 0.826290454446 41 | lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 42 | 1 100 0.569358110428 0.0279801762531 43 | 10 100 0.580981063843 0.0272341107532 44 | 100 100 0.559471726418 0.00668155858429 45 | 1000 100 1.08161640167 0.00698594638512 46 | 10000 100 5.67823712826 0.0123398407167 47 | 100000 100 32.2797194242 0.315508270241 48 | 1000000 100 289.582628798 0.929455236817 49 | -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-2.7.18.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 2 | 1 1 0.106449890137 0.002010027753 3 | 10 1 0.106737875938 0.00224112203891 4 | 100 1 0.107839012146 0.00102954061006 5 | 1000 1 0.113026666641 0.00147361890226 6 | 10000 1 0.160376381874 0.00569766179806 7 | 100000 1 0.608236479759 0.00604026519608 8 | 1000000 1 5.14807910919 0.0584474028762 9 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 10 | 1 5 0.106719517708 0.00236752032369 11 | 10 5 0.107823801041 0.00238873169438 12 | 100 5 0.109785079956 0.0013047675259 13 | 1000 5 0.120395207405 0.00207224422629 14 | 10000 5 0.21783041954 0.00522254475716 15 | 100000 5 1.17115747929 0.0221394865225 16 | 1000000 5 10.6830974817 0.339822977934 17 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 18 | 1 10 0.104981088638 0.00166552032929 19 | 10 10 0.108320140839 0.00204034349199 20 | 100 10 0.112528729439 0.00168376477305 21 
| 1000 10 0.13019015789 0.00253773120965 22 | 10000 10 0.284891676903 0.00384009140782 23 | 100000 10 1.84725661278 0.00860738744089 24 | 1000000 10 17.5610994339 0.228322442172 25 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 26 | 1 20 0.106477689743 0.00254429925697 27 | 10 20 0.108580899239 0.00173704653824 28 | 100 20 0.118750286102 0.00247623639866 29 | 1000 20 0.146431708336 0.00249685551944 30 | 10000 20 0.419492387772 0.00248210434668 31 | 100000 20 3.15847921371 0.0550301268026 32 | 1000000 20 30.279082489 0.124978814506 33 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 34 | 1 50 0.105411934853 0.00171651054128 35 | 10 50 0.109102797508 0.00111620290512 36 | 100 50 0.135682177544 0.00196166766665 37 | 1000 50 0.198261427879 0.00396172489054 38 | 10000 50 0.821499919891 0.0111642692132 39 | 100000 50 7.05980975628 0.121182371277 40 | 1000000 50 71.5645889759 5.02009516291 41 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev 42 | 1 100 0.10662381649 0.00193146624495 43 | 10 100 0.110662698746 0.00171461379583 44 | 100 100 0.163547992706 0.00166570196628 45 | 1000 100 0.280023741722 0.00337543024145 46 | 10000 100 1.46053376198 0.0221691284465 47 | 100000 100 13.2369835854 0.309375896258 48 | 1000000 100 131.864977288 1.22415449691 49 | -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.6.4.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 2 | 1 1 0.10342762470245362 0.0017673875851759295 3 | 10 1 0.10239293575286865 0.0012505611685910795 4 | 100 1 0.10317318439483643 0.0010581783881541751 5 | 1000 1 0.10687050819396973 0.0014050135772919004 6 | 10000 1 0.1447664737701416 0.001841256227287192 7 | 100000 1 0.5162809371948243 0.006962985088492867 8 | 
1000000 1 4.238853335380554 0.04834401143632507 9 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 10 | 1 5 0.10211825370788574 0.0022568191323651568 11 | 10 5 0.1025341272354126 0.0016446470901070106 12 | 100 5 0.1053577184677124 0.0015298114223855884 13 | 1000 5 0.10980842113494874 0.002536098780902228 14 | 10000 5 0.1590113162994385 0.003123074098301634 15 | 100000 5 0.6348223447799682 0.0082691507829872 16 | 1000000 5 5.368562030792236 0.11628913334105236 17 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 18 | 1 10 0.10251858234405517 0.0015963869535345293 19 | 10 10 0.10278875827789306 0.0009920577082124496 20 | 100 10 0.10715732574462891 0.002033320000941064 21 | 1000 10 0.11389360427856446 0.0023603847702423973 22 | 10000 10 0.17806434631347656 0.001114054252191835 23 | 100000 10 0.8252989768981933 0.0037080843359275904 24 | 1000000 10 7.252838873863221 0.029052130546213153 25 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 26 | 1 20 0.10367965698242188 0.003661761341842434 27 | 10 20 0.10489590167999267 0.001977141196109372 28 | 100 20 0.11108210086822509 0.0014801173497056886 29 | 1000 20 0.12110791206359864 0.001648524669420912 30 | 10000 20 0.2178968906402588 0.0019298316207276716 31 | 100000 20 1.1962245225906372 0.010541407803235559 32 | 1000000 20 10.956057572364807 0.12677108174061705 33 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 34 | 1 50 0.10458300113677979 0.0016367630302744722 35 | 10 50 0.10616152286529541 0.002345135740908088 36 | 100 50 0.12375867366790771 0.00238414904864133 37 | 1000 50 0.14462883472442628 0.0022428030896492978 38 | 10000 50 0.34488487243652344 0.004867441221052092 39 | 100000 50 2.3394312858581543 0.02263239858944125 40 | 1000000 50 21.979821610450745 0.09080404939303836 41 | lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev 42 | 1 100 0.10372309684753418 0.0010299126833031144 43 | 10 100 0.10784556865692138 0.0016557634029464607 44 | 100 100 
0.14526791572570802 0.0028194506905186724 45 | 1000 100 0.18315494060516357 0.0023585311962114673 46 | 10000 100 0.5586131334304809 0.004808492789681402 47 | 100000 100 4.287398314476013 0.00957500108409644 48 | 1000000 100 41.706851434707644 0.4161526076289425 49 | -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.7.9.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 2 | 1 1 0.08099310398101807 0.001417385651688644 3 | 10 1 0.0822291374206543 0.0014809900020001858 4 | 100 1 0.08169686794281006 0.002108157069167563 5 | 1000 1 0.08690853118896484 0.0012595326919263487 6 | 10000 1 0.12215542793273926 0.0020152625320395434 7 | 100000 1 0.4825761795043945 0.0050418000028856335 8 | 1000000 1 4.084399747848511 0.027731958079814215 9 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 10 | 1 5 0.0817826271057129 0.002665533758836163 11 | 10 5 0.08261749744415284 0.0019205430658525572 12 | 100 5 0.08472237586975098 0.002571239449841039 13 | 1000 5 0.08973510265350342 0.002323797583077552 14 | 10000 5 0.13746986389160157 0.001964971666036654 15 | 100000 5 0.60649254322052 0.007131635266871318 16 | 1000000 5 5.2585612535476685 0.05661789407928516 17 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 18 | 1 10 0.08112843036651611 0.002251300165899426 19 | 10 10 0.08175232410430908 0.0014557171018568637 20 | 100 10 0.08572309017181397 0.0019643550214810675 21 | 1000 10 0.09268453121185302 0.001816414236580489 22 | 10000 10 0.15538835525512695 0.0024978076091814994 23 | 100000 10 0.7879442930221557 0.009412516078916211 24 | 1000000 10 7.146207928657532 0.06659760176757985 25 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 26 | 1 20 0.08142082691192627 0.001304584466639188 27 | 10 20 
0.08197519779205323 0.0014842098503865223 28 | 100 20 0.08949971199035645 0.0009937446141285785 29 | 1000 20 0.09955930709838867 0.0013978961740806384 30 | 10000 20 0.1966566801071167 0.0028489273218240147 31 | 100000 20 1.1518636226654053 0.006410720031542237 32 | 1000000 20 10.776052689552307 0.04739925571001746 33 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 34 | 1 50 0.08237688541412354 0.0016494314799953837 35 | 10 50 0.08519520759582519 0.002610550182895596 36 | 100 50 0.10423583984375 0.0018808335751867933 37 | 1000 50 0.12195603847503662 0.0023611894043373983 38 | 10000 50 0.3163540124893188 0.002761333651520998 39 | 100000 50 2.237372374534607 0.009955353920396077 40 | 1000000 50 21.59097549915314 0.081188190530421 41 | lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev 42 | 1 100 0.08336784839630126 0.0013840724401561887 43 | 10 100 0.0864112138748169 0.0017946939354350697 44 | 100 100 0.12199611663818359 0.0013003743156634682 45 | 1000 100 0.15871686935424806 0.0035993681064501234 46 | 10000 100 0.5243751525878906 0.004370273273595629 47 | 100000 100 4.175828623771667 0.016127303710583043 48 | 1000000 100 40.82292411327362 0.12328165162380703 49 | -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.8.5.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 2 | 1 1 0.10138180255889892 0.0017947074090971444 3 | 10 1 0.10056869983673096 0.003442371291904885 4 | 100 1 0.10126984119415283 0.0016392348107127808 5 | 1000 1 0.10484635829925537 0.0019743937339163262 6 | 10000 1 0.1400548219680786 0.0024523366133394117 7 | 100000 1 0.4901275157928467 0.003970374711691596 8 | 1000000 1 3.982502889633179 0.045292138461945054 9 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 
10 | 1 5 0.09946837425231933 0.0018876161478998787 11 | 10 5 0.099178147315979 0.0014194733014858227 12 | 100 5 0.10171806812286377 0.0017580984705406846 13 | 1000 5 0.10602672100067138 0.002000261880840017 14 | 10000 5 0.15207929611206056 0.0015802680033212048 15 | 100000 5 0.609218978881836 0.006150144273259608 16 | 1000000 5 5.13688440322876 0.03649575898109647 17 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 18 | 1 10 0.09925477504730225 0.002168389758635997 19 | 10 10 0.09943633079528809 0.0016154501074880502 20 | 100 10 0.10376312732696533 0.0017275485891005433 21 | 1000 10 0.11087138652801513 0.0016934328033239559 22 | 10000 10 0.17246220111846924 0.0023824485659318527 23 | 100000 10 0.7999232530593872 0.003442975393506892 24 | 1000000 10 7.012071299552917 0.059217904448851263 25 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 26 | 1 20 0.10027089118957519 0.0020291529595204906 27 | 10 20 0.10038816928863525 0.001957086760826999 28 | 100 20 0.10723590850830078 0.0013833918448622436 29 | 1000 20 0.11735000610351562 0.0020318895390750882 30 | 10000 20 0.21264209747314453 0.00482341642419078 31 | 100000 20 1.1567201137542724 0.002987096441878969 32 | 1000000 20 10.640758633613586 0.06116581724028616 33 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 34 | 1 50 0.10066506862640381 0.002051307639276982 35 | 10 50 0.10588631629943848 0.0035835389655972105 36 | 100 50 0.11841504573822022 0.001608174845404568 37 | 1000 50 0.14032282829284667 0.002640027148889162 38 | 10000 50 0.33160474300384524 0.0027796660009712947 39 | 100000 50 2.258401036262512 0.011041280982383895 40 | 1000000 50 21.70080256462097 0.15897944629180621 41 | lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev 42 | 1 100 0.10147004127502442 0.0021285682695135768 43 | 10 100 0.10471885204315186 0.001248479289219899 44 | 100 100 0.13894760608673096 0.002307980025026551 45 | 1000 100 0.17586205005645753 0.0023822296091426 46 | 10000 100 
0.5414002418518067 0.0036291866664635458 47 | 100000 100 4.222555088996887 0.08562968951916528 48 | 1000000 100 41.021552324295044 0.16033566363076862 49 | -------------------------------------------------------------------------------- /test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/summary.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 2 | 1 1 0.106449890137 0.002010027753 1 1 0.10342762470245362 0.0017673875851759295 1 1 0.08099310398101807 0.001417385651688644 1 1 0.10138180255889892 0.0017947074090971444 1 1 0.0196103572845 0.00207355214257 1 1 0.582091641426 0.0235290239617 3 | 10 1 0.106737875938 0.00224112203891 10 1 0.10239293575286865 0.0012505611685910795 10 1 0.0822291374206543 0.0014809900020001858 10 1 0.10056869983673096 0.003442371291904885 10 1 0.0186784029007 0.000970810220668 10 1 0.596219730377 0.0320124029461 4 | 100 1 0.107839012146 0.00102954061006 100 1 0.10317318439483643 0.0010581783881541751 100 1 0.08169686794281006 0.002108157069167563 100 1 0.10126984119415283 0.0016392348107127808 100 1 0.019472026825 0.00181951524514 100 1 0.575977492332 0.0199296245316 5 | 1000 1 0.113026666641 0.00147361890226 1000 1 0.10687050819396973 0.0014050135772919004 1000 1 0.08690853118896484 0.0012595326919263487 1000 1 0.10484635829925537 0.0019743937339163262 1000 1 0.022180891037 0.00116649968967 1000 1 0.56785056591 0.00846389017466 6 | 10000 1 0.160376381874 0.00569766179806 10000 1 0.1447664737701416 0.001841256227287192 10000 1 0.12215542793273926 0.0020152625320395434 10000 1 
0.1400548219680786 0.0024523366133394117 10000 1 0.051066827774 0.0018168767618 10000 1 1.1466334343 0.00760108698846 7 | 100000 1 0.608236479759 0.00604026519608 100000 1 0.5162809371948243 0.006962985088492867 100000 1 0.4825761795043945 0.0050418000028856335 100000 1 0.4901275157928467 0.003970374711691596 100000 1 0.307463979721 0.00246268029188 100000 1 5.49565172195 0.131791932977 8 | 1000000 1 5.14807910919 0.0584474028762 1000000 1 4.238853335380554 0.04834401143632507 1000000 1 4.084399747848511 0.027731958079814215 1000000 1 3.982502889633179 0.045292138461945054 1000000 1 2.89862303734 0.022182722976 1000000 1 49.9513648033 0.443430523063 9 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 10 | 1 5 0.106719517708 0.00236752032369 1 5 0.10211825370788574 0.0022568191323651568 1 5 0.0817826271057129 0.002665533758836163 1 5 0.09946837425231933 0.0018876161478998787 1 5 0.0195286750793 0.0017840569109 1 5 0.582160949707 0.0274409391571 11 | 10 5 0.107823801041 0.00238873169438 10 5 0.1025341272354126 0.0016446470901070106 10 5 0.08261749744415284 0.0019205430658525572 10 5 0.099178147315979 0.0014194733014858227 10 5 0.0183676958084 0.000925251595491 10 5 0.57046456337 0.0199413000359 12 | 100 5 0.109785079956 0.0013047675259 100 5 0.1053577184677124 0.0015298114223855884 100 5 0.08472237586975098 0.002571239449841039 100 5 0.10171806812286377 0.0017580984705406846 100 5 0.0199447393417 0.000907007099218 100 5 0.585747480392 0.0372543971623 13 | 1000 5 0.120395207405 0.00207224422629 1000 5 0.10980842113494874 0.002536098780902228 1000 5 0.08973510265350342 0.002323797583077552 1000 5 0.10602672100067138 0.002000261880840017 1000 5 0.0263328790665 
0.00165486505938 1000 5 0.572268772125 0.00384300349763 14 | 10000 5 0.21783041954 0.00522254475716 10000 5 0.1590113162994385 0.003123074098301634 10000 5 0.13746986389160157 0.001964971666036654 10000 5 0.15207929611206056 0.0015802680033212048 10000 5 0.0826982736588 0.00152451583229 10000 5 1.15530762672 0.0117990775856 15 | 100000 5 1.17115747929 0.0221394865225 100000 5 0.6348223447799682 0.0082691507829872 100000 5 0.60649254322052 0.007131635266871318 100000 5 0.609218978881836 0.006150144273259608 100000 5 0.60660867691 0.00395761320274 100000 5 6.10629923344 0.146711842919 16 | 1000000 5 10.6830974817 0.339822977934 1000000 5 5.368562030792236 0.11628913334105236 1000000 5 5.2585612535476685 0.05661789407928516 1000000 5 5.13688440322876 0.03649575898109647 1000000 5 5.87811236382 0.0304332294491 1000000 5 54.6851765394 0.315486399525 17 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 18 | 1 10 0.104981088638 0.00166552032929 1 10 0.10251858234405517 0.0015963869535345293 1 10 0.08112843036651611 0.002251300165899426 1 10 0.09925477504730225 0.002168389758635997 1 10 0.0191783189774 0.00107718516178 1 10 0.586222410202 0.0232479065914 19 | 10 10 0.108320140839 0.00204034349199 10 10 0.10278875827789306 0.0009920577082124496 10 10 0.08175232410430908 0.0014557171018568637 10 10 0.09943633079528809 0.0016154501074880502 10 10 0.0185215950012 0.000840353961363 10 10 0.59000480175 0.0186508192447 20 | 100 10 0.112528729439 0.00168376477305 100 10 0.10715732574462891 0.002033320000941064 100 10 0.08572309017181397 0.0019643550214810675 100 10 0.10376312732696533 0.0017275485891005433 100 10 0.0209223031998 0.00164494657684 100 10 0.581873703003 
0.0331332482772 21 | 1000 10 0.13019015789 0.00253773120965 1000 10 0.11389360427856446 0.0023603847702423973 1000 10 0.09268453121185302 0.001816414236580489 1000 10 0.11087138652801513 0.0016934328033239559 1000 10 0.0309282779694 0.00110848590345 1000 10 0.569027900696 0.0103675493106 22 | 10000 10 0.284891676903 0.00384009140782 10000 10 0.17806434631347656 0.001114054252191835 10000 10 0.15538835525512695 0.0024978076091814994 10000 10 0.17246220111846924 0.0023824485659318527 10000 10 0.121016025543 0.00105071105139 10000 10 1.40067322254 0.00583352224401 23 | 100000 10 1.84725661278 0.00860738744089 100000 10 0.8252989768981933 0.0037080843359275904 100000 10 0.7879442930221557 0.009412516078916211 100000 10 0.7999232530593872 0.003442975393506892 100000 10 0.987622976303 0.00699348302979 100000 10 7.30705575943 0.0165839217599 24 | 1000000 10 17.5610994339 0.228322442172 1000000 10 7.252838873863221 0.029052130546213153 1000000 10 7.146207928657532 0.06659760176757985 1000000 10 7.012071299552917 0.059217904448851263 1000000 10 9.69240145683 0.0354453778052 1000000 10 65.3242264032 0.512552576414 25 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 26 | 1 20 0.106477689743 0.00254429925697 1 20 0.10367965698242188 0.003661761341842434 1 20 0.08142082691192627 0.001304584466639188 1 20 0.10027089118957519 0.0020291529595204906 1 20 0.0202306985855 0.00159619251952 1 20 0.571048212051 0.0166919396871 27 | 10 20 0.108580899239 0.00173704653824 10 20 0.10489590167999267 0.001977141196109372 10 20 0.08197519779205323 0.0014842098503865223 10 20 0.10038816928863525 0.001957086760826999 10 20 0.0187650680542 0.000845692486156 10 20 0.594776701927 
0.0368900941023 28 | 100 20 0.118750286102 0.00247623639866 100 20 0.11108210086822509 0.0014801173497056886 100 20 0.08949971199035645 0.0009937446141285785 100 20 0.10723590850830078 0.0013833918448622436 100 20 0.0211876153946 0.000993808448942 100 20 0.561370825768 0.00907051791451 29 | 1000 20 0.146431708336 0.00249685551944 1000 20 0.12110791206359864 0.001648524669420912 1000 20 0.09955930709838867 0.0013978961740806384 1000 20 0.11735000610351562 0.0020318895390750882 1000 20 0.0404737234116 0.00122415059261 1000 20 0.577527880669 0.00983965108957 30 | 10000 20 0.419492387772 0.00248210434668 10000 20 0.2178968906402588 0.0019298316207276716 10000 20 0.1966566801071167 0.0028489273218240147 10000 20 0.21264209747314453 0.00482341642419078 10000 20 0.197762489319 0.00198188642677 10000 20 1.90710241795 0.00757011452155 31 | 100000 20 3.15847921371 0.0550301268026 100000 20 1.1962245225906372 0.010541407803235559 100000 20 1.1518636226654053 0.006410720031542237 100000 20 1.1567201137542724 0.002987096441878969 100000 20 1.75432097912 0.00692372147543 100000 20 9.8267291069 0.127844155326 32 | 1000000 20 30.279082489 0.124978814506 1000000 20 10.956057572364807 0.12677108174061705 1000000 20 10.776052689552307 0.04739925571001746 1000000 20 10.640758633613586 0.06116581724028616 1000000 20 17.3383012295 0.0410164637448 1000000 20 83.9448960066 0.46121344046 33 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 34 | 1 50 0.105411934853 0.00171651054128 1 50 0.10458300113677979 0.0016367630302744722 1 50 0.08237688541412354 0.0016494314799953837 1 50 0.10066506862640381 0.002051307639276982 1 50 0.0205577373505 0.00133922342068 1 50 0.572030115128 
0.0253648479103 35 | 10 50 0.109102797508 0.00111620290512 10 50 0.10616152286529541 0.002345135740908088 10 50 0.08519520759582519 0.002610550182895596 10 50 0.10588631629943848 0.0035835389655972105 10 50 0.0195438146591 0.000791630611893 10 50 0.56993534565 0.0230474303306 36 | 100 50 0.135682177544 0.00196166766665 100 50 0.12375867366790771 0.00238414904864133 100 50 0.10423583984375 0.0018808335751867933 100 50 0.11841504573822022 0.001608174845404568 100 50 0.0246078014374 0.00108949795701 100 50 0.563336873055 0.00964411866903 37 | 1000 50 0.198261427879 0.00396172489054 1000 50 0.14462883472442628 0.0022428030896492978 1000 50 0.12195603847503662 0.0023611894043373983 1000 50 0.14032282829284667 0.002640027148889162 1000 50 0.063302564621 0.00058195987294 1000 50 0.826378440857 0.00941629472813 38 | 10000 50 0.821499919891 0.0111642692132 10000 50 0.34488487243652344 0.004867441221052092 10000 50 0.3163540124893188 0.002761333651520998 10000 50 0.33160474300384524 0.0027796660009712947 10000 50 0.410061001778 0.00294901155085 10000 50 3.27872717381 0.126592845956 39 | 100000 50 7.05980975628 0.121182371277 100000 50 2.3394312858581543 0.02263239858944125 100000 50 2.237372374534607 0.009955353920396077 100000 50 2.258401036262512 0.011041280982383895 100000 50 3.87797718048 0.0123467913678 100000 50 17.890055728 0.116794666005 40 | 1000000 50 71.5645889759 5.02009516291 1000000 50 21.979821610450745 0.09080404939303836 1000000 50 21.59097549915314 0.081188190530421 1000000 50 21.70080256462097 0.15897944629180621 1000000 50 38.5674883366 0.0602820291386 1000000 50 158.262442636 0.826290454446 41 | lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev 42 | 1 
100 0.10662381649 0.00193146624495 1 100 0.10372309684753418 0.0010299126833031144 1 100 0.08336784839630126 0.0013840724401561887 1 100 0.10147004127502442 0.0021285682695135768 1 100 0.0216581106186 0.00103280947157 1 100 0.569358110428 0.0279801762531 43 | 10 100 0.110662698746 0.00171461379583 10 100 0.10784556865692138 0.0016557634029464607 10 100 0.0864112138748169 0.0017946939354350697 10 100 0.10471885204315186 0.001248479289219899 10 100 0.021723818779 0.000920429257416 10 100 0.580981063843 0.0272341107532 44 | 100 100 0.163547992706 0.00166570196628 100 100 0.14526791572570802 0.0028194506905186724 100 100 0.12199611663818359 0.0013003743156634682 100 100 0.13894760608673096 0.002307980025026551 100 100 0.0299471855164 0.00130217326679 100 100 0.559471726418 0.00668155858429 45 | 1000 100 0.280023741722 0.00337543024145 1000 100 0.18315494060516357 0.0023585311962114673 1000 100 0.15871686935424806 0.0035993681064501234 1000 100 0.17586205005645753 0.0023822296091426 1000 100 0.0996923923492 0.00155352212734 1000 100 1.08161640167 0.00698594638512 46 | 10000 100 1.46053376198 0.0221691284465 10000 100 0.5586131334304809 0.004808492789681402 10000 100 0.5243751525878906 0.004370273273595629 10000 100 0.5414002418518067 0.0036291866664635458 10000 100 0.767001605034 0.00328944029633 10000 100 5.67823712826 0.0123398407167 47 | 100000 100 13.2369835854 0.309375896258 100000 100 4.287398314476013 0.00957500108409644 100000 100 4.175828623771667 0.016127303710583043 100000 100 4.222555088996887 0.08562968951916528 100000 100 7.46734063625 0.0262039846119 100000 100 32.2797194242 0.315508270241 48 | 1000000 100 131.864977288 1.22415449691 1000000 100 41.706851434707644 0.4161526076289425 1000000 100 40.82292411327362 0.12328165162380703 1000000 100 41.021552324295044 0.16033566363076862 1000000 100 74.6216712952 0.0994037504394 1000000 100 289.582628798 0.929455236817 49 | -------------------------------------------------------------------------------- 
/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/textql_2.0.3.benchmark-results: -------------------------------------------------------------------------------- 1 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 2 | 1 1 0.0196103572845 0.00207355214257 3 | 10 1 0.0186784029007 0.000970810220668 4 | 100 1 0.019472026825 0.00181951524514 5 | 1000 1 0.022180891037 0.00116649968967 6 | 10000 1 0.051066827774 0.0018168767618 7 | 100000 1 0.307463979721 0.00246268029188 8 | 1000000 1 2.89862303734 0.022182722976 9 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 10 | 1 5 0.0195286750793 0.0017840569109 11 | 10 5 0.0183676958084 0.000925251595491 12 | 100 5 0.0199447393417 0.000907007099218 13 | 1000 5 0.0263328790665 0.00165486505938 14 | 10000 5 0.0826982736588 0.00152451583229 15 | 100000 5 0.60660867691 0.00395761320274 16 | 1000000 5 5.87811236382 0.0304332294491 17 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 18 | 1 10 0.0191783189774 0.00107718516178 19 | 10 10 0.0185215950012 0.000840353961363 20 | 100 10 0.0209223031998 0.00164494657684 21 | 1000 10 0.0309282779694 0.00110848590345 22 | 10000 10 0.121016025543 0.00105071105139 23 | 100000 10 0.987622976303 0.00699348302979 24 | 1000000 10 9.69240145683 0.0354453778052 25 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 26 | 1 20 0.0202306985855 0.00159619251952 27 | 10 20 0.0187650680542 0.000845692486156 28 | 100 20 0.0211876153946 0.000993808448942 29 | 1000 20 0.0404737234116 0.00122415059261 30 | 10000 20 0.197762489319 0.00198188642677 31 | 100000 20 1.75432097912 0.00692372147543 32 | 1000000 20 17.3383012295 0.0410164637448 33 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 34 | 1 50 0.0205577373505 0.00133922342068 35 | 10 50 0.0195438146591 0.000791630611893 36 | 100 50 0.0246078014374 0.00108949795701 37 | 1000 50 0.063302564621 0.00058195987294 38 | 10000 50 0.410061001778 0.00294901155085 39 | 100000 50 3.87797718048 
0.0123467913678 40 | 1000000 50 38.5674883366 0.0602820291386 41 | lines columns textql_2.0.3_mean textql_2.0.3_stddev 42 | 1 100 0.0216581106186 0.00103280947157 43 | 10 100 0.021723818779 0.000920429257416 44 | 100 100 0.0299471855164 0.00130217326679 45 | 1000 100 0.0996923923492 0.00155352212734 46 | 10000 100 0.767001605034 0.00328944029633 47 | 100000 100 7.46734063625 0.0262039846119 48 | 1000000 100 74.6216712952 0.0994037504394 49 | --------------------------------------------------------------------------------