├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── Jenkinsfile ├── LICENSE ├── README.md ├── docs ├── .gitignore ├── Makefile ├── class.rst ├── cli.template ├── conf.py ├── index.rst ├── introduction.md ├── make.bat ├── perl │ ├── Makefile │ ├── cgi_to_db.rst │ ├── combined_log_format_to_db.rst │ ├── csv_to_db.rst │ ├── db_to_csv.rst │ ├── db_to_html_table.rst │ ├── dbcol.rst │ ├── dbcolcopylast.rst │ ├── dbcolcreate.rst │ ├── dbcoldefine.rst │ ├── dbcolhisto.rst │ ├── dbcolize.rst │ ├── dbcolmerge.rst │ ├── dbcolmovingstats.rst │ ├── dbcolneaten.rst │ ├── dbcolpercentile.rst │ ├── dbcolrename.rst │ ├── dbcolscorrelate.rst │ ├── dbcolsplittocols.rst │ ├── dbcolsplittorows.rst │ ├── dbcolsregression.rst │ ├── dbcolstats.rst │ ├── dbcolstatscores.rst │ ├── dbcoltype.rst │ ├── dbfilealter.rst │ ├── dbfilecat.rst │ ├── dbfilediff.rst │ ├── dbfilepivot.rst │ ├── dbfilestripcomments.rst │ ├── dbfilevalidate.rst │ ├── dbformmail.rst │ ├── dbjoin.rst │ ├── dblistize.rst │ ├── dbmapreduce.rst │ ├── dbmerge.rst │ ├── dbmerge2.rst │ ├── dbmultistats.rst │ ├── dbrecolize.rst │ ├── dbrow.rst │ ├── dbrowaccumulate.rst │ ├── dbrowcount.rst │ ├── dbrowdiff.rst │ ├── dbrowenumerate.rst │ ├── dbroweval.rst │ ├── dbrowuniq.rst │ ├── dbrvstatdiff.rst │ ├── dbsort.rst │ ├── dbstats.rst │ ├── html_table_to_db.rst │ ├── kitrace_to_db.rst │ ├── ns_to_db.rst │ ├── sqlselect_to_db.rst │ ├── tabdelim_to_db.rst │ ├── tcpdump_to_db.rst │ ├── xml_to_db.rst │ └── yaml_to_db.rst ├── perltools.rst ├── requirements.txt └── tools │ ├── images │ └── myheat.png │ ├── index.rst │ ├── pdb2sql.md │ ├── pdb2sql.rst │ ├── pdb2tex.md │ ├── pdb2tex.rst │ ├── pdb2to1.md │ ├── pdb2to1.rst │ ├── pdbaddtypes.md │ ├── pdbaddtypes.rst │ ├── pdbaugment.md │ ├── pdbaugment.rst │ ├── pdbcdf.md │ ├── pdbcdf.rst │ ├── pdbcoluniq.md │ ├── pdbcoluniq.rst │ ├── pdbdatetoepoch.md │ ├── pdbdatetoepoch.rst │ ├── pdbensure.md │ ├── pdbensure.rst │ ├── pdbepochtodate.md │ ├── pdbepochtodate.rst │ ├── pdbfgrep.rst │ ├── pdbformat.md │ ├── pdbformat.rst │ ├── pdbfullpivot.md │ ├── pdbfullpivot.rst │ ├── pdbheatmap.md │ ├── pdbheatmap.rst │ ├── pdbjinja.md │ ├── pdbjinja.rst │ ├── pdbkeyedsort.md │ ├── pdbkeyedsort.rst │ ├── pdbnormalize.md │ ├── pdbnormalize.rst │ ├── pdbreescape.md │ ├── pdbreescape.rst │ ├── pdbrow.md │ ├── pdbrow.rst │ ├── pdbroweval.rst │ ├── pdbsplitter.md │ ├── pdbsplitter.rst │ ├── pdbsum.md │ ├── pdbsum.rst │ ├── pdbtopn.md │ ├── pdbtopn.rst │ ├── pdbzerofill.md │ └── pdbzerofill.rst ├── pyfsdb ├── __init__.py ├── fsdb.py ├── obsolete │ ├── __init__.py │ ├── db2tex.py │ ├── dbaugment.py │ ├── dbcoluniq.py │ ├── dbdatetoepoch.py │ ├── dbensure.py │ ├── dbformat.py │ ├── dbfullpivot.py │ ├── dbheatmap.py │ ├── dbkeyedsort.py │ ├── dbnormalize.py │ ├── dbreescape.py │ ├── dbreversepivot.py │ ├── dbsplitter.py │ ├── dbsum.py │ ├── dbtopn.py │ └── dbzerofill.py ├── tests │ ├── noheader.fsdb │ ├── test_add_types.py │ ├── test_column_renames.py │ ├── test_coluniq.py │ ├── test_command_parsing.py │ ├── test_comments_at_top.fsdb │ ├── test_comments_at_top.test.fsdb │ ├── test_fsdb_class.py │ ├── test_json.py │ ├── test_label_shrink.py │ ├── test_msgpack.py │ ├── test_pdbaugment.py │ ├── test_pdbcdf.py │ ├── test_pdbjinja.py │ ├── test_pdbrow.py │ ├── test_pdbroweval.py │ ├── test_sql.py │ ├── test_utf8.py │ ├── testout.fsdb │ ├── tests.fsdb │ └── testscomp.fsdb.xz └── tools │ ├── __init__.py │ ├── bro2fsdb.py │ ├── fsdb2json.py │ ├── fsdb2many.py │ ├── json2fsdb.py │ ├── msgpack2pdb.py │ ├── pdb2msgpack.py │ ├── 
pdb2sql.py │ ├── pdb2tex.py │ ├── pdb2to1.py │ ├── pdbaddtypes.py │ ├── pdbaugment.py │ ├── pdbcdf.py │ ├── pdbcoluniq.py │ ├── pdbdatetoepoch.py │ ├── pdbensure.py │ ├── pdbepochtodate.py │ ├── pdbfgrep.py │ ├── pdbformat.py │ ├── pdbfullpivot.py │ ├── pdbheatmap.py │ ├── pdbjinja.py │ ├── pdbkeyedsort.py │ ├── pdbnormalize.py │ ├── pdbreescape.py │ ├── pdbrelplot.py │ ├── pdbreversepivot.py │ ├── pdbroc.py │ ├── pdbrow.py │ ├── pdbroweval.py │ ├── pdbsplitter.py │ ├── pdbsum.py │ ├── pdbtopn.py │ └── pdbzerofill.py └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | /pyfsdb.egg-info/ 3 | /dist/ 4 | /build/ 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: https://github.com/psf/black 12 | rev: 22.3.0 13 | hooks: 14 | - id: black 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | # Ruff version. 17 | rev: v0.0.290 18 | hooks: 19 | - id: ruff 20 | - repo: local 21 | hooks: 22 | - id: pytest 23 | name: pytest 24 | entry: bash -c "PYTHONPATH=. pytest-3" 25 | language: system 26 | pass_filenames: false 27 | always_run: true 28 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | builder: html 5 | configuration: docs/conf.py 6 | fail_on_warning: false 7 | python: 8 | install: 9 | - method: pip 10 | path: . 11 | - requirements: docs/requirements.txt 12 | build: 13 | os: "ubuntu-22.04" 14 | tools: 15 | python: "3.11" 16 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent { 3 | docker { 4 | image 'docker.io/python:3' 5 | } 6 | } 7 | stages { 8 | stage('Preparation') { 9 | steps { 10 | withEnv(["HOME=${env.WORKSPACE}"]) { 11 | sh 'pip install --user pandas' 12 | } 13 | } 14 | } 15 | stage ('Build') { 16 | steps { 17 | sh 'python3 setup.py build' 18 | } 19 | } 20 | stage ('Test') { 21 | steps { 22 | withEnv(["HOME=${env.WORKSPACE}"]) { 23 | sh 'python3 setup.py test' 24 | } 25 | } 26 | } 27 | } 28 | post { 29 | failure { 30 | emailext( 31 | subject: "FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", 32 | body: """
FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]': 33 | Check console output at "${env.JOB_NAME} [${env.BUILD_NUMBER}]"
""", 34 | recipientProviders: [[$class: 'DevelopersRecipientProvider']] 35 | ) 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2025 University of Southern California, Information Sciences Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | SUBMODULES = $(patsubst %.md,%.rst,$(wildcard tools/*.md)) 12 | 13 | # build rest files from markdown 14 | %.rst: %.md cli.template Makefile 15 | pandoc -i $< -o $@ > $@ 16 | TOOL=$$(echo $$(basename $@) | sed 's/.rst//') ; \ 17 | cat cli.template | perl -p -e "s/TOOL/$$TOOL/g" >> $@ 18 | 19 | .PHONY: help Makefile submodules 20 | 21 | submodules: $(SUBMODULES) 22 | echo $(SUBMODULES) 23 | echo $(wildcard tools/*.md) 24 | 25 | # Put it first so that "make" without argument is like "make help". 26 | help: 27 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 28 | 29 | # Catch-all target: route all unknown targets to Sphinx using the new 30 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 31 | html: Makefile submodules 32 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 33 | 34 | latex: Makefile 35 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 36 | 37 | latexpdf: Makefile 38 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 39 | 40 | clean: Makefile 41 | @PYTHONPATH=.. 
$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 42 | -------------------------------------------------------------------------------- /docs/class.rst: -------------------------------------------------------------------------------- 1 | FSDB Class Description 2 | ====================== 3 | 4 | .. currentmodule: pyfsdb 5 | .. autoclass:: pyfsdb.Fsdb 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/cli.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | Command Line Arguments 4 | ^^^^^^^^^^^^^^^^^^^^^^ 5 | 6 | .. sphinx_argparse_cli:: 7 | :module: pyfsdb.tools.TOOL 8 | :func: parse_args 9 | :hook: 10 | :prog: TOOL 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "PyFSDB" 21 | copyright = "2020-2023, Wes Hardaker" 22 | author = "Wes Hardaker" 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = "2.4.3" 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "myst_parser", 34 | # "sphinxarg.ext", 35 | "sphinx_argparse_cli", 36 | "sphinx.ext.autodoc", 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ["_templates"] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = "alabaster" 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 58 | html_static_path = ["_static"] 59 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | FSDB - File System Data Base 2 | ================================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 3 6 | :caption: Contents: 7 | 8 | introduction 9 | perlover 10 | tools/index 11 | perltools 12 | class 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/perl/Makefile: -------------------------------------------------------------------------------- 1 | VERSION=3.1 2 | SRC=~/src/Fsdb-$(VERSION) 3 | 4 | BUILD=buildperl scripts perlindex perloverview 5 | 6 | all: $(BUILD) 7 | 8 | .PHONY: $(BUILD) 9 | 10 | buildperl: 11 | cd $(SRC) ; \ 12 | perl Makefile.PL ; \ 13 | make 14 | 15 | scripts: 16 | outdir=`pwd` ; \ 17 | cd $(SRC)/blib/script ; \ 18 | for i in * ; do \ 19 | pod2man $$i | \ 20 | pandoc -f man -i - -t rst -o - | \ 21 | perl -n -e 'if (/^NAME$$/) { $$_ = <>; $$_ = <>; $$_ = <>; print("$$_" . "=" x 70 . "\n\n"); $$printit = 1; $$_ = <>; $$_ = <>; } if (/^SYNOPSIS$$/) { print("*NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version $(VERSION)*\n\n")} ; if ("$$printit" eq "1") { s/^(==+)/"-" x length($$1)/e; print; }' \ 22 | > $$outdir/$$(basename $$i).rst ; \ 23 | done 24 | 25 | perlindex: 26 | outfile=../perltools.rst ; \ 27 | echo -e "Perl FSDB Tools\n===============\n\n.. toctree::\n :maxdepth: 1\n :caption: FSDB Perl Scripts:\n" > $$outfile ; \ 28 | for i in *.rst ; do \ 29 | echo " perl/$$i" >> $$outfile ; \ 30 | done 31 | 32 | perloverview: 33 | pod2man $(SRC)/lib/Fsdb.pm | \ 34 | pandoc -f man -i - -t rst -o - | \ 35 | perl -n -e 'if (/^NAME$$/) { $$_ = <>; $$_ = <>; $$_ = <>; print("$$_" . "=" x 70 . 
"\n\n"); $$printit = 1; $$_ = <>; $$_ = <>; } if (/^SYNOPSIS$$/) { print("*NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version $(VERSION)*\n\n")} ; if ("$$printit" eq "1") { s/^(==+)/"-" x length($$1)/e; print; }' \ 36 | > ../perloverview.rst 37 | -------------------------------------------------------------------------------- /docs/perl/csv_to_db.rst: -------------------------------------------------------------------------------- 1 | csv_to_db - convert comma-separated-value data into fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | csv_to_db 86 | 87 | This program is distributed under terms of the GNU general public 88 | license, version 2. See the file COPYING with the distribution for 89 | details. 90 | -------------------------------------------------------------------------------- /docs/perl/db_to_csv.rst: -------------------------------------------------------------------------------- 1 | db_to_csv - convert fsdb to the comma-separated-value file-format 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | db_to_csv [-C] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Covert an existing fsdb file to comma-separated value format. 15 | 16 | Input is fsdb format. 17 | 18 | Output is CSV-format plain text (*not* fsdb). 19 | 20 | OPTIONS 21 | ------- 22 | 23 | -C or <--omit-comments> 24 | Also strip all comments. 25 | 26 | This module also supports the standard fsdb options: 27 | 28 | -d 29 | Enable debugging output. 30 | 31 | -i or --input InputSource 32 | Read from InputSource, typically a file name, or ``-`` for standard 33 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 34 | objects. 35 | 36 | -o or --output OutputDestination 37 | Write to OutputDestination, typically a file name, or ``-`` for 38 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 39 | Fsdb::BoundedQueue objects. 40 | 41 | --autorun or --noautorun 42 | By default, programs process automatically, but Fsdb::Filter objects 43 | in Perl do not run until you invoke the **run()** method. The 44 | ``--(no)autorun`` option controls that behavior within Perl. 45 | 46 | --help 47 | Show help. 48 | 49 | --man 50 | Show full manual. 51 | 52 | SAMPLE USAGE 53 | ------------ 54 | 55 | Input: 56 | ------ 57 | 58 | #fsdb -F S paper papertitle reviewer reviewername score1 score2 score3 59 | score4 score5 1 test, paper 2 Smith 4 4 - - - 2 other paper 3 Jones 3 3 60 | - - - 2 input double space 3 Jones 3 3 - - - # \| csv_to_db 61 | 62 | Command: 63 | -------- 64 | 65 | cat data.fsdb \| db_to_csv 66 | 67 | Output: 68 | ------- 69 | 70 | paper,papertitle,reviewer,reviewername,score1,score2,score3,score4,score5 71 | 1,"test, paper",2,Smith,4,4,-,-,- 2,"other paper",3,Jones,3,3,-,-,- 72 | 2,"input double space",3,Jones,3,3,-,-,- # \| csv_to_db # \| db_to_csv 73 | 74 | SEE ALSO 75 | -------- 76 | 77 | Fsdb. dbfilealter. csv_to_db 78 | 79 | AUTHOR and COPYRIGHT 80 | -------------------- 81 | 82 | Copyright (C) 2007-2018 by John Heidemann 83 | 84 | This program is distributed under terms of the GNU general public 85 | license, version 2. See the file COPYING with the distribution for 86 | details. 
87 | -------------------------------------------------------------------------------- /docs/perl/db_to_html_table.rst: -------------------------------------------------------------------------------- 1 | db_to_html_table - convert db to an HTML table 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | db_to_html_table [-g N] dest.html 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Covert an existing dbtable to an HTML table. The output is a fragment of 15 | an HTML page; we assume the user fills in the rest (head and body, 16 | etc.). 17 | 18 | Input is fsdb format. 19 | 20 | Output is HTML code (*not* fsdb), with HTML-specific characters (less 21 | than, greater than, ampersand) are escaped. (The fsdb-1.x version 22 | assumed input was ISO-8859-1; we now assume both input and output are 23 | unicode. This change is considered a feature of the 21st century.) 24 | 25 | OPTIONS 26 | ------- 27 | 28 | -g N or <--group-count N> 29 | Color groups of *N* consecutive rows with one background color. 30 | 31 | This module also supports the standard fsdb options: 32 | 33 | -d 34 | Enable debugging output. 35 | 36 | -i or --input InputSource 37 | Read from InputSource, typically a file name, or ``-`` for standard 38 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 39 | objects. 40 | 41 | -o or --output OutputDestination 42 | Write to OutputDestination, typically a file name, or ``-`` for 43 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 44 | Fsdb::BoundedQueue objects. 45 | 46 | --autorun or --noautorun 47 | By default, programs process automatically, but Fsdb::Filter objects 48 | in Perl do not run until you invoke the **run()** method. The 49 | ``--(no)autorun`` option controls that behavior within Perl. 50 | 51 | --help 52 | Show help. 53 | 54 | --man 55 | Show full manual. 56 | 57 | SAMPLE USAGE 58 | ------------ 59 | 60 | Input: 61 | ------ 62 | 63 | #fsdb -F S account passwd uid gid fullname homedir shell johnh \* 2274 64 | 134 John & Ampersand /home/johnh /bin/bash greg \* 2275 134 Greg < 65 | Lessthan /home/greg /bin/bash root \* 0 0 Root ; Semi /root /bin/bash 66 | four \* 1 1 Fourth Row /home/four /bin/bash 67 | 68 | Command: 69 | -------- 70 | 71 | cat data.fsdb \| db_to_csv -g 3 72 | 73 | Output: 74 | ------- 75 | 76 | 77 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 |
account passwd uid gid fullname homedir shell
johnh * 2274 134 John & Ampersand /home/johnh /bin/bash
greg * 2275 134 Greg < Lessthan /home/greg /bin/bash
root * 0 0 Root ; Semi /root /bin/bash
four * 1 1 Fourth Row /home/four /bin/bash
87 | 88 | SEE ALSO 89 | -------- 90 | 91 | Fsdb. dbcolneaten. dbfileadjust. html_table_to_db. 92 | 93 | AUTHOR and COPYRIGHT 94 | -------------------- 95 | 96 | Copyright (C) 2007-2015 by John Heidemann 97 | 98 | This program is distributed under terms of the GNU general public 99 | license, version 2. See the file COPYING with the distribution for 100 | details. 101 | -------------------------------------------------------------------------------- /docs/perl/dbcol.rst: -------------------------------------------------------------------------------- 1 | dbcol - select columns from an Fsdb file 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcol [-v] [-e -] [column...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Select one or more columns from the input database. If a value is given 15 | for empty columns with the -e option, then any named columns which don't 16 | exist will be created. Otherwise, non-existent columns are an error. 17 | 18 | Note: a safer way to create columns is dbcolcreate. 19 | 20 | OPTIONS 21 | ------- 22 | 23 | -r or --relaxed-errors 24 | Relaxed error checking: ignore columns that aren't there. 25 | 26 | -v or --invert-match 27 | Output all columns except those listed (like grep -v). 28 | 29 | -a or --all 30 | Output all columns, in addition to those listed. (Thus ``-a foo`` 31 | will move column foo to the first column.) 32 | 33 | -e EmptyValue or --empty 34 | Specify the value newly created columns get. 35 | 36 | --saveoutput $OUT_REF 37 | Save output writer (for integration with other fsdb filters). 38 | 39 | and the standard fsdb options: 40 | 41 | -d 42 | Enable debugging output. 43 | 44 | -i or --input InputSource 45 | Read from InputSource, typically a file, or - for standard input, or 46 | (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects. 47 | 48 | -o or --output OutputDestination 49 | Write to OutputDestination, typically a file, or - for standard 50 | output, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 51 | objects. 52 | 53 | --autorun or --noautorun 54 | By default, programs process automatically, but Fsdb::Filter objects 55 | in Perl do not run until you invoke the **run()** method. The 56 | ``--(no)autorun`` option controls that behavior within Perl. 57 | 58 | --header H 59 | Use H as the full Fsdb header, rather than reading a header from then 60 | input. 61 | 62 | --help 63 | Show help. 64 | 65 | --man 66 | Show full manual. 67 | 68 | SAMPLE USAGE 69 | ------------ 70 | 71 | Input: 72 | ------ 73 | 74 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134 75 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 76 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 77 | database 78 | 79 | Command: 80 | -------- 81 | 82 | cat DATA/passwd.fsdb account \| dbcol account 83 | 84 | Output: 85 | ------- 86 | 87 | #fsdb account johnh greg root # this is a simple database # \| dbcol 88 | account 89 | 90 | SEE ALSO 91 | -------- 92 | 93 | **dbcolcreate** (1), **Fsdb** (3) 94 | 95 | AUTHOR and COPYRIGHT 96 | -------------------- 97 | 98 | Copyright (C) 1991-2022 by John Heidemann 99 | 100 | This program is distributed under terms of the GNU general public 101 | license, version 2. See the file COPYING with the distribution for 102 | details. 
103 | -------------------------------------------------------------------------------- /docs/perl/dbcolcopylast.rst: -------------------------------------------------------------------------------- 1 | dbcolcopylast - create new columns that are copies of prior columns 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolcopylast [-e EMPTY] [column...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | For each COLUMN, create a new column copylast_COLUMN that is the last 15 | value for that column---that is, the value of that column from the row 16 | before. 17 | 18 | OPTIONS 19 | ------- 20 | 21 | -e EmptyValue or --empty 22 | Specify the value newly created columns get. 23 | 24 | This module also supports the standard fsdb options: 25 | 26 | -d 27 | Enable debugging output. 28 | 29 | -i or --input InputSource 30 | Read from InputSource, typically a file name, or ``-`` for standard 31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 32 | objects. 33 | 34 | -o or --output OutputDestination 35 | Write to OutputDestination, typically a file name, or ``-`` for 36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 37 | Fsdb::BoundedQueue objects. 38 | 39 | --autorun or --noautorun 40 | By default, programs process automatically, but Fsdb::Filter objects 41 | in Perl do not run until you invoke the **run()** method. The 42 | ``--(no)autorun`` option controls that behavior within Perl. 43 | 44 | --help 45 | Show help. 46 | 47 | --man 48 | Show full manual. 49 | 50 | SAMPLE USAGE 51 | ------------ 52 | 53 | Input: 54 | ------ 55 | 56 | #fsdb test a b 57 | 58 | Command: 59 | -------- 60 | 61 | cat data.fsdb \| dbcolcopylast foo 62 | 63 | Output: 64 | ------- 65 | 66 | #fsdb test foo a - b - 67 | 68 | SEE ALSO 69 | -------- 70 | 71 | Fsdb. 72 | 73 | AUTHOR and COPYRIGHT 74 | -------------------- 75 | 76 | Copyright (C) 1991-2022 by John Heidemann 77 | 78 | This program is distributed under terms of the GNU general public 79 | license, version 2. See the file COPYING with the distribution for 80 | details. 81 | -------------------------------------------------------------------------------- /docs/perl/dbcolcreate.rst: -------------------------------------------------------------------------------- 1 | dbcolcreate - create new columns 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolcreate NewColumn1 [NewColumn2] 10 | 11 | or 12 | 13 | dbcolcreate -e DefaultValue NewColumnWithDefault 14 | 15 | DESCRIPTION 16 | ----------- 17 | 18 | Create columns ``NewColumn1``, etc. with an optional ``DefaultValue``. 19 | 20 | OPTIONS 21 | ------- 22 | 23 | -e EmptyValue or --empty 24 | Specify the value newly created columns get. 25 | 26 | -f or --first 27 | Put all new columns as the first columns of each row. By default, 28 | they go at the end of each row. 29 | 30 | --no-recreate-fatal 31 | By default, creating an existing column is an error. With 32 | **--no-recreate-fatal**, we ignore re-creation. 33 | 34 | This module also supports the standard fsdb options: 35 | 36 | -d 37 | Enable debugging output. 
38 | 39 | -i or --input InputSource 40 | Read from InputSource, typically a file name, or ``-`` for standard 41 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 42 | objects. 43 | 44 | -o or --output OutputDestination 45 | Write to OutputDestination, typically a file name, or ``-`` for 46 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 47 | Fsdb::BoundedQueue objects. 48 | 49 | --autorun or --noautorun 50 | By default, programs process automatically, but Fsdb::Filter objects 51 | in Perl do not run until you invoke the **run()** method. The 52 | ``--(no)autorun`` option controls that behavior within Perl. 53 | 54 | --header H 55 | Use H as the full Fsdb header, rather than reading a header from then 56 | input. 57 | 58 | --help 59 | Show help. 60 | 61 | --man 62 | Show full manual. 63 | 64 | SAMPLE USAGE 65 | ------------ 66 | 67 | Input: 68 | ------ 69 | 70 | #fsdb test a b 71 | 72 | Command: 73 | -------- 74 | 75 | cat data.fsdb \| dbcolcreate foo 76 | 77 | Output: 78 | ------- 79 | 80 | #fsdb test foo a - b - 81 | 82 | SEE ALSO 83 | -------- 84 | 85 | Fsdb. 86 | 87 | AUTHOR and COPYRIGHT 88 | -------------------- 89 | 90 | Copyright (C) 1991-2022 by John Heidemann 91 | 92 | This program is distributed under terms of the GNU general public 93 | license, version 2. See the file COPYING with the distribution for 94 | details. 95 | -------------------------------------------------------------------------------- /docs/perl/dbcoldefine.rst: -------------------------------------------------------------------------------- 1 | dbcoldefine - define the columns of a plain text file to make it an Fsdb 2 | ====================================================================== 3 | 4 | 5 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 6 | 7 | SYNOPSIS 8 | -------- 9 | 10 | dbcoldefine [-F x] [column...] 11 | 12 | DESCRIPTION 13 | ----------- 14 | 15 | This program writes a new header before the data with the specified 16 | column names. It does *not* do any validation of the data contents; it 17 | is up to the user to verify that, other than the header, the input 18 | datastream is a correctly formatted Fsdb file. 19 | 20 | OPTIONS 21 | ------- 22 | 23 | -F or --fs or --fieldseparator s 24 | Specify the field separator. 25 | 26 | --header H 27 | Give the columns and field separator as a full Fsdb header (including 28 | ``#fsdb``). Can only be used alone, not with other specifications. 29 | 30 | This module also supports the standard fsdb options: 31 | 32 | -d 33 | Enable debugging output. 34 | 35 | -i or --input InputSource 36 | Read from InputSource, typically a file name, or ``-`` for standard 37 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 38 | objects. 39 | 40 | -o or --output OutputDestination 41 | Write to OutputDestination, typically a file name, or ``-`` for 42 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 43 | Fsdb::BoundedQueue objects. 44 | 45 | --autorun or --noautorun 46 | By default, programs process automatically, but Fsdb::Filter objects 47 | in Perl do not run until you invoke the **run()** method. The 48 | ``--(no)autorun`` option controls that behavior within Perl. 49 | 50 | --help 51 | Show help. 52 | 53 | --man 54 | Show full manual. 
55 | 56 | SAMPLE USAGE 57 | ------------ 58 | 59 | Input: 60 | ------ 61 | 62 | 102400 4937974.964736 102400 4585247.875904 102400 5098141.207123 63 | 64 | Command: 65 | -------- 66 | 67 | cat DATA/http_bandwidth \| dbcoldefine size bw 68 | 69 | Output: 70 | ------- 71 | 72 | #fsdb size bw 102400 4937974.964736 102400 4585247.875904 102400 73 | 5098141.207123 # \| dbcoldefine size bw 74 | 75 | SEE ALSO 76 | -------- 77 | 78 | Fsdb. dbfilestripcomments 79 | 80 | AUTHOR and COPYRIGHT 81 | -------------------- 82 | 83 | Copyright (C) 1991-2016 by John Heidemann 84 | 85 | This program is distributed under terms of the GNU general public 86 | license, version 2. See the file COPYING with the distribution for 87 | details. 88 | -------------------------------------------------------------------------------- /docs/perl/dbcolize.rst: -------------------------------------------------------------------------------- 1 | dbcolize - DEPRECATED, now use dbfilealter 2 | ====================================================================== 3 | -------------------------------------------------------------------------------- /docs/perl/dbcolmerge.rst: -------------------------------------------------------------------------------- 1 | dbcolmerge - merge multiple columns into one 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolmerge [-C ElementSeparator] [columns...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | For each row, merge multiple columns down to a single column (always a 15 | string), joining elements with ElementSeparator (defaults to a single 16 | underscore). 17 | 18 | OPTIONS 19 | ------- 20 | 21 | -C S or --element-separator S 22 | Specify the separator used to join columns. (Defaults to a single 23 | underscore.) 24 | 25 | -e E or --empty E 26 | give value E as the value for empty (null) records 27 | 28 | This module also supports the standard fsdb options: 29 | 30 | -d 31 | Enable debugging output. 32 | 33 | -i or --input InputSource 34 | Read from InputSource, typically a file name, or ``-`` for standard 35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 36 | objects. 37 | 38 | -o or --output OutputDestination 39 | Write to OutputDestination, typically a file name, or ``-`` for 40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 41 | Fsdb::BoundedQueue objects. 42 | 43 | --autorun or --noautorun 44 | By default, programs process automatically, but Fsdb::Filter objects 45 | in Perl do not run until you invoke the **run()** method. The 46 | ``--(no)autorun`` option controls that behavior within Perl. 47 | 48 | --header H 49 | Use H as the full Fsdb header, rather than reading a header from then 50 | input. 51 | 52 | --help 53 | Show help. 54 | 55 | --man 56 | Show full manual. 
57 | 58 | SAMPLE USAGE 59 | ------------ 60 | 61 | Input: 62 | ------ 63 | 64 | #fsdb first last John Heidemann Greg Johnson Root - # this is a simple 65 | database # \| /home/johnh/BIN/DB/dbcol fullname # \| dbcolrename 66 | fullname first_last # \| /home/johnh/BIN/DB/dbcolsplit -C \_ first_last 67 | # \| /home/johnh/BIN/DB/dbcol first last 68 | 69 | Command: 70 | -------- 71 | 72 | cat data.fsdb \| dbcolmerge -C \_ first last 73 | 74 | Output: 75 | ------- 76 | 77 | #fsdb first last first_last John Heidemann John_Heidemann Greg Johnson 78 | Greg_Johnson Root - Root\_ # this is a simple database # \| 79 | /home/johnh/BIN/DB/dbcol fullname # \| dbcolrename fullname first_last # 80 | \| /home/johnh/BIN/DB/dbcolsplit first_last # \| 81 | /home/johnh/BIN/DB/dbcol first last # \| /home/johnh/BIN/DB/dbcolmerge 82 | -C \_ first last 83 | 84 | SEE ALSO 85 | -------- 86 | 87 | Fsdb. dbcolsplittocols. dbcolsplittorows. dbcolrename. 88 | 89 | AUTHOR and COPYRIGHT 90 | -------------------- 91 | 92 | Copyright (C) 1991-2022 by John Heidemann 93 | 94 | This program is distributed under terms of the GNU general public 95 | license, version 2. See the file COPYING with the distribution for 96 | details. 97 | -------------------------------------------------------------------------------- /docs/perl/dbcolneaten.rst: -------------------------------------------------------------------------------- 1 | dbcolneaten - pretty-print columns of Fsdb data (assuming a monospaced 2 | ====================================================================== 3 | 4 | 5 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 6 | 7 | SYNOPSIS 8 | -------- 9 | 10 | dbcolneaten [-E] [field_settings] 11 | 12 | DESCRIPTION 13 | ----------- 14 | 15 | dbcolneaten arranges that the Fsdb data appears in neat columns if you 16 | view it with a monospaced font. To do this, it pads out each field with 17 | spaces to line up the next field. 18 | 19 | Field settings are of the form 20 | 21 | field op value 22 | 23 | OP is >=, =, or <= specifying that the width of that FIELD must be more, 24 | equal, or less than that VALUE 25 | 26 | dbcolneaten runs in O(1) memory but disk space proportional to the size 27 | of data. 28 | 29 | OPTIONS 30 | ------- 31 | 32 | -E or --noeoln 33 | Omit padding for the last column (at the end-of-the-line). (Default 34 | behavior.) 35 | 36 | -e or --eoln 37 | Do padding and include an extra field separator after the last 38 | column. (Useful if you're interactively adding a column.) 39 | 40 | This module also supports the standard fsdb options: 41 | 42 | -d 43 | Enable debugging output. 44 | 45 | -i or --input InputSource 46 | Read from InputSource, typically a file name, or ``-`` for standard 47 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 48 | objects. 49 | 50 | -o or --output OutputDestination 51 | Write to OutputDestination, typically a file name, or ``-`` for 52 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 53 | Fsdb::BoundedQueue objects. 54 | 55 | --autorun or --noautorun 56 | By default, programs process automatically, but Fsdb::Filter objects 57 | in Perl do not run until you invoke the **run()** method. The 58 | ``--(no)autorun`` option controls that behavior within Perl. 59 | 60 | --help 61 | Show help. 62 | 63 | --man 64 | Show full manual. 
65 | 66 | SAMPLE USAGE 67 | ------------ 68 | 69 | Input: 70 | ------ 71 | 72 | #fsdb fullname homedir uid gid Mr._John_Heidemann_Junior /home/johnh 73 | 2274 134 Greg_Johnson /home/greg 2275 134 Root /root 0 0 # this is a 74 | simple database # \| dbcol fullname homedir uid gid 75 | 76 | Command: 77 | -------- 78 | 79 | dbcolneaten 80 | 81 | Output: 82 | ------- 83 | 84 | #fsdb -F s fullname homedir uid gid Mr._John_Heidemann_Junior 85 | /home/johnh 2274 134 Greg_Johnson /home/greg 2275 134 Root /root 0 0 # 86 | this is a simple database # \| dbcol fullname homedir uid gid # \| 87 | dbcolneaten 88 | 89 | BUGS 90 | ---- 91 | 92 | Does not handle tab separators correctly. 93 | 94 | SEE ALSO 95 | -------- 96 | 97 | Fsdb. 98 | 99 | AUTHOR and COPYRIGHT 100 | -------------------- 101 | 102 | Copyright (C) 1991-2018 by John Heidemann 103 | 104 | This program is distributed under terms of the GNU general public 105 | license, version 2. See the file COPYING with the distribution for 106 | details. 107 | -------------------------------------------------------------------------------- /docs/perl/dbcolrename.rst: -------------------------------------------------------------------------------- 1 | dbcolrename - change the names of columns in a fsdb schema 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolrename OldName1 NewName1 [OldName2 NewName2] ... 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Dbcolrename changes the names of columns in a fsdb schema, mapping 15 | OldName1 to NewName1, and so on for multiple pairs of column names. 16 | 17 | Note that it is valid to do overlapping renames like 18 | ``dbcolrename a b b a``. 19 | 20 | OPTIONS 21 | ------- 22 | 23 | No non-standard options. 24 | 25 | This module also supports the standard fsdb options: 26 | 27 | -d 28 | Enable debugging output. 29 | 30 | -i or --input InputSource 31 | Read from InputSource, typically a file name, or ``-`` for standard 32 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 33 | objects. 34 | 35 | -o or --output OutputDestination 36 | Write to OutputDestination, typically a file name, or ``-`` for 37 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 38 | Fsdb::BoundedQueue objects. 39 | 40 | --autorun or --noautorun 41 | By default, programs process automatically, but Fsdb::Filter objects 42 | in Perl do not run until you invoke the **run()** method. The 43 | ``--(no)autorun`` option controls that behavior within Perl. 44 | 45 | --help 46 | Show help. 47 | 48 | --man 49 | Show full manual. 50 | 51 | SAMPLE USAGE 52 | ------------ 53 | 54 | Input: 55 | ------ 56 | 57 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134 58 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 59 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 60 | database 61 | 62 | Command: 63 | -------- 64 | 65 | cat DATA/passwd.fsdb \| dbcolrename fullname first_last 66 | 67 | Output: 68 | ------- 69 | 70 | #fsdb account passwd uid gid first_last homedir shell johnh \* 2274 134 71 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 72 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 73 | database # \| dbcolrename fullname first_last 74 | 75 | SEE ALSO 76 | -------- 77 | 78 | Fsdb. 
79 | 80 | AUTHOR and COPYRIGHT 81 | -------------------- 82 | 83 | Copyright (C) 1991-2022 by John Heidemann 84 | 85 | This program is distributed under terms of the GNU general public 86 | license, version 2. See the file COPYING with the distribution for 87 | details. 88 | -------------------------------------------------------------------------------- /docs/perl/dbcolscorrelate.rst: -------------------------------------------------------------------------------- 1 | dbcolscorrelate - find the coefficient of correlation over columns 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolscorrelate column1 column2 [column3...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Compute the coefficient of correlation over two (or more) columns. 15 | 16 | The output is one line of correlations. 17 | 18 | With exactly two columns, a new column *correlation* is created. 19 | 20 | With more than two columns, correlations are computed for each pairwise 21 | combination of rows, and each output column is given a name which is the 22 | concatenation of the two source rows, joined with an underscore. 23 | 24 | By default, we compute the *population correlation coefficient* (usually 25 | designed rho, X) and assume we see all members of the population. With 26 | the **--sample** option we instead compute the *sample correlation 27 | coefficient*, usually designated *r*. (Be careful in that the default 28 | here to full-population is the *opposite* of the default in dbcolstats.) 29 | 30 | This program requires a complete copy of the input data on disk. 31 | 32 | OPTIONS 33 | ------- 34 | 35 | --sample 36 | Select a the Pearson product-moment correlation coefficient (the 37 | sample correlation coefficient, usually designated *r*). 38 | 39 | --nosample 40 | Select a the Pearson product-moment correlation coefficient (the 41 | sample correlation coefficient, usually designated *r*). 42 | 43 | -f FORMAT or --format FORMAT 44 | Specify a **printf** (3)-style format for output statistics. Defaults 45 | to ``%.5g``. 46 | 47 | -T TmpDir 48 | where to put tmp files. Also uses environment variable TMPDIR, if -T 49 | is not specified. Default is /tmp. 50 | 51 | This module also supports the standard fsdb options: 52 | 53 | -d 54 | Enable debugging output. 55 | 56 | -i or --input InputSource 57 | Read from InputSource, typically a file name, or ``-`` for standard 58 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 59 | objects. 60 | 61 | -o or --output OutputDestination 62 | Write to OutputDestination, typically a file name, or ``-`` for 63 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 64 | Fsdb::BoundedQueue objects. 65 | 66 | --autorun or --noautorun 67 | By default, programs process automatically, but Fsdb::Filter objects 68 | in Perl do not run until you invoke the **run()** method. The 69 | ``--(no)autorun`` option controls that behavior within Perl. 70 | 71 | --help 72 | Show help. 73 | 74 | --man 75 | Show full manual. 
76 | 77 | SAMPLE USAGE 78 | ------------ 79 | 80 | Input: 81 | ------ 82 | 83 | #fsdb name id test1 test2 a 1 80 81 b 2 70 71 c 3 65 66 d 4 90 91 e 5 70 84 | 71 f 6 90 91 85 | 86 | Command: 87 | -------- 88 | 89 | cat DATA/more_grades.fsdb \| dbcolscorrelate test1 test2 90 | 91 | Output: 92 | ------- 93 | 94 | #fsdb correlation:d 0.83329 # \| dbcolscorrelate test1 test2 95 | 96 | SEE ALSO 97 | -------- 98 | 99 | Fsdb, dbcolstatscores, dbcolsregression, dbrvstatdiff. 100 | 101 | AUTHOR and COPYRIGHT 102 | -------------------- 103 | 104 | Copyright (C) 1998-2022 by John Heidemann 105 | 106 | This program is distributed under terms of the GNU general public 107 | license, version 2. See the file COPYING with the distribution for 108 | details. 109 | -------------------------------------------------------------------------------- /docs/perl/dbcolsplittorows.rst: -------------------------------------------------------------------------------- 1 | dbcolsplittorows - split an existing column into multiple new rows 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolsplittorows [-C ElementSeperator] [-e null] [-E] [-N 10 | enumerated-name] column [column...] 11 | 12 | DESCRIPTION 13 | ----------- 14 | 15 | Split column into pieces, outputting one row for each piece. 16 | 17 | By default, any empty fields are ignored. If an empty field value is 18 | given with -e, then they produce output. 19 | 20 | When a null value is given, empty fields at the beginning and end of 21 | lines are suppressed (like perl split). Unlike perl, if ALL fields are 22 | empty, we generate one (and not zero) empty fields. 23 | 24 | The inverse of this commend is dbfilepivot. 25 | 26 | OPTIONS 27 | ------- 28 | 29 | -C S or --element-separator S 30 | Specify the separator used to split columns. (Defaults to a single 31 | underscore.) 32 | 33 | -E or --enumerate 34 | Enumerate output columns: rather than assuming the column name uses 35 | the element separator, we keep it whole and fill in with indexes 36 | starting from 0. 37 | 38 | -N or --new-name N 39 | Name the new column N for enumeration. Defaults to ``index``. 40 | 41 | -e E or --empty E 42 | give value E as the value for empty (null) records 43 | 44 | This module also supports the standard fsdb options: 45 | 46 | -d 47 | Enable debugging output. 48 | 49 | -i or --input InputSource 50 | Read from InputSource, typically a file name, or ``-`` for standard 51 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 52 | objects. 53 | 54 | -o or --output OutputDestination 55 | Write to OutputDestination, typically a file name, or ``-`` for 56 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 57 | Fsdb::BoundedQueue objects. 58 | 59 | --autorun or --noautorun 60 | By default, programs process automatically, but Fsdb::Filter objects 61 | in Perl do not run until you invoke the **run()** method. The 62 | ``--(no)autorun`` option controls that behavior within Perl. 63 | 64 | --help 65 | Show help. 66 | 67 | --man 68 | Show full manual. 
69 | 70 | SAMPLE USAGE 71 | ------------ 72 | 73 | Input: 74 | ------ 75 | 76 | #fsdb name uid John_Heidemann 2274 Greg_Johnson 2275 Root 0 # this is a 77 | simple database # \| dbcol fullname uid # \| dbcolrename fullname name 78 | 79 | Command: 80 | -------- 81 | 82 | cat data.fsdb \| dbcolsplittorows name 83 | 84 | Output: 85 | ------- 86 | 87 | #fsdb name uid John 2274 Heidemann 2274 Greg 2275 Johnson 2275 Root 0 # 88 | this is a simple database # \| dbcol fullname uid # \| dbcolrename 89 | fullname name # \| dbcolsplittorows name 90 | 91 | SEE ALSO 92 | -------- 93 | 94 | **Fsdb** (1). **dbcolmerge** (1). **dbcolsplittocols** (1). 95 | **dbcolrename** (1). **dbfilepvot** (1). 96 | 97 | AUTHOR and COPYRIGHT 98 | -------------------- 99 | 100 | Copyright (C) 1991-2018 by John Heidemann 101 | 102 | This program is distributed under terms of the GNU general public 103 | license, version 2. See the file COPYING with the distribution for 104 | details. 105 | -------------------------------------------------------------------------------- /docs/perl/dbcolsregression.rst: -------------------------------------------------------------------------------- 1 | dbcolsregression - compute linear regression between two columns 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcolsregression [-a] column1 column2 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Compute linear regression over ``column1`` and ``column2``. Outputs 15 | slope, intercept, and correlation coefficient. 16 | 17 | OPTIONS 18 | ------- 19 | 20 | -a or --include-non-numeric 21 | Compute stats over all records (treat non-numeric records as zero 22 | rather than just ignoring them). 23 | 24 | -f FORMAT or --format FORMAT 25 | Specify a **printf** (3)-style format for output statistics. Defaults 26 | to ``%.5g``. 27 | 28 | This module also supports the standard fsdb options: 29 | 30 | -d 31 | Enable debugging output. 32 | 33 | -i or --input InputSource 34 | Read from InputSource, typically a file name, or ``-`` for standard 35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 36 | objects. 37 | 38 | -o or --output OutputDestination 39 | Write to OutputDestination, typically a file name, or ``-`` for 40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 41 | Fsdb::BoundedQueue objects. 42 | 43 | --autorun or --noautorun 44 | By default, programs process automatically, but Fsdb::Filter objects 45 | in Perl do not run until you invoke the **run()** method. The 46 | ``--(no)autorun`` option controls that behavior within Perl. 47 | 48 | --help 49 | Show help. 50 | 51 | --man 52 | Show full manual. 53 | 54 | SAMPLE USAGE 55 | ------------ 56 | 57 | Input: 58 | ------ 59 | 60 | #fsdb x y 160 126 180 103 200 82 220 75 240 82 260 40 280 20 61 | 62 | Command: 63 | -------- 64 | 65 | cat DATA/xy.fsdb \| dbcolsregression x y \| dblistize 66 | 67 | Output: 68 | ------- 69 | 70 | #fsdb -R C slope:d intercept:d confcoeff:d n:q slope: -0.79286 71 | intercept: 249.86 confcoeff: -0.95426 n: 7 # \| dbcolsregression x y # 72 | confidence intervals assume normal distribution and small n. # \| 73 | dblistize 74 | 75 | Sample data from 76 | 77 | by Stefan Waner and Steven R. Costenoble. 78 | 79 | SEE ALSO 80 | -------- 81 | 82 | dbcolstats, dbcolscorrelate, Fsdb. 
83 | 84 | AUTHOR and COPYRIGHT 85 | -------------------- 86 | 87 | Copyright (C) 1997-2022 by John Heidemann 88 | 89 | This program is distributed under terms of the GNU general public 90 | license, version 2. See the file COPYING with the distribution for 91 | details. 92 | -------------------------------------------------------------------------------- /docs/perl/dbcoltype.rst: -------------------------------------------------------------------------------- 1 | dbcoltype - define (or redefine) types for columns of an Fsdb file 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbcol [-v] [column type...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Define the type of each column, where COLUMN and TYPE are pairs. Or, 15 | with the ``-v`` option, redefine all types as string. 16 | 17 | The data does not change (just the header). 18 | 19 | OPTIONS 20 | ------- 21 | 22 | -v or --clear-types 23 | Remove definitions from columns that are listed, or from all columns 24 | if none are listed. The effect is to restore types to their default 25 | type of a (string). 26 | 27 | and the standard fsdb options: 28 | 29 | -d 30 | Enable debugging output. 31 | 32 | -i or --input InputSource 33 | Read from InputSource, typically a file, or - for standard input, or 34 | (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects. 35 | 36 | -o or --output OutputDestination 37 | Write to OutputDestination, typically a file, or - for standard 38 | output, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 39 | objects. 40 | 41 | --autorun or --noautorun 42 | By default, programs process automatically, but Fsdb::Filter objects 43 | in Perl do not run until you invoke the **run()** method. The 44 | ``--(no)autorun`` option controls that behavior within Perl. 45 | 46 | --header H 47 | Use H as the full Fsdb header, rather than reading a header from then 48 | input. 49 | 50 | --help 51 | Show help. 52 | 53 | --man 54 | Show full manual. 55 | 56 | SAMPLE USAGE 57 | ------------ 58 | 59 | Input: 60 | ------ 61 | 62 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134 63 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 64 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 65 | database 66 | 67 | Command: 68 | -------- 69 | 70 | cat DATA/passwd.fsdb account \| dbcoltype uid l gid l 71 | 72 | Output: 73 | ------- 74 | 75 | #fsdb account passwd uid:l gid:l fullname homedir shell johnh \* 2274 76 | 134 John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 77 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 78 | database 79 | 80 | SEE ALSO 81 | -------- 82 | 83 | **dbcoldefine** (1), **dbcolcreate** (1), **Fsdb** (3). 84 | 85 | AUTHOR and COPYRIGHT 86 | -------------------- 87 | 88 | Copyright (C) 2022 by John Heidemann 89 | 90 | This program is distributed under terms of the GNU general public 91 | license, version 2. See the file COPYING with the distribution for 92 | details. 
93 | -------------------------------------------------------------------------------- /docs/perl/dbfilecat.rst: -------------------------------------------------------------------------------- 1 | dbfilecat - concatenate two files with identical schema 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbfilecat --input A.fsdb [--input B.fsdb...] 10 | 11 | or 12 | 13 | echo A.fsdb \| dbfilecat --xargs 14 | 15 | DESCRIPTION 16 | ----------- 17 | 18 | Concatenate all provided input files, producing one result. We remove 19 | extra header lines. 20 | 21 | Inputs can both be specified with ``--input``, or one can come from 22 | standard input and the other from ``--input``. With ``--xargs``, each 23 | line of standard input is a filename for input. 24 | 25 | Inputs must have identical schemas (columns, column order, and field 26 | separators). 27 | 28 | Like dbmerge, but no worries about sorting, and with no arguments we 29 | read standard input (although that's not very useful). 30 | 31 | OPTIONS 32 | ------- 33 | 34 | General option: 35 | 36 | --xargs 37 | Expect that input filenames are given, one-per-line, on standard 38 | input. (In this case, merging can start incrementally. 39 | 40 | --removeinputs 41 | Delete the source files after they have been consumed. (Defaults off, 42 | leaving the inputs in place.) This module also supports the standard 43 | fsdb options: 44 | 45 | -d 46 | Enable debugging output. 47 | 48 | -i or --input InputSource 49 | Read from InputSource, typically a file name, or ``-`` for standard 50 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 51 | objects. 52 | 53 | -o or --output OutputDestination 54 | Write to OutputDestination, typically a file name, or ``-`` for 55 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 56 | Fsdb::BoundedQueue objects. 57 | 58 | --autorun or --noautorun 59 | By default, programs process automatically, but Fsdb::Filter objects 60 | in Perl do not run until you invoke the **run()** method. The 61 | ``--(no)autorun`` option controls that behavior within Perl. 62 | 63 | --help 64 | Show help. 65 | 66 | --man 67 | Show full manual. 68 | 69 | SAMPLE USAGE 70 | ------------ 71 | 72 | Input: 73 | ------ 74 | 75 | File *a.fsdb*: 76 | 77 | #fsdb cid cname 11 numanal 10 pascal 78 | 79 | File *b.fsdb*: 80 | 81 | #fsdb cid cname 12 os 13 statistics 82 | 83 | Command: 84 | -------- 85 | 86 | dbfilecat --input a.fsdb --input b.fsdb 87 | 88 | Output: 89 | ------- 90 | 91 | #fsdb cid cname 11 numanal 10 pascal 12 os 13 statistics # \| dbmerge 92 | --input a.fsdb --input b.fsdb 93 | 94 | SEE ALSO 95 | -------- 96 | 97 | **dbmerge** (1), **Fsdb** (3) 98 | 99 | AUTHOR and COPYRIGHT 100 | -------------------- 101 | 102 | Copyright (C) 2013-2018 by John Heidemann 103 | 104 | This program is distributed under terms of the GNU general public 105 | license, version 2. See the file COPYING with the distribution for 106 | details. 
107 | -------------------------------------------------------------------------------- /docs/perl/dbfilestripcomments.rst: -------------------------------------------------------------------------------- 1 | dbfilestripcomments - remove comments from a fsdb file 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbfilestripcomments [-h] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Remove any comments in a file, including the header. This makes the file 15 | unreadable by other Fsdb utilities, but perhaps more readable by humans. 16 | 17 | With the -h option, leave the header. 18 | 19 | OPTIONS 20 | ------- 21 | 22 | -h or --header 23 | Retain the header. 24 | 25 | This module also supports the standard fsdb options: 26 | 27 | -d 28 | Enable debugging output. 29 | 30 | -i or --input InputSource 31 | Read from InputSource, typically a file name, or ``-`` for standard 32 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 33 | objects. 34 | 35 | -o or --output OutputDestination 36 | Write to OutputDestination, typically a file name, or ``-`` for 37 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 38 | Fsdb::BoundedQueue objects. 39 | 40 | --autorun or --noautorun 41 | By default, programs process automatically, but Fsdb::Filter objects 42 | in Perl do not run until you invoke the **run()** method. The 43 | ``--(no)autorun`` option controls that behavior within Perl. 44 | 45 | --help 46 | Show help. 47 | 48 | --man 49 | Show full manual. 50 | 51 | SAMPLE USAGE 52 | ------------ 53 | 54 | Input: 55 | ------ 56 | 57 | #fsdb -R C experiment mean stddev pct_rsd conf_range conf_low conf_high 58 | conf_pct sum sum_squared min max n experiment: ufs_mab_sys mean: 37.25 59 | stddev: 0.070711 pct_rsd: 0.18983 conf_range: 0.6353 conf_low: 36.615 60 | conf_high: 37.885 conf_pct: 0.95 sum: 74.5 sum_squared: 2775.1 min: 37.2 61 | max: 37.3 n: 2 # \| /home/johnh/BIN/DB/dbmultistats experiment duration 62 | # \| /home/johnh/BIN/DB/dblistize 63 | 64 | Command: 65 | -------- 66 | 67 | cat data.fsdb \| dbfilestripcomments 68 | 69 | Output: 70 | ------- 71 | 72 | experiment: ufs_mab_sys mean: 37.25 stddev: 0.070711 pct_rsd: 0.18983 73 | conf_range: 0.6353 conf_low: 36.615 conf_high: 37.885 conf_pct: 0.95 74 | sum: 74.5 sum_squared: 2775.1 min: 37.2 max: 37.3 n: 2 75 | 76 | SEE ALSO 77 | -------- 78 | 79 | Fsdb. dbcoldefine. 80 | 81 | AUTHOR and COPYRIGHT 82 | -------------------- 83 | 84 | Copyright (C) 1991-2008 by John Heidemann 85 | 86 | This program is distributed under terms of the GNU general public 87 | license, version 2. See the file COPYING with the distribution for 88 | details. 89 | -------------------------------------------------------------------------------- /docs/perl/dbfilevalidate.rst: -------------------------------------------------------------------------------- 1 | dbfilevalidate - insure the source input is a well-formed Fsdb file 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbfilevalidate [-vc] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Validates the input file to make sure it is a well-formed fsdb file. If 15 | the file is well-formed, it outputs the whole file and exits with a good 16 | exit code. 
For invalid files, it exits with an error exit code and 17 | embedded error messages in the stream as comments with \**\* in them. 18 | 19 | Currently this program checks for rows with missing or extra columns. 20 | 21 | OPTIONS 22 | ------- 23 | 24 | -v or --errors-only 25 | Output only broken lines, not the whole thing. 26 | 27 | -c or --correct 28 | Correct errors, if possible. Pad out rows with the empty value; 29 | truncate rows with extra values. If errors can be corrected the 30 | program exits with a good return code. 31 | 32 | "-e E" or "--empty E" 33 | give value E as the value for empty (null) records 34 | 35 | This module also supports the standard fsdb options: 36 | 37 | -d 38 | Enable debugging output. 39 | 40 | -i or --input InputSource 41 | Read from InputSource, typically a file name, or ``-`` for standard 42 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 43 | objects. 44 | 45 | -o or --output OutputDestination 46 | Write to OutputDestination, typically a file name, or ``-`` for 47 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 48 | Fsdb::BoundedQueue objects. 49 | 50 | --autorun or --noautorun 51 | By default, programs process automatically, but Fsdb::Filter objects 52 | in Perl do not run until you invoke the **run()** method. The 53 | ``--(no)autorun`` option controls that behavior within Perl. 54 | 55 | --help 56 | Show help. 57 | 58 | --man 59 | Show full manual. 60 | 61 | SAMPLE USAGE 62 | ------------ 63 | 64 | Input: 65 | ------ 66 | 67 | #fsdb sid cid 1 10 2 1 12 2 12 68 | 69 | Command: 70 | -------- 71 | 72 | cat TEST/dbfilevalidate_ex.in \| dbvalidate 73 | 74 | Output: 75 | ------- 76 | 77 | #fsdb sid cid 1 10 2 # \**\* line above is missing field cid. 1 12 2 12 78 | # \| dbfilevalidate 79 | 80 | SEE ALSO 81 | -------- 82 | 83 | Fsdb. 84 | 85 | AUTHOR and COPYRIGHT 86 | -------------------- 87 | 88 | Copyright (C) 1991-2008 by John Heidemann 89 | 90 | This program is distributed under terms of the GNU general public 91 | license, version 2. See the file COPYING with the distribution for 92 | details. 93 | -------------------------------------------------------------------------------- /docs/perl/dblistize.rst: -------------------------------------------------------------------------------- 1 | dblistize - DEPRECATED, now use dbfilealter 2 | ====================================================================== 3 | -------------------------------------------------------------------------------- /docs/perl/dbrecolize.rst: -------------------------------------------------------------------------------- 1 | dbrecolize - DEPRECATED, now use dbfilealter 2 | ====================================================================== 3 | -------------------------------------------------------------------------------- /docs/perl/dbrow.rst: -------------------------------------------------------------------------------- 1 | dbrow - select rows from an Fsdb file based on arbitrary conditions 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbrow [-vw] CONDITION [CONDITION...] 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Select rows for which all CONDITIONS are true. Conditions are specified 15 | as Perl code, in which column names are be embedded, preceded by 16 | underscores. 
17 | 18 | OPTIONS 19 | ------- 20 | 21 | -v 22 | Invert the selection, picking rows where at least one condition does 23 | *not* match. 24 | 25 | This module also supports the standard fsdb options: 26 | 27 | -d 28 | Enable debugging output. 29 | 30 | -w or --warnings 31 | Enable warnings in user supplied code. 32 | 33 | -i or --input InputSource 34 | Read from InputSource, typically a file name, or ``-`` for standard 35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 36 | objects. 37 | 38 | -o or --output OutputDestination 39 | Write to OutputDestination, typically a file name, or ``-`` for 40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 41 | Fsdb::BoundedQueue objects. 42 | 43 | --autorun or --noautorun 44 | By default, programs process automatically, but Fsdb::Filter objects 45 | in Perl do not run until you invoke the **run()** method. The 46 | ``--(no)autorun`` option controls that behavior within Perl. 47 | 48 | --header H 49 | Use H as the full Fsdb header, rather than reading a header from then 50 | input. 51 | 52 | --help 53 | Show help. 54 | 55 | --man 56 | Show full manual. 57 | 58 | SAMPLE USAGE 59 | ------------ 60 | 61 | Input: 62 | ------ 63 | 64 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134 65 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 66 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 67 | database 68 | 69 | Command: 70 | -------- 71 | 72 | cat DATA/passwd.fsdb \| dbrow \_fullname =~ /John/ 73 | 74 | Output: 75 | ------- 76 | 77 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134 78 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 79 | /home/greg /bin/bash # this is a simple database # \| 80 | /home/johnh/BIN/DB/dbrow 81 | 82 | BUGS 83 | ---- 84 | 85 | Doesn't detect references to unknown columns in conditions. 86 | 87 | END #' for font-lock mode. exit 1; 88 | 89 | AUTHOR and COPYRIGHT 90 | -------------------- 91 | 92 | Copyright (C) 1991-2018 by John Heidemann 93 | 94 | This program is distributed under terms of the GNU general public 95 | license, version 2. See the file COPYING with the distribution for 96 | details. 97 | -------------------------------------------------------------------------------- /docs/perl/dbrowaccumulate.rst: -------------------------------------------------------------------------------- 1 | dbrowaccumulate - compute a running sum of a column 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbrowaccumulate [-C increment_constant] [-I initial_value] [-c 10 | increment_column] [-N new_column_name] 11 | 12 | DESCRIPTION 13 | ----------- 14 | 15 | Compute a running sum over a column of data, or of a constant 16 | incremented per row, perhaps to generate a cumulative distribution. 17 | 18 | What to accumulate is specified by ``-c`` or ``-C``. 19 | 20 | The new column is named by the ``-N`` argument, defaulting to ``accum``. 21 | 22 | OPTIONS 23 | ------- 24 | 25 | -c or --column COLUMN 26 | Accumulate values from the given COLUMN. No default. 27 | 28 | -C or --constant K 29 | Accumulate the given constant K for each row of input. No default. 30 | 31 | -I or --initial-value I 32 | Start accumulation at value I. Defaults to zero. 33 | 34 | -N or --new-name N 35 | Name the new column N. Defaults to ``accum``. 
36 | 37 | This module also supports the standard fsdb options: 38 | 39 | -d 40 | Enable debugging output. 41 | 42 | -i or --input InputSource 43 | Read from InputSource, typically a file name, or ``-`` for standard 44 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 45 | objects. 46 | 47 | -o or --output OutputDestination 48 | Write to OutputDestination, typically a file name, or ``-`` for 49 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 50 | Fsdb::BoundedQueue objects. 51 | 52 | --autorun or --noautorun 53 | By default, programs process automatically, but Fsdb::Filter objects 54 | in Perl do not run until you invoke the **run()** method. The 55 | ``--(no)autorun`` option controls that behavior within Perl. 56 | 57 | --help 58 | Show help. 59 | 60 | --man 61 | Show full manual. 62 | 63 | SAMPLE USAGE 64 | ------------ 65 | 66 | Input: 67 | ------ 68 | 69 | #fsdb diff 0.0 00.000938 00.001611 00.001736 00.002006 00.002049 # \| 70 | /home/johnh/BIN/DB/dbrow # \| /home/johnh/BIN/DB/dbcol diff # \| dbsort 71 | diff 72 | 73 | Command: 74 | -------- 75 | 76 | cat DATA/kitrace.fsdb \| dbrowaccumulate -c diff 77 | 78 | Output: 79 | ------- 80 | 81 | #fsdb diff accum 0.0 0 00.000938 .000938 00.001611 .002549 00.001736 82 | .004285 00.002006 .006291 00.002049 .00834 # \| /home/johnh/BIN/DB/dbrow 83 | # \| /home/johnh/BIN/DB/dbcol diff # \| dbsort diff # \| 84 | /home/johnh/BIN/DB/dbrowaccumulate diff 85 | 86 | SEE ALSO 87 | -------- 88 | 89 | Fsdb, dbrowenumerate. 90 | 91 | AUTHOR and COPYRIGHT 92 | -------------------- 93 | 94 | Copyright (C) 1991-2022 by John Heidemann 95 | 96 | This program is distributed under terms of the GNU general public 97 | license, version 2. See the file COPYING with the distribution for 98 | details. 99 | -------------------------------------------------------------------------------- /docs/perl/dbrowcount.rst: -------------------------------------------------------------------------------- 1 | dbrowcount - count the number of rows in an Fsdb stream 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbrowcount 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Count the number of rows and write out a new fsdb file with one column 15 | (n) and one value: the number of rows. This program is a strict subset 16 | of dbcolstats. 17 | 18 | Although there are other ways to get a count of rows (``dbcolstats``, or 19 | ``dbrowaccumulate -C 1`` and some processing), counting is so common it 20 | warrants its own command. (For example, consider how often ``wc -l`` is 21 | used in regular shell scripting.) There are some gross and subtle 22 | differences, though, in that ``dbrowcount`` doesn't require one to 23 | specify a column to search, and it also doesn't look for and skip null 24 | data items. 25 | 26 | OPTIONS 27 | ------- 28 | 29 | No program-specific options. 30 | 31 | This module also supports the standard fsdb options: 32 | 33 | -d 34 | Enable debugging output. 35 | 36 | -i or --input InputSource 37 | Read from InputSource, typically a file name, or ``-`` for standard 38 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 39 | objects. 40 | 41 | -o or --output OutputDestination 42 | Write to OutputDestination, typically a file name, or ``-`` for 43 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 44 | Fsdb::BoundedQueue objects. 
45 | 46 | --autorun or --noautorun 47 | By default, programs process automatically, but Fsdb::Filter objects 48 | in Perl do not run until you invoke the **run()** method. The 49 | ``--(no)autorun`` option controls that behavior within Perl. 50 | 51 | --help 52 | Show help. 53 | 54 | --man 55 | Show full manual. 56 | 57 | SAMPLE USAGE 58 | ------------ 59 | 60 | Input: 61 | ------ 62 | 63 | #fsdb absdiff 0 0.046953 0.072074 0.075413 0.094088 0.096602 # \| 64 | /home/johnh/BIN/DB/dbrow # \| /home/johnh/BIN/DB/dbcol event clock # \| 65 | dbrowdiff clock # \| /home/johnh/BIN/DB/dbcol absdiff 66 | 67 | Command: 68 | -------- 69 | 70 | cat data.fsdb \| dbrowcount 71 | 72 | Output: 73 | ------- 74 | 75 | #fsdb n 6 # \| /home/johnh/BIN/DB/dbrow # \| /home/johnh/BIN/DB/dbcol 76 | event clock # \| dbrowdiff clock # \| /home/johnh/BIN/DB/dbcol absdiff 77 | 78 | Input 2: 79 | -------- 80 | 81 | As another example, this input produces the same output as above in 82 | ``dbrowcount``, but different output in ``dbstats``: 83 | 84 | #fsdb absdiff - - - - - - # \| /home/johnh/BIN/DB/dbrow # \| 85 | /home/johnh/BIN/DB/dbcol event clock # \| dbrowdiff clock # \| 86 | /home/johnh/BIN/DB/dbcol absdiff 87 | 88 | SEE ALSO 89 | -------- 90 | 91 | **dbcolaccumulate** (1), **dbcolstats** (1), **Fsdb** (3) 92 | 93 | AUTHOR and COPYRIGHT 94 | -------------------- 95 | 96 | Copyright (C) 2007-2022 by John Heidemann 97 | 98 | This program is distributed under terms of the GNU general public 99 | license, version 2. See the file COPYING with the distribution for 100 | details. 101 | -------------------------------------------------------------------------------- /docs/perl/dbrowenumerate.rst: -------------------------------------------------------------------------------- 1 | dbrowenumerate - enumerate rows, starting from zero 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | dbrowenumerate 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Add a new column \``count'', incremented for each row of data, starting 15 | with zero. Use dbrowaccumulate for control over initial value or 16 | increment; this module is just a wrapper around that. 17 | 18 | OPTIONS 19 | ------- 20 | 21 | -N or --new-name N 22 | Name the new column N. Defaults to ``count``. 23 | 24 | This module also supports the standard jdb options: 25 | 26 | -d 27 | Enable debugging output. 28 | 29 | -i or --input InputSource 30 | Read from InputSource, typically a file name, or ``-`` for standard 31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 32 | objects. 33 | 34 | -o or --output OutputDestination 35 | Write to OutputDestination, typically a file name, or ``-`` for 36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 37 | Fsdb::BoundedQueue objects. 38 | 39 | --autorun or --noautorun 40 | By default, programs process automatically, but Fsdb::Filter objects 41 | in Perl do not run until you invoke the **run()** method. The 42 | ``--(no)autorun`` option controls that behavior within Perl. 43 | 44 | --help 45 | Show help. 46 | 47 | --man 48 | Show full manual. 
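Since ``dbrowenumerate`` is described above as a thin wrapper around ``dbrowaccumulate``, the equivalent streaming pattern on the Python side of this package is short enough to sketch here. The sketch assumes the basic ``pyfsdb.Fsdb`` read/write API and that ``out_column_names`` can be overridden before the first row is written; the file names are illustrative, and this is not how the tool itself is implemented.

.. code:: python

   # Sketch only: append a zero-based "count" column while streaming rows.
   import pyfsdb

   db = pyfsdb.Fsdb("input.fsdb", out_file="enumerated.fsdb")
   db.out_column_names = list(db.column_names) + ["count"]
   for n, row in enumerate(db):
       db.append(row + [n])
   db.close()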
49 | 50 | SAMPLE USAGE 51 | ------------ 52 | 53 | Input: 54 | ------ 55 | 56 | #h account passwd uid gid fullname homedir shell johnh \* 2274 134 57 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson 58 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple 59 | database 60 | 61 | Command: 62 | -------- 63 | 64 | cat DATA/passwd.jdb \| dbrowenumerate 65 | 66 | Output: 67 | ------- 68 | 69 | #h account passwd uid gid fullname homedir shell count johnh \* 2274 134 70 | John_Heidemann /home/johnh /bin/bash 0 greg \* 2275 134 Greg_Johnson 71 | /home/greg /bin/bash 1 root \* 0 0 Root /root /bin/bash 2 # this is a 72 | simple database # \| /home/johnh/BIN/DB/dbrowenumerate 73 | 74 | SEE ALSO 75 | -------- 76 | 77 | Fsdb, dbrowaccumulate. 78 | 79 | CLASS FUNCTIONS 80 | --------------- 81 | -------------------------------------------------------------------------------- /docs/perl/dbstats.rst: -------------------------------------------------------------------------------- 1 | dbstats - DEPRICATED, now use dbcolstats 2 | ====================================================================== 3 | -------------------------------------------------------------------------------- /docs/perl/html_table_to_db.rst: -------------------------------------------------------------------------------- 1 | html_table_to_db - convert HTML tables into fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | html_table_to_db dest.fsdb 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Converts a HTML table to Fsdb format. 15 | 16 | The input is an HTML table (*not* fsdb). Column names are taken from 17 | ``TH`` elements, or defined as ``column0`` through ``columnN`` if no 18 | such elements appear. 19 | 20 | The output is two-space-separated fsdb. (Someday more general field 21 | separators should be supported.) Fsdb fields are normalized version of 22 | the html file: multiple spaces are compressed to one. 23 | 24 | This module also supports the standard fsdb options: 25 | 26 | -d 27 | Enable debugging output. 28 | 29 | -i or --input InputSource 30 | Read from InputSource, typically a file name, or ``-`` for standard 31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 32 | objects. 33 | 34 | -o or --output OutputDestination 35 | Write to OutputDestination, typically a file name, or ``-`` for 36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 37 | Fsdb::BoundedQueue objects. 38 | 39 | --autorun or --noautorun 40 | By default, programs process automatically, but Fsdb::Filter objects 41 | in Perl do not run until you invoke the **run()** method. The 42 | ``--(no)autorun`` option controls that behavior within Perl. 43 | 44 | --help 45 | Show help. 46 | 47 | --man 48 | Show full manual. 49 | 50 | SAMPLE USAGE 51 | ------------ 52 | 53 | Input: 54 | ------ 55 | 56 | 57 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 |
<table>
<tr><th>account</th> <th>passwd</th> <th>uid</th> <th>gid</th> <th>fullname</th> <th>homedir</th> <th>shell</th></tr>
<tr><td>johnh</td> <td>*</td> <td>2274</td> <td>134</td> <td>John &amp; Ampersand</td> <td>/home/johnh</td> <td>/bin/bash</td></tr>
<tr><td>greg</td> <td>*</td> <td>2275</td> <td>134</td> <td>Greg &lt; Lessthan</td> <td>/home/greg</td> <td>/bin/bash</td></tr>
<tr><td>root</td> <td>*</td> <td>0</td> <td>0</td> <td>Root ; Semi</td> <td>/root</td> <td>/bin/bash</td></tr>
<tr><td>four</td> <td>*</td> <td>1</td> <td>1</td> <td>Fourth Row</td> <td>/home/four</td> <td>/bin/bash</td></tr>
</table>
67 | 68 | Command: 69 | -------- 70 | 71 | html_table_to_db 72 | 73 | Output: 74 | ------- 75 | 76 | #fsdb -F S account passwd uid gid fullname homedir shell johnh \* 2274 77 | 134 John & Ampersand /home/johnh /bin/bash greg \* 2275 134 Greg < 78 | Lessthan /home/greg /bin/bash root \* 0 0 Root ; Semi /root /bin/bash 79 | four \* 1 1 Fourth Row /home/four /bin/bash 80 | 81 | SEE ALSO 82 | -------- 83 | 84 | Fsdb. db_to_html_table. 85 | 86 | AUTHOR and COPYRIGHT 87 | -------------------- 88 | 89 | Copyright (C) 1991-2015 by John Heidemann 90 | 91 | This program is distributed under terms of the GNU general public 92 | license, version 2. See the file COPYING with the distribution for 93 | details. 94 | -------------------------------------------------------------------------------- /docs/perl/ns_to_db.rst: -------------------------------------------------------------------------------- 1 | ns_to_db - convert one of ns's output format to jdb 2 | ====================================================================== 3 | 4 | KNOWN BUGS 5 | ---------- 6 | 7 | No test case. 8 | -------------------------------------------------------------------------------- /docs/perl/sqlselect_to_db.rst: -------------------------------------------------------------------------------- 1 | sqlselect_to_db - convert MySQL or MariaDB selected tables to fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | sqlselect_to_db dest.fsdb 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Converts a MySQL or MariaDB tables to Fsdb format. 15 | 16 | The input is *not* fsdb. The first non-box row is taken to be the names 17 | of the columns. 18 | 19 | The output is two-space-separated fsdb. (Someday more general field 20 | separators should be supported.) 21 | 22 | This module also supports the standard fsdb options: 23 | 24 | -d 25 | Enable debugging output. 26 | 27 | -i or --input InputSource 28 | Read from InputSource, typically a file name, or ``-`` for standard 29 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 30 | objects. 31 | 32 | -o or --output OutputDestination 33 | Write to OutputDestination, typically a file name, or ``-`` for 34 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 35 | Fsdb::BoundedQueue objects. 36 | 37 | --autorun or --noautorun 38 | By default, programs process automatically, but Fsdb::Filter objects 39 | in Perl do not run until you invoke the **run()** method. The 40 | ``--(no)autorun`` option controls that behavior within Perl. 41 | 42 | --help 43 | Show help. 44 | 45 | --man 46 | Show full manual. 47 | 48 | SAMPLE USAGE 49 | ------------ 50 | 51 | Input: 52 | ------ 53 | 54 | +----------------+---------------+--------------------+------+-------------------------+ 55 | \| username \| firstname \| lastname \| id \| email \| 56 | +----------------+---------------+--------------------+------+-------------------------+ 57 | \| johnh \| John \| Heidemann \| 134 \| johnh@isi.edu \| 58 | +----------------+---------------+--------------------+------+-------------------------+ 59 | 1 row in set (0.01 sec) 60 | 61 | Command: 62 | -------- 63 | 64 | sqlselect_to_db 65 | 66 | Output: 67 | ------- 68 | 69 | #fsdb -F S username firstname lastname id email johnh John Heidemann 134 70 | johnh@isi.edu # \| sqlselect_to_db 71 | 72 | SEE ALSO 73 | -------- 74 | 75 | Fsdb. db_to_csv. 
76 | 77 | AUTHOR and COPYRIGHT 78 | -------------------- 79 | 80 | Copyright (C) 2014-2018 by John Heidemann 81 | 82 | This program is distributed under terms of the GNU general public 83 | license, version 2. See the file COPYING with the distribution for 84 | details. 85 | -------------------------------------------------------------------------------- /docs/perl/tabdelim_to_db.rst: -------------------------------------------------------------------------------- 1 | tabdelim_to_db - convert tab-delimited data into fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | tabdelim_to_db target.fsdb 10 | 11 | DESCRIPTION 12 | ----------- 13 | 14 | Converts a tab-delimited data stream to Fsdb format. 15 | 16 | The input is tab-delimited (*not* fsdb): the first row is taken to be 17 | the names of the columns; tabs separate columns. 18 | 19 | The output is a fsdb file with a proper header and a tab 20 | field-separator. 21 | 22 | This module also supports the standard fsdb options: 23 | 24 | -d 25 | Enable debugging output. 26 | 27 | -i or --input InputSource 28 | Read from InputSource, typically a file name, or ``-`` for standard 29 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue 30 | objects. 31 | 32 | -o or --output OutputDestination 33 | Write to OutputDestination, typically a file name, or ``-`` for 34 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or 35 | Fsdb::BoundedQueue objects. 36 | 37 | --autorun or --noautorun 38 | By default, programs process automatically, but Fsdb::Filter objects 39 | in Perl do not run until you invoke the **run()** method. The 40 | ``--(no)autorun`` option controls that behavior within Perl. 41 | 42 | --help 43 | Show help. 44 | 45 | --man 46 | Show full manual. 47 | 48 | SAMPLE USAGE 49 | ------------ 50 | 51 | Input: 52 | ------ 53 | 54 | name email test1 Tommy Trojan tt@usc.edu 80 Joe Bruin joeb@ucla.edu 85 55 | J. Random jr@caltech.edu 90 56 | 57 | Command: 58 | -------- 59 | 60 | tabdelim_to_db 61 | 62 | Output: 63 | ------- 64 | 65 | #fsdb -Ft name email test1 Tommy Trojan tt@usc.edu 80 Joe Bruin 66 | joeb@ucla.edu 85 J. Random jr@caltech.edu 90 # \| dbcoldefine name email 67 | test1 68 | 69 | SEE ALSO 70 | -------- 71 | 72 | Fsdb. 73 | 74 | AUTHOR and COPYRIGHT 75 | -------------------- 76 | 77 | Copyright (C) 1991-2008 by John Heidemann 78 | 79 | This program is distributed under terms of the GNU general public 80 | license, version 2. See the file COPYING with the distribution for 81 | details. 82 | -------------------------------------------------------------------------------- /docs/perl/xml_to_db.rst: -------------------------------------------------------------------------------- 1 | xml_to_db - convert a subset of XML into fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | xml_to_db -k EntityField 75 | 76 | 77 | Command: 78 | -------- 79 | 80 | xml_to_db -k files/file 99 | 100 | This program is distributed under terms of the GNU general public 101 | license, version 2. See the file COPYING with the distribution for 102 | details. 
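As a rough illustration of the ``-k EntityField`` idea (for example ``-k files/file`` in the command above), the following self-contained Python sketch turns each repeated entity into one row, with its child tags as columns. The XML input is invented purely for illustration, and the sketch is not how ``xml_to_db`` is implemented.

.. code:: python

   # Illustrative sketch only: one row per repeated <file> entity under <files>.
   import xml.etree.ElementTree as ET

   xml_text = """
   <files>
     <file><name>a.txt</name><size>12</size></file>
     <file><name>b.txt</name><size>34</size></file>
   </files>
   """

   root = ET.fromstring(xml_text)
   rows = [{child.tag: child.text for child in entity} for entity in root.findall("file")]
   columns = sorted({name for row in rows for name in row})
   print("#fsdb -F t " + " ".join(columns))
   for row in rows:
       print("\t".join(row.get(name, "") for name in columns))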
103 | -------------------------------------------------------------------------------- /docs/perl/yaml_to_db.rst: -------------------------------------------------------------------------------- 1 | yaml_to_db - convert a subset of YAML into fsdb 2 | ====================================================================== 3 | 4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1* 5 | 6 | SYNOPSIS 7 | -------- 8 | 9 | yaml_to_db 92 | 93 | This program is distributed under terms of the GNU general public 94 | license, version 2. See the file COPYING with the distribution for 95 | details. 96 | -------------------------------------------------------------------------------- /docs/perltools.rst: -------------------------------------------------------------------------------- 1 | Perl FSDB Tools 2 | =============== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: FSDB Perl Scripts: 7 | 8 | perl/cgi_to_db.rst 9 | perl/combined_log_format_to_db.rst 10 | perl/csv_to_db.rst 11 | perl/dbcolcopylast.rst 12 | perl/dbcolcreate.rst 13 | perl/dbcoldefine.rst 14 | perl/dbcolhisto.rst 15 | perl/dbcolize.rst 16 | perl/dbcolmerge.rst 17 | perl/dbcolmovingstats.rst 18 | perl/dbcolneaten.rst 19 | perl/dbcolpercentile.rst 20 | perl/dbcolrename.rst 21 | perl/dbcol.rst 22 | perl/dbcolscorrelate.rst 23 | perl/dbcolsplittocols.rst 24 | perl/dbcolsplittorows.rst 25 | perl/dbcolsregression.rst 26 | perl/dbcolstatscores.rst 27 | perl/dbcolstats.rst 28 | perl/dbcoltype.rst 29 | perl/dbfilealter.rst 30 | perl/dbfilecat.rst 31 | perl/dbfilediff.rst 32 | perl/dbfilepivot.rst 33 | perl/dbfilestripcomments.rst 34 | perl/dbfilevalidate.rst 35 | perl/dbformmail.rst 36 | perl/dbjoin.rst 37 | perl/dblistize.rst 38 | perl/dbmapreduce.rst 39 | perl/dbmerge2.rst 40 | perl/dbmerge.rst 41 | perl/dbmultistats.rst 42 | perl/dbrecolize.rst 43 | perl/dbrowaccumulate.rst 44 | perl/dbrowcount.rst 45 | perl/dbrowdiff.rst 46 | perl/dbrowenumerate.rst 47 | perl/dbroweval.rst 48 | perl/dbrow.rst 49 | perl/dbrowuniq.rst 50 | perl/dbrvstatdiff.rst 51 | perl/dbsort.rst 52 | perl/dbstats.rst 53 | perl/db_to_csv.rst 54 | perl/db_to_html_table.rst 55 | perl/html_table_to_db.rst 56 | perl/kitrace_to_db.rst 57 | perl/ns_to_db.rst 58 | perl/sqlselect_to_db.rst 59 | perl/tabdelim_to_db.rst 60 | perl/tcpdump_to_db.rst 61 | perl/xml_to_db.rst 62 | perl/yaml_to_db.rst 63 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-argparse_cli 3 | sphinx-argparse 4 | myst-parser 5 | dateparser 6 | matplotlib 7 | -------------------------------------------------------------------------------- /docs/tools/images/myheat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/docs/tools/images/myheat.png -------------------------------------------------------------------------------- /docs/tools/index.rst: -------------------------------------------------------------------------------- 1 | PyFSDB Command Line Tools 2 | ========================= 3 | 4 | The following shell tools come with PyFSDB and can be used for generic 5 | command line processing of FSDB data. We break the list of tools up 6 | into different categories (although some tools may technical belong to 7 | multiple categories, we place them in only one). 
8 | 9 | Note: the `python` based tools begin with the `pdb` prefix to 10 | distinguish themselves from their `perl` counter-parts (which begin 11 | with `db`). 12 | 13 | 14 | Data filtering and modification tools 15 | ------------------------------------- 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | :caption: Contents: 20 | 21 | pdbrow 22 | pdbroweval 23 | pdbensure 24 | pdbaugment 25 | pdbfgrep 26 | pdbnormalize 27 | pdbcdf 28 | pdbdatetoepoch 29 | pdbepochtodate 30 | pdbkeyedsort 31 | pdbsum 32 | pdbzerofill 33 | 34 | Data conversion tools 35 | --------------------- 36 | 37 | .. toctree:: 38 | :maxdepth: 1 39 | :caption: Contents: 40 | 41 | pdb2to1 42 | pdbaddtypes 43 | pdbformat 44 | pdbjinja 45 | pdb2tex 46 | pdb2sql 47 | pdbsplitter 48 | pdbfullpivot 49 | pdbreescape 50 | 51 | Data analysis tools 52 | ------------------- 53 | 54 | .. toctree:: 55 | :maxdepth: 1 56 | :caption: Contents: 57 | 58 | pdbcoluniq 59 | pdbtopn 60 | pdbheatmap 61 | -------------------------------------------------------------------------------- /docs/tools/pdb2sql.md: -------------------------------------------------------------------------------- 1 | ### pdb2sql - uploads an FSDB file into a database 2 | 3 | `pdb2sql` converts an FSDB file into a latex table/tabular output. Specifically, it can both create a table, delete existing rows, add indexes to certain rows, add additional columns and values etc. It currently supports two different types of databases (*sqlite3* and *postgres*), which are selectable by the *-t* switch. 4 | 5 | #### Example input (*myfile.fsdb*): 6 | 7 | ``` 8 | #fsdb -F t col1:l two:a andthree:d 9 | 1 key1 42.0 10 | 2 key2 123.0 11 | 3 key1 90.2 12 | ``` 13 | 14 | #### Example command usage 15 | 16 | ``` 17 | $ pdb2sql -T newtable -i two -t sqlite3 myfile.fsdb output.sqlite3 18 | $ echo "select * from newtable" | sqlite3 output.sqlite3 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` 24 | 1|key1|42.0 25 | 2|key2|123.0 26 | 3|key1|90.2 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /docs/tools/pdb2sql.rst: -------------------------------------------------------------------------------- 1 | pdb2sql - uploads an FSDB file into a database 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdb2sql`` converts an FSDB file into a latex table/tabular output. 5 | Specifically, it can both create a table, delete existing rows, add 6 | indexes to certain rows, add additional columns and values etc. It 7 | currently supports two different types of databases (*sqlite3* and 8 | *postgres*), which are selectable by the *-t* switch. 9 | 10 | Example input (*myfile.fsdb*): 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | :: 14 | 15 | #fsdb -F t col1:l two:a andthree:d 16 | 1 key1 42.0 17 | 2 key2 123.0 18 | 3 key1 90.2 19 | 20 | Example command usage 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | :: 24 | 25 | $ pdb2sql -T newtable -i two -t sqlite3 myfile.fsdb output.sqlite3 26 | $ echo "select * from newtable" | sqlite3 output.sqlite3 27 | 28 | Example output 29 | ^^^^^^^^^^^^^^ 30 | 31 | :: 32 | 33 | 1|key1|42.0 34 | 2|key2|123.0 35 | 3|key1|90.2 36 | 37 | 38 | Command Line Arguments 39 | ^^^^^^^^^^^^^^^^^^^^^^ 40 | 41 | .. 
sphinx_argparse_cli:: 42 | :module: pyfsdb.tools.pdb2sql 43 | :func: parse_args 44 | :hook: 45 | :prog: pdb2sql 46 | -------------------------------------------------------------------------------- /docs/tools/pdb2tex.md: -------------------------------------------------------------------------------- 1 | ### pdb2tex - create a latex table using the data in a FSDB file 2 | 3 | `pdb2tex` converts an FSDB file into a latex table/tabular output 4 | 5 | #### Example input (*myfile.fsdb*): 6 | 7 | ``` 8 | #fsdb -F t col1:l two:a andthree:d 9 | 1 key1 42.0 10 | 2 key2 123.0 11 | 3 key1 90.2 12 | ``` 13 | 14 | #### Example command usage 15 | 16 | ``` 17 | $ pdb2tex myfile.fsdb 18 | 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` latex 24 | \begin{table} 25 | \begin{tabular}{lll} 26 | \textbf{col1} & \textbf{two} & \textbf{andthree} \\ 27 | 1 & key1 & 42.0 \\ 28 | 2 & key2 & 123.0 \\ 29 | 3 & key1 & 90.2 \\ 30 | \end{tabular} 31 | \end{table} 32 | ``` 33 | 34 | -------------------------------------------------------------------------------- /docs/tools/pdb2tex.rst: -------------------------------------------------------------------------------- 1 | pdb2tex - create a latex table using the data in a FSDB file 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdb2tex`` converts an FSDB file into a latex table/tabular output 5 | 6 | Example input (*myfile.fsdb*): 7 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 8 | 9 | :: 10 | 11 | #fsdb -F t col1:l two:a andthree:d 12 | 1 key1 42.0 13 | 2 key2 123.0 14 | 3 key1 90.2 15 | 16 | Example command usage 17 | ^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | :: 20 | 21 | $ pdb2tex myfile.fsdb 22 | 23 | Example output 24 | ^^^^^^^^^^^^^^ 25 | 26 | .. code:: latex 27 | 28 | \begin{table} 29 | \begin{tabular}{lll} 30 | \textbf{col1} & \textbf{two} & \textbf{andthree} \\ 31 | 1 & key1 & 42.0 \\ 32 | 2 & key2 & 123.0 \\ 33 | 3 & key1 & 90.2 \\ 34 | \end{tabular} 35 | \end{table} 36 | 37 | 38 | Command Line Arguments 39 | ^^^^^^^^^^^^^^^^^^^^^^ 40 | 41 | .. sphinx_argparse_cli:: 42 | :module: pyfsdb.tools.pdb2tex 43 | :func: parse_args 44 | :hook: 45 | :prog: pdb2tex 46 | -------------------------------------------------------------------------------- /docs/tools/pdb2to1.md: -------------------------------------------------------------------------------- 1 | ### pdb2to1 - strip typing information from the FSDB header 2 | 3 | `pdb2to1` simply removes typing information that may confusing older 4 | FSDB or pyfsdb tools that do not understanding datatypes in the 5 | headers. Datatypes were introduced into FSDB format version 2. To 6 | add or change types instead, use `pdbaddtypes`. 7 | 8 | #### Example input (*myfile.fsdb*): 9 | 10 | ``` 11 | #fsdb -F t col1:l two:a andthree:d 12 | 1 key1 42.0 13 | 2 key2 123.0 14 | 3 key1 90.2 15 | ``` 16 | 17 | #### Example command usage 18 | 19 | ``` 20 | $ pdb2to1 myfile.fsdb 21 | ``` 22 | 23 | #### Example output 24 | 25 | ``` 26 | #fsdb -F t col1 two andthree 27 | 1 key1 42.0 28 | 2 key2 123.0 29 | 3 key1 90.2 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /docs/tools/pdb2to1.rst: -------------------------------------------------------------------------------- 1 | pdb2to1 - strip typing information from the FSDB header 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdb2to1`` simply removes typing information that may confusing older 5 | FSDB or pyfsdb tools that do not understanding datatypes in the headers. 
6 | Datatypes were introduced into FSDB format version 2. To add or change 7 | types instead, use ``pdbaddtypes``. 8 | 9 | Example input (*myfile.fsdb*): 10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 11 | 12 | :: 13 | 14 | #fsdb -F t col1:l two:a andthree:d 15 | 1 key1 42.0 16 | 2 key2 123.0 17 | 3 key1 90.2 18 | 19 | Example command usage 20 | ^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | :: 23 | 24 | $ pdb2to1 myfile.fsdb 25 | 26 | Example output 27 | ^^^^^^^^^^^^^^ 28 | 29 | :: 30 | 31 | #fsdb -F t col1 two andthree 32 | 1 key1 42.0 33 | 2 key2 123.0 34 | 3 key1 90.2 35 | 36 | 37 | Command Line Arguments 38 | ^^^^^^^^^^^^^^^^^^^^^^ 39 | 40 | .. sphinx_argparse_cli:: 41 | :module: pyfsdb.tools.pdb2to1 42 | :func: parse_args 43 | :hook: 44 | :prog: pdb2to1 45 | -------------------------------------------------------------------------------- /docs/tools/pdbaddtypes.md: -------------------------------------------------------------------------------- 1 | ### pdbaddtypes - strip typing information from the FSDB header 2 | 3 | `pdbaddtypes` adds datatypes to the header so languages with typing 4 | support (eg, Python and Go) can parse FSDB files and output properly 5 | typed variables. Older data, or data generated by older tools may not 6 | be properly typed. The *-a* flag can be used to attempt auto-typing, 7 | but is based on analyzing only the first row. 8 | 9 | #### Example input (*myfile.fsdb*): 10 | 11 | ``` 12 | #fsdb -F t col1 two andthree 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | ``` 17 | 18 | #### Example command usage 19 | 20 | ``` 21 | $ pdbaddtypes -a myfile.fsdb 22 | ``` 23 | 24 | #### Example output 25 | 26 | ``` 27 | #fsdb -F t col1:l two:a andthree:d 28 | 1 key1 42.0 29 | 2 key2 123.0 30 | 3 key1 90.2 31 | ``` 32 | 33 | #### Example command usage with specified typing 34 | 35 | ``` 36 | $ pdbaddtypes -t col1=l andthree=d -- myfile-notypes.fsdb 37 | ``` 38 | 39 | #### Example output 40 | 41 | ``` 42 | #fsdb -F t col1:l two andthree:d 43 | 1 key1 42.0 44 | 2 key2 123.0 45 | 3 key1 90.2 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /docs/tools/pdbaddtypes.rst: -------------------------------------------------------------------------------- 1 | pdbaddtypes - strip typing information from the FSDB header 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbaddtypes`` adds datatypes to the header so languages with typing 5 | support (eg, Python and Go) can parse FSDB files and output properly 6 | typed variables. Older data, or data generated by older tools may not be 7 | properly typed. The *-a* flag can be used to attempt auto-typing, but is 8 | based on analyzing only the first row. 9 | 10 | Example input (*myfile.fsdb*): 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | :: 14 | 15 | #fsdb -F t col1 two andthree 16 | 1 key1 42.0 17 | 2 key2 123.0 18 | 3 key1 90.2 19 | 20 | Example command usage 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | :: 24 | 25 | $ pdbaddtypes -a myfile.fsdb 26 | 27 | Example output 28 | ^^^^^^^^^^^^^^ 29 | 30 | :: 31 | 32 | #fsdb -F t col1:l two:a andthree:d 33 | 1 key1 42.0 34 | 2 key2 123.0 35 | 3 key1 90.2 36 | 37 | Example command usage with specified typing 38 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 39 | 40 | :: 41 | 42 | $ pdbaddtypes -t col1=l andthree=d -- myfile-notypes.fsdb 43 | 44 | .. 
_example-output-1: 45 | 46 | Example output 47 | ^^^^^^^^^^^^^^ 48 | 49 | :: 50 | 51 | #fsdb -F t col1:l two andthree:d 52 | 1 key1 42.0 53 | 2 key2 123.0 54 | 3 key1 90.2 55 | 56 | 57 | Command Line Arguments 58 | ^^^^^^^^^^^^^^^^^^^^^^ 59 | 60 | .. sphinx_argparse_cli:: 61 | :module: pyfsdb.tools.pdbaddtypes 62 | :func: parse_args 63 | :hook: 64 | :prog: pdbaddtypes 65 | -------------------------------------------------------------------------------- /docs/tools/pdbaugment.md: -------------------------------------------------------------------------------- 1 | ### pdbaugment - join rows from one FSDB files into another 2 | 3 | `pdbaugment` provides a different mechanism for doing FSDB file joins 4 | than the `dbjoin` command from the base perl FSDB package. 5 | Specifically, `pdbaugment` is designed to read a single file entirely 6 | into memory and use it augment a second one that is read in a 7 | streaming style. `pdbaugment` has the advantage being faster because 8 | it dose not need to do a full sort of both files, like `dbjoin` 9 | requires, but has the downside of needing to store one file in memory 10 | while performing the join. In general, the smaller file should be 11 | used as the *augment_file* argument, and the larger as the 12 | `stream_file` when possible. Matching keys in the augment file should 13 | be unique across the file, otherwise only the second row with a give 14 | key combination will be used. 15 | 16 | #### Example input file 1 (*myfile.fsdb*): 17 | 18 | ``` 19 | #fsdb -F t col1 two andthree 20 | 1 key1 42.0 21 | 2 key2 123.0 22 | 3 key1 90.2 23 | ``` 24 | 25 | #### Example input file 2 (*augment.fsdb*): 26 | 27 | ``` 28 | #fsdb -F t col1 additional_column 29 | key1 blue 30 | key2 brown 31 | ``` 32 | 33 | #### Example command usage 34 | 35 | ``` 36 | $ pdbaugment -k two -v additional_column -- myfile.fsdb augment.fsdb 37 | ``` 38 | 39 | #### Example output 40 | 41 | ``` 42 | #fsdb -F t col1:l two:a andthree:d additional_column:a 43 | 1 key1 42.0 blue 44 | 2 key2 123.0 brown 45 | 3 key1 90.2 blue 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /docs/tools/pdbaugment.rst: -------------------------------------------------------------------------------- 1 | pdbaugment - join rows from one FSDB files into another 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbaugment`` provides a different mechanism for doing FSDB file joins 5 | than the ``dbjoin`` command from the base perl FSDB package. 6 | Specifically, ``pdbaugment`` is designed to read a single file entirely 7 | into memory and use it augment a second one that is read in a streaming 8 | style. ``pdbaugment`` has the advantage being faster because it dose not 9 | need to do a full sort of both files, like ``dbjoin`` requires, but has 10 | the downside of needing to store one file in memory while performing the 11 | join. In general, the smaller file should be used as the *augment_file* 12 | argument, and the larger as the ``stream_file`` when possible. Matching 13 | keys in the augment file should be unique across the file, otherwise 14 | only the second row with a give key combination will be used. 
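The in-memory mechanism described above is small enough to sketch directly against the ``pyfsdb`` class. The sketch below assumes the basic ``Fsdb`` API (``get_column_number``, iteration, ``out_column_names``, ``append``); the function name and the file/column names in the call are invented, and it only mirrors the behavior described here, including the note that a later duplicate key in the augment file wins.

.. code:: python

   # Sketch only: load the (small) augment file into a dict, stream the big file.
   import pyfsdb

   def augment(stream_file, augment_file, key_column, value_column, out_file):
       aug = pyfsdb.Fsdb(augment_file)
       key_idx = aug.get_column_number(key_column)
       val_idx = aug.get_column_number(value_column)
       lookup = {row[key_idx]: row[val_idx] for row in aug}   # later duplicates win

       stream = pyfsdb.Fsdb(stream_file, out_file=out_file)
       stream.out_column_names = list(stream.column_names) + [value_column]
       skey_idx = stream.get_column_number(key_column)
       for row in stream:
           row.append(lookup.get(row[skey_idx], ""))          # unmatched keys left empty
           stream.append(row)
       stream.close()

   augment("big.fsdb", "small.fsdb", "key", "value", "joined.fsdb")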
15 | 16 | Example input file 1 (*myfile.fsdb*): 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | :: 20 | 21 | #fsdb -F t col1 two andthree 22 | 1 key1 42.0 23 | 2 key2 123.0 24 | 3 key1 90.2 25 | 26 | Example input file 2 (*augment.fsdb*): 27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 28 | 29 | :: 30 | 31 | #fsdb -F t col1 additional_column 32 | key1 blue 33 | key2 brown 34 | 35 | Example command usage 36 | ^^^^^^^^^^^^^^^^^^^^^ 37 | 38 | :: 39 | 40 | $ pdbaugment -k two -v additional_column -- myfile.fsdb augment.fsdb 41 | 42 | Example output 43 | ^^^^^^^^^^^^^^ 44 | 45 | :: 46 | 47 | #fsdb -F t col1:l two:a andthree:d additional_column:a 48 | 1 key1 42.0 blue 49 | 2 key2 123.0 brown 50 | 3 key1 90.2 blue 51 | 52 | 53 | Command Line Arguments 54 | ^^^^^^^^^^^^^^^^^^^^^^ 55 | 56 | .. sphinx_argparse_cli:: 57 | :module: pyfsdb.tools.pdbaugment 58 | :func: parse_args 59 | :hook: 60 | :prog: pdbaugment 61 | -------------------------------------------------------------------------------- /docs/tools/pdbcdf.md: -------------------------------------------------------------------------------- 1 | ### pdbcdf - find all unique values of a key column 2 | 3 | `pdbcdf` analyzes one column from an FSDB file to produce normalized 4 | CDF related columns. 5 | 6 | #### Example input (*myfile.fsdb*): 7 | 8 | ``` 9 | #fsdb -F s col1:l two:a andthree:d 10 | 1 key1 42.0 11 | 2 key2 123.0 12 | 3 key1 90.2 13 | ``` 14 | 15 | #### Example command usage 16 | 17 | ``` 18 | $ pdbcoluniq -c andthree -P percent -R raw myfile.fsdb 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` 24 | #fsdb -F t col1 two andthree andthree_cdf raw percent 25 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837 26 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941 27 | 3 key1 90.2 1.0 255.2 35.3448275862069 28 | ... 29 | ``` 30 | -------------------------------------------------------------------------------- /docs/tools/pdbcdf.rst: -------------------------------------------------------------------------------- 1 | pdbcdf - find all unique values of a key column 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbcdf`` analyzes one column from an FSDB file to produce normalized 5 | CDF related columns. 6 | 7 | Example input (*myfile.fsdb*): 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | :: 11 | 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbcoluniq -c andthree -P percent -R raw myfile.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t col1 two andthree andthree_cdf raw percent 30 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837 31 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941 32 | 3 key1 90.2 1.0 255.2 35.3448275862069 33 | ... 34 | 35 | 36 | Command Line Arguments 37 | ^^^^^^^^^^^^^^^^^^^^^^ 38 | 39 | .. sphinx_argparse_cli:: 40 | :module: pyfsdb.tools.pdbcdf 41 | :func: parse_args 42 | :hook: 43 | :prog: pdbcdf 44 | -------------------------------------------------------------------------------- /docs/tools/pdbcoluniq.md: -------------------------------------------------------------------------------- 1 | ### pdbcoluniq - find all unique values of a key column 2 | 3 | `pdbcoluniq` can find all unique values of a key column, optionally 4 | including counting the number of each value seen. 
This is done with 5 | an internal dictionary and requires no sorting (unlike its perl 6 | dbrowuniq equivelent) at the potential cost of higher memory usage. 7 | 8 | #### Example input (*myfile.fsdb*): 9 | 10 | ``` 11 | #fsdb -F s col1:l two:a andthree:d 12 | 1 key1 42.0 13 | 2 key2 123.0 14 | 3 key1 90.2 15 | ``` 16 | 17 | #### Example command usage 18 | 19 | ``` 20 | $ pdbcoluniq -k two -c myfile.fsdb 21 | ``` 22 | 23 | #### Example output 24 | 25 | ``` 26 | #fsdb -F t two count:l 27 | key1 2 28 | key2 1 29 | # | pdbcoluniq -k two -c myfile.fsdb 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/tools/pdbcoluniq.rst: -------------------------------------------------------------------------------- 1 | pdbcoluniq - find all unique values of a key column 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbcoluniq`` can find all unique values of a key column, optionally 5 | including counting the number of each value seen. This is done with an 6 | internal dictionary and requires no sorting (unlike its perl dbrowuniq 7 | equivelent) at the potential cost of higher memory usage. 8 | 9 | Example input (*myfile.fsdb*): 10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 11 | 12 | :: 13 | 14 | #fsdb -F s col1:l two:a andthree:d 15 | 1 key1 42.0 16 | 2 key2 123.0 17 | 3 key1 90.2 18 | 19 | Example command usage 20 | ^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | :: 23 | 24 | $ pdbcoluniq -k two -c myfile.fsdb 25 | 26 | Example output 27 | ^^^^^^^^^^^^^^ 28 | 29 | :: 30 | 31 | #fsdb -F t two count:l 32 | key1 2 33 | key2 1 34 | # | pdbcoluniq -k two -c myfile.fsdb 35 | 36 | 37 | Command Line Arguments 38 | ^^^^^^^^^^^^^^^^^^^^^^ 39 | 40 | .. sphinx_argparse_cli:: 41 | :module: pyfsdb.tools.pdbcoluniq 42 | :func: parse_args 43 | :hook: 44 | :prog: pdbcoluniq 45 | -------------------------------------------------------------------------------- /docs/tools/pdbdatetoepoch.md: -------------------------------------------------------------------------------- 1 | ### pdbdatetoepoch - translate a date-string based column to unix epochs 2 | 3 | `pdbdatetoepoch` translates one date/time based column column to 4 | another unix epoch seconds (since Jan 1 1970) column. This 5 | tool is the inverse of the `pdbepochtodate` tool. 6 | 7 | #### Example input (*mytime.fsdb*): 8 | 9 | ``` 10 | #fsdb -F t index:d datecol:a 11 | 1 2023/01/01 12 | 2 2023/01/01 10:50:05 13 | ``` 14 | 15 | #### Example command usage 16 | 17 | ``` 18 | $ pdbdatetoepoch -d datecol -t timestamp percent mytime.fsdb 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` 24 | #fsdb -F t col1 two andthree andthree_cdf raw percent 25 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837 26 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941 27 | 3 key1 90.2 1.0 255.2 35.3448275862069 28 | ... 29 | ``` 30 | 31 | #### Notes 32 | 33 | Internally this uses python's `dateparser` module. 34 | -------------------------------------------------------------------------------- /docs/tools/pdbdatetoepoch.rst: -------------------------------------------------------------------------------- 1 | pdbdatetoepoch - translate a date-string based column to unix epochs 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbdatetoepoch`` translates one date/time based column column to 5 | another unix epoch seconds (since Jan 1 1970) column. This tool is the 6 | inverse of the ``pdbepochtodate`` tool. 
7 | 8 | Example input (*mytime.fsdb*): 9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 10 | 11 | :: 12 | 13 | #fsdb -F t index:d datecol:a 14 | 1 2023/01/01 15 | 2 2023/01/01 10:50:05 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbdatetoepoch -d datecol -t timestamp percent mytime.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t col1 two andthree andthree_cdf raw percent 30 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837 31 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941 32 | 3 key1 90.2 1.0 255.2 35.3448275862069 33 | ... 34 | 35 | Notes 36 | ^^^^^ 37 | 38 | Internally this uses python’s ``dateparser`` module. 39 | 40 | 41 | Command Line Arguments 42 | ^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | .. sphinx_argparse_cli:: 45 | :module: pyfsdb.tools.pdbdatetoepoch 46 | :func: parse_args 47 | :hook: 48 | :prog: pdbdatetoepoch 49 | -------------------------------------------------------------------------------- /docs/tools/pdbensure.md: -------------------------------------------------------------------------------- 1 | ### pdbensure - ensure certain columns are present in the data 2 | 3 | `pdbensure` either simply drops rows without content in a list of 4 | columns, or optionally fills in the values with a default instead. 5 | 6 | #### Example input (*myfile.fsdb*): 7 | 8 | ``` 9 | #fsdb -F s col1:l two:a andthree:d 10 | 1 key1 42.0 11 | 2 key2 12 | 3 90.2 13 | ``` 14 | 15 | #### Example command usage 16 | 17 | ``` 18 | $ pdbensure -c andthree -e myfile.fsdb 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` 24 | #fsdb -F t col1:l two:a andthree:d 25 | 1 42.0 26 | # dbensure dropping row:[2, 'key2', None] 27 | 3 90.2 28 | ``` 29 | 30 | #### Example command usage -- adding a second column 31 | 32 | ``` 33 | $ pdbensure -c andthree two -e myfile.fsdb 34 | ``` 35 | 36 | #### Example output 37 | 38 | ``` 39 | #fsdb -F t col1:l two:a andthree:d 40 | 1 42.0 41 | # dbensure dropping row:[2, 'key2', None] 42 | # dbensure dropping row:[3, None, 90.2] 43 | ``` 44 | 45 | #### Example command usage -- with replacement 46 | 47 | ``` 48 | $ pdbensure -c two -v replace -- myfile.fsdb 49 | ``` 50 | 51 | #### Example output 52 | 53 | ``` 54 | #fsdb -F t col1:l two:a andthree:d 55 | 1 key1 42.0 56 | 2 key2 57 | 3 replace 90.2 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /docs/tools/pdbensure.rst: -------------------------------------------------------------------------------- 1 | pdbensure - ensure certain columns are present in the data 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbensure`` either simply drops rows without content in a list of 5 | columns, or optionally fills in the values with a default instead. 6 | 7 | Example input (*myfile.fsdb*): 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | :: 11 | 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 15 | 3 90.2 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbensure -c andthree -e myfile.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t col1:l two:a andthree:d 30 | 1 42.0 31 | # dbensure dropping row:[2, 'key2', None] 32 | 3 90.2 33 | 34 | Example command usage – adding a second column 35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 36 | 37 | :: 38 | 39 | $ pdbensure -c andthree two -e myfile.fsdb 40 | 41 | .. 
_example-output-1: 42 | 43 | Example output 44 | ^^^^^^^^^^^^^^ 45 | 46 | :: 47 | 48 | #fsdb -F t col1:l two:a andthree:d 49 | 1 42.0 50 | # dbensure dropping row:[2, 'key2', None] 51 | # dbensure dropping row:[3, None, 90.2] 52 | 53 | Example command usage – with replacement 54 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 55 | 56 | :: 57 | 58 | $ pdbensure -c two -v replace -- myfile.fsdb 59 | 60 | .. _example-output-2: 61 | 62 | Example output 63 | ^^^^^^^^^^^^^^ 64 | 65 | :: 66 | 67 | #fsdb -F t col1:l two:a andthree:d 68 | 1 key1 42.0 69 | 2 key2 70 | 3 replace 90.2 71 | 72 | 73 | Command Line Arguments 74 | ^^^^^^^^^^^^^^^^^^^^^^ 75 | 76 | .. sphinx_argparse_cli:: 77 | :module: pyfsdb.tools.pdbensure 78 | :func: parse_args 79 | :hook: 80 | :prog: pdbensure 81 | -------------------------------------------------------------------------------- /docs/tools/pdbepochtodate.md: -------------------------------------------------------------------------------- 1 | ### pdbepochtodate - translate a unix epoch column to a date-string column 2 | 3 | `pdbepochtodante` translates a column containing unix epoch seconds 4 | (since Jan 1 1970) to another column with a formatted date/time. This 5 | tool is the inverse of the `pdbdatetoepoch` tool. 6 | . 7 | 8 | #### Example input (*myepoch.fsdb*): 9 | 10 | ``` 11 | #fsdb -F t index:l timestamp:d 12 | 1 1672560000 13 | 2 1678831200 14 | ``` 15 | 16 | #### Example command usage 17 | 18 | ``` 19 | $ pdbepochtodante -d datecol -t timestamp percent mytime.fsdb 20 | ``` 21 | 22 | #### Example output 23 | 24 | ``` 25 | #fsdb -F t index:l timestamp:d date 26 | 1 1672560000.0 2023-01-01 00:00 27 | 2 1678831200.0 2023-03-14 15:00 28 | ``` 29 | 30 | #### Notes 31 | 32 | Internally this uses python's `dateparser` module. 33 | -------------------------------------------------------------------------------- /docs/tools/pdbepochtodate.rst: -------------------------------------------------------------------------------- 1 | pdbepochtodate - translate a unix epoch column to a date-string column 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbepochtodante`` translates a column containing unix epoch seconds 5 | (since Jan 1 1970) to another column with a formatted date/time. This 6 | tool is the inverse of the ``pdbdatetoepoch`` tool. . 7 | 8 | Example input (*myepoch.fsdb*): 9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 10 | 11 | :: 12 | 13 | #fsdb -F t index:l timestamp:d 14 | 1 1672560000 15 | 2 1678831200 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbepochtodante -d datecol -t timestamp percent mytime.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t index:l timestamp:d date 30 | 1 1672560000.0 2023-01-01 00:00 31 | 2 1678831200.0 2023-03-14 15:00 32 | 33 | Notes 34 | ^^^^^ 35 | 36 | Internally this uses python’s ``dateparser`` module. 37 | 38 | 39 | Command Line Arguments 40 | ^^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | .. 
sphinx_argparse_cli:: 43 | :module: pyfsdb.tools.pdbepochtodate 44 | :func: parse_args 45 | :hook: 46 | :prog: pdbepochtodate 47 | -------------------------------------------------------------------------------- /docs/tools/pdbfgrep.rst: -------------------------------------------------------------------------------- 1 | pdbfgrep - grep rows from one FSDB file using values from another 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbfgrep`` provides a mechanism for doing a multi-match grep from 5 | two FSDB files, where the first is the stream to read and grep from 6 | (search through) and the second is a file containing a list of values 7 | from keys to match against. Similar to ``pdbaugment``, ``pdbfgrep`` 8 | is designed to read a single file entirely into memory and use it to 9 | search for rows in a second one that is read in a streaming style. In 10 | general, the smaller file should be the one read into memory (the list 11 | of values to match), and the larger should be the streamed file when 12 | possible. 13 | 14 | Example input file 1 (*mygreptest.fsdb*): 15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 16 | 17 | :: 18 | 19 | #fsdb -F t col1 two andthree 20 | 1 key1 42.0 21 | 2 key2 123.0 22 | 3 key3 90.2 23 | 24 | Example input file 2 (*grep-values.fsdb*): 25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t two additional_column 30 | key1 blue 31 | key3 brown 32 | 33 | Example command usage 34 | ^^^^^^^^^^^^^^^^^^^^^ 35 | 36 | :: 37 | 38 | $ pdbfgrep -k two -- mygreptest.fsdb grep-values.fsdb 39 | 40 | Example output 41 | ^^^^^^^^^^^^^^ 42 | 43 | :: 44 | 45 | #fsdb -F t col1:a two:a andthree:a 46 | 1 key1 42.0 47 | 3 key3 90.2 48 | # | pdbfgrep -k two -- mygreptest.fsdb grep-values.fsdb 49 | 50 | Example command usage -- inverted grep 51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 52 | 53 | :: 54 | 55 | $ pdbfgrep -v -k two -- mygreptest.fsdb grep-values.fsdb 56 | 57 | Example output 58 | ^^^^^^^^^^^^^^ 59 | 60 | :: 61 | 62 | #fsdb -F t col1:a two:a andthree:a 63 | 2 key2 123.0 64 | # | pdbfgrep -v -k two -- mygreptest.fsdb grep-values.fsdb 65 | 66 | 67 | Command Line Arguments 68 | ^^^^^^^^^^^^^^^^^^^^^^ 69 | 70 | .. sphinx_argparse_cli:: 71 | :module: pyfsdb.tools.pdbfgrep 72 | :func: parse_args 73 | :hook: 74 | :prog: pdbfgrep 75 | -------------------------------------------------------------------------------- /docs/tools/pdbformat.md: -------------------------------------------------------------------------------- 1 | ### pdbformat - create formatted text per row in an FSDB file 2 | 3 | `pdbformat` uses python's internal string formatting mechanisms to 4 | output lines of text based on the column values from each row. The 5 | *-f* flag is used to specify the formatting string to use, where 6 | column names may be enclosed in curly braces to indicate where 7 | replacement should happen. 8 | 9 | *See also:* `pdbjinja` 10 | 11 | #### Example input (*myfile.fsdb*): 12 | 13 | ``` 14 | #fsdb -F s col1:l two:a andthree:d 15 | 1 key1 42.0 16 | 2 key2 123.0 17 | 3 key1 90.2 18 | ``` 19 | 20 | #### Example command usage 21 | 22 | ``` 23 | $ pdbformat -f "{two} is {andthree:>7.7} !" myfile.fsdb 24 | ``` 25 | 26 | #### Example output 27 | 28 | ``` 29 | key1 is 42.0 ! 30 | key2 is 123.0 ! 31 | key1 is 90.2 ! 
32 | ``` 33 | -------------------------------------------------------------------------------- /docs/tools/pdbformat.rst: -------------------------------------------------------------------------------- 1 | pdbformat - create formatted text per row in an FSDB file 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbformat`` uses python’s internal string formatting mechanisms to 5 | output lines of text based on the column values from each row. The *-f* 6 | flag is used to specify the formatting string to use, where column names 7 | may be enclosed in curly braces to indicate where replacement should 8 | happen. 9 | 10 | *See also:* ``pdbjinja`` 11 | 12 | Example input (*myfile.fsdb*): 13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 14 | 15 | :: 16 | 17 | #fsdb -F s col1:l two:a andthree:d 18 | 1 key1 42.0 19 | 2 key2 123.0 20 | 3 key1 90.2 21 | 22 | Example command usage 23 | ^^^^^^^^^^^^^^^^^^^^^ 24 | 25 | :: 26 | 27 | $ pdbformat -f "{two} is {andthree:>7.7} !" myfile.fsdb 28 | 29 | Example output 30 | ^^^^^^^^^^^^^^ 31 | 32 | :: 33 | 34 | key1 is 42.0 ! 35 | key2 is 123.0 ! 36 | key1 is 90.2 ! 37 | 38 | 39 | Command Line Arguments 40 | ^^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | .. sphinx_argparse_cli:: 43 | :module: pyfsdb.tools.pdbformat 44 | :func: parse_args 45 | :hook: 46 | :prog: pdbformat 47 | -------------------------------------------------------------------------------- /docs/tools/pdbfullpivot.md: -------------------------------------------------------------------------------- 1 | ### pdbfullpivot - pivot time/key/value rows into a wide table 2 | 3 | `pdbfullpivot` takes an input file with time/key/value pairs, and 4 | pivots the table into a wide table with one new column per key value. 5 | 6 | *TODO: make this more generic to allow N number of keying columns* 7 | 8 | #### Example input (*myfile.fsdb*): 9 | 10 | ``` 11 | #fsdb -F t col1:l two:a andthree:d 12 | 1 key1 42.0 13 | 1 key2 123.0 14 | 2 key1 90.2 15 | ``` 16 | 17 | #### Example command usage 18 | 19 | ``` 20 | $ pdbfullpivot -t col1 -k two myfile.fsdb 21 | ``` 22 | 23 | #### Example output 24 | 25 | ``` 26 | #fsdb -F t col1:l key1:d key2:d 27 | 1 42.0 123.0 28 | 2 90.2 0 29 | ... 30 | ``` 31 | 32 | #### Notes 33 | 34 | This can produce an output table with a lot of columns when there are 35 | a lot of values within the key column. 36 | -------------------------------------------------------------------------------- /docs/tools/pdbfullpivot.rst: -------------------------------------------------------------------------------- 1 | pdbfullpivot - pivot time/key/value rows into a wide table 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbfullpivot`` takes an input file with time/key/value pairs, and 5 | pivots the table into a wide table with one new column per key value. 6 | 7 | *TODO: make this more generic to allow N number of keying columns* 8 | 9 | Example input (*myfile.fsdb*): 10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 11 | 12 | :: 13 | 14 | #fsdb -F t col1:l two:a andthree:d 15 | 1 key1 42.0 16 | 1 key2 123.0 17 | 2 key1 90.2 18 | 19 | Example command usage 20 | ^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | :: 23 | 24 | $ pdbfullpivot -t col1 -k two myfile.fsdb 25 | 26 | Example output 27 | ^^^^^^^^^^^^^^ 28 | 29 | :: 30 | 31 | #fsdb -F t col1:l key1:d key2:d 32 | 1 42.0 123.0 33 | 2 90.2 0 34 | ... 35 | 36 | Notes 37 | ^^^^^ 38 | 39 | This can produce an output table with a lot of columns when there are a 40 | lot of values within the key column. 
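For readers who prefer the ``pyfsdb`` API over the command line, the pivot is easy to sketch in two passes. The snippet below is illustrative only and is not the actual implementation of the tool: the column names and the ``0`` default come from the example above, and the output file name ``pivoted.fsdb`` is invented for the illustration.

::

    import pyfsdb

    # pass 1: collect every distinct key value; these become the new columns
    keys = []
    for row in pyfsdb.Fsdb("myfile.fsdb", return_type=pyfsdb.RETURN_AS_DICTIONARY):
        if row["two"] not in keys:
            keys.append(row["two"])

    # pass 2: group the values by the time column, defaulting missing keys to 0
    wide = {}
    for row in pyfsdb.Fsdb("myfile.fsdb", return_type=pyfsdb.RETURN_AS_DICTIONARY):
        slot = wide.setdefault(row["col1"], dict.fromkeys(keys, 0))
        slot[row["two"]] = row["andthree"]

    # write one wide row per time value
    out = pyfsdb.Fsdb(out_file="pivoted.fsdb")
    out.out_column_names = ["col1"] + keys
    for timeval, values in wide.items():
        out.append([timeval] + [values[key] for key in keys])
    out.close()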
41 | 42 | 43 | Command Line Arguments 44 | ^^^^^^^^^^^^^^^^^^^^^^ 45 | 46 | .. sphinx_argparse_cli:: 47 | :module: pyfsdb.tools.pdbfullpivot 48 | :func: parse_args 49 | :hook: 50 | :prog: pdbfullpivot 51 | -------------------------------------------------------------------------------- /docs/tools/pdbheatmap.md: -------------------------------------------------------------------------------- 1 | ### pdbheatmap - produce a graphical heat map from two key columns 2 | 3 | `pdbheatmap` produces a graphical "heat map" of values contained 4 | within a FSDB file given two key columns. It is most useful to get a 5 | visual representation of scored data, for example. 6 | 7 | #### Example input (*myheat.fsdb*): 8 | 9 | Consider the following example input file, where Joe and Bob were 10 | asked to score their favorite fruits on a scale from 1 to 50. 11 | 12 | ``` 13 | #fsdb -F t Person Fruit value 14 | Joe Orange 10 15 | Joe Apple 30 16 | Bob Orange 5 17 | Bob Apple 40 18 | ``` 19 | 20 | #### Example command usage 21 | 22 | We can then run `pdbheatmap` to generate a graphical map that shows 23 | clearly that when you compare Apples and Oranges, Apples will win. 24 | 25 | ``` 26 | $ pdbheatmap -c Person Fruit -v value myheat.fsdb myheat.png -R -fs 20 -L 27 | ``` 28 | 29 | #### Example output 30 | 31 | ![myheat.png](images/myheat.png) 32 | -------------------------------------------------------------------------------- /docs/tools/pdbheatmap.rst: -------------------------------------------------------------------------------- 1 | pdbheatmap - produce a graphical heat map from two key columns 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbheatmap`` produces a graphical “heat map” of values contained 5 | within a FSDB file given two key columns. It is most useful to get a 6 | visual representation of scored data, for example. 7 | 8 | Example input (*myheat.fsdb*): 9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 10 | 11 | Consider the following example input file, where Joe and Bob were asked 12 | to score their favorite fruits on a scale from 1 to 50. 13 | 14 | :: 15 | 16 | #fsdb -F t Person Fruit value 17 | Joe Orange 10 18 | Joe Apple 30 19 | Bob Orange 5 20 | Bob Apple 40 21 | 22 | Example command usage 23 | ^^^^^^^^^^^^^^^^^^^^^ 24 | 25 | We can then run ``pdbheatmap`` to generate a graphical map that shows 26 | clearly that when you compare Apples and Oranges, Apples will win. 27 | 28 | :: 29 | 30 | $ pdbheatmap -c Person Fruit -v value myheat.fsdb myheat.png -R -fs 20 -L 31 | 32 | Example output 33 | ^^^^^^^^^^^^^^ 34 | 35 | .. figure:: images/myheat.png 36 | :alt: myheat.png 37 | 38 | myheat.png 39 | 40 | 41 | Command Line Arguments 42 | ^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | .. sphinx_argparse_cli:: 45 | :module: pyfsdb.tools.pdbheatmap 46 | :func: parse_args 47 | :hook: 48 | :prog: pdbheatmap 49 | -------------------------------------------------------------------------------- /docs/tools/pdbjinja.md: -------------------------------------------------------------------------------- 1 | ### pdbjinja - process an FSDB file with a jinja template 2 | 3 | `pdbjinja` takes all the data in an fsdb file, and passes it to a 4 | jinja2 template with each row being stored in a `rows` variable. 5 | 6 | *Note:* all rows will be loaded into memory at once. 
7 | 8 | *See also:* `pdbformat` 9 | 10 | #### Example input (*myfile.fsdb*): 11 | 12 | ``` 13 | #fsdb -F t col1:l two:a andthree:d 14 | 1 key1 42.0 15 | 2 key2 123.0 16 | 3 key1 90.2 17 | ``` 18 | 19 | #### Example jinja template (*myfile.j2*) 20 | 21 | ``` 22 | {% for row in rows -%} 23 | Key {{row["two"]}}'s favorite number is {{row["andthree"]}} 24 | {% endfor %} 25 | ``` 26 | 27 | #### Example command usage 28 | 29 | ``` 30 | $ pdbjinja -j myfile.j2 myfile.fsdb 31 | ``` 32 | 33 | #### Example output 34 | 35 | ``` 36 | Key key1's favorite number is 42.0 37 | Key key2's favorite number is 123.0 38 | Key key1's favorite number is 90.2 39 | ``` 40 | 41 | 42 | -------------------------------------------------------------------------------- /docs/tools/pdbjinja.rst: -------------------------------------------------------------------------------- 1 | pdbjinja - process an FSDB file with a jinja template 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbjinja`` takes all the data in an fsdb file, and passes it to a 5 | jinja2 template with each row being stored in a ``rows`` variable. 6 | 7 | *Note:* all rows will be loaded into memory at once. 8 | 9 | *See also:* ``pdbformat`` 10 | 11 | Example input (*myfile.fsdb*): 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | 14 | :: 15 | 16 | #fsdb -F t col1:l two:a andthree:d 17 | 1 key1 42.0 18 | 2 key2 123.0 19 | 3 key1 90.2 20 | 21 | Example jinja template (*myfile.j2*) 22 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | :: 25 | 26 | {% for row in rows -%} 27 | Key {{row["two"]}}'s favorite number is {{row["andthree"]}} 28 | {% endfor %} 29 | 30 | Example command usage 31 | ^^^^^^^^^^^^^^^^^^^^^ 32 | 33 | :: 34 | 35 | $ pdbjinja -j myfile.j2 myfile.fsdb 36 | 37 | Example output 38 | ^^^^^^^^^^^^^^ 39 | 40 | :: 41 | 42 | Key key1's favorite number is 42.0 43 | Key key2's favorite number is 123.0 44 | Key key1's favorite number is 90.2 45 | 46 | 47 | Command Line Arguments 48 | ^^^^^^^^^^^^^^^^^^^^^^ 49 | 50 | .. sphinx_argparse_cli:: 51 | :module: pyfsdb.tools.pdbjinja 52 | :func: parse_args 53 | :hook: 54 | :prog: pdbjinja 55 | -------------------------------------------------------------------------------- /docs/tools/pdbkeyedsort.md: -------------------------------------------------------------------------------- 1 | ### pdbkeyedsort - sort mostly-sorted FSDB files using a double pass 2 | 3 | Sort "mostly sorted" large FSDB files using a double pass. `pdbkeyedsort` 4 | reads a file twice, sorting the data by the column specified via the 5 | -c/--column option. During the first pass, it counts all the rows per 6 | key to manage which lines it needs to memorize as it is making its 7 | second pass. During the second pass, it only stores in memory the 8 | lines that are out of order. This can greatly reduce the amount of 9 | memory needed when the data is already in a fairly sorted state (which 10 | is common for the output of map/reduce operations such as 11 | Hadoop). This comes at the expense of needing to read the entire 12 | dataset twice, which means it's impossible to use `stdin` to pass in 13 | data; a filename must be specified instead. The output, 14 | though, may be `stdout`. 15 | 16 | #### Example input (*myfile.fsdb*): 17 | 18 | ``` 19 | #fsdb -F s col1:l two:a andthree:d 20 | 1 key1 42.0 21 | 2 key2 123.0 22 | 3 key1 90.2 23 | ``` 24 | 25 | #### Example command usage 26 | 27 | We add the -v flag to have it give a count of the number of lines that 28 | were cached. 
In general, you want this fraction to be small to 29 | conserve memory. In the example below, `pdbkeyedsort` only needed to 30 | memorize one row (the second) of the above file. 31 | 32 | ``` 33 | $ pdbkeyedsort -c andthree -v myfile.fsdb 34 | ``` 35 | 36 | #### Example output 37 | 38 | ``` 39 | #fsdb -F t col1:l two andthree:d 40 | 1 key1 42.0 41 | 3 key1 90.2 42 | 2 key2 123.0 43 | cached 1/3 lines 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/tools/pdbkeyedsort.rst: -------------------------------------------------------------------------------- 1 | pdbkeyedsort - sort mostly-sorted FSDB files using a double pass 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | Sort “mostly sorted” large FSDB files using a double pass. ``pdbkeyedsort`` 5 | reads a file twice, sorting the data by the column specified via the 6 | -c/–column option. During the first pass, it counts all the rows per key 7 | to manage which lines it needs to memorize as it is making its second 8 | pass. During the second pass, it only stores in memory the lines that 9 | are out of order. This can greatly reduce the amount of memory needed 10 | when the data is already in a fairly sorted state (which is common for 11 | the output of map/reduce operations such as Hadoop). This comes at the 12 | expense of needing to read the entire dataset twice, which means it is 13 | impossible to use ``stdin`` to pass in data; a filename must be 14 | specified instead. The output, though, may be ``stdout``. 15 | 16 | Example input (*myfile.fsdb*): 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | :: 20 | 21 | #fsdb -F s col1:l two:a andthree:d 22 | 1 key1 42.0 23 | 2 key2 123.0 24 | 3 key1 90.2 25 | 26 | Example command usage 27 | ^^^^^^^^^^^^^^^^^^^^^ 28 | 29 | We add the -v flag to have it give a count of the number of lines that 30 | were cached. In general, you want this fraction to be small to conserve 31 | memory. In the example below, ``pdbkeyedsort`` only needed to memorize 32 | one row (the second) of the above file. 33 | 34 | :: 35 | 36 | $ pdbkeyedsort -c andthree -v myfile.fsdb 37 | 38 | Example output 39 | ^^^^^^^^^^^^^^ 40 | 41 | :: 42 | 43 | #fsdb -F t col1:l two andthree:d 44 | 1 key1 42.0 45 | 3 key1 90.2 46 | 2 key2 123.0 47 | cached 1/3 lines 48 | 49 | 50 | Command Line Arguments 51 | ^^^^^^^^^^^^^^^^^^^^^^ 52 | 53 | .. sphinx_argparse_cli:: 54 | :module: pyfsdb.tools.pdbkeyedsort 55 | :func: parse_args 56 | :hook: 57 | :prog: pdbkeyedsort 58 | -------------------------------------------------------------------------------- /docs/tools/pdbnormalize.md: -------------------------------------------------------------------------------- 1 | ### pdbnormalize - normalize a bunch of columns 2 | 3 | `pdbnormalize` takes an input file and divides each value from a 4 | number of selected columns by the maximum value seen across all of 5 | those columns. 
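The arithmetic itself is just a division by that single shared maximum. As a rough sketch of the idea in plain Python (using the values from the example below; illustrative only, not the actual code of the tool):

```
# values of the selected column(s) from the example input below
values = {"andthree": [42.0, 123.0, 90.2]}

# one maximum shared across every selected column
overall_max = max(v for column in values.values() for v in column)

normalized = {
    name: [v / overall_max for v in column] for name, column in values.items()
}
print(normalized["andthree"])  # roughly [0.341..., 1.0, 0.733...]
```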
6 | 7 | *Note: this is the maximum value of all columns provided; if 8 | you want per-column normalization, run the tool multiple times 9 | instead.* 10 | 11 | *Note: this requires reading the entire file into memory.* 12 | 13 | #### Example input (*myfile.fsdb*): 14 | 15 | ``` 16 | #fsdb -F s col1:l two:a andthree:d 17 | 1 key1 42.0 18 | 2 key2 123.0 19 | 3 key1 90.2 20 | ``` 21 | 22 | #### Example command usage 23 | 24 | ``` 25 | $ pdbnormalize -k andthree -- myfile.fsdb 26 | ``` 27 | 28 | #### Example output 29 | 30 | ``` 31 | pdbnormalize -k andthree -- myfile.fsdb 32 | #fsdb -F t col1:l two andthree:d 33 | 1 key1 0.34146341463414637 34 | 2 key2 1.0 35 | 3 key1 0.7333333333333334 36 | ``` 37 | 38 | #### Example normalizing 2 columns: 39 | 40 | If you normalize multiple columns, be aware that the divisor is the 41 | maximum of all the values from all the columns. Thus by passing both 42 | columns `col1` and `andthree`, you'll note in the output below that 43 | even col1 is divided by the maximum value from both columns in the 44 | input (*123.0*). 45 | 46 | ``` 47 | $ pdbnormalize -k col1 andthree -- myfile.fsdb 48 | ``` 49 | 50 | #### Example output 51 | 52 | ``` 53 | 0.008130081300813009 key1 0.34146341463414637 54 | 0.016260162601626018 key2 1.0 55 | 0.024390243902439025 key1 0.7333333333333334 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/tools/pdbnormalize.rst: -------------------------------------------------------------------------------- 1 | pdbnormalize - normalize a bunch of columns 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbnormalize`` takes an input file and takes each column value from a 5 | number of columns and divides it by the maximum value seen in all the 6 | columns. 7 | 8 | *Note: this is the maximum value of all columns provided; if you want 9 | per-column normalization, run the tool multiple times instead.* 10 | 11 | *Note: this requires reading the entire file into memory.* 12 | 13 | Example input (*myfile.fsdb*): 14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | :: 17 | 18 | #fsdb -F s col1:l two:a andthree:d 19 | 1 key1 42.0 20 | 2 key2 123.0 21 | 3 key1 90.2 22 | 23 | Example command usage 24 | ^^^^^^^^^^^^^^^^^^^^^ 25 | 26 | :: 27 | 28 | $ pdbnormalize -k andthree -- myfile.fsdb 29 | 30 | Example output 31 | ^^^^^^^^^^^^^^ 32 | 33 | :: 34 | 35 | pdbnormalize -k andthree -- myfile.fsdb 36 | #fsdb -F t col1:l two andthree:d 37 | 1 key1 0.34146341463414637 38 | 2 key2 1.0 39 | 3 key1 0.7333333333333334 40 | 41 | Example normalizing 2 columns: 42 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | If you normalize multiple columns, be aware that the divisor is the 45 | maximum of all the values from all the columns. Thus by passing both 46 | columns ``col1`` and ``andthree``, you’ll note in the output below that 47 | even col1 is divided by the maximum value from both columns in the input 48 | (*123.0*). 49 | 50 | :: 51 | 52 | $ pdbnormalize -k col1 andthree -- myfile.fsdb 53 | 54 | .. _example-output-1: 55 | 56 | Example output 57 | ^^^^^^^^^^^^^^ 58 | 59 | :: 60 | 61 | 0.008130081300813009 key1 0.34146341463414637 62 | 0.016260162601626018 key2 1.0 63 | 0.024390243902439025 key1 0.7333333333333334 64 | 65 | 66 | Command Line Arguments 67 | ^^^^^^^^^^^^^^^^^^^^^^ 68 | 69 | .. 
sphinx_argparse_cli:: 70 | :module: pyfsdb.tools.pdbnormalize 71 | :func: parse_args 72 | :hook: 73 | :prog: pdbnormalize 74 | -------------------------------------------------------------------------------- /docs/tools/pdbreescape.md: -------------------------------------------------------------------------------- 1 | ### pdbreescape - regexp escape strings from a column 2 | 3 | `pdbreescape` passes the requested columns (-k) through python's 4 | regex escaping function. 5 | 6 | **Note: because -k can take multiple columns, input files likely need 7 | to appear after the "--" argument-stop-parsing string.* 8 | 9 | #### Example input (*myfile.fsdb*): 10 | 11 | ``` 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | ``` 17 | 18 | #### Example command usage 19 | 20 | Using our standard input file for this documentation set, we first 21 | pass the file through `pdbaddtypes` to change the type from a float 22 | to a string, and then escape the period in the (now string) floating 23 | point number: 24 | 25 | ``` 26 | $ pdbaddtypes -t andthree=a -- myfile.fsdb | 27 | pdbreescape -k andthree 28 | ``` 29 | 30 | #### Example output 31 | 32 | ``` 33 | #fsdb -F t col1:l two andthree 34 | 1 key1 42\.0 35 | 2 key2 123\.0 36 | 3 key1 90\.2 37 | # | /home/hardaker/.local/bin/pdbreescape -k andthree 38 | ``` 39 | 40 | #### A more complex file (*mystrings.fsdb*) 41 | 42 | This shows a greater number of regex escaping types. Note that the 43 | spaces are also escaped. 44 | 45 | ``` 46 | #fsdb -F t type value 47 | wild-cards * and . and + and ? 48 | parens () and [] 49 | slashes / and \ 50 | ``` 51 | 52 | 53 | #### Example command usage 54 | 55 | ``` 56 | $ pdbreescape -k value -- mystrings.fsdb 57 | ``` 58 | 59 | #### Example output 60 | 61 | ``` 62 | #fsdb -F t type value 63 | wild-cards \*\ and\ \.\ and\ \+\ and\ \? 64 | parens \(\)\ and\ \[\] 65 | slashes /\ and\ \\ 66 | 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/tools/pdbreescape.rst: -------------------------------------------------------------------------------- 1 | pdbreescape - regexp escape strings from a column 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbreescape`` passes the requested columns (-k) through python’s regex 5 | escaping function. 6 | 7 | \**Note: because -k can take multiple columns, input files likely need 8 | to appear after the “–” argument-stop-parsing string.\* 9 | 10 | Example input (*myfile.fsdb*): 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | :: 14 | 15 | #fsdb -F s col1:l two:a andthree:d 16 | 1 key1 42.0 17 | 2 key2 123.0 18 | 3 key1 90.2 19 | 20 | Example command usage 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | Using our standard input file for this documentation set, we first pass 24 | the file through ``pdbaddtypes`` to change the type from a float to a 25 | string, and then escape the period in the (now string) floating point 26 | number: 27 | 28 | :: 29 | 30 | $ pdbaddtypes -t andthree=a -- myfile.fsdb | 31 | pdbreescape -k andthree 32 | 33 | Example output 34 | ^^^^^^^^^^^^^^ 35 | 36 | :: 37 | 38 | #fsdb -F t col1:l two andthree 39 | 1 key1 42\.0 40 | 2 key2 123\.0 41 | 3 key1 90\.2 42 | # | /home/hardaker/.local/bin/pdbreescape -k andthree 43 | 44 | A more complex file (*mystrings.fsdb*) 45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | This shows a greater number of regex escaping types. Note that the 48 | spaces are also escaped. 
49 | 50 | :: 51 | 52 | #fsdb -F t type value 53 | wild-cards * and . and + and ? 54 | parens () and [] 55 | slashes / and \ 56 | 57 | .. _example-command-usage-1: 58 | 59 | Example command usage 60 | ^^^^^^^^^^^^^^^^^^^^^ 61 | 62 | :: 63 | 64 | $ pdbreescape -k value -- mystrings.fsdb 65 | 66 | .. _example-output-1: 67 | 68 | Example output 69 | ^^^^^^^^^^^^^^ 70 | 71 | :: 72 | 73 | #fsdb -F t type value 74 | wild-cards \*\ and\ \.\ and\ \+\ and\ \? 75 | parens \(\)\ and\ \[\] 76 | slashes /\ and\ \\ 77 | 78 | 79 | Command Line Arguments 80 | ^^^^^^^^^^^^^^^^^^^^^^ 81 | 82 | .. sphinx_argparse_cli:: 83 | :module: pyfsdb.tools.pdbreescape 84 | :func: parse_args 85 | :hook: 86 | :prog: pdbreescape 87 | -------------------------------------------------------------------------------- /docs/tools/pdbrow.md: -------------------------------------------------------------------------------- 1 | ### pdbrow - select a subset of rows based on a filter 2 | 3 | `pdbrow` can apply an arbitrary logical python expression that selects 4 | matching rows for passing to the output. 5 | 6 | #### Example input (*myfile.fsdb*): 7 | 8 | ``` 9 | #fsdb -F s col1:l two:a andthree:d 10 | 1 key1 42.0 11 | 2 key2 123.0 12 | 3 key1 90.2 13 | ``` 14 | 15 | #### Example command usage 16 | 17 | ``` 18 | $ pdbrow 'two == "key1"' myfile.fsdb 19 | ``` 20 | 21 | #### Example output 22 | 23 | ``` 24 | #fsdb -F t col1:l two andthree:d 25 | 1 key1 42.0 26 | 3 key1 90.2 27 | # | pdbrow 'two == "key1"' myfile.fsdb 28 | ``` 29 | 30 | #### Example command usage with initialization code 31 | 32 | 33 | ``` 34 | $ pdbrow -i "import re" 're.match("key1", two)' myfile.fsdb 35 | ``` 36 | 37 | #### Example output 38 | 39 | ``` 40 | #fsdb -F t col1:l two andthree:d 41 | 1 key1 42.0 42 | 3 key1 90.2 43 | # | pdbrow -i 'import re' 're.match("key1", two)' myfile.fsdb 44 | ``` 45 | 46 | #### Example command usage with namedtuple based rows 47 | 48 | ``` 49 | $ pdbrow -n row 'row.two == "key1"' myfile.fsdb 50 | ``` 51 | 52 | #### Example output 53 | 54 | ``` 55 | #fsdb -F t col1:l two andthree:d 56 | 1 key1 42.0 57 | 3 key1 90.2 58 | # | pdbrow -n row row.two == "key1" 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/tools/pdbrow.rst: -------------------------------------------------------------------------------- 1 | pdbrow - select a subset of rows based on a filter 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbrow`` can apply an arbitrary logical python expression that selects 5 | matching rows for passing to the output. 6 | 7 | Example input (*myfile.fsdb*): 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | :: 11 | 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbrow 'two == "key1"' myfile.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t col1:l two andthree:d 30 | 1 key1 42.0 31 | 3 key1 90.2 32 | # | pdbrow 'two == "key1"' myfile.fsdb 33 | 34 | Example command usage with initialization code 35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 36 | 37 | :: 38 | 39 | $ pdbrow -i "import re" 're.match("key1", two)' myfile.fsdb 40 | 41 | .. 
_example-output-1: 42 | 43 | Example output 44 | ^^^^^^^^^^^^^^ 45 | 46 | :: 47 | 48 | #fsdb -F t col1:l two andthree:d 49 | 1 key1 42.0 50 | 3 key1 90.2 51 | # | pdbrow -i 'import re' 're.match("key1", two)' myfile.fsdb 52 | 53 | Example command usage with namedtuple based rows 54 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 55 | 56 | :: 57 | 58 | $ pdbrow -n row 'row.two == "key1"' myfile.fsdb 59 | 60 | .. _example-output-2: 61 | 62 | Example output 63 | ^^^^^^^^^^^^^^ 64 | 65 | :: 66 | 67 | #fsdb -F t col1:l two andthree:d 68 | 1 key1 42.0 69 | 3 key1 90.2 70 | # | pdbrow -n row row.two == "key1" 71 | 72 | 73 | Command Line Arguments 74 | ^^^^^^^^^^^^^^^^^^^^^^ 75 | 76 | .. sphinx_argparse_cli:: 77 | :module: pyfsdb.tools.pdbrow 78 | :func: parse_args 79 | :hook: 80 | :prog: pdbrow 81 | -------------------------------------------------------------------------------- /docs/tools/pdbroweval.rst: -------------------------------------------------------------------------------- 1 | pdbroweval - alter rows based on python expressions or code 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbroweval`` can apply an arbitrary python expression or code to 5 | modify the contents of the file before passing it to the output stream. 6 | 7 | Example input (*myfile.fsdb*): 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | :: 11 | 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | 17 | Example command usage 18 | ^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | :: 21 | 22 | $ pdbroweval 'andthree *= 2' myfile.fsdb 23 | 24 | Example output 25 | ^^^^^^^^^^^^^^ 26 | 27 | :: 28 | 29 | #fsdb -F t col1:l two andthree:d 30 | 1 key1 84.0 31 | 2 key2 246.0 32 | 3 key1 180.4 33 | # | pdbroweval 'andthree *= 2' myfile.fsdb 34 | 35 | Example command usage with initialization code 36 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 37 | 38 | :: 39 | 40 | $ pdbroweval -i "import re" 'two = re.sub("key", "lock", two)' myfile.fsdb 41 | 42 | .. _example-output-1: 43 | 44 | Example output 45 | ^^^^^^^^^^^^^^ 46 | 47 | :: 48 | 49 | #fsdb -F t col1:l two andthree:d 50 | 1 lock1 42.0 51 | 2 lock2 123.0 52 | 3 lock1 90.2 53 | # | pdbroweval -i import re two = re.sub("key", "lock", two) myfile.fsdb 54 | 55 | Command Line Usage 56 | ^^^^^^^^^^^^^^^^^^ 57 | 58 | .. argparse:: 59 | :ref: pyfsdb.tools.pdbroweval.get_parse_args 60 | :prog: pdbroweval 61 | -------------------------------------------------------------------------------- /docs/tools/pdbsplitter.md: -------------------------------------------------------------------------------- 1 | ### pdbsplitter - split an FSDB file into multiple files 2 | 3 | `pdbsplitter` splits a single FSDB file into a series of output 4 | files. This could be achieved by running `dbcol` multiple times, but 5 | `pdbsplitter` should be faster when processing many columns. 6 | 7 | #### Example input (*myfile.fsdb*): 8 | 9 | ``` 10 | #fsdb -F s col1:l two:a andthree:d 11 | 1 key1 42.0 12 | 2 key2 123.0 13 | 3 key1 90.2 14 | ``` 15 | 16 | #### Example command usage 17 | 18 | ``` 19 | $ pdbsplitter -k col1 -c two andthree -o myfile-split-%s.fsdb myfile.fsdb 20 | ``` 21 | 22 | #### Example output 23 | 24 | The above command produces two different files, one per column. 
25 | 26 | - *myfile-split-two.fsdb*: 27 | 28 | ``` 29 | #fsdb -F t col1 two 30 | 1 key1 31 | 2 key2 32 | 3 key1 33 | ``` 34 | 35 | - *myfile-split-andthree.fsdb*: 36 | 37 | ``` 38 | #fsdb -F t col1 andthree 39 | 1 42.0 40 | 2 123.0 41 | 3 90.2 42 | ``` 43 | -------------------------------------------------------------------------------- /docs/tools/pdbsplitter.rst: -------------------------------------------------------------------------------- 1 | pdbsplitter - split an FSDB file into multiple files 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbsplitter`` splits a single FSDB file into a series of output files. 5 | This could be achieved by running ``dbcol`` multiple times, but 6 | ``pdbsplitter`` should be faster when processing many columns. 7 | 8 | Example input (*myfile.fsdb*): 9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 10 | 11 | :: 12 | 13 | #fsdb -F s col1:l two:a andthree:d 14 | 1 key1 42.0 15 | 2 key2 123.0 16 | 3 key1 90.2 17 | 18 | Example command usage 19 | ^^^^^^^^^^^^^^^^^^^^^ 20 | 21 | :: 22 | 23 | $ pdbsplitter -k col1 -c two andthree -o myfile-split-%s.fsdb myfile.fsdb 24 | 25 | Example output 26 | ^^^^^^^^^^^^^^ 27 | 28 | The above command produces two different files, one per each column. 29 | 30 | - *myfile-split-two.fsdb*: 31 | 32 | :: 33 | 34 | #fsdb -F t col1 two 35 | 1 key1 36 | 2 key2 37 | 3 key1 38 | 39 | - *myfile-split-andthree.fsdb*: 40 | 41 | :: 42 | 43 | #fsdb -F t col1 andthree 44 | 1 42.0 45 | 2 123.0 46 | 3 90.2 47 | 48 | 49 | Command Line Arguments 50 | ^^^^^^^^^^^^^^^^^^^^^^ 51 | 52 | .. sphinx_argparse_cli:: 53 | :module: pyfsdb.tools.pdbsplitter 54 | :func: parse_args 55 | :hook: 56 | :prog: pdbsplitter 57 | -------------------------------------------------------------------------------- /docs/tools/pdbsum.md: -------------------------------------------------------------------------------- 1 | ### pdbsum - sum columns together 2 | 3 | `pdbsum` adds column data together based on keyed input. This is 4 | similar to `dbcolstats` and `dbmultistats`, but only performs addition 5 | (or subtraction) and can be faster on very large datasets where the 6 | rest of the analysis provided by the other tools are not needed. 7 | `dbsum` also supports keyed subtraction as well, as seen below. 8 | 9 | #### Example input (*myfile.fsdb*): 10 | 11 | ``` 12 | #fsdb -F s col1:l two:a andthree:d 13 | 1 key1 42.0 14 | 2 key2 123.0 15 | 3 key1 90.2 16 | ``` 17 | 18 | #### Example command usage 19 | 20 | ``` 21 | $ pdbsum -k two -c col1 andthree -- myfile.fsdb 22 | ``` 23 | 24 | #### Example output 25 | 26 | ``` 27 | #fsdb -F t two col1:d andthree:d 28 | key1 4.0 132.2 29 | key2 2.0 123.0 30 | ``` 31 | 32 | #### Example Subtraction file 33 | 34 | If we have another file (*mysub.fsdb*), we can subtract results: 35 | 36 | ``` 37 | #fsdb -F s two:a andthree:d 38 | key1 10 39 | key2 10 40 | key1 10 41 | ``` 42 | 43 | #### Example subtraction command: 44 | 45 | ``` 46 | pdbsum -k two -c col1 andthree -- myfile.fsdb mysub.fsdb 47 | ``` 48 | 49 | #### Example output of subtraction: 50 | 51 | Note how the two 10's in the key1 subtraction are added together to 20 52 | before being subtracted from the sum of key1 (123.2) in the first 53 | file. 54 | 55 | *Note:* Also observe the typical floating point imprecision rounding 56 | problems that python is well known for displaying. 
57 | 58 | ``` 59 | #fsdb -F t two andthree:d 60 | key1 112.19999999999999 61 | key2 113.0 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/tools/pdbsum.rst: -------------------------------------------------------------------------------- 1 | pdbsum - sum columns together 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbsum`` adds column data together based on keyed input. This is 5 | similar to ``dbcolstats`` and ``dbmultistats``, but only performs 6 | addition (or subtraction) and can be faster on very large datasets where 7 | the rest of the analysis provided by the other tools are not needed. 8 | ``dbsum`` also supports keyed subtraction as well, as seen below. 9 | 10 | Example input (*myfile.fsdb*): 11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 12 | 13 | :: 14 | 15 | #fsdb -F s col1:l two:a andthree:d 16 | 1 key1 42.0 17 | 2 key2 123.0 18 | 3 key1 90.2 19 | 20 | Example command usage 21 | ^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | :: 24 | 25 | $ pdbsum -k two -c col1 andthree -- myfile.fsdb 26 | 27 | Example output 28 | ^^^^^^^^^^^^^^ 29 | 30 | :: 31 | 32 | #fsdb -F t two col1:d andthree:d 33 | key1 4.0 132.2 34 | key2 2.0 123.0 35 | 36 | Example Subtraction file 37 | ^^^^^^^^^^^^^^^^^^^^^^^^ 38 | 39 | If we have another file (*mysub.fsdb*), we can subtract results: 40 | 41 | :: 42 | 43 | #fsdb -F s two:a andthree:d 44 | key1 10 45 | key2 10 46 | key1 10 47 | 48 | Example subtraction command: 49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | :: 52 | 53 | pdbsum -k two -c col1 andthree -- myfile.fsdb mysub.fsdb 54 | 55 | Example output of subtraction: 56 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 57 | 58 | Note how the two 10’s in the key1 subtraction are added together to 20 59 | before being subtracted from the sum of key1 (123.2) in the first file. 60 | 61 | *Note:* Also observe the typical floating point imprecision rounding 62 | problems that python is well known for displaying. 63 | 64 | :: 65 | 66 | #fsdb -F t two andthree:d 67 | key1 112.19999999999999 68 | key2 113.0 69 | 70 | 71 | Command Line Arguments 72 | ^^^^^^^^^^^^^^^^^^^^^^ 73 | 74 | .. sphinx_argparse_cli:: 75 | :module: pyfsdb.tools.pdbsum 76 | :func: parse_args 77 | :hook: 78 | :prog: pdbsum 79 | -------------------------------------------------------------------------------- /docs/tools/pdbtopn.md: -------------------------------------------------------------------------------- 1 | ### pdbtopn - selects the top N rows based on values from a column 2 | 3 | `pdbtopn` selects N rows from an FSDB file by selecting the top values 4 | from a particular column. For smaller datasets, using a combination 5 | of `dbsort` and `dbuniq` accomplish the same functional result. 6 | However, `pdbtopn` requires far less memory and CPU computation when N 7 | is small and the dataset is large. Using `dbsort` and `dbuniq` may be 8 | a better solution with very large values of N. 
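The memory savings come from never holding more than N rows per key at a time. The sketch below illustrates that idea with `heapq` and the `pyfsdb` API; it is not the actual implementation of the tool, and the column names are taken from the example input below.

```
import heapq
import pyfsdb

n = 1     # keep the top-n rows per key
top = {}  # key value -> heap of (value, row) pairs

for row in pyfsdb.Fsdb("myfile.fsdb", return_type=pyfsdb.RETURN_AS_DICTIONARY):
    heap = top.setdefault(row["two"], [])
    heapq.heappush(heap, (float(row["andthree"]), list(row.values())))
    if len(heap) > n:
        heapq.heappop(heap)  # drop the smallest; memory stays near O(keys * n)

for key, heap in sorted(top.items()):
    for value, kept_row in sorted(heap):
        print(kept_row)
```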
9 | 10 | #### Example input (*myfile.fsdb*): 11 | 12 | ``` 13 | #fsdb -F s col1:l two:a andthree:d 14 | 1 key1 42.0 15 | 2 key2 123.0 16 | 3 key1 90.2 17 | ``` 18 | 19 | #### Example command usage 20 | 21 | ``` 22 | $ pdbtopn -k two -n 1 -v andthree myfile.fsdb 23 | ``` 24 | 25 | #### Example output 26 | 27 | ``` 28 | #fsdb -F t col1:l two andthree:d 29 | 2 key2 123.0 30 | ``` 31 | 32 | #### Example selecting the top values of multiple keys 33 | 34 | ``` 35 | $ pdbtopn -k two -n 20 -v andthree myfile.fsdb 36 | ``` 37 | #### Example output 38 | 39 | 40 | ``` 41 | #fsdb -F t col1:l two andthree:d 42 | 3 key1 90.2 43 | 2 key2 123.0 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/tools/pdbtopn.rst: -------------------------------------------------------------------------------- 1 | pdbtopn - selects the top N rows based on values from a column 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbtopn`` selects N rows from an FSDB file by selecting the top values 5 | from a particular column. For smaller datasets, using a combination of 6 | ``dbsort`` and ``dbuniq`` accomplish the same functional result. 7 | However, ``pdbtopn`` requires far less memory and CPU computation when N 8 | is small and the dataset is large. Using ``dbsort`` and ``dbuniq`` may 9 | be a better solution with very large values of N. 10 | 11 | Example input (*myfile.fsdb*): 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | 14 | :: 15 | 16 | #fsdb -F s col1:l two:a andthree:d 17 | 1 key1 42.0 18 | 2 key2 123.0 19 | 3 key1 90.2 20 | 21 | Example command usage 22 | ^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | :: 25 | 26 | $ pdbtopn -k two -n 1 -v andthree myfile.fsdb 27 | 28 | Example output 29 | ^^^^^^^^^^^^^^ 30 | 31 | :: 32 | 33 | #fsdb -F t col1:l two andthree:d 34 | 2 key2 123.0 35 | 36 | Example selecting the top values of multiple keys 37 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 38 | 39 | :: 40 | 41 | $ pdbtopn -k two -n 20 -v andthree myfile.fsdb 42 | 43 | .. _example-output-1: 44 | 45 | Example output 46 | ^^^^^^^^^^^^^^ 47 | 48 | :: 49 | 50 | #fsdb -F t col1:l two andthree:d 51 | 3 key1 90.2 52 | 2 key2 123.0 53 | 54 | 55 | Command Line Arguments 56 | ^^^^^^^^^^^^^^^^^^^^^^ 57 | 58 | .. sphinx_argparse_cli:: 59 | :module: pyfsdb.tools.pdbtopn 60 | :func: parse_args 61 | :hook: 62 | :prog: pdbtopn 63 | -------------------------------------------------------------------------------- /docs/tools/pdbzerofill.md: -------------------------------------------------------------------------------- 1 | ### pdbzerofill - fills a columns with zeros (or other value) when blank 2 | 3 | `pdbzerofill` fills a row that is missing in a series of rows with a 4 | numerical increasing (frequently a timestamp) index This is a sister 5 | program to `pdbensure` which removes rows with missing data instead of 6 | creating them. 
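Conceptually the fill is a walk along the index column in fixed-size steps, emitting a filler row for every step that is missing. The sketch below shows that idea with the `pyfsdb` API; it is illustrative only (not the actual implementation of the tool), the bin size, fill value, and column layout mirror the example below, and the output file name `filled.fsdb` is invented for the illustration.

```
import pyfsdb

bin_size = 2        # matches -b 2 in the example below
fill_value = "xxx"  # matches -v xxx in the example below

inh = pyfsdb.Fsdb("myblanks.fsdb", return_type=pyfsdb.RETURN_AS_DICTIONARY)
outh = pyfsdb.Fsdb(out_file="filled.fsdb")
outh.out_column_names = inh.column_names

expected = None
for row in inh:
    current = int(row["col1"])
    # emit filler rows for any index values skipped since the previous row
    while expected is not None and expected < current:
        outh.append([expected, fill_value, fill_value])
        expected += bin_size
    outh.append(list(row.values()))
    expected = current + bin_size
outh.close()
```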
7 | 8 | #### Example input (*myblanks.fsdb*): 9 | 10 | ``` 11 | #fsdb -F t col1:l two:a andthree:d 12 | 2 key1 42.0 13 | 6 key2 14 | 10 90.2 15 | ``` 16 | 17 | #### Example command usage 18 | 19 | ``` 20 | $ pdbzerofill -c two andthree -v xxx -b 2 -t col1 21 | ``` 22 | 23 | #### Example output 24 | 25 | ``` 26 | #fsdb -F t col1:l two andthree:d 27 | 2 key1 42.0 28 | 4 xxx xxx 29 | 6 key2 30 | 8 xxx xxx 31 | 10 90.2 32 | ``` 33 | 34 | -------------------------------------------------------------------------------- /docs/tools/pdbzerofill.rst: -------------------------------------------------------------------------------- 1 | pdbzerofill - fills a columns with zeros (or other value) when blank 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | 4 | ``pdbzerofill`` fills a row that is missing in a series of rows with a 5 | numerical increasing (frequently a timestamp) index This is a sister 6 | program to ``pdbensure`` which removes rows with missing data instead of 7 | creating them. 8 | 9 | Example input (*myblanks.fsdb*): 10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 11 | 12 | :: 13 | 14 | #fsdb -F t col1:l two:a andthree:d 15 | 2 key1 42.0 16 | 6 key2 17 | 10 90.2 18 | 19 | Example command usage 20 | ^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | :: 23 | 24 | $ pdbzerofill -c two andthree -v xxx -b 2 -t col1 25 | 26 | Example output 27 | ^^^^^^^^^^^^^^ 28 | 29 | :: 30 | 31 | #fsdb -F t col1:l two andthree:d 32 | 2 key1 42.0 33 | 4 xxx xxx 34 | 6 key2 35 | 8 xxx xxx 36 | 10 90.2 37 | 38 | 39 | Command Line Arguments 40 | ^^^^^^^^^^^^^^^^^^^^^^ 41 | 42 | .. sphinx_argparse_cli:: 43 | :module: pyfsdb.tools.pdbzerofill 44 | :func: parse_args 45 | :hook: 46 | :prog: pdbzerofill 47 | -------------------------------------------------------------------------------- /pyfsdb/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["fsdb"] 2 | from . 
import fsdb 3 | 4 | __VERSION__ = "2.4.3" 5 | 6 | __doc__ = fsdb.__doc__ 7 | RETURN_AS_DICTIONARY = fsdb.RETURN_AS_DICTIONARY 8 | RETURN_AS_ARRAY = fsdb.RETURN_AS_ARRAY 9 | Fsdb = fsdb.Fsdb 10 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/obsolete/__init__.py -------------------------------------------------------------------------------- /pyfsdb/obsolete/db2tex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdb2tex 3 | 4 | 5 | def main(): 6 | sys.stderr.write("db2tex is obsolete; please use pdb2tex instead\n") 7 | pyfsdb.tools.pdb2tex.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbaugment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbaugment 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbaugment is obsolete; please use pdbaugment instead\n") 7 | pyfsdb.tools.pdbaugment.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbcoluniq.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbcoluniq 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbcoluniq is obsolete; please use pdbcoluniq instead\n") 7 | pyfsdb.tools.pdbcoluniq.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbdatetoepoch.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbdatetoepoch 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbdatetoepoch is obsolete; please use pdbdatetoepoch instead\n") 7 | pyfsdb.tools.pdbdatetoepoch.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbensure.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbensure 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbensure is obsolete; please use pdbensure instead\n") 7 | pyfsdb.tools.pdbensure.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbformat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbformat 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbformat is obsolete; please use pdbformat instead\n") 7 | pyfsdb.tools.pdbformat.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbfullpivot.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbfullpivot 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbfullpivot is obsolete; please use pdbfullpivot instead\n") 7 | pyfsdb.tools.pdbfullpivot.main() 8 | 9 | 10 | if __name__ == 
"__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbheatmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbheatmap 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbheatmap is obsolete; please use pdbheatmap instead\n") 7 | pyfsdb.tools.pdbheatmap.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbkeyedsort.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbkeyedsort 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbkeyedsort is obsolete; please use pdbkeyedsort instead\n") 7 | pyfsdb.tools.pdbkeyedsort.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbnormalize.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbnormalize 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbnormalize is obsolete; please use pdbnormalize instead\n") 7 | pyfsdb.tools.pdbnormalize.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbreescape.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbreescape 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbreescape is obsolete; please use pdbreescape instead\n") 7 | pyfsdb.tools.pdbreescape.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbreversepivot.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbreversepivot 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbreversepivot is obsolete; please use pdbreversepivot instead\n") 7 | pyfsdb.tools.pdbreversepivot.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbsplitter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbsplitter 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbsplitter is obsolete; please use pdbsplitter instead\n") 7 | pyfsdb.tools.pdbsplitter.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbsum.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbsum 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbsum is obsolete; please use pdbsum instead\n") 7 | pyfsdb.tools.pdbsum.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/obsolete/dbtopn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbtopn 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbtopn is obsolete; please use pdbtopn instead\n") 7 | pyfsdb.tools.pdbtopn.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | 
-------------------------------------------------------------------------------- /pyfsdb/obsolete/dbzerofill.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pyfsdb.tools.pdbzerofill 3 | 4 | 5 | def main(): 6 | sys.stderr.write("dbzerofill is obsolete; please use pdbzerofill instead\n") 7 | pyfsdb.tools.pdbzerofill.main() 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /pyfsdb/tests/noheader.fsdb: -------------------------------------------------------------------------------- 1 | rowone info data 2 | rowtwo other stuff 3 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_add_types.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | from pyfsdb.tools.pdbaddtypes import add_types 4 | from unittest.mock import Mock 5 | from test_fsdb_class import truncate_comments 6 | 7 | 8 | class test_add_types(unittest.TestCase): 9 | def test_add_single_type(self): 10 | indata = StringIO("#fsdb -F s a b c\na 1 2.3") 11 | outdata = StringIO() 12 | outdata.close = Mock() 13 | add_types(indata, outdata, ["b=l"]) 14 | self.assertEqual( 15 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c\na 1 2.3" 16 | ) 17 | 18 | def test_add_multiple_types(self): 19 | indata = StringIO("#fsdb -F s a b c\na 1 2.3") 20 | outdata = StringIO() 21 | outdata.close = Mock() 22 | add_types(indata, outdata, ["b=l", "c=d"]) 23 | self.assertEqual( 24 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3" 25 | ) 26 | 27 | def test_merge_types(self): 28 | indata = StringIO("#fsdb -F s a b:l c\na 1 2.3") 29 | outdata = StringIO() 30 | outdata.close = Mock() 31 | add_types(indata, outdata, ["c=d"]) 32 | self.assertEqual( 33 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3" 34 | ) 35 | 36 | def test_override_types(self): 37 | indata = StringIO("#fsdb -F s a b:l c:d\na 1 2.3") 38 | outdata = StringIO() 39 | outdata.close = Mock() 40 | add_types(indata, outdata, ["b=d"]) 41 | self.assertEqual( 42 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:d c:d\na 1 2.3" 43 | ) 44 | 45 | def test_guess_converters(self): 46 | import pyfsdb 47 | 48 | indata = StringIO("#fsdb -F s a b c\na 1 2.3") 49 | f = pyfsdb.Fsdb(file_handle=indata, return_type=pyfsdb.RETURN_AS_DICTIONARY) 50 | row = next(f) 51 | self.assertEqual(row, {"a": "a", "b": "1", "c": "2.3"}) 52 | 53 | converters = f.guess_converters(row) 54 | self.assertEqual(converters, {"b": int, "c": float}) 55 | 56 | def test_auto_convert(self): 57 | indata = StringIO("#fsdb -F s a b c\na 1 2.3") 58 | outdata = StringIO() 59 | outdata.close = Mock() 60 | add_types(indata, outdata, auto_convert=True) 61 | self.assertEqual( 62 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3" 63 | ) 64 | 65 | def test_auto_convert_overrides(self): 66 | indata = StringIO("#fsdb -F s a b c\na 1 2.3") 67 | outdata = StringIO() 68 | outdata.close = Mock() 69 | add_types(indata, outdata, types=["b=d"], auto_convert=True) 70 | self.assertEqual( 71 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:d c:d\na 1 2.3" 72 | ) 73 | 74 | 75 | if __name__ == "__main__": 76 | import unittest 77 | 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_column_renames.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | import pyfsdb 4 | 5 | class FsdbTestColumnRename(unittest.TestCase): 6 | def test_column_renames(self): 7 | input_data = "#fsdb -F s one two\n1 2\n" 8 | fh = StringIO(input_data) 9 | fs = pyfsdb.Fsdb(file_handle=fh, 10 | return_type=pyfsdb.RETURN_AS_DICTIONARY) 11 | 12 | fs.column_names = ["_" + x for x in fs.column_names] 13 | 14 | data = next(fs) 15 | 16 | expected = { "_one": '1', "_two": '2' } 17 | 18 | self.assertEqual(data, expected, 19 | "failed to remap columns on the fly") 20 | 21 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_coluniq.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import re 3 | import sys 4 | 5 | 6 | def noop(): 7 | pass 8 | 9 | 10 | def truncate_comments(value): 11 | value = re.sub("\n# +\\|.*", "", value) 12 | return value 13 | 14 | 15 | class TestColUniq(unittest.TestCase): 16 | def test_single_uniques(self): 17 | from io import StringIO 18 | 19 | data = "#fsdb -F t a b c\na\tb\tc\nb\tc\td\na\tb\td\n" 20 | 21 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns 22 | 23 | outh = StringIO() 24 | outh.close = noop 25 | datah = StringIO(data) 26 | filter_unique_columns(datah, outh, ["a"]) 27 | 28 | # check the the result 29 | self.assertEqual( 30 | truncate_comments(outh.getvalue()), 31 | "#fsdb -F t a:a\na\nb\n", 32 | "resulting values are right from uniq", 33 | ) 34 | 35 | outh = StringIO() 36 | outh.close = noop 37 | datah = StringIO(data) 38 | filter_unique_columns(datah, outh, ["a"], count=True) 39 | 40 | # check the the result 41 | self.assertEqual( 42 | truncate_comments(outh.getvalue()), 43 | "#fsdb -F t a:a count:l\na\t2\nb\t1\n", 44 | "resulting values are right from uniq", 45 | ) 46 | 47 | def test_multi_keys(self): 48 | from io import StringIO 49 | 50 | data = "#fsdb -F t a b c\na\tb\tc\nb\tc\td\na\tb\td\n" 51 | 52 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns 53 | 54 | outh = StringIO() 55 | outh.close = noop 56 | datah = StringIO(data) 57 | filter_unique_columns(datah, outh, ["a", "b"]) 58 | 59 | # check the the result 60 | self.assertEqual( 61 | truncate_comments(outh.getvalue()), 62 | "#fsdb -F t a:a b:a\na\tb\nb\tc\n", 63 | "resulting values are right from uniq", 64 | ) 65 | 66 | # 67 | # three columns with counting 68 | # 69 | data = "#fsdb -F t x:a y:a z:a\na\tb\tc\nb\tc\td\na\tb\td\na\tb\tc\n" 70 | 71 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns 72 | 73 | outh = StringIO() 74 | outh.close = noop 75 | datah = StringIO(data) 76 | filter_unique_columns(datah, outh, ["x", "y", "z"], count=True) 77 | 78 | # check the the result 79 | self.assertEqual( 80 | truncate_comments(outh.getvalue()), 81 | "#fsdb -F t x:a y:a z:a count:l\na\tb\tc\t2\na\tb\td\t1\nb\tc\td\t1\n", 82 | "resulting values are right from uniq", 83 | ) 84 | 85 | def test_aggregate(self): 86 | from io import StringIO 87 | 88 | data = "#fsdb -F t a b c count\na\tb\tc\t2\nb\tc\td\t4\na\tb\tc\t10\n" 89 | 90 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns 91 | 92 | outh = StringIO() 93 | outh.close = noop 94 | datah = StringIO(data) 95 | filter_unique_columns( 96 | datah, outh, ["a", "b", "c"], count=True, initial_count_key="count" 97 | ) 98 | 99 | # check the the result 100 | output = outh.getvalue() 101 | self.assertEqual( 102 | truncate_comments(output), 103 | "#fsdb -F t a:a b:a c:a 
count:l\na\tb\tc\t12\nb\tc\td\t4\n", 104 | "resulting values are right from uniq", 105 | ) 106 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_command_parsing.py: -------------------------------------------------------------------------------- 1 | import pyfsdb 2 | import unittest 3 | from io import StringIO 4 | from logging import error 5 | 6 | 7 | class TestCommandParsing(unittest.TestCase): 8 | commands = ["command1", "command2"] 9 | DATA_FILE = "pyfsdb/tests/tests.fsdb" 10 | COMP_FILE = "pyfsdb/tests/testscomp.fsdb.xz" 11 | test_data = "#fsdb -f s a b c\n1 2 3\n4 5 6\n# | command one" 12 | ROW1 = ["rowone", "info", "data"] 13 | ROW2 = ["rowtwo", "other", "stuff"] 14 | 15 | def test_history_from_stringio_fails(self): 16 | test_file = StringIO(self.test_data) 17 | fh = pyfsdb.Fsdb(file_handle=test_file) 18 | 19 | history_data = fh.commands 20 | self.assertEqual(history_data, None) 21 | 22 | def test_get_commands_at_end(self): 23 | fh = pyfsdb.Fsdb(self.DATA_FILE) 24 | fh.get_all() 25 | read_commands = fh.commands 26 | self.assertEqual(self.commands, read_commands) 27 | 28 | def test_get_commands_before_end(self): 29 | fh = pyfsdb.Fsdb(self.DATA_FILE) 30 | read_commands = fh.commands 31 | self.assertEqual(next(fh), self.ROW1) 32 | self.assertEqual(self.commands, read_commands) 33 | 34 | # make sure we can read data too even after reading ahead 35 | self.assertEqual(next(fh), self.ROW2) 36 | 37 | def test_compressed_files(self): 38 | # ensure we can test thsi 39 | try: 40 | import lzma 41 | except Exception: 42 | return 43 | 44 | fh = pyfsdb.Fsdb(self.COMP_FILE) 45 | row = next(fh) 46 | self.assertEqual(row, self.ROW1) 47 | 48 | def test_command_gathering_in_compressed(self): 49 | # ensure we can test thsi 50 | try: 51 | import lzma 52 | except Exception: 53 | return 54 | 55 | fh = pyfsdb.Fsdb(self.COMP_FILE) 56 | row = next(fh) 57 | self.assertEqual(row, self.ROW1) 58 | 59 | test_commands = fh.commands 60 | self.assertEqual(test_commands, None) # None == failure to read 61 | 62 | row = next(fh) 63 | self.assertEqual(row, self.ROW2) 64 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_comments_at_top.fsdb: -------------------------------------------------------------------------------- 1 | #fsdb -F t one:a two:a 2 | # another comment 3 | 1 2 4 | # done 5 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_comments_at_top.test.fsdb: -------------------------------------------------------------------------------- 1 | #fsdb -F t one:a two:a 2 | # another comment 3 | 1 2 4 | # done 5 | # | /usr/bin/pytest-3 6 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_json.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | 4 | 5 | class test_json_functions(unittest.TestCase): 6 | def test_json_to_fsdb(self): 7 | from pyfsdb.tools.json2fsdb import json_to_fsdb 8 | 9 | self.assertTrue(json_to_fsdb, "loaded") 10 | 11 | inp = StringIO( 12 | '{"d":"f", "a":"c"}' 13 | + "\n" 14 | + '{"a":"b", "d":"e"}' 15 | + "\n" 16 | + '{"d": "x", "c": "2", "a": "y"}' 17 | + "\n" 18 | '{"d": "x"}' + "\n" 19 | ) 20 | output = StringIO() # don't require converting to a string 21 | json_to_fsdb(inp, output) 22 | 23 | self.assertEqual( 24 | output.getvalue(), 25 | "#fsdb -F t a:a d:a\nc\tf\nb\te\ny\tx\n\tx\n", 26 | "output of json_to_fsdb 
is correct", 27 | ) 28 | 29 | def test_fsdb_to_json(self): 30 | from pyfsdb.tools.fsdb2json import fsdb_to_json 31 | 32 | self.assertTrue(fsdb_to_json, "loaded") 33 | 34 | inp = StringIO("#fsdb -F t a d\nc\tf\nb\te\n") 35 | output = StringIO() 36 | 37 | fsdb_to_json(inp, output) 38 | 39 | self.assertEqual( 40 | output.getvalue(), 41 | '{"a": "c", "d": "f"}' + "\n" + '{"a": "b", "d": "e"}' + "\n", 42 | "output of fsdb_to_json is correct", 43 | ) 44 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_label_shrink.py: -------------------------------------------------------------------------------- 1 | def test_label_shrink(): 2 | from pyfsdb.tools.pdbheatmap import maybe_shrink_label 3 | 4 | assert True 5 | 6 | assert maybe_shrink_label("foo") == "foo" 7 | assert maybe_shrink_label("o" * 20) == "o" * 20 8 | assert maybe_shrink_label("o" * 10 + "p" * 10) == "o" * 10 + "p" * 10 9 | assert maybe_shrink_label("o" * 11 + "p" * 11, 20) == "o" * 9 + "..." + "p" * 8 10 | assert maybe_shrink_label("o" * 10 + "p" * 11, 20) == "o" * 9 + "..." + "p" * 8 11 | assert maybe_shrink_label("o" * 100 + "p" * 11000, 20) == "o" * 9 + "..." + "p" * 8 12 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_msgpack.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import pyfsdb 3 | import io 4 | import re 5 | 6 | 7 | def truncate_comments(value): 8 | value = re.sub("\n# +\\|.*", "", value) 9 | return value 10 | 11 | 12 | class FsdbMsgPackTest(TestCase): 13 | fsdb_data = "#fsdb -F s a:l b:l\n1 2\n" 14 | encoded_data = b"#fsdb -F m a:l b:l\n\x92\x01\x02" 15 | 16 | def test_convert_to_msgpack(self): 17 | ih = pyfsdb.Fsdb(file_handle=io.StringIO(self.fsdb_data)) 18 | 19 | def noop(*args, **kwargs): 20 | pass 21 | 22 | out_data = io.BytesIO() 23 | out_data.close = noop 24 | 25 | oh = pyfsdb.Fsdb(out_file_handle=out_data) 26 | oh.out_column_names = ih.column_names 27 | oh.converters = ih.converters 28 | oh.out_separator = "m" 29 | 30 | for row in ih: 31 | oh.append(row) 32 | oh.close() 33 | 34 | # the output data we expect should be: 35 | self.assertEqual(out_data.getvalue(), self.encoded_data) 36 | 37 | def test_convert_from_msgpack(self): 38 | ih = pyfsdb.Fsdb(file_handle=io.BytesIO(self.encoded_data)) 39 | 40 | def noop(*args, **kwargs): 41 | pass 42 | 43 | out_data = io.StringIO() 44 | out_data.close = noop 45 | 46 | oh = pyfsdb.Fsdb(out_file_handle=out_data) 47 | oh.out_column_names = ih.column_names 48 | oh.converters = ih.converters 49 | oh.out_separator = " " 50 | 51 | for row in ih: 52 | oh.append(row) 53 | oh.close() 54 | 55 | # the output data we expect should be: 56 | results = out_data.getvalue() 57 | self.assertEqual(truncate_comments(results), self.fsdb_data) 58 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_pdbaugment.py: -------------------------------------------------------------------------------- 1 | from pyfsdb.tools.pdbaugment import stash_row, find_row 2 | 3 | 4 | def test_cache_saving(): 5 | rows = [ 6 | {"a": 1, "b": 2, "c": 3}, 7 | {"a": 4, "b": 5, "c": 6}, 8 | ] 9 | 10 | cache = {} 11 | 12 | for row in rows: 13 | stash_row(cache, ["a", "b"], row) 14 | 15 | open("/tmp/x", "w").write(str(cache) + "\n") 16 | 17 | assert cache == { 18 | 1: {2: {"data": {"a": 1, "b": 2, "c": 3}}}, 19 | 4: {5: {"data": {"a": 4, "b": 5, "c": 6}}}, 20 | } 21 | 22 | # now try looking up the 
results 23 | 24 | search_row = {"a": 1, "b": 2, "d": 33} 25 | result = find_row(cache, ["a", "b"], search_row) 26 | 27 | assert result == {"a": 1, "b": 2, "c": 3} 28 | 29 | result = find_row(cache, ["a", "b"], search_row, return_data=False) 30 | assert result == {"data": {"a": 1, "b": 2, "c": 3}} 31 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_pdbcdf.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import io 3 | 4 | 5 | class test_pcdf(unittest.TestCase): 6 | def test_pcdf(self): 7 | from pyfsdb.tools.pdbcdf import process_cdf 8 | 9 | self.assertTrue(True, "loaded module") 10 | 11 | in_data = io.StringIO("#fsdb -F t a b\n1\t2\n3\t6\n") 12 | out_data = io.StringIO() 13 | 14 | process_cdf(in_data, out_data, "b") 15 | 16 | result = out_data.getvalue() 17 | 18 | self.assertEqual( 19 | result, 20 | "#fsdb -F t a b b_cdf\n1\t2\t0.25\n3\t6\t1.0\n", 21 | "results (sum) were as expected", 22 | ) 23 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_pdbjinja.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import pyfsdb 3 | from io import StringIO 4 | 5 | 6 | def noop(**kwargs): 7 | pass 8 | 9 | 10 | class pdbjinjaTest(TestCase): 11 | def test_loading_pdbjinja(self): 12 | import pyfsdb.tools.pdbjinja 13 | 14 | self.assertTrue("loaded") 15 | 16 | def test_pdbjinja(self): 17 | input_data = "#fsdb -F t a b c\n1\t2\t3\nd\te\tf\n" 18 | inputh = StringIO(input_data) 19 | 20 | jinja_template = "{% for row in rows %}{{row.b}}\n{% endfor %}" 21 | jinjah = StringIO(jinja_template) 22 | 23 | outh = StringIO() 24 | outh.close = noop 25 | 26 | import pyfsdb.tools.pdbjinja 27 | 28 | pyfsdb.tools.pdbjinja.process(inputh, jinjah, outh) 29 | self.assertTrue("ran") 30 | 31 | # actually test the results 32 | result = outh.getvalue() 33 | self.assertEqual(result, "2\ne\n", "expected template results are correct") 34 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_sql.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class test_sql_support(unittest.TestCase): 5 | def test_load(self): 6 | import pyfsdb.tools.pdb2sql 7 | -------------------------------------------------------------------------------- /pyfsdb/tests/test_utf8.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pyfsdb 3 | from io import StringIO 4 | 5 | 6 | DATA = "#fsdb -F t test:i copy©:i foo:a\n1\t2\t3\n4\t5\t©\n" 7 | 8 | 9 | @pytest.fixture 10 | def create_file(tmp_path): 11 | tmp_file = tmp_path / "test.fsdb" 12 | fh = open(tmp_file, "wb") 13 | fh.write(bytes(DATA, "utf-8")) 14 | fh.close() 15 | yield tmp_file 16 | 17 | 18 | def do_test_utf8_file_handle(fh): 19 | row = next(fh) 20 | assert fh.column_names == ["test", "copy©", "foo"] 21 | assert row == [1, 2, "3"] 22 | 23 | row = next(fh) 24 | assert row == [4, 5, "©"] 25 | 26 | 27 | def test_utf8_support_stringio(): 28 | DATA_stream = StringIO(DATA) 29 | fh = pyfsdb.Fsdb(file_handle=DATA_stream) 30 | do_test_utf8_file_handle(fh) 31 | 32 | 33 | def test_utf8_support_file(create_file): 34 | fh = pyfsdb.Fsdb(create_file) 35 | do_test_utf8_file_handle(fh) 36 | 37 | 38 | def test_utf8_creation(tmp_path): 39 | tmp_file = tmp_path / "test-write.fsdb" 40 | fh = pyfsdb.Fsdb(out_file=tmp_file) 41 
| fh.out_column_names = ["test", "copy©", "foo"] 42 | fh.append([4, 5, "©"]) 43 | fh.close() 44 | -------------------------------------------------------------------------------- /pyfsdb/tests/testout.fsdb: -------------------------------------------------------------------------------- 1 | #fsdb -F s colone coltwo colthree 2 | rowone info data 3 | # middle comment 4 | rowtwo other stuff 5 | # | command1 6 | # | command2 7 | # | /usr/bin/pytest-3 8 | -------------------------------------------------------------------------------- /pyfsdb/tests/tests.fsdb: -------------------------------------------------------------------------------- 1 | #fsdb -F t colone coltwo colthree 2 | rowone info data 3 | # middle comment 4 | rowtwo other stuff 5 | # | command1 6 | # | command2 7 | -------------------------------------------------------------------------------- /pyfsdb/tests/testscomp.fsdb.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/tests/testscomp.fsdb.xz -------------------------------------------------------------------------------- /pyfsdb/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/tools/__init__.py -------------------------------------------------------------------------------- /pyfsdb/tools/bro2fsdb.py: -------------------------------------------------------------------------------- 1 | """Converts a bro (zeek) log to a file readable by FSDB. 2 | Bro logs are already tab separated, so we really just replace 3 | the headers and re-print the rest. brotofsdb assumes 4 | the bro log is properly formatted (ie, tab separated already).""" 5 | 6 | import argparse 7 | import sys 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__ 13 | ) 14 | 15 | parser.add_argument( 16 | "-l", 17 | "--leave-bro-headers", 18 | action="store_true", 19 | help="Leave the bro headers in place right after the new FSDB header", 20 | ) 21 | 22 | parser.add_argument( 23 | "input_file", 24 | type=argparse.FileType("r"), 25 | nargs="?", 26 | default=sys.stdin, 27 | help="The input file (bro log) to read", 28 | ) 29 | 30 | parser.add_argument( 31 | "output_file", 32 | type=argparse.FileType("w"), 33 | nargs="?", 34 | default=sys.stdout, 35 | help="The output file (FSDB log) to write back out", 36 | ) 37 | 38 | args = parser.parse_args() 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | leave_headers = [] 46 | 47 | # read in all the headers, looking for certain things 48 | column_names = [] 49 | for line in args.input_file: 50 | if line[0] != "#": 51 | break 52 | 53 | if args.leave_bro_headers: 54 | leave_headers.append(line) 55 | 56 | if line[0:7] == "#fields": 57 | column_names = line.replace(".", "_").split("\t") 58 | column_names.pop(0) 59 | 60 | # print out the FSDB header 61 | args.output_file.write("#fsdb -F t " + " ".join(column_names)) 62 | 63 | # optionally add back in the bro headers 64 | if args.leave_bro_headers: 65 | args.output_file.write("".join(leave_headers)) 66 | 67 | # copy out the rest of thefile 68 | args.output_file.write(line) 69 | for line in args.input_file: 70 | args.output_file.write(line) 71 | 72 | # append our trailing command 73 | args.output_file.write("# " + sys.argv[0] + "\n") 74 | 
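# A standalone illustration of the header rewrite performed above; the sample
# "#fields" line is made up, and real zeek logs carry many more columns:
bro_fields = "#fields\tts\tid.orig_h\tid.resp_h\n"
columns = bro_fields.replace(".", "_").split("\t")
columns.pop(0)  # drop the leading "#fields" token
print("#fsdb -F t " + " ".join(columns), end="")
# prints: #fsdb -F t ts id_orig_h id_resp_h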
75 | 76 | if __name__ == "__main__": 77 | main() 78 | -------------------------------------------------------------------------------- /pyfsdb/tools/fsdb2many.py: -------------------------------------------------------------------------------- 1 | """fsdb2many converts a single FSDB file into many, by creating 2 | other file names based on a column of the original.""" 3 | 4 | import sys 5 | import argparse 6 | import pyfsdb 7 | import re 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 13 | description=__doc__, 14 | epilog="fsdb2many -c key -o outputdir/%s.fsdb mybigfile.fsdb", 15 | ) 16 | 17 | parser.add_argument( 18 | "-c", "--column", default="key", type=str, help="Column to split on" 19 | ) 20 | 21 | parser.add_argument( 22 | "-o", 23 | "--output-pattern", 24 | default="fsdb2many-out-%s.fsdb", 25 | type=str, 26 | help="Output pattern to split on, which should contain a PERCENT S to use for inserting the column value being saved to that file.", 27 | ) 28 | 29 | parser.add_argument( 30 | "input_file", 31 | type=argparse.FileType("r"), 32 | nargs="?", 33 | default=sys.stdin, 34 | help="str", 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | # open the input file 46 | inh = pyfsdb.Fsdb(file_handle=args.input_file) 47 | key_column = inh.get_column_number(args.column) 48 | 49 | out_handles = {} 50 | 51 | for row in inh: 52 | value = row[key_column] 53 | 54 | # see if we have an open file handle for this one yet 55 | if value not in out_handles: 56 | # new value, so open a new file handle to save data for it 57 | file_name = re.sub("[^-.0-9a-zA-Z_]", "_", str(value)) 58 | outh = pyfsdb.Fsdb(out_file=(args.output_pattern % file_name)) 59 | outh.init_output_from(inh) 60 | out_handles[value] = outh 61 | 62 | # save the row to the file based on its value 63 | out_handles[value].append(row) 64 | 65 | # clean up 66 | for handle in out_handles: 67 | out_handles[handle].close() 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /pyfsdb/tools/json2fsdb.py: -------------------------------------------------------------------------------- 1 | """Converts a JSON file containing either an array of dictionaries or 2 | individual dictionary lines into an FSDB file""" 3 | 4 | import sys 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 6 | import json 7 | import pyfsdb 8 | 9 | 10 | def parse_args(): 11 | """Parse command line arguments""" 12 | parser = ArgumentParser( 13 | formatter_class=ArgumentDefaultsHelpFormatter, description=__doc__ 14 | ) 15 | 16 | parser.add_argument( 17 | "input_file", 18 | type=FileType("r"), 19 | nargs="?", 20 | default=sys.stdin, 21 | help="The input file (json file) to read", 22 | ) 23 | 24 | parser.add_argument( 25 | "output_file", 26 | type=FileType("w"), 27 | nargs="?", 28 | default=sys.stdout, 29 | help="The output file (FSDB file) to write back out", 30 | ) 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def handle_rows(out_fsdb, rows, columns): 37 | "Output each row in an array to the output fsdb file" 38 | for row in rows: 39 | out = [] 40 | for column in columns: 41 | if column in row: 42 | out.append(row[column]) 43 | else: 44 | out.append("") 45 | out_fsdb.append(out) 46 | 47 | 48 | def json_to_fsdb(input_file, output_file): 49 | """A function that converts an input file 
stream of json dictionary 50 | to an output FSDB file, where the header column names are pulled 51 | from the first record keys.""" 52 | first_line = next(input_file) 53 | 54 | try: 55 | rows = json.loads(first_line) 56 | if not isinstance(rows, list): 57 | rows = [rows] 58 | except Exception as exp: 59 | sys.stderr.write("failed to parse the first line as json:\n") 60 | sys.stderr.write(first_line) 61 | sys.stderr.write(str(exp)) 62 | sys.exit(1) 63 | 64 | columns = sorted(list(rows[0].keys())) 65 | out_fsdb = pyfsdb.Fsdb(out_file_handle=output_file) 66 | out_fsdb.out_column_names = columns 67 | handle_rows(out_fsdb, rows, columns) 68 | 69 | for line in input_file: 70 | try: 71 | rows = json.loads(line) 72 | if not isinstance(rows, list): 73 | rows = [rows] 74 | handle_rows(out_fsdb, rows, columns) 75 | except Exception: 76 | sys.stderr.write("failed to parse: " + line) 77 | 78 | 79 | def main(): 80 | "CLI wrapper around json_to_fsdb" 81 | args = parse_args() 82 | json_to_fsdb(args.input_file, args.output_file) 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /pyfsdb/tools/msgpack2pdb.py: -------------------------------------------------------------------------------- 1 | """Converts a msgpack FSDB representation to a normal FSDB text file""" 2 | 3 | import pyfsdb 4 | 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 6 | from logging import debug, info, warning, error, critical 7 | import logging 8 | import sys 9 | import io 10 | 11 | # optionally use rich 12 | try: 13 | from rich import print 14 | from rich.logging import RichHandler 15 | except Exception: 16 | pass 17 | 18 | 19 | def parse_args(): 20 | "Parse the command line arguments." 
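# A short sketch of driving json_to_fsdb (from json2fsdb.py above) entirely in
# memory; the two JSON records here are made up:
from io import StringIO
from pyfsdb.tools.json2fsdb import json_to_fsdb

inp = StringIO('{"b": "2", "a": "1"}\n{"a": "3", "b": "4"}\n')
out = StringIO()
json_to_fsdb(inp, out)
# out.getvalue() should start with a "#fsdb -F t a:a b:a" header (columns are
# the sorted keys of the first record) followed by one tab-separated row per
# input record.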
21 | parser = ArgumentParser( 22 | formatter_class=ArgumentDefaultsHelpFormatter, 23 | description=__doc__, 24 | epilog="Example Usage: ", 25 | ) 26 | 27 | parser.add_argument( 28 | "--log-level", 29 | "--ll", 30 | default="info", 31 | help="Define the logging verbosity level (debug, info, warning, error, fatal, critical).", 32 | ) 33 | 34 | parser.add_argument( 35 | "input_file", type=FileType("rb"), nargs="?", default=sys.stdin, help="" 36 | ) 37 | 38 | parser.add_argument( 39 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help="" 40 | ) 41 | 42 | args = parser.parse_args() 43 | log_level = args.log_level.upper() 44 | handlers = [] 45 | datefmt = None 46 | messagefmt = "%(levelname)-10s:\t%(message)s" 47 | 48 | # see if we're rich 49 | try: 50 | handlers.append(RichHandler(rich_tracebacks=True)) 51 | datefmt = " " 52 | messagefmt = "%(message)s" 53 | except Exception: 54 | pass 55 | 56 | logging.basicConfig( 57 | level=log_level, format=messagefmt, datefmt=datefmt, handlers=handlers 58 | ) 59 | return args 60 | 61 | 62 | def main(): 63 | args = parse_args() 64 | 65 | in_fsdb = pyfsdb.Fsdb( 66 | file_handle=args.input_file, 67 | return_type=pyfsdb.RETURN_AS_ARRAY, 68 | ) 69 | 70 | oh = pyfsdb.Fsdb( 71 | out_file_handle=args.output_file, 72 | out_column_names=in_fsdb.column_names, 73 | ) 74 | 75 | for row in in_fsdb: 76 | oh.append(row) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdb2msgpack.py: -------------------------------------------------------------------------------- 1 | """Converts a textual FSDB representation to an efficient msgpack binary encoding""" 2 | 3 | import pyfsdb 4 | 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 6 | from logging import debug, info, warning, error, critical 7 | import logging 8 | import sys 9 | 10 | # optionally use rich 11 | try: 12 | from rich import print 13 | from rich.logging import RichHandler 14 | except Exception: 15 | pass 16 | 17 | 18 | def parse_args(): 19 | "Parse the command line arguments."
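# A small in-memory sketch of the text-to-msgpack conversion these two tools
# wrap, mirroring pyfsdb/tests/test_msgpack.py (the "m" separator selects the
# msgpack encoding):
import io
import pyfsdb

ih = pyfsdb.Fsdb(file_handle=io.StringIO("#fsdb -F s a:l b:l\n1 2\n"))
out = io.BytesIO()
out.close = lambda *args, **kwargs: None  # keep the buffer readable after close()
oh = pyfsdb.Fsdb(out_file_handle=out)
oh.out_column_names = ih.column_names
oh.converters = ih.converters
oh.out_separator = "m"
for row in ih:
    oh.append(row)
oh.close()
# out.getvalue() should now hold b"#fsdb -F m a:l b:l\n" followed by the
# msgpack-packed rows.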
20 | parser = ArgumentParser( 21 | formatter_class=ArgumentDefaultsHelpFormatter, 22 | description=__doc__, 23 | epilog="Example Usage: ", 24 | ) 25 | 26 | parser.add_argument( 27 | "--log-level", 28 | "--ll", 29 | default="info", 30 | help="Define the logging verbosity level (debug, info, warning, error, fatal, critical).", 31 | ) 32 | 33 | parser.add_argument( 34 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help="" 35 | ) 36 | 37 | parser.add_argument( 38 | "output_file", type=FileType("wb"), nargs="?", default=sys.stdout, help="" 39 | ) 40 | 41 | args = parser.parse_args() 42 | log_level = args.log_level.upper() 43 | handlers = [] 44 | datefmt = None 45 | messagefmt = "%(levelname)-10s:\t%(message)s" 46 | 47 | # see if we're rich 48 | try: 49 | handlers.append(RichHandler(rich_tracebacks=True)) 50 | datefmt = " " 51 | messagefmt = "%(message)s" 52 | except Exception: 53 | pass 54 | 55 | logging.basicConfig( 56 | level=log_level, format=messagefmt, datefmt=datefmt, handlers=handlers 57 | ) 58 | return args 59 | 60 | 61 | def main(): 62 | args = parse_args() 63 | 64 | in_fsdb = pyfsdb.Fsdb( 65 | file_handle=args.input_file, 66 | return_type=pyfsdb.RETURN_AS_ARRAY, 67 | ) 68 | 69 | oh = pyfsdb.Fsdb( 70 | # out_file_handle=args.output_file, 71 | out_file_handle=args.output_file, 72 | out_column_names=in_fsdb.column_names, 73 | ) 74 | oh.out_separator_token = "m" # save as msgpack 75 | 76 | # for record in in_fsdb: 77 | for row in in_fsdb: 78 | oh.append(row) 79 | 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdb2tex.py: -------------------------------------------------------------------------------- 1 | """db2tex converts any FSDB file into a latex table. 2 | WARNING: very little escaping is done -- watch out for malicious input files.""" 3 | 4 | import argparse 5 | import sys 6 | import pyfsdb 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 12 | description=__doc__, 13 | epilog="Example Usage: pdb2tex -c col1 col2 -p cc input.fsdb", 14 | ) 15 | 16 | parser.add_argument( 17 | "-p", 18 | "--tabular-profile", 19 | type=str, 20 | help="The column profile to pass to tabular. 
The default will be all 'l's.", 21 | ) 22 | 23 | parser.add_argument( 24 | "-c", 25 | "--columns", 26 | type=str, 27 | nargs="*", 28 | help="Column names to include; will use all if not specified", 29 | ) 30 | 31 | parser.add_argument( 32 | "-C", "--caption", type=str, help="Use this as the caption for the table" 33 | ) 34 | 35 | parser.add_argument( 36 | "-l", "--label", type=str, help="Add a label to the table (eg: tab:foo)" 37 | ) 38 | 39 | parser.add_argument( 40 | "input_file", 41 | type=argparse.FileType("r"), 42 | nargs="?", 43 | default=sys.stdin, 44 | help="The input FSDB file", 45 | ) 46 | 47 | parser.add_argument( 48 | "output_file", 49 | type=argparse.FileType("w"), 50 | nargs="?", 51 | default=sys.stdout, 52 | help="The output file to print latex table data to", 53 | ) 54 | 55 | args = parser.parse_args() 56 | return args 57 | 58 | 59 | def latex_escape(value): 60 | return str(value).replace("\\", "\\\\").replace("_", "\\_").replace("&", "\\&") 61 | 62 | 63 | def main(): 64 | args = parse_args() 65 | 66 | inh = pyfsdb.Fsdb(file_handle=args.input_file) 67 | outh = args.output_file 68 | 69 | columns = args.columns 70 | if not columns: 71 | columns = inh.column_names 72 | 73 | if args.tabular_profile: 74 | specifier = args.tabular_profile 75 | else: 76 | specifier = "l" * len(columns) 77 | 78 | column_numbers = inh.get_column_numbers(columns) 79 | 80 | # write out the header info 81 | outh.write("\\begin{table}\n") 82 | outh.write(" \\begin{tabular}{%s}\n" % (specifier)) 83 | 84 | for num, column in enumerate(columns): 85 | if num == 0: 86 | outh.write(" \\textbf{%s}" % (latex_escape(column))) 87 | else: 88 | outh.write(" & \\textbf{%s}" % (latex_escape(column))) 89 | outh.write(" \\\\\n") 90 | 91 | for row in inh: 92 | for num, column in enumerate(column_numbers): 93 | if num == 0: 94 | outh.write(" %s" % (latex_escape(row[column]))) 95 | else: 96 | outh.write(" & %s" % (latex_escape(row[column]))) 97 | outh.write(" \\\\\n") 98 | 99 | outh.write(" \\end{tabular}\n") 100 | if args.caption: 101 | outh.write(" \\caption{%s}\n" % (args.caption)) 102 | if args.label: 103 | outh.write(" \\label{%s}\n" % (args.label)) 104 | outh.write("\\end{table}\n") 105 | 106 | 107 | if __name__ == "__main__": 108 | main() 109 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdb2to1.py: -------------------------------------------------------------------------------- 1 | "Converts a FSDB2 (with type specifications) to an FSDB1 for use with older tools" 2 | 3 | import sys 4 | import os 5 | import argparse 6 | import collections 7 | 8 | import pyfsdb 9 | import re 10 | 11 | 12 | def parse_args(): 13 | formatter_class = argparse.ArgumentDefaultsHelpFormatter 14 | parser = argparse.ArgumentParser( 15 | formatter_class=formatter_class, description=__doc__ 16 | ) 17 | 18 | parser.add_argument( 19 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help="" 20 | ) 21 | 22 | parser.add_argument( 23 | "output_file", 24 | type=argparse.FileType("w"), 25 | nargs="?", 26 | default=sys.stdout, 27 | help="", 28 | ) 29 | 30 | args = parser.parse_args() 31 | return args 32 | 33 | 34 | def main(): 35 | args = parse_args() 36 | 37 | # we do this without using an FSDB class, since raw I/O is faster 38 | fsdb_line = next(args.input_file) 39 | fsdb_line = re.sub(r":\w+", "", fsdb_line) 40 | args.output_file.write(fsdb_line) 41 | 42 | while True: 43 | data = args.input_file.read(1024 * 1024 * 1024) # 1M at a time 44 | if not data: 45 | break 46 | 
args.output_file.write(data) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbaddtypes.py: -------------------------------------------------------------------------------- 1 | """Adds type hints for converting a FSDB1 format to add type hints to columns. 2 | This allows compliant tools to get automatic type conversion within their scripts.""" 3 | 4 | import sys 5 | import os 6 | import argparse 7 | import collections 8 | 9 | import pyfsdb 10 | import re 11 | import io 12 | 13 | 14 | def parse_args(): 15 | formatter_class = argparse.ArgumentDefaultsHelpFormatter 16 | parser = argparse.ArgumentParser( 17 | formatter_class=formatter_class, description=__doc__ 18 | ) 19 | 20 | parser.add_argument( 21 | "-t", 22 | "--type-list", 23 | default=[], 24 | type=str, 25 | nargs="*", 26 | help="A list of column=type values, where type can be 'd' (float) or 'l' (integer)", 27 | ) 28 | 29 | parser.add_argument( 30 | "-a", 31 | "--auto-types", 32 | action="store_true", 33 | help="Guess at type values based on the first row", 34 | ) 35 | 36 | parser.add_argument( 37 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help="" 38 | ) 39 | 40 | parser.add_argument( 41 | "output_file", 42 | type=argparse.FileType("w"), 43 | nargs="?", 44 | default=sys.stdout, 45 | help="", 46 | ) 47 | 48 | args = parser.parse_args() 49 | return args 50 | 51 | 52 | def add_types(input_file, output_file, types=[], auto_convert=False): 53 | # we do this without using an FSDB class, since raw I/O is faster 54 | fsdb_line = next(input_file) 55 | first_line = next(input_file) 56 | buffer = io.StringIO(fsdb_line + first_line) 57 | 58 | fh = pyfsdb.Fsdb(file_handle=buffer, return_type=pyfsdb.RETURN_AS_DICTIONARY) 59 | columns = fh.column_names 60 | 61 | converters = fh.converters 62 | 63 | # if auto_conversion, then make some guesses 64 | if auto_convert: 65 | first_row = next(fh) 66 | converters = fh.guess_converters(first_row) 67 | 68 | if not converters: 69 | converters = {} 70 | 71 | # specifications should override autos 72 | for specification in types: 73 | (column, dtype) = specification.split("=") 74 | if column not in columns: 75 | raise ValueError(f"Invalid column: {column} in '{specification}") 76 | converters[column] = pyfsdb.fsdb.incoming_type_converters[dtype] 77 | 78 | # create the new header line with conversions in place 79 | fh.converters = converters 80 | new_header = fh.create_header_line(separator_token=fh.separator_token) 81 | 82 | output_file.write(new_header) 83 | output_file.write(first_line) 84 | 85 | # read the rest as chunks 86 | while True: 87 | data = input_file.read(1024 * 1024 * 1024) # 1M at a time 88 | if not data: 89 | break 90 | output_file.write(data) 91 | 92 | 93 | def main(): 94 | args = parse_args() 95 | 96 | add_types(args.input_file, args.output_file, args.type_list, args.auto_types) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbdatetoepoch.py: -------------------------------------------------------------------------------- 1 | """dbdatetoepoch converts a timestamp column with a human date to a 2 | unix epoch timestamp column""" 3 | 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 5 | import sys 6 | import pyfsdb 7 | import warnings 8 | from dateparser import parse 9 | 10 | # from 
https://github.com/scrapinghub/dateparser/issues/1013 11 | # Ignore dateparser warnings regarding pytz 12 | warnings.filterwarnings( 13 | "ignore", 14 | message="The localize method is no longer necessary", 15 | ) 16 | 17 | 18 | def parse_args(): 19 | parser = ArgumentParser( 20 | formatter_class=ArgumentDefaultsHelpFormatter, 21 | description=__doc__, 22 | epilog="Example Usage: dbdatetoepoch -d human_column -t timestamp_column input.fsdb output.fsdb", 23 | ) 24 | 25 | parser.add_argument( 26 | "-d", "--date-column", default="date", type=str, help="Date column to use" 27 | ) 28 | 29 | parser.add_argument( 30 | "-t", 31 | "--timestamp-column", 32 | default="timestamp", 33 | type=str, 34 | help="Column to create for storing an epoch column", 35 | ) 36 | 37 | parser.add_argument( 38 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help="" 39 | ) 40 | 41 | parser.add_argument( 42 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help="" 43 | ) 44 | 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def main(): 50 | args = parse_args() 51 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file) 52 | column_names = fh.column_names 53 | fh.out_column_names = column_names + [args.timestamp_column] 54 | 55 | date_column = fh.get_column_number(args.date_column) 56 | for row in fh: 57 | timestamp_value = 0 58 | try: 59 | timestamp_value = parse(row[date_column]).timestamp() 60 | except Exception: 61 | pass 62 | row[-1] = timestamp_value # XXX: this should be append 63 | fh.append(row) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbensure.py: -------------------------------------------------------------------------------- 1 | """dbensure can be used to ensure that some or all fields in a table contain data. 
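# The core conversion pdbdatetoepoch (above) applies per row, shown in
# isolation; the date string is arbitrary and the exact epoch value depends
# on the local timezone dateparser assumes:
from dateparser import parse

epoch = parse("2021-01-02 03:04:05").timestamp()
# e.g. 1609556645.0 when the date is interpreted as UTC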
2 | 3 | If rows with the specified columns (default: all) don't contain data, 4 | they're dropped from the output rows.""" 5 | 6 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 7 | import sys 8 | 9 | import pyfsdb 10 | 11 | 12 | def parse_args(): 13 | parser = ArgumentParser( 14 | formatter_class=ArgumentDefaultsHelpFormatter, 15 | description=__doc__, 16 | epilog="Exmaple Usage: dbensure input_file.fsdb output_file.fsdb", 17 | ) 18 | 19 | parser.add_argument( 20 | "-c", "--columns", nargs="*", help="The columns to check in the data" 21 | ) 22 | 23 | parser.add_argument( 24 | "-v", 25 | "--fill", 26 | default=None, 27 | type=str, 28 | help="Don't drop the rows but fill with this value if a column is missing", 29 | ) 30 | 31 | parser.add_argument( 32 | "-e", 33 | "--print-error", 34 | action="store_true", 35 | help="Print an error message on each dropped row", 36 | ) 37 | 38 | parser.add_argument( 39 | "input_file", 40 | type=FileType("r"), 41 | nargs="?", 42 | default=sys.stdin, 43 | help="The input file to process", 44 | ) 45 | 46 | parser.add_argument( 47 | "output_file", 48 | type=FileType("w"), 49 | nargs="?", 50 | default=sys.stdout, 51 | help="Where to send the output data", 52 | ) 53 | 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def filter_row(row, columns, fill_value, print_error): 59 | for column in columns: 60 | if row[column] == "" or row[column] is None: 61 | if fill_value: 62 | row[column] = fill_value 63 | else: 64 | if print_error: 65 | sys.stderr.write("# dbensure dropping row:" + str(row) + "\n") 66 | return 67 | return row 68 | 69 | 70 | def main(): 71 | args = parse_args() 72 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file) 73 | 74 | if args.columns: 75 | column_nums = fh.get_column_numbers(args.columns) 76 | else: 77 | column_nums = list(range(len(fh.column_names))) 78 | 79 | fh.filter(filter_row, args=[column_nums, args.fill, args.print_error]) 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbepochtodate.py: -------------------------------------------------------------------------------- 1 | """dbdatetoepoch converts a unix epoch timestamp column into a human 2 | readable date string usting strftime with an adjustable format.""" 3 | 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 5 | import sys 6 | import pyfsdb 7 | from dateparser import parse 8 | import time 9 | 10 | 11 | def parse_args(): 12 | parser = ArgumentParser( 13 | formatter_class=ArgumentDefaultsHelpFormatter, 14 | description=__doc__, 15 | epilog="Exmaple Usage: dbdatetoepoch -d human_column -t timestamp_column input.fsdb output.fsdb", 16 | ) 17 | 18 | parser.add_argument( 19 | "-t", 20 | "--timestamp-column", 21 | default="timestamp", 22 | type=str, 23 | help="Column to use with the epoch timestamp", 24 | ) 25 | 26 | parser.add_argument( 27 | "-T", 28 | "--time-column", 29 | default="timestamp_human", 30 | type=str, 31 | help="The output time/date column to create", 32 | ) 33 | 34 | parser.add_argument( 35 | "-f", 36 | "--format", 37 | default="%Y-%m-%d %H:%M", 38 | type=str, 39 | help="The output format to use in the time column", 40 | ) 41 | 42 | parser.add_argument( 43 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help="" 44 | ) 45 | 46 | parser.add_argument( 47 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help="" 48 | ) 49 | 50 | args = 
parser.parse_args() 51 | return args 52 | 53 | 54 | def main(): 55 | args = parse_args() 56 | fh = pyfsdb.Fsdb( 57 | file_handle=args.input_file, 58 | out_file_handle=args.output_file, 59 | converters={args.timestamp_column: float}, 60 | ) 61 | column_names = fh.column_names 62 | fh.out_column_names = column_names + [args.time_column] 63 | 64 | timestamp_column = fh.get_column_number(args.timestamp_column) 65 | 66 | colfmt = args.format 67 | 68 | for row in fh: 69 | row[-1] = time.strftime(colfmt, time.localtime(row[timestamp_column])) 70 | fh.append(row) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbformat.py: -------------------------------------------------------------------------------- 1 | """Outputs a python-string formatted line for every input FSDB row, 2 | with column names acting as variables into the format string.""" 3 | 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 5 | import sys 6 | import pyfsdb 7 | 8 | 9 | def parse_args(): 10 | parser = ArgumentParser( 11 | formatter_class=ArgumentDefaultsHelpFormatter, 12 | description=__doc__, 13 | epilog="Example: dbformat -f 'I can print {col1} and {col2}'", 14 | ) 15 | 16 | parser.add_argument( 17 | "-f", "--format", type=str, help="The python-based format string to use" 18 | ) 19 | 20 | parser.add_argument( 21 | "input_file", 22 | type=FileType("r"), 23 | nargs="?", 24 | default=sys.stdin, 25 | help="The input FSDB file to read", 26 | ) 27 | 28 | parser.add_argument( 29 | "output_file", 30 | type=FileType("w"), 31 | nargs="?", 32 | default=sys.stdout, 33 | help="The output text file to write to", 34 | ) 35 | 36 | args = parser.parse_args() 37 | 38 | if not args.format: 39 | sys.stderr.write("-f is a required argument\n") 40 | exit(1) 41 | 42 | return args 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | 48 | inh = pyfsdb.Fsdb( 49 | file_handle=args.input_file, return_type=pyfsdb.RETURN_AS_DICTIONARY 50 | ) 51 | outh = args.output_file 52 | 53 | format_string = args.format 54 | 55 | for row in inh: 56 | # convert Nones 57 | for column in row: 58 | if row[column] is None: 59 | row[column] = "" 60 | outh.write(format_string.format(**row) + "\n") 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbjinja.py: -------------------------------------------------------------------------------- 1 | """This script takes all the data in a file, and passes it to a 2 | jinja2 template with each row being stored in a `rows` variable. 3 | 4 | Note: all the rows must be loaded into memory at once. 
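# The formatting step pdbformat (above) performs boils down to str.format()
# with the row dictionary unpacked as keyword arguments; the row values here
# are made up:
row = {"col1": "a", "col2": 7}
template = "I can print {col1} and {col2}"
print(template.format(**row))  # -> I can print a and 7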
5 | """ 6 | 7 | import argparse 8 | import sys 9 | import os 10 | 11 | import pyfsdb 12 | import jinja2 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 18 | description=__doc__, 19 | epilog="Example: pdbjinja -j report.jinja input.fsdb output.txt", 20 | ) 21 | 22 | parser.add_argument( 23 | "-j", 24 | "--jinja2-template", 25 | type=argparse.FileType("r"), 26 | help="The jinja2 template file to use", 27 | ) 28 | 29 | parser.add_argument( 30 | "-i", "--include-file-path", type=str, help="Path to allow including files from" 31 | ) 32 | 33 | parser.add_argument( 34 | "input_file", 35 | type=argparse.FileType("r"), 36 | nargs="?", 37 | default=sys.stdin, 38 | help="The input file to use", 39 | ) 40 | 41 | parser.add_argument( 42 | "output_file", 43 | type=argparse.FileType("w"), 44 | nargs="?", 45 | default=sys.stdout, 46 | help="Where to write the results to", 47 | ) 48 | 49 | args = parser.parse_args() 50 | 51 | if not args.jinja2_template: 52 | sys.stderr.write("A jinja2 template argument (-j) is required\n") 53 | exit(1) 54 | 55 | return args 56 | 57 | 58 | def process( 59 | input_file_handle, jinja2_template, output_file_handle, include_file_path=None 60 | ): 61 | "Process an input data file file and template into an output file" 62 | # load the data 63 | inh = pyfsdb.Fsdb( 64 | file_handle=input_file_handle, return_type=pyfsdb.RETURN_AS_DICTIONARY 65 | ) 66 | rows = inh.get_all() 67 | 68 | # get jinja2 setup 69 | jinja_template_data = jinja2_template.read() 70 | loader = None 71 | 72 | # allowing including of other files? 73 | if include_file_path: 74 | if include_file_path[-1] != "/": 75 | include_file_path += "/" # think required? 76 | loader = jinja2.FileStreamLoader(include_file_path) 77 | 78 | # create the actual template 79 | template = jinja2.Environment(loader=loader) 80 | template = template.from_string(jinja_template_data) 81 | 82 | # call jinja and write the results out to the file 83 | output_file_handle.write(template.render({"rows": rows})) 84 | 85 | 86 | def main(): 87 | args = parse_args() 88 | process(args.input_file, args.jinja2_template, args.output_file) 89 | 90 | 91 | if __name__ == "__main__": 92 | main() 93 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbnormalize.py: -------------------------------------------------------------------------------- 1 | """dbnormalize takes an input file and takes each column value from a 2 | number of columns and divides it by the maximum value seen in all the 3 | columns. 4 | 5 | Note: this is the maximum value of all columns provided; if you want 6 | per-column normalization, run the tool multiple times instead. 7 | 8 | Note: this requires reading the entire file into memory. 
9 | """ 10 | 11 | import pyfsdb 12 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 13 | import sys 14 | 15 | 16 | def parse_args(): 17 | parser = ArgumentParser( 18 | formatter_class=ArgumentDefaultsHelpFormatter, 19 | description=__doc__, 20 | epilog="Exmaple Usage: dbnormalize -k column -- infile outfile", 21 | ) 22 | 23 | parser.add_argument( 24 | "-k", 25 | "--keys", 26 | default=["key"], 27 | nargs="+", 28 | type=str, 29 | help="The columns/keys to normalize across", 30 | ) 31 | 32 | parser.add_argument( 33 | "input_file", 34 | type=FileType("r"), 35 | nargs="?", 36 | default=sys.stdin, 37 | help="The input file to read", 38 | ) 39 | 40 | parser.add_argument( 41 | "output_file", 42 | type=FileType("w"), 43 | nargs="?", 44 | default=sys.stdout, 45 | help="Where to write the results", 46 | ) 47 | 48 | args = parser.parse_args() 49 | return args 50 | 51 | 52 | def main(): 53 | args = parse_args() 54 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file) 55 | df = fh.get_pandas() 56 | maxval = df[args.keys].max().max() 57 | for key in args.keys: 58 | df[key] = df[key] / maxval 59 | fh.put_pandas(df) 60 | fh.close() 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbreescape.py: -------------------------------------------------------------------------------- 1 | """Passes the requested columns (-k) through the python regex escaping function. 2 | 3 | Note: because -k can take multiple columns, input files likely need to appear 4 | after the "--" argument-stop-parsing string. 5 | """ 6 | 7 | import pyfsdb 8 | from re import escape 9 | 10 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType 11 | import sys 12 | 13 | 14 | def parse_args(): 15 | parser = ArgumentParser( 16 | formatter_class=ArgumentDefaultsHelpFormatter, 17 | description=__doc__, 18 | epilog="Exmaple Usage: dbrequote -k column1 column2 -- file.fsdb", 19 | ) 20 | 21 | parser.add_argument( 22 | "-k", "--keys-to-escape", type=str, nargs="+", help="The keys to regexp quote" 23 | ) 24 | 25 | parser.add_argument( 26 | "input_file", 27 | type=FileType("r"), 28 | nargs="?", 29 | default=sys.stdin, 30 | help="The input file to parse", 31 | ) 32 | 33 | parser.add_argument( 34 | "output_file", 35 | type=FileType("w"), 36 | nargs="?", 37 | default=sys.stdout, 38 | help="Where to send the output", 39 | ) 40 | 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | 48 | fs = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file) 49 | 50 | convert_cols = fs.get_column_numbers(args.keys_to_escape) 51 | 52 | for row in fs: 53 | for column in convert_cols: 54 | row[column] = escape(row[column]) 55 | fs.append(row) 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbreversepivot.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | 3 | """dbreversepivot takes an input file with time/value columns, and 4 | pivots the table into a narrow table with one line per old column. 
5 | 6 | For example, if the input was this: 7 | 8 | #fsdb -F s time foo bar 9 | 1 10 0 10 | 2 30 20 11 | 3 0 40 12 | 13 | It would convert this to: 14 | 15 | #fsdb -F s time key value 16 | 1 foo 10 17 | 2 bar 20 18 | 2 foo 30 19 | 3 bar 40 20 | 21 | This is the inverse operation of dbfullpivot. 22 | """ 23 | 24 | import sys 25 | import argparse 26 | import pyfsdb 27 | 28 | 29 | def parse_args(): 30 | parser = argparse.ArgumentParser( 31 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__ 32 | ) 33 | 34 | parser.add_argument( 35 | "-k", 36 | "--key-column", 37 | default="key", 38 | type=str, 39 | help="The key column to use in the output for column names to store in", 40 | ) 41 | 42 | parser.add_argument( 43 | "-c", "--columns", nargs="+", type=str, help="The columns to pivot into keys" 44 | ) 45 | 46 | parser.add_argument( 47 | "-v", 48 | "--value-column", 49 | default="value", 50 | type=str, 51 | help="What output column to store the value for what was found in the columns", 52 | ) 53 | 54 | parser.add_argument( 55 | "-o", 56 | "--other-columns", 57 | default=[], 58 | type=str, 59 | nargs="*", 60 | help="Other columns to copy to every row", 61 | ) 62 | 63 | parser.add_argument( 64 | "input_file", 65 | type=argparse.FileType("r"), 66 | nargs="?", 67 | default=sys.stdin, 68 | help="The input FSDB file to read", 69 | ) 70 | 71 | parser.add_argument( 72 | "output_file", 73 | type=argparse.FileType("w"), 74 | nargs="?", 75 | default=sys.stdout, 76 | help="The output FSDB file to write to", 77 | ) 78 | 79 | args = parser.parse_args() 80 | return args 81 | 82 | 83 | def main(): 84 | args = parse_args() 85 | 86 | # set up storage structures 87 | columns = {} 88 | 89 | # from the input, get extract column numbers/names 90 | key_column = args.key_column 91 | value_column = args.value_column 92 | other_columns = args.other_columns 93 | columns = args.columns 94 | 95 | # open the input file stream 96 | fh = pyfsdb.Fsdb( 97 | file_handle=args.input_file, 98 | return_type=pyfsdb.RETURN_AS_DICTIONARY, 99 | out_file_handle=args.output_file, 100 | ) 101 | fh.out_column_names = [key_column, value_column] + other_columns 102 | 103 | # for each row, remember each value based on time and key 104 | for row in fh: 105 | for column in columns: 106 | out_row = [column, row[column]] 107 | for other in other_columns: 108 | out_row.append(row[other]) 109 | fh.append(out_row) 110 | 111 | fh.close(copy_comments_from=fh) 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /pyfsdb/tools/pdbzerofill.py: -------------------------------------------------------------------------------- 1 | """Fills a row that is missing in a series of rows with a numerical 2 | increasing (frequently a timestamp) index""" 3 | 4 | import sys 5 | import argparse 6 | import pyfsdb 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__ 12 | ) 13 | 14 | parser.add_argument( 15 | "-v", "--value", default="0", type=str, help="Fill columns with this value" 16 | ) 17 | 18 | parser.add_argument( 19 | "-c", "--columns", type=str, nargs="+", help="Fill these columns" 20 | ) 21 | 22 | parser.add_argument( 23 | "-k", 24 | "--key-column", 25 | default="timestamp", 26 | type=str, 27 | help="Use this column as the timestamp/key column to increment", 28 | ) 29 | 30 | parser.add_argument( 31 | "-b", 32 | "--bin-size", 33 | default=1, 34 | type=int, 
35 | help="Bin-size to check for missing rows", 36 | ) 37 | 38 | parser.add_argument( 39 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help="" 40 | ) 41 | 42 | parser.add_argument( 43 | "output_file", 44 | type=argparse.FileType("w"), 45 | nargs="?", 46 | default=sys.stdout, 47 | help="", 48 | ) 49 | 50 | args = parser.parse_args() 51 | 52 | if args.columns is None: 53 | sys.stderr.write("The --columns argument is required\n") 54 | exit(1) 55 | 56 | return args 57 | 58 | 59 | def main(): 60 | args = parse_args() 61 | 62 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file) 63 | 64 | store_columns = fh.get_column_numbers(args.columns) 65 | time_column = fh.get_column_number(args.key_column) 66 | value = args.value 67 | bin_size = args.bin_size 68 | 69 | last_index = None 70 | 71 | for row in fh: 72 | if last_index is None: 73 | # first row, just store it 74 | last_index = int(row[time_column]) 75 | elif last_index != int(row[time_column]): 76 | for skipped_time in range( 77 | last_index + bin_size, int(row[time_column]), bin_size 78 | ): 79 | newrow = list(row) 80 | newrow[time_column] = str(skipped_time) 81 | for column in store_columns: 82 | newrow[column] = value 83 | fh.append(newrow) 84 | last_index = int(row[time_column]) 85 | fh.append(row) 86 | 87 | fh.write_finish() 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pyfsdb" 7 | dynamic = ["version"] 8 | description = "A python implementation of the flat-file streaming database" 9 | readme = "README.md" 10 | license = {file = "LICENSE"} 11 | requires-python = ">=3.6" 12 | authors = [ 13 | { name = "Wes Hardaker", email = "opensource@hardakers.net" }, 14 | ] 15 | classifiers = [ 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3", 18 | ] 19 | 20 | [project.scripts] 21 | bro2fsdb = "pyfsdb.tools.bro2fsdb:main" 22 | db2tex = "pyfsdb.obsolete.db2tex:main" 23 | dbaugment = "pyfsdb.obsolete.dbaugment:main" 24 | dbcoluniq = "pyfsdb.obsolete.dbcoluniq:main" 25 | dbdatetoepoch = "pyfsdb.obsolete.dbdatetoepoch:main" 26 | dbensure = "pyfsdb.obsolete.dbensure:main" 27 | dbformat = "pyfsdb.obsolete.dbformat:main" 28 | dbfullpivot = "pyfsdb.obsolete.dbfullpivot:main" 29 | dbheatmap = "pyfsdb.obsolete.dbheatmap:main" 30 | dbkeyedsort = "pyfsdb.obsolete.dbkeyedsort:main" 31 | dbreescape = "pyfsdb.obsolete.dbreescape:main" 32 | dbreversepivot = "pyfsdb.obsolete.dbreversepivot:main" 33 | dbsplitter = "pyfsdb.obsolete.dbsplitter:main" 34 | dbsum = "pyfsdb.obsolete.dbsum:main" 35 | dbtopn = "pyfsdb.obsolete.dbtopn:main" 36 | dbzerofill = "pyfsdb.obsolete.dbzerofill:main" 37 | fsdb2json = "pyfsdb.tools.fsdb2json:main" 38 | fsdb2many = "pyfsdb.tools.fsdb2many:main" 39 | json2fsdb = "pyfsdb.tools.json2fsdb:main" 40 | pdb2sql = "pyfsdb.tools.pdb2sql:main" 41 | pdb2tex = "pyfsdb.tools.pdb2tex:main" 42 | pdb2to1 = "pyfsdb.tools.pdb2to1:main" 43 | pdbaddtypes = "pyfsdb.tools.pdbaddtypes:main" 44 | pdbaugment = "pyfsdb.tools.pdbaugment:main" 45 | pdbcdf = "pyfsdb.tools.pdbcdf:main" 46 | pdbcoluniq = "pyfsdb.tools.pdbcoluniq:main" 47 | pdbdatetoepoch = "pyfsdb.tools.pdbdatetoepoch:main" 48 | pdbensure = "pyfsdb.tools.pdbensure:main" 49 | pdbepochtodate = 
"pyfsdb.tools.pdbepochtodate:main" 50 | pdbfgrep = "pyfsdb.tools.pdbfgrep:main" 51 | pdbformat = "pyfsdb.tools.pdbformat:main" 52 | pdbfullpivot = "pyfsdb.tools.pdbfullpivot:main" 53 | pdbheatmap = "pyfsdb.tools.pdbheatmap:main" 54 | pdbjinja = "pyfsdb.tools.pdbjinja:main" 55 | pdbkeyedsort = "pyfsdb.tools.pdbkeyedsort:main" 56 | pdbnormalize = "pyfsdb.tools.pdbnormalize:main" 57 | pdbreescape = "pyfsdb.tools.pdbreescape:main" 58 | pdbreversepivot = "pyfsdb.tools.pdbreversepivot:main" 59 | pdbroc = "pyfsdb.tools.pdbroc:main" 60 | pdbrow = "pyfsdb.tools.pdbrow:main" 61 | pdbroweval = "pyfsdb.tools.pdbroweval:main" 62 | pdbsplitter = "pyfsdb.tools.pdbsplitter:main" 63 | pdbsum = "pyfsdb.tools.pdbsum:main" 64 | pdbtopn = "pyfsdb.tools.pdbtopn:main" 65 | pdbzerofill = "pyfsdb.tools.pdbzerofill:main" 66 | pdbrelplot = "pyfsdb.tools.pdbrelplot:main" 67 | 68 | [project.urls] 69 | Homepage = "https://github.com/hardaker/pyfsdb" 70 | 71 | [tool.hatch.version] 72 | path = "pyfsdb/__init__.py" 73 | 74 | [tool.hatch.build.targets.sdist] 75 | include = [ 76 | "/pyfsdb", 77 | ] 78 | 79 | [tool.ruff] 80 | ignore = ["E501", "F401"] # long lines, unused imports 81 | fixable = ["ALL"] # gulp 82 | # select = ["ALL"] 83 | --------------------------------------------------------------------------------