├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── Jenkinsfile
├── LICENSE
├── README.md
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── class.rst
│   ├── cli.template
│   ├── conf.py
│   ├── index.rst
│   ├── introduction.md
│   ├── make.bat
│   ├── perl
│   │   ├── Makefile
│   │   ├── cgi_to_db.rst
│   │   ├── combined_log_format_to_db.rst
│   │   ├── csv_to_db.rst
│   │   ├── db_to_csv.rst
│   │   ├── db_to_html_table.rst
│   │   ├── dbcol.rst
│   │   ├── dbcolcopylast.rst
│   │   ├── dbcolcreate.rst
│   │   ├── dbcoldefine.rst
│   │   ├── dbcolhisto.rst
│   │   ├── dbcolize.rst
│   │   ├── dbcolmerge.rst
│   │   ├── dbcolmovingstats.rst
│   │   ├── dbcolneaten.rst
│   │   ├── dbcolpercentile.rst
│   │   ├── dbcolrename.rst
│   │   ├── dbcolscorrelate.rst
│   │   ├── dbcolsplittocols.rst
│   │   ├── dbcolsplittorows.rst
│   │   ├── dbcolsregression.rst
│   │   ├── dbcolstats.rst
│   │   ├── dbcolstatscores.rst
│   │   ├── dbcoltype.rst
│   │   ├── dbfilealter.rst
│   │   ├── dbfilecat.rst
│   │   ├── dbfilediff.rst
│   │   ├── dbfilepivot.rst
│   │   ├── dbfilestripcomments.rst
│   │   ├── dbfilevalidate.rst
│   │   ├── dbformmail.rst
│   │   ├── dbjoin.rst
│   │   ├── dblistize.rst
│   │   ├── dbmapreduce.rst
│   │   ├── dbmerge.rst
│   │   ├── dbmerge2.rst
│   │   ├── dbmultistats.rst
│   │   ├── dbrecolize.rst
│   │   ├── dbrow.rst
│   │   ├── dbrowaccumulate.rst
│   │   ├── dbrowcount.rst
│   │   ├── dbrowdiff.rst
│   │   ├── dbrowenumerate.rst
│   │   ├── dbroweval.rst
│   │   ├── dbrowuniq.rst
│   │   ├── dbrvstatdiff.rst
│   │   ├── dbsort.rst
│   │   ├── dbstats.rst
│   │   ├── html_table_to_db.rst
│   │   ├── kitrace_to_db.rst
│   │   ├── ns_to_db.rst
│   │   ├── sqlselect_to_db.rst
│   │   ├── tabdelim_to_db.rst
│   │   ├── tcpdump_to_db.rst
│   │   ├── xml_to_db.rst
│   │   └── yaml_to_db.rst
│   ├── perltools.rst
│   ├── requirements.txt
│   └── tools
│       ├── images
│       │   └── myheat.png
│       ├── index.rst
│       ├── pdb2sql.md
│       ├── pdb2sql.rst
│       ├── pdb2tex.md
│       ├── pdb2tex.rst
│       ├── pdb2to1.md
│       ├── pdb2to1.rst
│       ├── pdbaddtypes.md
│       ├── pdbaddtypes.rst
│       ├── pdbaugment.md
│       ├── pdbaugment.rst
│       ├── pdbcdf.md
│       ├── pdbcdf.rst
│       ├── pdbcoluniq.md
│       ├── pdbcoluniq.rst
│       ├── pdbdatetoepoch.md
│       ├── pdbdatetoepoch.rst
│       ├── pdbensure.md
│       ├── pdbensure.rst
│       ├── pdbepochtodate.md
│       ├── pdbepochtodate.rst
│       ├── pdbfgrep.rst
│       ├── pdbformat.md
│       ├── pdbformat.rst
│       ├── pdbfullpivot.md
│       ├── pdbfullpivot.rst
│       ├── pdbheatmap.md
│       ├── pdbheatmap.rst
│       ├── pdbjinja.md
│       ├── pdbjinja.rst
│       ├── pdbkeyedsort.md
│       ├── pdbkeyedsort.rst
│       ├── pdbnormalize.md
│       ├── pdbnormalize.rst
│       ├── pdbreescape.md
│       ├── pdbreescape.rst
│       ├── pdbrow.md
│       ├── pdbrow.rst
│       ├── pdbroweval.rst
│       ├── pdbsplitter.md
│       ├── pdbsplitter.rst
│       ├── pdbsum.md
│       ├── pdbsum.rst
│       ├── pdbtopn.md
│       ├── pdbtopn.rst
│       ├── pdbzerofill.md
│       └── pdbzerofill.rst
├── pyfsdb
│   ├── __init__.py
│   ├── fsdb.py
│   ├── obsolete
│   │   ├── __init__.py
│   │   ├── db2tex.py
│   │   ├── dbaugment.py
│   │   ├── dbcoluniq.py
│   │   ├── dbdatetoepoch.py
│   │   ├── dbensure.py
│   │   ├── dbformat.py
│   │   ├── dbfullpivot.py
│   │   ├── dbheatmap.py
│   │   ├── dbkeyedsort.py
│   │   ├── dbnormalize.py
│   │   ├── dbreescape.py
│   │   ├── dbreversepivot.py
│   │   ├── dbsplitter.py
│   │   ├── dbsum.py
│   │   ├── dbtopn.py
│   │   └── dbzerofill.py
│   ├── tests
│   │   ├── noheader.fsdb
│   │   ├── test_add_types.py
│   │   ├── test_column_renames.py
│   │   ├── test_coluniq.py
│   │   ├── test_command_parsing.py
│   │   ├── test_comments_at_top.fsdb
│   │   ├── test_comments_at_top.test.fsdb
│   │   ├── test_fsdb_class.py
│   │   ├── test_json.py
│   │   ├── test_label_shrink.py
│   │   ├── test_msgpack.py
│   │   ├── test_pdbaugment.py
│   │   ├── test_pdbcdf.py
│   │   ├── test_pdbjinja.py
│   │   ├── test_pdbrow.py
│   │   ├── test_pdbroweval.py
│   │   ├── test_sql.py
│   │   ├── test_utf8.py
│   │   ├── testout.fsdb
│   │   ├── tests.fsdb
│   │   └── testscomp.fsdb.xz
│   └── tools
│       ├── __init__.py
│       ├── bro2fsdb.py
│       ├── fsdb2json.py
│       ├── fsdb2many.py
│       ├── json2fsdb.py
│       ├── msgpack2pdb.py
│       ├── pdb2msgpack.py
│       ├── pdb2sql.py
│       ├── pdb2tex.py
│       ├── pdb2to1.py
│       ├── pdbaddtypes.py
│       ├── pdbaugment.py
│       ├── pdbcdf.py
│       ├── pdbcoluniq.py
│       ├── pdbdatetoepoch.py
│       ├── pdbensure.py
│       ├── pdbepochtodate.py
│       ├── pdbfgrep.py
│       ├── pdbformat.py
│       ├── pdbfullpivot.py
│       ├── pdbheatmap.py
│       ├── pdbjinja.py
│       ├── pdbkeyedsort.py
│       ├── pdbnormalize.py
│       ├── pdbreescape.py
│       ├── pdbrelplot.py
│       ├── pdbreversepivot.py
│       ├── pdbroc.py
│       ├── pdbrow.py
│       ├── pdbroweval.py
│       ├── pdbsplitter.py
│       ├── pdbsum.py
│       ├── pdbtopn.py
│       └── pdbzerofill.py
└── pyproject.toml
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | /pyfsdb.egg-info/
3 | /dist/
4 | /build/
5 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
5 |     rev: v3.2.0
6 |     hooks:
7 |       - id: trailing-whitespace
8 |       - id: end-of-file-fixer
9 |       - id: check-yaml
10 |       - id: check-added-large-files
11 |   - repo: https://github.com/psf/black
12 |     rev: 22.3.0
13 |     hooks:
14 |       - id: black
15 |   - repo: https://github.com/astral-sh/ruff-pre-commit
16 |     # Ruff version.
17 |     rev: v0.0.290
18 |     hooks:
19 |       - id: ruff
20 |   - repo: local
21 |     hooks:
22 |       - id: pytest
23 |         name: pytest
24 |         entry: bash -c "PYTHONPATH=. pytest-3"
25 |         language: system
26 |         pass_filenames: false
27 |         always_run: true
28 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sphinx:
4 |   builder: html
5 |   configuration: docs/conf.py
6 |   fail_on_warning: false
7 | python:
8 |   install:
9 |     - method: pip
10 |       path: .
11 |     - requirements: docs/requirements.txt
12 | build:
13 |   os: "ubuntu-22.04"
14 |   tools:
15 |     python: "3.11"
16 |
--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
1 | pipeline {
2 |     agent {
3 |         docker {
4 |             image 'docker.io/python:3'
5 |         }
6 |     }
7 |     stages {
8 |         stage('Preparation') {
9 |             steps {
10 |                 withEnv(["HOME=${env.WORKSPACE}"]) {
11 |                     sh 'pip install --user pandas'
12 |                 }
13 |             }
14 |         }
15 |         stage ('Build') {
16 |             steps {
17 |                 sh 'python3 setup.py build'
18 |             }
19 |         }
20 |         stage ('Test') {
21 |             steps {
22 |                 withEnv(["HOME=${env.WORKSPACE}"]) {
23 |                     sh 'python3 setup.py test'
24 |                 }
25 |             }
26 |         }
27 |     }
28 |     post {
29 |         failure {
30 |             emailext(
31 |                 subject: "FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'",
32 |                 body: """FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]':
33 | Check console output at "${env.JOB_NAME} [${env.BUILD_NUMBER}]" """,
34 |                 recipientProviders: [[$class: 'DevelopersRecipientProvider']]
35 |             )
36 |         }
37 |     }
38 | }
39 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019-2025 University of Southern California, Information Sciences Institute
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | SUBMODULES = $(patsubst %.md,%.rst,$(wildcard tools/*.md))
12 |
13 | # build reST files from markdown
14 | %.rst: %.md cli.template Makefile
15 | pandoc -i $< -o $@ > $@
16 | TOOL=$$(echo $$(basename $@) | sed 's/.rst//') ; \
17 | cat cli.template | perl -p -e "s/TOOL/$$TOOL/g" >> $@
18 |
19 | .PHONY: help Makefile submodules
20 |
21 | submodules: $(SUBMODULES)
22 | echo $(SUBMODULES)
23 | echo $(wildcard tools/*.md)
24 |
25 | # Put it first so that "make" without argument is like "make help".
26 | help:
27 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
28 |
29 | # Catch-all target: route all unknown targets to Sphinx using the new
30 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
31 | html: Makefile submodules
32 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
33 |
34 | latex: Makefile
35 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
36 |
37 | latexpdf: Makefile
38 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
39 |
40 | clean: Makefile
41 | @PYTHONPATH=.. $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
42 |
--------------------------------------------------------------------------------
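
The pattern rule in the Makefile above drives the tools/ documentation build: pandoc converts each tools/*.md page to reStructuredText, and cli.template (with TOOL substituted) is appended so every page ends with an auto-generated options section. A rough Python rendering of that rule is shown below; it assumes only that pandoc is on PATH and that it is run from the docs/ directory.

# Sketch of the docs/Makefile %.rst rule: convert tools/*.md with pandoc,
# then append cli.template with the literal string TOOL replaced by the
# tool's name (the markdown file's basename).
import pathlib
import subprocess

template = pathlib.Path("cli.template").read_text()
for md in sorted(pathlib.Path("tools").glob("*.md")):
    rst = md.with_suffix(".rst")
    subprocess.run(["pandoc", str(md), "-o", str(rst)], check=True)
    with rst.open("a") as out:
        out.write(template.replace("TOOL", md.stem))
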
/docs/class.rst:
--------------------------------------------------------------------------------
1 | FSDB Class Description
2 | ======================
3 |
4 | .. currentmodule:: pyfsdb
5 | .. autoclass:: pyfsdb.Fsdb
6 | :members:
7 |
--------------------------------------------------------------------------------
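
For orientation alongside the autoclass output above, here is a minimal read-side sketch of the pyfsdb.Fsdb class. It assumes the constructor accepts a filename, that an Fsdb object is iterable over data rows, and that the parsed header is exposed as column_names; treat those names as assumptions and check the generated class documentation for the authoritative signatures.

import pyfsdb

# Open an FSDB file and walk its rows (each row is a list of field values).
db = pyfsdb.Fsdb("mydata.fsdb")
print(db.column_names)  # column names taken from the #fsdb header line
for row in db:
    print(row)
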
/docs/cli.template:
--------------------------------------------------------------------------------
1 |
2 |
3 | Command Line Arguments
4 | ^^^^^^^^^^^^^^^^^^^^^^
5 |
6 | .. sphinx_argparse_cli::
7 | :module: pyfsdb.tools.TOOL
8 | :func: parse_args
9 | :hook:
10 | :prog: TOOL
11 |
--------------------------------------------------------------------------------
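
The template above is instantiated once per tool, so the sphinx_argparse_cli directive expects every pyfsdb.tools.<TOOL> module to expose a parse_args function it can introspect. A schematic of that convention follows; the tool name and argument names are purely illustrative, and the real tools define their own options.

import argparse


def parse_args(args=None):
    """Build and run the argparse parser for this (hypothetical) tool."""
    parser = argparse.ArgumentParser(
        prog="pdbexample",  # hypothetical tool name, for illustration only
        description="Example command-line skeleton for a pyfsdb tool.",
    )
    parser.add_argument("input_file", nargs="?", default="-",
                        help="input FSDB file, or - for standard input")
    parser.add_argument("output_file", nargs="?", default="-",
                        help="output FSDB file, or - for standard output")
    return parser.parse_args(args)
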
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = "PyFSDB"
21 | copyright = "2020-2023, Wes Hardaker"
22 | author = "Wes Hardaker"
23 |
24 | # The full version, including alpha/beta/rc tags
25 | release = "2.4.3"
26 |
27 | # -- General configuration ---------------------------------------------------
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = [
33 | "myst_parser",
34 | # "sphinxarg.ext",
35 | "sphinx_argparse_cli",
36 | "sphinx.ext.autodoc",
37 | ]
38 |
39 | # Add any paths that contain templates here, relative to this directory.
40 | templates_path = ["_templates"]
41 |
42 | # List of patterns, relative to source directory, that match files and
43 | # directories to ignore when looking for source files.
44 | # This pattern also affects html_static_path and html_extra_path.
45 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
46 |
47 |
48 | # -- Options for HTML output -------------------------------------------------
49 |
50 | # The theme to use for HTML and HTML Help pages. See the documentation for
51 | # a list of builtin themes.
52 | #
53 | html_theme = "alabaster"
54 |
55 | # Add any paths that contain custom static files (such as style sheets) here,
56 | # relative to this directory. They are copied after the builtin static files,
57 | # so a file named "default.css" will overwrite the builtin "default.css".
58 | html_static_path = ["_static"]
59 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | FSDB - File System Data Base
2 | ==================================
3 |
4 | .. toctree::
5 | :maxdepth: 3
6 | :caption: Contents:
7 |
8 | introduction
9 | perlover
10 | tools/index
11 | perltools
12 | class
13 |
14 | Indices and tables
15 | ==================
16 |
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 | * :ref:`search`
20 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.https://www.sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/perl/Makefile:
--------------------------------------------------------------------------------
1 | VERSION=3.1
2 | SRC=~/src/Fsdb-$(VERSION)
3 |
4 | BUILD=buildperl scripts perlindex perloverview
5 |
6 | all: $(BUILD)
7 |
8 | .PHONY: $(BUILD)
9 |
10 | buildperl:
11 | cd $(SRC) ; \
12 | perl Makefile.PL ; \
13 | make
14 |
15 | scripts:
16 | outdir=`pwd` ; \
17 | cd $(SRC)/blib/script ; \
18 | for i in * ; do \
19 | pod2man $$i | \
20 | pandoc -f man -i - -t rst -o - | \
21 | perl -n -e 'if (/^NAME$$/) { $$_ = <>; $$_ = <>; $$_ = <>; print("$$_" . "=" x 70 . "\n\n"); $$printit = 1; $$_ = <>; $$_ = <>; } if (/^SYNOPSIS$$/) { print("*NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version $(VERSION)*\n\n")} ; if ("$$printit" eq "1") { s/^(==+)/"-" x length($$1)/e; print; }' \
22 | > $$outdir/$$(basename $$i).rst ; \
23 | done
24 |
25 | perlindex:
26 | outfile=../perltools.rst ; \
27 | echo -e "Perl FSDB Tools\n===============\n\n.. toctree::\n :maxdepth: 1\n :caption: FSDB Perl Scripts:\n" > $$outfile ; \
28 | for i in *.rst ; do \
29 | echo " perl/$$i" >> $$outfile ; \
30 | done
31 |
32 | perloverview:
33 | pod2man $(SRC)/lib/Fsdb.pm | \
34 | pandoc -f man -i - -t rst -o - | \
35 | perl -n -e 'if (/^NAME$$/) { $$_ = <>; $$_ = <>; $$_ = <>; print("$$_" . "=" x 70 . "\n\n"); $$printit = 1; $$_ = <>; $$_ = <>; } if (/^SYNOPSIS$$/) { print("*NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version $(VERSION)*\n\n")} ; if ("$$printit" eq "1") { s/^(==+)/"-" x length($$1)/e; print; }' \
36 | > ../perloverview.rst
37 |
--------------------------------------------------------------------------------
/docs/perl/csv_to_db.rst:
--------------------------------------------------------------------------------
1 | csv_to_db - convert comma-separated-value data into fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | csv_to_db
86 |
87 | This program is distributed under terms of the GNU general public
88 | license, version 2. See the file COPYING with the distribution for
89 | details.
90 |
--------------------------------------------------------------------------------
/docs/perl/db_to_csv.rst:
--------------------------------------------------------------------------------
1 | db_to_csv - convert fsdb to the comma-separated-value file-format
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | db_to_csv [-C]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Convert an existing fsdb file to comma-separated value format.
15 |
16 | Input is fsdb format.
17 |
18 | Output is CSV-format plain text (*not* fsdb).
19 |
20 | OPTIONS
21 | -------
22 |
23 | -C or --omit-comments
24 | Also strip all comments.
25 |
26 | This module also supports the standard fsdb options:
27 |
28 | -d
29 | Enable debugging output.
30 |
31 | -i or --input InputSource
32 | Read from InputSource, typically a file name, or ``-`` for standard
33 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
34 | objects.
35 |
36 | -o or --output OutputDestination
37 | Write to OutputDestination, typically a file name, or ``-`` for
38 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
39 | Fsdb::BoundedQueue objects.
40 |
41 | --autorun or --noautorun
42 | By default, programs process automatically, but Fsdb::Filter objects
43 | in Perl do not run until you invoke the **run()** method. The
44 | ``--(no)autorun`` option controls that behavior within Perl.
45 |
46 | --help
47 | Show help.
48 |
49 | --man
50 | Show full manual.
51 |
52 | SAMPLE USAGE
53 | ------------
54 |
55 | Input:
56 | ------
57 |
58 | #fsdb -F S paper papertitle reviewer reviewername score1 score2 score3
59 | score4 score5 1 test, paper 2 Smith 4 4 - - - 2 other paper 3 Jones 3 3
60 | - - - 2 input double space 3 Jones 3 3 - - - # \| csv_to_db
61 |
62 | Command:
63 | --------
64 |
65 | cat data.fsdb \| db_to_csv
66 |
67 | Output:
68 | -------
69 |
70 | paper,papertitle,reviewer,reviewername,score1,score2,score3,score4,score5
71 | 1,"test, paper",2,Smith,4,4,-,-,- 2,"other paper",3,Jones,3,3,-,-,-
72 | 2,"input double space",3,Jones,3,3,-,-,- # \| csv_to_db # \| db_to_csv
73 |
74 | SEE ALSO
75 | --------
76 |
77 | Fsdb. dbfilealter. csv_to_db
78 |
79 | AUTHOR and COPYRIGHT
80 | --------------------
81 |
82 | Copyright (C) 2007-2018 by John Heidemann
83 |
84 | This program is distributed under terms of the GNU general public
85 | license, version 2. See the file COPYING with the distribution for
86 | details.
87 |
--------------------------------------------------------------------------------
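
The same fsdb-to-CSV conversion described above can be done from Python with this package. The sketch below assumes the Fsdb(filename) constructor, iteration over rows, and the column_names attribute used in the class example earlier, and writes ordinary CSV to standard output.

import csv
import sys

import pyfsdb

# Read an FSDB file and emit plain CSV (header first, then data rows).
db = pyfsdb.Fsdb("data.fsdb")
writer = csv.writer(sys.stdout)
writer.writerow(db.column_names)
for row in db:
    writer.writerow(row)
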
/docs/perl/db_to_html_table.rst:
--------------------------------------------------------------------------------
1 | db_to_html_table - convert db to an HTML table
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | db_to_html_table [-g N] dest.html
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Covert an existing dbtable to an HTML table. The output is a fragment of
15 | an HTML page; we assume the user fills in the rest (head and body,
16 | etc.).
17 |
18 | Input is fsdb format.
19 |
20 | Output is HTML code (*not* fsdb), with HTML-specific characters (less
21 | than, greater than, ampersand) escaped. (The fsdb-1.x version
22 | assumed input was ISO-8859-1; we now assume both input and output are
23 | unicode. This change is considered a feature of the 21st century.)
24 |
25 | OPTIONS
26 | -------
27 |
28 | -g N or --group-count N
29 | Color groups of *N* consecutive rows with one background color.
30 |
31 | This module also supports the standard fsdb options:
32 |
33 | -d
34 | Enable debugging output.
35 |
36 | -i or --input InputSource
37 | Read from InputSource, typically a file name, or ``-`` for standard
38 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
39 | objects.
40 |
41 | -o or --output OutputDestination
42 | Write to OutputDestination, typically a file name, or ``-`` for
43 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
44 | Fsdb::BoundedQueue objects.
45 |
46 | --autorun or --noautorun
47 | By default, programs process automatically, but Fsdb::Filter objects
48 | in Perl do not run until you invoke the **run()** method. The
49 | ``--(no)autorun`` option controls that behavior within Perl.
50 |
51 | --help
52 | Show help.
53 |
54 | --man
55 | Show full manual.
56 |
57 | SAMPLE USAGE
58 | ------------
59 |
60 | Input:
61 | ------
62 |
63 | #fsdb -F S account passwd uid gid fullname homedir shell johnh \* 2274
64 | 134 John & Ampersand /home/johnh /bin/bash greg \* 2275 134 Greg <
65 | Lessthan /home/greg /bin/bash root \* 0 0 Root ; Semi /root /bin/bash
66 | four \* 1 1 Fourth Row /home/four /bin/bash
67 |
68 | Command:
69 | --------
70 |
71 | cat data.fsdb \| db_to_html_table -g 3
72 |
73 | Output:
74 | -------
75 |
76 | account | passwd | uid | gid |
77 | fullname | homedir | shell |
johnh | * | 2274 | 134 |
79 | John & Ampersand | /home/johnh | /bin/bash |
80 |
greg | * | 2275 |
81 | 134 | Greg < Lessthan | /home/greg |
82 | /bin/bash |
root | * |
83 | 0 | 0 | Root ; Semi | /root |
84 | /bin/bash |
four | * |
85 | 1 | 1 | Fourth Row | /home/four |
86 | /bin/bash |
87 |
88 | SEE ALSO
89 | --------
90 |
91 | Fsdb. dbcolneaten. dbfileadjust. html_table_to_db.
92 |
93 | AUTHOR and COPYRIGHT
94 | --------------------
95 |
96 | Copyright (C) 2007-2015 by John Heidemann
97 |
98 | This program is distributed under terms of the GNU general public
99 | license, version 2. See the file COPYING with the distribution for
100 | details.
101 |
--------------------------------------------------------------------------------
/docs/perl/dbcol.rst:
--------------------------------------------------------------------------------
1 | dbcol - select columns from an Fsdb file
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcol [-v] [-e -] [column...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Select one or more columns from the input database. If a value is given
15 | for empty columns with the -e option, then any named columns which don't
16 | exist will be created. Otherwise, non-existent columns are an error.
17 |
18 | Note: a safer way to create columns is dbcolcreate.
19 |
20 | OPTIONS
21 | -------
22 |
23 | -r or --relaxed-errors
24 | Relaxed error checking: ignore columns that aren't there.
25 |
26 | -v or --invert-match
27 | Output all columns except those listed (like grep -v).
28 |
29 | -a or --all
30 | Output all columns, in addition to those listed. (Thus ``-a foo``
31 | will move column foo to the first column.)
32 |
33 | -e EmptyValue or --empty
34 | Specify the value newly created columns get.
35 |
36 | --saveoutput $OUT_REF
37 | Save output writer (for integration with other fsdb filters).
38 |
39 | and the standard fsdb options:
40 |
41 | -d
42 | Enable debugging output.
43 |
44 | -i or --input InputSource
45 | Read from InputSource, typically a file, or - for standard input, or
46 | (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
47 |
48 | -o or --output OutputDestination
49 | Write to OutputDestination, typically a file, or - for standard
50 | output, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
51 | objects.
52 |
53 | --autorun or --noautorun
54 | By default, programs process automatically, but Fsdb::Filter objects
55 | in Perl do not run until you invoke the **run()** method. The
56 | ``--(no)autorun`` option controls that behavior within Perl.
57 |
58 | --header H
59 | Use H as the full Fsdb header, rather than reading a header from the
60 | input.
61 |
62 | --help
63 | Show help.
64 |
65 | --man
66 | Show full manual.
67 |
68 | SAMPLE USAGE
69 | ------------
70 |
71 | Input:
72 | ------
73 |
74 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134
75 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson
76 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple
77 | database
78 |
79 | Command:
80 | --------
81 |
82 | cat DATA/passwd.fsdb account \| dbcol account
83 |
84 | Output:
85 | -------
86 |
87 | #fsdb account johnh greg root # this is a simple database # \| dbcol
88 | account
89 |
90 | SEE ALSO
91 | --------
92 |
93 | **dbcolcreate** (1), **Fsdb** (3)
94 |
95 | AUTHOR and COPYRIGHT
96 | --------------------
97 |
98 | Copyright (C) 1991-2022 by John Heidemann
99 |
100 | This program is distributed under terms of the GNU general public
101 | license, version 2. See the file COPYING with the distribution for
102 | details.
103 |
--------------------------------------------------------------------------------
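
A pyfsdb counterpart to the column selection shown above would look roughly like the following. The write-side names (out_file, out_column_names, append, close) follow usage commonly shown for this package but should be treated as assumptions and checked against the Fsdb class documentation.

import pyfsdb

# Keep only the "account" column, the Python analogue of `dbcol account`.
db = pyfsdb.Fsdb("passwd.fsdb", out_file="accounts.fsdb")
db.out_column_names = ["account"]           # header for the output file
account = db.column_names.index("account")  # position of the input column
for row in db:
    db.append([row[account]])
db.close()
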
/docs/perl/dbcolcopylast.rst:
--------------------------------------------------------------------------------
1 | dbcolcopylast - create new columns that are copies of prior columns
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolcopylast [-e EMPTY] [column...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | For each COLUMN, create a new column copylast_COLUMN that is the last
15 | value for that column---that is, the value of that column from the row
16 | before.
17 |
18 | OPTIONS
19 | -------
20 |
21 | -e EmptyValue or --empty
22 | Specify the value newly created columns get.
23 |
24 | This module also supports the standard fsdb options:
25 |
26 | -d
27 | Enable debugging output.
28 |
29 | -i or --input InputSource
30 | Read from InputSource, typically a file name, or ``-`` for standard
31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
32 | objects.
33 |
34 | -o or --output OutputDestination
35 | Write to OutputDestination, typically a file name, or ``-`` for
36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
37 | Fsdb::BoundedQueue objects.
38 |
39 | --autorun or --noautorun
40 | By default, programs process automatically, but Fsdb::Filter objects
41 | in Perl do not run until you invoke the **run()** method. The
42 | ``--(no)autorun`` option controls that behavior within Perl.
43 |
44 | --help
45 | Show help.
46 |
47 | --man
48 | Show full manual.
49 |
50 | SAMPLE USAGE
51 | ------------
52 |
53 | Input:
54 | ------
55 |
56 | #fsdb test a b
57 |
58 | Command:
59 | --------
60 |
61 | cat data.fsdb \| dbcolcopylast foo
62 |
63 | Output:
64 | -------
65 |
66 | #fsdb test foo a - b -
67 |
68 | SEE ALSO
69 | --------
70 |
71 | Fsdb.
72 |
73 | AUTHOR and COPYRIGHT
74 | --------------------
75 |
76 | Copyright (C) 1991-2022 by John Heidemann
77 |
78 | This program is distributed under terms of the GNU general public
79 | license, version 2. See the file COPYING with the distribution for
80 | details.
81 |
--------------------------------------------------------------------------------
/docs/perl/dbcolcreate.rst:
--------------------------------------------------------------------------------
1 | dbcolcreate - create new columns
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolcreate NewColumn1 [NewColumn2]
10 |
11 | or
12 |
13 | dbcolcreate -e DefaultValue NewColumnWithDefault
14 |
15 | DESCRIPTION
16 | -----------
17 |
18 | Create columns ``NewColumn1``, etc. with an optional ``DefaultValue``.
19 |
20 | OPTIONS
21 | -------
22 |
23 | -e EmptyValue or --empty
24 | Specify the value newly created columns get.
25 |
26 | -f or --first
27 | Put all new columns as the first columns of each row. By default,
28 | they go at the end of each row.
29 |
30 | --no-recreate-fatal
31 | By default, creating an existing column is an error. With
32 | **--no-recreate-fatal**, we ignore re-creation.
33 |
34 | This module also supports the standard fsdb options:
35 |
36 | -d
37 | Enable debugging output.
38 |
39 | -i or --input InputSource
40 | Read from InputSource, typically a file name, or ``-`` for standard
41 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
42 | objects.
43 |
44 | -o or --output OutputDestination
45 | Write to OutputDestination, typically a file name, or ``-`` for
46 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
47 | Fsdb::BoundedQueue objects.
48 |
49 | --autorun or --noautorun
50 | By default, programs process automatically, but Fsdb::Filter objects
51 | in Perl do not run until you invoke the **run()** method. The
52 | ``--(no)autorun`` option controls that behavior within Perl.
53 |
54 | --header H
55 | Use H as the full Fsdb header, rather than reading a header from the
56 | input.
57 |
58 | --help
59 | Show help.
60 |
61 | --man
62 | Show full manual.
63 |
64 | SAMPLE USAGE
65 | ------------
66 |
67 | Input:
68 | ------
69 |
70 | #fsdb test a b
71 |
72 | Command:
73 | --------
74 |
75 | cat data.fsdb \| dbcolcreate foo
76 |
77 | Output:
78 | -------
79 |
80 | #fsdb test foo a - b -
81 |
82 | SEE ALSO
83 | --------
84 |
85 | Fsdb.
86 |
87 | AUTHOR and COPYRIGHT
88 | --------------------
89 |
90 | Copyright (C) 1991-2022 by John Heidemann
91 |
92 | This program is distributed under terms of the GNU general public
93 | license, version 2. See the file COPYING with the distribution for
94 | details.
95 |
--------------------------------------------------------------------------------
/docs/perl/dbcoldefine.rst:
--------------------------------------------------------------------------------
1 | dbcoldefine - define the columns of a plain text file to make it an Fsdb file
2 | ======================================================================
3 |
4 |
5 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
6 |
7 | SYNOPSIS
8 | --------
9 |
10 | dbcoldefine [-F x] [column...]
11 |
12 | DESCRIPTION
13 | -----------
14 |
15 | This program writes a new header before the data with the specified
16 | column names. It does *not* do any validation of the data contents; it
17 | is up to the user to verify that, other than the header, the input
18 | datastream is a correctly formatted Fsdb file.
19 |
20 | OPTIONS
21 | -------
22 |
23 | -F or --fs or --fieldseparator s
24 | Specify the field separator.
25 |
26 | --header H
27 | Give the columns and field separator as a full Fsdb header (including
28 | ``#fsdb``). Can only be used alone, not with other specifications.
29 |
30 | This module also supports the standard fsdb options:
31 |
32 | -d
33 | Enable debugging output.
34 |
35 | -i or --input InputSource
36 | Read from InputSource, typically a file name, or ``-`` for standard
37 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
38 | objects.
39 |
40 | -o or --output OutputDestination
41 | Write to OutputDestination, typically a file name, or ``-`` for
42 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
43 | Fsdb::BoundedQueue objects.
44 |
45 | --autorun or --noautorun
46 | By default, programs process automatically, but Fsdb::Filter objects
47 | in Perl do not run until you invoke the **run()** method. The
48 | ``--(no)autorun`` option controls that behavior within Perl.
49 |
50 | --help
51 | Show help.
52 |
53 | --man
54 | Show full manual.
55 |
56 | SAMPLE USAGE
57 | ------------
58 |
59 | Input:
60 | ------
61 |
62 | 102400 4937974.964736 102400 4585247.875904 102400 5098141.207123
63 |
64 | Command:
65 | --------
66 |
67 | cat DATA/http_bandwidth \| dbcoldefine size bw
68 |
69 | Output:
70 | -------
71 |
72 | #fsdb size bw 102400 4937974.964736 102400 4585247.875904 102400
73 | 5098141.207123 # \| dbcoldefine size bw
74 |
75 | SEE ALSO
76 | --------
77 |
78 | Fsdb. dbfilestripcomments
79 |
80 | AUTHOR and COPYRIGHT
81 | --------------------
82 |
83 | Copyright (C) 1991-2016 by John Heidemann
84 |
85 | This program is distributed under terms of the GNU general public
86 | license, version 2. See the file COPYING with the distribution for
87 | details.
88 |
--------------------------------------------------------------------------------
/docs/perl/dbcolize.rst:
--------------------------------------------------------------------------------
1 | dbcolize - DEPRECATED, now use dbfilealter
2 | ======================================================================
3 |
--------------------------------------------------------------------------------
/docs/perl/dbcolmerge.rst:
--------------------------------------------------------------------------------
1 | dbcolmerge - merge multiple columns into one
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolmerge [-C ElementSeparator] [columns...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | For each row, merge multiple columns down to a single column (always a
15 | string), joining elements with ElementSeparator (defaults to a single
16 | underscore).
17 |
18 | OPTIONS
19 | -------
20 |
21 | -C S or --element-separator S
22 | Specify the separator used to join columns. (Defaults to a single
23 | underscore.)
24 |
25 | -e E or --empty E
26 | give value E as the value for empty (null) records
27 |
28 | This module also supports the standard fsdb options:
29 |
30 | -d
31 | Enable debugging output.
32 |
33 | -i or --input InputSource
34 | Read from InputSource, typically a file name, or ``-`` for standard
35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
36 | objects.
37 |
38 | -o or --output OutputDestination
39 | Write to OutputDestination, typically a file name, or ``-`` for
40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
41 | Fsdb::BoundedQueue objects.
42 |
43 | --autorun or --noautorun
44 | By default, programs process automatically, but Fsdb::Filter objects
45 | in Perl do not run until you invoke the **run()** method. The
46 | ``--(no)autorun`` option controls that behavior within Perl.
47 |
48 | --header H
49 | Use H as the full Fsdb header, rather than reading a header from the
50 | input.
51 |
52 | --help
53 | Show help.
54 |
55 | --man
56 | Show full manual.
57 |
58 | SAMPLE USAGE
59 | ------------
60 |
61 | Input:
62 | ------
63 |
64 | #fsdb first last John Heidemann Greg Johnson Root - # this is a simple
65 | database # \| /home/johnh/BIN/DB/dbcol fullname # \| dbcolrename
66 | fullname first_last # \| /home/johnh/BIN/DB/dbcolsplit -C \_ first_last
67 | # \| /home/johnh/BIN/DB/dbcol first last
68 |
69 | Command:
70 | --------
71 |
72 | cat data.fsdb \| dbcolmerge -C \_ first last
73 |
74 | Output:
75 | -------
76 |
77 | #fsdb first last first_last John Heidemann John_Heidemann Greg Johnson
78 | Greg_Johnson Root - Root\_ # this is a simple database # \|
79 | /home/johnh/BIN/DB/dbcol fullname # \| dbcolrename fullname first_last #
80 | \| /home/johnh/BIN/DB/dbcolsplit first_last # \|
81 | /home/johnh/BIN/DB/dbcol first last # \| /home/johnh/BIN/DB/dbcolmerge
82 | -C \_ first last
83 |
84 | SEE ALSO
85 | --------
86 |
87 | Fsdb. dbcolsplittocols. dbcolsplittorows. dbcolrename.
88 |
89 | AUTHOR and COPYRIGHT
90 | --------------------
91 |
92 | Copyright (C) 1991-2022 by John Heidemann
93 |
94 | This program is distributed under terms of the GNU general public
95 | license, version 2. See the file COPYING with the distribution for
96 | details.
97 |
--------------------------------------------------------------------------------
/docs/perl/dbcolneaten.rst:
--------------------------------------------------------------------------------
1 | dbcolneaten - pretty-print columns of Fsdb data (assuming a monospaced font)
2 | ======================================================================
3 |
4 |
5 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
6 |
7 | SYNOPSIS
8 | --------
9 |
10 | dbcolneaten [-E] [field_settings]
11 |
12 | DESCRIPTION
13 | -----------
14 |
15 | dbcolneaten arranges that the Fsdb data appears in neat columns if you
16 | view it with a monospaced font. To do this, it pads out each field with
17 | spaces to line up the next field.
18 |
19 | Field settings are of the form
20 |
21 | field op value
22 |
23 | OP is >=, =, or <= specifying that the width of that FIELD must be more,
24 | equal, or less than that VALUE
25 |
26 | dbcolneaten runs in O(1) memory but uses disk space proportional to the
27 | size of the data.
28 |
29 | OPTIONS
30 | -------
31 |
32 | -E or --noeoln
33 | Omit padding for the last column (at the end-of-the-line). (Default
34 | behavior.)
35 |
36 | -e or --eoln
37 | Do padding and include an extra field separator after the last
38 | column. (Useful if you're interactively adding a column.)
39 |
40 | This module also supports the standard fsdb options:
41 |
42 | -d
43 | Enable debugging output.
44 |
45 | -i or --input InputSource
46 | Read from InputSource, typically a file name, or ``-`` for standard
47 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
48 | objects.
49 |
50 | -o or --output OutputDestination
51 | Write to OutputDestination, typically a file name, or ``-`` for
52 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
53 | Fsdb::BoundedQueue objects.
54 |
55 | --autorun or --noautorun
56 | By default, programs process automatically, but Fsdb::Filter objects
57 | in Perl do not run until you invoke the **run()** method. The
58 | ``--(no)autorun`` option controls that behavior within Perl.
59 |
60 | --help
61 | Show help.
62 |
63 | --man
64 | Show full manual.
65 |
66 | SAMPLE USAGE
67 | ------------
68 |
69 | Input:
70 | ------
71 |
72 | #fsdb fullname homedir uid gid Mr._John_Heidemann_Junior /home/johnh
73 | 2274 134 Greg_Johnson /home/greg 2275 134 Root /root 0 0 # this is a
74 | simple database # \| dbcol fullname homedir uid gid
75 |
76 | Command:
77 | --------
78 |
79 | dbcolneaten
80 |
81 | Output:
82 | -------
83 |
84 | #fsdb -F s fullname homedir uid gid Mr._John_Heidemann_Junior
85 | /home/johnh 2274 134 Greg_Johnson /home/greg 2275 134 Root /root 0 0 #
86 | this is a simple database # \| dbcol fullname homedir uid gid # \|
87 | dbcolneaten
88 |
89 | BUGS
90 | ----
91 |
92 | Does not handle tab separators correctly.
93 |
94 | SEE ALSO
95 | --------
96 |
97 | Fsdb.
98 |
99 | AUTHOR and COPYRIGHT
100 | --------------------
101 |
102 | Copyright (C) 1991-2018 by John Heidemann
103 |
104 | This program is distributed under terms of the GNU general public
105 | license, version 2. See the file COPYING with the distribution for
106 | details.
107 |
--------------------------------------------------------------------------------
/docs/perl/dbcolrename.rst:
--------------------------------------------------------------------------------
1 | dbcolrename - change the names of columns in a fsdb schema
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolrename OldName1 NewName1 [OldName2 NewName2] ...
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Dbcolrename changes the names of columns in a fsdb schema, mapping
15 | OldName1 to NewName1, and so on for multiple pairs of column names.
16 |
17 | Note that it is valid to do overlapping renames like
18 | ``dbcolrename a b b a``.
19 |
20 | OPTIONS
21 | -------
22 |
23 | No non-standard options.
24 |
25 | This module also supports the standard fsdb options:
26 |
27 | -d
28 | Enable debugging output.
29 |
30 | -i or --input InputSource
31 | Read from InputSource, typically a file name, or ``-`` for standard
32 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
33 | objects.
34 |
35 | -o or --output OutputDestination
36 | Write to OutputDestination, typically a file name, or ``-`` for
37 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
38 | Fsdb::BoundedQueue objects.
39 |
40 | --autorun or --noautorun
41 | By default, programs process automatically, but Fsdb::Filter objects
42 | in Perl do not run until you invoke the **run()** method. The
43 | ``--(no)autorun`` option controls that behavior within Perl.
44 |
45 | --help
46 | Show help.
47 |
48 | --man
49 | Show full manual.
50 |
51 | SAMPLE USAGE
52 | ------------
53 |
54 | Input:
55 | ------
56 |
57 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134
58 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson
59 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple
60 | database
61 |
62 | Command:
63 | --------
64 |
65 | cat DATA/passwd.fsdb \| dbcolrename fullname first_last
66 |
67 | Output:
68 | -------
69 |
70 | #fsdb account passwd uid gid first_last homedir shell johnh \* 2274 134
71 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson
72 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple
73 | database # \| dbcolrename fullname first_last
74 |
75 | SEE ALSO
76 | --------
77 |
78 | Fsdb.
79 |
80 | AUTHOR and COPYRIGHT
81 | --------------------
82 |
83 | Copyright (C) 1991-2022 by John Heidemann
84 |
85 | This program is distributed under terms of the GNU general public
86 | license, version 2. See the file COPYING with the distribution for
87 | details.
88 |
--------------------------------------------------------------------------------
/docs/perl/dbcolscorrelate.rst:
--------------------------------------------------------------------------------
1 | dbcolscorrelate - find the coefficient of correlation over columns
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolscorrelate column1 column2 [column3...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Compute the coefficient of correlation over two (or more) columns.
15 |
16 | The output is one line of correlations.
17 |
18 | With exactly two columns, a new column *correlation* is created.
19 |
20 | With more than two columns, correlations are computed for each pairwise
21 | combination of columns, and each output column is given a name which is
22 | the concatenation of the two source column names, joined with an underscore.
23 |
24 | By default, we compute the *population correlation coefficient* (usually
25 | designated rho, ρ) and assume we see all members of the population. With
26 | the **--sample** option we instead compute the *sample correlation
27 | coefficient*, usually designated *r*. (Be careful: the default here,
28 | the full population, is the *opposite* of the default in dbcolstats.)
29 |
30 | This program requires a complete copy of the input data on disk.
31 |
32 | OPTIONS
33 | -------
34 |
35 | --sample
36 | Select the Pearson product-moment correlation coefficient (the
37 | sample correlation coefficient, usually designated *r*).
38 |
39 | --nosample
40 | Select the population correlation coefficient (usually designated
41 | rho), assuming the data covers the full population.
42 |
43 | -f FORMAT or --format FORMAT
44 | Specify a **printf** (3)-style format for output statistics. Defaults
45 | to ``%.5g``.
46 |
47 | -T TmpDir
48 | where to put tmp files. Also uses environment variable TMPDIR, if -T
49 | is not specified. Default is /tmp.
50 |
51 | This module also supports the standard fsdb options:
52 |
53 | -d
54 | Enable debugging output.
55 |
56 | -i or --input InputSource
57 | Read from InputSource, typically a file name, or ``-`` for standard
58 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
59 | objects.
60 |
61 | -o or --output OutputDestination
62 | Write to OutputDestination, typically a file name, or ``-`` for
63 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
64 | Fsdb::BoundedQueue objects.
65 |
66 | --autorun or --noautorun
67 | By default, programs process automatically, but Fsdb::Filter objects
68 | in Perl do not run until you invoke the **run()** method. The
69 | ``--(no)autorun`` option controls that behavior within Perl.
70 |
71 | --help
72 | Show help.
73 |
74 | --man
75 | Show full manual.
76 |
77 | SAMPLE USAGE
78 | ------------
79 |
80 | Input:
81 | ------
82 |
83 | #fsdb name id test1 test2 a 1 80 81 b 2 70 71 c 3 65 66 d 4 90 91 e 5 70
84 | 71 f 6 90 91
85 |
86 | Command:
87 | --------
88 |
89 | cat DATA/more_grades.fsdb \| dbcolscorrelate test1 test2
90 |
91 | Output:
92 | -------
93 |
94 | #fsdb correlation:d 0.83329 # \| dbcolscorrelate test1 test2
95 |
96 | SEE ALSO
97 | --------
98 |
99 | Fsdb, dbcolstatscores, dbcolsregression, dbrvstatdiff.
100 |
101 | AUTHOR and COPYRIGHT
102 | --------------------
103 |
104 | Copyright (C) 1998-2022 by John Heidemann
105 |
106 | This program is distributed under terms of the GNU general public
107 | license, version 2. See the file COPYING with the distribution for
108 | details.
109 |
--------------------------------------------------------------------------------
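
For reference, the coefficient reported in the sample output above (correlation: 0.83329) is the standard Pearson correlation between the two chosen columns. In the usual notation,

    r = \frac{\sum_i (x_i - \bar{x})\,(y_i - \bar{y})}
             {\sqrt{\sum_i (x_i - \bar{x})^2}\;\sqrt{\sum_i (y_i - \bar{y})^2}}

where x_i and y_i are the paired values from the two columns and \bar{x}, \bar{y} are their means.
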
/docs/perl/dbcolsplittorows.rst:
--------------------------------------------------------------------------------
1 | dbcolsplittorows - split an existing column into multiple new rows
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolsplittorows [-C ElementSeparator] [-e null] [-E] [-N
10 | enumerated-name] column [column...]
11 |
12 | DESCRIPTION
13 | -----------
14 |
15 | Split column into pieces, outputting one row for each piece.
16 |
17 | By default, any empty fields are ignored. If an empty field value is
18 | given with -e, then they produce output.
19 |
20 | When a null value is given, empty fields at the beginning and end of
21 | lines are suppressed (like perl split). Unlike perl, if ALL fields are
22 | empty, we generate one (and not zero) empty field.
23 |
24 | The inverse of this command is dbfilepivot.
25 |
26 | OPTIONS
27 | -------
28 |
29 | -C S or --element-separator S
30 | Specify the separator used to split columns. (Defaults to a single
31 | underscore.)
32 |
33 | -E or --enumerate
34 | Enumerate output columns: rather than assuming the column name uses
35 | the element separator, we keep it whole and fill in with indexes
36 | starting from 0.
37 |
38 | -N or --new-name N
39 | Name the new column N for enumeration. Defaults to ``index``.
40 |
41 | -e E or --empty E
42 | give value E as the value for empty (null) records
43 |
44 | This module also supports the standard fsdb options:
45 |
46 | -d
47 | Enable debugging output.
48 |
49 | -i or --input InputSource
50 | Read from InputSource, typically a file name, or ``-`` for standard
51 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
52 | objects.
53 |
54 | -o or --output OutputDestination
55 | Write to OutputDestination, typically a file name, or ``-`` for
56 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
57 | Fsdb::BoundedQueue objects.
58 |
59 | --autorun or --noautorun
60 | By default, programs process automatically, but Fsdb::Filter objects
61 | in Perl do not run until you invoke the **run()** method. The
62 | ``--(no)autorun`` option controls that behavior within Perl.
63 |
64 | --help
65 | Show help.
66 |
67 | --man
68 | Show full manual.
69 |
70 | SAMPLE USAGE
71 | ------------
72 |
73 | Input:
74 | ------
75 |
76 | #fsdb name uid John_Heidemann 2274 Greg_Johnson 2275 Root 0 # this is a
77 | simple database # \| dbcol fullname uid # \| dbcolrename fullname name
78 |
79 | Command:
80 | --------
81 |
82 | cat data.fsdb \| dbcolsplittorows name
83 |
84 | Output:
85 | -------
86 |
87 | #fsdb name uid John 2274 Heidemann 2274 Greg 2275 Johnson 2275 Root 0 #
88 | this is a simple database # \| dbcol fullname uid # \| dbcolrename
89 | fullname name # \| dbcolsplittorows name
90 |
91 | SEE ALSO
92 | --------
93 |
94 | **Fsdb** (1). **dbcolmerge** (1). **dbcolsplittocols** (1).
95 | **dbcolrename** (1). **dbfilepivot** (1).
96 |
97 | AUTHOR and COPYRIGHT
98 | --------------------
99 |
100 | Copyright (C) 1991-2018 by John Heidemann
101 |
102 | This program is distributed under terms of the GNU general public
103 | license, version 2. See the file COPYING with the distribution for
104 | details.
105 |
--------------------------------------------------------------------------------
/docs/perl/dbcolsregression.rst:
--------------------------------------------------------------------------------
1 | dbcolsregression - compute linear regression between two columns
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcolsregression [-a] column1 column2
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Compute linear regression over ``column1`` and ``column2``. Outputs
15 | slope, intercept, and correlation coefficient.
16 |
17 | OPTIONS
18 | -------
19 |
20 | -a or --include-non-numeric
21 | Compute stats over all records (treat non-numeric records as zero
22 | rather than just ignoring them).
23 |
24 | -f FORMAT or --format FORMAT
25 | Specify a **printf** (3)-style format for output statistics. Defaults
26 | to ``%.5g``.
27 |
28 | This module also supports the standard fsdb options:
29 |
30 | -d
31 | Enable debugging output.
32 |
33 | -i or --input InputSource
34 | Read from InputSource, typically a file name, or ``-`` for standard
35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
36 | objects.
37 |
38 | -o or --output OutputDestination
39 | Write to OutputDestination, typically a file name, or ``-`` for
40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
41 | Fsdb::BoundedQueue objects.
42 |
43 | --autorun or --noautorun
44 | By default, programs process automatically, but Fsdb::Filter objects
45 | in Perl do not run until you invoke the **run()** method. The
46 | ``--(no)autorun`` option controls that behavior within Perl.
47 |
48 | --help
49 | Show help.
50 |
51 | --man
52 | Show full manual.
53 |
54 | SAMPLE USAGE
55 | ------------
56 |
57 | Input:
58 | ------
59 |
60 | #fsdb x y 160 126 180 103 200 82 220 75 240 82 260 40 280 20
61 |
62 | Command:
63 | --------
64 |
65 | cat DATA/xy.fsdb \| dbcolsregression x y \| dblistize
66 |
67 | Output:
68 | -------
69 |
70 | #fsdb -R C slope:d intercept:d confcoeff:d n:q slope: -0.79286
71 | intercept: 249.86 confcoeff: -0.95426 n: 7 # \| dbcolsregression x y #
72 | confidence intervals assume normal distribution and small n. # \|
73 | dblistize
74 |
75 | Sample data from
76 |
77 | by Stefan Waner and Steven R. Costenoble.
78 |
79 | SEE ALSO
80 | --------
81 |
82 | dbcolstats, dbcolscorrelate, Fsdb.
83 |
84 | AUTHOR and COPYRIGHT
85 | --------------------
86 |
87 | Copyright (C) 1997-2022 by John Heidemann
88 |
89 | This program is distributed under terms of the GNU general public
90 | license, version 2. See the file COPYING with the distribution for
91 | details.
92 |
--------------------------------------------------------------------------------
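
The slope and intercept in the sample output above (-0.79286 and 249.86) are the usual ordinary-least-squares estimates for y regressed on x; written out,

    \hat{\beta} = \frac{\sum_i (x_i - \bar{x})\,(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},
    \qquad
    \hat{\alpha} = \bar{y} - \hat{\beta}\,\bar{x}

and the confcoeff field is the corresponding correlation coefficient for the same pair of columns.
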
/docs/perl/dbcoltype.rst:
--------------------------------------------------------------------------------
1 | dbcoltype - define (or redefine) types for columns of an Fsdb file
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbcoltype [-v] [column type...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Define the type of each column, where COLUMN and TYPE are pairs. Or,
15 | with the ``-v`` option, redefine all types as string.
16 |
17 | The data does not change (just the header).
18 |
19 | OPTIONS
20 | -------
21 |
22 | -v or --clear-types
23 | Remove definitions from columns that are listed, or from all columns
24 | if none are listed. The effect is to restore types to their default
25 | type of a (string).
26 |
27 | and the standard fsdb options:
28 |
29 | -d
30 | Enable debugging output.
31 |
32 | -i or --input InputSource
33 | Read from InputSource, typically a file, or - for standard input, or
34 | (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
35 |
36 | -o or --output OutputDestination
37 | Write to OutputDestination, typically a file, or - for standard
38 | output, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
39 | objects.
40 |
41 | --autorun or --noautorun
42 | By default, programs process automatically, but Fsdb::Filter objects
43 | in Perl do not run until you invoke the **run()** method. The
44 | ``--(no)autorun`` option controls that behavior within Perl.
45 |
46 | --header H
47 | Use H as the full Fsdb header, rather than reading a header from the
48 | input.
49 |
50 | --help
51 | Show help.
52 |
53 | --man
54 | Show full manual.
55 |
56 | SAMPLE USAGE
57 | ------------
58 |
59 | Input:
60 | ------
61 |
62 | #fsdb account passwd uid gid fullname homedir shell johnh \* 2274 134
63 | John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson
64 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple
65 | database
66 |
67 | Command:
68 | --------
69 |
70 | cat DATA/passwd.fsdb account \| dbcoltype uid l gid l
71 |
72 | Output:
73 | -------
74 |
75 | #fsdb account passwd uid:l gid:l fullname homedir shell johnh \* 2274
76 | 134 John_Heidemann /home/johnh /bin/bash greg \* 2275 134 Greg_Johnson
77 | /home/greg /bin/bash root \* 0 0 Root /root /bin/bash # this is a simple
78 | database
79 |
80 | SEE ALSO
81 | --------
82 |
83 | **dbcoldefine** (1), **dbcolcreate** (1), **Fsdb** (3).
84 |
85 | AUTHOR and COPYRIGHT
86 | --------------------
87 |
88 | Copyright (C) 2022 by John Heidemann
89 |
90 | This program is distributed under terms of the GNU general public
91 | license, version 2. See the file COPYING with the distribution for
92 | details.
93 |
--------------------------------------------------------------------------------
/docs/perl/dbfilecat.rst:
--------------------------------------------------------------------------------
1 | dbfilecat - concatenate two files with identical schema
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbfilecat --input A.fsdb [--input B.fsdb...]
10 |
11 | or
12 |
13 | echo A.fsdb \| dbfilecat --xargs
14 |
15 | DESCRIPTION
16 | -----------
17 |
18 | Concatenate all provided input files, producing one result. We remove
19 | extra header lines.
20 |
21 | Inputs can both be specified with ``--input``, or one can come from
22 | standard input and the other from ``--input``. With ``--xargs``, each
23 | line of standard input is a filename for input.
24 |
25 | Inputs must have identical schemas (columns, column order, and field
26 | separators).
27 |
28 | Like dbmerge, but no worries about sorting, and with no arguments we
29 | read standard input (although that's not very useful).
30 |
31 | OPTIONS
32 | -------
33 |
34 | General option:
35 |
36 | --xargs
37 | Expect that input filenames are given, one-per-line, on standard
38 | input. (In this case, merging can start incrementally.)
39 |
40 | --removeinputs
41 | Delete the source files after they have been consumed. (Defaults off,
42 | leaving the inputs in place.) This module also supports the standard
43 | fsdb options:
44 |
45 | -d
46 | Enable debugging output.
47 |
48 | -i or --input InputSource
49 | Read from InputSource, typically a file name, or ``-`` for standard
50 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
51 | objects.
52 |
53 | -o or --output OutputDestination
54 | Write to OutputDestination, typically a file name, or ``-`` for
55 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
56 | Fsdb::BoundedQueue objects.
57 |
58 | --autorun or --noautorun
59 | By default, programs process automatically, but Fsdb::Filter objects
60 | in Perl do not run until you invoke the **run()** method. The
61 | ``--(no)autorun`` option controls that behavior within Perl.
62 |
63 | --help
64 | Show help.
65 |
66 | --man
67 | Show full manual.
68 |
69 | SAMPLE USAGE
70 | ------------
71 |
72 | Input:
73 | ------
74 |
75 | File *a.fsdb*:
76 |
77 | ::
78 |
79 |    #fsdb cid cname
80 |    11 numanal
81 |    10 pascal
78 |
79 | File *b.fsdb*:
80 |
81 | ::
82 |
83 |    #fsdb cid cname
84 |    12 os
85 |    13 statistics
82 |
83 | Command:
84 | --------
85 |
86 | dbfilecat --input a.fsdb --input b.fsdb
87 |
88 | Output:
89 | -------
90 |
91 | ::
92 |
93 |    #fsdb cid cname
94 |    11 numanal
95 |    10 pascal
96 |    12 os
97 |    13 statistics
98 |    #  | dbmerge --input a.fsdb --input b.fsdb
93 |
94 | SEE ALSO
95 | --------
96 |
97 | **dbmerge** (1), **Fsdb** (3)
98 |
99 | AUTHOR and COPYRIGHT
100 | --------------------
101 |
102 | Copyright (C) 2013-2018 by John Heidemann
103 |
104 | This program is distributed under terms of the GNU general public
105 | license, version 2. See the file COPYING with the distribution for
106 | details.
107 |
--------------------------------------------------------------------------------
/docs/perl/dbfilestripcomments.rst:
--------------------------------------------------------------------------------
1 | dbfilestripcomments - remove comments from a fsdb file
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbfilestripcomments [-h]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Remove any comments in a file, including the header. This makes the file
15 | unreadable by other Fsdb utilities, but perhaps more readable by humans.
16 |
17 | With the -h option, leave the header.
18 |
19 | OPTIONS
20 | -------
21 |
22 | -h or --header
23 | Retain the header.
24 |
25 | This module also supports the standard fsdb options:
26 |
27 | -d
28 | Enable debugging output.
29 |
30 | -i or --input InputSource
31 | Read from InputSource, typically a file name, or ``-`` for standard
32 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
33 | objects.
34 |
35 | -o or --output OutputDestination
36 | Write to OutputDestination, typically a file name, or ``-`` for
37 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
38 | Fsdb::BoundedQueue objects.
39 |
40 | --autorun or --noautorun
41 | By default, programs process automatically, but Fsdb::Filter objects
42 | in Perl do not run until you invoke the **run()** method. The
43 | ``--(no)autorun`` option controls that behavior within Perl.
44 |
45 | --help
46 | Show help.
47 |
48 | --man
49 | Show full manual.
50 |
51 | SAMPLE USAGE
52 | ------------
53 |
54 | Input:
55 | ------
56 |
57 | ::
58 |
59 |    #fsdb -R C experiment mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n
60 |    experiment: ufs_mab_sys
61 |    mean: 37.25
62 |    stddev: 0.070711
63 |    pct_rsd: 0.18983
64 |    conf_range: 0.6353
65 |    conf_low: 36.615
66 |    conf_high: 37.885
67 |    conf_pct: 0.95
68 |    sum: 74.5
69 |    sum_squared: 2775.1
70 |    min: 37.2
71 |    max: 37.3
72 |    n: 2
73 |    #  | /home/johnh/BIN/DB/dbmultistats experiment duration
74 |    #  | /home/johnh/BIN/DB/dblistize
63 |
64 | Command:
65 | --------
66 |
67 | cat data.fsdb \| dbfilestripcomments
68 |
69 | Output:
70 | -------
71 |
72 | ::
73 |
74 |    experiment: ufs_mab_sys
75 |    mean: 37.25
76 |    stddev: 0.070711
77 |    pct_rsd: 0.18983
78 |    conf_range: 0.6353
79 |    conf_low: 36.615
80 |    conf_high: 37.885
81 |    conf_pct: 0.95
82 |    sum: 74.5
83 |    sum_squared: 2775.1
84 |    min: 37.2
85 |    max: 37.3
86 |    n: 2
75 |
76 | SEE ALSO
77 | --------
78 |
79 | Fsdb. dbcoldefine.
80 |
81 | AUTHOR and COPYRIGHT
82 | --------------------
83 |
84 | Copyright (C) 1991-2008 by John Heidemann
85 |
86 | This program is distributed under terms of the GNU general public
87 | license, version 2. See the file COPYING with the distribution for
88 | details.
89 |
--------------------------------------------------------------------------------
/docs/perl/dbfilevalidate.rst:
--------------------------------------------------------------------------------
1 | dbfilevalidate - ensure the source input is a well-formed Fsdb file
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbfilevalidate [-vc]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Validates the input file to make sure it is a well-formed fsdb file. If
15 | the file is well-formed, it outputs the whole file and exits with a good
16 | exit code. For invalid files, it exits with an error exit code and
17 | embeds error messages in the stream as comments with ``***`` in them.
18 |
19 | Currently this program checks for rows with missing or extra columns.
20 |
21 | OPTIONS
22 | -------
23 |
24 | -v or --errors-only
25 | Output only broken lines, not the whole thing.
26 |
27 | -c or --correct
28 | Correct errors, if possible. Pad out rows with the empty value;
29 | truncate rows with extra values. If errors can be corrected the
30 | program exits with a good return code.
31 |
32 | -e E or --empty E
33 | Give value E as the value for empty (null) records.
34 |
35 | This module also supports the standard fsdb options:
36 |
37 | -d
38 | Enable debugging output.
39 |
40 | -i or --input InputSource
41 | Read from InputSource, typically a file name, or ``-`` for standard
42 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
43 | objects.
44 |
45 | -o or --output OutputDestination
46 | Write to OutputDestination, typically a file name, or ``-`` for
47 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
48 | Fsdb::BoundedQueue objects.
49 |
50 | --autorun or --noautorun
51 | By default, programs process automatically, but Fsdb::Filter objects
52 | in Perl do not run until you invoke the **run()** method. The
53 | ``--(no)autorun`` option controls that behavior within Perl.
54 |
55 | --help
56 | Show help.
57 |
58 | --man
59 | Show full manual.
60 |
61 | SAMPLE USAGE
62 | ------------
63 |
64 | Input:
65 | ------
66 |
67 | ::
68 |
69 |    #fsdb sid cid
70 |    1 10
71 |    2
72 |    1 12
73 |    2 12
68 |
69 | Command:
70 | --------
71 |
72 | cat TEST/dbfilevalidate_ex.in \| dbvalidate
73 |
74 | Output:
75 | -------
76 |
77 | ::
78 |
79 |    #fsdb sid cid
80 |    1 10
81 |    2
82 |    # *** line above is missing field cid.
83 |    1 12
84 |    2 12
85 |    #  | dbfilevalidate
79 |
80 | SEE ALSO
81 | --------
82 |
83 | Fsdb.
84 |
85 | AUTHOR and COPYRIGHT
86 | --------------------
87 |
88 | Copyright (C) 1991-2008 by John Heidemann
89 |
90 | This program is distributed under terms of the GNU general public
91 | license, version 2. See the file COPYING with the distribution for
92 | details.
93 |
--------------------------------------------------------------------------------
/docs/perl/dblistize.rst:
--------------------------------------------------------------------------------
1 | dblistize - DEPRECATED, now use dbfilealter
2 | ======================================================================
3 |
--------------------------------------------------------------------------------
/docs/perl/dbrecolize.rst:
--------------------------------------------------------------------------------
1 | dbrecolize - DEPRECATED, now use dbfilealter
2 | ======================================================================
3 |
--------------------------------------------------------------------------------
/docs/perl/dbrow.rst:
--------------------------------------------------------------------------------
1 | dbrow - select rows from an Fsdb file based on arbitrary conditions
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbrow [-vw] CONDITION [CONDITION...]
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Select rows for which all CONDITIONS are true. Conditions are specified
15 | as Perl code, in which column names are embedded, preceded by
16 | underscores.
17 |
18 | OPTIONS
19 | -------
20 |
21 | -v
22 | Invert the selection, picking rows where at least one condition does
23 | *not* match.
24 |
25 | This module also supports the standard fsdb options:
26 |
27 | -d
28 | Enable debugging output.
29 |
30 | -w or --warnings
31 | Enable warnings in user supplied code.
32 |
33 | -i or --input InputSource
34 | Read from InputSource, typically a file name, or ``-`` for standard
35 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
36 | objects.
37 |
38 | -o or --output OutputDestination
39 | Write to OutputDestination, typically a file name, or ``-`` for
40 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
41 | Fsdb::BoundedQueue objects.
42 |
43 | --autorun or --noautorun
44 | By default, programs process automatically, but Fsdb::Filter objects
45 | in Perl do not run until you invoke the **run()** method. The
46 | ``--(no)autorun`` option controls that behavior within Perl.
47 |
48 | --header H
49 | Use H as the full Fsdb header, rather than reading a header from the
50 | input.
51 |
52 | --help
53 | Show help.
54 |
55 | --man
56 | Show full manual.
57 |
58 | SAMPLE USAGE
59 | ------------
60 |
61 | Input:
62 | ------
63 |
64 | ::
65 |
66 |    #fsdb account passwd uid gid fullname homedir shell
67 |    johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
68 |    greg * 2275 134 Greg_Johnson /home/greg /bin/bash
69 |    root * 0 0 Root /root /bin/bash
70 |    # this is a simple database
68 |
69 | Command:
70 | --------
71 |
72 | cat DATA/passwd.fsdb \| dbrow \_fullname =~ /John/
73 |
74 | Output:
75 | -------
76 |
77 | ::
78 |
79 |    #fsdb account passwd uid gid fullname homedir shell
80 |    johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
81 |    greg * 2275 134 Greg_Johnson /home/greg /bin/bash
82 |    # this is a simple database
83 |    #  | /home/johnh/BIN/DB/dbrow
81 |
82 | BUGS
83 | ----
84 |
85 | Doesn't detect references to unknown columns in conditions.
86 |
88 |
89 | AUTHOR and COPYRIGHT
90 | --------------------
91 |
92 | Copyright (C) 1991-2018 by John Heidemann
93 |
94 | This program is distributed under terms of the GNU general public
95 | license, version 2. See the file COPYING with the distribution for
96 | details.
97 |
--------------------------------------------------------------------------------
/docs/perl/dbrowaccumulate.rst:
--------------------------------------------------------------------------------
1 | dbrowaccumulate - compute a running sum of a column
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbrowaccumulate [-C increment_constant] [-I initial_value] [-c
10 | increment_column] [-N new_column_name]
11 |
12 | DESCRIPTION
13 | -----------
14 |
15 | Compute a running sum over a column of data, or of a constant
16 | incremented per row, perhaps to generate a cumulative distribution.
17 |
18 | What to accumulate is specified by ``-c`` or ``-C``.
19 |
20 | The new column is named by the ``-N`` argument, defaulting to ``accum``.
21 |
22 | OPTIONS
23 | -------
24 |
25 | -c or --column COLUMN
26 | Accumulate values from the given COLUMN. No default.
27 |
28 | -C or --constant K
29 | Accumulate the given constant K for each row of input. No default.
30 |
31 | -I or --initial-value I
32 | Start accumulation at value I. Defaults to zero.
33 |
34 | -N or --new-name N
35 | Name the new column N. Defaults to ``accum``.
36 |
37 | This module also supports the standard fsdb options:
38 |
39 | -d
40 | Enable debugging output.
41 |
42 | -i or --input InputSource
43 | Read from InputSource, typically a file name, or ``-`` for standard
44 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
45 | objects.
46 |
47 | -o or --output OutputDestination
48 | Write to OutputDestination, typically a file name, or ``-`` for
49 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
50 | Fsdb::BoundedQueue objects.
51 |
52 | --autorun or --noautorun
53 | By default, programs process automatically, but Fsdb::Filter objects
54 | in Perl do not run until you invoke the **run()** method. The
55 | ``--(no)autorun`` option controls that behavior within Perl.
56 |
57 | --help
58 | Show help.
59 |
60 | --man
61 | Show full manual.
62 |
63 | SAMPLE USAGE
64 | ------------
65 |
66 | Input:
67 | ------
68 |
69 | ::
70 |
71 |    #fsdb diff
72 |    0.0
73 |    00.000938
74 |    00.001611
75 |    00.001736
76 |    00.002006
77 |    00.002049
78 |    #  | /home/johnh/BIN/DB/dbrow
79 |    #  | /home/johnh/BIN/DB/dbcol diff
80 |    #  | dbsort diff
72 |
73 | Command:
74 | --------
75 |
76 | cat DATA/kitrace.fsdb \| dbrowaccumulate -c diff
77 |
78 | Output:
79 | -------
80 |
81 | ::
82 |
83 |    #fsdb diff accum
84 |    0.0 0
85 |    00.000938 .000938
86 |    00.001611 .002549
87 |    00.001736 .004285
88 |    00.002006 .006291
89 |    00.002049 .00834
90 |    #  | /home/johnh/BIN/DB/dbrow
91 |    #  | /home/johnh/BIN/DB/dbcol diff
92 |    #  | dbsort diff
93 |    #  | /home/johnh/BIN/DB/dbrowaccumulate diff
85 |
86 | SEE ALSO
87 | --------
88 |
89 | Fsdb, dbrowenumerate.
90 |
91 | AUTHOR and COPYRIGHT
92 | --------------------
93 |
94 | Copyright (C) 1991-2022 by John Heidemann
95 |
96 | This program is distributed under terms of the GNU general public
97 | license, version 2. See the file COPYING with the distribution for
98 | details.
99 |
--------------------------------------------------------------------------------
/docs/perl/dbrowcount.rst:
--------------------------------------------------------------------------------
1 | dbrowcount - count the number of rows in an Fsdb stream
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbrowcount
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Count the number of rows and write out a new fsdb file with one column
15 | (n) and one value: the number of rows. This program is a strict subset
16 | of dbcolstats.
17 |
18 | Although there are other ways to get a count of rows (``dbcolstats``, or
19 | ``dbrowaccumulate -C 1`` and some processing), counting is so common it
20 | warrants its own command. (For example, consider how often ``wc -l`` is
21 | used in regular shell scripting.) There are some gross and subtle
22 | differences, though, in that ``dbrowcount`` doesn't require one to
23 | specify a column to search, and it also doesn't look for and skip null
24 | data items.
25 |
26 | OPTIONS
27 | -------
28 |
29 | No program-specific options.
30 |
31 | This module also supports the standard fsdb options:
32 |
33 | -d
34 | Enable debugging output.
35 |
36 | -i or --input InputSource
37 | Read from InputSource, typically a file name, or ``-`` for standard
38 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
39 | objects.
40 |
41 | -o or --output OutputDestination
42 | Write to OutputDestination, typically a file name, or ``-`` for
43 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
44 | Fsdb::BoundedQueue objects.
45 |
46 | --autorun or --noautorun
47 | By default, programs process automatically, but Fsdb::Filter objects
48 | in Perl do not run until you invoke the **run()** method. The
49 | ``--(no)autorun`` option controls that behavior within Perl.
50 |
51 | --help
52 | Show help.
53 |
54 | --man
55 | Show full manual.
56 |
57 | SAMPLE USAGE
58 | ------------
59 |
60 | Input:
61 | ------
62 |
63 | ::
64 |
65 |    #fsdb absdiff
66 |    0
67 |    0.046953
68 |    0.072074
69 |    0.075413
70 |    0.094088
71 |    0.096602
72 |    #  | /home/johnh/BIN/DB/dbrow
73 |    #  | /home/johnh/BIN/DB/dbcol event clock
74 |    #  | dbrowdiff clock
75 |    #  | /home/johnh/BIN/DB/dbcol absdiff
66 |
67 | Command:
68 | --------
69 |
70 | cat data.fsdb \| dbrowcount
71 |
72 | Output:
73 | -------
74 |
75 | ::
76 |
77 |    #fsdb n
78 |    6
79 |    #  | /home/johnh/BIN/DB/dbrow
80 |    #  | /home/johnh/BIN/DB/dbcol event clock
81 |    #  | dbrowdiff clock
82 |    #  | /home/johnh/BIN/DB/dbcol absdiff
77 |
78 | Input 2:
79 | --------
80 |
81 | As another example, this input produces the same output as above in
82 | ``dbrowcount``, but different output in ``dbstats``:
83 |
84 | ::
85 |
86 |    #fsdb absdiff
87 |    -
88 |    -
89 |    -
90 |    -
91 |    -
92 |    -
93 |    #  | /home/johnh/BIN/DB/dbrow
94 |    #  | /home/johnh/BIN/DB/dbcol event clock
95 |    #  | dbrowdiff clock
96 |    #  | /home/johnh/BIN/DB/dbcol absdiff
87 |
88 | SEE ALSO
89 | --------
90 |
91 | **dbcolaccumulate** (1), **dbcolstats** (1), **Fsdb** (3)
92 |
93 | AUTHOR and COPYRIGHT
94 | --------------------
95 |
96 | Copyright (C) 2007-2022 by John Heidemann
97 |
98 | This program is distributed under terms of the GNU general public
99 | license, version 2. See the file COPYING with the distribution for
100 | details.
101 |
--------------------------------------------------------------------------------
/docs/perl/dbrowenumerate.rst:
--------------------------------------------------------------------------------
1 | dbrowenumerate - enumerate rows, starting from zero
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | dbrowenumerate
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Add a new column ``count``, incremented for each row of data, starting
15 | with zero. Use dbrowaccumulate for control over initial value or
16 | increment; this module is just a wrapper around that.
17 |
18 | OPTIONS
19 | -------
20 |
21 | -N or --new-name N
22 | Name the new column N. Defaults to ``count``.
23 |
24 | This module also supports the standard fsdb options:
25 |
26 | -d
27 | Enable debugging output.
28 |
29 | -i or --input InputSource
30 | Read from InputSource, typically a file name, or ``-`` for standard
31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
32 | objects.
33 |
34 | -o or --output OutputDestination
35 | Write to OutputDestination, typically a file name, or ``-`` for
36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
37 | Fsdb::BoundedQueue objects.
38 |
39 | --autorun or --noautorun
40 | By default, programs process automatically, but Fsdb::Filter objects
41 | in Perl do not run until you invoke the **run()** method. The
42 | ``--(no)autorun`` option controls that behavior within Perl.
43 |
44 | --help
45 | Show help.
46 |
47 | --man
48 | Show full manual.
49 |
50 | SAMPLE USAGE
51 | ------------
52 |
53 | Input:
54 | ------
55 |
56 | ::
57 |
58 |    #h account passwd uid gid fullname homedir shell
59 |    johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
60 |    greg * 2275 134 Greg_Johnson /home/greg /bin/bash
61 |    root * 0 0 Root /root /bin/bash
62 |    # this is a simple database
60 |
61 | Command:
62 | --------
63 |
64 | cat DATA/passwd.jdb \| dbrowenumerate
65 |
66 | Output:
67 | -------
68 |
69 | ::
70 |
71 |    #h account passwd uid gid fullname homedir shell count
72 |    johnh * 2274 134 John_Heidemann /home/johnh /bin/bash 0
73 |    greg * 2275 134 Greg_Johnson /home/greg /bin/bash 1
74 |    root * 0 0 Root /root /bin/bash 2
75 |    # this is a simple database
76 |    #  | /home/johnh/BIN/DB/dbrowenumerate
73 |
74 | SEE ALSO
75 | --------
76 |
77 | Fsdb, dbrowaccumulate.
78 |
--------------------------------------------------------------------------------
/docs/perl/dbstats.rst:
--------------------------------------------------------------------------------
1 | dbstats - DEPRECATED, now use dbcolstats
2 | ======================================================================
3 |
--------------------------------------------------------------------------------
/docs/perl/html_table_to_db.rst:
--------------------------------------------------------------------------------
1 | html_table_to_db - convert HTML tables into fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | html_table_to_db dest.fsdb
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Converts an HTML table to Fsdb format.
15 |
16 | The input is an HTML table (*not* fsdb). Column names are taken from
17 | ``TH`` elements, or defined as ``column0`` through ``columnN`` if no
18 | such elements appear.
19 |
20 | The output is two-space-separated fsdb. (Someday more general field
21 | separators should be supported.) Fsdb fields are a normalized version of
22 | the HTML file: multiple spaces are compressed to one.
23 |
24 | This module also supports the standard fsdb options:
25 |
26 | -d
27 | Enable debugging output.
28 |
29 | -i or --input InputSource
30 | Read from InputSource, typically a file name, or ``-`` for standard
31 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
32 | objects.
33 |
34 | -o or --output OutputDestination
35 | Write to OutputDestination, typically a file name, or ``-`` for
36 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
37 | Fsdb::BoundedQueue objects.
38 |
39 | --autorun or --noautorun
40 | By default, programs process automatically, but Fsdb::Filter objects
41 | in Perl do not run until you invoke the **run()** method. The
42 | ``--(no)autorun`` option controls that behavior within Perl.
43 |
44 | --help
45 | Show help.
46 |
47 | --man
48 | Show full manual.
49 |
50 | SAMPLE USAGE
51 | ------------
52 |
53 | Input:
54 | ------
55 |
56 | ::
57 |
58 |    account | passwd | uid | gid | fullname | homedir | shell
59 |    johnh | * | 2274 | 134 | John & Ampersand | /home/johnh | /bin/bash
60 |    greg | * | 2275 | 134 | Greg < Lessthan | /home/greg | /bin/bash
61 |    root | * | 0 | 0 | Root ; Semi | /root | /bin/bash
62 |    four | * | 1 | 1 | Fourth Row | /home/four | /bin/bash
67 |
68 | Command:
69 | --------
70 |
71 | html_table_to_db
72 |
73 | Output:
74 | -------
75 |
76 | ::
77 |
78 |    #fsdb -F S account passwd uid gid fullname homedir shell
79 |    johnh  *  2274  134  John & Ampersand  /home/johnh  /bin/bash
80 |    greg  *  2275  134  Greg < Lessthan  /home/greg  /bin/bash
81 |    root  *  0  0  Root ; Semi  /root  /bin/bash
82 |    four  *  1  1  Fourth Row  /home/four  /bin/bash
80 |
81 | SEE ALSO
82 | --------
83 |
84 | Fsdb. db_to_html_table.
85 |
86 | AUTHOR and COPYRIGHT
87 | --------------------
88 |
89 | Copyright (C) 1991-2015 by John Heidemann
90 |
91 | This program is distributed under terms of the GNU general public
92 | license, version 2. See the file COPYING with the distribution for
93 | details.
94 |
--------------------------------------------------------------------------------
/docs/perl/ns_to_db.rst:
--------------------------------------------------------------------------------
1 | ns_to_db - convert one of ns's output formats to fsdb
2 | ======================================================================
3 |
4 | KNOWN BUGS
5 | ----------
6 |
7 | No test case.
8 |
--------------------------------------------------------------------------------
/docs/perl/sqlselect_to_db.rst:
--------------------------------------------------------------------------------
1 | sqlselect_to_db - convert MySQL or MariaDB selected tables to fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | sqlselect_to_db dest.fsdb
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Converts MySQL or MariaDB selected tables to Fsdb format.
15 |
16 | The input is *not* fsdb. The first non-box row is taken to be the names
17 | of the columns.
18 |
19 | The output is two-space-separated fsdb. (Someday more general field
20 | separators should be supported.)
21 |
22 | This module also supports the standard fsdb options:
23 |
24 | -d
25 | Enable debugging output.
26 |
27 | -i or --input InputSource
28 | Read from InputSource, typically a file name, or ``-`` for standard
29 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
30 | objects.
31 |
32 | -o or --output OutputDestination
33 | Write to OutputDestination, typically a file name, or ``-`` for
34 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
35 | Fsdb::BoundedQueue objects.
36 |
37 | --autorun or --noautorun
38 | By default, programs process automatically, but Fsdb::Filter objects
39 | in Perl do not run until you invoke the **run()** method. The
40 | ``--(no)autorun`` option controls that behavior within Perl.
41 |
42 | --help
43 | Show help.
44 |
45 | --man
46 | Show full manual.
47 |
48 | SAMPLE USAGE
49 | ------------
50 |
51 | Input:
52 | ------
53 |
54 | ::
55 |
56 |    +----------------+---------------+--------------------+------+-------------------------+
57 |    | username       | firstname     | lastname           | id   | email                   |
58 |    +----------------+---------------+--------------------+------+-------------------------+
59 |    | johnh          | John          | Heidemann          | 134  | johnh@isi.edu           |
60 |    +----------------+---------------+--------------------+------+-------------------------+
61 |    1 row in set (0.01 sec)
60 |
61 | Command:
62 | --------
63 |
64 | sqlselect_to_db
65 |
66 | Output:
67 | -------
68 |
69 | ::
70 |
71 |    #fsdb -F S username firstname lastname id email
72 |    johnh  John  Heidemann  134  johnh@isi.edu
73 |    #  | sqlselect_to_db
71 |
72 | SEE ALSO
73 | --------
74 |
75 | Fsdb. db_to_csv.
76 |
77 | AUTHOR and COPYRIGHT
78 | --------------------
79 |
80 | Copyright (C) 2014-2018 by John Heidemann
81 |
82 | This program is distributed under terms of the GNU general public
83 | license, version 2. See the file COPYING with the distribution for
84 | details.
85 |
--------------------------------------------------------------------------------
/docs/perl/tabdelim_to_db.rst:
--------------------------------------------------------------------------------
1 | tabdelim_to_db - convert tab-delimited data into fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | tabdelim_to_db target.fsdb
10 |
11 | DESCRIPTION
12 | -----------
13 |
14 | Converts a tab-delimited data stream to Fsdb format.
15 |
16 | The input is tab-delimited (*not* fsdb): the first row is taken to be
17 | the names of the columns; tabs separate columns.
18 |
19 | The output is a fsdb file with a proper header and a tab
20 | field-separator.
21 |
22 | This module also supports the standard fsdb options:
23 |
24 | -d
25 | Enable debugging output.
26 |
27 | -i or --input InputSource
28 | Read from InputSource, typically a file name, or ``-`` for standard
29 | input, or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue
30 | objects.
31 |
32 | -o or --output OutputDestination
33 | Write to OutputDestination, typically a file name, or ``-`` for
34 | standard output, or (if in Perl) a IO::Handle, Fsdb::IO or
35 | Fsdb::BoundedQueue objects.
36 |
37 | --autorun or --noautorun
38 | By default, programs process automatically, but Fsdb::Filter objects
39 | in Perl do not run until you invoke the **run()** method. The
40 | ``--(no)autorun`` option controls that behavior within Perl.
41 |
42 | --help
43 | Show help.
44 |
45 | --man
46 | Show full manual.
47 |
48 | SAMPLE USAGE
49 | ------------
50 |
51 | Input:
52 | ------
53 |
54 | ::
55 |
56 |    name	email	test1
57 |    Tommy Trojan	tt@usc.edu	80
58 |    Joe Bruin	joeb@ucla.edu	85
59 |    J. Random	jr@caltech.edu	90
56 |
57 | Command:
58 | --------
59 |
60 | tabdelim_to_db
61 |
62 | Output:
63 | -------
64 |
65 | ::
66 |
67 |    #fsdb -Ft name email test1
68 |    Tommy Trojan	tt@usc.edu	80
69 |    Joe Bruin	joeb@ucla.edu	85
70 |    J. Random	jr@caltech.edu	90
71 |    #  | dbcoldefine name email test1
68 |
69 | SEE ALSO
70 | --------
71 |
72 | Fsdb.
73 |
74 | AUTHOR and COPYRIGHT
75 | --------------------
76 |
77 | Copyright (C) 1991-2008 by John Heidemann
78 |
79 | This program is distributed under terms of the GNU general public
80 | license, version 2. See the file COPYING with the distribution for
81 | details.
82 |
--------------------------------------------------------------------------------
/docs/perl/xml_to_db.rst:
--------------------------------------------------------------------------------
1 | xml_to_db - convert a subset of XML into fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | xml_to_db -k EntityField
75 |
76 |
77 | Command:
78 | --------
79 |
80 | xml_to_db -k files/file
99 |
100 | This program is distributed under terms of the GNU general public
101 | license, version 2. See the file COPYING with the distribution for
102 | details.
103 |
--------------------------------------------------------------------------------
/docs/perl/yaml_to_db.rst:
--------------------------------------------------------------------------------
1 | yaml_to_db - convert a subset of YAML into fsdb
2 | ======================================================================
3 |
4 | *NOTE: this page was directly converted from the perl FSDB manual pages from FSDB version 3.1*
5 |
6 | SYNOPSIS
7 | --------
8 |
9 | yaml_to_db
92 |
93 | This program is distributed under terms of the GNU general public
94 | license, version 2. See the file COPYING with the distribution for
95 | details.
96 |
--------------------------------------------------------------------------------
/docs/perltools.rst:
--------------------------------------------------------------------------------
1 | Perl FSDB Tools
2 | ===============
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: FSDB Perl Scripts:
7 |
8 | perl/cgi_to_db.rst
9 | perl/combined_log_format_to_db.rst
10 | perl/csv_to_db.rst
11 | perl/dbcolcopylast.rst
12 | perl/dbcolcreate.rst
13 | perl/dbcoldefine.rst
14 | perl/dbcolhisto.rst
15 | perl/dbcolize.rst
16 | perl/dbcolmerge.rst
17 | perl/dbcolmovingstats.rst
18 | perl/dbcolneaten.rst
19 | perl/dbcolpercentile.rst
20 | perl/dbcolrename.rst
21 | perl/dbcol.rst
22 | perl/dbcolscorrelate.rst
23 | perl/dbcolsplittocols.rst
24 | perl/dbcolsplittorows.rst
25 | perl/dbcolsregression.rst
26 | perl/dbcolstatscores.rst
27 | perl/dbcolstats.rst
28 | perl/dbcoltype.rst
29 | perl/dbfilealter.rst
30 | perl/dbfilecat.rst
31 | perl/dbfilediff.rst
32 | perl/dbfilepivot.rst
33 | perl/dbfilestripcomments.rst
34 | perl/dbfilevalidate.rst
35 | perl/dbformmail.rst
36 | perl/dbjoin.rst
37 | perl/dblistize.rst
38 | perl/dbmapreduce.rst
39 | perl/dbmerge2.rst
40 | perl/dbmerge.rst
41 | perl/dbmultistats.rst
42 | perl/dbrecolize.rst
43 | perl/dbrowaccumulate.rst
44 | perl/dbrowcount.rst
45 | perl/dbrowdiff.rst
46 | perl/dbrowenumerate.rst
47 | perl/dbroweval.rst
48 | perl/dbrow.rst
49 | perl/dbrowuniq.rst
50 | perl/dbrvstatdiff.rst
51 | perl/dbsort.rst
52 | perl/dbstats.rst
53 | perl/db_to_csv.rst
54 | perl/db_to_html_table.rst
55 | perl/html_table_to_db.rst
56 | perl/kitrace_to_db.rst
57 | perl/ns_to_db.rst
58 | perl/sqlselect_to_db.rst
59 | perl/tabdelim_to_db.rst
60 | perl/tcpdump_to_db.rst
61 | perl/xml_to_db.rst
62 | perl/yaml_to_db.rst
63 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx-argparse_cli
3 | sphinx-argparse
4 | myst-parser
5 | dateparser
6 | matplotlib
7 |
--------------------------------------------------------------------------------
/docs/tools/images/myheat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/docs/tools/images/myheat.png
--------------------------------------------------------------------------------
/docs/tools/index.rst:
--------------------------------------------------------------------------------
1 | PyFSDB Command Line Tools
2 | =========================
3 |
4 | The following shell tools come with PyFSDB and can be used for generic
5 | command line processing of FSDB data. We break the list of tools up
6 | into different categories (although some tools may technically belong to
7 | multiple categories, we place them in only one).
8 |
9 | Note: the `python`-based tools begin with the `pdb` prefix to
10 | distinguish themselves from their `perl` counterparts (which begin
11 | with `db`).
12 |
13 |
14 | Data filtering and modification tools
15 | -------------------------------------
16 |
17 | .. toctree::
18 | :maxdepth: 1
19 | :caption: Contents:
20 |
21 | pdbrow
22 | pdbroweval
23 | pdbensure
24 | pdbaugment
25 | pdbfgrep
26 | pdbnormalize
27 | pdbcdf
28 | pdbdatetoepoch
29 | pdbepochtodate
30 | pdbkeyedsort
31 | pdbsum
32 | pdbzerofill
33 |
34 | Data conversion tools
35 | ---------------------
36 |
37 | .. toctree::
38 | :maxdepth: 1
39 | :caption: Contents:
40 |
41 | pdb2to1
42 | pdbaddtypes
43 | pdbformat
44 | pdbjinja
45 | pdb2tex
46 | pdb2sql
47 | pdbsplitter
48 | pdbfullpivot
49 | pdbreescape
50 |
51 | Data analysis tools
52 | -------------------
53 |
54 | .. toctree::
55 | :maxdepth: 1
56 | :caption: Contents:
57 |
58 | pdbcoluniq
59 | pdbtopn
60 | pdbheatmap
61 |
--------------------------------------------------------------------------------
/docs/tools/pdb2sql.md:
--------------------------------------------------------------------------------
1 | ### pdb2sql - uploads an FSDB file into a database
2 |
3 | `pdb2sql` uploads the contents of an FSDB file into an SQL database table. Specifically, it can create a table, delete existing rows, add indexes to certain columns, add additional columns and values, etc. It currently supports two different types of databases (*sqlite3* and *postgres*), which are selectable by the *-t* switch.
4 |
5 | #### Example input (*myfile.fsdb*):
6 |
7 | ```
8 | #fsdb -F t col1:l two:a andthree:d
9 | 1 key1 42.0
10 | 2 key2 123.0
11 | 3 key1 90.2
12 | ```
13 |
14 | #### Example command usage
15 |
16 | ```
17 | $ pdb2sql -T newtable -i two -t sqlite3 myfile.fsdb output.sqlite3
18 | $ echo "select * from newtable" | sqlite3 output.sqlite3
19 | ```
20 |
21 | #### Example output
22 |
23 | ```
24 | 1|key1|42.0
25 | 2|key2|123.0
26 | 3|key1|90.2
27 | ```
28 |
29 |
--------------------------------------------------------------------------------
/docs/tools/pdb2sql.rst:
--------------------------------------------------------------------------------
1 | pdb2sql - uploads an FSDB file into a database
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdb2sql`` uploads the contents of an FSDB file into an SQL database
5 | table. Specifically, it can create a table, delete existing rows, add
6 | indexes to certain columns, add additional columns and values, etc. It
7 | currently supports two different types of databases (*sqlite3* and
8 | *postgres*), which are selectable by the *-t* switch.
9 |
10 | Example input (*myfile.fsdb*):
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | ::
14 |
15 | #fsdb -F t col1:l two:a andthree:d
16 | 1 key1 42.0
17 | 2 key2 123.0
18 | 3 key1 90.2
19 |
20 | Example command usage
21 | ^^^^^^^^^^^^^^^^^^^^^
22 |
23 | ::
24 |
25 | $ pdb2sql -T newtable -i two -t sqlite3 myfile.fsdb output.sqlite3
26 | $ echo "select * from newtable" | sqlite3 output.sqlite3
27 |
28 | Example output
29 | ^^^^^^^^^^^^^^
30 |
31 | ::
32 |
33 | 1|key1|42.0
34 | 2|key2|123.0
35 | 3|key1|90.2
36 |
37 |
38 | Command Line Arguments
39 | ^^^^^^^^^^^^^^^^^^^^^^
40 |
41 | .. sphinx_argparse_cli::
42 | :module: pyfsdb.tools.pdb2sql
43 | :func: parse_args
44 | :hook:
45 | :prog: pdb2sql
46 |
--------------------------------------------------------------------------------
/docs/tools/pdb2tex.md:
--------------------------------------------------------------------------------
1 | ### pdb2tex - create a latex table using the data in a FSDB file
2 |
3 | `pdb2tex` converts an FSDB file into a latex table/tabular output
4 |
5 | #### Example input (*myfile.fsdb*):
6 |
7 | ```
8 | #fsdb -F t col1:l two:a andthree:d
9 | 1 key1 42.0
10 | 2 key2 123.0
11 | 3 key1 90.2
12 | ```
13 |
14 | #### Example command usage
15 |
16 | ```
17 | $ pdb2tex myfile.fsdb
18 |
19 | ```
20 |
21 | #### Example output
22 |
23 | ``` latex
24 | \begin{table}
25 | \begin{tabular}{lll}
26 | \textbf{col1} & \textbf{two} & \textbf{andthree} \\
27 | 1 & key1 & 42.0 \\
28 | 2 & key2 & 123.0 \\
29 | 3 & key1 & 90.2 \\
30 | \end{tabular}
31 | \end{table}
32 | ```
33 |
34 |
--------------------------------------------------------------------------------
/docs/tools/pdb2tex.rst:
--------------------------------------------------------------------------------
1 | pdb2tex - create a latex table using the data in a FSDB file
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdb2tex`` converts an FSDB file into a latex table/tabular output
5 |
6 | Example input (*myfile.fsdb*):
7 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8 |
9 | ::
10 |
11 | #fsdb -F t col1:l two:a andthree:d
12 | 1 key1 42.0
13 | 2 key2 123.0
14 | 3 key1 90.2
15 |
16 | Example command usage
17 | ^^^^^^^^^^^^^^^^^^^^^
18 |
19 | ::
20 |
21 | $ pdb2tex myfile.fsdb
22 |
23 | Example output
24 | ^^^^^^^^^^^^^^
25 |
26 | .. code:: latex
27 |
28 | \begin{table}
29 | \begin{tabular}{lll}
30 | \textbf{col1} & \textbf{two} & \textbf{andthree} \\
31 | 1 & key1 & 42.0 \\
32 | 2 & key2 & 123.0 \\
33 | 3 & key1 & 90.2 \\
34 | \end{tabular}
35 | \end{table}
36 |
37 |
38 | Command Line Arguments
39 | ^^^^^^^^^^^^^^^^^^^^^^
40 |
41 | .. sphinx_argparse_cli::
42 | :module: pyfsdb.tools.pdb2tex
43 | :func: parse_args
44 | :hook:
45 | :prog: pdb2tex
46 |
--------------------------------------------------------------------------------
/docs/tools/pdb2to1.md:
--------------------------------------------------------------------------------
1 | ### pdb2to1 - strip typing information from the FSDB header
2 |
3 | `pdb2to1` simply removes typing information that may confuse older
4 | FSDB or pyfsdb tools that do not understand datatypes in the
5 | headers. Datatypes were introduced into FSDB format version 2. To
6 | add or change types instead, use `pdbaddtypes`.
7 |
8 | #### Example input (*myfile.fsdb*):
9 |
10 | ```
11 | #fsdb -F t col1:l two:a andthree:d
12 | 1 key1 42.0
13 | 2 key2 123.0
14 | 3 key1 90.2
15 | ```
16 |
17 | #### Example command usage
18 |
19 | ```
20 | $ pdb2to1 myfile.fsdb
21 | ```
22 |
23 | #### Example output
24 |
25 | ```
26 | #fsdb -F t col1 two andthree
27 | 1 key1 42.0
28 | 2 key2 123.0
29 | 3 key1 90.2
30 | ```
31 |
32 |
--------------------------------------------------------------------------------
/docs/tools/pdb2to1.rst:
--------------------------------------------------------------------------------
1 | pdb2to1 - strip typing information from the FSDB header
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdb2to1`` simply removes typing information that may confuse older
5 | FSDB or pyfsdb tools that do not understand datatypes in the headers.
6 | Datatypes were introduced into FSDB format version 2. To add or change
7 | types instead, use ``pdbaddtypes``.
8 |
9 | Example input (*myfile.fsdb*):
10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 |
12 | ::
13 |
14 | #fsdb -F t col1:l two:a andthree:d
15 | 1 key1 42.0
16 | 2 key2 123.0
17 | 3 key1 90.2
18 |
19 | Example command usage
20 | ^^^^^^^^^^^^^^^^^^^^^
21 |
22 | ::
23 |
24 | $ pdb2to1 myfile.fsdb
25 |
26 | Example output
27 | ^^^^^^^^^^^^^^
28 |
29 | ::
30 |
31 | #fsdb -F t col1 two andthree
32 | 1 key1 42.0
33 | 2 key2 123.0
34 | 3 key1 90.2
35 |
36 |
37 | Command Line Arguments
38 | ^^^^^^^^^^^^^^^^^^^^^^
39 |
40 | .. sphinx_argparse_cli::
41 | :module: pyfsdb.tools.pdb2to1
42 | :func: parse_args
43 | :hook:
44 | :prog: pdb2to1
45 |
--------------------------------------------------------------------------------
/docs/tools/pdbaddtypes.md:
--------------------------------------------------------------------------------
1 | ### pdbaddtypes - add typing information to the FSDB header
2 |
3 | `pdbaddtypes` adds datatypes to the header so languages with typing
4 | support (eg, Python and Go) can parse FSDB files and output properly
5 | typed variables. Older data, or data generated by older tools may not
6 | be properly typed. The *-a* flag can be used to attempt auto-typing,
7 | but is based on analyzing only the first row.
8 |
9 | #### Example input (*myfile.fsdb*):
10 |
11 | ```
12 | #fsdb -F t col1 two andthree
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 | ```
17 |
18 | #### Example command usage
19 |
20 | ```
21 | $ pdbaddtypes -a myfile.fsdb
22 | ```
23 |
24 | #### Example output
25 |
26 | ```
27 | #fsdb -F t col1:l two:a andthree:d
28 | 1 key1 42.0
29 | 2 key2 123.0
30 | 3 key1 90.2
31 | ```
32 |
33 | #### Example command usage with specified typing
34 |
35 | ```
36 | $ pdbaddtypes -t col1=l andthree=d -- myfile-notypes.fsdb
37 | ```
38 |
39 | #### Example output
40 |
41 | ```
42 | #fsdb -F t col1:l two andthree:d
43 | 1 key1 42.0
44 | 2 key2 123.0
45 | 3 key1 90.2
46 | ```
47 |
48 |
--------------------------------------------------------------------------------
/docs/tools/pdbaddtypes.rst:
--------------------------------------------------------------------------------
1 | pdbaddtypes - add typing information to the FSDB header
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbaddtypes`` adds datatypes to the header so languages with typing
5 | support (eg, Python and Go) can parse FSDB files and output properly
6 | typed variables. Older data, or data generated by older tools may not be
7 | properly typed. The *-a* flag can be used to attempt auto-typing, but is
8 | based on analyzing only the first row.
9 |
10 | Example input (*myfile.fsdb*):
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | ::
14 |
15 | #fsdb -F t col1 two andthree
16 | 1 key1 42.0
17 | 2 key2 123.0
18 | 3 key1 90.2
19 |
20 | Example command usage
21 | ^^^^^^^^^^^^^^^^^^^^^
22 |
23 | ::
24 |
25 | $ pdbaddtypes -a myfile.fsdb
26 |
27 | Example output
28 | ^^^^^^^^^^^^^^
29 |
30 | ::
31 |
32 | #fsdb -F t col1:l two:a andthree:d
33 | 1 key1 42.0
34 | 2 key2 123.0
35 | 3 key1 90.2
36 |
37 | Example command usage with specified typing
38 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
39 |
40 | ::
41 |
42 | $ pdbaddtypes -t col1=l andthree=d -- myfile-notypes.fsdb
43 |
44 | .. _example-output-1:
45 |
46 | Example output
47 | ^^^^^^^^^^^^^^
48 |
49 | ::
50 |
51 | #fsdb -F t col1:l two andthree:d
52 | 1 key1 42.0
53 | 2 key2 123.0
54 | 3 key1 90.2
55 |
56 |
57 | Command Line Arguments
58 | ^^^^^^^^^^^^^^^^^^^^^^
59 |
60 | .. sphinx_argparse_cli::
61 | :module: pyfsdb.tools.pdbaddtypes
62 | :func: parse_args
63 | :hook:
64 | :prog: pdbaddtypes
65 |
--------------------------------------------------------------------------------
/docs/tools/pdbaugment.md:
--------------------------------------------------------------------------------
1 | ### pdbaugment - join rows from one FSDB file into another
2 |
3 | `pdbaugment` provides a different mechanism for doing FSDB file joins
4 | than the `dbjoin` command from the base perl FSDB package.
5 | Specifically, `pdbaugment` is designed to read a single file entirely
6 | into memory and use it to augment a second one that is read in a
7 | streaming style. `pdbaugment` has the advantage of being faster because
8 | it does not need to do a full sort of both files, like `dbjoin`
9 | requires, but has the downside of needing to store one file in memory
10 | while performing the join. In general, the smaller file should be
11 | used as the *augment_file* argument, and the larger as the
12 | `stream_file` when possible. Matching keys in the augment file should
13 | be unique across the file, otherwise only the second row with a given
14 | key combination will be used.
15 |
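16 | The core idea can be sketched in a few lines of plain Python (an
17 | illustration of the approach only, not the tool's actual source): the
18 | augment file is loaded into a dictionary keyed on the join column, and
19 | the streamed file is then extended row by row.
20 |
21 | ```python
22 | # Hypothetical rows standing in for the two example files below.
23 | augment = {"key1": ["blue"], "key2": ["brown"]}   # key -> extra values
24 |
25 | stream_rows = [
26 |     ["1", "key1", "42.0"],
27 |     ["2", "key2", "123.0"],
28 |     ["3", "key1", "90.2"],
29 | ]
30 |
31 | for row in stream_rows:
32 |     # row[1] is the "two" key column; unmatched keys get an empty value.
33 |     print(row + augment.get(row[1], [""]))
34 | ```
35 |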
16 | #### Example input file 1 (*myfile.fsdb*):
17 |
18 | ```
19 | #fsdb -F t col1 two andthree
20 | 1 key1 42.0
21 | 2 key2 123.0
22 | 3 key1 90.2
23 | ```
24 |
25 | #### Example input file 2 (*augment.fsdb*):
26 |
27 | ```
28 | #fsdb -F t col1 additional_column
29 | key1 blue
30 | key2 brown
31 | ```
32 |
33 | #### Example command usage
34 |
35 | ```
36 | $ pdbaugment -k two -v additional_column -- myfile.fsdb augment.fsdb
37 | ```
38 |
39 | #### Example output
40 |
41 | ```
42 | #fsdb -F t col1:l two:a andthree:d additional_column:a
43 | 1 key1 42.0 blue
44 | 2 key2 123.0 brown
45 | 3 key1 90.2 blue
46 | ```
47 |
48 |
--------------------------------------------------------------------------------
/docs/tools/pdbaugment.rst:
--------------------------------------------------------------------------------
1 | pdbaugment - join rows from one FSDB file into another
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbaugment`` provides a different mechanism for doing FSDB file joins
5 | than the ``dbjoin`` command from the base perl FSDB package.
6 | Specifically, ``pdbaugment`` is designed to read a single file entirely
7 | into memory and use it to augment a second one that is read in a streaming
8 | style. ``pdbaugment`` has the advantage of being faster because it does not
9 | need to do a full sort of both files, like ``dbjoin`` requires, but has
10 | the downside of needing to store one file in memory while performing the
11 | join. In general, the smaller file should be used as the *augment_file*
12 | argument, and the larger as the ``stream_file`` when possible. Matching
13 | keys in the augment file should be unique across the file, otherwise
14 | only the second row with a given key combination will be used.
15 |
16 | Example input file 1 (*myfile.fsdb*):
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18 |
19 | ::
20 |
21 | #fsdb -F t col1 two andthree
22 | 1 key1 42.0
23 | 2 key2 123.0
24 | 3 key1 90.2
25 |
26 | Example input file 2 (*augment.fsdb*):
27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
28 |
29 | ::
30 |
31 | #fsdb -F t col1 additional_column
32 | key1 blue
33 | key2 brown
34 |
35 | Example command usage
36 | ^^^^^^^^^^^^^^^^^^^^^
37 |
38 | ::
39 |
40 | $ pdbaugment -k two -v additional_column -- myfile.fsdb augment.fsdb
41 |
42 | Example output
43 | ^^^^^^^^^^^^^^
44 |
45 | ::
46 |
47 | #fsdb -F t col1:l two:a andthree:d additional_column:a
48 | 1 key1 42.0 blue
49 | 2 key2 123.0 brown
50 | 3 key1 90.2 blue
51 |
52 |
53 | Command Line Arguments
54 | ^^^^^^^^^^^^^^^^^^^^^^
55 |
56 | .. sphinx_argparse_cli::
57 | :module: pyfsdb.tools.pdbaugment
58 | :func: parse_args
59 | :hook:
60 | :prog: pdbaugment
61 |
--------------------------------------------------------------------------------
/docs/tools/pdbcdf.md:
--------------------------------------------------------------------------------
1 | ### pdbcdf - produce normalized CDF columns from a data column
2 |
3 | `pdbcdf` analyzes one column from an FSDB file to produce normalized
4 | CDF related columns.
5 |
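6 | A minimal sketch of the computation in plain Python (an illustration
7 | only, not the tool's actual source): the column total is computed
8 | first, then each row gets a cumulative fraction, a running sum, and a
9 | per-row percentage.
10 |
11 | ```python
12 | # Values of the "andthree" column from the example input below.
13 | values = [42.0, 123.0, 90.2]
14 | total = sum(values)
15 |
16 | running = 0.0
17 | for v in values:
18 |     running += v
19 |     # cdf, raw running sum, and percent of the total for this row
20 |     print(v, running / total, running, 100.0 * v / total)
21 | ```
22 |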
6 | #### Example input (*myfile.fsdb*):
7 |
8 | ```
9 | #fsdb -F s col1:l two:a andthree:d
10 | 1 key1 42.0
11 | 2 key2 123.0
12 | 3 key1 90.2
13 | ```
14 |
15 | #### Example command usage
16 |
17 | ```
18 | $ pdbcdf -c andthree -P percent -R raw myfile.fsdb
19 | ```
20 |
21 | #### Example output
22 |
23 | ```
24 | #fsdb -F t col1 two andthree andthree_cdf raw percent
25 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837
26 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941
27 | 3 key1 90.2 1.0 255.2 35.3448275862069
28 | ...
29 | ```
30 |
--------------------------------------------------------------------------------
/docs/tools/pdbcdf.rst:
--------------------------------------------------------------------------------
1 | pdbcdf - produce normalized CDF columns from a data column
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbcdf`` analyzes one column from an FSDB file to produce normalized
5 | CDF related columns.
6 |
7 | Example input (*myfile.fsdb*):
8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | ::
11 |
12 | #fsdb -F s col1:l two:a andthree:d
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbcdf -c andthree -P percent -R raw myfile.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t col1 two andthree andthree_cdf raw percent
30 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837
31 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941
32 | 3 key1 90.2 1.0 255.2 35.3448275862069
33 | ...
34 |
35 |
36 | Command Line Arguments
37 | ^^^^^^^^^^^^^^^^^^^^^^
38 |
39 | .. sphinx_argparse_cli::
40 | :module: pyfsdb.tools.pdbcdf
41 | :func: parse_args
42 | :hook:
43 | :prog: pdbcdf
44 |
--------------------------------------------------------------------------------
/docs/tools/pdbcoluniq.md:
--------------------------------------------------------------------------------
1 | ### pdbcoluniq - find all unique values of a key column
2 |
3 | `pdbcoluniq` can find all unique values of a key column, optionally
4 | including counting the number of each value seen. This is done with
5 | an internal dictionary and requires no sorting (unlike its perl
6 | dbrowuniq equivalent) at the potential cost of higher memory usage.
7 |
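8 | The counting approach amounts to the following plain-Python sketch (an
9 | illustration only, not the tool's actual source):
10 |
11 | ```python
12 | from collections import Counter
13 |
14 | # Rows standing in for the parsed example input below; index 1 is "two".
15 | rows = [["1", "key1", "42.0"], ["2", "key2", "123.0"], ["3", "key1", "90.2"]]
16 |
17 | counts = Counter(row[1] for row in rows)  # one dictionary pass, no sort
18 | for value, count in counts.items():
19 |     print(value, count)
20 | ```
21 |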
8 | #### Example input (*myfile.fsdb*):
9 |
10 | ```
11 | #fsdb -F s col1:l two:a andthree:d
12 | 1 key1 42.0
13 | 2 key2 123.0
14 | 3 key1 90.2
15 | ```
16 |
17 | #### Example command usage
18 |
19 | ```
20 | $ pdbcoluniq -k two -c myfile.fsdb
21 | ```
22 |
23 | #### Example output
24 |
25 | ```
26 | #fsdb -F t two count:l
27 | key1 2
28 | key2 1
29 | # | pdbcoluniq -k two -c myfile.fsdb
30 | ```
31 |
--------------------------------------------------------------------------------
/docs/tools/pdbcoluniq.rst:
--------------------------------------------------------------------------------
1 | pdbcoluniq - find all unique values of a key column
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbcoluniq`` can find all unique values of a key column, optionally
5 | including counting the number of each value seen. This is done with an
6 | internal dictionary and requires no sorting (unlike its perl dbrowuniq
7 | equivalent) at the potential cost of higher memory usage.
8 |
9 | Example input (*myfile.fsdb*):
10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 |
12 | ::
13 |
14 | #fsdb -F s col1:l two:a andthree:d
15 | 1 key1 42.0
16 | 2 key2 123.0
17 | 3 key1 90.2
18 |
19 | Example command usage
20 | ^^^^^^^^^^^^^^^^^^^^^
21 |
22 | ::
23 |
24 | $ pdbcoluniq -k two -c myfile.fsdb
25 |
26 | Example output
27 | ^^^^^^^^^^^^^^
28 |
29 | ::
30 |
31 | #fsdb -F t two count:l
32 | key1 2
33 | key2 1
34 | # | pdbcoluniq -k two -c myfile.fsdb
35 |
36 |
37 | Command Line Arguments
38 | ^^^^^^^^^^^^^^^^^^^^^^
39 |
40 | .. sphinx_argparse_cli::
41 | :module: pyfsdb.tools.pdbcoluniq
42 | :func: parse_args
43 | :hook:
44 | :prog: pdbcoluniq
45 |
--------------------------------------------------------------------------------
/docs/tools/pdbdatetoepoch.md:
--------------------------------------------------------------------------------
1 | ### pdbdatetoepoch - translate a date-string based column to unix epochs
2 |
3 | `pdbdatetoepoch` translates one date/time based column to another
4 | column of unix epoch seconds (since Jan 1 1970). This
5 | tool is the inverse of the `pdbepochtodate` tool.
6 |
7 | #### Example input (*mytime.fsdb*):
8 |
9 | ```
10 | #fsdb -F t index:d datecol:a
11 | 1 2023/01/01
12 | 2 2023/01/01 10:50:05
13 | ```
14 |
15 | #### Example command usage
16 |
17 | ```
18 | $ pdbdatetoepoch -d datecol -t timestamp percent mytime.fsdb
19 | ```
20 |
21 | #### Example output
22 |
23 | ```
24 | #fsdb -F t col1 two andthree andthree_cdf raw percent
25 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837
26 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941
27 | 3 key1 90.2 1.0 255.2 35.3448275862069
28 | ...
29 | ```
30 |
31 | #### Notes
32 |
33 | Internally this uses python's `dateparser` module.
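34 |
35 | The per-value conversion it performs looks roughly like the following
36 | (an illustration only; the tool's own flags and output format apply):
37 |
38 | ```python
39 | import dateparser
40 |
41 | # Parse a free-form date string and convert it to unix epoch seconds.
42 | when = dateparser.parse("2023/01/01 10:50:05")
43 | print(when.timestamp())
44 | ```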
34 |
--------------------------------------------------------------------------------
/docs/tools/pdbdatetoepoch.rst:
--------------------------------------------------------------------------------
1 | pdbdatetoepoch - translate a date-string based column to unix epochs
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbdatetoepoch`` translates one date/time based column to another
5 | column of unix epoch seconds (since Jan 1 1970). This tool is the
6 | inverse of the ``pdbepochtodate`` tool.
7 |
8 | Example input (*mytime.fsdb*):
9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
10 |
11 | ::
12 |
13 | #fsdb -F t index:d datecol:a
14 | 1 2023/01/01
15 | 2 2023/01/01 10:50:05
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbdatetoepoch -d datecol -t timestamp percent mytime.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t col1 two andthree andthree_cdf raw percent
30 | 1 key1 42.0 0.164576802507837 42.0 16.4576802507837
31 | 2 key2 123.0 0.646551724137931 165.0 48.19749216300941
32 | 3 key1 90.2 1.0 255.2 35.3448275862069
33 | ...
34 |
35 | Notes
36 | ^^^^^
37 |
38 | Internally this uses python’s ``dateparser`` module.
39 |
40 |
41 | Command Line Arguments
42 | ^^^^^^^^^^^^^^^^^^^^^^
43 |
44 | .. sphinx_argparse_cli::
45 | :module: pyfsdb.tools.pdbdatetoepoch
46 | :func: parse_args
47 | :hook:
48 | :prog: pdbdatetoepoch
49 |
--------------------------------------------------------------------------------
/docs/tools/pdbensure.md:
--------------------------------------------------------------------------------
1 | ### pdbensure - ensure certain columns are present in the data
2 |
3 | `pdbensure` either simply drops rows without content in a list of
4 | columns, or optionally fills in the values with a default instead.
5 |
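6 | The behavior amounts to this small sketch in plain Python (an
7 | illustration only, not the tool's actual source): a row is dropped when
8 | a required column is empty, unless a default value is supplied to fill
9 | it instead.
10 |
11 | ```python
12 | required = 2        # index of the "andthree" column
13 | default = None      # set to a string such as "replace" to fill instead of drop
14 |
15 | rows = [["1", "key1", "42.0"], ["2", "key2", ""], ["3", "", "90.2"]]
16 |
17 | for row in rows:
18 |     if row[required] == "":
19 |         if default is None:
20 |             print("# dropping row:", row)
21 |             continue
22 |         row[required] = default
23 |     print(row)
24 | ```
25 |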
6 | #### Example input (*myfile.fsdb*):
7 |
8 | ```
9 | #fsdb -F s col1:l two:a andthree:d
10 | 1 key1 42.0
11 | 2 key2
12 | 3 90.2
13 | ```
14 |
15 | #### Example command usage
16 |
17 | ```
18 | $ pdbensure -c andthree -e myfile.fsdb
19 | ```
20 |
21 | #### Example output
22 |
23 | ```
24 | #fsdb -F t col1:l two:a andthree:d
25 | 1 42.0
26 | # dbensure dropping row:[2, 'key2', None]
27 | 3 90.2
28 | ```
29 |
30 | #### Example command usage -- adding a second column
31 |
32 | ```
33 | $ pdbensure -c andthree two -e myfile.fsdb
34 | ```
35 |
36 | #### Example output
37 |
38 | ```
39 | #fsdb -F t col1:l two:a andthree:d
40 | 1 42.0
41 | # dbensure dropping row:[2, 'key2', None]
42 | # dbensure dropping row:[3, None, 90.2]
43 | ```
44 |
45 | #### Example command usage -- with replacement
46 |
47 | ```
48 | $ pdbensure -c two -v replace -- myfile.fsdb
49 | ```
50 |
51 | #### Example output
52 |
53 | ```
54 | #fsdb -F t col1:l two:a andthree:d
55 | 1 key1 42.0
56 | 2 key2
57 | 3 replace 90.2
58 | ```
59 |
60 |
--------------------------------------------------------------------------------
/docs/tools/pdbensure.rst:
--------------------------------------------------------------------------------
1 | pdbensure - ensure certain columns are present in the data
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbensure`` either simply drops rows without content in a list of
5 | columns, or optionally fills in the values with a default instead.
6 |
7 | Example input (*myfile.fsdb*):
8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | ::
11 |
12 | #fsdb -F s col1:l two:a andthree:d
13 | 1 key1 42.0
14 | 2 key2
15 | 3 90.2
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbensure -c andthree -e myfile.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t col1:l two:a andthree:d
30 | 1 42.0
31 | # dbensure dropping row:[2, 'key2', None]
32 | 3 90.2
33 |
34 | Example command usage – adding a second column
35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36 |
37 | ::
38 |
39 | $ pdbensure -c andthree two -e myfile.fsdb
40 |
41 | .. _example-output-1:
42 |
43 | Example output
44 | ^^^^^^^^^^^^^^
45 |
46 | ::
47 |
48 | #fsdb -F t col1:l two:a andthree:d
49 | 1 42.0
50 | # dbensure dropping row:[2, 'key2', None]
51 | # dbensure dropping row:[3, None, 90.2]
52 |
53 | Example command usage – with replacement
54 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
55 |
56 | ::
57 |
58 | $ pdbensure -c two -v replace -- myfile.fsdb
59 |
60 | .. _example-output-2:
61 |
62 | Example output
63 | ^^^^^^^^^^^^^^
64 |
65 | ::
66 |
67 | #fsdb -F t col1:l two:a andthree:d
68 | 1 key1 42.0
69 | 2 key2
70 | 3 replace 90.2
71 |
72 |
73 | Command Line Arguments
74 | ^^^^^^^^^^^^^^^^^^^^^^
75 |
76 | .. sphinx_argparse_cli::
77 | :module: pyfsdb.tools.pdbensure
78 | :func: parse_args
79 | :hook:
80 | :prog: pdbensure
81 |
--------------------------------------------------------------------------------
/docs/tools/pdbepochtodate.md:
--------------------------------------------------------------------------------
1 | ### pdbepochtodate - translate a unix epoch column to a date-string column
2 |
3 | `pdbepochtodate` translates a column containing unix epoch seconds
4 | (since Jan 1, 1970) to another column with a formatted date/time. This
5 | tool is the inverse of the `pdbdatetoepoch` tool.
6 |
7 |
8 | #### Example input (*myepoch.fsdb*):
9 |
10 | ```
11 | #fsdb -F t index:l timestamp:d
12 | 1 1672560000
13 | 2 1678831200
14 | ```
15 |
16 | #### Example command usage
17 |
18 | ```
19 | $ pdbepochtodate -d date -t timestamp myepoch.fsdb
20 | ```
21 |
22 | #### Example output
23 |
24 | ```
25 | #fsdb -F t index:l timestamp:d date
26 | 1 1672560000.0 2023-01-01 00:00
27 | 2 1678831200.0 2023-03-14 15:00
28 | ```
29 |
30 | #### Notes
31 |
32 | Internally this uses python's `dateparser` module.
33 |
--------------------------------------------------------------------------------
/docs/tools/pdbepochtodate.rst:
--------------------------------------------------------------------------------
1 | pdbepochtodate - translate a unix epoch column to a date-string column
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbepochtodate`` translates a column containing unix epoch seconds
5 | (since Jan 1, 1970) to another column with a formatted date/time. This
6 | tool is the inverse of the ``pdbdatetoepoch`` tool.
7 |
8 | Example input (*myepoch.fsdb*):
9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
10 |
11 | ::
12 |
13 | #fsdb -F t index:l timestamp:d
14 | 1 1672560000
15 | 2 1678831200
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbepochtodate -d date -t timestamp myepoch.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t index:l timestamp:d date
30 | 1 1672560000.0 2023-01-01 00:00
31 | 2 1678831200.0 2023-03-14 15:00
32 |
33 | Notes
34 | ^^^^^
35 |
36 | Internally this uses python’s ``dateparser`` module.
37 |
38 |
39 | Command Line Arguments
40 | ^^^^^^^^^^^^^^^^^^^^^^
41 |
42 | .. sphinx_argparse_cli::
43 | :module: pyfsdb.tools.pdbepochtodate
44 | :func: parse_args
45 | :hook:
46 | :prog: pdbepochtodate
47 |
--------------------------------------------------------------------------------
/docs/tools/pdbfgrep.rst:
--------------------------------------------------------------------------------
1 | pdbfgrep - grep rows from one FSDB file using values from another
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbfgrep`` provides a mechanism for doing a multi-match grep across
5 | two FSDB files, where the first is the stream to read and grep
6 | (search through) and the second is a file containing the list of key
7 | values to match against. Similar to ``pdbaugment``, ``pdbfgrep`` is
8 | designed to read a single file entirely into memory and use it to
9 | search for rows in a second one that is read in a streaming style. In
10 | general, the smaller file should be used as the *augment_file*
11 | argument, and the larger as the *stream_file* argument when
12 | possible.
13 |
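A rough sketch of the idea in python (hypothetical data; not the tool's
actual code)::

   # load the (small) value file into memory as a set of keys to match
   wanted = {"key1", "key3"}

   # then stream the (large) file and keep only the matching rows;
   # inverting the test gives the -v behaviour
   stream = [(1, "key1", 42.0), (2, "key2", 123.0), (3, "key3", 90.2)]
   for row in stream:
       if row[1] in wanted:
           print(row)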
14 | Example input file 1 (*mygreptest.fsdb*):
15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16 |
17 | ::
18 |
19 | #fsdb -F t col1 two andthree
20 | 1 key1 42.0
21 | 2 key2 123.0
22 | 3 key3 90.2
23 |
24 | Example input file 2 (*grep-values.fsdb*):
25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t two additional_column
30 | key1 blue
31 | key3 brown
32 |
33 | Example command usage
34 | ^^^^^^^^^^^^^^^^^^^^^
35 |
36 | ::
37 |
38 | $ pdbfgrep -k two -- mygreptest.fsdb grep-values.fsdb
39 |
40 | Example output
41 | ^^^^^^^^^^^^^^
42 |
43 | ::
44 |
45 | #fsdb -F t col1:a two:a andthree:a
46 | 1 key1 42.0
47 | 3 key3 90.2
48 | # | pdbfgrep -k two -- mygreptest.fsdb grep-values.fsdb
49 |
50 | Example command usage -- inverted grep
51 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
52 |
53 | ::
54 |
55 | $ pdbfgrep -v -k two -- mygreptest.fsdb grep-values.fsdb
56 |
57 | Example output
58 | ^^^^^^^^^^^^^^
59 |
60 | ::
61 |
62 | #fsdb -F t col1:a two:a andthree:a
63 | 2 key2 123.0
64 | # | pdbfgrep -v -k two -- mygreptest.fsdb grep-values.fsdb
65 |
66 |
67 | Command Line Arguments
68 | ^^^^^^^^^^^^^^^^^^^^^^
69 |
70 | .. sphinx_argparse_cli::
71 | :module: pyfsdb.tools.pdbfgrep
72 | :func: parse_args
73 | :hook:
74 | :prog: pdbfgrep
75 |
--------------------------------------------------------------------------------
/docs/tools/pdbformat.md:
--------------------------------------------------------------------------------
1 | ### pdbformat - create formatted text per row in an FSDB file
2 |
3 | `pdbformat` uses python's internal string formatting mechanisms to
4 | output lines of text based on the column values from each row. The
5 | *-f* flag is used to specify the formatting string to use, where
6 | column names may be enclosed in curly braces to indicate where
7 | replacement should happen.
8 |
9 | *See also:* `pdbjinja`
10 |
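A tiny illustration of the curly-brace substitution described above, using a
hypothetical row:

```
# each row is turned into a mapping of column name -> value and fed to
# python's string formatting
row = {"col1": 1, "two": "key1", "andthree": "42.0"}
print("{two} is {andthree} !".format(**row))   # -> key1 is 42.0 !
```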
11 | #### Example input (*myfile.fsdb*):
12 |
13 | ```
14 | #fsdb -F s col1:l two:a andthree:d
15 | 1 key1 42.0
16 | 2 key2 123.0
17 | 3 key1 90.2
18 | ```
19 |
20 | #### Example command usage
21 |
22 | ```
23 | $ pdbformat -f "{two} is {andthree:>7.7} !" myfile.fsdb
24 | ```
25 |
26 | #### Example output
27 |
28 | ```
29 | key1 is 42.0 !
30 | key2 is 123.0 !
31 | key1 is 90.2 !
32 | ```
33 |
--------------------------------------------------------------------------------
/docs/tools/pdbformat.rst:
--------------------------------------------------------------------------------
1 | pdbformat - create formatted text per row in an FSDB file
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbformat`` uses python’s internal string formatting mechanisms to
5 | output lines of text based on the column values from each row. The *-f*
6 | flag is used to specify the formatting string to use, where column names
7 | may be enclosed in curly braces to indicate where replacement should
8 | happen.
9 |
10 | *See also:* ``pdbjinja``
11 |
12 | Example input (*myfile.fsdb*):
13 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14 |
15 | ::
16 |
17 | #fsdb -F s col1:l two:a andthree:d
18 | 1 key1 42.0
19 | 2 key2 123.0
20 | 3 key1 90.2
21 |
22 | Example command usage
23 | ^^^^^^^^^^^^^^^^^^^^^
24 |
25 | ::
26 |
27 | $ pdbformat -f "{two} is {andthree:>7.7} !" myfile.fsdb
28 |
29 | Example output
30 | ^^^^^^^^^^^^^^
31 |
32 | ::
33 |
34 | key1 is 42.0 !
35 | key2 is 123.0 !
36 | key1 is 90.2 !
37 |
38 |
39 | Command Line Arguments
40 | ^^^^^^^^^^^^^^^^^^^^^^
41 |
42 | .. sphinx_argparse_cli::
43 | :module: pyfsdb.tools.pdbformat
44 | :func: parse_args
45 | :hook:
46 | :prog: pdbformat
47 |
--------------------------------------------------------------------------------
/docs/tools/pdbfullpivot.md:
--------------------------------------------------------------------------------
1 | ### pdbfullpivot - pivot time/key/value rows into a wide table
2 |
3 | `pdbfullpivot` takes an input file with time/key/value pairs, and
4 | pivots the table into a wide table with one new column per key value.
5 |
6 | *TODO: make this more generic to allow N number of keying columns*
7 |
8 | #### Example input (*myfile.fsdb*):
9 |
10 | ```
11 | #fsdb -F t col1:l two:a andthree:d
12 | 1 key1 42.0
13 | 1 key2 123.0
14 | 2 key1 90.2
15 | ```
16 |
17 | #### Example command usage
18 |
19 | ```
20 | $ pdbfullpivot -t col1 -k two myfile.fsdb
21 | ```
22 |
23 | #### Example output
24 |
25 | ```
26 | #fsdb -F t col1:l key1:d key2:d
27 | 1 42.0 123.0
28 | 2 90.2 0
29 | ...
30 | ```
31 |
32 | #### Notes
33 |
34 | This can produce an output table with a lot of columns when there are
35 | a lot of values within the key column.
36 |
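The pivot itself is easy to picture in plain python (hypothetical data;
missing cells are filled with 0, as in the output above):

```
from collections import defaultdict

rows = [(1, "key1", 42.0), (1, "key2", 123.0), (2, "key1", 90.2)]

wide = defaultdict(dict)      # time -> {key: value}
keys = set()
for t, key, value in rows:
    wide[t][key] = value
    keys.add(key)

for t in sorted(wide):
    print(t, [wide[t].get(k, 0) for k in sorted(keys)])
# 1 [42.0, 123.0]
# 2 [90.2, 0]
```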
--------------------------------------------------------------------------------
/docs/tools/pdbfullpivot.rst:
--------------------------------------------------------------------------------
1 | pdbfullpivot - pivot time/key/value rows into a wide table
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbfullpivot`` takes an input file with time/key/value pairs, and
5 | pivots the table into a wide table with one new column per key value.
6 |
7 | *TODO: make this more generic to allow N number of keying columns*
8 |
9 | Example input (*myfile.fsdb*):
10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 |
12 | ::
13 |
14 | #fsdb -F t col1:l two:a andthree:d
15 | 1 key1 42.0
16 | 1 key2 123.0
17 | 2 key1 90.2
18 |
19 | Example command usage
20 | ^^^^^^^^^^^^^^^^^^^^^
21 |
22 | ::
23 |
24 | $ pdbfullpivot -t col1 -k two myfile.fsdb
25 |
26 | Example output
27 | ^^^^^^^^^^^^^^
28 |
29 | ::
30 |
31 | #fsdb -F t col1:l key1:d key2:d
32 | 1 42.0 123.0
33 | 2 90.2 0
34 | ...
35 |
36 | Notes
37 | ^^^^^
38 |
39 | This can produce an output table with a lot of columns when there are a
40 | lot of values within the key column.
41 |
42 |
43 | Command Line Arguments
44 | ^^^^^^^^^^^^^^^^^^^^^^
45 |
46 | .. sphinx_argparse_cli::
47 | :module: pyfsdb.tools.pdbfullpivot
48 | :func: parse_args
49 | :hook:
50 | :prog: pdbfullpivot
51 |
--------------------------------------------------------------------------------
/docs/tools/pdbheatmap.md:
--------------------------------------------------------------------------------
1 | ### pdbheatmap - produce a graphical heat map from two key columns
2 |
3 | `pdbheatmap` produces a graphical "heat map" of values contained
4 | within an FSDB file given two key columns. It is most useful to get a
5 | visual representation of scored data, for example.
6 |
7 | #### Example input (*myheat.fsdb*):
8 |
9 | Consider the following example input file, where Joe and Bob were
10 | asked to score their favorite fruits on a scale from 1 to 50.
11 |
12 | ```
13 | #fsdb -F t Person Fruit value
14 | Joe Orange 10
15 | Joe Apple 30
16 | Bob Orange 5
17 | Bob Apple 40
18 | ```
19 |
20 | #### Example command usage
21 |
22 | We can then run `pdbheatmap` to generate a graphical map that shows
23 | clearly that when you compare Apples and Oranges, Apples will win.
24 |
25 | ```
26 | $ pdbheatmap -c Person Fruit -v value myheat.fsdb myheat.png -R -fs 20 -L
27 | ```
28 |
29 | #### Example output
30 |
31 | 
32 |
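For comparison, a rough matplotlib sketch of the same idea (an illustration
only, not `pdbheatmap`'s actual implementation):

```
import numpy as np
import matplotlib.pyplot as plt

# bucket the (Person, Fruit, value) triples from myheat.fsdb into a matrix
people, fruits = ["Joe", "Bob"], ["Orange", "Apple"]
scores = {("Joe", "Orange"): 10, ("Joe", "Apple"): 30,
          ("Bob", "Orange"): 5, ("Bob", "Apple"): 40}
matrix = np.array([[scores[(p, f)] for f in fruits] for p in people])

plt.imshow(matrix)                        # one cell per (Person, Fruit)
plt.xticks(range(len(fruits)), fruits)
plt.yticks(range(len(people)), people)
plt.colorbar()
plt.savefig("myheat-sketch.png")
```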
--------------------------------------------------------------------------------
/docs/tools/pdbheatmap.rst:
--------------------------------------------------------------------------------
1 | pdbheatmap - produce a graphical heat map from two key columns
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbheatmap`` produces a graphical “heat map” of values contained
5 | within an FSDB file given two key columns. It is most useful to get a
6 | visual representation of scored data, for example.
7 |
8 | Example input (*myheat.fsdb*):
9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
10 |
11 | Consider the following example input file, where Joe and Bob were asked
12 | to score their favorite fruits on a scale from 1 to 50.
13 |
14 | ::
15 |
16 | #fsdb -F t Person Fruit value
17 | Joe Orange 10
18 | Joe Apple 30
19 | Bob Orange 5
20 | Bob Apple 40
21 |
22 | Example command usage
23 | ^^^^^^^^^^^^^^^^^^^^^
24 |
25 | We can then run ``pdbheatmap`` to generate a graphical map that shows
26 | clearly that when you compare Apples and Oranges, Apples will win.
27 |
28 | ::
29 |
30 | $ pdbheatmap -c Person Fruit -v value myheat.fsdb myheat.png -R -fs 20 -L
31 |
32 | Example output
33 | ^^^^^^^^^^^^^^
34 |
35 | .. figure:: images/myheat.png
36 | :alt: myheat.png
37 |
38 | myheat.png
39 |
40 |
41 | Command Line Arguments
42 | ^^^^^^^^^^^^^^^^^^^^^^
43 |
44 | .. sphinx_argparse_cli::
45 | :module: pyfsdb.tools.pdbheatmap
46 | :func: parse_args
47 | :hook:
48 | :prog: pdbheatmap
49 |
--------------------------------------------------------------------------------
/docs/tools/pdbjinja.md:
--------------------------------------------------------------------------------
1 | ### pdbjinja - process an FSDB file with a jinja template
2 |
3 | `pdbjinja` takes all the data in an FSDB file and passes it to a
4 | jinja2 template, with the rows stored in a `rows` variable.
5 |
6 | *Note:* all rows will be loaded into memory at once.
7 |
8 | *See also:* `pdbformat`
9 |
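Conceptually this is plain jinja2 rendering with the parsed rows handed in as
`rows`; a minimal, hypothetical sketch:

```
import jinja2

rows = [{"two": "key1", "andthree": 42.0},
        {"two": "key2", "andthree": 123.0}]

template = jinja2.Template(
    '{% for row in rows -%}\n{{row["two"]}}: {{row["andthree"]}}\n{% endfor %}'
)
print(template.render(rows=rows))
```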
10 | #### Example input (*myfile.fsdb*):
11 |
12 | ```
13 | #fsdb -F t col1:l two:a andthree:d
14 | 1 key1 42.0
15 | 2 key2 123.0
16 | 3 key1 90.2
17 | ```
18 |
19 | #### Example jinja template (*myfile.j2*)
20 |
21 | ```
22 | {% for row in rows -%}
23 | Key {{row["two"]}}'s favorite number is {{row["andthree"]}}
24 | {% endfor %}
25 | ```
26 |
27 | #### Example command usage
28 |
29 | ```
30 | $ pdbjinja -j myfile.j2 myfile.fsdb
31 | ```
32 |
33 | #### Example output
34 |
35 | ```
36 | Key key1's favorite number is 42.0
37 | Key key2's favorite number is 123.0
38 | Key key1's favorite number is 90.2
39 | ```
40 |
41 |
42 |
--------------------------------------------------------------------------------
/docs/tools/pdbjinja.rst:
--------------------------------------------------------------------------------
1 | pdbjinja - process an FSDB file with a jinja template
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbjinja`` takes all the data in an FSDB file and passes it to a
5 | jinja2 template, with the rows stored in a ``rows`` variable.
6 |
7 | *Note:* all rows will be loaded into memory at once.
8 |
9 | *See also:* ``pdbformat``
10 |
11 | Example input (*myfile.fsdb*):
12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13 |
14 | ::
15 |
16 | #fsdb -F t col1:l two:a andthree:d
17 | 1 key1 42.0
18 | 2 key2 123.0
19 | 3 key1 90.2
20 |
21 | Example jinja template (*myfile.j2*)
22 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
23 |
24 | ::
25 |
26 | {% for row in rows -%}
27 | Key {{row["two"]}}'s favorite number is {{row["andthree"]}}
28 | {% endfor %}
29 |
30 | Example command usage
31 | ^^^^^^^^^^^^^^^^^^^^^
32 |
33 | ::
34 |
35 | $ pdbjinja -j myfile.j2 myfile.fsdb
36 |
37 | Example output
38 | ^^^^^^^^^^^^^^
39 |
40 | ::
41 |
42 | Key key1's favorite number is 42.0
43 | Key key2's favorite number is 123.0
44 | Key key1's favorite number is 90.2
45 |
46 |
47 | Command Line Arguments
48 | ^^^^^^^^^^^^^^^^^^^^^^
49 |
50 | .. sphinx_argparse_cli::
51 | :module: pyfsdb.tools.pdbjinja
52 | :func: parse_args
53 | :hook:
54 | :prog: pdbjinja
55 |
--------------------------------------------------------------------------------
/docs/tools/pdbkeyedsort.md:
--------------------------------------------------------------------------------
1 | ### pdbkeyedsort - sort "mostly sorted" FSDB files with minimal memory
2 |
3 | Sort "mostly sorted" large FSDB files using a double pass dbkeyedsort
4 | reads a file twice, sorting the data by the column specified via the
5 | -c/--column option. During the first pass, it counts all the rows per
6 | key to manage which lines it needs to memorize as it is making its
7 | second pass. During the second pass, it only stores in memory the
8 | lines that are out of order. This can greatly optimize the amount of
9 | memory stored when the data is already in a fairly sorted state (which
10 | is common for the output of map/reduce operations such as
11 | hadoop). This comes at the expense of needing to read the entire
12 | dataset twice, which means its impossible to use `stdin` to pass in
13 | data; instead a filename must be specified instead. The output,
14 | though, may be `stdout`.
15 |
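A simplified sketch of the two-pass idea in plain python (a hypothetical
helper; the real tool streams FSDB rows from the file twice rather than
reusing in-memory lists):

```
from collections import Counter, defaultdict

def keyed_sort(first_pass, second_pass, key):
    # pass 1: count the rows per key and fix the target key order
    remaining = Counter(row[key] for row in first_pass)
    order = sorted(remaining)
    buffered = defaultdict(list)    # out-of-order rows, stored per key
    out, idx = [], 0                # idx points at the key being emitted

    def advance():
        nonlocal idx
        moved = True
        while moved and idx < len(order):
            moved, current = False, order[idx]
            if buffered.get(current):           # flush anything waiting
                for row in buffered.pop(current):
                    out.append(row)
                    remaining[current] -= 1
                moved = True
            if remaining[current] == 0:         # this key is finished
                idx += 1
                moved = True

    # pass 2: emit in-order rows immediately, buffer only the rest
    for row in second_pass:
        if idx < len(order) and row[key] == order[idx]:
            out.append(row)
            remaining[row[key]] -= 1
            advance()
        else:
            buffered[row[key]].append(row)
    advance()
    return out

rows = [{"two": "key1"}, {"two": "key2"}, {"two": "key1"}]
print(keyed_sort(rows, rows, "two"))
```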
16 | #### Example input (*myfile.fsdb*):
17 |
18 | ```
19 | #fsdb -F s col1:l two:a andthree:d
20 | 1 key1 42.0
21 | 2 key2 123.0
22 | 3 key1 90.2
23 | ```
24 |
25 | #### Example command usage
26 |
27 | We add the -v flag to have it give a count of the number of lines that
28 | were cached. In general, you want this fraction to be small to
29 | conserve memory. In the example below, `pdbkeyedsort` only needed to
30 | memorize one row (the second) of the above file.
31 |
32 | ```
33 | $ pdbkeyedsort -c andthree -v myfile.fsdb
34 | ```
35 |
36 | #### Example output
37 |
38 | ```
39 | #fsdb -F t col1:l two andthree:d
40 | 1 key1 42.0
41 | 3 key1 90.2
42 | 2 key2 123.0
43 | cached 1/3 lines
44 | ```
45 |
--------------------------------------------------------------------------------
/docs/tools/pdbkeyedsort.rst:
--------------------------------------------------------------------------------
1 | pdbkeyedsort - sort “mostly sorted” FSDB files with minimal memory
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbkeyedsort`` sorts “mostly sorted” large FSDB files using a double
5 | pass: it reads a file twice, sorting the data by the column specified
6 | via the -c/--column option. During the first pass, it counts all the
7 | rows per key to work out which lines it needs to memorize as it makes
8 | its second pass. During the second pass, it only stores in memory the
9 | lines that are out of order. This can greatly reduce the amount of
10 | memory needed when the data is already in a fairly sorted state (which
11 | is common for the output of map/reduce operations such as hadoop). This
12 | comes at the expense of needing to read the entire dataset twice, which
13 | means it is impossible to use ``stdin`` to pass in data; a filename must
14 | be specified instead. The output, though, may be ``stdout``.
15 |
16 | Example input (*myfile.fsdb*):
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18 |
19 | ::
20 |
21 | #fsdb -F s col1:l two:a andthree:d
22 | 1 key1 42.0
23 | 2 key2 123.0
24 | 3 key1 90.2
25 |
26 | Example command usage
27 | ^^^^^^^^^^^^^^^^^^^^^
28 |
29 | We add the -v flag to have it give a count of the number of lines that
30 | were cached. In general, you want this fraction to be small to conserve
31 | memory. In the example below, ``pdbkeyedsort`` only needed to memorize
32 | one row (the second) of the above file.
33 |
34 | ::
35 |
36 | $ pdbkeyedsort -c andthree -v myfile.fsdb
37 |
38 | Example output
39 | ^^^^^^^^^^^^^^
40 |
41 | ::
42 |
43 | #fsdb -F t col1:l two andthree:d
44 | 1 key1 42.0
45 | 3 key1 90.2
46 | 2 key2 123.0
47 | cached 1/3 lines
48 |
49 |
50 | Command Line Arguments
51 | ^^^^^^^^^^^^^^^^^^^^^^
52 |
53 | .. sphinx_argparse_cli::
54 | :module: pyfsdb.tools.pdbkeyedsort
55 | :func: parse_args
56 | :hook:
57 | :prog: pdbkeyedsort
58 |
--------------------------------------------------------------------------------
/docs/tools/pdbnormalize.md:
--------------------------------------------------------------------------------
1 | ### pdbnormalize - normalize a bunch of columns
2 |
3 | `pdbnormalize` reads an input file and divides each value from a
4 | number of columns by the maximum value seen across all of those
5 | columns.
6 |
7 | *Note: this is the maximum value of all columns provided; if
8 | you want per-column normalization, run the tool multiple times
9 | instead.*
10 |
11 | *Note: this requires reading the entire file into memory.*
12 |
13 | #### Example input (*myfile.fsdb*):
14 |
15 | ```
16 | #fsdb -F s col1:l two:a andthree:d
17 | 1 key1 42.0
18 | 2 key2 123.0
19 | 3 key1 90.2
20 | ```
21 |
22 | #### Example command usage
23 |
24 | ```
25 | $ pdbnormalize -k andthree -- myfile.fsdb
26 | ```
27 |
28 | #### Example output
29 |
30 | ```
32 | #fsdb -F t col1:l two andthree:d
33 | 1 key1 0.34146341463414637
34 | 2 key2 1.0
35 | 3 key1 0.7333333333333334
36 | ```
37 |
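The arithmetic is simply a division by the column maximum; for the `andthree`
values above:

```
values = [42.0, 123.0, 90.2]           # the andthree column
biggest = max(values)                  # 123.0
print([v / biggest for v in values])
# [0.34146341463414637, 1.0, 0.7333333333333334]
```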
38 | #### Example normalizing 2 columns:
39 |
40 | If you normalize multiple columns, be aware that the divisor is the
41 | maximum of all the values from all the columns. Thus by passing both
42 | columns `col1` and `andthree`, you'll note in the output below that
43 | even col1 is divided by the maximum value from both columns in the
44 | input (*123.0*).
45 |
46 | ```
47 | $ pdbnormalize -k col1 andthree -- myfile.fsdb
48 | ```
49 |
50 | #### Example output
51 |
52 | ```
53 | 0.008130081300813009 key1 0.34146341463414637
54 | 0.016260162601626018 key2 1.0
55 | 0.024390243902439025 key1 0.7333333333333334
56 | ```
57 |
--------------------------------------------------------------------------------
/docs/tools/pdbnormalize.rst:
--------------------------------------------------------------------------------
1 | pdbnormalize - normalize a bunch of columns
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbnormalize`` reads an input file and divides each value from a
5 | number of columns by the maximum value seen across all of those
6 | columns.
7 |
8 | *Note: this is the maximum value of all columns provided; if you want
9 | per-column normalization, run the tool multiple times instead.*
10 |
11 | *Note: this requires reading the entire file into memory.*
12 |
13 | Example input (*myfile.fsdb*):
14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15 |
16 | ::
17 |
18 | #fsdb -F s col1:l two:a andthree:d
19 | 1 key1 42.0
20 | 2 key2 123.0
21 | 3 key1 90.2
22 |
23 | Example command usage
24 | ^^^^^^^^^^^^^^^^^^^^^
25 |
26 | ::
27 |
28 | $ pdbnormalize -k andthree -- myfile.fsdb
29 |
30 | Example output
31 | ^^^^^^^^^^^^^^
32 |
33 | ::
34 |
36 | #fsdb -F t col1:l two andthree:d
37 | 1 key1 0.34146341463414637
38 | 2 key2 1.0
39 | 3 key1 0.7333333333333334
40 |
41 | Example normalizing 2 columns:
42 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43 |
44 | If you normalize multiple columns, be aware that the divisor is the
45 | maximum of all the values from all the columns. Thus by passing both
46 | columns ``col1`` and ``andthree``, you’ll note in the output below that
47 | even col1 is divided by the maximum value from both columns in the input
48 | (*123.0*).
49 |
50 | ::
51 |
52 | $ pdbnormalize -k col1 andthree -- myfile.fsdb
53 |
54 | .. _example-output-1:
55 |
56 | Example output
57 | ^^^^^^^^^^^^^^
58 |
59 | ::
60 |
61 | 0.008130081300813009 key1 0.34146341463414637
62 | 0.016260162601626018 key2 1.0
63 | 0.024390243902439025 key1 0.7333333333333334
64 |
65 |
66 | Command Line Arguments
67 | ^^^^^^^^^^^^^^^^^^^^^^
68 |
69 | .. sphinx_argparse_cli::
70 | :module: pyfsdb.tools.pdbnormalize
71 | :func: parse_args
72 | :hook:
73 | :prog: pdbnormalize
74 |
--------------------------------------------------------------------------------
/docs/tools/pdbreescape.md:
--------------------------------------------------------------------------------
1 | ### pdbreescape - regexp escape strings from a column
2 |
3 | `pdbreescape` passes the requested columns (-k) through python's
4 | regex escaping function.
5 |
6 | *Note: because -k can take multiple columns, input files likely need
7 | to appear after the "--" argument-stop-parsing string.*
8 |
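The escaping itself is python's `re.escape`; a quick illustration (whether
spaces are also escaped depends on the python version, and older versions
escaped them, as the output further below shows):

```
import re

print(re.escape("42.0"))        # -> 42\.0
print(re.escape("() and []"))   # parentheses and brackets get backslashes
```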
9 | #### Example input (*myfile.fsdb*):
10 |
11 | ```
12 | #fsdb -F s col1:l two:a andthree:d
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 | ```
17 |
18 | #### Example command usage
19 |
20 | Using our standard input file for this documentation set, we first
21 | pass the file through `pdbaddtypes` to change the type from a float
22 | to a string, and then escape the period in the (now string) floating
23 | point number:
24 |
25 | ```
26 | $ pdbaddtypes -t andthree=a -- myfile.fsdb |
27 | pdbreescape -k andthree
28 | ```
29 |
30 | #### Example output
31 |
32 | ```
33 | #fsdb -F t col1:l two andthree
34 | 1 key1 42\.0
35 | 2 key2 123\.0
36 | 3 key1 90\.2
37 | # | /home/hardaker/.local/bin/pdbreescape -k andthree
38 | ```
39 |
40 | #### A more complex file (*mystrings.fsdb*)
41 |
42 | This shows a greater number of regex escaping types. Note that the
43 | spaces are also escaped.
44 |
45 | ```
46 | #fsdb -F t type value
47 | wild-cards * and . and + and ?
48 | parens () and []
49 | slashes / and \
50 | ```
51 |
52 |
53 | #### Example command usage
54 |
55 | ```
56 | $ pdbreescape -k value -- mystrings.fsdb
57 | ```
58 |
59 | #### Example output
60 |
61 | ```
62 | #fsdb -F t type value
63 | wild-cards \*\ and\ \.\ and\ \+\ and\ \?
64 | parens \(\)\ and\ \[\]
65 | slashes /\ and\ \\
66 |
67 | ```
68 |
--------------------------------------------------------------------------------
/docs/tools/pdbreescape.rst:
--------------------------------------------------------------------------------
1 | pdbreescape - regexp escape strings from a column
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbreescape`` passes the requested columns (-k) through python’s regex
5 | escaping function.
6 |
7 | *Note: because -k can take multiple columns, input files likely need
8 | to appear after the “--” argument-stop-parsing string.*
9 |
10 | Example input (*myfile.fsdb*):
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | ::
14 |
15 | #fsdb -F s col1:l two:a andthree:d
16 | 1 key1 42.0
17 | 2 key2 123.0
18 | 3 key1 90.2
19 |
20 | Example command usage
21 | ^^^^^^^^^^^^^^^^^^^^^
22 |
23 | Using our standard input file for this documentation set, we first pass
24 | the file through ``pdbaddtypes`` to change the type from a float to a
25 | string, and then escape the period in the (now string) floating point
26 | number:
27 |
28 | ::
29 |
30 | $ pdbaddtypes -t andthree=a -- myfile.fsdb |
31 | pdbreescape -k andthree
32 |
33 | Example output
34 | ^^^^^^^^^^^^^^
35 |
36 | ::
37 |
38 | #fsdb -F t col1:l two andthree
39 | 1 key1 42\.0
40 | 2 key2 123\.0
41 | 3 key1 90\.2
42 | # | /home/hardaker/.local/bin/pdbreescape -k andthree
43 |
44 | A more complex file (*mystrings.fsdb*)
45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
46 |
47 | This shows a greater number of regex escaping types. Note that the
48 | spaces are also escaped.
49 |
50 | ::
51 |
52 | #fsdb -F t type value
53 | wild-cards * and . and + and ?
54 | parens () and []
55 | slashes / and \
56 |
57 | .. _example-command-usage-1:
58 |
59 | Example command usage
60 | ^^^^^^^^^^^^^^^^^^^^^
61 |
62 | ::
63 |
64 | $ pdbreescape -k value -- mystrings.fsdb
65 |
66 | .. _example-output-1:
67 |
68 | Example output
69 | ^^^^^^^^^^^^^^
70 |
71 | ::
72 |
73 | #fsdb -F t type value
74 | wild-cards \*\ and\ \.\ and\ \+\ and\ \?
75 | parens \(\)\ and\ \[\]
76 | slashes /\ and\ \\
77 |
78 |
79 | Command Line Arguments
80 | ^^^^^^^^^^^^^^^^^^^^^^
81 |
82 | .. sphinx_argparse_cli::
83 | :module: pyfsdb.tools.pdbreescape
84 | :func: parse_args
85 | :hook:
86 | :prog: pdbreescape
87 |
--------------------------------------------------------------------------------
/docs/tools/pdbrow.md:
--------------------------------------------------------------------------------
1 | ### pdbrow - select a subset of rows based on a filter
2 |
3 | `pdbrow` can apply an arbitrary logical python expression that selects
4 | matching rows for passing to the output.
5 |
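Conceptually, the expression is evaluated with the row's columns visible as
variables; a tiny, hypothetical illustration:

```
# a row as parsed from myfile.fsdb, with column names as keys
row = {"col1": 1, "two": "key1", "andthree": 42.0}

expression = 'two == "key1"'
print(eval(expression, {}, row))   # True, so the row would be kept
```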
6 | #### Example input (*myfile.fsdb*):
7 |
8 | ```
9 | #fsdb -F s col1:l two:a andthree:d
10 | 1 key1 42.0
11 | 2 key2 123.0
12 | 3 key1 90.2
13 | ```
14 |
15 | #### Example command usage
16 |
17 | ```
18 | $ pdbrow 'two == "key1"' myfile.fsdb
19 | ```
20 |
21 | #### Example output
22 |
23 | ```
24 | #fsdb -F t col1:l two andthree:d
25 | 1 key1 42.0
26 | 3 key1 90.2
27 | # | pdbrow 'two == "key1"' myfile.fsdb
28 | ```
29 |
30 | #### Example command usage with initialization code
31 |
32 |
33 | ```
34 | $ pdbrow -i "import re" 're.match("key1", two)' myfile.fsdb
35 | ```
36 |
37 | #### Example output
38 |
39 | ```
40 | #fsdb -F t col1:l two andthree:d
41 | 1 key1 42.0
42 | 3 key1 90.2
43 | # | pdbrow -i 'import re' 're.match("key1", two)' myfile.fsdb
44 | ```
45 |
46 | #### Example command usage with namedtuple based rows
47 |
48 | ```
49 | $ pdbrow -n row 'row.two == "key1"' myfile.fsdb
50 | ```
51 |
52 | #### Example output
53 |
54 | ```
55 | #fsdb -F t col1:l two andthree:d
56 | 1 key1 42.0
57 | 3 key1 90.2
58 | # | pdbrow -n row row.two == "key1"
59 | ```
60 |
--------------------------------------------------------------------------------
/docs/tools/pdbrow.rst:
--------------------------------------------------------------------------------
1 | pdbrow - select a subset of rows based on a filter
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbrow`` can apply an arbitrary logical python expression that selects
5 | matching rows for passing to the output.
6 |
7 | Example input (*myfile.fsdb*):
8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | ::
11 |
12 | #fsdb -F s col1:l two:a andthree:d
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbrow 'two == "key1"' myfile.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t col1:l two andthree:d
30 | 1 key1 42.0
31 | 3 key1 90.2
32 | # | pdbrow 'two == "key1"' myfile.fsdb
33 |
34 | Example command usage with initialization code
35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36 |
37 | ::
38 |
39 | $ pdbrow -i "import re" 're.match("key1", two)' myfile.fsdb
40 |
41 | .. _example-output-1:
42 |
43 | Example output
44 | ^^^^^^^^^^^^^^
45 |
46 | ::
47 |
48 | #fsdb -F t col1:l two andthree:d
49 | 1 key1 42.0
50 | 3 key1 90.2
51 | # | pdbrow -i 'import re' 're.match("key1", two)' myfile.fsdb
52 |
53 | Example command usage with namedtuple based rows
54 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
55 |
56 | ::
57 |
58 | $ pdbrow -n row 'row.two == "key1"' myfile.fsdb
59 |
60 | .. _example-output-2:
61 |
62 | Example output
63 | ^^^^^^^^^^^^^^
64 |
65 | ::
66 |
67 | #fsdb -F t col1:l two andthree:d
68 | 1 key1 42.0
69 | 3 key1 90.2
70 | # | pdbrow -n row row.two == "key1"
71 |
72 |
73 | Command Line Arguments
74 | ^^^^^^^^^^^^^^^^^^^^^^
75 |
76 | .. sphinx_argparse_cli::
77 | :module: pyfsdb.tools.pdbrow
78 | :func: parse_args
79 | :hook:
80 | :prog: pdbrow
81 |
--------------------------------------------------------------------------------
/docs/tools/pdbroweval.rst:
--------------------------------------------------------------------------------
1 | pdbroweval - alter rows based on python expressions or code
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbroweval`` can apply an arbitrary python expression or code to
5 | modify the contents of the file before passing it to the output stream.
6 |
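Conceptually, each row's columns become local variables for the supplied
code, roughly like this hypothetical snippet::

   # the supplied code runs with the row's columns as local variables
   row = {"col1": 1, "two": "key1", "andthree": 42.0}
   exec("andthree *= 2", {}, row)
   print(row["andthree"])   # 42.0 -> 84.0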
7 | Example input (*myfile.fsdb*):
8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | ::
11 |
12 | #fsdb -F s col1:l two:s andthree:d
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 |
17 | Example command usage
18 | ^^^^^^^^^^^^^^^^^^^^^
19 |
20 | ::
21 |
22 | $ pdbroweval 'andthree *= 2' myfile.fsdb
23 |
24 | Example output
25 | ^^^^^^^^^^^^^^
26 |
27 | ::
28 |
29 | #fsdb -F t col1:l two andthree:d
30 | 1 key1 84.0
31 | 2 key2 246.0
32 | 3 key1 180.4
33 | # | pdbroweval 'andthree *= 2' myfile.fsdb
34 |
35 | Example command usage with initialization code
36 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
37 |
38 | ::
39 |
40 | $ pdbroweval -i "import re" 'two = re.sub("key", "lock", two)' myfile.fsdb
41 |
42 | .. _example-output-1:
43 |
44 | Example output
45 | ^^^^^^^^^^^^^^
46 |
47 | ::
48 |
49 | #fsdb -F t col1:l two andthree:d
50 | 1 lock1 42.0
51 | 2 lock2 123.0
52 | 3 lock1 90.2
53 | # | pdbroweval -i import re two = re.sub("key", "lock", two) myfile.fsdb
54 |
55 | Command Line Usage
56 | ^^^^^^^^^^^^^^^^^^
57 |
58 | .. argparse::
59 | :ref: pyfsdb.tools.pdbroweval.get_parse_args
60 | :prog: pdbroweval
61 |
--------------------------------------------------------------------------------
/docs/tools/pdbsplitter.md:
--------------------------------------------------------------------------------
1 | ### pdbsplitter - split an FSDB file into multiple files
2 |
3 | `pdbsplitter` splits a single FSDB file into a series of output
4 | files. This could be achieved by running `dbcol` multiple times, but
5 | `pdbsplitter` should be faster when processing many columns.
6 |
7 | #### Example input (*myfile.fsdb*):
8 |
9 | ```
10 | #fsdb -F s col1:l two:a andthree:d
11 | 1 key1 42.0
12 | 2 key2 123.0
13 | 3 key1 90.2
14 | ```
15 |
16 | #### Example command usage
17 |
18 | ```
19 | $ pdbsplitter -k col1 -c two andthree -o myfile-split-%s.fsdb myfile.fsdb
20 | ```
21 |
22 | #### Example output
23 |
24 | The above command produces two different files, one per column.
25 |
26 | - *myfile-split-two.fsdb*:
27 |
28 | ```
29 | #fsdb -F t col1 two
30 | 1 key1
31 | 2 key2
32 | 3 key1
33 | ```
34 |
35 | - *myfile-split-andthree.fsdb*:
36 |
37 | ```
38 | #fsdb -F t col1 andthree
39 | 1 42.0
40 | 2 123.0
41 | 3 90.2
42 | ```
43 |
--------------------------------------------------------------------------------
/docs/tools/pdbsplitter.rst:
--------------------------------------------------------------------------------
1 | pdbsplitter - split an FSDB file into multiple files
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbsplitter`` splits a single FSDB file into a series of output files.
5 | This could be achieved by running ``dbcol`` multiple times, but
6 | ``pdbsplitter`` should be faster when processing many columns.
7 |
8 | Example input (*myfile.fsdb*):
9 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
10 |
11 | ::
12 |
13 | #fsdb -F s col1:l two:a andthree:d
14 | 1 key1 42.0
15 | 2 key2 123.0
16 | 3 key1 90.2
17 |
18 | Example command usage
19 | ^^^^^^^^^^^^^^^^^^^^^
20 |
21 | ::
22 |
23 | $ pdbsplitter -k col1 -c two andthree -o myfile-split-%s.fsdb myfile.fsdb
24 |
25 | Example output
26 | ^^^^^^^^^^^^^^
27 |
28 | The above command produces two different files, one per column.
29 |
30 | - *myfile-split-two.fsdb*:
31 |
32 | ::
33 |
34 | #fsdb -F t col1 two
35 | 1 key1
36 | 2 key2
37 | 3 key1
38 |
39 | - *myfile-split-andthree.fsdb*:
40 |
41 | ::
42 |
43 | #fsdb -F t col1 andthree
44 | 1 42.0
45 | 2 123.0
46 | 3 90.2
47 |
48 |
49 | Command Line Arguments
50 | ^^^^^^^^^^^^^^^^^^^^^^
51 |
52 | .. sphinx_argparse_cli::
53 | :module: pyfsdb.tools.pdbsplitter
54 | :func: parse_args
55 | :hook:
56 | :prog: pdbsplitter
57 |
--------------------------------------------------------------------------------
/docs/tools/pdbsum.md:
--------------------------------------------------------------------------------
1 | ### pdbsum - sum columns together
2 |
3 | `pdbsum` adds column data together based on keyed input. This is
4 | similar to `dbcolstats` and `dbmultistats`, but only performs addition
5 | (or subtraction) and can be faster on very large datasets where the
6 | rest of the analysis provided by the other tools is not needed.
7 | `pdbsum` also supports keyed subtraction, as seen below.
8 |
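The keyed addition itself is a simple accumulation; a hypothetical sketch
using the rows from the example below:

```
from collections import defaultdict

rows = [(1, "key1", 42.0), (2, "key2", 123.0), (3, "key1", 90.2)]

sums = defaultdict(float)
for col1, two, andthree in rows:
    sums[two] += andthree          # a subtraction file would use -= instead

print(dict(sums))                  # key1 -> 42.0 + 90.2, key2 -> 123.0
```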
9 | #### Example input (*myfile.fsdb*):
10 |
11 | ```
12 | #fsdb -F s col1:l two:a andthree:d
13 | 1 key1 42.0
14 | 2 key2 123.0
15 | 3 key1 90.2
16 | ```
17 |
18 | #### Example command usage
19 |
20 | ```
21 | $ pdbsum -k two -c col1 andthree -- myfile.fsdb
22 | ```
23 |
24 | #### Example output
25 |
26 | ```
27 | #fsdb -F t two col1:d andthree:d
28 | key1 4.0 132.2
29 | key2 2.0 123.0
30 | ```
31 |
32 | #### Example Subtraction file
33 |
34 | If we have another file (*mysub.fsdb*), we can subtract results:
35 |
36 | ```
37 | #fsdb -F s two:a andthree:d
38 | key1 10
39 | key2 10
40 | key1 10
41 | ```
42 |
43 | #### Example subtraction command:
44 |
45 | ```
46 | pdbsum -k two -c col1 andthree -- myfile.fsdb mysub.fsdb
47 | ```
48 |
49 | #### Example output of subtraction:
50 |
51 | Note how the two 10's in the key1 subtraction are added together to 20
52 | before being subtracted from the sum of key1 (132.2) in the first
53 | file.
54 |
55 | *Note:* Also observe the typical floating point imprecision rounding
56 | problems that python is well known for displaying.
57 |
58 | ```
59 | #fsdb -F t two andthree:d
60 | key1 112.19999999999999
61 | key2 113.0
62 | ```
63 |
--------------------------------------------------------------------------------
/docs/tools/pdbsum.rst:
--------------------------------------------------------------------------------
1 | pdbsum - sum columns together
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbsum`` adds column data together based on keyed input. This is
5 | similar to ``dbcolstats`` and ``dbmultistats``, but only performs
6 | addition (or subtraction) and can be faster on very large datasets where
7 | the rest of the analysis provided by the other tools is not needed.
8 | ``pdbsum`` also supports keyed subtraction, as seen below.
9 |
10 | Example input (*myfile.fsdb*):
11 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12 |
13 | ::
14 |
15 | #fsdb -F s col1:l two:a andthree:d
16 | 1 key1 42.0
17 | 2 key2 123.0
18 | 3 key1 90.2
19 |
20 | Example command usage
21 | ^^^^^^^^^^^^^^^^^^^^^
22 |
23 | ::
24 |
25 | $ pdbsum -k two -c col1 andthree -- myfile.fsdb
26 |
27 | Example output
28 | ^^^^^^^^^^^^^^
29 |
30 | ::
31 |
32 | #fsdb -F t two col1:d andthree:d
33 | key1 4.0 132.2
34 | key2 2.0 123.0
35 |
36 | Example Subtraction file
37 | ^^^^^^^^^^^^^^^^^^^^^^^^
38 |
39 | If we have another file (*mysub.fsdb*), we can subtract results:
40 |
41 | ::
42 |
43 | #fsdb -F s two:a andthree:d
44 | key1 10
45 | key2 10
46 | key1 10
47 |
48 | Example subtraction command:
49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
50 |
51 | ::
52 |
53 | pdbsum -k two -c col1 andthree -- myfile.fsdb mysub.fsdb
54 |
55 | Example output of subtraction:
56 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
57 |
58 | Note how the two 10’s in the key1 subtraction are added together to 20
59 | before being subtracted from the sum of key1 (132.2) in the first file.
60 |
61 | *Note:* Also observe the typical floating point imprecision rounding
62 | problems that python is well known for displaying.
63 |
64 | ::
65 |
66 | #fsdb -F t two andthree:d
67 | key1 112.19999999999999
68 | key2 113.0
69 |
70 |
71 | Command Line Arguments
72 | ^^^^^^^^^^^^^^^^^^^^^^
73 |
74 | .. sphinx_argparse_cli::
75 | :module: pyfsdb.tools.pdbsum
76 | :func: parse_args
77 | :hook:
78 | :prog: pdbsum
79 |
--------------------------------------------------------------------------------
/docs/tools/pdbtopn.md:
--------------------------------------------------------------------------------
1 | ### pdbtopn - selects the top N rows based on values from a column
2 |
3 | `pdbtopn` selects N rows from an FSDB file by selecting the top values
4 | from a particular column. For smaller datasets, using a combination
5 | of `dbsort` and `dbuniq` accomplishes the same functional result.
6 | However, `pdbtopn` requires far less memory and CPU computation when N
7 | is small and the dataset is large. Using `dbsort` and `dbuniq` may be
8 | a better solution with very large values of N.
9 |
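The low-memory trick is the usual heap-based top-N selection, which never
keeps more than N candidates around; a rough, hypothetical sketch (this
ignores `pdbtopn`'s key handling and just picks the N largest values):

```
import heapq

rows = [(1, "key1", 42.0), (2, "key2", 123.0), (3, "key1", 90.2)]

# keep only the single largest row, ranked by the third column
top = heapq.nlargest(1, rows, key=lambda row: row[2])
print(top)                      # [(2, 'key2', 123.0)]
```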
10 | #### Example input (*myfile.fsdb*):
11 |
12 | ```
13 | #fsdb -F s col1:l two:a andthree:d
14 | 1 key1 42.0
15 | 2 key2 123.0
16 | 3 key1 90.2
17 | ```
18 |
19 | #### Example command usage
20 |
21 | ```
22 | $ pdbtopn -k two -n 1 -v andthree myfile.fsdb
23 | ```
24 |
25 | #### Example output
26 |
27 | ```
28 | #fsdb -F t col1:l two andthree:d
29 | 2 key2 123.0
30 | ```
31 |
32 | #### Example selecting the top values of multiple keys
33 |
34 | ```
35 | $ pdbtopn -k two -n 20 -v andthree myfile.fsdb
36 | ```
37 | #### Example output
38 |
39 |
40 | ```
41 | #fsdb -F t col1:l two andthree:d
42 | 3 key1 90.2
43 | 2 key2 123.0
44 | ```
45 |
--------------------------------------------------------------------------------
/docs/tools/pdbtopn.rst:
--------------------------------------------------------------------------------
1 | pdbtopn - selects the top N rows based on values from a column
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbtopn`` selects N rows from an FSDB file by selecting the top values
5 | from a particular column. For smaller datasets, using a combination of
6 | ``dbsort`` and ``dbuniq`` accomplishes the same functional result.
7 | However, ``pdbtopn`` requires far less memory and CPU computation when N
8 | is small and the dataset is large. Using ``dbsort`` and ``dbuniq`` may
9 | be a better solution with very large values of N.
10 |
11 | Example input (*myfile.fsdb*):
12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13 |
14 | ::
15 |
16 | #fsdb -F s col1:l two:a andthree:d
17 | 1 key1 42.0
18 | 2 key2 123.0
19 | 3 key1 90.2
20 |
21 | Example command usage
22 | ^^^^^^^^^^^^^^^^^^^^^
23 |
24 | ::
25 |
26 | $ pdbtopn -k two -n 1 -v andthree myfile.fsdb
27 |
28 | Example output
29 | ^^^^^^^^^^^^^^
30 |
31 | ::
32 |
33 | #fsdb -F t col1:l two andthree:d
34 | 2 key2 123.0
35 |
36 | Example selecting the top values of multiple keys
37 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38 |
39 | ::
40 |
41 | $ pdbtopn -k two -n 20 -v andthree myfile.fsdb
42 |
43 | .. _example-output-1:
44 |
45 | Example output
46 | ^^^^^^^^^^^^^^
47 |
48 | ::
49 |
50 | #fsdb -F t col1:l two andthree:d
51 | 3 key1 90.2
52 | 2 key2 123.0
53 |
54 |
55 | Command Line Arguments
56 | ^^^^^^^^^^^^^^^^^^^^^^
57 |
58 | .. sphinx_argparse_cli::
59 | :module: pyfsdb.tools.pdbtopn
60 | :func: parse_args
61 | :hook:
62 | :prog: pdbtopn
63 |
--------------------------------------------------------------------------------
/docs/tools/pdbzerofill.md:
--------------------------------------------------------------------------------
1 | ### pdbzerofill - fills columns with zeros (or another value) when blank
2 |
3 | `pdbzerofill` fills in rows that are missing from a series of rows with
4 | a numerically increasing (frequently a timestamp) index. This is a sister
5 | program to `pdbensure`, which removes rows with missing data instead of
6 | creating them.
7 |
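A hypothetical sketch of the filling logic, using the same bin size of 2 as
the example below:

```
rows = {2: ("key1", 42.0), 6: ("key2", ""), 10: ("", 90.2)}
bin_size, filler = 2, ("xxx", "xxx")

# walk the index column in fixed steps and emit a filler row for every
# index value that is missing from the input
for index in range(min(rows), max(rows) + bin_size, bin_size):
    print(index, *rows.get(index, filler))
```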
8 | #### Example input (*myblanks.fsdb*):
9 |
10 | ```
11 | #fsdb -F t col1:l two:a andthree:d
12 | 2 key1 42.0
13 | 6 key2
14 | 10 90.2
15 | ```
16 |
17 | #### Example command usage
18 |
19 | ```
20 | $ pdbzerofill -c two andthree -v xxx -b 2 -t col1
21 | ```
22 |
23 | #### Example output
24 |
25 | ```
26 | #fsdb -F t col1:l two andthree:d
27 | 2 key1 42.0
28 | 4 xxx xxx
29 | 6 key2
30 | 8 xxx xxx
31 | 10 90.2
32 | ```
33 |
34 |
--------------------------------------------------------------------------------
/docs/tools/pdbzerofill.rst:
--------------------------------------------------------------------------------
1 | pdbzerofill - fills columns with zeros (or another value) when blank
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |
4 | ``pdbzerofill`` fills in rows that are missing from a series of rows
5 | with a numerically increasing (frequently a timestamp) index. This is a
6 | sister program to ``pdbensure``, which removes rows with missing data
7 | instead of creating them.
8 |
9 | Example input (*myblanks.fsdb*):
10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 |
12 | ::
13 |
14 | #fsdb -F t col1:l two:a andthree:d
15 | 2 key1 42.0
16 | 6 key2
17 | 10 90.2
18 |
19 | Example command usage
20 | ^^^^^^^^^^^^^^^^^^^^^
21 |
22 | ::
23 |
24 | $ pdbzerofill -c two andthree -v xxx -b 2 -t col1
25 |
26 | Example output
27 | ^^^^^^^^^^^^^^
28 |
29 | ::
30 |
31 | #fsdb -F t col1:l two andthree:d
32 | 2 key1 42.0
33 | 4 xxx xxx
34 | 6 key2
35 | 8 xxx xxx
36 | 10 90.2
37 |
38 |
39 | Command Line Arguments
40 | ^^^^^^^^^^^^^^^^^^^^^^
41 |
42 | .. sphinx_argparse_cli::
43 | :module: pyfsdb.tools.pdbzerofill
44 | :func: parse_args
45 | :hook:
46 | :prog: pdbzerofill
47 |
--------------------------------------------------------------------------------
/pyfsdb/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["fsdb"]
2 | from . import fsdb
3 |
4 | __VERSION__ = "2.4.3"
5 |
6 | __doc__ = fsdb.__doc__
7 | RETURN_AS_DICTIONARY = fsdb.RETURN_AS_DICTIONARY
8 | RETURN_AS_ARRAY = fsdb.RETURN_AS_ARRAY
9 | Fsdb = fsdb.Fsdb
10 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/obsolete/__init__.py
--------------------------------------------------------------------------------
/pyfsdb/obsolete/db2tex.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdb2tex
3 |
4 |
5 | def main():
6 | sys.stderr.write("db2tex is obsolete; please use pdb2tex instead\n")
7 | pyfsdb.tools.pdb2tex.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbaugment.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbaugment
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbaugment is obsolete; please use pdbaugment instead\n")
7 | pyfsdb.tools.pdbaugment.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbcoluniq.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbcoluniq
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbcoluniq is obsolete; please use pdbcoluniq instead\n")
7 | pyfsdb.tools.pdbcoluniq.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbdatetoepoch.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbdatetoepoch
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbdatetoepoch is obsolete; please use pdbdatetoepoch instead\n")
7 | pyfsdb.tools.pdbdatetoepoch.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbensure.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbensure
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbensure is obsolete; please use pdbensure instead\n")
7 | pyfsdb.tools.pdbensure.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbformat.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbformat
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbformat is obsolete; please use pdbformat instead\n")
7 | pyfsdb.tools.pdbformat.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbfullpivot.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbfullpivot
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbfullpivot is obsolete; please use pdbfullpivot instead\n")
7 | pyfsdb.tools.pdbfullpivot.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbheatmap.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbheatmap
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbheatmap is obsolete; please use pdbheatmap instead\n")
7 | pyfsdb.tools.pdbheatmap.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbkeyedsort.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbkeyedsort
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbkeyedsort is obsolete; please use pdbkeyedsort instead\n")
7 | pyfsdb.tools.pdbkeyedsort.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbnormalize.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbnormalize
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbnormalize is obsolete; please use pdbnormalize instead\n")
7 | pyfsdb.tools.pdbnormalize.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbreescape.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbreescape
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbreescape is obsolete; please use pdbreescape instead\n")
7 | pyfsdb.tools.pdbreescape.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbreversepivot.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbreversepivot
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbreversepivot is obsolete; please use pdbreversepivot instead\n")
7 | pyfsdb.tools.pdbreversepivot.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbsplitter.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbsplitter
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbsplitter is obsolete; please use pdbsplitter instead\n")
7 | pyfsdb.tools.pdbsplitter.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbsum.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbsum
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbsum is obsolete; please use pdbsum instead\n")
7 | pyfsdb.tools.pdbsum.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbtopn.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbtopn
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbtopn is obsolete; please use pdbtopn instead\n")
7 | pyfsdb.tools.pdbtopn.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/obsolete/dbzerofill.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pyfsdb.tools.pdbzerofill
3 |
4 |
5 | def main():
6 | sys.stderr.write("dbzerofill is obsolete; please use pdbzerofill instead\n")
7 | pyfsdb.tools.pdbzerofill.main()
8 |
9 |
10 | if __name__ == "__main__":
11 | main()
12 |
--------------------------------------------------------------------------------
/pyfsdb/tests/noheader.fsdb:
--------------------------------------------------------------------------------
1 | rowone info data
2 | rowtwo other stuff
3 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_add_types.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from io import StringIO
3 | from pyfsdb.tools.pdbaddtypes import add_types
4 | from unittest.mock import Mock
5 | from test_fsdb_class import truncate_comments
6 |
7 |
8 | class test_add_types(unittest.TestCase):
9 | def test_add_single_type(self):
10 | indata = StringIO("#fsdb -F s a b c\na 1 2.3")
11 | outdata = StringIO()
12 | outdata.close = Mock()
13 | add_types(indata, outdata, ["b=l"])
14 | self.assertEqual(
15 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c\na 1 2.3"
16 | )
17 |
18 | def test_add_multiple_types(self):
19 | indata = StringIO("#fsdb -F s a b c\na 1 2.3")
20 | outdata = StringIO()
21 | outdata.close = Mock()
22 | add_types(indata, outdata, ["b=l", "c=d"])
23 | self.assertEqual(
24 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3"
25 | )
26 |
27 | def test_merge_types(self):
28 | indata = StringIO("#fsdb -F s a b:l c\na 1 2.3")
29 | outdata = StringIO()
30 | outdata.close = Mock()
31 | add_types(indata, outdata, ["c=d"])
32 | self.assertEqual(
33 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3"
34 | )
35 |
36 | def test_override_types(self):
37 | indata = StringIO("#fsdb -F s a b:l c:d\na 1 2.3")
38 | outdata = StringIO()
39 | outdata.close = Mock()
40 | add_types(indata, outdata, ["b=d"])
41 | self.assertEqual(
42 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:d c:d\na 1 2.3"
43 | )
44 |
45 | def test_guess_converters(self):
46 | import pyfsdb
47 |
48 | indata = StringIO("#fsdb -F s a b c\na 1 2.3")
49 | f = pyfsdb.Fsdb(file_handle=indata, return_type=pyfsdb.RETURN_AS_DICTIONARY)
50 | row = next(f)
51 | self.assertEqual(row, {"a": "a", "b": "1", "c": "2.3"})
52 |
53 | converters = f.guess_converters(row)
54 | self.assertEqual(converters, {"b": int, "c": float})
55 |
56 | def test_auto_convert(self):
57 | indata = StringIO("#fsdb -F s a b c\na 1 2.3")
58 | outdata = StringIO()
59 | outdata.close = Mock()
60 | add_types(indata, outdata, auto_convert=True)
61 | self.assertEqual(
62 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:l c:d\na 1 2.3"
63 | )
64 |
65 | def test_auto_convert_overrides(self):
66 | indata = StringIO("#fsdb -F s a b c\na 1 2.3")
67 | outdata = StringIO()
68 | outdata.close = Mock()
69 | add_types(indata, outdata, types=["b=d"], auto_convert=True)
70 | self.assertEqual(
71 | truncate_comments(outdata.getvalue()), "#fsdb -F s a b:d c:d\na 1 2.3"
72 | )
73 |
74 |
75 | if __name__ == "__main__":
76 | import unittest
77 |
78 | unittest.main()
79 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_column_renames.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from io import StringIO
3 | import pyfsdb
4 |
5 | class FsdbTestColumnRename(unittest.TestCase):
6 | def test_column_renames(self):
7 | input_data = "#fsdb -F s one two\n1 2\n"
8 | fh = StringIO(input_data)
9 | fs = pyfsdb.Fsdb(file_handle=fh,
10 | return_type=pyfsdb.RETURN_AS_DICTIONARY)
11 |
12 | fs.column_names = ["_" + x for x in fs.column_names]
13 |
14 | data = next(fs)
15 |
16 | expected = { "_one": '1', "_two": '2' }
17 |
18 | self.assertEqual(data, expected,
19 | "failed to remap columns on the fly")
20 |
21 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_coluniq.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import re
3 | import sys
4 |
5 |
6 | def noop():
7 | pass
8 |
9 |
10 | def truncate_comments(value):
11 | value = re.sub("\n# +\\|.*", "", value)
12 | return value
13 |
14 |
15 | class TestColUniq(unittest.TestCase):
16 | def test_single_uniques(self):
17 | from io import StringIO
18 |
19 | data = "#fsdb -F t a b c\na\tb\tc\nb\tc\td\na\tb\td\n"
20 |
21 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns
22 |
23 | outh = StringIO()
24 | outh.close = noop
25 | datah = StringIO(data)
26 | filter_unique_columns(datah, outh, ["a"])
27 |
28 | # check the result
29 | self.assertEqual(
30 | truncate_comments(outh.getvalue()),
31 | "#fsdb -F t a:a\na\nb\n",
32 | "resulting values are right from uniq",
33 | )
34 |
35 | outh = StringIO()
36 | outh.close = noop
37 | datah = StringIO(data)
38 | filter_unique_columns(datah, outh, ["a"], count=True)
39 |
40 | # check the result
41 | self.assertEqual(
42 | truncate_comments(outh.getvalue()),
43 | "#fsdb -F t a:a count:l\na\t2\nb\t1\n",
44 | "resulting values are right from uniq",
45 | )
46 |
47 | def test_multi_keys(self):
48 | from io import StringIO
49 |
50 | data = "#fsdb -F t a b c\na\tb\tc\nb\tc\td\na\tb\td\n"
51 |
52 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns
53 |
54 | outh = StringIO()
55 | outh.close = noop
56 | datah = StringIO(data)
57 | filter_unique_columns(datah, outh, ["a", "b"])
58 |
59 | # check the result
60 | self.assertEqual(
61 | truncate_comments(outh.getvalue()),
62 | "#fsdb -F t a:a b:a\na\tb\nb\tc\n",
63 | "resulting values are right from uniq",
64 | )
65 |
66 | #
67 | # three columns with counting
68 | #
69 | data = "#fsdb -F t x:a y:a z:a\na\tb\tc\nb\tc\td\na\tb\td\na\tb\tc\n"
70 |
71 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns
72 |
73 | outh = StringIO()
74 | outh.close = noop
75 | datah = StringIO(data)
76 | filter_unique_columns(datah, outh, ["x", "y", "z"], count=True)
77 |
78 | # check the result
79 | self.assertEqual(
80 | truncate_comments(outh.getvalue()),
81 | "#fsdb -F t x:a y:a z:a count:l\na\tb\tc\t2\na\tb\td\t1\nb\tc\td\t1\n",
82 | "resulting values are right from uniq",
83 | )
84 |
85 | def test_aggregate(self):
86 | from io import StringIO
87 |
88 | data = "#fsdb -F t a b c count\na\tb\tc\t2\nb\tc\td\t4\na\tb\tc\t10\n"
89 |
90 | from pyfsdb.tools.pdbcoluniq import filter_unique_columns
91 |
92 | outh = StringIO()
93 | outh.close = noop
94 | datah = StringIO(data)
95 | filter_unique_columns(
96 | datah, outh, ["a", "b", "c"], count=True, initial_count_key="count"
97 | )
98 |
99 |         # check the result
100 | output = outh.getvalue()
101 | self.assertEqual(
102 | truncate_comments(output),
103 | "#fsdb -F t a:a b:a c:a count:l\na\tb\tc\t12\nb\tc\td\t4\n",
104 | "resulting values are right from uniq",
105 | )
106 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_command_parsing.py:
--------------------------------------------------------------------------------
1 | import pyfsdb
2 | import unittest
3 | from io import StringIO
4 | from logging import error
5 |
6 |
7 | class TestCommandParsing(unittest.TestCase):
8 | commands = ["command1", "command2"]
9 | DATA_FILE = "pyfsdb/tests/tests.fsdb"
10 | COMP_FILE = "pyfsdb/tests/testscomp.fsdb.xz"
11 | test_data = "#fsdb -f s a b c\n1 2 3\n4 5 6\n# | command one"
12 | ROW1 = ["rowone", "info", "data"]
13 | ROW2 = ["rowtwo", "other", "stuff"]
14 |
15 | def test_history_from_stringio_fails(self):
16 | test_file = StringIO(self.test_data)
17 | fh = pyfsdb.Fsdb(file_handle=test_file)
18 |
19 | history_data = fh.commands
20 | self.assertEqual(history_data, None)
21 |
22 | def test_get_commands_at_end(self):
23 | fh = pyfsdb.Fsdb(self.DATA_FILE)
24 | fh.get_all()
25 | read_commands = fh.commands
26 | self.assertEqual(self.commands, read_commands)
27 |
28 | def test_get_commands_before_end(self):
29 | fh = pyfsdb.Fsdb(self.DATA_FILE)
30 | read_commands = fh.commands
31 | self.assertEqual(next(fh), self.ROW1)
32 | self.assertEqual(self.commands, read_commands)
33 |
34 | # make sure we can read data too even after reading ahead
35 | self.assertEqual(next(fh), self.ROW2)
36 |
37 | def test_compressed_files(self):
38 |         # ensure we can test this
39 | try:
40 | import lzma
41 | except Exception:
42 | return
43 |
44 | fh = pyfsdb.Fsdb(self.COMP_FILE)
45 | row = next(fh)
46 | self.assertEqual(row, self.ROW1)
47 |
48 | def test_command_gathering_in_compressed(self):
49 |         # ensure we can test this
50 | try:
51 | import lzma
52 | except Exception:
53 | return
54 |
55 | fh = pyfsdb.Fsdb(self.COMP_FILE)
56 | row = next(fh)
57 | self.assertEqual(row, self.ROW1)
58 |
59 | test_commands = fh.commands
60 | self.assertEqual(test_commands, None) # None == failure to read
61 |
62 | row = next(fh)
63 | self.assertEqual(row, self.ROW2)
64 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_comments_at_top.fsdb:
--------------------------------------------------------------------------------
1 | #fsdb -F t one:a two:a
2 | # another comment
3 | 1 2
4 | # done
5 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_comments_at_top.test.fsdb:
--------------------------------------------------------------------------------
1 | #fsdb -F t one:a two:a
2 | # another comment
3 | 1 2
4 | # done
5 | # | /usr/bin/pytest-3
6 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_json.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from io import StringIO
3 |
4 |
5 | class test_json_functions(unittest.TestCase):
6 | def test_json_to_fsdb(self):
7 | from pyfsdb.tools.json2fsdb import json_to_fsdb
8 |
9 | self.assertTrue(json_to_fsdb, "loaded")
10 |
11 | inp = StringIO(
12 | '{"d":"f", "a":"c"}'
13 | + "\n"
14 | + '{"a":"b", "d":"e"}'
15 | + "\n"
16 | + '{"d": "x", "c": "2", "a": "y"}'
17 | + "\n"
18 | '{"d": "x"}' + "\n"
19 | )
20 | output = StringIO() # don't require converting to a string
21 | json_to_fsdb(inp, output)
22 |
23 | self.assertEqual(
24 | output.getvalue(),
25 | "#fsdb -F t a:a d:a\nc\tf\nb\te\ny\tx\n\tx\n",
26 | "output of json_to_fsdb is correct",
27 | )
28 |
29 | def test_fsdb_to_json(self):
30 | from pyfsdb.tools.fsdb2json import fsdb_to_json
31 |
32 | self.assertTrue(fsdb_to_json, "loaded")
33 |
34 | inp = StringIO("#fsdb -F t a d\nc\tf\nb\te\n")
35 | output = StringIO()
36 |
37 | fsdb_to_json(inp, output)
38 |
39 | self.assertEqual(
40 | output.getvalue(),
41 | '{"a": "c", "d": "f"}' + "\n" + '{"a": "b", "d": "e"}' + "\n",
42 | "output of fsdb_to_json is correct",
43 | )
44 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_label_shrink.py:
--------------------------------------------------------------------------------
1 | def test_label_shrink():
2 | from pyfsdb.tools.pdbheatmap import maybe_shrink_label
3 |
4 | assert True
5 |
6 | assert maybe_shrink_label("foo") == "foo"
7 | assert maybe_shrink_label("o" * 20) == "o" * 20
8 | assert maybe_shrink_label("o" * 10 + "p" * 10) == "o" * 10 + "p" * 10
9 | assert maybe_shrink_label("o" * 11 + "p" * 11, 20) == "o" * 9 + "..." + "p" * 8
10 | assert maybe_shrink_label("o" * 10 + "p" * 11, 20) == "o" * 9 + "..." + "p" * 8
11 | assert maybe_shrink_label("o" * 100 + "p" * 11000, 20) == "o" * 9 + "..." + "p" * 8
12 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_msgpack.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | import pyfsdb
3 | import io
4 | import re
5 |
6 |
7 | def truncate_comments(value):
8 | value = re.sub("\n# +\\|.*", "", value)
9 | return value
10 |
11 |
12 | class FsdbMsgPackTest(TestCase):
13 | fsdb_data = "#fsdb -F s a:l b:l\n1 2\n"
14 | encoded_data = b"#fsdb -F m a:l b:l\n\x92\x01\x02"
15 |
16 | def test_convert_to_msgpack(self):
17 | ih = pyfsdb.Fsdb(file_handle=io.StringIO(self.fsdb_data))
18 |
19 | def noop(*args, **kwargs):
20 | pass
21 |
22 | out_data = io.BytesIO()
23 | out_data.close = noop
24 |
25 | oh = pyfsdb.Fsdb(out_file_handle=out_data)
26 | oh.out_column_names = ih.column_names
27 | oh.converters = ih.converters
28 | oh.out_separator = "m"
29 |
30 | for row in ih:
31 | oh.append(row)
32 | oh.close()
33 |
34 | # the output data we expect should be:
35 | self.assertEqual(out_data.getvalue(), self.encoded_data)
36 |
37 | def test_convert_from_msgpack(self):
38 | ih = pyfsdb.Fsdb(file_handle=io.BytesIO(self.encoded_data))
39 |
40 | def noop(*args, **kwargs):
41 | pass
42 |
43 | out_data = io.StringIO()
44 | out_data.close = noop
45 |
46 | oh = pyfsdb.Fsdb(out_file_handle=out_data)
47 | oh.out_column_names = ih.column_names
48 | oh.converters = ih.converters
49 | oh.out_separator = " "
50 |
51 | for row in ih:
52 | oh.append(row)
53 | oh.close()
54 |
55 | # the output data we expect should be:
56 | results = out_data.getvalue()
57 | self.assertEqual(truncate_comments(results), self.fsdb_data)
58 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_pdbaugment.py:
--------------------------------------------------------------------------------
1 | from pyfsdb.tools.pdbaugment import stash_row, find_row
2 |
3 |
4 | def test_cache_saving():
5 | rows = [
6 | {"a": 1, "b": 2, "c": 3},
7 | {"a": 4, "b": 5, "c": 6},
8 | ]
9 |
10 | cache = {}
11 |
12 | for row in rows:
13 | stash_row(cache, ["a", "b"], row)
14 |
15 | open("/tmp/x", "w").write(str(cache) + "\n")
16 |
17 | assert cache == {
18 | 1: {2: {"data": {"a": 1, "b": 2, "c": 3}}},
19 | 4: {5: {"data": {"a": 4, "b": 5, "c": 6}}},
20 | }
21 |
22 | # now try looking up the results
23 |
24 | search_row = {"a": 1, "b": 2, "d": 33}
25 | result = find_row(cache, ["a", "b"], search_row)
26 |
27 | assert result == {"a": 1, "b": 2, "c": 3}
28 |
29 | result = find_row(cache, ["a", "b"], search_row, return_data=False)
30 | assert result == {"data": {"a": 1, "b": 2, "c": 3}}
31 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_pdbcdf.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import io
3 |
4 |
5 | class test_pcdf(unittest.TestCase):
6 | def test_pcdf(self):
7 | from pyfsdb.tools.pdbcdf import process_cdf
8 |
9 | self.assertTrue(True, "loaded module")
10 |
11 | in_data = io.StringIO("#fsdb -F t a b\n1\t2\n3\t6\n")
12 | out_data = io.StringIO()
13 |
14 | process_cdf(in_data, out_data, "b")
15 |
16 | result = out_data.getvalue()
17 |
18 | self.assertEqual(
19 | result,
20 | "#fsdb -F t a b b_cdf\n1\t2\t0.25\n3\t6\t1.0\n",
21 | "results (sum) were as expected",
22 | )
23 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_pdbjinja.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | import pyfsdb
3 | from io import StringIO
4 |
5 |
6 | def noop(**kwargs):
7 | pass
8 |
9 |
10 | class pdbjinjaTest(TestCase):
11 | def test_loading_pdbjinja(self):
12 | import pyfsdb.tools.pdbjinja
13 |
14 | self.assertTrue("loaded")
15 |
16 | def test_pdbjinja(self):
17 | input_data = "#fsdb -F t a b c\n1\t2\t3\nd\te\tf\n"
18 | inputh = StringIO(input_data)
19 |
20 | jinja_template = "{% for row in rows %}{{row.b}}\n{% endfor %}"
21 | jinjah = StringIO(jinja_template)
22 |
23 | outh = StringIO()
24 | outh.close = noop
25 |
26 | import pyfsdb.tools.pdbjinja
27 |
28 | pyfsdb.tools.pdbjinja.process(inputh, jinjah, outh)
29 | self.assertTrue("ran")
30 |
31 | # actually test the results
32 | result = outh.getvalue()
33 | self.assertEqual(result, "2\ne\n", "expected template results are correct")
34 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_sql.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | class test_sql_support(unittest.TestCase):
5 | def test_load(self):
6 | import pyfsdb.tools.pdb2sql
7 |
--------------------------------------------------------------------------------
/pyfsdb/tests/test_utf8.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pyfsdb
3 | from io import StringIO
4 |
5 |
6 | DATA = "#fsdb -F t test:i copy©:i foo:a\n1\t2\t3\n4\t5\t©\n"
7 |
8 |
9 | @pytest.fixture
10 | def create_file(tmp_path):
11 | tmp_file = tmp_path / "test.fsdb"
12 | fh = open(tmp_file, "wb")
13 | fh.write(bytes(DATA, "utf-8"))
14 | fh.close()
15 | yield tmp_file
16 |
17 |
18 | def do_test_utf8_file_handle(fh):
19 | row = next(fh)
20 | assert fh.column_names == ["test", "copy©", "foo"]
21 | assert row == [1, 2, "3"]
22 |
23 | row = next(fh)
24 | assert row == [4, 5, "©"]
25 |
26 |
27 | def test_utf8_support_stringio():
28 | DATA_stream = StringIO(DATA)
29 | fh = pyfsdb.Fsdb(file_handle=DATA_stream)
30 | do_test_utf8_file_handle(fh)
31 |
32 |
33 | def test_utf8_support_file(create_file):
34 | fh = pyfsdb.Fsdb(create_file)
35 | do_test_utf8_file_handle(fh)
36 |
37 |
38 | def test_utf8_creation(tmp_path):
39 | tmp_file = tmp_path / "test-write.fsdb"
40 | fh = pyfsdb.Fsdb(out_file=tmp_file)
41 | fh.out_column_names = ["test", "copy©", "foo"]
42 | fh.append([4, 5, "©"])
43 | fh.close()
44 |
--------------------------------------------------------------------------------
/pyfsdb/tests/testout.fsdb:
--------------------------------------------------------------------------------
1 | #fsdb -F s colone coltwo colthree
2 | rowone info data
3 | # middle comment
4 | rowtwo other stuff
5 | # | command1
6 | # | command2
7 | # | /usr/bin/pytest-3
8 |
--------------------------------------------------------------------------------
/pyfsdb/tests/tests.fsdb:
--------------------------------------------------------------------------------
1 | #fsdb -F t colone coltwo colthree
2 | rowone info data
3 | # middle comment
4 | rowtwo other stuff
5 | # | command1
6 | # | command2
7 |
--------------------------------------------------------------------------------
/pyfsdb/tests/testscomp.fsdb.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/tests/testscomp.fsdb.xz
--------------------------------------------------------------------------------
/pyfsdb/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hardaker/pyfsdb/4437e19969ceb977e7b3b630dc4f56dee8a8c27a/pyfsdb/tools/__init__.py
--------------------------------------------------------------------------------
/pyfsdb/tools/bro2fsdb.py:
--------------------------------------------------------------------------------
1 | """Converts a bro (zeek) log to a file readable by FSDB.
2 | Bro logs are already tab separated, so we really just replace
3 | the headers and re-print the rest. brotofsdb assumes
4 | the bro log is properly formatted (ie, tab separated already)."""
5 |
6 | import argparse
7 | import sys
8 |
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__
13 | )
14 |
15 | parser.add_argument(
16 | "-l",
17 | "--leave-bro-headers",
18 | action="store_true",
19 | help="Leave the bro headers in place right after the new FSDB header",
20 | )
21 |
22 | parser.add_argument(
23 | "input_file",
24 | type=argparse.FileType("r"),
25 | nargs="?",
26 | default=sys.stdin,
27 | help="The input file (bro log) to read",
28 | )
29 |
30 | parser.add_argument(
31 | "output_file",
32 | type=argparse.FileType("w"),
33 | nargs="?",
34 | default=sys.stdout,
35 | help="The output file (FSDB log) to write back out",
36 | )
37 |
38 | args = parser.parse_args()
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | leave_headers = []
46 |
47 | # read in all the headers, looking for certain things
48 | column_names = []
49 | for line in args.input_file:
50 | if line[0] != "#":
51 | break
52 |
53 | if args.leave_bro_headers:
54 | leave_headers.append(line)
55 |
56 | if line[0:7] == "#fields":
57 | column_names = line.replace(".", "_").split("\t")
58 | column_names.pop(0)
59 |
60 | # print out the FSDB header
61 | args.output_file.write("#fsdb -F t " + " ".join(column_names))
62 |
63 | # optionally add back in the bro headers
64 | if args.leave_bro_headers:
65 | args.output_file.write("".join(leave_headers))
66 |
67 |     # copy out the rest of the file
68 | args.output_file.write(line)
69 | for line in args.input_file:
70 | args.output_file.write(line)
71 |
72 | # append our trailing command
73 | args.output_file.write("# " + sys.argv[0] + "\n")
74 |
75 |
76 | if __name__ == "__main__":
77 | main()
78 |
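
A minimal sketch of the header rewrite that bro2fsdb performs, using a hypothetical zeek `#fields` line (the field names are illustrative only):

    # sketch: how a zeek "#fields" line becomes an FSDB header (hypothetical fields)
    line = "#fields\tts\tid.orig_h\tid.resp_h\n"
    column_names = line.replace(".", "_").split("\t")
    column_names.pop(0)  # drop the leading "#fields" token
    header = "#fsdb -F t " + " ".join(column_names)
    # header == "#fsdb -F t ts id_orig_h id_resp_h\n"
    # (the newline rides along on the last field, which is why main() writes no "\n")
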
--------------------------------------------------------------------------------
/pyfsdb/tools/fsdb2many.py:
--------------------------------------------------------------------------------
1 | """fsdb2many converts a single FSDB file into many, by creating
2 | other file names based on a column of the original."""
3 |
4 | import sys
5 | import argparse
6 | import pyfsdb
7 | import re
8 |
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
13 | description=__doc__,
14 | epilog="fsdb2many -c key -o outputdir/%s.fsdb mybigfile.fsdb",
15 | )
16 |
17 | parser.add_argument(
18 | "-c", "--column", default="key", type=str, help="Column to split on"
19 | )
20 |
21 | parser.add_argument(
22 | "-o",
23 | "--output-pattern",
24 | default="fsdb2many-out-%s.fsdb",
25 | type=str,
26 |         help="Pattern for the per-value output file names, which should contain a '%%s' placeholder where the column value being saved to that file will be inserted.",
27 | )
28 |
29 | parser.add_argument(
30 | "input_file",
31 | type=argparse.FileType("r"),
32 | nargs="?",
33 | default=sys.stdin,
34 |         help="The input FSDB file to read",
35 | )
36 |
37 | args = parser.parse_args()
38 |
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | # open the input file
46 | inh = pyfsdb.Fsdb(file_handle=args.input_file)
47 | key_column = inh.get_column_number(args.column)
48 |
49 | out_handles = {}
50 |
51 | for row in inh:
52 | value = row[key_column]
53 |
54 | # see if we have an open file handle for this one yet
55 | if value not in out_handles:
56 | # new value, so open a new file handle to save data for it
57 | file_name = re.sub("[^-.0-9a-zA-Z_]", "_", str(value))
58 | outh = pyfsdb.Fsdb(out_file=(args.output_pattern % file_name))
59 | outh.init_output_from(inh)
60 | out_handles[value] = outh
61 |
62 | # save the row to the file based on its value
63 | out_handles[value].append(row)
64 |
65 | # clean up
66 | for handle in out_handles:
67 | out_handles[handle].close()
68 |
69 |
70 | if __name__ == "__main__":
71 | main()
72 |
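
A small sketch of how fsdb2many derives a per-value file name from the output pattern; the value "udp/53" and the "outputdir" pattern are hypothetical:

    import re

    # sanitize a column value into a safe file name, then apply the pattern
    value = "udp/53"
    file_name = re.sub("[^-.0-9a-zA-Z_]", "_", str(value))
    print("outputdir/%s.fsdb" % file_name)  # -> outputdir/udp_53.fsdb
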
--------------------------------------------------------------------------------
/pyfsdb/tools/json2fsdb.py:
--------------------------------------------------------------------------------
1 | """Converts a JSON file containing either an array of dictionaries or
2 | individual dictionary lines into an FSDB file"""
3 |
4 | import sys
5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
6 | import json
7 | import pyfsdb
8 |
9 |
10 | def parse_args():
11 | """Parse command line arguments"""
12 | parser = ArgumentParser(
13 | formatter_class=ArgumentDefaultsHelpFormatter, description=__doc__
14 | )
15 |
16 | parser.add_argument(
17 | "input_file",
18 | type=FileType("r"),
19 | nargs="?",
20 | default=sys.stdin,
21 | help="The input file (json file) to read",
22 | )
23 |
24 | parser.add_argument(
25 | "output_file",
26 | type=FileType("w"),
27 | nargs="?",
28 | default=sys.stdout,
29 | help="The output file (FSDB file) to write back out",
30 | )
31 |
32 | args = parser.parse_args()
33 | return args
34 |
35 |
36 | def handle_rows(out_fsdb, rows, columns):
37 | "Output each row in an array to the output fsdb file"
38 | for row in rows:
39 | out = []
40 | for column in columns:
41 | if column in row:
42 | out.append(row[column])
43 | else:
44 | out.append("")
45 | out_fsdb.append(out)
46 |
47 |
48 | def json_to_fsdb(input_file, output_file):
49 | """A function that converts an input file stream of json dictionary
50 | to an output FSDB file, where the header column names are pulled
51 | from the first record keys."""
52 | first_line = next(input_file)
53 |
54 | try:
55 | rows = json.loads(first_line)
56 | if not isinstance(rows, list):
57 | rows = [rows]
58 | except Exception as exp:
59 | sys.stderr.write("failed to parse the first line as json:\n")
60 | sys.stderr.write(first_line)
61 | sys.stderr.write(str(exp))
62 | sys.exit(1)
63 |
64 | columns = sorted(list(rows[0].keys()))
65 | out_fsdb = pyfsdb.Fsdb(out_file_handle=output_file)
66 | out_fsdb.out_column_names = columns
67 | handle_rows(out_fsdb, rows, columns)
68 |
69 | for line in input_file:
70 | try:
71 | rows = json.loads(line)
72 | if not isinstance(rows, list):
73 | rows = [rows]
74 | handle_rows(out_fsdb, rows, columns)
75 | except Exception:
76 | sys.stderr.write("failed to parse: " + line)
77 |
78 |
79 | def main():
80 | "CLI wrapper around json_to_fsdb"
81 | args = parse_args()
82 | json_to_fsdb(args.input_file, args.output_file)
83 |
84 |
85 | if __name__ == "__main__":
86 | main()
87 |
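
A minimal sketch of calling json_to_fsdb directly, reusing data and the expected output from pyfsdb/tests/test_json.py; StringIO stands in for real file handles:

    from io import StringIO
    from pyfsdb.tools.json2fsdb import json_to_fsdb

    # header columns come from the sorted keys of the first record
    inp = StringIO('{"d":"f", "a":"c"}\n{"a":"b", "d":"e"}\n')
    out = StringIO()
    json_to_fsdb(inp, out)
    # out.getvalue() -> '#fsdb -F t a:a d:a\nc\tf\nb\te\n' (per test_json.py)
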
--------------------------------------------------------------------------------
/pyfsdb/tools/msgpack2pdb.py:
--------------------------------------------------------------------------------
1 | """Converts a msgpack FSDB representation to a normal FSDB text file"""
2 |
3 | import pyfsdb
4 |
5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
6 | from logging import debug, info, warning, error, critical
7 | import logging
8 | import sys
9 | import io
10 |
11 | # optionally use rich
12 | try:
13 | from rich import print
14 | from rich.logging import RichHandler
15 | except Exception:
16 | pass
17 |
18 |
19 | def parse_args():
20 | "Parse the command line arguments."
21 | parser = ArgumentParser(
22 | formatter_class=ArgumentDefaultsHelpFormatter,
23 | description=__doc__,
24 |         epilog="Example Usage: ",
25 | )
26 |
27 | parser.add_argument(
28 | "--log-level",
29 | "--ll",
30 | default="info",
31 |         help="Define the logging verbosity level (debug, info, warning, error, fatal, critical).",
32 | )
33 |
34 | parser.add_argument(
35 | "input_file", type=FileType("rb"), nargs="?", default=sys.stdin, help=""
36 | )
37 |
38 | parser.add_argument(
39 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help=""
40 | )
41 |
42 | args = parser.parse_args()
43 | log_level = args.log_level.upper()
44 | handlers = []
45 | datefmt = None
46 | messagefmt = "%(levelname)-10s:\t%(message)s"
47 |
48 | # see if we're rich
49 | try:
50 | handlers.append(RichHandler(rich_tracebacks=True))
51 | datefmt = " "
52 | messagefmt = "%(message)s"
53 | except Exception:
54 | pass
55 |
56 | logging.basicConfig(
57 | level=log_level, format=messagefmt, datefmt=datefmt, handlers=handlers
58 | )
59 | return args
60 |
61 |
62 | def main():
63 | args = parse_args()
64 |
65 | in_fsdb = pyfsdb.Fsdb(
66 | file_handle=args.input_file,
67 | return_type=pyfsdb.RETURN_AS_ARRAY,
68 | )
69 |
70 | oh = pyfsdb.Fsdb(
71 | out_file_handle=args.output_file,
72 | out_column_names=in_fsdb.column_names,
73 | )
74 |
75 | for row in in_fsdb:
76 | oh.append(row)
77 |
78 |
79 | if __name__ == "__main__":
80 | main()
81 |
--------------------------------------------------------------------------------
/pyfsdb/tools/pdb2msgpack.py:
--------------------------------------------------------------------------------
1 | """Converts a textual FSDB representation to a efficient msgpack binary encoding"""
2 |
3 | import pyfsdb
4 |
5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
6 | from logging import debug, info, warning, error, critical
7 | import logging
8 | import sys
9 |
10 | # optionally use rich
11 | try:
12 | from rich import print
13 | from rich.logging import RichHandler
14 | except Exception:
15 | pass
16 |
17 |
18 | def parse_args():
19 | "Parse the command line arguments."
20 | parser = ArgumentParser(
21 | formatter_class=ArgumentDefaultsHelpFormatter,
22 | description=__doc__,
23 |         epilog="Example Usage: ",
24 | )
25 |
26 | parser.add_argument(
27 | "--log-level",
28 | "--ll",
29 | default="info",
30 |         help="Define the logging verbosity level (debug, info, warning, error, fatal, critical).",
31 | )
32 |
33 | parser.add_argument(
34 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help=""
35 | )
36 |
37 | parser.add_argument(
38 | "output_file", type=FileType("wb"), nargs="?", default=sys.stdout, help=""
39 | )
40 |
41 | args = parser.parse_args()
42 | log_level = args.log_level.upper()
43 | handlers = []
44 | datefmt = None
45 | messagefmt = "%(levelname)-10s:\t%(message)s"
46 |
47 | # see if we're rich
48 | try:
49 | handlers.append(RichHandler(rich_tracebacks=True))
50 | datefmt = " "
51 | messagefmt = "%(message)s"
52 | except Exception:
53 | pass
54 |
55 | logging.basicConfig(
56 | level=log_level, format=messagefmt, datefmt=datefmt, handlers=handlers
57 | )
58 | return args
59 |
60 |
61 | def main():
62 | args = parse_args()
63 |
64 | in_fsdb = pyfsdb.Fsdb(
65 | file_handle=args.input_file,
66 | return_type=pyfsdb.RETURN_AS_ARRAY,
67 | )
68 |
69 | oh = pyfsdb.Fsdb(
70 | # out_file_handle=args.output_file,
71 | out_file_handle=args.output_file,
72 | out_column_names=in_fsdb.column_names,
73 | )
74 | oh.out_separator_token = "m" # save as msgpack
75 |
76 | # for record in in_fsdb:
77 | for row in in_fsdb:
78 | oh.append(row)
79 |
80 |
81 | if __name__ == "__main__":
82 | main()
83 |
--------------------------------------------------------------------------------
/pyfsdb/tools/pdb2tex.py:
--------------------------------------------------------------------------------
1 | """db2tex converts any FSDB file into a latex table.
 2 | WARNING: very little escaping is done -- watch out for malicious input files."""
3 |
4 | import argparse
5 | import sys
6 | import pyfsdb
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(
11 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
12 | description=__doc__,
13 |         epilog="Example Usage: pdb2tex -c col1 col2 -p cc input.fsdb",
14 | )
15 |
16 | parser.add_argument(
17 | "-p",
18 | "--tabular-profile",
19 | type=str,
20 | help="The column profile to pass to tabular. The default will be all 'l's.",
21 | )
22 |
23 | parser.add_argument(
24 | "-c",
25 | "--columns",
26 | type=str,
27 | nargs="*",
28 | help="Column names to include; will use all if not specified",
29 | )
30 |
31 | parser.add_argument(
32 | "-C", "--caption", type=str, help="Use this as the caption for the table"
33 | )
34 |
35 | parser.add_argument(
36 |         "-l", "--label", type=str, help="Add a label to the table (e.g., tab:foo)"
37 | )
38 |
39 | parser.add_argument(
40 | "input_file",
41 | type=argparse.FileType("r"),
42 | nargs="?",
43 | default=sys.stdin,
44 | help="The input FSDB file",
45 | )
46 |
47 | parser.add_argument(
48 | "output_file",
49 | type=argparse.FileType("w"),
50 | nargs="?",
51 | default=sys.stdout,
52 | help="The output file to print latex table data to",
53 | )
54 |
55 | args = parser.parse_args()
56 | return args
57 |
58 |
59 | def latex_escape(value):
60 | return str(value).replace("\\", "\\\\").replace("_", "\\_").replace("&", "\\&")
61 |
62 |
63 | def main():
64 | args = parse_args()
65 |
66 | inh = pyfsdb.Fsdb(file_handle=args.input_file)
67 | outh = args.output_file
68 |
69 | columns = args.columns
70 | if not columns:
71 | columns = inh.column_names
72 |
73 | if args.tabular_profile:
74 | specifier = args.tabular_profile
75 | else:
76 | specifier = "l" * len(columns)
77 |
78 | column_numbers = inh.get_column_numbers(columns)
79 |
80 | # write out the header info
81 | outh.write("\\begin{table}\n")
82 | outh.write(" \\begin{tabular}{%s}\n" % (specifier))
83 |
84 | for num, column in enumerate(columns):
85 | if num == 0:
86 | outh.write(" \\textbf{%s}" % (latex_escape(column)))
87 | else:
88 | outh.write(" & \\textbf{%s}" % (latex_escape(column)))
89 | outh.write(" \\\\\n")
90 |
91 | for row in inh:
92 | for num, column in enumerate(column_numbers):
93 | if num == 0:
94 | outh.write(" %s" % (latex_escape(row[column])))
95 | else:
96 | outh.write(" & %s" % (latex_escape(row[column])))
97 | outh.write(" \\\\\n")
98 |
99 | outh.write(" \\end{tabular}\n")
100 | if args.caption:
101 | outh.write(" \\caption{%s}\n" % (args.caption))
102 | if args.label:
103 | outh.write(" \\label{%s}\n" % (args.label))
104 | outh.write("\\end{table}\n")
105 |
106 |
107 | if __name__ == "__main__":
108 | main()
109 |
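
A quick sketch of latex_escape on a hypothetical column name, showing why underscores and ampersands survive in the generated table:

    from pyfsdb.tools.pdb2tex import latex_escape

    # underscores and ampersands are escaped for latex (the input string is hypothetical)
    print(latex_escape("rate_limit & drop_count"))  # -> rate\_limit \& drop\_count
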
--------------------------------------------------------------------------------
/pyfsdb/tools/pdb2to1.py:
--------------------------------------------------------------------------------
1 | "Converts a FSDB2 (with type specifications) to an FSDB1 for use with older tools"
2 |
3 | import sys
4 | import os
5 | import argparse
6 | import collections
7 |
8 | import pyfsdb
9 | import re
10 |
11 |
12 | def parse_args():
13 | formatter_class = argparse.ArgumentDefaultsHelpFormatter
14 | parser = argparse.ArgumentParser(
15 | formatter_class=formatter_class, description=__doc__
16 | )
17 |
18 | parser.add_argument(
19 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help=""
20 | )
21 |
22 | parser.add_argument(
23 | "output_file",
24 | type=argparse.FileType("w"),
25 | nargs="?",
26 | default=sys.stdout,
27 | help="",
28 | )
29 |
30 | args = parser.parse_args()
31 | return args
32 |
33 |
34 | def main():
35 | args = parse_args()
36 |
37 | # we do this without using an FSDB class, since raw I/O is faster
38 | fsdb_line = next(args.input_file)
39 | fsdb_line = re.sub(r":\w+", "", fsdb_line)
40 | args.output_file.write(fsdb_line)
41 |
42 | while True:
43 |         data = args.input_file.read(1024 * 1024 * 1024)  # 1 GB at a time
44 | if not data:
45 | break
46 | args.output_file.write(data)
47 |
48 |
49 | if __name__ == "__main__":
50 | main()
51 |
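
The FSDB2-to-FSDB1 header rewrite above boils down to a single regular expression; a standalone sketch with a hypothetical header line:

    import re

    # strip ":type" suffixes from an FSDB2 header line (the header is hypothetical)
    header = "#fsdb -F t a:l b:d c:a\n"
    print(re.sub(r":\w+", "", header))  # -> #fsdb -F t a b c
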
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbaddtypes.py:
--------------------------------------------------------------------------------
1 | """Adds type hints for converting a FSDB1 format to add type hints to columns.
2 | This allows compliant tools to get automatic type conversion within their scripts."""
3 |
4 | import sys
5 | import os
6 | import argparse
7 | import collections
8 |
9 | import pyfsdb
10 | import re
11 | import io
12 |
13 |
14 | def parse_args():
15 | formatter_class = argparse.ArgumentDefaultsHelpFormatter
16 | parser = argparse.ArgumentParser(
17 | formatter_class=formatter_class, description=__doc__
18 | )
19 |
20 | parser.add_argument(
21 | "-t",
22 | "--type-list",
23 | default=[],
24 | type=str,
25 | nargs="*",
26 | help="A list of column=type values, where type can be 'd' (float) or 'l' (integer)",
27 | )
28 |
29 | parser.add_argument(
30 | "-a",
31 | "--auto-types",
32 | action="store_true",
33 | help="Guess at type values based on the first row",
34 | )
35 |
36 | parser.add_argument(
37 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help=""
38 | )
39 |
40 | parser.add_argument(
41 | "output_file",
42 | type=argparse.FileType("w"),
43 | nargs="?",
44 | default=sys.stdout,
45 | help="",
46 | )
47 |
48 | args = parser.parse_args()
49 | return args
50 |
51 |
52 | def add_types(input_file, output_file, types=[], auto_convert=False):
53 | # we do this without using an FSDB class, since raw I/O is faster
54 | fsdb_line = next(input_file)
55 | first_line = next(input_file)
56 | buffer = io.StringIO(fsdb_line + first_line)
57 |
58 | fh = pyfsdb.Fsdb(file_handle=buffer, return_type=pyfsdb.RETURN_AS_DICTIONARY)
59 | columns = fh.column_names
60 |
61 | converters = fh.converters
62 |
63 | # if auto_conversion, then make some guesses
64 | if auto_convert:
65 | first_row = next(fh)
66 | converters = fh.guess_converters(first_row)
67 |
68 | if not converters:
69 | converters = {}
70 |
71 | # specifications should override autos
72 | for specification in types:
73 | (column, dtype) = specification.split("=")
74 | if column not in columns:
75 |             raise ValueError(f"Invalid column: {column} in '{specification}'")
76 | converters[column] = pyfsdb.fsdb.incoming_type_converters[dtype]
77 |
78 | # create the new header line with conversions in place
79 | fh.converters = converters
80 | new_header = fh.create_header_line(separator_token=fh.separator_token)
81 |
82 | output_file.write(new_header)
83 | output_file.write(first_line)
84 |
85 | # read the rest as chunks
86 | while True:
87 |         data = input_file.read(1024 * 1024 * 1024)  # 1 GB at a time
88 | if not data:
89 | break
90 | output_file.write(data)
91 |
92 |
93 | def main():
94 | args = parse_args()
95 |
96 | add_types(args.input_file, args.output_file, args.type_list, args.auto_types)
97 |
98 |
99 | if __name__ == "__main__":
100 | main()
101 |
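
A minimal sketch of add_types, mirroring the override test near the top of pyfsdb/tests/test_add_types.py; StringIO and Mock stand in for real file handles:

    from io import StringIO
    from unittest.mock import Mock
    from pyfsdb.tools.pdbaddtypes import add_types

    indata = StringIO("#fsdb -F s a b c\na 1 2.3")
    outdata = StringIO()
    outdata.close = Mock()  # keep the buffer readable afterwards
    add_types(indata, outdata, types=["b=d"], auto_convert=True)
    # the header becomes "#fsdb -F s a b:d c:d": the explicit b=d overrides the guess
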
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbdatetoepoch.py:
--------------------------------------------------------------------------------
1 | """dbdatetoepoch converts a timestamp column with a human date to a
2 | unix epoch timestamp column"""
3 |
4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
5 | import sys
6 | import pyfsdb
7 | import warnings
8 | from dateparser import parse
9 |
10 | # from https://github.com/scrapinghub/dateparser/issues/1013
11 | # Ignore dateparser warnings regarding pytz
12 | warnings.filterwarnings(
13 | "ignore",
14 | message="The localize method is no longer necessary",
15 | )
16 |
17 |
18 | def parse_args():
19 | parser = ArgumentParser(
20 | formatter_class=ArgumentDefaultsHelpFormatter,
21 | description=__doc__,
22 |         epilog="Example Usage: dbdatetoepoch -d human_column -t timestamp_column input.fsdb output.fsdb",
23 | )
24 |
25 | parser.add_argument(
26 | "-d", "--date-column", default="date", type=str, help="Date column to use"
27 | )
28 |
29 | parser.add_argument(
30 | "-t",
31 | "--timestamp-column",
32 | default="timestamp",
33 | type=str,
34 | help="Column to create for storing an epoch column",
35 | )
36 |
37 | parser.add_argument(
38 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help=""
39 | )
40 |
41 | parser.add_argument(
42 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help=""
43 | )
44 |
45 | args = parser.parse_args()
46 | return args
47 |
48 |
49 | def main():
50 | args = parse_args()
51 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file)
52 | column_names = fh.column_names
53 | fh.out_column_names = column_names + [args.timestamp_column]
54 |
55 | date_column = fh.get_column_number(args.date_column)
56 | for row in fh:
57 | timestamp_value = 0
58 | try:
59 | timestamp_value = parse(row[date_column]).timestamp()
60 | except Exception:
61 | pass
62 | row[-1] = timestamp_value # XXX: this should be append
63 | fh.append(row)
64 |
65 |
66 | if __name__ == "__main__":
67 | main()
68 |
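
The per-row conversion above reduces to dateparser's parse plus datetime.timestamp; a tiny sketch (the date string is hypothetical):

    from dateparser import parse

    # a human-readable date becomes seconds since the unix epoch
    ts = parse("2001-02-03 04:05:06").timestamp()
    print(ts)  # seconds since the epoch (interpreted in the local timezone)
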
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbensure.py:
--------------------------------------------------------------------------------
1 | """dbensure can be used that some or all fields in a table contain data.
2 |
3 | If rows with the specified columns (default: all) don't contain data,
4 | they're dropped from the output rows."""
5 |
6 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
7 | import sys
8 |
9 | import pyfsdb
10 |
11 |
12 | def parse_args():
13 | parser = ArgumentParser(
14 | formatter_class=ArgumentDefaultsHelpFormatter,
15 | description=__doc__,
16 |         epilog="Example Usage: dbensure input_file.fsdb output_file.fsdb",
17 | )
18 |
19 | parser.add_argument(
20 | "-c", "--columns", nargs="*", help="The columns to check in the data"
21 | )
22 |
23 | parser.add_argument(
24 | "-v",
25 | "--fill",
26 | default=None,
27 | type=str,
28 | help="Don't drop the rows but fill with this value if a column is missing",
29 | )
30 |
31 | parser.add_argument(
32 | "-e",
33 | "--print-error",
34 | action="store_true",
35 | help="Print an error message on each dropped row",
36 | )
37 |
38 | parser.add_argument(
39 | "input_file",
40 | type=FileType("r"),
41 | nargs="?",
42 | default=sys.stdin,
43 | help="The input file to process",
44 | )
45 |
46 | parser.add_argument(
47 | "output_file",
48 | type=FileType("w"),
49 | nargs="?",
50 | default=sys.stdout,
51 | help="Where to send the output data",
52 | )
53 |
54 | args = parser.parse_args()
55 | return args
56 |
57 |
58 | def filter_row(row, columns, fill_value, print_error):
59 | for column in columns:
60 | if row[column] == "" or row[column] is None:
61 | if fill_value:
62 | row[column] = fill_value
63 | else:
64 | if print_error:
65 | sys.stderr.write("# dbensure dropping row:" + str(row) + "\n")
66 | return
67 | return row
68 |
69 |
70 | def main():
71 | args = parse_args()
72 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file)
73 |
74 | if args.columns:
75 | column_nums = fh.get_column_numbers(args.columns)
76 | else:
77 | column_nums = list(range(len(fh.column_names)))
78 |
79 | fh.filter(filter_row, args=[column_nums, args.fill, args.print_error])
80 |
81 |
82 | if __name__ == "__main__":
83 | main()
84 |
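
A small sketch of filter_row on hand-built rows, showing the drop and fill behaviors (the sample rows are hypothetical):

    from pyfsdb.tools.pdbensure import filter_row

    # columns 0 and 1 must be non-empty; no fill value, no error printing
    print(filter_row(["a", "b", "c"], [0, 1], None, False))  # -> ['a', 'b', 'c']
    print(filter_row(["a", "", "c"], [0, 1], None, False))   # -> None (row is dropped)
    print(filter_row(["a", "", "c"], [0, 1], "-", False))    # -> ['a', '-', 'c'] (filled)
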
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbepochtodate.py:
--------------------------------------------------------------------------------
1 | """dbdatetoepoch converts a unix epoch timestamp column into a human
2 | readable date string usting strftime with an adjustable format."""
3 |
4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
5 | import sys
6 | import pyfsdb
7 | from dateparser import parse
8 | import time
9 |
10 |
11 | def parse_args():
12 | parser = ArgumentParser(
13 | formatter_class=ArgumentDefaultsHelpFormatter,
14 | description=__doc__,
15 |         epilog="Example Usage: dbepochtodate -t timestamp_column -T date_column input.fsdb output.fsdb",
16 | )
17 |
18 | parser.add_argument(
19 | "-t",
20 | "--timestamp-column",
21 | default="timestamp",
22 | type=str,
23 | help="Column to use with the epoch timestamp",
24 | )
25 |
26 | parser.add_argument(
27 | "-T",
28 | "--time-column",
29 | default="timestamp_human",
30 | type=str,
31 | help="The output time/date column to create",
32 | )
33 |
34 | parser.add_argument(
35 | "-f",
36 | "--format",
37 | default="%Y-%m-%d %H:%M",
38 | type=str,
39 | help="The output format to use in the time column",
40 | )
41 |
42 | parser.add_argument(
43 | "input_file", type=FileType("r"), nargs="?", default=sys.stdin, help=""
44 | )
45 |
46 | parser.add_argument(
47 | "output_file", type=FileType("w"), nargs="?", default=sys.stdout, help=""
48 | )
49 |
50 | args = parser.parse_args()
51 | return args
52 |
53 |
54 | def main():
55 | args = parse_args()
56 | fh = pyfsdb.Fsdb(
57 | file_handle=args.input_file,
58 | out_file_handle=args.output_file,
59 | converters={args.timestamp_column: float},
60 | )
61 | column_names = fh.column_names
62 | fh.out_column_names = column_names + [args.time_column]
63 |
64 | timestamp_column = fh.get_column_number(args.timestamp_column)
65 |
66 | colfmt = args.format
67 |
68 | for row in fh:
69 | row[-1] = time.strftime(colfmt, time.localtime(row[timestamp_column]))
70 | fh.append(row)
71 |
72 |
73 | if __name__ == "__main__":
74 | main()
75 |
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbformat.py:
--------------------------------------------------------------------------------
1 | """Outputs a python-string formatted line for every input FSDB row,
2 | with column names acting as variables into the format string."""
3 |
4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
5 | import sys
6 | import pyfsdb
7 |
8 |
9 | def parse_args():
10 | parser = ArgumentParser(
11 | formatter_class=ArgumentDefaultsHelpFormatter,
12 | description=__doc__,
13 | epilog="Example: dbformat -f 'I can print {col1} and {col2}'",
14 | )
15 |
16 | parser.add_argument(
17 | "-f", "--format", type=str, help="The python-based format string to use"
18 | )
19 |
20 | parser.add_argument(
21 | "input_file",
22 | type=FileType("r"),
23 | nargs="?",
24 | default=sys.stdin,
25 | help="The input FSDB file to read",
26 | )
27 |
28 | parser.add_argument(
29 | "output_file",
30 | type=FileType("w"),
31 | nargs="?",
32 | default=sys.stdout,
33 | help="The output text file to write to",
34 | )
35 |
36 | args = parser.parse_args()
37 |
38 | if not args.format:
39 | sys.stderr.write("-f is a required argument\n")
40 | exit(1)
41 |
42 | return args
43 |
44 |
45 | def main():
46 | args = parse_args()
47 |
48 | inh = pyfsdb.Fsdb(
49 | file_handle=args.input_file, return_type=pyfsdb.RETURN_AS_DICTIONARY
50 | )
51 | outh = args.output_file
52 |
53 | format_string = args.format
54 |
55 | for row in inh:
56 | # convert Nones
57 | for column in row:
58 | if row[column] is None:
59 | row[column] = ""
60 | outh.write(format_string.format(**row) + "\n")
61 |
62 |
63 | if __name__ == "__main__":
64 | main()
65 |
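
A sketch of the core formatting step with a hand-built row dict; the col1/col2 names follow the epilog example and are hypothetical:

    # mirror of the main() loop: blank out Nones, then format by column name
    row = {"col1": "10", "col2": None}
    for column in row:
        if row[column] is None:
            row[column] = ""
    print("I can print {col1} and {col2}".format(**row))  # -> 'I can print 10 and '
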
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbjinja.py:
--------------------------------------------------------------------------------
1 | """This script takes all the data in a file, and passes it to a
2 | jinja2 template with each row being stored in a `rows` variable.
3 |
4 | Note: all the rows must be loaded into memory at once.
5 | """
6 |
7 | import argparse
8 | import sys
9 | import os
10 |
11 | import pyfsdb
12 | import jinja2
13 |
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(
17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
18 | description=__doc__,
19 | epilog="Example: pdbjinja -j report.jinja input.fsdb output.txt",
20 | )
21 |
22 | parser.add_argument(
23 | "-j",
24 | "--jinja2-template",
25 | type=argparse.FileType("r"),
26 | help="The jinja2 template file to use",
27 | )
28 |
29 | parser.add_argument(
30 | "-i", "--include-file-path", type=str, help="Path to allow including files from"
31 | )
32 |
33 | parser.add_argument(
34 | "input_file",
35 | type=argparse.FileType("r"),
36 | nargs="?",
37 | default=sys.stdin,
38 | help="The input file to use",
39 | )
40 |
41 | parser.add_argument(
42 | "output_file",
43 | type=argparse.FileType("w"),
44 | nargs="?",
45 | default=sys.stdout,
46 | help="Where to write the results to",
47 | )
48 |
49 | args = parser.parse_args()
50 |
51 | if not args.jinja2_template:
52 | sys.stderr.write("A jinja2 template argument (-j) is required\n")
53 | exit(1)
54 |
55 | return args
56 |
57 |
58 | def process(
59 | input_file_handle, jinja2_template, output_file_handle, include_file_path=None
60 | ):
61 |     "Process an input data file and template into an output file"
62 | # load the data
63 | inh = pyfsdb.Fsdb(
64 | file_handle=input_file_handle, return_type=pyfsdb.RETURN_AS_DICTIONARY
65 | )
66 | rows = inh.get_all()
67 |
68 | # get jinja2 setup
69 | jinja_template_data = jinja2_template.read()
70 | loader = None
71 |
72 | # allowing including of other files?
73 | if include_file_path:
74 | if include_file_path[-1] != "/":
75 |             include_file_path += "/"  # ensure a trailing slash
76 |         loader = jinja2.FileSystemLoader(include_file_path)
77 |
78 | # create the actual template
79 | template = jinja2.Environment(loader=loader)
80 | template = template.from_string(jinja_template_data)
81 |
82 | # call jinja and write the results out to the file
83 | output_file_handle.write(template.render({"rows": rows}))
84 |
85 |
86 | def main():
87 | args = parse_args()
88 |     process(args.input_file, args.jinja2_template, args.output_file, args.include_file_path)
89 |
90 |
91 | if __name__ == "__main__":
92 | main()
93 |
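
A minimal sketch of process(), reusing the template and data from pyfsdb/tests/test_pdbjinja.py:

    from io import StringIO
    from pyfsdb.tools.pdbjinja import process

    inh = StringIO("#fsdb -F t a b c\n1\t2\t3\nd\te\tf\n")
    template = StringIO("{% for row in rows %}{{row.b}}\n{% endfor %}")
    outh = StringIO()
    outh.close = lambda *args, **kwargs: None  # keep the buffer readable

    process(inh, template, outh)
    # outh.getvalue() -> "2\ne\n" (per test_pdbjinja.py)
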
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbnormalize.py:
--------------------------------------------------------------------------------
1 | """dbnormalize takes an input file and takes each column value from a
2 | number of columns and divides it by the maximum value seen in all the
3 | columns.
4 |
5 | Note: this is the maximum value of all columns provided; if you want
6 | per-column normalization, run the tool multiple times instead.
7 |
8 | Note: this requires reading the entire file into memory.
9 | """
10 |
11 | import pyfsdb
12 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
13 | import sys
14 |
15 |
16 | def parse_args():
17 | parser = ArgumentParser(
18 | formatter_class=ArgumentDefaultsHelpFormatter,
19 | description=__doc__,
20 |         epilog="Example Usage: dbnormalize -k column -- infile outfile",
21 | )
22 |
23 | parser.add_argument(
24 | "-k",
25 | "--keys",
26 | default=["key"],
27 | nargs="+",
28 | type=str,
29 | help="The columns/keys to normalize across",
30 | )
31 |
32 | parser.add_argument(
33 | "input_file",
34 | type=FileType("r"),
35 | nargs="?",
36 | default=sys.stdin,
37 | help="The input file to read",
38 | )
39 |
40 | parser.add_argument(
41 | "output_file",
42 | type=FileType("w"),
43 | nargs="?",
44 | default=sys.stdout,
45 | help="Where to write the results",
46 | )
47 |
48 | args = parser.parse_args()
49 | return args
50 |
51 |
52 | def main():
53 | args = parse_args()
54 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file)
55 | df = fh.get_pandas()
56 | maxval = df[args.keys].max().max()
57 | for key in args.keys:
58 | df[key] = df[key] / maxval
59 | fh.put_pandas(df)
60 | fh.close()
61 |
62 |
63 | if __name__ == "__main__":
64 | main()
65 |
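
The normalization itself is a two-line pandas operation; a standalone sketch with a hypothetical frame (this assumes pandas is installed, which get_pandas already requires):

    import pandas as pd

    df = pd.DataFrame({"a": [1.0, 2.0], "b": [4.0, 8.0]})
    keys = ["a", "b"]
    maxval = df[keys].max().max()  # 8.0: the maximum across *all* selected columns
    for key in keys:
        df[key] = df[key] / maxval
    # a -> [0.125, 0.25], b -> [0.5, 1.0]
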
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbreescape.py:
--------------------------------------------------------------------------------
1 | """Passes the requested columns (-k) through the python regex escaping function.
2 |
3 | Note: because -k can take multiple columns, input files likely need to appear
4 | after the "--" argument-stop-parsing string.
5 | """
6 |
7 | import pyfsdb
8 | from re import escape
9 |
10 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
11 | import sys
12 |
13 |
14 | def parse_args():
15 | parser = ArgumentParser(
16 | formatter_class=ArgumentDefaultsHelpFormatter,
17 | description=__doc__,
18 |         epilog="Example Usage: pdbreescape -k column1 column2 -- file.fsdb",
19 | )
20 |
21 | parser.add_argument(
22 | "-k", "--keys-to-escape", type=str, nargs="+", help="The keys to regexp quote"
23 | )
24 |
25 | parser.add_argument(
26 | "input_file",
27 | type=FileType("r"),
28 | nargs="?",
29 | default=sys.stdin,
30 | help="The input file to parse",
31 | )
32 |
33 | parser.add_argument(
34 | "output_file",
35 | type=FileType("w"),
36 | nargs="?",
37 | default=sys.stdout,
38 | help="Where to send the output",
39 | )
40 |
41 | args = parser.parse_args()
42 | return args
43 |
44 |
45 | def main():
46 | args = parse_args()
47 |
48 | fs = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file)
49 |
50 | convert_cols = fs.get_column_numbers(args.keys_to_escape)
51 |
52 | for row in fs:
53 | for column in convert_cols:
54 | row[column] = escape(row[column])
55 | fs.append(row)
56 |
57 |
58 | if __name__ == "__main__":
59 | main()
60 |
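
The escaping is simply re.escape applied per cell; a one-line sketch with a hypothetical value:

    from re import escape

    print(escape("host.example.com"))  # -> host\.example\.com
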
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbreversepivot.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
2 |
3 | """dbreversepivot takes an input file with time/value columns, and
4 | pivots the table into a narrow table with one line per old column.
5 |
6 | For example, if the input was this:
7 |
8 | #fsdb -F s time foo bar
9 | 1 10 0
10 | 2 30 20
11 | 3 0 40
12 |
13 | It would convert this to:
14 |
15 | #fsdb -F s time key value
16 | 1 foo 10
17 | 2 bar 20
18 | 2 foo 30
19 | 3 bar 40
20 |
21 | This is the inverse operation of dbfullpivot.
22 | """
23 |
24 | import sys
25 | import argparse
26 | import pyfsdb
27 |
28 |
29 | def parse_args():
30 | parser = argparse.ArgumentParser(
31 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__
32 | )
33 |
34 | parser.add_argument(
35 | "-k",
36 | "--key-column",
37 | default="key",
38 | type=str,
39 | help="The key column to use in the output for column names to store in",
40 | )
41 |
42 | parser.add_argument(
43 | "-c", "--columns", nargs="+", type=str, help="The columns to pivot into keys"
44 | )
45 |
46 | parser.add_argument(
47 | "-v",
48 | "--value-column",
49 | default="value",
50 | type=str,
51 | help="What output column to store the value for what was found in the columns",
52 | )
53 |
54 | parser.add_argument(
55 | "-o",
56 | "--other-columns",
57 | default=[],
58 | type=str,
59 | nargs="*",
60 | help="Other columns to copy to every row",
61 | )
62 |
63 | parser.add_argument(
64 | "input_file",
65 | type=argparse.FileType("r"),
66 | nargs="?",
67 | default=sys.stdin,
68 | help="The input FSDB file to read",
69 | )
70 |
71 | parser.add_argument(
72 | "output_file",
73 | type=argparse.FileType("w"),
74 | nargs="?",
75 | default=sys.stdout,
76 | help="The output FSDB file to write to",
77 | )
78 |
79 | args = parser.parse_args()
80 | return args
81 |
82 |
83 | def main():
84 | args = parse_args()
85 |
86 | # set up storage structures
87 | columns = {}
88 |
89 | # from the input, get extract column numbers/names
90 | key_column = args.key_column
91 | value_column = args.value_column
92 | other_columns = args.other_columns
93 | columns = args.columns
94 |
95 | # open the input file stream
96 | fh = pyfsdb.Fsdb(
97 | file_handle=args.input_file,
98 | return_type=pyfsdb.RETURN_AS_DICTIONARY,
99 | out_file_handle=args.output_file,
100 | )
101 | fh.out_column_names = [key_column, value_column] + other_columns
102 |
103 | # for each row, remember each value based on time and key
104 | for row in fh:
105 | for column in columns:
106 | out_row = [column, row[column]]
107 | for other in other_columns:
108 | out_row.append(row[other])
109 | fh.append(out_row)
110 |
111 | fh.close(copy_comments_from=fh)
112 |
113 |
114 | if __name__ == "__main__":
115 | main()
116 |
--------------------------------------------------------------------------------
/pyfsdb/tools/pdbzerofill.py:
--------------------------------------------------------------------------------
1 | """Fills a row that is missing in a series of rows with a numerical
2 | increasing (frequently a timestamp) index"""
3 |
4 | import sys
5 | import argparse
6 | import pyfsdb
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(
11 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__
12 | )
13 |
14 | parser.add_argument(
15 | "-v", "--value", default="0", type=str, help="Fill columns with this value"
16 | )
17 |
18 | parser.add_argument(
19 | "-c", "--columns", type=str, nargs="+", help="Fill these columns"
20 | )
21 |
22 | parser.add_argument(
23 | "-k",
24 | "--key-column",
25 | default="timestamp",
26 | type=str,
27 | help="Use this column as the timestamp/key column to increment",
28 | )
29 |
30 | parser.add_argument(
31 | "-b",
32 | "--bin-size",
33 | default=1,
34 | type=int,
35 | help="Bin-size to check for missing rows",
36 | )
37 |
38 | parser.add_argument(
39 | "input_file", type=argparse.FileType("r"), nargs="?", default=sys.stdin, help=""
40 | )
41 |
42 | parser.add_argument(
43 | "output_file",
44 | type=argparse.FileType("w"),
45 | nargs="?",
46 | default=sys.stdout,
47 | help="",
48 | )
49 |
50 | args = parser.parse_args()
51 |
52 | if args.columns is None:
53 | sys.stderr.write("The --columns argument is required\n")
54 | exit(1)
55 |
56 | return args
57 |
58 |
59 | def main():
60 | args = parse_args()
61 |
62 | fh = pyfsdb.Fsdb(file_handle=args.input_file, out_file_handle=args.output_file)
63 |
64 | store_columns = fh.get_column_numbers(args.columns)
65 | time_column = fh.get_column_number(args.key_column)
66 | value = args.value
67 | bin_size = args.bin_size
68 |
69 | last_index = None
70 |
71 | for row in fh:
72 | if last_index is None:
73 | # first row, just store it
74 | last_index = int(row[time_column])
75 | elif last_index != int(row[time_column]):
76 | for skipped_time in range(
77 | last_index + bin_size, int(row[time_column]), bin_size
78 | ):
79 | newrow = list(row)
80 | newrow[time_column] = str(skipped_time)
81 | for column in store_columns:
82 | newrow[column] = value
83 | fh.append(newrow)
84 | last_index = int(row[time_column])
85 | fh.append(row)
86 |
87 | fh.write_finish()
88 |
89 |
90 | if __name__ == "__main__":
91 | main()
92 |
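
A simplified sketch of the gap-filling loop above, using plain lists instead of an Fsdb handle (columns: timestamp and count, bin size 1, fill value "0"; the data is hypothetical):

    rows = [["1", "5"], ["4", "7"]]  # timestamps 2 and 3 are missing
    out, last_index = [], None
    for row in rows:
        if last_index is not None:
            for skipped in range(last_index + 1, int(row[0])):
                out.append([str(skipped), "0"])  # zero-fill the missing bins
        last_index = int(row[0])
        out.append(row)
    # out -> [['1', '5'], ['2', '0'], ['3', '0'], ['4', '7']]
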
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "pyfsdb"
7 | dynamic = ["version"]
8 | description = "A python implementation of the flat-file streaming database"
9 | readme = "README.md"
10 | license = {file = "LICENSE"}
11 | requires-python = ">=3.6"
12 | authors = [
13 | { name = "Wes Hardaker", email = "opensource@hardakers.net" },
14 | ]
15 | classifiers = [
16 | "Operating System :: OS Independent",
17 | "Programming Language :: Python :: 3",
18 | ]
19 |
20 | [project.scripts]
21 | bro2fsdb = "pyfsdb.tools.bro2fsdb:main"
22 | db2tex = "pyfsdb.obsolete.db2tex:main"
23 | dbaugment = "pyfsdb.obsolete.dbaugment:main"
24 | dbcoluniq = "pyfsdb.obsolete.dbcoluniq:main"
25 | dbdatetoepoch = "pyfsdb.obsolete.dbdatetoepoch:main"
26 | dbensure = "pyfsdb.obsolete.dbensure:main"
27 | dbformat = "pyfsdb.obsolete.dbformat:main"
28 | dbfullpivot = "pyfsdb.obsolete.dbfullpivot:main"
29 | dbheatmap = "pyfsdb.obsolete.dbheatmap:main"
30 | dbkeyedsort = "pyfsdb.obsolete.dbkeyedsort:main"
31 | dbreescape = "pyfsdb.obsolete.dbreescape:main"
32 | dbreversepivot = "pyfsdb.obsolete.dbreversepivot:main"
33 | dbsplitter = "pyfsdb.obsolete.dbsplitter:main"
34 | dbsum = "pyfsdb.obsolete.dbsum:main"
35 | dbtopn = "pyfsdb.obsolete.dbtopn:main"
36 | dbzerofill = "pyfsdb.obsolete.dbzerofill:main"
37 | fsdb2json = "pyfsdb.tools.fsdb2json:main"
38 | fsdb2many = "pyfsdb.tools.fsdb2many:main"
39 | json2fsdb = "pyfsdb.tools.json2fsdb:main"
40 | pdb2sql = "pyfsdb.tools.pdb2sql:main"
41 | pdb2tex = "pyfsdb.tools.pdb2tex:main"
42 | pdb2to1 = "pyfsdb.tools.pdb2to1:main"
43 | pdbaddtypes = "pyfsdb.tools.pdbaddtypes:main"
44 | pdbaugment = "pyfsdb.tools.pdbaugment:main"
45 | pdbcdf = "pyfsdb.tools.pdbcdf:main"
46 | pdbcoluniq = "pyfsdb.tools.pdbcoluniq:main"
47 | pdbdatetoepoch = "pyfsdb.tools.pdbdatetoepoch:main"
48 | pdbensure = "pyfsdb.tools.pdbensure:main"
49 | pdbepochtodate = "pyfsdb.tools.pdbepochtodate:main"
50 | pdbfgrep = "pyfsdb.tools.pdbfgrep:main"
51 | pdbformat = "pyfsdb.tools.pdbformat:main"
52 | pdbfullpivot = "pyfsdb.tools.pdbfullpivot:main"
53 | pdbheatmap = "pyfsdb.tools.pdbheatmap:main"
54 | pdbjinja = "pyfsdb.tools.pdbjinja:main"
55 | pdbkeyedsort = "pyfsdb.tools.pdbkeyedsort:main"
56 | pdbnormalize = "pyfsdb.tools.pdbnormalize:main"
57 | pdbreescape = "pyfsdb.tools.pdbreescape:main"
58 | pdbreversepivot = "pyfsdb.tools.pdbreversepivot:main"
59 | pdbroc = "pyfsdb.tools.pdbroc:main"
60 | pdbrow = "pyfsdb.tools.pdbrow:main"
61 | pdbroweval = "pyfsdb.tools.pdbroweval:main"
62 | pdbsplitter = "pyfsdb.tools.pdbsplitter:main"
63 | pdbsum = "pyfsdb.tools.pdbsum:main"
64 | pdbtopn = "pyfsdb.tools.pdbtopn:main"
65 | pdbzerofill = "pyfsdb.tools.pdbzerofill:main"
66 | pdbrelplot = "pyfsdb.tools.pdbrelplot:main"
67 |
68 | [project.urls]
69 | Homepage = "https://github.com/hardaker/pyfsdb"
70 |
71 | [tool.hatch.version]
72 | path = "pyfsdb/__init__.py"
73 |
74 | [tool.hatch.build.targets.sdist]
75 | include = [
76 | "/pyfsdb",
77 | ]
78 |
79 | [tool.ruff]
80 | ignore = ["E501", "F401"] # long lines, unused imports
81 | fixable = ["ALL"] # gulp
82 | # select = ["ALL"]
83 |
--------------------------------------------------------------------------------