├── .github
    └── workflows
    │   ├── main.yml
    │   └── release.yml
├── .gitignore
├── README.md
├── bin
    ├── dev-run-and-query-model.sh
    └── install-duckdb.sh
├── build
    └── .gitkeep
├── dbt_project.yml
├── example-queries
    ├── top-cpu-time-processes.sql
    └── top-io-processes.sql
├── images
    ├── process-most-io.png
    └── processes-most-cpu-time.png
├── models
    ├── etc
    │   └── etc_passwd.sql
    └── proc
    │   └── processes
    │       ├── docs.txt
    │       ├── processes_autogroup.sql
    │       ├── processes_cgroup.sql
    │       ├── processes_cmdline.sql
    │       ├── processes_comm.sql
    │       ├── processes_environment.sql
    │       ├── processes_fdinfo.sql
    │       ├── processes_io.sql
    │       ├── processes_limits.sql
    │       ├── processes_schedstat.sql
    │       └── processes_status.sql
├── profiles.yml
└── requirements.txt


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | # CI workflow: on every push, build the Duckservability schema with dbt and
 2 | # upload the DuckDB file and the zipped example queries as build artifacts.
 3 | name: duckservability
 4 | on: [push]
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v3
10 |       - uses: actions/setup-python@v4
11 |       - run: pip install -r requirements.txt
12 |       # Fails the job early if dbt did not end up on the PATH
13 |       - run: which dbt
14 | 
15 |       # Install DuckDB
16 |       - run: ./bin/install-duckdb.sh
17 | 
18 |       # Build the Schema
19 |       - run: dbt build
20 | 
21 |       # Package the Example Queries
22 |       - run: zip build/duckservability-example-queries.zip example-queries/*.sql
23 | 
24 |       # Upload the generated duckdb file
25 |       - name: Archive the generated duckdb file
26 |         uses: actions/upload-artifact@v3
27 |         with:
28 |           name: duckservability.duckdb
29 |           path: build/duckservability.duckdb
30 | 
31 |       # Upload the example queries under their own artifact name so the zip
32 |       # is not folded into the database artifact uploaded above.
33 |       - name: Archive the Example Queries zip file
34 |         uses: actions/upload-artifact@v3
35 |         with:
36 |           name: duckservability-example-queries
37 |           path: build/duckservability-example-queries.zip
38 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Pre-release workflow: on each push to main, rebuild the schema and attach
 3 | # the build outputs to a pre-release tagged "latest".
 4 | name: pre-release
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [main]
 9 | 
10 | jobs:
11 |   pre-release:
12 |     name: Pre Release
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v3
17 |       - uses: actions/setup-python@v4
18 |       - run: pip install -r requirements.txt
19 |       - run: which dbt
20 | 
21 |       # Install the DuckDB command line
22 |       - run: ./bin/install-duckdb.sh
23 | 
24 |       # Build the schema with dbt
25 |       - run: dbt build
26 | 
27 |       # Bundle the example SQL files into a single zip
28 |       - run: zip build/duckservability-example-queries.zip example-queries/*.sql
29 | 
30 |       # Publish both build outputs on the "latest" pre-release
31 |       - uses: marvinpinto/action-automatic-releases@latest
32 |         with:
33 |           repo_token: "${{ secrets.GITHUB_TOKEN }}"
34 |           automatic_release_tag: latest
35 |           prerelease: true
36 |           title: Automatic Build
37 |           files: |
38 |             build/duckservability.duckdb
39 |             build/duckservability-example-queries.zip
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | target/
3 | dbt_packages/
4 | logs/
5 | .user.yml
6 | build/*
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Duckservability: Query Your Linux Systems
 3 | Duckservability is a DuckDB database schema for querying standard Linux systems via the `/proc` virtual file system which exposes real time kernel and process statistics. Traditionally files exposing these statistics have been aggregated via unix commandline tools. This project wraps these files in database tables that can be queried, joined, etc via SQL.
 4 | 
 5 | # Goals
 6 | Duckservability is primarily meant as a proof of concept. The overarching goal is to illustrate a class of things that can be done when the traditionally coupled storage and compute components of a database system are decoupled.
 7 | 
 8 | # Getting Started
 9 | Note that Duckservability presently only works on Linux and other Linux like systems.
10 | 
11 | 1. Install [DuckDB](https://duckdb.org/docs/installation/)
12 | 1. Download the [latest release](https://github.com/MarkRoddy/duckservability/releases/download/latest/duckservability.duckdb) of the DuckDB file containing the Duckservability schema.
13 | 1. Start DuckDB by running `duckdb duckservability.duckdb`
14 | 1. Start querying! To do so, run the `show tables;` command, or run one of the [example queries](https://github.com/MarkRoddy/duckservability/releases/download/latest/duckservability-example-queries.zip).
15 | 
16 | # Example Queries
17 | Below are a few examples of questions you can ask with Duckservability. Additionally, see the [example-queries](example-queries/) directory for more examples.
18 | 
19 | ## Processes with Most IO
20 | Query the processes that have performed the most IO, measured in megabytes:
21 | <br>
22 | ![most io](images/process-most-io.png)
23 | 
24 | ## Longest Running Processes by CPU Time
25 | ![most cpu time](images/processes-most-cpu-time.png)
26 | 
27 | # Production Usage
28 | Duckservability is presently in a proof of concept state. If you're looking for a more mature SQL system for querying OS data, take a look at the [osquery](https://github.com/osquery/osquery) project.
29 | 


--------------------------------------------------------------------------------
/bin/dev-run-and-query-model.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Development helper: run a single dbt model, then select every row of it
 4 | # from the DuckDB build file.
 5 | #
 6 | # Usage: dev-run-and-query-model.sh model-name
 7 | 
 8 | set -e
 9 | 
10 | # Exactly one argument (the model name) is required.
11 | if [ "$#" -ne 1 ]; then
12 |     echo "usage: $0 model-name"
13 |     exit 1
14 | fi
15 | 
16 | model_name="$1"
17 | 
18 | dbt run -m "$model_name"
19 | duckdb build/duckservability.duckdb "select * from $model_name"
20 | 


--------------------------------------------------------------------------------
/bin/install-duckdb.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e;
 4 | 
 5 | 
 6 | # Tease out 'duckdb==0.7.1' from the requirement.txt to determine the version of
 7 | # the duckdb commandline we want to download and install.
 8 | DUCKDB_VERSION=$(cat requirements.txt |grep ^duckdb\=|cut -d '=' -f 3)
 9 | 
10 | curl -LO "https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip"
11 | unzip duckdb_cli-linux-amd64.zip
12 | mkdir -p ~/.local/bin/ && mv duckdb ~/.local/bin/ && rm duckdb_cli-linux-amd64.zip
13 | duckdb --version
14 | 


--------------------------------------------------------------------------------
/build/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/build/.gitkeep


--------------------------------------------------------------------------------
/dbt_project.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | # Name your project! Project names should contain only lowercase characters
 3 | # and underscores. A good package name should reflect your organization's
 4 | # name or the intended use of these models
 5 | name: 'duckservability'
 6 | version: '1.0.0'
 7 | config-version: 2
 8 | 
 9 | # This setting configures which "profile" dbt uses for this project.
10 | profile: 'duckservability'
11 | 
12 | # These configurations specify where dbt should look for different types of files.
13 | # The `model-paths` config, for example, states that models in this project can be
14 | # found in the "models/" directory. You probably won't need to change these!
15 | model-paths: ["models"]
16 | analysis-paths: ["analyses"]
17 | test-paths: ["tests"]
18 | seed-paths: ["seeds"]
19 | macro-paths: ["macros"]
20 | snapshot-paths: ["snapshots"]
21 | 
22 | target-path: "target"  # directory which will store compiled SQL files
23 | clean-targets:         # directories to be removed by `dbt clean`
24 |   - "target"
25 |   - "dbt_packages"
26 | 
27 | 
28 | # Configuring models
29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
30 | 
31 | # Build every model in this project as a view. The setting is applied at the
32 | # project level so that it covers all directories under models/ (etc/ and
33 | # proc/) rather than a models/example/ directory that does not exist here.
34 | # It can be overridden in the individual model files using the
35 | # `{{ config(...) }}` macro.
36 | models:
37 |   duckservability:
38 |     +materialized: view
39 | 


--------------------------------------------------------------------------------
/example-queries/top-cpu-time-processes.sql:
--------------------------------------------------------------------------------
1 | 
2 | SELECT
3 |   comm, cpu.pid, round((cputime_ns * 1e-6), 2) AS cputime_ms
4 | FROM processes_schedstat AS cpu
5 | LEFT JOIN processes_comm AS comm
6 |   ON cpu.pid = comm.pid
7 | ORDER BY cputime_ns DESC
8 | LIMIT 10;
9 | 


--------------------------------------------------------------------------------
/example-queries/top-io-processes.sql:
--------------------------------------------------------------------------------
1 | 
2 | SELECT comm, io.pid, round(((read_bytes + write_bytes) * 1e-6), 2) AS total_bytes_mb
3 | FROM processes_io AS io
4 | LEFT JOIN processes_comm AS comm
5 |   ON io.pid = comm.pid
6 | ORDER BY total_bytes_mb DESC
7 | LIMIT 10;
8 | 


--------------------------------------------------------------------------------
/images/process-most-io.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/images/process-most-io.png


--------------------------------------------------------------------------------
/images/processes-most-cpu-time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/images/processes-most-cpu-time.png


--------------------------------------------------------------------------------
/models/etc/etc_passwd.sql:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | SELECT
4 |   *
5 | FROM read_csv_auto('/etc/passwd', header=False, delim=':', columns = { 'Username': 'VARCHAR', 'Password': 'VARCHAR', 'UserID': 'INT', 'GroupID': 'INT', 'Comment': 'VARCHAR', 'HomeDir': 'VARCHAR', 'Shell': 'VARCHAR' })
6 | 


--------------------------------------------------------------------------------
/models/proc/processes/docs.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | arch_status
  3 | # https://www.phoronix.com/news/Linux-Proc-PID-Arch-Status
  4 | 
  5 | 
  6 | attr
  7 | # https://man7.org/linux/man-pages/man5/proc.5.html
  8 | # A directory containing files detailing SELinux info about a process
  9 | 
 10 | 
 11 | autogroup
 12 | # https://man7.org/linux/man-pages/man7/sched.7.html
 13 | # The kernel scheduler groups related tasks via parent
 14 | # lineage and uses this in making scheduling decisions.
 15 | # The example given is that all processes in a parallel
 16 | # build via Make are given the same group, so they can't
 17 | # overload interactive desktop applications. This file
 18 | # contains the process's group ID as well as the group's nice value.
 19 | 
 20 | 
 21 | auxv
 22 | # https://man7.org/linux/man-pages/man5/proc.5.html
 23 | # This contains the contents of the ELF interpreter
 24 | # information passed to the process at exec time.  The
 25 | # format is one unsigned long ID plus one unsigned long
 26 | # value for each entry.  The last entry contains two zeros.
 27 | # See also getauxval(3).
 28 | 
 29 | cgroup
 30 | # https://man7.org/linux/man-pages/man7/cgroups.7.html
 31 | # This file describes control groups to which the process
 32 | # with the corresponding PID belongs.  The displayed
 33 | # information differs for cgroups version 1 and version 2
 34 | # hierarchies.
 35 | 
 36 | clear_refs
 37 | # https://man7.org/linux/man-pages/man5/proc.5.html
 38 | # This is a write-only file, writable only by owner of the process.
 39 | 
 40 | 
 41 | cmdline
 42 | # https://man7.org/linux/man-pages/man5/proc.5.html
 43 | # This read-only file holds the complete command line for
 44 | the process, unless the process is a zombie.  In the
 45 | latter case, there is nothing in this file: that is, a
 46 | read on this file will return 0 characters.  The command-
 47 | line arguments appear in this file as a set of strings
 48 | separated by null bytes ('\0'), with a further null byte
 49 | after the last string.
 50 | 
 51 | comm
 52 | https://man7.org/linux/man-pages/man5/proc.5.html
 53 | This file exposes the process's comm value—that is, the
 54 | command name associated with the process.  Different
 55 | threads in the same process may have different comm
 56 | values, accessible via /proc/[pid]/task/[tid]/comm.  A
 57 | thread may modify its comm value, or that of any of other
 58 | thread in the same thread group (see the discussion of
 59 | CLONE_THREAD in clone(2)), by writing to the file
 60 | /proc/self/task/[tid]/comm.  Strings longer than
 61 | TASK_COMM_LEN (16) characters (including the terminating
 62 | null byte) are silently truncated.
 63 | 
 64 | 
 65 | 
 66 | coredump_filter
 67 | https://man7.org/linux/man-pages/man5/core.5.html
 68 | /proc/[pid]/coredump_filter file can be used to control which
 69 | memory segments are written to the core dump file in the event
 70 | that a core dump is performed for the process with the
 71 | corresponding process ID.
 72 | The value in the file is a bit mask of memory mapping types (see
 73 | mmap(2)).  If a bit is set in the mask, then memory mappings of
 74 | the corresponding type are dumped; otherwise they are not dumped.
 75 | 
 76 | 
 77 | cpu_resctrl_groups
 78 | https://patchwork.kernel.org/project/linux-fsdevel/patch/20200110070608.18902-1-yu.c.chen@intel.com/
 79 | Monitoring tools that want to find out which resctrl control
 80 | and monitor groups a task belongs to must currently read
 81 | the "tasks" file in every group until they locate the process
 82 | ID.
 83 | Add an additional file /proc/{pid}/cpu_resctrl to provide this
 84 | information.
 85 | 
 86 | 
 87 | cpuset
 88 | https://man7.org/linux/man-pages/man7/cpuset.7.html
 89 | each process has a pseudo-file, /proc/<pid>/cpuset,
 90 | that displays the path of the process's cpuset directory relative
 91 | to the root of the cpuset filesystem.
 92 | 
 93 | cwd
 94 | https://man7.org/linux/man-pages/man5/proc.5.html
 95 | This is a symbolic link to the current working directory of the process.
 96 | 
 97 | 
 98 | environ
 99 | https://man7.org/linux/man-pages/man5/proc.5.html
100 | This file contains the initial environment that was set
101 | when the currently executing program was started via
102 | execve(2).  The entries are separated by null bytes
103 | ('\0'), and there may be a null byte at the end. 
104 | 
105 | exe
106 | this file is a symbolic link
107 | containing the actual pathname of the executed command.
108 | https://man7.org/linux/man-pages/man5/proc.5.html
109 | 
110 | fd
111 | https://man7.org/linux/man-pages/man5/proc.5.html
112 | This is a subdirectory containing one entry for each file
113 | which the process has open, named by its file descriptor,
114 | and which is a symbolic link to the actual file.  Thus, 0
115 | is standard input, 1 standard output, 2 standard error, and so on.
116 | 
117 | 
118 | fdinfo
119 | https://man7.org/linux/man-pages/man5/proc.5.html
120 | This is a subdirectory containing one entry for each file
121 | which the process has open, named by its file descriptor.
122 | The files in this directory are readable only by the owner
123 | of the process.  The contents of each file can be read to
124 | obtain information about the corresponding file
125 | descriptor.  The content depends on the type of file
126 | referred to by the corresponding file descriptor.
127 | For regular files and directories, we see something like:
128 |   $ cat /proc/12015/fdinfo/4
129 |   pos:    1000
130 |   flags:  01002002
131 |   mnt_id: 21
132 | (there are a *lot* of potential values that could be included here, they vary on the file type)
133 | 
134 | 
135 | gid_map
136 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html
137 | User and group ID mappings: uid_map and gid_map
138 | When a user namespace is created, it starts out without a mapping
139 | of user IDs (group IDs) to the parent user namespace.  The
140 | /proc/[pid]/uid_map and /proc/[pid]/gid_map files (available
141 | since Linux 3.5) expose the mappings for user and group IDs
142 | inside the user namespace for the process pid.  These files can
143 | be read to view the mappings in a user namespace and written to
144 | (once) to define the mappings.
145 | 
146 | io
147 | https://man7.org/linux/man-pages/man5/proc.5.html
148 | This file contains I/O statistics for the process, for
149 | example:
150 | # cat /proc/3828/io
151 | rchar: 323934931
152 | wchar: 323929600
153 | syscr: 632687
154 | syscw: 632675
155 | read_bytes: 0
156 | write_bytes: 323932160
157 | cancelled_write_bytes: 0
158 | 
159 | 
160 | 
161 | limits
162 | https://man7.org/linux/man-pages/man5/proc.5.html
163 | This file displays the soft limit, hard limit, and units
164 | of measurement for each of the process's resource limits
165 | 
166 | 
167 | loginuid
168 | https://www.kernel.org/doc/Documentation/ABI/stable/procfs-audit_loginuid
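169 | The /proc/$pid/loginuid pseudofile is written to set and read to get
170 | the audit login UID of process $pid as a decimal unsigned int (%u, u32).
171 | The same document covers the related /proc/$pid/sessionid pseudofile, which is read to get the
172 | audit login session ID; that ID is set automatically, serially assigned with each new login.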
173 | 
174 | 
175 | map_files
176 | https://man7.org/linux/man-pages/man5/proc.5.html
177 | This subdirectory contains entries corresponding to
178 | memory-mapped files (see mmap(2)).  Entries are named by
179 | memory region start and end address pair (expressed as
180 | hexadecimal numbers), and are symbolic links to the mapped
181 | files themselves. 
182 | 
183 | maps
184 | https://man7.org/linux/man-pages/man5/proc.5.html
185 | A file containing the currently mapped memory regions and
186 | their access permissions. 
187 | 
188 | mem
189 | https://man7.org/linux/man-pages/man5/proc.5.html
190 | This file can be used to access the pages of a process's
191 | memory through open(2), read(2), and lseek(2).
192 | 
193 | 
194 | mountinfo
195 | https://man7.org/linux/man-pages/man5/proc.5.html
196 | This file contains information about mounts in the
197 | process's mount namespace (see mount_namespaces(7)).  It
198 | supplies various information (e.g., propagation state,
199 | root of mount for bind mounts, identifier for each mount
200 | and its parent) that is missing from the (older)
201 | /proc/[pid]/mounts file, and fixes various other problems
202 | with that file (e.g., nonextensibility, failure to
203 | distinguish per-mount versus per-superblock options).
204 | 
205 | mounts
206 | https://man7.org/linux/man-pages/man5/proc.5.html
207 | This file lists all the filesystems currently mounted in
208 | the process's mount namespace (see mount_namespaces(7)).
209 |                             
210 | 
211 | mountstats
212 | https://man7.org/linux/man-pages/man5/proc.5.html
213 | This file exports information (statistics, configuration
214 | information) about the mounts in the process's mount
215 | namespace (....) Currently (as at Linux 2.6.26), only NFS filesystems
216 | export statistics information via this field.
217 | 
218 | net
219 | Directory containing a *lot* of hard to immediately decipher stats. Potentially very
220 | useful but hard to tease out what's important w/o more than a casual glance.
221 | https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/deployment_guide/s2-proc-dir-net
222 | 
223 | 
224 | ns
225 | https://man7.org/linux/man-pages/man5/proc.5.html
226 | This is a subdirectory containing one entry for each
227 | namespace that supports being manipulated by setns(2).
228 | 
229 | numa_maps
230 | https://man7.org/linux/man-pages/man5/proc.5.html
231 | This file displays information about a process's NUMA memory
232 | policy and allocation.
233 | 
234 | oom_adj
235 | https://man7.org/linux/man-pages/man5/proc.5.html
236 | This file can be used to adjust the score used to select
237 | which process should be killed in an out-of-memory (OOM)
238 | situation.
239 | 
240 | oom_score
241 | https://man7.org/linux/man-pages/man5/proc.5.html
242 | This file displays the current score that the kernel gives
243 | to this process for the purpose of selecting a process for
244 | the OOM-killer.
245 | 
246 | 
247 | oom_score_adj
248 | https://man7.org/linux/man-pages/man5/proc.5.html
249 | This file can be used to adjust the badness heuristic used
250 | to select which process gets killed in out-of-memory
251 | conditions.
252 | 
253 | pagemap
254 | https://man7.org/linux/man-pages/man5/proc.5.html
255 | This file shows the mapping of each of the process's
256 | virtual pages into physical page frames or swap area.
257 | 
258 | 
259 | patch_state
260 | Not finding any clear documentation.
261 | 
262 | 
263 | personality
264 | https://man7.org/linux/man-pages/man5/proc.5.html
265 | This read-only file exposes the process's execution
266 | domain, as set by personality(2).  The value is displayed
267 | in hexadecimal notation.
268 | 
269 | 
270 | 
271 | projid_map
272 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html
273 | Similarly to user and group ID mappings, it is possible to create
274 | project ID mappings for a user namespace.  (Project IDs are used
275 | for disk quotas; see setquota(8) and quotactl(2).)
276 | Project ID mappings are defined by writing to the
277 | /proc/[pid]/projid_map file (present since Linux 3.7).
278 | 
279 | 
280 | 
281 | root -> /
282 | https://man7.org/linux/man-pages/man5/proc.5.html
283 | UNIX and Linux support the idea of a per-process root of
284 | the filesystem, set by the chroot(2) system call.  This
285 | file is a symbolic link that points to the process's root
286 | directory, and behaves in the same way as exe, and fd/*.
287 | 
288 | sched
289 | # Little formal documentation:
290 | https://lwn.net/Articles/242900/
291 | 
292 | schedstat
293 | https://docs.kernel.org/scheduler/sched-stats.html
294 | schedstats also adds a new /proc/<pid>/schedstat file to include some of the same information on a per-process level. There are three fields in this file correlating for that process to:
295 | time spent on the cpu (in nanoseconds)
296 | time spent waiting on a runqueue (in nanoseconds)
297 | # of timeslices run on this cpu
298 | 
299 | sessionid
300 | https://patchwork.kernel.org/project/linux-audit/patch/0e77d290bb50232d9ec9317645106f1330bd2d54.1616008065.git.rgb@redhat.com/
301 | Not much documentation, but maybe included in stat output?
302 | 
303 | setgroups
304 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html
305 | The /proc/[pid]/setgroups file displays the string "allow" if
306 | processes in the user namespace that contains the process pid are
307 | permitted to employ the setgroups(2) system call;
308 | 
309 | 
310 | 
311 | smaps
312 | https://man7.org/linux/man-pages/man5/proc.5.html
313 | This file shows memory consumption for each of the
314 | process's mappings.
315 | 
316 | smaps_rollup
317 | https://www.kernel.org/doc/Documentation/ABI/testing/procfs-smaps_rollup
318 | This file provides pre-summed memory information for a
319 | process.  The format is almost identical to /proc/pid/smaps,
320 | except instead of an entry for each VMA in a process,
321 | smaps_rollup has a single entry (tagged "[rollup]")
322 | for which each field is the sum of the corresponding
323 | fields from all the maps in /proc/pid/smaps.
324 | 
325 | stack
326 | https://man7.org/linux/man-pages/man5/proc.5.html
327 | This file provides a symbolic trace of the function calls
328 | in this process's kernel stack.
329 | 
330 | stat (potentially *very* useful)
331 | https://man7.org/linux/man-pages/man5/proc.5.html
332 | Status information about the process.  This is used by ps(1).
333 | 
334 | statm 
335 | https://man7.org/linux/man-pages/man5/proc.5.html
336 | Provides information about memory usage, measured in
337 | pages.
338 | 
339 | status (same as stat but includes named fields)
340 | https://man7.org/linux/man-pages/man5/proc.5.html
341 | Provides much of the information in /proc/[pid]/stat and
342 | /proc/[pid]/statm in a format that's easier for humans to
343 | parse. 
344 | 
345 | 
346 | syscall
347 | https://man7.org/linux/man-pages/man5/proc.5.html
348 | (same as stat but includes named fields)
349 | This file exposes the system call number and argument
350 | registers for the system call currently being executed by
351 | the process, followed by the values of the stack pointer
352 | and program counter registers.  The values of all six
353 | argument registers are exposed, although most system calls
354 | use fewer registers.
355 | 
356 | task
357 | https://man7.org/linux/man-pages/man5/proc.5.html
358 | This is a directory that contains one subdirectory for
359 | each thread in the process.  The name of each subdirectory
360 | is the numerical thread ID ([tid]) of the thread (see
361 | gettid(2)).
362 | Within each of these subdirectories, there is a set of
363 | files with the same names and contents as under the
364 | /proc/[pid] directories. 
365 | 
366 | 
367 | timens_offsets
368 | https://man.archlinux.org/man/time_namespaces.7.en
369 | Associated with each time namespace are offsets, expressed with respect to the initial time namespace, that define the values of the monotonic and boot-time clocks in that namespace. These offsets are exposed via the file /proc/PID/timens_offsets. Within this file, the offsets are expressed as lines consisting of three space-delimited fields:
370 | <clock-id> <offset-secs> <offset-nanosecs>
371 | 
372 | 
373 | timers
374 | https://man7.org/linux/man-pages/man5/proc.5.html
375 | /proc/[pid]/timers (since Linux 3.10)
376 | A list of the POSIX timers for this process.  Each timer
377 | is listed with a line that starts with the string "ID:".
378 | 
379 | 
380 | timerslack_ns
381 | https://man7.org/linux/man-pages/man5/proc.5.html
382 | This file exposes the process's "current" timer slack
383 | value, expressed in nanoseconds.  The file is writable,
384 | allowing the process's timer slack value to be changed.
385 | 
386 | uid_map
387 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html
388 | When a user namespace is created, it starts out without a mapping
389 | of user IDs (group IDs) to the parent user namespace.  The
390 | /proc/[pid]/uid_map and /proc/[pid]/gid_map files (available
391 | since Linux 3.5) expose the mappings for user and group IDs
392 | inside the user namespace for the process pid.
393 | 
394 | wchan
395 | https://man7.org/linux/man-pages/man5/proc.5.html
396 | The symbolic name corresponding to the location in the
397 | kernel where the process is sleeping.
398 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_autogroup.sql:
--------------------------------------------------------------------------------
 1 | -- One row per line of /proc/<pid>/autogroup across all numeric pid directories.
 2 | -- Each line is read as three space-separated fields; the middle field is
 3 | -- parsed but not exposed.
 4 | WITH autogroup_files AS (
 5 |   SELECT *
 6 |   FROM read_csv(
 7 |     '/proc/[0-9]*/autogroup',
 8 |     header = False,
 9 |     filename = true,
10 |     delim = ' ',
11 |     columns = {task_group: 'VARCHAR', unused_field: 'VARCHAR', nice: 'INT'}
12 |   )
13 | )
14 | SELECT
15 |   -- The path is /proc/<pid>/autogroup, so the third '/'-separated piece is the pid.
16 |   str_split(filename, '/')[3] AS PID,
17 |   task_group,
18 |   nice
19 | FROM autogroup_files
20 | ORDER BY PID ASC
21 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_cgroup.sql:
--------------------------------------------------------------------------------
 1 | -- Control-group membership per process, read from /proc/<pid>/cgroup.
 2 | -- Each line of the file has the form:
 3 | --   hierarchy-ID:controller-list:cgroup-path
 4 | WITH cgroup_lines AS (
 5 |   SELECT *
 6 |   FROM read_csv(
 7 |     '/proc/[0-9]*/cgroup',
 8 |     header = False,
 9 |     filename = true,
10 |     delim = ':',
11 |     columns = {
12 |       hierarchy_id: 'VARCHAR',
13 |       controller_list: 'VARCHAR',
14 |       cgroup_path: 'VARCHAR'
15 |     }
16 |   )
17 | )
18 | SELECT
19 |   -- The path is /proc/<pid>/cgroup, so the third '/'-separated piece is the pid.
20 |   str_split(filename, '/')[3] AS PID,
21 |   hierarchy_id,
22 |   controller_list,
23 |   cgroup_path
24 | FROM cgroup_lines
25 | ORDER BY PID ASC
26 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_cmdline.sql:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | SELECT
4 |   str_split(filename, '/')[3] AS PID,
5 |   trim(replace(column0, chr(0), ' ')) AS cmdline
6 | FROM read_csv('/proc/[0-9]*/cmdline', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'})
7 | ORDER BY PID ASC
8 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_comm.sql:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | SELECT
4 |   str_split(filename, '/')[3] AS PID,
5 |   trim(replace(column0, chr(0), ' ')) AS comm
6 | FROM read_csv('/proc/[0-9]*/comm', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'})
7 | ORDER BY PID ASC
8 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_environment.sql:
--------------------------------------------------------------------------------
 1 | -- One row per environment entry per process, read from /proc/<pid>/environ.
 2 | -- The file content is split on null bytes into individual entries, and each
 3 | -- entry is then split into a Name and a Value.
 4 | SELECT
 5 |   str_split(filename, '/')[3] AS PID,
 6 |   -- Split on the FIRST '=' only: a value may itself contain '=' and
 7 |   -- splitting on every '=' would cut it off at the second one.
 8 |   -- An entry without any '=' keeps the whole text as Name and a NULL Value.
 9 |   CASE WHEN eq_pos > 0 THEN substr(env_entry, 1, eq_pos - 1) ELSE env_entry END AS Name,
10 |   CASE WHEN eq_pos > 0 THEN substr(env_entry, eq_pos + 1) END AS Value
11 | FROM (
12 |   SELECT
13 |     filename,
14 |     env_entry,
15 |     instr(env_entry, '=') AS eq_pos
16 |   FROM (
17 |     SELECT
18 |       filename,
19 |       unnest(str_split(column0, chr(0))) AS env_entry
20 |     FROM read_csv('/proc/[0-9]*/environ', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'})
21 |   )
22 | )
23 | -- Drops the empty entry produced by a trailing null byte.
24 | WHERE Name != ''
25 | ORDER BY PID, Name
26 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_fdinfo.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | /* Note! There is a race between the moment DuckDB performs the glob operation
 3 |    and the moment it opens each matched file: a process may close a descriptor
 4 |    in between, so queries to this model would likely fail because a file seen
 5 |    at glob time does not exist at file open time. To get around this we limit
 6 |    the FD glob pattern to [0-2], that way it will only match stdin, stdout, and
 7 |    stderr. Which... in theory are stable? Note that these could still go away
 8 |    in theory, but this has not yet been observed in practice.
 9 | */
10 | SELECT
11 |   /* filename is /proc/<pid>/fdinfo/<fd>. Splitting on '/' yields a leading
12 |      empty string, so the pid is element 3 and the fd is element 5. */
13 |   str_split(filename, '/')[3] AS PID,
14 |   str_split(filename, '/')[5] AS fd,
15 |   str_split(row, ':')[1] AS tag,
16 |   /* Note you may be tempted to convert these to ints, non-int
17 |      values are legal here even if you don't see them! If you're
18 |      going to do so, you'll need to find a way to filter on *type*
19 |      of file the fd refers to, as the 'value' field changes on this. */
20 |   trim(array_slice(row, instr(row, ':')+1, null)) AS value
21 | FROM read_csv('/proc/[0-9]*/fdinfo/[0-2]', header=False, filename=true, delim='\0',
22 |      columns={row: 'VARCHAR'})
23 | ORDER BY PID ASC
24 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_io.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Per-process I/O counters read from /proc/<pid>/io, one row per process.
 3 |    Each input line is a 'tag: value' pair; the pairs are pivoted so that every
 4 |    counter becomes its own column.
 5 | */
 6 | WITH io_lines AS (
 7 |   -- One row per counter line, tagged with the PID taken from the file path.
 8 |   SELECT
 9 |     str_split(filename, '/')[3] AS PID,
10 |     tag,
11 |     value
12 |   FROM read_csv('/proc/[0-9]*/io', header=False, filename=true, delim=':',
13 |        columns={tag: 'VARCHAR', value: 'int64'})
14 | )
15 | SELECT
16 |   PID,
17 |   -- Pivot: pick the value of each named counter within the process group.
18 |   first(value) FILTER (WHERE tag = 'rchar') AS 'rchar',
19 |   first(value) FILTER (WHERE tag = 'wchar') AS 'wchar',
20 |   first(value) FILTER (WHERE tag = 'syscr') AS 'syscr',
21 |   first(value) FILTER (WHERE tag = 'syscw') AS 'syscw',
22 |   first(value) FILTER (WHERE tag = 'read_bytes') AS 'read_bytes',
23 |   first(value) FILTER (WHERE tag = 'write_bytes') AS 'write_bytes',
24 |   first(value) FILTER (WHERE tag = 'cancelled_write_bytes') AS 'cancelled_write_bytes'
25 | FROM io_lines
26 | GROUP BY PID
27 | ORDER BY PID ASC
28 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_limits.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Resource limits per process, one row per (process, limit), parsed from the
 3 |    fixed-width table in /proc/<pid>/limits. Sample input:
 4 | 
 5 | Limit                     Soft Limit           Hard Limit           Units     
 6 | Max cpu time              unlimited            unlimited            seconds   
 7 | Max file size             unlimited            unlimited            bytes     
 8 | Max data size             unlimited            unlimited            bytes     
 9 | Max stack size            8388608              unlimited            bytes     
10 | Max core file size        0                    unlimited            bytes     
11 | Max resident set          unlimited            unlimited            bytes     
12 | Max processes             15630                15630                processes 
13 | Max open files            1048576              1048576              files     
14 | Max locked memory         67108864             67108864             bytes     
15 | Max address space         unlimited            unlimited            bytes     
16 | Max file locks            unlimited            unlimited            locks     
17 | Max pending signals       15630                15630                signals   
18 | Max msgqueue size         819200               819200               bytes     
19 | Max nice priority         0                    0                    
20 | Max realtime priority     0                    0                    
21 | Max realtime timeout      unlimited            unlimited            us
22 | */
23 | 
24 | WITH limit_rows AS (
25 |   SELECT
26 |     str_split(filename, '/')[3] AS PID,
27 |     /* Each line is read whole into 'row' and cut at fixed character
28 |        positions. Adjacent slices share their boundary character; trim()
29 |        strips the surrounding padding from every field. */
30 |     trim(row[:26]) AS limit_name,
31 |     trim(row[26:47]) AS soft_limit,
32 |     trim(row[47:68]) AS hard_limit,
33 |     trim(row[68:]) AS units
34 |   -- skip=1 is there to drop the 'Limit / Soft Limit / ...' header line.
35 |   FROM read_csv('/proc/[0-9]*/limits', header=False, filename=true, skip=1, delim=chr(0),
36 |        columns={row: 'VARCHAR'})
37 | )
38 | SELECT
39 |   PID,
40 |   limit_name,
41 |   soft_limit,
42 |   hard_limit,
43 |   units
44 | FROM limit_rows
45 | ORDER BY PID ASC
46 | 
47 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_schedstat.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Scheduler statistics per process, one row per /proc/<pid>/schedstat file.
 3 |    Each file holds three space-separated integers, which this model exposes
 4 |    as cputime_ns, runqueue_ns and num_timeslices. Sample input:
 5 | 
 6 | $ cat /proc/45399/schedstat 
 7 | 189497057 49271715 227
 8 | 
 9 | */
10 | 
11 | WITH schedstat_rows AS (
12 |   SELECT
13 |     -- The PID is the third '/'-separated element of '/proc/<pid>/schedstat'.
14 |     str_split(filename, '/')[3] AS PID,
15 |     cputime_ns,
16 |     runqueue_ns,
17 |     num_timeslices
18 |   FROM read_csv('/proc/[0-9]*/schedstat', header=False, filename=true, delim=' ',
19 |        columns={cputime_ns: 'HUGEINT', runqueue_ns: 'HUGEINT', num_timeslices: 'HUGEINT'})
20 | )
21 | SELECT
22 |   PID,
23 |   cputime_ns,
24 |   runqueue_ns,
25 |   num_timeslices
26 | FROM schedstat_rows
27 | ORDER BY PID ASC
28 | 


--------------------------------------------------------------------------------
/models/proc/processes/processes_status.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |    One row per (process, status field), parsed from /proc/<pid>/status.
 3 |    Sample input:
 4 | 
 5 | $ cat /proc/45399/status
 6 | Name:	systemd
 7 | Umask:	0002
 8 | State:	S (sleeping)
 9 | Tgid:	45399
10 | Ngid:	0
11 | Pid:	45399
12 | PPid:	1
13 | TracerPid:	0
14 | Uid:	4000	4000	4000	4000
15 | Gid:	4000	4000	4000	4000
16 | FDSize:	256
17 | .....
18 | */
19 | 
20 | WITH status_fields AS (
21 |   SELECT
22 |     str_split(filename, '/')[3] AS PID,
23 |     /* Some 'value' fields contain ':' characters themselves, so each line
24 |        is read whole and split here: the tag is the text before the first
25 |        ':' and the value is everything after it, rather than letting
26 |        read_csv() split on ':'. */
27 |     trim(str_split(row, ':')[1]) AS tag,
28 |     trim(array_slice(row, instr(row, ':')+1, null)) AS value
29 |   FROM read_csv('/proc/[0-9]*/status', header=False, filename=true, delim=chr(0),
30 |        columns={row: 'VARCHAR'})
31 | )
32 | SELECT
33 |   PID,
34 |   tag,
35 |   value
36 | FROM status_fields
37 | ORDER BY PID, tag ASC
38 | 


--------------------------------------------------------------------------------
/profiles.yml:
--------------------------------------------------------------------------------
1 | # dbt connection profile: a single DuckDB target named 'dev'.
2 | duckservability:
3 |   target: dev
4 |   outputs:
5 |     dev:
6 |       type: duckdb
7 |       # Database file that the CI workflow uploads as a build artifact.
8 |       path: ./build/duckservability.duckdb
9 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | agate==1.7.0
 2 | attrs==22.2.0
 3 | Babel==2.12.1
 4 | betterproto==1.2.5
 5 | certifi==2022.12.7
 6 | cffi==1.15.1
 7 | charset-normalizer==3.1.0
 8 | click==8.1.3
 9 | colorama==0.4.6
10 | dbt-core==1.4.4
11 | dbt-duckdb==1.4.0
12 | dbt-extractor==0.4.1
13 | duckdb==0.7.1
14 | future==0.18.3
15 | grpclib==0.4.3
16 | h2==4.1.0
17 | hologram==0.0.15
18 | hpack==4.0.0
19 | hyperframe==6.0.1
20 | idna==3.4
21 | isodate==0.6.1
22 | Jinja2==3.1.2
23 | jsonschema==3.2.0
24 | leather==0.3.4
25 | Logbook==1.5.3
26 | MarkupSafe==2.1.2
27 | mashumaro==3.3.1
28 | minimal-snowplow-tracker==0.0.2
29 | msgpack==1.0.4
30 | multidict==6.0.4
31 | networkx==2.8.8
32 | packaging==23.0
33 | parsedatetime==2.4
34 | pathspec==0.10.3
35 | pycparser==2.21
36 | pyrsistent==0.19.3
37 | python-dateutil==2.8.2
38 | python-slugify==8.0.1
39 | pytimeparse==1.1.8
40 | pytz==2022.7.1
41 | PyYAML==6.0
42 | requests==2.28.2
43 | six==1.16.0
44 | sqlparse==0.4.3
45 | stringcase==1.2.0
46 | text-unidecode==1.3
47 | typing-extensions==4.5.0
48 | urllib3==1.26.14
49 | Werkzeug==2.2.3
50 | 


--------------------------------------------------------------------------------