├── .github └── workflows │ ├── main.yml │ └── release.yml ├── .gitignore ├── README.md ├── bin ├── dev-run-and-query-model.sh └── install-duckdb.sh ├── build └── .gitkeep ├── dbt_project.yml ├── example-queries ├── top-cpu-time-processes.sql └── top-io-processes.sql ├── images ├── process-most-io.png └── processes-most-cpu-time.png ├── models ├── etc │ └── etc_passwd.sql └── proc │ └── processes │ ├── docs.txt │ ├── processes_autogroup.sql │ ├── processes_cgroup.sql │ ├── processes_cmdline.sql │ ├── processes_comm.sql │ ├── processes_environment.sql │ ├── processes_fdinfo.sql │ ├── processes_io.sql │ ├── processes_limits.sql │ ├── processes_schedstat.sql │ └── processes_status.sql ├── profiles.yml └── requirements.txt /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: duckservability 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v3 8 | - uses: actions/setup-python@v4 9 | - run: pip install -r requirements.txt 10 | - run: which dbt 11 | 12 | # Install DuckDB 13 | - run: ./bin/install-duckdb.sh 14 | 15 | # Build the Schema 16 | - run: dbt build 17 | 18 | # Package the Example Queries 19 | - run: zip build/duckservability-example-queries.zip example-queries/*.sql 20 | 21 | # Upload the generated duckdb file 22 | - name: Archive the generated duckdb file 23 | uses: actions/upload-artifact@v3 24 | with: 25 | name: duckservability.duckdb 26 | path: build/duckservability.duckdb 27 | - name: Archive the Example Queries zip file 28 | uses: actions/upload-artifact@v3 29 | with: 30 | name: duckservability-example-queries.zip # distinct name so the zip is not merged into the .duckdb artifact 31 | path: build/duckservability-example-queries.zip 32 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "pre-release" 3 | 4 | on: 5 | push: 6 | branches: 7 | - "main" 8 | 9 |
jobs: 10 | pre-release: 11 | name: "Pre Release" 12 | runs-on: "ubuntu-latest" 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: actions/setup-python@v4 17 | - run: pip install -r requirements.txt 18 | - run: which dbt 19 | 20 | # Install DuckDB 21 | - run: ./bin/install-duckdb.sh 22 | 23 | # Build the Schema 24 | - run: dbt build 25 | 26 | # Package the Example Queries 27 | - run: zip build/duckservability-example-queries.zip example-queries/*.sql 28 | 29 | - uses: "marvinpinto/action-automatic-releases@latest" 30 | with: 31 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 32 | automatic_release_tag: "latest" 33 | prerelease: true 34 | title: "Automatic Build" 35 | files: | 36 | build/duckservability.duckdb 37 | build/duckservability-example-queries.zip 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | .user.yml 6 | build/* 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Duckservability: Query Your Linux Systems 3 | Duckservability is a DuckDB database schema for querying standard Linux systems via the `/proc` virtual file system which exposes real time kernel and process statistics. Traditionally files exposing these statistics have been aggregated via unix commandline tools. This project wraps these files in database tables that can be queried, joined, etc via SQL. 4 | 5 | # Goals 6 | Duckservability is primarily meant as a proof of concept. The overarching goal is to illustrate a class of things that can be done when the traditionally coupled storage and compute components of a database system are decoupled. 7 | 8 | # Getting Started 9 | Note that Duckservability presently only works on Linux and other Linux like systems. 10 | 11 | 1. 
Install [DuckDB](https://duckdb.org/docs/installation/) 12 | 1. Download the [latest release](https://github.com/MarkRoddy/duckservability/releases/download/latest/duckservability.duckdb) of the DuckDB file containing the Duckservability schema. 13 | 1. Start DuckDB by running `duckdb duckservability.duckdb` 14 | 1. Start querying! To do so, run the `show tables;` command, or run one of the [example queries](https://github.com/MarkRoddy/duckservability/releases/download/latest/duckservability-example-queries.zip). 15 | 16 | # Example Queries 17 | Below are a few examples of questions you can ask with Duckservability. Additionally, see the [example-queries](example-queries) directory for more examples. 18 | 19 | ## Processes with Most IO 20 | Query the processes that have performed the most IO, measured in megabytes: 21 |
22 | ![most io](images/process-most-io.png) 23 | 24 | ## Longest Running Processes by CPU Time 25 | ![most cpu time](images/processes-most-cpu-time.png) 26 | 27 | # Production Usage 28 | Duckservability is presently in a proof of concept state. If you're looking for a more mature SQL system for querying OS data, take a look at the [osquery](https://github.com/osquery/osquery) project. 29 | -------------------------------------------------------------------------------- /bin/dev-run-and-query-model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [ ! $# = 1 ]; then 6 | echo "usage: $0 model-name"; 7 | exit 1; 8 | fi 9 | MODEL_NAME="$1" 10 | dbt run -m "$MODEL_NAME" 11 | duckdb build/duckservability.duckdb "select * from $MODEL_NAME" 12 | -------------------------------------------------------------------------------- /bin/install-duckdb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e; 4 | 5 | 6 | # Tease out 'duckdb==0.7.1' from the requirements.txt to determine the version of 7 | # the duckdb commandline we want to download and install.
8 | DUCKDB_VERSION=$(cat requirements.txt |grep ^duckdb\=|cut -d '=' -f 3) 9 | 10 | curl -LO "https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-amd64.zip" 11 | unzip duckdb_cli-linux-amd64.zip 12 | mkdir -p ~/.local/bin/ && mv duckdb ~/.local/bin/ && rm duckdb_cli-linux-amd64.zip 13 | duckdb --version 14 | -------------------------------------------------------------------------------- /build/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/build/.gitkeep -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | 2 | # Name your project! Project names should contain only lowercase characters 3 | # and underscores. A good package name should reflect your organization's 4 | # name or the intended use of these models 5 | name: 'duckservability' 6 | version: '1.0.0' 7 | config-version: 2 8 | 9 | # This setting configures which "profile" dbt uses for this project. 10 | profile: 'duckservability' 11 | 12 | # These configurations specify where dbt should look for different types of files. 13 | # The `model-paths` config, for example, states that models in this project can be 14 | # found in the "models/" directory. You probably won't need to change these! 
15 | model-paths: ["models"] 16 | analysis-paths: ["analyses"] 17 | test-paths: ["tests"] 18 | seed-paths: ["seeds"] 19 | macro-paths: ["macros"] 20 | snapshot-paths: ["snapshots"] 21 | 22 | target-path: "target" # directory which will store compiled SQL files 23 | clean-targets: # directories to be removed by `dbt clean` 24 | - "target" 25 | - "dbt_packages" 26 | 27 | 28 | # Configuring models 29 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 30 | 31 | # In this example config, we tell dbt to build all models in the example/ 32 | # directory as views. These settings can be overridden in the individual model 33 | # files using the `{{ config(...) }}` macro. 34 | models: 35 | duckservability: 36 | # Config indicated by + and applies to all files under models/example/ 37 | example: 38 | +materialized: view 39 | -------------------------------------------------------------------------------- /example-queries/top-cpu-time-processes.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | comm, cpu.pid, round((cputime_ns * 1e-6), 2) AS cputime_ms 4 | FROM processes_schedstat AS cpu 5 | LEFT JOIN processes_comm AS comm 6 | ON cpu.pid = comm.pid 7 | ORDER BY cputime_ns DESC 8 | LIMIT 10; 9 | -------------------------------------------------------------------------------- /example-queries/top-io-processes.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT comm, io.pid, round(((read_bytes + write_bytes) * 1e-6), 2) AS total_bytes_mb 3 | FROM processes_io AS io 4 | LEFT JOIN processes_comm AS comm 5 | ON io.pid = comm.pid 6 | ORDER BY total_bytes_mb DESC 7 | LIMIT 10; 8 | -------------------------------------------------------------------------------- /images/process-most-io.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/images/process-most-io.png -------------------------------------------------------------------------------- /images/processes-most-cpu-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MarkRoddy/duckservability/e4fe9110b9d229fbba5e30056905eea690f34e58/images/processes-most-cpu-time.png -------------------------------------------------------------------------------- /models/etc/etc_passwd.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | * 5 | FROM read_csv_auto('/etc/passwd', header=False, delim=':', columns = { 'Username': 'VARCHAR', 'Password': 'VARCHAR', 'UserID': 'INT', 'GroupID': 'INT', 'Comment': 'VARCHAR', 'HomeDir': 'VARCHAR', 'Shell': 'VARCHAR' }) 6 | -------------------------------------------------------------------------------- /models/proc/processes/docs.txt: -------------------------------------------------------------------------------- 1 | 2 | arch_status 3 | # https://www.phoronix.com/news/Linux-Proc-PID-Arch-Status 4 | 5 | 6 | attr 7 | # https://man7.org/linux/man-pages/man5/proc.5.html 8 | # A directory containing files detailing SELinux info about a process 9 | 10 | 11 | autogroup 12 | # https://man7.org/linux/man-pages/man7/sched.7.html 13 | # The kernel scheduler groups related tasks via parent 14 | # lineage and uses this in making scheduling decisions. 15 | # The example given is that all processes in a parallel 16 | # build via Make are given the same group, so they can't 17 | # overload interactive desktop applications. This file 18 | # contains the process's group ID as well as the group's nice value. 19 | 20 | 21 | auxv 22 | # https://man7.org/linux/man-pages/man5/proc.5.html 23 | # This contains the contents of the ELF interpreter 24 | # information passed to the process at exec time.
The 25 | # format is one unsigned long ID plus one unsigned long 26 | # value for each entry. The last entry contains two zeros. 27 | # See also getauxval(3). 28 | 29 | cgroup 30 | # https://man7.org/linux/man-pages/man7/cgroups.7.html 31 | # This file describes control groups to which the process 32 | # with the corresponding PID belongs. The displayed 33 | # information differs for cgroups version 1 and version 2 34 | # hierarchies. 35 | 36 | clear_refs 37 | # https://man7.org/linux/man-pages/man5/proc.5.html 38 | # This is a write-only file, writable only by owner of the process. 39 | 40 | 41 | cmdline 42 | # https://man7.org/linux/man-pages/man5/proc.5.html 43 | # This read-only file holds the complete command line for 44 | the process, unless the process is a zombie. In the 45 | latter case, there is nothing in this file: that is, a 46 | read on this file will return 0 characters. The command- 47 | line arguments appear in this file as a set of strings 48 | separated by null bytes ('\0'), with a further null byte 49 | after the last string. 50 | 51 | comm 52 | https://man7.org/linux/man-pages/man5/proc.5.html 53 | This file exposes the process's comm value—that is, the 54 | command name associated with the process. Different 55 | threads in the same process may have different comm 56 | values, accessible via /proc/[pid]/task/[tid]/comm. A 57 | thread may modify its comm value, or that of any of other 58 | thread in the same thread group (see the discussion of 59 | CLONE_THREAD in clone(2)), by writing to the file 60 | /proc/self/task/[tid]/comm. Strings longer than 61 | TASK_COMM_LEN (16) characters (including the terminating 62 | null byte) are silently truncated. 
63 | 64 | 65 | 66 | coredump_filter 67 | https://man7.org/linux/man-pages/man5/core.5.html 68 | /proc/[pid]/coredump_filter file can be used to control which 69 | memory segments are written to the core dump file in the event 70 | that a core dump is performed for the process with the 71 | corresponding process ID. 72 | The value in the file is a bit mask of memory mapping types (see 73 | mmap(2)). If a bit is set in the mask, then memory mappings of 74 | the corresponding type are dumped; otherwise they are not dumped. 75 | 76 | 77 | cpu_resctrl_groups 78 | https://patchwork.kernel.org/project/linux-fsdevel/patch/20200110070608.18902-1-yu.c.chen@intel.com/ 79 | Monitoring tools that want to find out which resctrl control 80 | and monitor groups a task belongs to must currently read 81 | the "tasks" file in every group until they locate the process 82 | ID. 83 | Add an additional file /proc/{pid}/cpu_resctrl to provide this 84 | information. 85 | 86 | 87 | cpuset 88 | https://man7.org/linux/man-pages/man7/cpuset.7.html 89 | each process has a pseudo-file, /proc//cpuset, 90 | that displays the path of the process's cpuset directory relative 91 | to the root of the cpuset filesystem. 92 | 93 | cwd 94 | https://man7.org/linux/man-pages/man5/proc.5.html 95 | This is a symbolic link to the current working directory of the process. 96 | 97 | 98 | environ 99 | https://man7.org/linux/man-pages/man5/proc.5.html 100 | This file contains the initial environment that was set 101 | when the currently executing program was started via 102 | execve(2). The entries are separated by null bytes 103 | ('\0'), and there may be a null byte at the end. 104 | 105 | exe 106 | this file is a symbolic link 107 | containing the actual pathname of the executed command. 
108 | https://man7.org/linux/man-pages/man5/proc.5.html 109 | 110 | fd 111 | https://man7.org/linux/man-pages/man5/proc.5.html 112 | This is a subdirectory containing one entry for each file 113 | which the process has open, named by its file descriptor, 114 | and which is a symbolic link to the actual file. Thus, 0 115 | is standard input, 1 standard output, 2 standard error, and so on. 116 | 117 | 118 | fdinfo 119 | https://man7.org/linux/man-pages/man5/proc.5.html 120 | This is a subdirectory containing one entry for each file 121 | which the process has open, named by its file descriptor. 122 | The files in this directory are readable only by the owner 123 | of the process. The contents of each file can be read to 124 | obtain information about the corresponding file 125 | descriptor. The content depends on the type of file 126 | referred to by the corresponding file descriptor. 127 | For regular files and directories, we see something like: 128 | $ cat /proc/12015/fdinfo/4 129 | pos: 1000 130 | flags: 01002002 131 | mnt_id: 21 132 | (there are a *lot* of potential values that could be included here, they vary on the file type) 133 | 134 | 135 | gid_map 136 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html 137 | User and group ID mappings: uid_map and gid_map 138 | When a user namespace is created, it starts out without a mapping 139 | of user IDs (group IDs) to the parent user namespace. The 140 | /proc/[pid]/uid_map and /proc/[pid]/gid_map files (available 141 | since Linux 3.5) expose the mappings for user and group IDs 142 | inside the user namespace for the process pid. These files can 143 | be read to view the mappings in a user namespace and written to 144 | (once) to define the mappings. 
145 | 146 | io 147 | https://man7.org/linux/man-pages/man5/proc.5.html 148 | This file contains I/O statistics for the process, for 149 | example: 150 | # cat /proc/3828/io 151 | rchar: 323934931 152 | wchar: 323929600 153 | syscr: 632687 154 | syscw: 632675 155 | read_bytes: 0 156 | write_bytes: 323932160 157 | cancelled_write_bytes: 0 158 | 159 | 160 | 161 | limits 162 | https://man7.org/linux/man-pages/man5/proc.5.html 163 | This file displays the soft limit, hard limit, and units 164 | of measurement for each of the process's resource limits 165 | 166 | 167 | loginuid 168 | https://www.kernel.org/doc/Documentation/ABI/stable/procfs-audit_loginuid 169 | The /proc/$pid/loginuid pseudofile is written to set and read to get the 170 | audit login UID of process $pid as a decimal 171 | unsigned int (%u, u32). (The same document describes /proc/$pid/sessionid, the audit login session ID, which is set automatically and 172 | serially assigned with each new login.) 173 | 174 | 175 | map_files 176 | https://man7.org/linux/man-pages/man5/proc.5.html 177 | This subdirectory contains entries corresponding to 178 | memory-mapped files (see mmap(2)). Entries are named by 179 | memory region start and end address pair (expressed as 180 | hexadecimal numbers), and are symbolic links to the mapped 181 | files themselves. 182 | 183 | maps 184 | https://man7.org/linux/man-pages/man5/proc.5.html 185 | A file containing the currently mapped memory regions and 186 | their access permissions. 187 | 188 | mem 189 | https://man7.org/linux/man-pages/man5/proc.5.html 190 | This file can be used to access the pages of a process's 191 | memory through open(2), read(2), and lseek(2). 192 | 193 | 194 | mountinfo 195 | https://man7.org/linux/man-pages/man5/proc.5.html 196 | This file contains information about mounts in the 197 | process's mount namespace (see mount_namespaces(7)).
It 198 | supplies various information (e.g., propagation state, 199 | root of mount for bind mounts, identifier for each mount 200 | and its parent) that is missing from the (older) 201 | /proc/[pid]/mounts file, and fixes various other problems 202 | with that file (e.g., nonextensibility, failure to 203 | distinguish per-mount versus per-superblock options). 204 | 205 | mounts 206 | https://man7.org/linux/man-pages/man5/proc.5.html 207 | This file lists all the filesystems currently mounted in 208 | the process's mount namespace (see mount_namespaces(7)). 209 | 210 | 211 | mountstats 212 | https://man7.org/linux/man-pages/man5/proc.5.html 213 | This file exports information (statistics, configuration 214 | information) about the mounts in the process's mount 215 | namespace (....) Currently (as at Linux 2.6.26), only NFS filesystems 216 | export statistics information via this field. 217 | 218 | net 219 | Directory containing a *lot* of hard to immediately decipher stats. Potentially very 220 | useful but hard to tease out what's important w/o more than a casual glance. 221 | https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/deployment_guide/s2-proc-dir-net 222 | 223 | 224 | ns 225 | https://man7.org/linux/man-pages/man5/proc.5.html 226 | This is a subdirectory containing one entry for each 227 | namespace that supports being manipulated by setns(2). 228 | 229 | numa_maps 230 | https://man7.org/linux/man-pages/man5/proc.5.html 231 | This file displays information about a process's NUMA memory 232 | policy and allocation. 233 | 234 | oom_adj 235 | https://man7.org/linux/man-pages/man5/proc.5.html 236 | This file can be used to adjust the score used to select 237 | which process should be killed in an out-of-memory (OOM) 238 | situation.
239 | 240 | oom_score 241 | https://man7.org/linux/man-pages/man5/proc.5.html 242 | This file displays the current score that the kernel gives 243 | to this process for the purpose of selecting a process for 244 | the OOM-killer. 245 | 246 | 247 | oom_score_adj 248 | https://man7.org/linux/man-pages/man5/proc.5.html 249 | This file can be used to adjust the badness heuristic used 250 | to select which process gets killed in out-of-memory 251 | conditions. 252 | 253 | pagemap 254 | https://man7.org/linux/man-pages/man5/proc.5.html 255 | This file shows the mapping of each of the process's 256 | virtual pages into physical page frames or swap area. 257 | 258 | 259 | patch_state 260 | Not finding any clear documentation. 261 | 262 | 263 | personality 264 | https://man7.org/linux/man-pages/man5/proc.5.html 265 | This read-only file exposes the process's execution 266 | domain, as set by personality(2). The value is displayed 267 | in hexadecimal notation. 268 | 269 | 270 | 271 | projid_map 272 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html 273 | Similarly to user and group ID mappings, it is possible to create 274 | project ID mappings for a user namespace. (Project IDs are used 275 | for disk quotas; see setquota(8) and quotactl(2).) 276 | Project ID mappings are defined by writing to the 277 | /proc/[pid]/projid_map file (present since Linux 3.7). 278 | 279 | 280 | 281 | root -> / 282 | https://man7.org/linux/man-pages/man5/proc.5.html 283 | UNIX and Linux support the idea of a per-process root of 284 | the filesystem, set by the chroot(2) system call. This 285 | file is a symbolic link that points to the process's root 286 | directory, and behaves in the same way as exe, and fd/*. 
287 | 288 | sched 289 | # Little formal documentation: 290 | https://lwn.net/Articles/242900/ 291 | 292 | schedstat 293 | https://docs.kernel.org/scheduler/sched-stats.html 294 | schedstats also adds a new /proc/[pid]/schedstat file to include some of the same information on a per-process level. There are three fields in this file correlating for that process to: 295 | time spent on the cpu (in nanoseconds) 296 | time spent waiting on a runqueue (in nanoseconds) 297 | # of timeslices run on this cpu 298 | 299 | sessionid 300 | https://patchwork.kernel.org/project/linux-audit/patch/0e77d290bb50232d9ec9317645106f1330bd2d54.1616008065.git.rgb@redhat.com/ 301 | Not much documentation, but maybe included in stat output? 302 | 303 | setgroups 304 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html 305 | The /proc/[pid]/setgroups file displays the string "allow" if 306 | processes in the user namespace that contains the process pid are 307 | permitted to employ the setgroups(2) system call; 308 | 309 | 310 | 311 | smaps 312 | https://man7.org/linux/man-pages/man5/proc.5.html 313 | This file shows memory consumption for each of the 314 | process's mappings. 315 | 316 | smaps_rollup 317 | https://www.kernel.org/doc/Documentation/ABI/testing/procfs-smaps_rollup 318 | This file provides pre-summed memory information for a 319 | process. The format is almost identical to /proc/pid/smaps, 320 | except instead of an entry for each VMA in a process, 321 | smaps_rollup has a single entry (tagged "[rollup]") 322 | for which each field is the sum of the corresponding 323 | fields from all the maps in /proc/pid/smaps. 324 | 325 | stack 326 | https://man7.org/linux/man-pages/man5/proc.5.html 327 | This file provides a symbolic trace of the function calls 328 | in this process's kernel stack. 329 | 330 | stat (potentially *very* useful) 331 | https://man7.org/linux/man-pages/man5/proc.5.html 332 | Status information about the process. This is used by ps(1).
333 | 334 | statm 335 | https://man7.org/linux/man-pages/man5/proc.5.html 336 | Provides information about memory usage, measured in 337 | pages. 338 | 339 | status (same as stat but includes named fields) 340 | https://man7.org/linux/man-pages/man5/proc.5.html 341 | Provides much of the information in /proc/[pid]/stat and 342 | /proc/[pid]/statm in a format that's easier for humans to 343 | parse. 344 | 345 | 346 | syscall 347 | https://man7.org/linux/man-pages/man5/proc.5.html 348 | (same as stat but includes named fields) 349 | This file exposes the system call number and argument 350 | registers for the system call currently being executed by 351 | the process, followed by the values of the stack pointer 352 | and program counter registers. The values of all six 353 | argument registers are exposed, although most system calls 354 | use fewer registers. 355 | 356 | task 357 | https://man7.org/linux/man-pages/man5/proc.5.html 358 | This is a directory that contains one subdirectory for 359 | each thread in the process. The name of each subdirectory 360 | is the numerical thread ID ([tid]) of the thread (see 361 | gettid(2)). 362 | Within each of these subdirectories, there is a set of 363 | files with the same names and contents as under the 364 | /proc/[pid] directories. 365 | 366 | 367 | timens_offsets 368 | https://man.archlinux.org/man/time_namespaces.7.en 369 | Associated with each time namespace are offsets, expressed with respect to the initial time namespace, that define the values of the monotonic and boot-time clocks in that namespace. These offsets are exposed via the file /proc/PID/timens_offsets. Within this file, the offsets are expressed as lines consisting of three space-delimited fields: 370 | 371 | 372 | 373 | timers 374 | https://man7.org/linux/man-pages/man5/proc.5.html 375 | /proc/[pid]/timers (since Linux 3.10) 376 | A list of the POSIX timers for this process. Each timer 377 | is listed with a line that starts with the string "ID:". 
378 | 379 | 380 | timerslack_ns 381 | https://man7.org/linux/man-pages/man5/proc.5.html 382 | This file exposes the process's "current" timer slack 383 | value, expressed in nanoseconds. The file is writable, 384 | allowing the process's timer slack value to be changed. 385 | 386 | uid_map 387 | https://man7.org/linux/man-pages/man7/user_namespaces.7.html 388 | When a user namespace is created, it starts out without a mapping 389 | of user IDs (group IDs) to the parent user namespace. The 390 | /proc/[pid]/uid_map and /proc/[pid]/gid_map files (available 391 | since Linux 3.5) expose the mappings for user and group IDs 392 | inside the user namespace for the process pid. 393 | 394 | wchan 395 | https://man7.org/linux/man-pages/man5/proc.5.html 396 | The symbolic name corresponding to the location in the 397 | kernel where the process is sleeping. 398 | -------------------------------------------------------------------------------- /models/proc/processes/processes_autogroup.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | str_split(filename, '/')[3] AS PID, 5 | task_group, 6 | nice 7 | FROM read_csv('/proc/[0-9]*/autogroup', header=False, filename=true, delim=' ', 8 | columns={task_group: 'VARCHAR', huh: 'VARCHAR', nice: 'INT'}) 9 | ORDER BY PID ASC 10 | -------------------------------------------------------------------------------- /models/proc/processes/processes_cgroup.sql: -------------------------------------------------------------------------------- 1 | -- hierarchy-ID:controller-list:cgroup-path 2 | 3 | 4 | 5 | SELECT 6 | str_split(filename, '/')[3] AS PID, 7 | hierarchy_id, 8 | controller_list, 9 | cgroup_path 10 | FROM read_csv('/proc/[0-9]*/cgroup', header=False, filename=true, delim=':', 11 | columns={hierarchy_id: 'VARCHAR', controller_list: 'VARCHAR', cgroup_path: 'VARCHAR'}) 12 | ORDER BY PID ASC 13 | 14 | -------------------------------------------------------------------------------- 
/models/proc/processes/processes_cmdline.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | str_split(filename, '/')[3] AS PID, 5 | trim(replace(column0, chr(0), ' ')) AS cmdline 6 | FROM read_csv('/proc/[0-9]*/cmdline', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'}) 7 | ORDER BY PID ASC 8 | -------------------------------------------------------------------------------- /models/proc/processes/processes_comm.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | str_split(filename, '/')[3] AS PID, 5 | trim(replace(column0, chr(0), ' ')) AS comm 6 | FROM read_csv('/proc/[0-9]*/comm', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'}) 7 | ORDER BY PID ASC 8 | -------------------------------------------------------------------------------- /models/proc/processes/processes_environment.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | str_split(filename, '/')[3] AS PID, 5 | env_pair[1] AS Name, 6 | env_pair[2] AS Value 7 | FROM ( 8 | SELECT 9 | filename, 10 | str_split(unnest(str_split(column0, chr(0))), '=') AS env_pair 11 | FROM read_csv('/proc/[0-9]*/environ', header=False, filename=true, delim='\0', columns={column0: 'VARCHAR'}) 12 | ) 13 | WHERE Name != '' 14 | ORDER BY PID, Name 15 | 16 | -------------------------------------------------------------------------------- /models/proc/processes/processes_fdinfo.sql: -------------------------------------------------------------------------------- 1 | 2 | /* Note! Due to a race condition between when DuckDB performs the glob operation and 3 | when the underlying process closes the file, it is likely queries to this model 4 | would fail because a file at glob time does not exist at file open time.
To get 5 | around this we limit the FD glob pattern to [0-2], that way it will only match 6 | stdin, stdout, and stderr. Which... in theory are stable? Note that these could 7 | still go away in theory, but this has not yet been observed in practice. 8 | */ 9 | SELECT 10 | str_split(filename, '/')[3] AS PID, 11 | str_split(filename, '/')[5] AS fd, /* filename is /proc/[pid]/fdinfo/[fd]; the split list is 1-indexed with an empty first element, so the fd is element 5 */ 12 | str_split(row, ':')[1] AS tag, 13 | /* Note you may be tempted to convert these to ints, non-int 14 | values are legal here even if you don't see them! If you're 15 | going to do so, you'll need to find a way to filter on *type* 16 | of file the fd refers to, as the 'value' field changes on this. */ 17 | trim(array_slice(row, instr(row, ':')+1, null)) AS value 18 | FROM read_csv('/proc/[0-9]*/fdinfo/[0-2]', header=False, filename=true, delim='\0', 19 | columns={row: 'VARCHAR'}) 20 | ORDER BY PID ASC 21 | -------------------------------------------------------------------------------- /models/proc/processes/processes_io.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | SELECT 4 | str_split(filename, '/')[3] AS PID, 5 | -- Pivot each of our name/value pairs to columns 6 | first(value) FILTER (WHERE tag = 'rchar') AS 'rchar', 7 | first(value) FILTER (WHERE tag = 'wchar') AS 'wchar', 8 | first(value) FILTER (WHERE tag = 'syscr') AS 'syscr', 9 | first(value) FILTER (WHERE tag = 'syscw') AS 'syscw', 10 | first(value) FILTER (WHERE tag = 'read_bytes') AS 'read_bytes', 11 | first(value) FILTER (WHERE tag = 'write_bytes') AS 'write_bytes', 12 | first(value) FILTER (WHERE tag = 'cancelled_write_bytes') AS 'cancelled_write_bytes', 13 | FROM read_csv('/proc/[0-9]*/io', header=False, filename=true, delim=':', 14 | columns={tag: 'VARCHAR', value: 'int64'}) 15 | GROUP BY PID 16 | ORDER BY PID ASC 17 | -------------------------------------------------------------------------------- /models/proc/processes/processes_limits.sql:
-------------------------------------------------------------------------------- 1 | /* 2 | Limit Soft Limit Hard Limit Units 3 | Max cpu time unlimited unlimited seconds 4 | Max file size unlimited unlimited bytes 5 | Max data size unlimited unlimited bytes 6 | Max stack size 8388608 unlimited bytes 7 | Max core file size 0 unlimited bytes 8 | Max resident set unlimited unlimited bytes 9 | Max processes 15630 15630 processes 10 | Max open files 1048576 1048576 files 11 | Max locked memory 67108864 67108864 bytes 12 | Max address space unlimited unlimited bytes 13 | Max file locks unlimited unlimited locks 14 | Max pending signals 15630 15630 signals 15 | Max msgqueue size 819200 819200 bytes 16 | Max nice priority 0 0 17 | Max realtime priority 0 0 18 | Max realtime timeout unlimited unlimited us 19 | */ 20 | /* Each line is read whole into 'row' and cut into name / soft limit / hard limit / units by character position, then trimmed. skip=1 is meant to drop the header row shown above. Values are left as text (the sample above shows non-numeric values such as 'unlimited'). */ 21 | 22 | SELECT 23 | str_split(filename, '/')[3] AS PID, 24 | trim(row[:26]) AS limit_name, 25 | trim(row[26:47]) AS soft_limit, 26 | trim(row[47:68]) AS hard_limit, 27 | trim(row[68:]) AS units 28 | FROM read_csv('/proc/[0-9]*/limits', header=False, filename=true, skip=1, delim=chr(0), 29 | columns={row: 'VARCHAR'}) 30 | ORDER BY PID ASC 31 | 32 | -------------------------------------------------------------------------------- /models/proc/processes/processes_schedstat.sql: -------------------------------------------------------------------------------- 1 | /* 2 | $ cat /proc/45399/schedstat 3 | 189497057 49271715 227 4 | 5 | */ 6 | /* Reads the three space-delimited fields of each schedstat file as cputime_ns, runqueue_ns and num_timeslices (all HUGEINT); PID is taken from the path of the file each row came from. */ 7 | 8 | SELECT 9 | str_split(filename, '/')[3] AS PID, 10 | cputime_ns, 11 | runqueue_ns, 12 | num_timeslices, 13 | FROM read_csv('/proc/[0-9]*/schedstat', header=False, filename=true, delim=' ', 14 | columns={cputime_ns: 'HUGEINT', runqueue_ns: 'HUGEINT', num_timeslices: 'HUGEINT'}) 15 | ORDER BY PID ASC 16 | -------------------------------------------------------------------------------- /models/proc/processes/processes_status.sql: -------------------------------------------------------------------------------- 1 | /* 2 | $
cat /proc/45399/status 3 | Name: systemd 4 | Umask: 0002 5 | State: S (sleeping) 6 | Tgid: 45399 7 | Ngid: 0 8 | Pid: 45399 9 | PPid: 1 10 | TracerPid: 0 11 | Uid: 4000 4000 4000 4000 12 | Gid: 4000 4000 4000 4000 13 | FDSize: 256 14 | ..... 15 | */ 16 | /* Emits one (PID, tag, value) row per line read from each status file: tag is the text before the first ':' and value is everything after it, both trimmed. */ 17 | 18 | SELECT 19 | str_split(filename, '/')[3] AS PID, 20 | /* Note that some of the 'value' fields have ':' chars in them. That's 21 | why we manually parse the tag/value in the select statement, and not 22 | via the read_csv() function. 23 | */ 24 | trim(str_split(row, ':')[1]) AS tag, 25 | trim(array_slice(row, instr(row, ':')+1, null)) AS value, 26 | FROM read_csv('/proc/[0-9]*/status', header=False, filename=true, delim=chr(0), 27 | columns={row: 'VARCHAR'}) 28 | ORDER BY PID, tag ASC 29 | -------------------------------------------------------------------------------- /profiles.yml: -------------------------------------------------------------------------------- 1 | duckservability: 2 | outputs: 3 | dev: 4 | type: duckdb 5 | path: ./build/duckservability.duckdb 6 | target: dev 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | agate==1.7.0 2 | attrs==22.2.0 3 | Babel==2.12.1 4 | betterproto==1.2.5 5 | certifi==2022.12.7 6 | cffi==1.15.1 7 | charset-normalizer==3.1.0 8 | click==8.1.3 9 | colorama==0.4.6 10 | dbt-core==1.4.4 11 | dbt-duckdb==1.4.0 12 | dbt-extractor==0.4.1 13 | duckdb==0.7.1 14 | future==0.18.3 15 | grpclib==0.4.3 16 | h2==4.1.0 17 | hologram==0.0.15 18 | hpack==4.0.0 19 | hyperframe==6.0.1 20 | idna==3.4 21 | isodate==0.6.1 22 | Jinja2==3.1.2 23 | jsonschema==3.2.0 24 | leather==0.3.4 25 | Logbook==1.5.3 26 | MarkupSafe==2.1.2 27 | mashumaro==3.3.1 28 | minimal-snowplow-tracker==0.0.2 29 | msgpack==1.0.4 30 | multidict==6.0.4 31 | networkx==2.8.8 32 | packaging==23.0 33 | parsedatetime==2.4 34 | pathspec==0.10.3 35 | pycparser==2.21 36 |
pyrsistent==0.19.3 37 | python-dateutil==2.8.2 38 | python-slugify==8.0.1 39 | pytimeparse==1.1.8 40 | pytz==2022.7.1 41 | PyYAML==6.0 42 | requests==2.28.2 43 | six==1.16.0 44 | sqlparse==0.4.3 45 | stringcase==1.2.0 46 | text-unidecode==1.3 47 | typing-extensions==4.5.0 48 | urllib3==1.26.14 49 | Werkzeug==2.2.3 50 | --------------------------------------------------------------------------------