├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── pr.yaml │ ├── release.yaml │ └── subcharts.yaml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── chart ├── Chart.lock ├── Chart.yaml ├── charts │ ├── grafana-6.48.0.tgz │ └── victoria-metrics-single-0.8.48.tgz ├── dashboard.json ├── templates │ ├── _helpers.tpl │ ├── daemonset.yaml │ ├── grafana │ │ └── dashboards.yaml │ └── rbac │ │ ├── psp.yaml │ │ ├── role.yaml │ │ ├── rolebinding.yaml │ │ └── serviceaccount.yaml └── values.yaml ├── cmd └── caretta │ └── caretta.go ├── go.mod ├── go.sum ├── images ├── caretta.gif ├── logo.svg └── screenshot.png ├── pkg ├── caretta │ ├── caretta.go │ ├── config.go │ ├── ebpf_map.go │ ├── links_tracer.go │ ├── links_tracer_test.go │ └── types.go ├── k8s │ ├── ipresolver.go │ └── ipresolver_test.go ├── metrics │ └── prometheus.go └── tracing │ ├── ebpf │ ├── arm_support.h │ ├── caretta.bpf.c │ ├── core_structures.h │ ├── ebpf_internal_types.h │ ├── ebpf_utils.h │ └── epbf_shared_types.h │ └── probes.go └── scripts └── build └── download_libbpf_headers.sh /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Kubernetes cluster information - distribution, version 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/pr.yaml: -------------------------------------------------------------------------------- 1 | name: pr 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | contents: write 11 | id-token: write 12 | steps: 13 | - 14 | name: Checkout 15 | uses: actions/checkout@v3 16 | - 17 | name: Set Up QEMU 18 | uses: docker/setup-qemu-action@v3 19 | with: 20 | platforms: arm64 21 | - 22 | name: Set up Docker Buildx 23 | uses: docker/setup-buildx-action@v3 24 | - 25 | name: Build Docker Image 26 | uses: docker/build-push-action@v3 27 | with: 28 | context: . 29 | push: false 30 | cache-from: type=gha 31 | cache-to: type=gha,mode=max 32 | tags: caretta 33 | platforms: linux/amd64,linux/arm64 34 | 35 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | id-token: write 14 | steps: 15 | - 16 | name: Checkout 17 | uses: actions/checkout@v3 18 | - 19 | name: Set Up QEMU 20 | uses: docker/setup-qemu-action@v3 21 | with: 22 | platforms: arm64 23 | - 24 | name: Set up Docker Buildx 25 | uses: docker/setup-buildx-action@v3 26 | - 27 | name: Login to Quay.io 28 | uses: docker/login-action@v2 29 | with: 30 | registry: quay.io 31 | username: ${{ secrets.QUAY_USERNAME }} 32 | password: ${{ secrets.QUAY_ROBOT_TOKEN }} 33 | - 34 | name: Build & Push Docker Image 35 | uses: docker/build-push-action@v3 36 | with: 37 | context: . 38 | push: true 39 | cache-from: type=gha 40 | cache-to: type=gha,mode=max 41 | tags: quay.io/groundcover/caretta:${{ github.ref_name }} 42 | platforms: linux/arm64,linux/amd64 43 | - 44 | name: Checkout Helm Repo 45 | uses: actions/checkout@v3 46 | with: 47 | path: helm-repo 48 | repository: groundcover-com/charts 49 | token: ${{ secrets.HELM_CHARTS_REPO_KEY }} 50 | - 51 | name: Publish Chart 52 | working-directory: helm-repo 53 | env: 54 | GITHUB_TAG: ${{ github.ref_name }} 55 | run: | 56 | version=${GITHUB_TAG#v} 57 | helm lint ../chart 58 | helm package --version ${version} --app-version ${GITHUB_TAG} ../chart 59 | helm repo index --url https://helm.groundcover.com . 60 | git config user.name "ci-groundcover" 61 | git config user.email "ci@groundcover.com" 62 | git add . 
63 | git commit -m "Added caretta ${version} chart" 64 | git push 65 | -------------------------------------------------------------------------------- /.github/workflows/subcharts.yaml: -------------------------------------------------------------------------------- 1 | name: subcharts-images 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | paths: 8 | - 'chart/charts/**' 9 | - '.github/workflows/subcharts.yaml' 10 | 11 | defaults: 12 | run: 13 | working-directory: chart/charts 14 | 15 | jobs: 16 | subchart-images: 17 | runs-on: ubuntu-latest 18 | permissions: 19 | contents: write 20 | id-token: write 21 | steps: 22 | - 23 | name: Checkout 24 | uses: actions/checkout@v3 25 | - 26 | name: Login to Quay.io 27 | uses: docker/login-action@v2 28 | with: 29 | registry: quay.io 30 | username: ${{ secrets.QUAY_USERNAME }} 31 | password: ${{ secrets.QUAY_ROBOT_TOKEN }} 32 | - 33 | name: Set up Docker Buildx 34 | uses: docker/setup-buildx-action@v2 35 | - 36 | name: Push Grafana Image 37 | run: | 38 | IMAGE_TAG=$(helm show chart grafana* | yq e '.appVersion' -) 39 | docker buildx imagetools create grafana/grafana:${IMAGE_TAG} --tag quay.io/groundcover/grafana:${IMAGE_TAG} 40 | - 41 | name: Push Victoria-Metrics Image 42 | run: | 43 | IMAGE_TAG=v$(helm show chart victoria-metrics* | yq e '.appVersion' -) 44 | docker buildx imagetools create victoriametrics/victoria-metrics:${IMAGE_TAG} --tag quay.io/groundcover/victoria-metrics:${IMAGE_TAG} 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | *.o 11 | 12 | # Test binary, built with `go test -c` 13 | *.test 14 | 15 | # Output of the go coverage tool, specifically when used with LiteIDE 16 | *.out 17 | 18 | # Dependency directories (remove the comment below to include it) 19 | # vendor/ 20 | 21 | # Go workspace file 22 | go.work 23 | 24 | 25 | # autogenerated by bpf2go 26 | *_bpfel_*.go 27 | 28 | # binary output 29 | bin/ 30 | vendor/ 31 | 32 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | info@groundcover.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/cilium/ebpf-builder:1648566014 AS builder 2 | ARG TARGETARCH 3 | ARG TARGETPLATFORM 4 | RUN echo "Building for $TARGETARCH" 5 | RUN echo "Building for $TARGETPLATFORM" 6 | WORKDIR /build 7 | COPY . /build/ 8 | RUN make build ARCH=$TARGETARCH 9 | 10 | FROM alpine:3.17 11 | 12 | WORKDIR /app 13 | COPY --from=builder build/bin/caretta ./ 14 | 15 | VOLUME /sys/kernel/debug 16 | 17 | CMD ./caretta -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | BIN_DIR:=bin
 2 | BINARY_PATH:=${BIN_DIR}/caretta
 3 | DOCKER_BIN:=docker
 4 | BPF2GO_BINARY := ${BIN_DIR}/bpf2go
 5 | BPF2GO_VERSION := 0.9.0
 6 | REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 7 | UIDGID := $(shell stat -c '%u:%g' ${REPODIR})
 8 | PROJECT_DIRNAME := $(shell basename ${REPODIR})
 9 | CILIUM_EBPF_DIRECTORY := /tmp/cilium-ebpf
10 | BUILD_SCRIPTS_DIRECTORY=scripts/build
11 | BPF_CLANG := clang-14
12 | INCLUDE_C_FLAGS := -I/tmp/caretta_extra/libbpf_headers -I/tmp/${PROJECT_DIRNAME}/
13 | BPF_CFLAGS := -O2 -g -Wall -Werror -fdebug-prefix-map=/ebpf=. ${INCLUDE_C_FLAGS}
14 | IMAGE=quay.io/cilium/ebpf-builder
15 | VERSION=1648566014
16 | 
17 | ARCH=amd64 # amd64 or arm64
18 | 
19 | .PHONY: build
20 | build: ${BIN_DIR} pkg/tracing/bpf_bpfel_x86.go cmd/caretta/caretta.go
21 | 	GOOS=linux GOARCH=${ARCH} CGO_ENABLED=0 go build -o ${BINARY_PATH} cmd/caretta/caretta.go
22 | 
23 | ${BIN_DIR}:
24 | 	mkdir -p ${BIN_DIR}
25 | 
26 | .PHONY: download_libbpf_headers
27 | download_libbpf_headers:
28 | 	${REPODIR}/${BUILD_SCRIPTS_DIRECTORY}/download_libbpf_headers.sh
29 | 
30 | .PHONY: generate_ebpf
31 | generate_ebpf: ${BPF2GO_BINARY}_${BPF2GO_VERSION} \
32 | 	download_libbpf_headers
33 | 	go mod vendor
34 | 	(cd ${REPODIR}/pkg/tracing && \
35 | 	GOPACKAGE=tracing ${REPODIR}/${BPF2GO_BINARY}_${BPF2GO_VERSION} \
36 | 	-cc "${BPF_CLANG}" -cflags "${BPF_CFLAGS}" \
37 | 	-target arm64,amd64 bpf \
38 | 	ebpf/caretta.bpf.c --)
39 | 
40 | ${BPF2GO_BINARY}_${BPF2GO_VERSION}:
41 | 	git clone -q --branch v${BPF2GO_VERSION} https://github.com/cilium/ebpf \
42 | 	${CILIUM_EBPF_DIRECTORY} 2>/dev/null
43 | 	cd ${CILIUM_EBPF_DIRECTORY} && \
44 | 	go build -o ${REPODIR}/${BPF2GO_BINARY}_${BPF2GO_VERSION} ./cmd/bpf2go
45 | 
46 | .PHONY: generate_ebpf_in_docker
47 | generate_ebpf_in_docker: ${BIN_DIR}
48 | 	${DOCKER_BIN} run \
49 | 	-v ${REPODIR}:/tmp/caretta \
50 | 	-w /tmp/${PROJECT_DIRNAME} \
51 | 	--env HOME="/tmp/" \
52 | 	"${IMAGE}:${VERSION}" \
53 | 	${MAKE} generate_ebpf
54 | 
55 | pkg/tracing/bpf_bpfel%.go: pkg/tracing/ebpf/caretta.bpf.c
56 | 	$(MAKE) generate_ebpf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ![Caretta](images/logo.svg)
 3 | 
 4 | ![caretta](images/caretta.gif)
 5 | 
 6 | Caretta - Instant K8s service dependency map, right to your Grafana | Product Hunt
 7 | 
 8 | Instant K8s service dependency map, right to your Grafana.
 9 | 
10 | made by groundcover
11 | 
12 | [![slack](https://img.shields.io/badge/slack-groundcover-yellowgreen.svg?logo=slack)](http://www.groundcover.com/join-slack)
13 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
14 | 
15 | 
16 | 
17 | 
18 | 
19 | ![caretta-screenshot](images/screenshot.png)
20 | 
21 | 
22 | ## What is Caretta?
23 | 
24 | Caretta is a lightweight, standalone tool that instantly creates a visual network map of the services running in your cluster.
25 | 
26 | Caretta leverages eBPF to efficiently map all service network interactions in a K8s cluster, and Grafana to query and visualize the collected data.
27 | 
28 | Caretta is built to be efficient, with a minimal footprint on the system, and does not require any modifications to the cluster.
29 | 
30 | Caretta demonstrates the power of using eBPF for observability solutions, which is our vision at
 groundcover. If you're interested in understanding how Caretta is built, head over to our Caretta blog post!
31 | 
32 | ## Installing Caretta :zap:
33 | As simple as installing a helm chart. It is recommended to install Caretta in a new, unique namespace.
34 | ```bash
35 | helm repo add groundcover https://helm.groundcover.com/
36 | ```
37 | ```bash
38 | helm repo update
39 | ```
40 | ```bash
41 | helm install caretta --namespace caretta --create-namespace groundcover/caretta
42 | ```
43 | 
44 | ### Configuration
45 | You can configure Caretta using helm values.
46 | Useful values:
47 | * **tolerations** can be specified to make sure Caretta's eBPF agent runs on all nodes in your cluster. *The default value tolerates common control-plane node taints.*
48 | * **victoria-metrics-single.server.persistentVolume.enabled** can be set to *true* if you wish to save Caretta's metrics to a persistent volume. *default: false*
49 | * **pollIntervalSeconds** can be modified to specify the polling and publishing interval of new metrics from the kernel. *default: 5*
50 | * The built-in Victoria Metrics and Grafana instances can be disabled by setting **victoria-metrics-single.enabled** or **grafana.enabled** to false, respectively. _default: true_
51 | * Caretta resolves Kubernetes entities to their owners by default. For example, a pod 'pod1' and another pod 'pod2' both belonging to a deployment 'deployment1' will be resolved to 'deployment1'. This can be disabled by setting **traverseUpHierarchy** to false. _default: true_
52 | 
53 | 
54 | Example yaml for overriding these values:
55 | ```yaml
56 | pollIntervalSeconds: 15 # set metrics polling interval
57 | traverseUpHierarchy: false # disable resolving kubernetes entities to their owners
58 | 
59 | tolerations: # set any desired tolerations
60 |   - key: node-role.kubernetes.io/control-plane
61 |     operator: Exists
62 |     effect: NoSchedule
63 | 
64 | victoria-metrics-single:
65 |   server:
66 |     persistentVolume:
67 |       enabled: true # set to true to use persistent volume
68 | ```
69 | This can also be done using the --set flag on the `helm install` command.
70 | 
71 | ### Uninstallation
72 | To uninstall, delete the helm release:
73 | ```bash
74 | helm delete caretta --namespace caretta
75 | ```
76 | Note that if persistent storage was enabled in the installation, it may not be deleted automatically by this command.
77 | 
78 | ## Requirements
79 | * Linux kernel version >= 4.16
80 | * CO-RE support. Supported linux distributions can be found here. Specifically, Docker for Mac uses a distribution which is not currently supported.
81 | 
82 | 
83 | 
84 | ## Working with Caretta :turtle:
85 | Caretta's helm chart ships an instance of Grafana with a predefined dashboard using data published by Caretta. This dashboard contains some examples to demonstrate the usage of Caretta's metrics.
86 | 
87 | ### Using the provided Grafana instance
88 | To access Grafana, port-forward port `3000` from the Grafana pod in Caretta's namespace.
89 | 
90 | Using *kubectl*, it should look something like this:
91 | 
92 | ```bash
93 | kubectl port-forward --namespace caretta <grafana-pod-name> 3000:3000
94 | ```
95 | 
96 | > **_NOTE:_** Anonymous mode is enabled, making the default dashboard accessible with no login needed.
97 | > To edit the default dashboard or create your own dashboard, use the default administrator's credentials: user `admin`; password `caretta`.
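
If you'd rather not look up the pod name, you can usually port-forward the Grafana service instead. A minimal sketch, assuming the chart's default Grafana service name `caretta-grafana` and the Grafana chart's default service port `80` (verify both with `kubectl get svc --namespace caretta`):

```bash
# Forward local port 3000 to the Grafana service; kubectl picks a backing pod for you.
kubectl port-forward --namespace caretta svc/caretta-grafana 3000:80
```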
98 | 
99 | ### Scraping Caretta's metrics
100 | 
101 | Caretta uses [Victoria Metrics](https://victoriametrics.com/) to collect and publish its metrics, and the outcome can be consumed by **any Prometheus-compatible dashboard**.
102 | 
103 | Caretta's main metric is `caretta_links_observed` (Gauge). It uses the following labels to represent a specific connection (network socket) going through the cluster:
104 | * `client_name` - either the name of a kubernetes entity (if resolved), an external domain (if resolved), or an IP address.
105 | * `client_namespace` - either the namespace of the kubernetes entity, or "node", or "external".
106 | * `client_kind` - either the kind of the kubernetes entity, or "node", or "external".
107 | * `server_name` - either the name of a kubernetes entity (if resolved), an external domain (if resolved), or an IP address.
108 | * `server_namespace` - either the namespace of the kubernetes entity, or "node", or "external".
109 | * `server_kind` - either the kind of the kubernetes entity, or "node", or "external".
110 | * `server_port` - the port used by the server.
111 | * `role` - either 1 (client) or 2 (server).
112 | 
113 | Alongside those labels, Caretta uses other labels for Grafana's Node Graph panel.
114 | 
115 | #### Example metric data
116 | This example shows a connection from a client named `checkoutservice`, controlled by a deployment, to a service named `productcatalogservice` on port 3550, from the perspective of the client. The total number of bytes sent by the client over this connection is 2537.
117 | ```bash
118 | caretta_links_observed{client_id="1074587981",client_kind="Deployment",client_name="checkoutservice",client_namespace="demo-ng",link_id="198768460",role="1",server_id="1112713827",server_kind="Service",server_name="productcatalogservice",server_namespace="demo-ng",server_port="3550"} 2537
119 | ```
120 | 
121 | #### Example queries :star:
122 | ```bash
123 | increase((sum by (server_port) (caretta_links_observed{client_name="some-client", server_name="some-server"}))[15m])
124 | ```
125 | will output the throughput observed between some-client and some-server in the last 15 minutes, aggregated by port.
126 | 
127 | ```bash
128 | sum by (server_name) (rate(caretta_links_observed{client_name="some-client"}))
129 | ```
130 | will output the rate of traffic from some-client to servers it communicates with, aggregated by the server's name.
131 | 
132 | ```bash
133 | sort_desc(increase((sum by (client_name)(caretta_links_observed{server_namespace="external"}))[5m]))
134 | ```
135 | will output communication to external servers by client's name, sorted descending.
136 | 
137 | ## Need help :grey_question:
138 | Feel free to reach out to us on our slack channel, or create an issue in this repository.
139 | 
140 | ## Contribution
141 | Feel free to add your contribution to the project.
142 | 143 | * Open an issue for missing features, or bugs 144 | * Create a pull request for adding code to the project 145 | -------------------------------------------------------------------------------- /chart/Chart.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: victoria-metrics-single 3 | repository: https://victoriametrics.github.io/helm-charts 4 | version: 0.8.48 5 | - name: grafana 6 | repository: https://grafana.github.io/helm-charts 7 | version: 6.48.0 8 | digest: sha256:eb7c3b54ae1fef78dae03136bdd7c0e34a3a08a34c147a227e824437a443bccb 9 | generated: "2022-12-26T10:15:04.518501964Z" 10 | -------------------------------------------------------------------------------- /chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | version: 0.0.1 2 | apiVersion: v2 3 | appVersion: v0.0.1 4 | name: caretta 5 | description: A helm chart for Caretta service map. 6 | type: application 7 | dependencies: 8 | - name: victoria-metrics-single 9 | version: "0.8.48" 10 | repository: "https://victoriametrics.github.io/helm-charts" 11 | condition: victoria-metrics-single.enabled 12 | - name: grafana 13 | version: "6.48.0" 14 | repository: "https://grafana.github.io/helm-charts" 15 | condition: grafana.enabled -------------------------------------------------------------------------------- /chart/charts/grafana-6.48.0.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groundcover-com/caretta/280d1640ce0174b1dfdd7d05bdd104604aa04508/chart/charts/grafana-6.48.0.tgz -------------------------------------------------------------------------------- /chart/charts/victoria-metrics-single-0.8.48.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groundcover-com/caretta/280d1640ce0174b1dfdd7d05bdd104604aa04508/chart/charts/victoria-metrics-single-0.8.48.tgz -------------------------------------------------------------------------------- /chart/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 0, 27 | "id": 15, 28 | "links": [], 29 | "liveNow": false, 30 | "panels": [ 31 | { 32 | "datasource": { 33 | "type": "prometheus", 34 | "uid": "${DS_PROMETHEUS}" 35 | }, 36 | "description": "", 37 | "gridPos": { 38 | "h": 24, 39 | "w": 17, 40 | "x": 0, 41 | "y": 0 42 | }, 43 | "id": 2, 44 | "interval": "15s", 45 | "options": { 46 | "nodes": { 47 | "arcs": [ 48 | { 49 | "color": "#5794F2", 50 | "field": "arc__color" 51 | } 52 | ] 53 | } 54 | }, 55 | "targets": [ 56 | { 57 | "datasource": { 58 | "type": "prometheus", 59 | "uid": "${DS_PROMETHEUS}" 60 | }, 61 | "editorMode": "code", 62 | "exemplar": false, 63 | "expr": "increase((sum by (id, title, subTitle, detail__kind, arc__color) (label_replace((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", 
client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"server_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"server_namespace\", \"(.*)\"), \"title\", \"$1\", \"server_name\", \"(.*)\"), \"id\", \"$1\", \"server_id\", \"(.*)\") or label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"detail__kind\", \"$1\", \"client_kind\", \"(.*)\"), \"subTitle\", \"$1\", \"client_namespace\", \"(.*)\"), \"title\", \"$1\", \"client_name\", \"(.*)\"), \"id\", \"$1\", \"client_id\", \"(.*)\") ), \"arc__color\", \"1\", \"link_id\", \"(.*)\")) )[$__range:$__interval]) > 0", 64 | "format": "table", 65 | "instant": true, 66 | "legendFormat": "__auto", 67 | "range": false, 68 | "refId": "nodes" 69 | }, 70 | { 71 | "datasource": { 72 | "type": "prometheus", 73 | "uid": "${DS_PROMETHEUS}" 74 | }, 75 | "editorMode": "code", 76 | "exemplar": false, 77 | "expr": "increase((sum by (id, source, target, mainStat) ((label_replace(label_replace(label_replace(label_replace((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"}), \"id\", \"$1\", \"link_id\", \"(.*)\"), \"source\", \"$1\", \"client_id\", \"(.*)\"), \"target\", \"$1\", \"server_id\", \"(.*)\"), \"mainStat\", \"$1\", \"server_port\", \"(.*)\"))) )[$__range:$__interval]) > 0", 78 | "format": "table", 79 | "hide": false, 80 | "instant": true, 81 | "legendFormat": "__auto", 82 | "range": false, 83 | "refId": "edges" 84 | } 85 | ], 86 | "title": "Service Map ☸️", 87 | "type": "nodeGraph" 88 | }, 89 | { 90 | "datasource": { 91 | "type": "prometheus", 92 | "uid": "${DS_PROMETHEUS}" 93 | }, 94 | "fieldConfig": { 95 | "defaults": { 96 | "color": { 97 | "fixedColor": "blue", 98 | "mode": "fixed" 99 | }, 100 | "custom": { 101 | "hideFrom": { 102 | "legend": false, 103 | "tooltip": false, 104 | "viz": false 105 | } 106 | }, 107 | "links": [], 108 | "mappings": [] 109 | }, 110 | "overrides": [] 111 | }, 112 | "gridPos": { 113 | "h": 7, 114 | "w": 4, 115 | "x": 17, 116 | "y": 0 117 | }, 118 | "id": 4, 119 | "options": { 120 | "displayLabels": [ 121 | "name" 122 | ], 123 | "legend": { 124 | "displayMode": "list", 125 | "placement": "right", 126 | "showLegend": false 127 | }, 128 | "pieType": "donut", 129 | "reduceOptions": { 130 | "calcs": [ 131 | "lastNotNull" 132 | ], 133 | "fields": "", 134 | "values": false 135 | }, 136 | "tooltip": { 137 | "mode": "single", 138 | "sort": "none" 139 | } 140 | }, 141 | "targets": [ 142 | { 143 | "datasource": { 144 | "type": "prometheus", 145 | "uid": "${DS_PROMETHEUS}" 146 | }, 147 | "editorMode": "code", 148 | "expr": "sum by (server_port) (increase((caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\"})[$__range:$__interval])) > 0", 149 | "legendFormat": "__auto", 150 | "range": true, 
151 | "refId": "A" 152 | } 153 | ], 154 | "title": "Active Ports", 155 | "type": "piechart" 156 | }, 157 | { 158 | "datasource": { 159 | "type": "datasource", 160 | "uid": "grafana" 161 | }, 162 | "gridPos": { 163 | "h": 7, 164 | "w": 3, 165 | "x": 21, 166 | "y": 0 167 | }, 168 | "id": 10, 169 | "options": { 170 | "code": { 171 | "language": "plaintext", 172 | "showLineNumbers": false, 173 | "showMiniMap": false 174 | }, 175 | "content": "\n \n
\n", 176 | "mode": "markdown" 177 | }, 178 | "pluginVersion": "10.1.2", 179 | "type": "text" 180 | }, 181 | { 182 | "datasource": { 183 | "type": "prometheus", 184 | "uid": "${DS_PROMETHEUS}" 185 | }, 186 | "fieldConfig": { 187 | "defaults": { 188 | "color": { 189 | "fixedColor": "purple", 190 | "mode": "continuous-blues" 191 | }, 192 | "mappings": [], 193 | "thresholds": { 194 | "mode": "absolute", 195 | "steps": [ 196 | { 197 | "color": "green", 198 | "value": null 199 | }, 200 | { 201 | "color": "red", 202 | "value": 80 203 | } 204 | ] 205 | }, 206 | "unit": "Bps" 207 | }, 208 | "overrides": [] 209 | }, 210 | "gridPos": { 211 | "h": 8, 212 | "w": 7, 213 | "x": 17, 214 | "y": 7 215 | }, 216 | "id": 8, 217 | "options": { 218 | "displayMode": "gradient", 219 | "minVizHeight": 10, 220 | "minVizWidth": 0, 221 | "orientation": "horizontal", 222 | "reduceOptions": { 223 | "calcs": [ 224 | "lastNotNull" 225 | ], 226 | "fields": "", 227 | "values": false 228 | }, 229 | "showUnfilled": true, 230 | "valueMode": "color" 231 | }, 232 | "pluginVersion": "10.1.2", 233 | "targets": [ 234 | { 235 | "datasource": { 236 | "type": "prometheus", 237 | "uid": "${DS_PROMETHEUS}" 238 | }, 239 | "editorMode": "code", 240 | "exemplar": false, 241 | "expr": "topk(8, sum by (client_name) ((rate(caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\"}[$__range:$__interval]))))", 242 | "format": "time_series", 243 | "instant": true, 244 | "legendFormat": "__auto", 245 | "range": false, 246 | "refId": "A" 247 | } 248 | ], 249 | "title": "Top Throughput Workloads", 250 | "type": "bargauge" 251 | }, 252 | { 253 | "datasource": { 254 | "type": "prometheus", 255 | "uid": "${DS_PROMETHEUS}" 256 | }, 257 | "description": "", 258 | "fieldConfig": { 259 | "defaults": { 260 | "color": { 261 | "mode": "continuous-blues" 262 | }, 263 | "mappings": [], 264 | "thresholds": { 265 | "mode": "absolute", 266 | "steps": [ 267 | { 268 | "color": "green", 269 | "value": null 270 | }, 271 | { 272 | "color": "red", 273 | "value": 80 274 | } 275 | ] 276 | }, 277 | "unit": "Bps" 278 | }, 279 | "overrides": [] 280 | }, 281 | "gridPos": { 282 | "h": 9, 283 | "w": 7, 284 | "x": 17, 285 | "y": 15 286 | }, 287 | "id": 6, 288 | "options": { 289 | "colorMode": "background", 290 | "graphMode": "area", 291 | "justifyMode": "center", 292 | "orientation": "horizontal", 293 | "reduceOptions": { 294 | "calcs": [ 295 | "lastNotNull" 296 | ], 297 | "fields": "", 298 | "values": false 299 | }, 300 | "text": {}, 301 | "textMode": "auto" 302 | }, 303 | "pluginVersion": "10.1.2", 304 | "targets": [ 305 | { 306 | "datasource": { 307 | "type": "prometheus", 308 | "uid": "${DS_PROMETHEUS}" 309 | }, 310 | "editorMode": "code", 311 | "exemplar": false, 312 | "expr": "topk(7, sum by (client_name, server_name) ( rate( (caretta_links_observed{client_namespace=~\"$namespace\", client_kind=~\"$kind\", client_name=~\"$workload\", server_port=~\"$port\", client_kind!~\"(node|external)\",} or caretta_links_observed{server_namespace=~\"$namespace\", server_kind=~\"$kind\", server_name=~\"$workload\", server_port=~\"$port\", server_kind!~\"(node|external)\"})[$__range:$__interval]) ) )", 313 | "format": "time_series", 314 | "instant": true, 315 | "legendFormat": "{{client_name}} ⮂ {{server_name}}", 316 | "range": false, 317 | "refId": "A" 318 | } 319 | ], 320 | "title": "Top Throughput Connections", 321 | "type": "stat" 322 | } 323 | ], 324 | "refresh": "", 325 | "schemaVersion": 38, 326 | "style": 
"dark", 327 | "tags": [], 328 | "templating": { 329 | "list": [ 330 | { 331 | "current": { 332 | "selected": false, 333 | "text": "default", 334 | "value": "default" 335 | }, 336 | "hide": 0, 337 | "includeAll": false, 338 | "label": "datasource", 339 | "multi": false, 340 | "name": "DS_PROMETHEUS", 341 | "options": [], 342 | "query": "prometheus", 343 | "queryValue": "", 344 | "refresh": 1, 345 | "regex": "", 346 | "skipUrlSync": false, 347 | "type": "datasource" 348 | }, 349 | { 350 | "allValue": "(.*)", 351 | "current": { 352 | "selected": true, 353 | "text": [ 354 | "All" 355 | ], 356 | "value": [ 357 | "$__all" 358 | ] 359 | }, 360 | "datasource": { 361 | "type": "prometheus", 362 | "uid": "${DS_PROMETHEUS}" 363 | }, 364 | "definition": "query_result(caretta_links_observed)", 365 | "hide": 0, 366 | "includeAll": true, 367 | "multi": true, 368 | "name": "namespace", 369 | "options": [], 370 | "query": { 371 | "query": "query_result(caretta_links_observed)", 372 | "refId": "StandardVariableQuery" 373 | }, 374 | "refresh": 1, 375 | "regex": "/.*_namespace=\"([^\"]*).*/", 376 | "skipUrlSync": false, 377 | "sort": 1, 378 | "type": "query" 379 | }, 380 | { 381 | "allValue": "(.*)", 382 | "current": { 383 | "selected": true, 384 | "text": [ 385 | "All" 386 | ], 387 | "value": [ 388 | "$__all" 389 | ] 390 | }, 391 | "datasource": { 392 | "type": "prometheus", 393 | "uid": "${DS_PROMETHEUS}" 394 | }, 395 | "definition": "query_result(caretta_links_observed)", 396 | "hide": 0, 397 | "includeAll": true, 398 | "multi": true, 399 | "name": "kind", 400 | "options": [], 401 | "query": { 402 | "query": "query_result(caretta_links_observed)", 403 | "refId": "StandardVariableQuery" 404 | }, 405 | "refresh": 1, 406 | "regex": "/.*_kind=\"([^\"]*).*/", 407 | "skipUrlSync": false, 408 | "sort": 0, 409 | "type": "query" 410 | }, 411 | { 412 | "allValue": "(.*)", 413 | "current": { 414 | "selected": true, 415 | "text": [ 416 | "All" 417 | ], 418 | "value": [ 419 | "$__all" 420 | ] 421 | }, 422 | "datasource": { 423 | "type": "prometheus", 424 | "uid": "${DS_PROMETHEUS}" 425 | }, 426 | "definition": "query_result(caretta_links_observed)", 427 | "hide": 0, 428 | "includeAll": true, 429 | "label": "workload", 430 | "multi": true, 431 | "name": "workload", 432 | "options": [], 433 | "query": { 434 | "query": "query_result(caretta_links_observed)", 435 | "refId": "StandardVariableQuery" 436 | }, 437 | "refresh": 2, 438 | "regex": "/.*_name=\"([^\"]*).*/", 439 | "skipUrlSync": false, 440 | "sort": 1, 441 | "type": "query" 442 | }, 443 | { 444 | "allValue": "(.*)", 445 | "current": { 446 | "selected": true, 447 | "text": [ 448 | "All" 449 | ], 450 | "value": [ 451 | "$__all" 452 | ] 453 | }, 454 | "datasource": { 455 | "type": "prometheus", 456 | "uid": "${DS_PROMETHEUS}" 457 | }, 458 | "definition": "label_values(server_port)", 459 | "hide": 0, 460 | "includeAll": true, 461 | "label": "server port", 462 | "multi": true, 463 | "name": "port", 464 | "options": [], 465 | "query": { 466 | "query": "label_values(server_port)", 467 | "refId": "StandardVariableQuery" 468 | }, 469 | "refresh": 1, 470 | "regex": "", 471 | "skipUrlSync": false, 472 | "sort": 0, 473 | "type": "query" 474 | } 475 | ] 476 | }, 477 | "time": { 478 | "from": "now-5m", 479 | "to": "now" 480 | }, 481 | "timepicker": {}, 482 | "timezone": "", 483 | "title": "Caretta Dashboard", 484 | "uid": "k0Om62pVf", 485 | "version": 2, 486 | "weekStart": "" 487 | } -------------------------------------------------------------------------------- 
/chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "caretta.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "caretta.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "caretta.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "caretta.labels" -}} 37 | helm.sh/chart: {{ include "caretta.chart" . }} 38 | {{ include "caretta.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "caretta.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "caretta.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "caretta.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "caretta.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /chart/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: {{ include "caretta.name" . }} 5 | labels: 6 | app: caretta 7 | {{- include "caretta.labels" . | nindent 4 }} 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: caretta 12 | {{- include "caretta.selectorLabels" . | nindent 6 }} 13 | template: 14 | metadata: 15 | annotations: 16 | {{- with .Values.podAnnotations }} 17 | {{- toYaml . | nindent 8 }} 18 | {{- end }} 19 | labels: 20 | app: caretta 21 | {{- include "caretta.selectorLabels" . | nindent 8 }} 22 | spec: 23 | {{- with .Values.imagePullSecrets }} 24 | imagePullSecrets: 25 | {{- toYaml . | nindent 8 }} 26 | {{- end }} 27 | serviceAccountName: {{ include "caretta.name" . 
}} 28 | {{- if .Values.priorityClassName }} 29 | priorityClassName: {{ .Values.priorityClassName }} 30 | {{- end }} 31 | securityContext: 32 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 33 | containers: 34 | - name: {{ .Chart.Name }} 35 | securityContext: 36 | {{- toYaml .Values.securityContext | nindent 12 }} 37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 38 | imagePullPolicy: {{ .Values.image.pullPolicy }} 39 | volumeMounts: 40 | - mountPath: /proc 41 | name: proc 42 | - mountPath: /sys/kernel/debug 43 | name: debug 44 | ports: 45 | - name: prom-metrics 46 | containerPort: {{ .Values.prometheusPort }} 47 | protocol: TCP 48 | env: 49 | - name: "RESOLVE_DNS" 50 | value: "{{ .Values.enableDnsResolving }}" 51 | - name: "PROMETHEUS_PORT" 52 | value: "{{ .Values.prometheusPort }}" 53 | - name: "PROMETHEUS_ENDPOINT" 54 | value: "{{ .Values.prometheusEndpoint }}" 55 | - name: "POLL_INTERVAL" 56 | value: "{{ .Values.pollIntervalSeconds }}" 57 | - name: "TRAVERSE_UP_HIERARCHY" 58 | value: "{{ .Values.traverseUpHierarchy }}" 59 | resources: 60 | {{- toYaml .Values.resources | nindent 12 }} 61 | {{- with .Values.nodeSelector }} 62 | nodeSelector: 63 | {{- toYaml . | nindent 8 }} 64 | {{- end }} 65 | {{- with .Values.affinity }} 66 | affinity: 67 | {{- toYaml . | nindent 8 }} 68 | {{- end }} 69 | {{- with .Values.tolerations }} 70 | tolerations: 71 | {{- toYaml . | nindent 8 }} 72 | {{- end }} 73 | volumes: 74 | - name: proc 75 | hostPath: 76 | path: /proc 77 | - name: debug 78 | hostPath: 79 | path: /sys/kernel/debug -------------------------------------------------------------------------------- /chart/templates/grafana/dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: caretta-grafana-dashboards 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- if $.Values.grafana.sidecar.dashboards.enabled }} 8 | {{ $.Values.grafana.sidecar.dashboards.label }}: {{ $.Values.grafana.sidecar.dashboards.labelValue | quote }} 9 | {{- end }} 10 | data: 11 | dashboard.json: |- 12 | {{ .Files.Get "dashboard.json" | indent 4}} -------------------------------------------------------------------------------- /chart/templates/rbac/psp.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1") }} 2 | apiVersion: policy/v1beta1 3 | kind: PodSecurityPolicy 4 | metadata: 5 | annotations: 6 | seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' 7 | name: {{ template "caretta.fullname" . }} 8 | spec: 9 | allowPrivilegeEscalation: true 10 | allowedCapabilities: 11 | - '*' 12 | fsGroup: 13 | rule: RunAsAny 14 | hostIPC: true 15 | hostNetwork: false 16 | hostPID: true 17 | hostPorts: 18 | - max: 65535 19 | min: 0 20 | privileged: true 21 | runAsUser: 22 | rule: RunAsAny 23 | seLinux: 24 | rule: RunAsAny 25 | supplementalGroups: 26 | rule: RunAsAny 27 | volumes: 28 | - '*' 29 | {{ end -}} 30 | -------------------------------------------------------------------------------- /chart/templates/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: {{ include "caretta.fullname" . 
}} 5 | rules: 6 | {{- if and .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1") }} 7 | - apiGroups: 8 | - policy 9 | - extensions 10 | resourceNames: 11 | - {{ template "caretta.fullname" . }} 12 | resources: 13 | - podsecuritypolicies 14 | verbs: 15 | - use 16 | {{- end }} 17 | {{- if and .Values.rbac.sccEnabled (.Capabilities.APIVersions.Has "security.openshift.io/v1")}} 18 | - apiGroups: 19 | - security.openshift.io 20 | resources: 21 | - securitycontextconstraints 22 | verbs: 23 | - use 24 | resourceNames: 25 | - privileged 26 | {{- end }} 27 | - verbs: 28 | - get 29 | - list 30 | - watch 31 | apiGroups: 32 | - '' 33 | resources: 34 | - configmaps 35 | - endpoints 36 | - persistentvolumeclaims 37 | - persistentvolumeclaims/status 38 | - pods 39 | - replicationcontrollers 40 | - replicationcontrollers/scale 41 | - serviceaccounts 42 | - services 43 | - services/status 44 | - verbs: 45 | - get 46 | - list 47 | - watch 48 | apiGroups: 49 | - '' 50 | resources: 51 | - bindings 52 | - events 53 | - limitranges 54 | - namespaces/status 55 | - pods/log 56 | - pods/status 57 | - nodes 58 | - replicationcontrollers/status 59 | - resourcequotas 60 | - resourcequotas/status 61 | - verbs: 62 | - get 63 | - list 64 | - watch 65 | apiGroups: 66 | - '' 67 | resources: 68 | - namespaces 69 | - verbs: 70 | - get 71 | - list 72 | - watch 73 | apiGroups: 74 | - apps 75 | resources: 76 | - controllerrevisions 77 | - daemonsets 78 | - daemonsets/status 79 | - deployments 80 | - deployments/scale 81 | - deployments/status 82 | - replicasets 83 | - replicasets/scale 84 | - replicasets/status 85 | - statefulsets 86 | - statefulsets/scale 87 | - statefulsets/status 88 | - verbs: 89 | - get 90 | - list 91 | - watch 92 | apiGroups: 93 | - batch 94 | resources: 95 | - cronjobs 96 | - cronjobs/status 97 | - jobs 98 | - jobs/status 99 | - verbs: 100 | - get 101 | - list 102 | - watch 103 | apiGroups: 104 | - extensions 105 | resources: 106 | - daemonsets 107 | - daemonsets/status 108 | - deployments 109 | - deployments/scale 110 | - deployments/status 111 | - ingresses 112 | - ingresses/status 113 | - networkpolicies 114 | - replicasets 115 | - replicasets/scale 116 | - replicasets/status 117 | - replicationcontrollers/scale 118 | - verbs: 119 | - get 120 | - list 121 | - watch 122 | apiGroups: 123 | - policy 124 | resources: 125 | - poddisruptionbudgets 126 | - poddisruptionbudgets/status 127 | - verbs: 128 | - get 129 | - list 130 | - watch 131 | apiGroups: 132 | - networking.k8s.io 133 | resources: 134 | - ingresses 135 | - ingresses/status 136 | - networkpolicies 137 | - verbs: 138 | - get 139 | apiGroups: 140 | - discovery.k8s.io 141 | resources: 142 | - endpointslices 143 | - verbs: 144 | - list 145 | apiGroups: 146 | - discovery.k8s.io 147 | resources: 148 | - endpointslices 149 | - verbs: 150 | - watch 151 | apiGroups: 152 | - discovery.k8s.io 153 | resources: 154 | - endpointslices 155 | - verbs: 156 | - get 157 | - list 158 | - watch 159 | apiGroups: 160 | - metrics.k8s.io 161 | resources: 162 | - pods 163 | - nodes 164 | 165 | -------------------------------------------------------------------------------- /chart/templates/rbac/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: {{ include "caretta.fullname" . }} 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: {{ include "caretta.fullname" . 
}} 9 | subjects: 10 | - kind: ServiceAccount 11 | name: {{ include "caretta.name" . }} 12 | namespace: {{ .Release.Namespace }} -------------------------------------------------------------------------------- /chart/templates/rbac/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ include "caretta.name" . }} 5 | namespace: {{ .Release.Namespace }} 6 | -------------------------------------------------------------------------------- /chart/values.yaml: -------------------------------------------------------------------------------- 1 | enableDnsResolving: true 2 | prometheusPort: 7117 3 | prometheusEndpoint: "/metrics" 4 | pollIntervalSeconds: 5 5 | traverseUpHierarchy: true 6 | 7 | rbac: 8 | pspEnabled: true 9 | sccEnabled: true 10 | image: 11 | repository: quay.io/groundcover/caretta 12 | pullPolicy: Always 13 | tag: "" 14 | 15 | imagePullSecrets: [] 16 | nameOverride: "" 17 | fullnameOverride: "" 18 | 19 | resources: 20 | limits: 21 | cpu: 150m 22 | memory: 500Mi 23 | requests: 24 | cpu: 10m 25 | memory: 50Mi 26 | 27 | serviceAccount: 28 | # Specifies whether a service account should be created 29 | create: true 30 | # Annotations to add to the service account 31 | annotations: {} 32 | # The name of the service account to use. 33 | # If not set and create is true, a name is generated using the fullname template 34 | name: "" 35 | 36 | podAnnotations: {} 37 | 38 | podSecurityContext: {} 39 | # fsGroup: 2000 40 | 41 | securityContext: 42 | privileged: true 43 | readOnlyRootFilesystem: true 44 | # capabilities: 45 | # drop: 46 | # - ALL 47 | # readOnlyRootFilesystem: true 48 | # runAsNonRoot: true 49 | # runAsUser: 1000 50 | 51 | tolerations: 52 | - key: node-role.kubernetes.io/control-plane 53 | operator: Exists 54 | effect: NoSchedule 55 | - key: node-role.kubernetes.io/master 56 | operator: Exists 57 | effect: NoSchedule 58 | 59 | nodeSelector: {} 60 | affinity: {} 61 | priorityClassName: 62 | 63 | victoria-metrics-single: 64 | server: 65 | image: 66 | repository: quay.io/groundcover/victoria-metrics 67 | resources: 68 | limits: 69 | cpu: 300m 70 | memory: 350Mi 71 | requests: 72 | cpu: 5m 73 | memory: 50Mi 74 | fullnameOverride: caretta-vm 75 | persistentVolume: 76 | enabled: false 77 | size: 16Gi # change enabled to true if a PV is required 78 | 79 | scrape: 80 | enabled: true 81 | 82 | config: 83 | global: 84 | scrape_interval: 15s 85 | 86 | scrape_configs: 87 | - job_name: 'caretta' 88 | metrics_path: /metrics 89 | scrape_interval: 5s 90 | kubernetes_sd_configs: 91 | - role: pod 92 | namespaces: 93 | own_namespace: true 94 | relabel_configs: 95 | - source_labels: [__meta_kubernetes_pod_label_app] 96 | separator: ; 97 | regex: caretta 98 | replacement: $1 99 | action: keep 100 | - action: labelmap 101 | regex: __meta_kubernetes_pod_label_(.+) 102 | - source_labels: [__meta_kubernetes_pod_name] 103 | action: replace 104 | target_label: caretta_pod 105 | - source_labels: [__meta_kubernetes_pod_node_name] 106 | action: replace 107 | target_label: caretta_node 108 | 109 | grafana: 110 | image: 111 | repository: quay.io/groundcover/grafana 112 | resources: 113 | limits: 114 | memory: 300Mi 115 | cpu: 300m 116 | requests: 117 | memory: 50Mi 118 | cpu: 5m 119 | datasources: 120 | datasources.yaml: 121 | apiVersion: 1 122 | datasources: 123 | - name: Prometheus 124 | type: prometheus 125 | access: proxy 126 | url: "http://caretta-vm:8428" 127 | editable: "true" 128 | 
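# ---
# Usage sketch (annotation, not part of the chart): the values above can be
# overridden at install time. A minimal example, assuming a local checkout of
# this repository with the chart under ./chart:
#
#   helm install caretta ./chart \
#     --namespace caretta --create-namespace \
#     --set pollIntervalSeconds=10 \
#     --set enableDnsResolving=false
#
# pollIntervalSeconds, enableDnsResolving, prometheusPort, prometheusEndpoint
# and traverseUpHierarchy feed the POLL_INTERVAL, RESOLVE_DNS, PROMETHEUS_PORT,
# PROMETHEUS_ENDPOINT and TRAVERSE_UP_HIERARCHY environment variables on the
# DaemonSet container (see templates/daemonset.yaml and pkg/caretta/config.go).
# ---
129 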
| sidecar: 130 | dashboards: 131 | label: grafana_dashboard 132 | labelValue: "" 133 | 134 | dashboardProviders: 135 | dashboardproviders.yaml: 136 | apiVersion: 1 137 | providers: 138 | - name: 'default' 139 | orgId: 1 140 | folder: '' 141 | type: file 142 | disableDeletion: false 143 | editable: true 144 | options: 145 | path: /var/lib/grafana/dashboards 146 | foldersFromFilesStructure: true 147 | 148 | dashboardsConfigMaps: 149 | default: "caretta-grafana-dashboards" 150 | 151 | grafana.ini: 152 | auth.anonymous: 153 | enabled: true 154 | dashboards: 155 | default_home_dashboard_path: /var/lib/grafana/dashboards/default/dashboard.json 156 | adminUser: "admin" 157 | adminPassword: "caretta" 158 | -------------------------------------------------------------------------------- /cmd/caretta/caretta.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | _ "net/http/pprof" 6 | "os" 7 | "os/signal" 8 | "syscall" 9 | 10 | "github.com/groundcover-com/caretta/pkg/caretta" 11 | ) 12 | 13 | func main() { 14 | log.Print("Caretta starting...") 15 | caretta := caretta.NewCaretta() 16 | 17 | caretta.Start() 18 | 19 | osSignal := make(chan os.Signal, 1) 20 | signal.Notify(osSignal, syscall.SIGINT, syscall.SIGTERM) 21 | <-osSignal 22 | caretta.Stop() 23 | } 24 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/groundcover-com/caretta 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/cilium/ebpf v0.10.0 7 | github.com/google/uuid v1.3.0 8 | github.com/hashicorp/golang-lru/v2 v2.0.1 9 | github.com/prometheus/client_golang v1.14.0 10 | github.com/stretchr/testify v1.8.1 11 | k8s.io/api v0.26.0 12 | k8s.io/apimachinery v0.26.0 13 | k8s.io/client-go v0.26.0 14 | ) 15 | 16 | require ( 17 | github.com/beorn7/perks v1.0.1 // indirect 18 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 19 | github.com/davecgh/go-spew v1.1.1 // indirect 20 | github.com/emicklei/go-restful/v3 v3.9.0 // indirect 21 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 22 | github.com/go-logr/logr v1.2.3 // indirect 23 | github.com/go-openapi/jsonpointer v0.19.5 // indirect 24 | github.com/go-openapi/jsonreference v0.20.0 // indirect 25 | github.com/go-openapi/swag v0.19.14 // indirect 26 | github.com/gogo/protobuf v1.3.2 // indirect 27 | github.com/golang/protobuf v1.5.2 // indirect 28 | github.com/google/gnostic v0.5.7-v3refs // indirect 29 | github.com/google/go-cmp v0.5.9 // indirect 30 | github.com/google/gofuzz v1.1.0 // indirect 31 | github.com/josharian/intern v1.0.0 // indirect 32 | github.com/json-iterator/go v1.1.12 // indirect 33 | github.com/mailru/easyjson v0.7.6 // indirect 34 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 35 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 36 | github.com/modern-go/reflect2 v1.0.2 // indirect 37 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 38 | github.com/pkg/errors v0.9.1 // indirect 39 | github.com/pmezard/go-difflib v1.0.0 // indirect 40 | github.com/prometheus/client_model v0.3.0 // indirect 41 | github.com/prometheus/common v0.37.0 // indirect 42 | github.com/prometheus/procfs v0.8.0 // indirect 43 | golang.org/x/net v0.3.1-0.20221206200815-1e63c2f08a10 // indirect 44 | golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect 45 | golang.org/x/sys v0.3.0 
// indirect 46 | golang.org/x/term v0.3.0 // indirect 47 | golang.org/x/text v0.5.0 // indirect 48 | golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect 49 | google.golang.org/appengine v1.6.7 // indirect 50 | google.golang.org/protobuf v1.28.1 // indirect 51 | gopkg.in/inf.v0 v0.9.1 // indirect 52 | gopkg.in/yaml.v2 v2.4.0 // indirect 53 | gopkg.in/yaml.v3 v3.0.1 // indirect 54 | k8s.io/klog/v2 v2.80.1 // indirect 55 | k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect 56 | k8s.io/utils v0.0.0-20221107191617-1a15be271d1d // indirect 57 | sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect 58 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 59 | sigs.k8s.io/yaml v1.3.0 // indirect 60 | ) 61 | -------------------------------------------------------------------------------- /images/caretta.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groundcover-com/caretta/280d1640ce0174b1dfdd7d05bdd104604aa04508/images/caretta.gif -------------------------------------------------------------------------------- /images/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groundcover-com/caretta/280d1640ce0174b1dfdd7d05bdd104604aa04508/images/screenshot.png -------------------------------------------------------------------------------- /pkg/caretta/caretta.go: -------------------------------------------------------------------------------- 1 | package caretta 2 | 3 | import ( 4 | "context" 5 | "hash/fnv" 6 | "log" 7 | "net/http" 8 | "strconv" 9 | "time" 10 | 11 | caretta_k8s "github.com/groundcover-com/caretta/pkg/k8s" 12 | "github.com/groundcover-com/caretta/pkg/metrics" 13 | "github.com/prometheus/client_golang/prometheus" 14 | "github.com/prometheus/client_golang/prometheus/promauto" 15 | "k8s.io/client-go/kubernetes" 16 | "k8s.io/client-go/rest" 17 | ) 18 | 19 | var ( 20 | linksMetrics = promauto.NewGaugeVec(prometheus.GaugeOpts{ 21 | Name: "caretta_links_observed", 22 | Help: "total bytes_sent value of links observed by caretta since its launch", 23 | }, []string{ 24 | "link_id", "client_id", "client_name", "client_namespace", "client_kind", "client_owner", "server_id", "server_name", "server_namespace", "server_kind", "server_port", "role", 25 | }) 26 | tcpStateMetrics = promauto.NewGaugeVec(prometheus.GaugeOpts{ 27 | Name: "caretta_tcp_states", 28 | Help: "state of TCP connections observed by caretta since its launch", 29 | }, []string{ 30 | "link_id", "client_id", "client_name", "client_namespace", "client_kind", "client_owner", "server_id", "server_name", "server_namespace", "server_kind", "server_port", "role", 31 | }) 32 | ) 33 | 34 | type Caretta struct { 35 | stopSignal chan bool 36 | tracer LinksTracer 37 | metricsServer *http.Server 38 | config carettaConfig 39 | } 40 | 41 | func NewCaretta() *Caretta { 42 | return &Caretta{ 43 | stopSignal: make(chan bool, 1), 44 | config: readConfig(), 45 | } 46 | } 47 | 48 | func (caretta *Caretta) Start() { 49 | caretta.metricsServer = metrics.StartMetricsServer(caretta.config.prometheusEndpoint, caretta.config.prometheusPort) 50 | 51 | clientset, err := caretta.getClientSet() 52 | if err != nil { 53 | log.Fatalf("Error getting kubernetes clientset: %v", err) 54 | } 55 | resolver, err := caretta_k8s.NewK8sIPResolver(clientset, caretta.config.shouldResolveDns, caretta.config.traverseUpHierarchy) 56 | if err != nil { 57 | log.Fatalf("Error creating resolver: 
%v", err) 58 | } 59 | err = resolver.StartWatching() 60 | if err != nil { 61 | log.Fatalf("Error watching cluster's state: %v", err) 62 | } 63 | 64 | // wait for resolver to populate 65 | time.Sleep(10 * time.Second) 66 | 67 | caretta.tracer = NewTracer(resolver) 68 | err = caretta.tracer.Start() 69 | if err != nil { 70 | log.Fatalf("Couldn't load probes - %v", err) 71 | } 72 | 73 | pollingTicker := time.NewTicker(time.Duration(caretta.config.pollingIntervalSeconds) * time.Second) 74 | 75 | pastLinks := make(map[NetworkLink]uint64) 76 | 77 | go func() { 78 | for { 79 | select { 80 | case <-caretta.stopSignal: 81 | return 82 | case <-pollingTicker.C: 83 | var links map[NetworkLink]uint64 84 | var tcpConnections []TcpConnection 85 | 86 | if err != nil { 87 | log.Printf("Error updating snapshot of cluster state, skipping iteration") 88 | continue 89 | } 90 | 91 | pastLinks, links, tcpConnections = caretta.tracer.TracesPollingIteration(pastLinks) 92 | for link, throughput := range links { 93 | caretta.handleLink(&link, throughput) 94 | } 95 | 96 | for _, connection := range tcpConnections { 97 | caretta.handleTcpConnection(&connection) 98 | } 99 | } 100 | } 101 | }() 102 | } 103 | 104 | func (caretta *Caretta) Stop() { 105 | log.Print("Stopping Caretta...") 106 | caretta.stopSignal <- true 107 | err := caretta.tracer.Stop() 108 | if err != nil { 109 | log.Printf("Error unloading bpf objects: %v", err) 110 | } 111 | err = caretta.metricsServer.Shutdown(context.Background()) 112 | if err != nil { 113 | log.Printf("Error shutting Prometheus server down: %v", err) 114 | } 115 | 116 | } 117 | 118 | func (caretta *Caretta) handleLink(link *NetworkLink, throughput uint64) { 119 | linksMetrics.With(prometheus.Labels{ 120 | "link_id": strconv.Itoa(int(fnvHash(link.Client.Name+link.Client.Namespace+link.Server.Name+link.Server.Namespace) + link.Role)), 121 | "client_id": strconv.Itoa(int(fnvHash(link.Client.Name + link.Client.Namespace))), 122 | "client_name": link.Client.Name, 123 | "client_namespace": link.Client.Namespace, 124 | "client_kind": link.Client.Kind, 125 | "client_owner": link.Client.Owner, 126 | "server_id": strconv.Itoa(int(fnvHash(link.Server.Name + link.Server.Namespace))), 127 | "server_name": link.Server.Name, 128 | "server_namespace": link.Server.Namespace, 129 | "server_kind": link.Server.Kind, 130 | "server_port": strconv.Itoa(int(link.ServerPort)), 131 | "role": strconv.Itoa(int(link.Role)), 132 | }).Set(float64(throughput)) 133 | } 134 | 135 | func (caretta *Caretta) handleTcpConnection(connection *TcpConnection) { 136 | tcpStateMetrics.With(prometheus.Labels{ 137 | "link_id": strconv.Itoa(int(fnvHash(connection.Client.Name+connection.Client.Namespace+connection.Server.Name+connection.Server.Namespace) + connection.Role)), 138 | "client_id": strconv.Itoa(int(fnvHash(connection.Client.Name + connection.Client.Namespace))), 139 | "client_name": connection.Client.Name, 140 | "client_namespace": connection.Client.Namespace, 141 | "client_kind": connection.Client.Kind, 142 | "client_owner": connection.Client.Owner, 143 | "server_id": strconv.Itoa(int(fnvHash(connection.Server.Name + connection.Server.Namespace))), 144 | "server_name": connection.Server.Name, 145 | "server_namespace": connection.Server.Namespace, 146 | "server_kind": connection.Server.Kind, 147 | "server_port": strconv.Itoa(int(connection.ServerPort)), 148 | "role": strconv.Itoa(int(connection.Role)), 149 | }).Set(float64(connection.State)) 150 | } 151 | 152 | func (caretta *Caretta) getClientSet() 
(*kubernetes.Clientset, error) { 153 | config, err := rest.InClusterConfig() 154 | if err != nil { 155 | return nil, err 156 | } 157 | 158 | clientset, err := kubernetes.NewForConfig(config) 159 | if err != nil { 160 | return nil, err 161 | } 162 | return clientset, nil 163 | } 164 | 165 | // simple fnvHash function from string to uint32 166 | func fnvHash(s string) uint32 { 167 | h := fnv.New32a() 168 | h.Write([]byte(s)) 169 | return h.Sum32() 170 | } 171 | -------------------------------------------------------------------------------- /pkg/caretta/config.go: -------------------------------------------------------------------------------- 1 | package caretta 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | ) 8 | 9 | const ( 10 | defaultPrometheusEndpoint = "/metrics" 11 | defaultPrometheusPort = ":7117" 12 | defaultPollingIntervalSeconds = 5 13 | defaultShouldResolveDns = false 14 | defaultTraverseUpHierarchy = true 15 | ) 16 | 17 | type carettaConfig struct { 18 | shouldResolveDns bool 19 | prometheusPort string 20 | prometheusEndpoint string 21 | pollingIntervalSeconds int 22 | traverseUpHierarchy bool 23 | } 24 | 25 | // environment variables based, encapsulated to enable future changes 26 | func readConfig() carettaConfig { 27 | port := defaultPrometheusPort 28 | if val := os.Getenv("PROMETHEUS_PORT"); val != "" { 29 | valInt, err := strconv.Atoi(val) 30 | if err == nil { 31 | port = fmt.Sprintf(":%d", valInt) 32 | } 33 | } 34 | 35 | endpoint := defaultPrometheusEndpoint 36 | if val := os.Getenv("PROMETHEUS_ENDPOINT"); val != "" { 37 | endpoint = val 38 | } 39 | 40 | interval := defaultPollingIntervalSeconds 41 | if val := os.Getenv("POLL_INTERVAL"); val != "" { 42 | valInt, err := strconv.Atoi(val) 43 | if err == nil { 44 | interval = valInt 45 | } 46 | } 47 | 48 | shouldResolveDns := defaultShouldResolveDns 49 | if val := os.Getenv("RESOLVE_DNS"); val != "" { 50 | valBool, err := strconv.ParseBool(val) 51 | if err == nil { 52 | shouldResolveDns = valBool 53 | } 54 | } 55 | 56 | traverseUpHierarchy := defaultTraverseUpHierarchy 57 | if val := os.Getenv("TRAVERSE_UP_HIERARCHY"); val != "" { 58 | valBool, err := strconv.ParseBool(val) 59 | if err == nil { 60 | traverseUpHierarchy = valBool 61 | } 62 | } 63 | 64 | return carettaConfig{ 65 | shouldResolveDns: shouldResolveDns, 66 | prometheusPort: port, 67 | prometheusEndpoint: endpoint, 68 | pollingIntervalSeconds: interval, 69 | traverseUpHierarchy: traverseUpHierarchy, 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /pkg/caretta/ebpf_map.go: -------------------------------------------------------------------------------- 1 | package caretta 2 | 3 | import "github.com/cilium/ebpf" 4 | 5 | type IEbpfMapIterator interface { 6 | Next(interface{}, interface{}) bool 7 | } 8 | 9 | type IEbpfMap interface { 10 | Lookup(interface{}, interface{}) error 11 | Iterate() IEbpfMapIterator 12 | Delete(interface{}) error 13 | } 14 | 15 | type EbpfMap struct { 16 | innerMap *ebpf.Map 17 | } 18 | 19 | type EbpfMapIterator struct { 20 | innerIterator *ebpf.MapIterator 21 | } 22 | 23 | func (m *EbpfMap) Lookup(key interface{}, val interface{}) error { 24 | return m.innerMap.Lookup(key, val) 25 | } 26 | 27 | func (m *EbpfMap) Iterate() IEbpfMapIterator { 28 | return &EbpfMapIterator{innerIterator: m.innerMap.Iterate()} 29 | } 30 | 31 | func (m *EbpfMap) Delete(key interface{}) error { 32 | return m.innerMap.Delete(key) 33 | } 34 | 35 | func (it *EbpfMapIterator) Next(key interface{}, val 
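// The IEbpfMap / IEbpfMapIterator pair above exists so the tracer can run
// against an in-memory fake in tests (see links_tracer_test.go) as well as a
// real *ebpf.Map. A minimal read loop through the interface, a sketch
// assuming key/value types that match the loaded connections map:
//
//   var key ConnectionIdentifier
//   var val ConnectionThroughputStats
//   it := m.Iterate() // m is any IEbpfMap
//   for it.Next(&key, &val) {
//       fmt.Printf("%v sent %d bytes\n", key.Tuple, val.BytesSent)
//   }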
interface{}) bool { 36 | return it.innerIterator.Next(key, val) 37 | } 38 | -------------------------------------------------------------------------------- /pkg/caretta/links_tracer.go: -------------------------------------------------------------------------------- 1 | package caretta 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "log" 7 | "net" 8 | 9 | "github.com/groundcover-com/caretta/pkg/k8s" 10 | "github.com/groundcover-com/caretta/pkg/tracing" 11 | 12 | "github.com/prometheus/client_golang/prometheus" 13 | "github.com/prometheus/client_golang/prometheus/promauto" 14 | ) 15 | 16 | var ( 17 | pollsMade = promauto.NewCounter(prometheus.CounterOpts{ 18 | Name: "caretta_polls_made", 19 | Help: "Counter of polls made by caretta", 20 | }) 21 | failedConnectionDeletion = promauto.NewCounter(prometheus.CounterOpts{ 22 | Name: "caretta_failed_deletions", 23 | Help: "Counter of failed deletion of closed connection from map", 24 | }) 25 | filteredLoopbackConnections = promauto.NewGauge(prometheus.GaugeOpts{ 26 | Name: "caretta_current_loopback_connections", 27 | Help: `Number of loopback connections observed in the last iteration`, 28 | }) 29 | mapSize = promauto.NewGauge(prometheus.GaugeOpts{ 30 | Name: "caretta_ebpf_connections_map_size", 31 | Help: "number of items in the connections map iterated from user space per iteration", 32 | }) 33 | mapDeletions = promauto.NewCounter(prometheus.CounterOpts{ 34 | Name: "caretta_connection_deletions", 35 | Help: "total number of deletions from the map done by the userspace", 36 | }) 37 | ) 38 | 39 | type IPResolver interface { 40 | ResolveIP(string) k8s.Workload 41 | StartWatching() error 42 | StopWatching() 43 | } 44 | 45 | type Probes interface { 46 | UnloadProbes() error 47 | } 48 | 49 | type LinksTracer struct { 50 | ebpfObjects Probes 51 | connections IEbpfMap 52 | resolver IPResolver 53 | } 54 | 55 | // initializes a LinksTracer object 56 | func NewTracer(resolver *k8s.K8sIPResolver) LinksTracer { 57 | tracer := LinksTracer{resolver: resolver} 58 | return tracer 59 | } 60 | 61 | func NewTracerWithObjs(resolver IPResolver, connections IEbpfMap, probes Probes) LinksTracer { 62 | return LinksTracer{ 63 | ebpfObjects: probes, 64 | connections: connections, 65 | resolver: resolver, 66 | } 67 | } 68 | 69 | func (tracer *LinksTracer) Start() error { 70 | objs, connMap, err := tracing.LoadProbes() 71 | if err != nil { 72 | return err 73 | } 74 | 75 | tracer.ebpfObjects = &objs 76 | tracer.connections = &EbpfMap{innerMap: connMap} 77 | return nil 78 | } 79 | 80 | func (tracer *LinksTracer) Stop() error { 81 | tracer.resolver.StopWatching() 82 | return tracer.ebpfObjects.UnloadProbes() 83 | } 84 | 85 | // a single polling from the eBPF maps 86 | // iterating the traces from the kernel-space, summing each network link 87 | func (tracer *LinksTracer) TracesPollingIteration(pastLinks map[NetworkLink]uint64) (map[NetworkLink]uint64, map[NetworkLink]uint64, []TcpConnection) { 88 | // outline of an iteration - 89 | // filter unwanted connections, sum all connections as links, add past links, and return the new map 90 | pollsMade.Inc() 91 | loopbackCounter := 0 92 | 93 | currentLinks := make(map[NetworkLink]uint64) 94 | currentTcpConnections := []TcpConnection{} 95 | var connectionsToDelete []ConnectionIdentifier 96 | 97 | var conn ConnectionIdentifier 98 | var throughput ConnectionThroughputStats 99 | 100 | entries := tracer.connections.Iterate() 101 | // iterate the map from the eBPF program 102 | itemsCounter := 0 103 | for entries.Next(&conn, 
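// (entries.Next fills conn and throughput through the pointers passed to it,
// mirroring cilium/ebpf's MapIterator semantics: it returns false once the
// map is exhausted or the iterator fails, so the loop below walks every
// kernel-side connection entry on each poll.)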
&throughput) { 104 | itemsCounter += 1 105 | // filter unnecessary connection 106 | 107 | if throughput.IsActive == 0 { 108 | connectionsToDelete = append(connectionsToDelete, conn) 109 | } 110 | 111 | // skip loopback connections 112 | if conn.Tuple.SrcIp == conn.Tuple.DstIp && isAddressLoopback(conn.Tuple.DstIp) { 113 | loopbackCounter++ 114 | continue 115 | } 116 | 117 | // filter unroled connections (probably indicates a bug) 118 | link, err := tracer.reduceConnectionToLink(conn) 119 | if conn.Role == UnknownConnectionRole || err != nil { 120 | continue 121 | } 122 | 123 | tcpConn, err := tracer.reduceConnectionToTcp(conn, throughput) 124 | if err != nil { 125 | continue 126 | } 127 | 128 | currentLinks[link] += throughput.BytesSent 129 | currentTcpConnections = append(currentTcpConnections, tcpConn) 130 | } 131 | 132 | mapSize.Set(float64(itemsCounter)) 133 | filteredLoopbackConnections.Set(float64(loopbackCounter)) 134 | 135 | // add past links 136 | for pastLink, pastThroughput := range pastLinks { 137 | currentLinks[pastLink] += pastThroughput 138 | } 139 | 140 | // delete connections marked to delete 141 | for _, conn := range connectionsToDelete { 142 | tracer.deleteAndStoreConnection(&conn, pastLinks) 143 | } 144 | 145 | return pastLinks, currentLinks, currentTcpConnections 146 | 147 | } 148 | 149 | func (tracer *LinksTracer) deleteAndStoreConnection(conn *ConnectionIdentifier, pastLinks map[NetworkLink]uint64) { 150 | // newer kernels introduce batch map operation, but it might not be available so we delete item-by-item 151 | var throughput ConnectionThroughputStats 152 | err := tracer.connections.Lookup(conn, &throughput) 153 | if err != nil { 154 | log.Printf("Error retrieving connection to delete, skipping it: %v", err) 155 | failedConnectionDeletion.Inc() 156 | return 157 | } 158 | err = tracer.connections.Delete(conn) 159 | if err != nil { 160 | log.Printf("Error deleting connection from map: %v", err) 161 | failedConnectionDeletion.Inc() 162 | return 163 | } 164 | // if deletion is successful, add it to past links 165 | link, err := tracer.reduceConnectionToLink(*conn) 166 | if err != nil { 167 | log.Printf("Error reducing connection to link when deleting: %v", err) 168 | return 169 | } 170 | 171 | pastLinks[link] += throughput.BytesSent 172 | 173 | mapDeletions.Inc() 174 | } 175 | 176 | // reduce a specific connection to a general link 177 | func (tracer *LinksTracer) reduceConnectionToLink(connection ConnectionIdentifier) (NetworkLink, error) { 178 | var link NetworkLink 179 | link.Role = connection.Role 180 | 181 | srcWorkload := tracer.resolver.ResolveIP(IP(connection.Tuple.SrcIp).String()) 182 | dstWorkload := tracer.resolver.ResolveIP(IP(connection.Tuple.DstIp).String()) 183 | 184 | if connection.Role == ClientConnectionRole { 185 | // Src is Client, Dst is Server, Port is DstPort 186 | link.Client = srcWorkload 187 | link.Server = dstWorkload 188 | link.ServerPort = connection.Tuple.DstPort 189 | } else if connection.Role == ServerConnectionRole { 190 | // Dst is Client, Src is Server, Port is SrcPort 191 | link.Client = dstWorkload 192 | link.Server = srcWorkload 193 | link.ServerPort = connection.Tuple.SrcPort 194 | } else { 195 | return NetworkLink{}, errors.New("connection's role is unknown") 196 | } 197 | return link, nil 198 | } 199 | 200 | // reduce a specific connection to a general tcp connection 201 | func (tracer *LinksTracer) reduceConnectionToTcp(connection ConnectionIdentifier, throughput ConnectionThroughputStats) (TcpConnection, error) { 202 | var 
tcpConn TcpConnection 203 | tcpConn.Role = connection.Role 204 | 205 | srcWorkload := tracer.resolver.ResolveIP(IP(connection.Tuple.SrcIp).String()) 206 | dstWorkload := tracer.resolver.ResolveIP(IP(connection.Tuple.DstIp).String()) 207 | 208 | if connection.Role == ClientConnectionRole { 209 | // Src is Client, Dst is Server, Port is DstPort 210 | tcpConn.Client = srcWorkload 211 | tcpConn.Server = dstWorkload 212 | tcpConn.ServerPort = connection.Tuple.DstPort 213 | tcpConn.State = TcpConnectionOpenState 214 | } else if connection.Role == ServerConnectionRole { 215 | // Dst is Client, Src is Server, Port is SrcPort 216 | tcpConn.Client = dstWorkload 217 | tcpConn.Server = srcWorkload 218 | tcpConn.ServerPort = connection.Tuple.SrcPort 219 | tcpConn.State = TcpConnectionAcceptState 220 | } else { 221 | return TcpConnection{}, errors.New("connection's role is unknown") 222 | } 223 | 224 | if throughput.IsActive == 0 { 225 | tcpConn.State = TcpConnectionClosedState 226 | } 227 | 228 | return tcpConn, nil 229 | } 230 | 231 | func isAddressLoopback(ip uint32) bool { 232 | ipAddr := make(net.IP, 4) 233 | binary.LittleEndian.PutUint32(ipAddr, ip) 234 | return ipAddr.IsLoopback() 235 | } 236 | -------------------------------------------------------------------------------- /pkg/caretta/links_tracer_test.go: -------------------------------------------------------------------------------- 1 | package caretta_test 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/groundcover-com/caretta/pkg/caretta" 9 | 10 | "github.com/groundcover-com/caretta/pkg/k8s" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | // Defining a mock of a map. This is not a complete implementation of a map with iterator 15 | type MockConnectionsMapIterator struct { 16 | innerMap map[caretta.ConnectionIdentifier]caretta.ConnectionThroughputStats 17 | keys []caretta.ConnectionIdentifier 18 | count int 19 | } 20 | 21 | func (mi *MockConnectionsMapIterator) Next(conn interface{}, throughput interface{}) bool { 22 | assertedConn, ok := conn.(*caretta.ConnectionIdentifier) 23 | if !ok { 24 | return false 25 | } 26 | assertedThroughput, ok := throughput.(*caretta.ConnectionThroughputStats) 27 | if !ok { 28 | return false 29 | } 30 | for mi.count < len(mi.keys) { 31 | *assertedConn = mi.keys[mi.count] 32 | *assertedThroughput = mi.innerMap[*assertedConn] 33 | mi.count++ 34 | return true 35 | } 36 | 37 | return false 38 | } 39 | 40 | type MockConnectionsMap struct { 41 | innerMap map[caretta.ConnectionIdentifier]caretta.ConnectionThroughputStats 42 | } 43 | 44 | func NewMockConnectionsMap() *MockConnectionsMap { 45 | return &MockConnectionsMap{innerMap: make(map[caretta.ConnectionIdentifier]caretta.ConnectionThroughputStats)} 46 | } 47 | 48 | func (m *MockConnectionsMap) Lookup(conn interface{}, throughput interface{}) error { 49 | assertedConn, ok := conn.(*caretta.ConnectionIdentifier) 50 | if !ok { 51 | return errors.New("wrong type for Lookup") 52 | } 53 | assertedThroughput, ok := throughput.(*caretta.ConnectionThroughputStats) 54 | if !ok { 55 | return errors.New("wrong type for Lookup") 56 | } 57 | *assertedThroughput, ok = m.innerMap[*assertedConn] 58 | if !ok { 59 | return errors.New("Key not in map") 60 | } 61 | return nil 62 | } 63 | 64 | func (m *MockConnectionsMap) Iterate() caretta.IEbpfMapIterator { 65 | keys := make([]caretta.ConnectionIdentifier, 0, len(m.innerMap)) 66 | for ci := range m.innerMap { 67 | keys = append(keys, ci) 68 | } 69 | 70 | return &MockConnectionsMapIterator{innerMap: 
m.innerMap, keys: keys, count: 0} 71 | } 72 | 73 | func (m *MockConnectionsMap) Delete(key interface{}) error { 74 | assertedKey, ok := key.(*caretta.ConnectionIdentifier) 75 | if !ok { 76 | return errors.New("wrong type in delete") 77 | } 78 | delete(m.innerMap, *assertedKey) 79 | return nil 80 | } 81 | 82 | func (m *MockConnectionsMap) Update(key caretta.ConnectionIdentifier, value caretta.ConnectionThroughputStats) { 83 | m.innerMap[key] = value 84 | } 85 | 86 | type MockResolver struct{} 87 | 88 | func (resolver *MockResolver) ResolveIP(ip string) k8s.Workload { 89 | return k8s.Workload{ 90 | Name: ip, 91 | Namespace: "Namespace", 92 | Kind: "Kind", 93 | } 94 | } 95 | 96 | func (resolver *MockResolver) StartWatching() error { 97 | return nil 98 | } 99 | func (resolver *MockResolver) StopWatching() {} 100 | 101 | type testConnection struct { 102 | connId caretta.ConnectionIdentifier 103 | throughput caretta.ConnectionThroughputStats 104 | } 105 | 106 | type aggregationTest struct { 107 | description string 108 | connections []testConnection 109 | expectedLink caretta.NetworkLink 110 | expectedThroughput uint64 111 | } 112 | 113 | var clientTuple = caretta.ConnectionTuple{ 114 | SrcIp: 1, 115 | DstIp: 2, 116 | SrcPort: 55555, 117 | DstPort: 80, 118 | } 119 | var serverTuple = caretta.ConnectionTuple{ 120 | DstIp: 1, 121 | SrcIp: 2, 122 | DstPort: 55555, 123 | SrcPort: 80, 124 | } 125 | var activeThroughput = caretta.ConnectionThroughputStats{ 126 | BytesSent: 10, 127 | BytesReceived: 2, 128 | IsActive: 1, 129 | } 130 | var inactiveThroughput = caretta.ConnectionThroughputStats{ 131 | BytesSent: 10, 132 | BytesReceived: 2, 133 | IsActive: 0, 134 | } 135 | var clientLink = caretta.NetworkLink{ 136 | Client: k8s.Workload{ 137 | Name: caretta.IP(1).String(), 138 | Namespace: "Namespace", 139 | Kind: "Kind", 140 | }, 141 | Server: k8s.Workload{ 142 | Name: caretta.IP(2).String(), 143 | Namespace: "Namespace", 144 | Kind: "Kind", 145 | }, 146 | ServerPort: 80, 147 | Role: caretta.ClientConnectionRole, 148 | } 149 | var serverLink = caretta.NetworkLink{ 150 | Client: k8s.Workload{ 151 | Name: caretta.IP(1).String(), 152 | Namespace: "Namespace", 153 | Kind: "Kind", 154 | }, 155 | Server: k8s.Workload{ 156 | Name: caretta.IP(2).String(), 157 | Namespace: "Namespace", 158 | Kind: "Kind", 159 | }, 160 | ServerPort: 80, 161 | Role: caretta.ServerConnectionRole, 162 | } 163 | 164 | func TestAggregations(t *testing.T) { 165 | var aggregationTests = []aggregationTest{ 166 | { 167 | description: "single client connection create correct link", 168 | connections: []testConnection{ 169 | { 170 | connId: caretta.ConnectionIdentifier{ 171 | Id: 1, 172 | Pid: 1, 173 | Tuple: clientTuple, 174 | Role: caretta.ClientConnectionRole, 175 | }, 176 | throughput: activeThroughput, 177 | }, 178 | }, 179 | expectedLink: clientLink, 180 | expectedThroughput: activeThroughput.BytesSent, 181 | }, 182 | { 183 | description: "single server connection create correct link", 184 | connections: []testConnection{ 185 | { 186 | connId: caretta.ConnectionIdentifier{ 187 | Id: 1, 188 | Pid: 1, 189 | Tuple: serverTuple, 190 | Role: caretta.ServerConnectionRole, 191 | }, 192 | throughput: activeThroughput, 193 | }, 194 | }, 195 | expectedLink: serverLink, 196 | expectedThroughput: activeThroughput.BytesSent, 197 | }, 198 | { 199 | description: "2 client connections aggregate both to one", 200 | connections: []testConnection{ 201 | { 202 | connId: caretta.ConnectionIdentifier{ 203 | Id: 1, 204 | Pid: 1, 205 | Tuple: clientTuple, 206 
| Role: caretta.ClientConnectionRole, 207 | }, 208 | throughput: activeThroughput, 209 | }, 210 | { 211 | connId: caretta.ConnectionIdentifier{ 212 | Id: 2, 213 | Pid: 1, 214 | Tuple: clientTuple, 215 | Role: caretta.ClientConnectionRole, 216 | }, 217 | throughput: activeThroughput, 218 | }, 219 | }, 220 | expectedLink: clientLink, 221 | expectedThroughput: 2 * activeThroughput.BytesSent, 222 | }, 223 | { 224 | description: "2 server connections aggregate both to one", 225 | connections: []testConnection{ 226 | { 227 | connId: caretta.ConnectionIdentifier{ 228 | Id: 1, 229 | Pid: 1, 230 | Tuple: serverTuple, 231 | Role: caretta.ServerConnectionRole, 232 | }, 233 | throughput: activeThroughput, 234 | }, 235 | { 236 | connId: caretta.ConnectionIdentifier{ 237 | Id: 2, 238 | Pid: 1, 239 | Tuple: serverTuple, 240 | Role: caretta.ServerConnectionRole, 241 | }, 242 | throughput: activeThroughput, 243 | }, 244 | }, 245 | expectedLink: serverLink, 246 | expectedThroughput: 2 * activeThroughput.BytesSent, 247 | }, 248 | { 249 | description: "3 active client connections, 2 inactive aggregate all to one", 250 | connections: []testConnection{ 251 | { 252 | connId: caretta.ConnectionIdentifier{ 253 | Id: 1, 254 | Pid: 1, 255 | Tuple: clientTuple, 256 | Role: caretta.ClientConnectionRole, 257 | }, 258 | throughput: activeThroughput, 259 | }, 260 | { 261 | connId: caretta.ConnectionIdentifier{ 262 | Id: 2, 263 | Pid: 1, 264 | Tuple: clientTuple, 265 | Role: caretta.ClientConnectionRole, 266 | }, 267 | throughput: activeThroughput, 268 | }, 269 | { 270 | connId: caretta.ConnectionIdentifier{ 271 | Id: 3, 272 | Pid: 1, 273 | Tuple: clientTuple, 274 | Role: caretta.ClientConnectionRole, 275 | }, 276 | throughput: activeThroughput, 277 | }, 278 | { 279 | connId: caretta.ConnectionIdentifier{ 280 | Id: 4, 281 | Pid: 1, 282 | Tuple: clientTuple, 283 | Role: caretta.ClientConnectionRole, 284 | }, 285 | throughput: inactiveThroughput, 286 | }, 287 | { 288 | connId: caretta.ConnectionIdentifier{ 289 | Id: 5, 290 | Pid: 1, 291 | Tuple: clientTuple, 292 | Role: caretta.ClientConnectionRole, 293 | }, 294 | throughput: inactiveThroughput, 295 | }, 296 | }, 297 | expectedLink: clientLink, 298 | expectedThroughput: 3*activeThroughput.BytesSent + 2*inactiveThroughput.BytesSent, 299 | }, 300 | { 301 | description: "3 active server connections, 2 inactive aggregate all to one", 302 | connections: []testConnection{ 303 | { 304 | connId: caretta.ConnectionIdentifier{ 305 | Id: 1, 306 | Pid: 1, 307 | Tuple: serverTuple, 308 | Role: caretta.ServerConnectionRole, 309 | }, 310 | throughput: activeThroughput, 311 | }, 312 | { 313 | connId: caretta.ConnectionIdentifier{ 314 | Id: 2, 315 | Pid: 1, 316 | Tuple: serverTuple, 317 | Role: caretta.ServerConnectionRole, 318 | }, 319 | throughput: activeThroughput, 320 | }, 321 | { 322 | connId: caretta.ConnectionIdentifier{ 323 | Id: 3, 324 | Pid: 1, 325 | Tuple: serverTuple, 326 | Role: caretta.ServerConnectionRole, 327 | }, 328 | throughput: activeThroughput, 329 | }, 330 | { 331 | connId: caretta.ConnectionIdentifier{ 332 | Id: 4, 333 | Pid: 1, 334 | Tuple: serverTuple, 335 | Role: caretta.ServerConnectionRole, 336 | }, 337 | throughput: inactiveThroughput, 338 | }, 339 | { 340 | connId: caretta.ConnectionIdentifier{ 341 | Id: 5, 342 | Pid: 1, 343 | Tuple: serverTuple, 344 | Role: caretta.ServerConnectionRole, 345 | }, 346 | throughput: inactiveThroughput, 347 | }, 348 | }, 349 | expectedLink: serverLink, 350 | expectedThroughput: 3*activeThroughput.BytesSent + 
2*inactiveThroughput.BytesSent, 351 | }, 352 | } 353 | for _, test := range aggregationTests { 354 | t.Run(test.description, func(t *testing.T) { 355 | assert := assert.New(t) 356 | m := NewMockConnectionsMap() 357 | 358 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 359 | pastLinks := make(map[caretta.NetworkLink]uint64) 360 | var currentLinks map[caretta.NetworkLink]uint64 361 | for _, connection := range test.connections { 362 | m.Update(connection.connId, connection.throughput) 363 | _, currentLinks, _ = tracer.TracesPollingIteration(pastLinks) 364 | } 365 | resultThroughput, ok := currentLinks[test.expectedLink] 366 | assert.True(ok, "expected link not in result map") 367 | assert.Equal(test.expectedThroughput, resultThroughput, "wrong throughput value") 368 | }) 369 | 370 | } 371 | } 372 | 373 | func TestDeletion_ActiveConnection_NotDeleted(t *testing.T) { 374 | assert := assert.New(t) 375 | 376 | // Arrange mock map, initial connection 377 | m := NewMockConnectionsMap() 378 | 379 | conn1 := caretta.ConnectionIdentifier{ 380 | Id: 1, 381 | Pid: 1, 382 | Tuple: serverTuple, 383 | Role: caretta.ServerConnectionRole, 384 | } 385 | throughput1 := activeThroughput 386 | 387 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 388 | 389 | pastLinks := make(map[caretta.NetworkLink]uint64) 390 | 391 | // Act 392 | m.Update(conn1, throughput1) 393 | _, currentLinks, _ := tracer.TracesPollingIteration(pastLinks) 394 | 395 | // Assert 396 | resultThroughput, ok := currentLinks[serverLink] 397 | assert.True(ok, "link not in map, map is %v", currentLinks) 398 | assert.Equal(throughput1.BytesSent, resultThroughput) 399 | 400 | var testThroughput caretta.ConnectionThroughputStats 401 | 402 | err := m.Lookup(&conn1, &testThroughput) 403 | assert.NoError(err, "connection should stay on the map") 404 | } 405 | 406 | func TestDeletion_InactiveConnection_AddedToPastLinksAndRemovedFromMap(t *testing.T) { 407 | assert := assert.New(t) 408 | 409 | // Arrange mock map, initial connection 410 | m := NewMockConnectionsMap() 411 | 412 | conn1 := caretta.ConnectionIdentifier{ 413 | Id: 1, 414 | Pid: 1, 415 | Tuple: serverTuple, 416 | Role: caretta.ServerConnectionRole, 417 | } 418 | throughput1 := activeThroughput 419 | m.Update(conn1, throughput1) 420 | 421 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 422 | 423 | pastLinks := make(map[caretta.NetworkLink]uint64) 424 | 425 | pastLinks, _, _ = tracer.TracesPollingIteration(pastLinks) 426 | 427 | // Act: update the throughput so the connection is inactive, and iterate 428 | throughput2 := inactiveThroughput 429 | m.Update(conn1, throughput2) 430 | pastLinks, currentLinks, _ := tracer.TracesPollingIteration(pastLinks) 431 | 432 | // Assert: check the past connection is both in past links and in current links 433 | resultThroughput, ok := currentLinks[serverLink] 434 | assert.True(ok, "link not in map, map is %v", currentLinks) 435 | assert.Equal(throughput1.BytesSent, resultThroughput) 436 | _, ok = pastLinks[serverLink] 437 | assert.True(ok, "inactive link not in past links: %v", pastLinks) 438 | 439 | var testThroughput caretta.ConnectionThroughputStats 440 | err := m.Lookup(&conn1, &testThroughput) 441 | assert.Error(err, fmt.Sprintf("inactive connection not deleted from connections map, val is %d", testThroughput.BytesSent)) 442 | } 443 | 444 | func TestDeletion_InactiveConnection_NewConnectionAfterDeletionUpdatesCorrectly(t *testing.T) { 445 | assert := assert.New(t) 446 | 447 | // Arrange mock map, initial 
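// This test walks the full delete-and-resume cycle with the fixtures above
// (BytesSent = 10 in both throughput states): the connection is seen active,
// goes inactive (its 10 bytes are folded into pastLinks and the entry is
// deleted from the map), then a fresh connection on the same tuple sends 10
// more. The final assertion expects 10 + 10 = 20, i.e. per-link byte counts
// accumulate across connection generations rather than resetting.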
connection, inactive connection 448 | m := NewMockConnectionsMap() 449 | 450 | conn1 := caretta.ConnectionIdentifier{ 451 | Id: 1, 452 | Pid: 1, 453 | Tuple: serverTuple, 454 | Role: caretta.ServerConnectionRole, 455 | } 456 | throughput1 := activeThroughput 457 | m.Update(conn1, throughput1) 458 | 459 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 460 | 461 | pastLinks := make(map[caretta.NetworkLink]uint64) 462 | 463 | // update the throughput so the connection is inactive 464 | throughput2 := inactiveThroughput 465 | m.Update(conn1, throughput2) 466 | pastLinks, _, _ = tracer.TracesPollingIteration(pastLinks) 467 | 468 | // Act: new connection, same link 469 | throughput3 := activeThroughput 470 | m.Update(conn1, throughput3) 471 | _, currentLinks, _ := tracer.TracesPollingIteration(pastLinks) 472 | 473 | // Assert the new connection is aggregated correctly 474 | resultThroughput, ok := currentLinks[serverLink] 475 | assert.True(ok, "link not in map, map is %v", currentLinks) 476 | assert.Equal(throughput1.BytesSent+throughput3.BytesSent, resultThroughput) 477 | } 478 | 479 | func TestConnectionState_Open(t *testing.T) { 480 | assert := assert.New(t) 481 | 482 | // Arrange mock map, initial connection 483 | m := NewMockConnectionsMap() 484 | 485 | conn1 := caretta.ConnectionIdentifier{ 486 | Id: 1, 487 | Pid: 1, 488 | Tuple: serverTuple, 489 | Role: caretta.ClientConnectionRole, 490 | } 491 | throughput1 := activeThroughput 492 | 493 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 494 | 495 | pastLinks := make(map[caretta.NetworkLink]uint64) 496 | 497 | // Act 498 | m.Update(conn1, throughput1) 499 | _, _, currentConnections := tracer.TracesPollingIteration(pastLinks) 500 | 501 | // Assert 502 | assert.Equal(1, len(currentConnections)) 503 | // Get the first element of the map 504 | for _, tcp := range currentConnections { 505 | assert.Equal(uint32(caretta.TcpConnectionOpenState), tcp.State) 506 | break 507 | } 508 | } 509 | 510 | func TestConnectionState_Close(t *testing.T) { 511 | assert := assert.New(t) 512 | 513 | // Arrange mock map, initial connection 514 | m := NewMockConnectionsMap() 515 | 516 | conn1 := caretta.ConnectionIdentifier{ 517 | Id: 1, 518 | Pid: 1, 519 | Tuple: serverTuple, 520 | Role: caretta.ServerConnectionRole, 521 | } 522 | throughput1 := inactiveThroughput 523 | 524 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 525 | 526 | pastLinks := make(map[caretta.NetworkLink]uint64) 527 | 528 | // Act 529 | m.Update(conn1, throughput1) 530 | _, _, currentConnections := tracer.TracesPollingIteration(pastLinks) 531 | 532 | // Assert 533 | assert.Equal(1, len(currentConnections)) 534 | for _, tcp := range currentConnections { 535 | assert.Equal(uint32(caretta.TcpConnectionClosedState), tcp.State) 536 | break 537 | } 538 | } 539 | 540 | func TestConnectionState_Accept(t *testing.T) { 541 | assert := assert.New(t) 542 | 543 | // Arrange mock map, initial connection 544 | m := NewMockConnectionsMap() 545 | 546 | conn1 := caretta.ConnectionIdentifier{ 547 | Id: 1, 548 | Pid: 1, 549 | Tuple: serverTuple, 550 | Role: caretta.ServerConnectionRole, 551 | } 552 | throughput1 := activeThroughput 553 | 554 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 555 | 556 | pastLinks := make(map[caretta.NetworkLink]uint64) 557 | 558 | // Act 559 | m.Update(conn1, throughput1) 560 | _, _, currentConnections := tracer.TracesPollingIteration(pastLinks) 561 | 562 | // Assert 563 | assert.Equal(1, len(currentConnections)) 564 | for _, 
tcp := range currentConnections { 565 | assert.Equal(uint32(caretta.TcpConnectionAcceptState), tcp.State) 566 | break 567 | } 568 | } 569 | 570 | func TestConnectionState_UnknownRole(t *testing.T) { 571 | assert := assert.New(t) 572 | 573 | // Arrange mock map, initial connection 574 | m := NewMockConnectionsMap() 575 | 576 | conn1 := caretta.ConnectionIdentifier{ 577 | Id: 1, 578 | Pid: 1, 579 | Tuple: serverTuple, 580 | Role: caretta.UnknownConnectionRole, 581 | } 582 | throughput1 := activeThroughput 583 | 584 | tracer := caretta.NewTracerWithObjs(&MockResolver{}, m, nil) 585 | 586 | pastLinks := make(map[caretta.NetworkLink]uint64) 587 | 588 | // Act 589 | m.Update(conn1, throughput1) 590 | _, _, currentConnections := tracer.TracesPollingIteration(pastLinks) 591 | 592 | // Assert 593 | assert.Equal(0, len(currentConnections)) 594 | } 595 | -------------------------------------------------------------------------------- /pkg/caretta/types.go: -------------------------------------------------------------------------------- 1 | package caretta 2 | 3 | import ( 4 | "encoding/binary" 5 | "net" 6 | 7 | caretta_k8s "github.com/groundcover-com/caretta/pkg/k8s" 8 | ) 9 | 10 | const ( 11 | UnknownConnectionRole = iota 12 | ClientConnectionRole = iota 13 | ServerConnectionRole = iota 14 | TcpConnectionOpenState = iota 15 | TcpConnectionAcceptState = iota 16 | TcpConnectionClosedState = iota 17 | ) 18 | 19 | type IP uint32 20 | 21 | func (ip IP) String() string { 22 | netIp := make(net.IP, 4) 23 | binary.LittleEndian.PutUint32(netIp, uint32(ip)) 24 | return netIp.String() 25 | } 26 | 27 | // "final" type of link, like an edge on the graph 28 | type NetworkLink struct { 29 | Client caretta_k8s.Workload 30 | Server caretta_k8s.Workload 31 | ServerPort uint16 32 | Role uint32 33 | } 34 | 35 | type TcpConnection struct { 36 | Client caretta_k8s.Workload 37 | Server caretta_k8s.Workload 38 | ServerPort uint16 39 | Role uint32 40 | State uint32 41 | } 42 | 43 | type ConnectionTuple struct { 44 | SrcIp uint32 45 | DstIp uint32 46 | SrcPort uint16 47 | DstPort uint16 48 | } 49 | 50 | type ConnectionIdentifier struct { 51 | Id uint32 52 | Pid uint32 53 | Tuple ConnectionTuple 54 | Role uint32 55 | } 56 | 57 | type ConnectionThroughputStats struct { 58 | BytesSent uint64 59 | BytesReceived uint64 60 | IsActive uint64 61 | } 62 | -------------------------------------------------------------------------------- /pkg/k8s/ipresolver.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "log" 8 | "net" 9 | "sync" 10 | "time" 11 | 12 | "k8s.io/apimachinery/pkg/watch" 13 | "k8s.io/client-go/kubernetes" 14 | 15 | lrucache "github.com/hashicorp/golang-lru/v2" 16 | "github.com/prometheus/client_golang/prometheus" 17 | "github.com/prometheus/client_golang/prometheus/promauto" 18 | appsv1 "k8s.io/api/apps/v1" 19 | batchv1 "k8s.io/api/batch/v1" 20 | "k8s.io/api/batch/v1beta1" 21 | v1 "k8s.io/api/core/v1" 22 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | ) 24 | 25 | const MAX_RESOLVED_DNS = 10000 // arbitrary limit 26 | var reregisterWatchSleepDuration = 1 * time.Second 27 | 28 | var ( 29 | watchEventsCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 30 | Name: "caretta_watcher_events_count", 31 | }, []string{"object_type"}) 32 | watchResetsCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 33 | Name: "caretta_watcher_resets_count", 34 | }, []string{"object_type"}) 35 | ) 36 | 37 | type 
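// clusterSnapshot below keeps one sync.Map per watched resource kind, with
// values stored as concrete structs, so readers type-assert on Load. A
// minimal access sketch, assuming a pod UID already present in the map:
//
//   if val, ok := resolver.snapshot.Pods.Load(uid); ok {
//       if pod, ok := val.(v1.Pod); ok { // stored by value in handlePodWatchEvent
//           _ = pod.Status.PodIPs
//       }
//   }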
clusterSnapshot struct { 38 | Pods sync.Map // map[types.UID]v1.Pod 39 | Nodes sync.Map // map[types.UID]v1.Node 40 | ReplicaSets sync.Map // map[types.UID]appsv1.ReplicaSet 41 | DaemonSets sync.Map // map[types.UID]appsv1.DaemonSet 42 | StatefulSets sync.Map // map[types.UID]appsv1.StatefulSet 43 | Jobs sync.Map // map[types.UID]batchv1.Job 44 | Services sync.Map // map[types.UID]v1.Service 45 | Deployments sync.Map // map[types.UID]appsv1.Deployment 46 | CronJobs sync.Map // map[types.UID]batchv1.CronJob or batchv1beta.CronJob 47 | PodDescriptors sync.Map // map[types.UID]Workload 48 | } 49 | 50 | type K8sIPResolver struct { 51 | clientset kubernetes.Interface 52 | snapshot clusterSnapshot 53 | ipsMap sync.Map 54 | stopSignal chan bool 55 | shouldResolveDns bool 56 | traverseUpHierarchy bool 57 | dnsResolvedIps *lrucache.Cache[string, string] 58 | } 59 | 60 | type Workload struct { 61 | Name string 62 | Namespace string 63 | Kind string 64 | Owner string 65 | } 66 | 67 | func NewK8sIPResolver(clientset kubernetes.Interface, resolveDns bool, traverseUpHierarchy bool) (*K8sIPResolver, error) { 68 | var dnsCache *lrucache.Cache[string, string] 69 | if resolveDns { 70 | var err error 71 | dnsCache, err = lrucache.New[string, string](MAX_RESOLVED_DNS) 72 | if err != nil { 73 | return nil, err 74 | } 75 | } else { 76 | dnsCache = nil 77 | } 78 | return &K8sIPResolver{ 79 | clientset: clientset, 80 | snapshot: clusterSnapshot{}, 81 | ipsMap: sync.Map{}, 82 | stopSignal: make(chan bool), 83 | shouldResolveDns: resolveDns, 84 | dnsResolvedIps: dnsCache, 85 | traverseUpHierarchy: traverseUpHierarchy, 86 | }, nil 87 | } 88 | 89 | // resolve the given IP from the resolver's cache 90 | // if not available, return the IP itself. 91 | func (resolver *K8sIPResolver) ResolveIP(ip string) Workload { 92 | if val, ok := resolver.ipsMap.Load(ip); ok { 93 | entry, ok := val.(Workload) 94 | if ok { 95 | return entry 96 | } 97 | log.Printf("type confusion in ipsMap") 98 | } 99 | host := ip 100 | 101 | if resolver.shouldResolveDns { 102 | val, ok := resolver.dnsResolvedIps.Get(ip) 103 | if ok { 104 | host = val 105 | } else { 106 | hosts, err := net.LookupAddr(ip) 107 | if err == nil && len(hosts) > 0 { 108 | host = hosts[0] 109 | } 110 | resolver.dnsResolvedIps.Add(ip, host) 111 | } 112 | } 113 | return Workload{ 114 | Name: host, 115 | Namespace: "external", 116 | Kind: "external", 117 | } 118 | } 119 | 120 | func (resolver *K8sIPResolver) StartWatching() error { 121 | // register watchers 122 | podsWatcher, err := resolver.clientset.CoreV1().Pods("").Watch(context.Background(), metav1.ListOptions{}) 123 | if err != nil { 124 | return fmt.Errorf("error watching pods changes - %v", err) 125 | } 126 | 127 | nodesWatcher, err := resolver.clientset.CoreV1().Nodes().Watch(context.Background(), metav1.ListOptions{}) 128 | if err != nil { 129 | return fmt.Errorf("error watching nodes changes - %v", err) 130 | } 131 | 132 | replicasetsWatcher, err := resolver.clientset.AppsV1().ReplicaSets("").Watch(context.Background(), metav1.ListOptions{}) 133 | if err != nil { 134 | return fmt.Errorf("error watching replicasets changes - %v", err) 135 | } 136 | 137 | daemonsetsWatcher, err := resolver.clientset.AppsV1().DaemonSets("").Watch(context.Background(), metav1.ListOptions{}) 138 | if err != nil { 139 | return fmt.Errorf("error watching daemonsets changes - %v", err) 140 | } 141 | 142 | statefulsetsWatcher, err := resolver.clientset.AppsV1().StatefulSets("").Watch(context.Background(), metav1.ListOptions{}) 143 | if err != 
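// (ResolveIP above resolves in three steps: the ipsMap cache built from
// cluster state, then the DNS LRU cache when RESOLVE_DNS is enabled, and
// finally the raw IP itself; anything unmatched is reported as
// Workload{Name: host, Namespace: "external", Kind: "external"}.)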
nil { 144 | return fmt.Errorf("error watching statefulsets changes - %v", err) 145 | } 146 | 147 | jobsWatcher, err := resolver.clientset.BatchV1().Jobs("").Watch(context.Background(), metav1.ListOptions{}) 148 | if err != nil { 149 | return fmt.Errorf("error watching jobs changes - %v", err) 150 | } 151 | 152 | servicesWatcher, err := resolver.clientset.CoreV1().Services("").Watch(context.Background(), metav1.ListOptions{}) 153 | if err != nil { 154 | return fmt.Errorf("error watching services changes - %v", err) 155 | } 156 | 157 | deploymentsWatcher, err := resolver.clientset.AppsV1().Deployments("").Watch(context.Background(), metav1.ListOptions{}) 158 | if err != nil { 159 | return fmt.Errorf("error watching deployments changes - %v", err) 160 | } 161 | 162 | cronJobsWatcher, err := resolver.startCronjobWatcher() 163 | if err != nil { 164 | return fmt.Errorf("error watching cronjobs changes - %v", err) 165 | } 166 | 167 | // invoke a watching function 168 | go func() { 169 | for { 170 | select { 171 | case <-resolver.stopSignal: 172 | podsWatcher.Stop() 173 | nodesWatcher.Stop() 174 | replicasetsWatcher.Stop() 175 | daemonsetsWatcher.Stop() 176 | statefulsetsWatcher.Stop() 177 | jobsWatcher.Stop() 178 | servicesWatcher.Stop() 179 | deploymentsWatcher.Stop() 180 | cronJobsWatcher.Stop() 181 | return 182 | case podEvent, ok := <-podsWatcher.ResultChan(): 183 | { 184 | if !ok { 185 | watchResetsCounter.WithLabelValues("pod").Inc() 186 | podsWatcher, err = resolver.clientset.CoreV1().Pods("").Watch(context.Background(), metav1.ListOptions{}) 187 | if err != nil { 188 | time.Sleep(reregisterWatchSleepDuration) 189 | } 190 | continue 191 | } 192 | watchEventsCounter.WithLabelValues("pod").Inc() 193 | resolver.handlePodWatchEvent(&podEvent) 194 | } 195 | case nodeEvent, ok := <-nodesWatcher.ResultChan(): 196 | { 197 | if !ok { 198 | watchResetsCounter.WithLabelValues("node").Inc() 199 | nodesWatcher, err = resolver.clientset.CoreV1().Nodes().Watch(context.Background(), metav1.ListOptions{}) 200 | if err != nil { 201 | time.Sleep(reregisterWatchSleepDuration) 202 | } 203 | continue 204 | } 205 | watchEventsCounter.WithLabelValues("node").Inc() 206 | resolver.handleNodeWatchEvent(&nodeEvent) 207 | } 208 | case replicasetsEvent, ok := <-replicasetsWatcher.ResultChan(): 209 | { 210 | if !ok { 211 | watchResetsCounter.WithLabelValues("replicaset").Inc() 212 | replicasetsWatcher, err = resolver.clientset.AppsV1().ReplicaSets("").Watch(context.Background(), metav1.ListOptions{}) 213 | if err != nil { 214 | time.Sleep(reregisterWatchSleepDuration) 215 | } 216 | continue 217 | } 218 | watchEventsCounter.WithLabelValues("replicaset").Inc() 219 | resolver.handleReplicaSetWatchEvent(&replicasetsEvent) 220 | } 221 | case daemonsetsEvent, ok := <-daemonsetsWatcher.ResultChan(): 222 | { 223 | if !ok { 224 | watchResetsCounter.WithLabelValues("daemonset").Inc() 225 | daemonsetsWatcher, err = resolver.clientset.AppsV1().DaemonSets("").Watch(context.Background(), metav1.ListOptions{}) 226 | if err != nil { 227 | time.Sleep(reregisterWatchSleepDuration) 228 | } 229 | continue 230 | } 231 | watchEventsCounter.WithLabelValues("daemonset").Inc() 232 | resolver.handleDaemonSetWatchEvent(&daemonsetsEvent) 233 | } 234 | case statefulsetsEvent, ok := <-statefulsetsWatcher.ResultChan(): 235 | { 236 | if !ok { 237 | watchResetsCounter.WithLabelValues("statefulset").Inc() 238 | statefulsetsWatcher, err = resolver.clientset.AppsV1().StatefulSets("").Watch(context.Background(), metav1.ListOptions{}) 239 | if err != nil { 
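// Every branch of this select follows the recovery pattern shown here for
// statefulsets: a closed result channel bumps the reset counter, the watch
// is re-registered, and a failed re-registration sleeps
// reregisterWatchSleepDuration before the next attempt. In condensed form
// (a sketch only, with newWatch standing in for the per-resource Watch call):
//
//   case ev, ok := <-w.ResultChan():
//       if !ok {
//           if w, err = newWatch(); err != nil {
//               time.Sleep(reregisterWatchSleepDuration)
//           }
//           continue
//       }
//       handle(&ev)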
240 | time.Sleep(reregisterWatchSleepDuration) 241 | } 242 | continue 243 | } 244 | watchEventsCounter.WithLabelValues("statefulset").Inc() 245 | resolver.handleStatefulSetWatchEvent(&statefulsetsEvent) 246 | } 247 | case jobsEvent, ok := <-jobsWatcher.ResultChan(): 248 | { 249 | if !ok { 250 | watchResetsCounter.WithLabelValues("job").Inc() 251 | jobsWatcher, err = resolver.clientset.BatchV1().Jobs("").Watch(context.Background(), metav1.ListOptions{}) 252 | if err != nil { 253 | time.Sleep(reregisterWatchSleepDuration) 254 | } 255 | continue 256 | } 257 | watchEventsCounter.WithLabelValues("job").Inc() 258 | resolver.handleJobsWatchEvent(&jobsEvent) 259 | } 260 | case servicesEvent, ok := <-servicesWatcher.ResultChan(): 261 | { 262 | if !ok { 263 | watchResetsCounter.WithLabelValues("service").Inc() 264 | servicesWatcher, err = resolver.clientset.CoreV1().Services("").Watch(context.Background(), metav1.ListOptions{}) 265 | if err != nil { 266 | time.Sleep(reregisterWatchSleepDuration) 267 | } 268 | continue 269 | } 270 | watchEventsCounter.WithLabelValues("service").Inc() 271 | resolver.handleServicesWatchEvent(&servicesEvent) 272 | } 273 | case deploymentsEvent, ok := <-deploymentsWatcher.ResultChan(): 274 | { 275 | if !ok { 276 | watchResetsCounter.WithLabelValues("deployment").Inc() 277 | deploymentsWatcher, err = resolver.clientset.AppsV1().Deployments("").Watch(context.Background(), metav1.ListOptions{}) 278 | if err != nil { 279 | time.Sleep(reregisterWatchSleepDuration) 280 | } 281 | continue 282 | } 283 | watchEventsCounter.WithLabelValues("deployment").Inc() 284 | resolver.handleDeploymentsWatchEvent(&deploymentsEvent) 285 | } 286 | case cronjobsEvent, ok := <-cronJobsWatcher.ResultChan(): 287 | { 288 | if !ok { 289 | watchResetsCounter.WithLabelValues("cronjob").Inc() 290 | cronJobsWatcher, err = resolver.startCronjobWatcher() 291 | if err != nil { 292 | time.Sleep(reregisterWatchSleepDuration) 293 | } 294 | continue 295 | } 296 | watchEventsCounter.WithLabelValues("cronjob").Inc() 297 | resolver.handleCronJobsWatchEvent(&cronjobsEvent) 298 | } 299 | } 300 | } 301 | }() 302 | 303 | // get initial state 304 | err = resolver.getResolvedClusterSnapshot() 305 | if err != nil { 306 | resolver.StopWatching() 307 | return fmt.Errorf("error retrieving cluster's initial state: %v", err) 308 | } 309 | 310 | return nil 311 | } 312 | 313 | func (resolver *K8sIPResolver) startCronjobWatcher() (watch.Interface, error) { 314 | cronJobsWatcher, err := resolver.clientset.BatchV1().CronJobs("").Watch(context.Background(), metav1.ListOptions{}) 315 | if err != nil { 316 | return resolver.clientset.BatchV1beta1().CronJobs("").Watch(context.Background(), metav1.ListOptions{}) 317 | } 318 | 319 | return cronJobsWatcher, nil 320 | } 321 | 322 | func (resolver *K8sIPResolver) StopWatching() { 323 | resolver.stopSignal <- true 324 | } 325 | 326 | func (resolver *K8sIPResolver) handlePodWatchEvent(podEvent *watch.Event) { 327 | switch podEvent.Type { 328 | case watch.Added: 329 | pod, ok := podEvent.Object.(*v1.Pod) 330 | if !ok { 331 | return 332 | } 333 | resolver.snapshot.Pods.Store(pod.UID, *pod) 334 | entry := resolver.resolvePodDescriptor(pod) 335 | for _, podIp := range pod.Status.PodIPs { 336 | resolver.storeWorkloadsIP(podIp.IP, &entry) 337 | } 338 | case watch.Modified: 339 | pod, ok := podEvent.Object.(*v1.Pod) 340 | if !ok { 341 | return 342 | } 343 | resolver.snapshot.Pods.Store(pod.UID, *pod) 344 | entry := resolver.resolvePodDescriptor(pod) 345 | for _, podIp := range pod.Status.PodIPs { 
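// (Added and Modified pod events are handled identically: both refresh the
// stored pod and re-map each of its IPs to the resolved workload descriptor,
// while Deleted only drops the Pods and PodDescriptors snapshot entries.)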
346 | resolver.storeWorkloadsIP(podIp.IP, &entry) 347 | } 348 | case watch.Deleted: 349 | if val, ok := podEvent.Object.(*v1.Pod); ok { 350 | resolver.snapshot.Pods.Delete(val.UID) 351 | resolver.snapshot.PodDescriptors.Delete(val.UID) 352 | } 353 | } 354 | } 355 | 356 | func (resolver *K8sIPResolver) handleNodeWatchEvent(nodeEvent *watch.Event) { 357 | switch nodeEvent.Type { 358 | case watch.Added, watch.Modified: 359 | node, ok := nodeEvent.Object.(*v1.Node) 360 | if !ok { 361 | return 362 | } 363 | resolver.snapshot.Nodes.Store(node.UID, *node) 364 | for _, nodeAddress := range node.Status.Addresses { 365 | resolver.storeWorkloadsIP(nodeAddress.Address, &Workload{ 366 | Name: node.Name, 367 | Namespace: "node", 368 | Kind: "node", 369 | }) 370 | } 371 | case watch.Deleted: 372 | if val, ok := nodeEvent.Object.(*v1.Node); ok { 373 | resolver.snapshot.Nodes.Delete(val.UID) 374 | } 375 | } 376 | } 377 | 378 | func (resolver *K8sIPResolver) handleReplicaSetWatchEvent(replicasetsEvent *watch.Event) { 379 | switch replicasetsEvent.Type { 380 | case watch.Added: 381 | if val, ok := replicasetsEvent.Object.(*appsv1.ReplicaSet); ok { 382 | resolver.snapshot.ReplicaSets.Store(val.UID, *val) 383 | } 384 | case watch.Deleted: 385 | if val, ok := replicasetsEvent.Object.(*appsv1.ReplicaSet); ok { 386 | resolver.snapshot.ReplicaSets.Delete(val.UID) 387 | } 388 | } 389 | } 390 | 391 | func (resolver *K8sIPResolver) handleDaemonSetWatchEvent(daemonsetsEvent *watch.Event) { 392 | switch daemonsetsEvent.Type { 393 | case watch.Added: 394 | if val, ok := daemonsetsEvent.Object.(*appsv1.DaemonSet); ok { 395 | resolver.snapshot.DaemonSets.Store(val.UID, *val) 396 | } 397 | case watch.Deleted: 398 | if val, ok := daemonsetsEvent.Object.(*appsv1.DaemonSet); ok { 399 | resolver.snapshot.DaemonSets.Delete(val.UID) 400 | } 401 | } 402 | } 403 | 404 | func (resolver *K8sIPResolver) handleStatefulSetWatchEvent(statefulsetsEvent *watch.Event) { 405 | switch statefulsetsEvent.Type { 406 | case watch.Added: 407 | if val, ok := statefulsetsEvent.Object.(*appsv1.StatefulSet); ok { 408 | resolver.snapshot.StatefulSets.Store(val.UID, *val) 409 | } 410 | case watch.Deleted: 411 | if val, ok := statefulsetsEvent.Object.(*appsv1.StatefulSet); ok { 412 | resolver.snapshot.StatefulSets.Delete(val.UID) 413 | } 414 | } 415 | } 416 | 417 | func (resolver *K8sIPResolver) handleJobsWatchEvent(jobsEvent *watch.Event) { 418 | switch jobsEvent.Type { 419 | case watch.Added: 420 | if val, ok := jobsEvent.Object.(*batchv1.Job); ok { 421 | resolver.snapshot.Jobs.Store(val.UID, *val) 422 | } 423 | case watch.Deleted: 424 | if val, ok := jobsEvent.Object.(*batchv1.Job); ok { 425 | resolver.snapshot.Jobs.Delete(val.UID) 426 | } 427 | } 428 | } 429 | 430 | func (resolver *K8sIPResolver) handleServicesWatchEvent(servicesEvent *watch.Event) { 431 | switch servicesEvent.Type { 432 | case watch.Added, watch.Modified: 433 | service, ok := servicesEvent.Object.(*v1.Service) 434 | if !ok { 435 | return 436 | } 437 | resolver.snapshot.Services.Store(service.UID, *service) 438 | 439 | // services has (potentially multiple) ClusterIP 440 | workload := Workload{ 441 | Name: service.Name, 442 | Namespace: service.Namespace, 443 | Kind: "Service", 444 | } 445 | 446 | // TODO maybe try to match service to workload 447 | for _, clusterIp := range service.Spec.ClusterIPs { 448 | if clusterIp != "None" { 449 | _, ok := resolver.ipsMap.Load(clusterIp) 450 | if !ok { 451 | resolver.storeWorkloadsIP(clusterIp, &workload) 452 | } 453 | } 454 | } 455 | case 
watch.Deleted: 456 | if val, ok := servicesEvent.Object.(*v1.Service); ok { 457 | resolver.snapshot.Services.Delete(val.UID) 458 | } 459 | } 460 | } 461 | 462 | func (resolver *K8sIPResolver) handleDeploymentsWatchEvent(deploymentsEvent *watch.Event) { 463 | switch deploymentsEvent.Type { 464 | case watch.Added: 465 | if val, ok := deploymentsEvent.Object.(*appsv1.Deployment); ok { 466 | resolver.snapshot.Deployments.Store(val.UID, *val) 467 | } 468 | case watch.Deleted: 469 | if val, ok := deploymentsEvent.Object.(*appsv1.Deployment); ok { 470 | resolver.snapshot.Deployments.Delete(val.UID) 471 | } 472 | } 473 | } 474 | 475 | func (resolver *K8sIPResolver) handleCronJobsWatchEvent(cronjobsEvent *watch.Event) { 476 | switch cronjobsEvent.Type { 477 | case watch.Added: 478 | if val, ok := cronjobsEvent.Object.(*batchv1.CronJob); ok { 479 | resolver.snapshot.CronJobs.Store(val.UID, *val) 480 | } 481 | if val, ok := cronjobsEvent.Object.(*v1beta1.CronJob); ok { 482 | resolver.snapshot.CronJobs.Store(val.UID, *val) 483 | } 484 | 485 | case watch.Deleted: 486 | if val, ok := cronjobsEvent.Object.(*batchv1.CronJob); ok { 487 | resolver.snapshot.CronJobs.Delete(val.UID) 488 | } 489 | if val, ok := cronjobsEvent.Object.(*v1beta1.CronJob); ok { 490 | resolver.snapshot.CronJobs.Delete(val.UID) 491 | } 492 | } 493 | } 494 | 495 | func (resolver *K8sIPResolver) getResolvedClusterSnapshot() error { 496 | err := resolver.getFullClusterSnapshot() 497 | if err != nil { 498 | return err 499 | } 500 | resolver.updateIpMapping() 501 | return nil 502 | } 503 | 504 | // iterate the API for initial coverage of the cluster's state 505 | func (resolver *K8sIPResolver) getFullClusterSnapshot() error { 506 | pods, err := resolver.clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{}) 507 | if err != nil { 508 | return errors.New("error getting pods, aborting snapshot update") 509 | } 510 | for _, pod := range pods.Items { 511 | resolver.snapshot.Pods.Store(pod.UID, pod) 512 | } 513 | 514 | nodes, err := resolver.clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) 515 | if err != nil { 516 | return errors.New("error getting nodes, aborting snapshot update") 517 | } 518 | for _, node := range nodes.Items { 519 | resolver.snapshot.Nodes.Store(node.UID, node) 520 | } 521 | 522 | replicasets, err := resolver.clientset.AppsV1().ReplicaSets("").List(context.Background(), metav1.ListOptions{}) 523 | if err != nil { 524 | return errors.New("error getting replicasets, aborting snapshot update") 525 | } 526 | for _, rs := range replicasets.Items { 527 | resolver.snapshot.ReplicaSets.Store(rs.ObjectMeta.UID, rs) 528 | } 529 | 530 | daemonsets, err := resolver.clientset.AppsV1().DaemonSets("").List(context.Background(), metav1.ListOptions{}) 531 | if err != nil { 532 | return errors.New("error getting daemonsets, aborting snapshot update") 533 | } 534 | for _, ds := range daemonsets.Items { 535 | resolver.snapshot.DaemonSets.Store(ds.ObjectMeta.UID, ds) 536 | } 537 | 538 | statefulsets, err := resolver.clientset.AppsV1().StatefulSets("").List(context.Background(), metav1.ListOptions{}) 539 | if err != nil { 540 | return errors.New("error getting statefulsets, aborting snapshot update") 541 | } 542 | for _, ss := range statefulsets.Items { 543 | resolver.snapshot.StatefulSets.Store(ss.ObjectMeta.UID, ss) 544 | } 545 | 546 | jobs, err := resolver.clientset.BatchV1().Jobs("").List(context.Background(), metav1.ListOptions{}) 547 | if err != nil { 548 | return errors.New("error getting 
jobs, aborting snapshot update") 549 | } 550 | for _, job := range jobs.Items { 551 | resolver.snapshot.Jobs.Store(job.ObjectMeta.UID, job) 552 | } 553 | 554 | services, err := resolver.clientset.CoreV1().Services("").List(context.Background(), metav1.ListOptions{}) 555 | if err != nil { 556 | return errors.New("error getting services, aborting snapshot update") 557 | } 558 | for _, service := range services.Items { 559 | resolver.snapshot.Services.Store(service.UID, service) 560 | } 561 | 562 | deployments, err := resolver.clientset.AppsV1().Deployments("").List(context.Background(), metav1.ListOptions{}) 563 | if err != nil { 564 | return errors.New("error getting deployments, aborting snapshot update") 565 | } 566 | for _, deployment := range deployments.Items { 567 | resolver.snapshot.Deployments.Store(deployment.UID, deployment) 568 | } 569 | 570 | cronJobs, err := resolver.clientset.BatchV1().CronJobs("").List(context.Background(), metav1.ListOptions{}) 571 | if err != nil { 572 | cronJobs, err := resolver.clientset.BatchV1beta1().CronJobs("").List(context.Background(), metav1.ListOptions{}) 573 | if err != nil { 574 | return errors.New("error getting cronjobs, aborting snapshot update") 575 | } 576 | for _, cronJob := range cronJobs.Items { 577 | resolver.snapshot.CronJobs.Store(cronJob.UID, cronJob) 578 | } 579 | } 580 | for _, cronJob := range cronJobs.Items { 581 | resolver.snapshot.CronJobs.Store(cronJob.UID, cronJob) 582 | } 583 | 584 | return nil 585 | } 586 | 587 | // add mapping from ip to resolved host to an existing map, 588 | // based on the given cluster snapshot 589 | func (resolver *K8sIPResolver) updateIpMapping() { 590 | // because IP collisions may occur and lead to overwrites in the map, the order is important 591 | // we go from less "favorable" to more "favorable" - 592 | // services -> running pods -> nodes 593 | 594 | resolver.snapshot.Services.Range(func(key any, val any) bool { 595 | service, ok := val.(v1.Service) 596 | if !ok { 597 | log.Printf("Type confusion in services map") 598 | return true // continue 599 | } 600 | // services has (potentially multiple) ClusterIP 601 | workload := Workload{ 602 | Name: service.Name, 603 | Namespace: service.Namespace, 604 | Kind: "Service", 605 | } 606 | 607 | // TODO maybe try to match service to workload 608 | for _, clusterIp := range service.Spec.ClusterIPs { 609 | if clusterIp != "None" { 610 | resolver.storeWorkloadsIP(clusterIp, &workload) 611 | } 612 | } 613 | return true 614 | }) 615 | 616 | resolver.snapshot.Pods.Range(func(key, value any) bool { 617 | pod, ok := value.(v1.Pod) 618 | if !ok { 619 | log.Printf("Type confusion in pods map") 620 | return true // continue 621 | } 622 | entry := resolver.resolvePodDescriptor(&pod) 623 | for _, podIp := range pod.Status.PodIPs { 624 | // if ip is already in the map, override only if current pod is running 625 | resolver.storeWorkloadsIP(podIp.IP, &entry) 626 | } 627 | return true 628 | }) 629 | 630 | resolver.snapshot.Nodes.Range(func(key any, value any) bool { 631 | node, ok := value.(v1.Node) 632 | if !ok { 633 | log.Printf("Type confusion in nodes map") 634 | return true // continue 635 | } 636 | for _, nodeAddress := range node.Status.Addresses { 637 | workload := Workload{ 638 | Name: node.Name, 639 | Namespace: "node", 640 | Kind: "node", 641 | } 642 | resolver.storeWorkloadsIP(nodeAddress.Address, &workload) 643 | } 644 | return true 645 | }) 646 | } 647 | 648 | func (resolver *K8sIPResolver) storeWorkloadsIP(ip string, newWorkload *Workload) { 649 | // we 
want to override existing workload, unless the existing workload is a node and the new one isn't 650 | val, ok := resolver.ipsMap.Load(ip) 651 | if ok { 652 | existingWorkload, ok := val.(Workload) 653 | if ok { 654 | if existingWorkload.Kind == "node" && newWorkload.Kind != "node" { 655 | return 656 | } 657 | } 658 | } 659 | resolver.ipsMap.Store(ip, *newWorkload) 660 | } 661 | 662 | // an ugly function to go up one level in hierarchy. maybe there's a better way to do it 663 | // the snapshot is maintained to avoid using an API request for each resolving 664 | func (resolver *K8sIPResolver) getControllerOfOwner(originalOwner *metav1.OwnerReference) (*metav1.OwnerReference, error) { 665 | switch originalOwner.Kind { 666 | case "ReplicaSet": 667 | replicaSetVal, ok := resolver.snapshot.ReplicaSets.Load(originalOwner.UID) 668 | if !ok { 669 | return nil, errors.New("Missing replicaset for UID " + string(originalOwner.UID)) 670 | } 671 | replicaSet, ok := replicaSetVal.(appsv1.ReplicaSet) 672 | if !ok { 673 | return nil, errors.New("type confusion in replicasets map") 674 | } 675 | return metav1.GetControllerOf(&replicaSet), nil 676 | case "DaemonSet": 677 | daemonSetVal, ok := resolver.snapshot.DaemonSets.Load(originalOwner.UID) 678 | if !ok { 679 | return nil, errors.New("Missing daemonset for UID " + string(originalOwner.UID)) 680 | } 681 | daemonSet, ok := daemonSetVal.(appsv1.DaemonSet) 682 | if !ok { 683 | return nil, errors.New("type confusion in daemonsets map") 684 | } 685 | return metav1.GetControllerOf(&daemonSet), nil 686 | case "StatefulSet": 687 | statefulSetVal, ok := resolver.snapshot.StatefulSets.Load(originalOwner.UID) 688 | if !ok { 689 | return nil, errors.New("Missing statefulset for UID " + string(originalOwner.UID)) 690 | } 691 | statefulSet, ok := statefulSetVal.(appsv1.StatefulSet) 692 | if !ok { 693 | return nil, errors.New("type confusion in statefulsets map") 694 | } 695 | return metav1.GetControllerOf(&statefulSet), nil 696 | case "Job": 697 | jobVal, ok := resolver.snapshot.Jobs.Load(originalOwner.UID) 698 | if !ok { 699 | return nil, errors.New("Missing job for UID " + string(originalOwner.UID)) 700 | } 701 | job, ok := jobVal.(batchv1.Job) 702 | if !ok { 703 | return nil, errors.New("type confusion in jobs map") 704 | } 705 | return metav1.GetControllerOf(&job), nil 706 | case "Deployment": 707 | deploymentVal, ok := resolver.snapshot.Deployments.Load(originalOwner.UID) 708 | if !ok { 709 | return nil, errors.New("Missing deployment for UID " + string(originalOwner.UID)) 710 | } 711 | deployment, ok := deploymentVal.(appsv1.Deployment) 712 | if !ok { 713 | return nil, errors.New("type confusion in deployments map") 714 | } 715 | return metav1.GetControllerOf(&deployment), nil 716 | case "CronJob": 717 | cronJobVal, ok := resolver.snapshot.CronJobs.Load(originalOwner.UID) 718 | if !ok { 719 | return nil, errors.New("Missing cronjob for UID " + string(originalOwner.UID)) 720 | } 721 | cronJob, ok := cronJobVal.(batchv1.CronJob) 722 | if !ok { 723 | cronJob, ok := cronJobVal.(v1beta1.CronJob) 724 | if !ok { 725 | return nil, errors.New("type confusion in cronjobs map") 726 | } 727 | return metav1.GetControllerOf(&cronJob), nil 728 | } 729 | 730 | return metav1.GetControllerOf(&cronJob), nil 731 | } 732 | return nil, errors.New("Unsupported kind for lookup - " + originalOwner.Kind) 733 | } 734 | 735 | func (resolver *K8sIPResolver) resolvePodDescriptor(pod *v1.Pod) Workload { 736 | existing, ok := resolver.snapshot.PodDescriptors.Load(pod.UID) 737 | if ok { 738 | 
result, ok := existing.(Workload) 739 | if ok { 740 | return result 741 | } 742 | } 743 | var err error 744 | name := pod.Name 745 | namespace := pod.Namespace 746 | kind := "pod" 747 | result := Workload{ 748 | Name: name, 749 | Namespace: namespace, 750 | Kind: kind, 751 | } 752 | 753 | if resolver.traverseUpHierarchy { 754 | owner := metav1.GetControllerOf(pod) 755 | // climbing up the owners' hierarchy. if an error occurs, we take the data we got and save 756 | // the error to know we shouldn't save this resolution to the descriptors map and retry later. 757 | for owner != nil { 758 | name = owner.Name 759 | kind = owner.Kind 760 | owner, err = resolver.getControllerOfOwner(owner) 761 | if err != nil { 762 | log.Printf("Warning: couldn't retrieve owner of %v - %v. This might happen when starting up", name, err) 763 | } 764 | } 765 | 766 | result.Name = name 767 | result.Kind = kind 768 | } else { 769 | owner := metav1.GetControllerOf(pod) 770 | if owner != nil { 771 | result.Owner = owner.Name 772 | } 773 | } 774 | 775 | if err == nil { 776 | resolver.snapshot.PodDescriptors.Store(pod.UID, result) 777 | } 778 | return result 779 | } 780 | -------------------------------------------------------------------------------- /pkg/k8s/ipresolver_test.go: -------------------------------------------------------------------------------- 1 | package k8s_test 2 | 3 | import ( 4 | "log" 5 | "testing" 6 | "time" 7 | 8 | "github.com/groundcover-com/caretta/pkg/k8s" 9 | 10 | "github.com/google/uuid" 11 | "github.com/stretchr/testify/assert" 12 | appsv1 "k8s.io/api/apps/v1" 13 | batchv1 "k8s.io/api/batch/v1" 14 | v1 "k8s.io/api/core/v1" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | "k8s.io/apimachinery/pkg/runtime" 17 | "k8s.io/apimachinery/pkg/types" 18 | "k8s.io/apimachinery/pkg/watch" 19 | testclient "k8s.io/client-go/kubernetes/fake" 20 | k8stesting "k8s.io/client-go/testing" 21 | ) 22 | 23 | type podDescriptor struct { 24 | Name string 25 | Namespace string 26 | IP string 27 | Phase v1.PodPhase 28 | UID types.UID 29 | Controller *workloadResourceDescriptor 30 | } 31 | 32 | type nodeDescriptor struct { 33 | Name string 34 | IP string 35 | UID types.UID 36 | } 37 | 38 | type workloadResourceDescriptor struct { 39 | Name string 40 | Namespace string 41 | UID types.UID 42 | Kind string 43 | } 44 | 45 | func (desc *workloadResourceDescriptor) CreateObject() runtime.Object { 46 | switch desc.Kind { 47 | case "Deployment": 48 | { 49 | return &appsv1.Deployment{ 50 | ObjectMeta: metav1.ObjectMeta{ 51 | Name: desc.Name, 52 | Namespace: desc.Namespace, 53 | UID: desc.UID, 54 | }, 55 | } 56 | } 57 | case "ReplicaSet": 58 | { 59 | return &appsv1.ReplicaSet{ 60 | ObjectMeta: metav1.ObjectMeta{ 61 | Name: desc.Name, 62 | Namespace: desc.Namespace, 63 | UID: desc.UID, 64 | }, 65 | } 66 | } 67 | case "DaemonSet": 68 | { 69 | return &appsv1.DaemonSet{ 70 | ObjectMeta: metav1.ObjectMeta{ 71 | Name: desc.Name, 72 | Namespace: desc.Namespace, 73 | UID: desc.UID, 74 | }, 75 | } 76 | } 77 | case "StatefulSet": 78 | { 79 | return &appsv1.StatefulSet{ 80 | ObjectMeta: metav1.ObjectMeta{ 81 | Name: desc.Name, 82 | Namespace: desc.Namespace, 83 | UID: desc.UID, 84 | }, 85 | } 86 | } 87 | case "Job": 88 | { 89 | return &batchv1.Job{ 90 | ObjectMeta: metav1.ObjectMeta{ 91 | Name: desc.Name, 92 | Namespace: desc.Namespace, 93 | UID: desc.UID, 94 | }, 95 | } 96 | } 97 | case "Service": 98 | { 99 | return &v1.Service{ 100 | ObjectMeta: metav1.ObjectMeta{ 101 | Name: desc.Name, 102 | Namespace: desc.Namespace, 103 | 
UID: desc.UID, 104 | }, 105 | } 106 | } 107 | case "CronJob": 108 | { 109 | return &batchv1.CronJob{ 110 | ObjectMeta: metav1.ObjectMeta{ 111 | Name: desc.Name, 112 | Namespace: desc.Namespace, 113 | UID: desc.UID, 114 | }, 115 | } 116 | } 117 | } 118 | return nil 119 | } 120 | 121 | func generatePod(pod podDescriptor) runtime.Object { 122 | newPod := v1.Pod{ 123 | ObjectMeta: metav1.ObjectMeta{ 124 | Name: pod.Name, 125 | Namespace: pod.Namespace, 126 | UID: pod.UID, 127 | }, 128 | Status: v1.PodStatus{ 129 | PodIP: pod.IP, 130 | PodIPs: []v1.PodIP{ 131 | {IP: pod.IP}, 132 | }, 133 | }, 134 | } 135 | if pod.Controller != nil { 136 | newTrue := new(bool) 137 | *newTrue = true 138 | ref := metav1.OwnerReference{ 139 | Kind: pod.Controller.Kind, 140 | Name: pod.Controller.Name, 141 | UID: pod.Controller.UID, 142 | Controller: newTrue, 143 | } 144 | newPod.OwnerReferences = append(newPod.OwnerReferences, ref) 145 | } 146 | return &newPod 147 | 148 | } 149 | 150 | func generateWorkloadResource(desc workloadResourceDescriptor) runtime.Object { 151 | return desc.CreateObject() 152 | } 153 | 154 | func generateNode(node nodeDescriptor) runtime.Object { 155 | return &v1.Node{ 156 | ObjectMeta: metav1.ObjectMeta{ 157 | Name: node.Name, 158 | UID: node.UID, 159 | }, 160 | Status: v1.NodeStatus{ 161 | Addresses: []v1.NodeAddress{ 162 | { 163 | Type: "InternalIP", 164 | Address: node.IP, 165 | }, 166 | }, 167 | }, 168 | } 169 | } 170 | 171 | func generateClusterObjects(pods []podDescriptor, workloadsResources []workloadResourceDescriptor, nodes []nodeDescriptor) []runtime.Object { 172 | result := make([]runtime.Object, 0, len(pods)+len(workloadsResources)+len(nodes)) 173 | for _, pod := range pods { 174 | newPod := generatePod(pod) 175 | result = append(result, newPod) 176 | } 177 | for _, desc := range workloadsResources { 178 | result = append(result, generateWorkloadResource(desc)) 179 | } 180 | for _, node := range nodes { 181 | result = append(result, generateNode(node)) 182 | } 183 | return result 184 | } 185 | 186 | type testStep struct { 187 | shouldWait bool 188 | newPods []podDescriptor 189 | newNodes []nodeDescriptor 190 | newWorkloadResource []workloadResourceDescriptor 191 | modifiedPods []podDescriptor 192 | modifiedNodes []nodeDescriptor 193 | modifiedWorkloadResources []workloadResourceDescriptor 194 | expectedResolves map[string]k8s.Workload 195 | } 196 | 197 | type testScenario struct { 198 | description string 199 | initialState testStep 200 | shouldTraverse bool 201 | updateSteps []testStep 202 | } 203 | 204 | type fakeWatchers struct { 205 | nodesWatcher *watch.FakeWatcher 206 | podsWatcher *watch.FakeWatcher 207 | deploymentsWatcher *watch.FakeWatcher 208 | replicasetsWatcher *watch.FakeWatcher 209 | daemonsetsWatcher *watch.FakeWatcher 210 | statefulsetsWatcher *watch.FakeWatcher 211 | jobsWatcher *watch.FakeWatcher 212 | servicesWatcher *watch.FakeWatcher 213 | cronjobsWatcher *watch.FakeWatcher 214 | } 215 | 216 | func createPrependWatchers(clientset *testclient.Clientset) fakeWatchers { 217 | watchers := fakeWatchers{ 218 | nodesWatcher: watch.NewFake(), 219 | podsWatcher: watch.NewFake(), 220 | deploymentsWatcher: watch.NewFake(), 221 | replicasetsWatcher: watch.NewFake(), 222 | daemonsetsWatcher: watch.NewFake(), 223 | statefulsetsWatcher: watch.NewFake(), 224 | jobsWatcher: watch.NewFake(), 225 | servicesWatcher: watch.NewFake(), 226 | cronjobsWatcher: watch.NewFake(), 227 | } 228 | clientset.PrependWatchReactor("nodes", 
k8stesting.DefaultWatchReactor(watchers.nodesWatcher, nil)) 229 | clientset.PrependWatchReactor("pods", k8stesting.DefaultWatchReactor(watchers.podsWatcher, nil)) 230 | clientset.PrependWatchReactor("deployments", k8stesting.DefaultWatchReactor(watchers.deploymentsWatcher, nil)) 231 | clientset.PrependWatchReactor("replicasets", k8stesting.DefaultWatchReactor(watchers.replicasetsWatcher, nil)) 232 | clientset.PrependWatchReactor("daemonsets", k8stesting.DefaultWatchReactor(watchers.daemonsetsWatcher, nil)) 233 | clientset.PrependWatchReactor("statefulsets", k8stesting.DefaultWatchReactor(watchers.statefulsetsWatcher, nil)) 234 | clientset.PrependWatchReactor("jobs", k8stesting.DefaultWatchReactor(watchers.jobsWatcher, nil)) 235 | clientset.PrependWatchReactor("services", k8stesting.DefaultWatchReactor(watchers.servicesWatcher, nil)) 236 | clientset.PrependWatchReactor("cronjobs", k8stesting.DefaultWatchReactor(watchers.cronjobsWatcher, nil)) 237 | return watchers 238 | } 239 | 240 | func addObject(watchers fakeWatchers, obj runtime.Object, kind string) { 241 | switch kind { 242 | case "Pod": 243 | { 244 | watchers.podsWatcher.Add(obj) 245 | } 246 | case "node": 247 | { 248 | watchers.nodesWatcher.Add(obj) 249 | } 250 | case "Deployment": 251 | { 252 | watchers.deploymentsWatcher.Add(obj) 253 | } 254 | case "ReplicaSet": 255 | { 256 | watchers.replicasetsWatcher.Add(obj) 257 | } 258 | case "DaemonSet": 259 | { 260 | watchers.daemonsetsWatcher.Add(obj) 261 | } 262 | case "StatefulSet": 263 | { 264 | watchers.statefulsetsWatcher.Add(obj) 265 | } 266 | case "Job": 267 | { 268 | watchers.jobsWatcher.Add(obj) 269 | } 270 | case "Service": 271 | { 272 | watchers.servicesWatcher.Add(obj) 273 | } 274 | case "CronJob": 275 | { 276 | watchers.cronjobsWatcher.Add(obj) 277 | } 278 | } 279 | } 280 | 281 | func modifyObject(watchers fakeWatchers, obj runtime.Object, kind string) { 282 | switch kind { 283 | case "Pod": 284 | { 285 | watchers.podsWatcher.Modify(obj) 286 | } 287 | case "node": 288 | { 289 | watchers.nodesWatcher.Modify(obj) 290 | } 291 | case "Deployment": 292 | { 293 | watchers.deploymentsWatcher.Modify(obj) 294 | } 295 | case "ReplicaSet": 296 | { 297 | watchers.replicasetsWatcher.Modify(obj) 298 | } 299 | case "DaemonSet": 300 | { 301 | watchers.daemonsetsWatcher.Modify(obj) 302 | } 303 | case "StatefulSet": 304 | { 305 | watchers.statefulsetsWatcher.Modify(obj) 306 | } 307 | case "Job": 308 | { 309 | watchers.jobsWatcher.Modify(obj) 310 | } 311 | case "Service": 312 | { 313 | watchers.servicesWatcher.Modify(obj) 314 | } 315 | case "CronJob": 316 | { 317 | watchers.cronjobsWatcher.Modify(obj) 318 | } 319 | default: 320 | { 321 | log.Printf("unhandled kind %v", kind) 322 | } 323 | } 324 | } 325 | 326 | func runTest(t *testing.T, test testScenario) { 327 | assert := assert.New(t) 328 | // Arrange 1: mocks and initial state 329 | originalObjs := generateClusterObjects(test.initialState.newPods, test.initialState.newWorkloadResource, test.initialState.newNodes) 330 | fakeClient := testclient.NewSimpleClientset(originalObjs...) 
331 | fakeWatchers := createPrependWatchers(fakeClient) 332 | 333 | resolver, err := k8s.NewK8sIPResolver(fakeClient, false, test.shouldTraverse) 334 | assert.NoError(err) 335 | 336 | // Act 1: process initial state 337 | err = resolver.StartWatching() 338 | assert.NoError(err) 339 | 340 | // Assert 1: resolve and compare to expected, original state 341 | for ipToCheck, expectedWorkload := range test.initialState.expectedResolves { 342 | resultWorkload := resolver.ResolveIP(ipToCheck) 343 | assert.Equal(expectedWorkload, resultWorkload) 344 | } 345 | 346 | for _, step := range test.updateSteps { 347 | // Arrange 2+n: update the state via watchers 348 | for _, newPod := range step.newPods { 349 | podObj := generatePod(newPod) 350 | addObject(fakeWatchers, podObj, "Pod") 351 | } 352 | for _, newWorkloadResource := range step.newWorkloadResource { 353 | obj := generateWorkloadResource(newWorkloadResource) 354 | addObject(fakeWatchers, obj, newWorkloadResource.Kind) 355 | } 356 | for _, newNode := range step.newNodes { 357 | obj := generateNode(newNode) 358 | addObject(fakeWatchers, obj, "node") 359 | } 360 | for _, modifiedPod := range step.modifiedPods { 361 | podObj := generatePod(modifiedPod) 362 | modifyObject(fakeWatchers, podObj, "Pod") 363 | } 364 | for _, modifiedWorkloadResource := range step.newWorkloadResource { 365 | obj := generateWorkloadResource(modifiedWorkloadResource) 366 | modifyObject(fakeWatchers, obj, modifiedWorkloadResource.Kind) 367 | } 368 | for _, modifiedNode := range step.modifiedNodes { 369 | obj := generateNode(modifiedNode) 370 | modifyObject(fakeWatchers, obj, "node") 371 | } 372 | 373 | if step.shouldWait { 374 | time.Sleep(1 * time.Second) 375 | } 376 | 377 | // Act+Assert 2+n 378 | for ipToResolve, expectedWorkload := range step.expectedResolves { 379 | assert.Equal(expectedWorkload, resolver.ResolveIP(ipToResolve)) 380 | } 381 | 382 | } 383 | } 384 | 385 | var testDeployment = workloadResourceDescriptor{"deployment1", "namespaceA", types.UID(uuid.NewString()), "Deployment"} 386 | var testReplicaSet = workloadResourceDescriptor{"replicaset1", "namespaceA", types.UID(uuid.NewString()), "ReplicaSet"} 387 | var testDaemonSet = workloadResourceDescriptor{"daemonset1", "namespaceA", types.UID(uuid.NewString()), "DaemonSet"} 388 | var testStatefulSet = workloadResourceDescriptor{"statefulset1", "namespaceA", types.UID(uuid.NewString()), "StatefulSet"} 389 | var testJob = workloadResourceDescriptor{"job1", "namespaceA", types.UID(uuid.NewString()), "Job"} 390 | var testCronjob = workloadResourceDescriptor{"cronjob1", "namespaceA", types.UID(uuid.NewString()), "CronJob"} 391 | 392 | func TestResolving(t *testing.T) { 393 | var tests = []testScenario{ 394 | { 395 | description: "unsuccessful resolving result should be external", 396 | shouldTraverse: true, 397 | initialState: testStep{ 398 | shouldWait: false, 399 | newPods: []podDescriptor{ 400 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 401 | }, 402 | expectedResolves: map[string]k8s.Workload{ 403 | "1.1.1.2": { 404 | Name: "1.1.1.2", 405 | Namespace: "external", 406 | Kind: "external", 407 | }, 408 | }, 409 | }, 410 | }, 411 | { 412 | description: "initial snapshot 1 pod resolve to pod1", 413 | shouldTraverse: true, 414 | initialState: testStep{ 415 | shouldWait: false, 416 | newPods: []podDescriptor{ 417 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 418 | }, 419 | expectedResolves: map[string]k8s.Workload{ 420 | "1.1.1.1": { 
421 | Name: "pod1", 422 | Namespace: "namespaceA", 423 | Kind: "pod", 424 | }, 425 | }, 426 | }, 427 | }, 428 | { 429 | description: "initial snapshot 3 pods resolve to each pod", 430 | shouldTraverse: true, 431 | initialState: testStep{ 432 | shouldWait: false, 433 | newPods: []podDescriptor{ 434 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 435 | {"pod2", "namespaceA", "1.1.1.2", v1.PodRunning, types.UID(uuid.New().String()), nil}, 436 | {"pod3", "namespaceA", "1.1.1.3", v1.PodRunning, types.UID(uuid.New().String()), nil}, 437 | }, 438 | expectedResolves: map[string]k8s.Workload{ 439 | "1.1.1.1": { 440 | Name: "pod1", 441 | Namespace: "namespaceA", 442 | Kind: "pod", 443 | }, 444 | "1.1.1.2": { 445 | Name: "pod2", 446 | Namespace: "namespaceA", 447 | Kind: "pod", 448 | }, 449 | "1.1.1.3": { 450 | Name: "pod3", 451 | Namespace: "namespaceA", 452 | Kind: "pod", 453 | }, 454 | }, 455 | }, 456 | }, 457 | { 458 | description: "empty initial 1 pod added resolve to pod", 459 | shouldTraverse: true, 460 | initialState: testStep{ 461 | shouldWait: false, 462 | expectedResolves: map[string]k8s.Workload{ 463 | "1.1.1.1": { 464 | Name: "1.1.1.1", 465 | Namespace: "external", 466 | Kind: "external", 467 | }, 468 | }, 469 | }, 470 | updateSteps: []testStep{ 471 | { 472 | shouldWait: true, 473 | newPods: []podDescriptor{ 474 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 475 | }, 476 | expectedResolves: map[string]k8s.Workload{ 477 | "1.1.1.1": { 478 | Name: "pod1", 479 | Namespace: "namespaceA", 480 | Kind: "pod", 481 | }, 482 | }, 483 | }, 484 | }, 485 | }, 486 | { 487 | description: "empty initial 1 node added resolve to node", 488 | shouldTraverse: true, 489 | initialState: testStep{ 490 | shouldWait: false, 491 | expectedResolves: map[string]k8s.Workload{ 492 | "1.1.1.0": { 493 | Name: "1.1.1.0", 494 | Namespace: "external", 495 | Kind: "external", 496 | }, 497 | }, 498 | }, 499 | updateSteps: []testStep{ 500 | { 501 | shouldWait: true, 502 | newNodes: []nodeDescriptor{ 503 | {"Node1", "1.1.1.0", types.UID(uuid.NewString())}, 504 | }, 505 | expectedResolves: map[string]k8s.Workload{ 506 | "1.1.1.0": { 507 | Name: "Node1", 508 | Namespace: "node", 509 | Kind: "node", 510 | }, 511 | }, 512 | }, 513 | }, 514 | }, 515 | { 516 | description: "empty initial 1 node, 1 pod added resolve to each", 517 | shouldTraverse: true, 518 | initialState: testStep{ 519 | shouldWait: false, 520 | expectedResolves: map[string]k8s.Workload{ 521 | "1.1.1.0": { 522 | Name: "1.1.1.0", 523 | Namespace: "external", 524 | Kind: "external", 525 | }, 526 | }, 527 | }, 528 | updateSteps: []testStep{ 529 | { 530 | shouldWait: true, 531 | newPods: []podDescriptor{ 532 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 533 | }, 534 | newNodes: []nodeDescriptor{ 535 | {"Node1", "1.1.1.0", types.UID(uuid.NewString())}, 536 | }, 537 | expectedResolves: map[string]k8s.Workload{ 538 | "1.1.1.0": { 539 | Name: "Node1", 540 | Namespace: "node", 541 | Kind: "node", 542 | }, 543 | "1.1.1.1": { 544 | Name: "pod1", 545 | Namespace: "namespaceA", 546 | Kind: "pod", 547 | }, 548 | }, 549 | }, 550 | }, 551 | }, 552 | { 553 | description: "1 pod changing ip resolve both ips to pod", 554 | shouldTraverse: true, 555 | initialState: testStep{ 556 | shouldWait: false, 557 | newPods: []podDescriptor{ 558 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 559 | }, 560 | expectedResolves: 
map[string]k8s.Workload{ 561 | "1.1.1.1": { 562 | Name: "pod1", 563 | Namespace: "namespaceA", 564 | Kind: "pod", 565 | }, 566 | "1.1.1.2": { 567 | Name: "1.1.1.2", 568 | Namespace: "external", 569 | Kind: "external", 570 | }, 571 | }, 572 | }, 573 | updateSteps: []testStep{ 574 | { 575 | shouldWait: true, 576 | modifiedPods: []podDescriptor{ 577 | {"pod1", "namespaceA", "1.1.1.2", v1.PodRunning, types.UID(uuid.New().String()), nil}, 578 | }, 579 | expectedResolves: map[string]k8s.Workload{ 580 | "1.1.1.1": { // the resolver shouldn't delete old not-reused entries 581 | Name: "pod1", 582 | Namespace: "namespaceA", 583 | Kind: "pod", 584 | }, 585 | "1.1.1.2": { 586 | Name: "pod1", 587 | Namespace: "namespaceA", 588 | Kind: "pod", 589 | }, 590 | }, 591 | }, 592 | }, 593 | }, 594 | { 595 | description: "1 pod changing ip old ip is reused resolve reused ip to new pod", 596 | shouldTraverse: true, 597 | initialState: testStep{ 598 | shouldWait: false, 599 | newPods: []podDescriptor{ 600 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID("1"), nil}, 601 | }, 602 | expectedResolves: map[string]k8s.Workload{ 603 | "1.1.1.1": { 604 | Name: "pod1", 605 | Namespace: "namespaceA", 606 | Kind: "pod", 607 | }, 608 | "1.1.1.2": { 609 | Name: "1.1.1.2", 610 | Namespace: "external", 611 | Kind: "external", 612 | }, 613 | }, 614 | }, 615 | updateSteps: []testStep{ 616 | { 617 | shouldWait: false, 618 | modifiedPods: []podDescriptor{ 619 | {"pod1", "namespaceA", "1.1.1.2", v1.PodRunning, types.UID("1"), nil}, 620 | }, 621 | expectedResolves: map[string]k8s.Workload{}, 622 | }, 623 | { 624 | shouldWait: true, 625 | newPods: []podDescriptor{ 626 | {"pod2", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.New().String()), nil}, 627 | }, 628 | expectedResolves: map[string]k8s.Workload{ 629 | "1.1.1.1": { 630 | Name: "pod2", 631 | Namespace: "namespaceA", 632 | Kind: "pod", 633 | }, 634 | "1.1.1.2": { 635 | Name: "pod1", 636 | Namespace: "namespaceA", 637 | Kind: "pod", 638 | }, 639 | }, 640 | }, 641 | }, 642 | }, 643 | { 644 | description: "1 pod changing ip old ip is reused by node resolve ip to new node", 645 | shouldTraverse: true, 646 | initialState: testStep{ 647 | shouldWait: false, 648 | newPods: []podDescriptor{ 649 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID("1"), nil}, 650 | }, 651 | expectedResolves: map[string]k8s.Workload{ 652 | "1.1.1.1": { 653 | Name: "pod1", 654 | Namespace: "namespaceA", 655 | Kind: "pod", 656 | }, 657 | "1.1.1.2": { 658 | Name: "1.1.1.2", 659 | Namespace: "external", 660 | Kind: "external", 661 | }, 662 | }, 663 | }, 664 | updateSteps: []testStep{ 665 | { 666 | shouldWait: false, 667 | modifiedPods: []podDescriptor{ 668 | {"pod1", "namespaceA", "1.1.1.2", v1.PodRunning, types.UID("1"), nil}, 669 | }, 670 | expectedResolves: map[string]k8s.Workload{}, 671 | }, 672 | { 673 | shouldWait: true, 674 | newNodes: []nodeDescriptor{ 675 | {"Node1", "1.1.1.1", types.UID(uuid.NewString())}, 676 | }, 677 | expectedResolves: map[string]k8s.Workload{ 678 | "1.1.1.1": { 679 | Name: "Node1", 680 | Namespace: "node", 681 | Kind: "node", 682 | }, 683 | "1.1.1.2": { 684 | Name: "pod1", 685 | Namespace: "namespaceA", 686 | Kind: "pod", 687 | }, 688 | }, 689 | }, 690 | }, 691 | }, 692 | { 693 | description: "1 node changing ip resolve both ips to node", 694 | shouldTraverse: true, 695 | initialState: testStep{ 696 | shouldWait: false, 697 | newPods: []podDescriptor{}, 698 | newNodes: []nodeDescriptor{ 699 | {"Node1", "1.1.1.0", types.UID("1")}, 700 | }, 701 | 
expectedResolves: map[string]k8s.Workload{}, 702 | }, 703 | updateSteps: []testStep{ 704 | { 705 | shouldWait: true, 706 | modifiedNodes: []nodeDescriptor{ 707 | {"Node1", "1.1.2.0", types.UID("1")}, 708 | }, 709 | modifiedWorkloadResources: []workloadResourceDescriptor{}, 710 | expectedResolves: map[string]k8s.Workload{ 711 | "1.1.1.0": { // resolver isn't expected to delete old not-reused entries 712 | Name: "Node1", 713 | Namespace: "node", 714 | Kind: "node", 715 | }, 716 | "1.1.2.0": { 717 | Name: "Node1", 718 | Namespace: "node", 719 | Kind: "node", 720 | }, 721 | }, 722 | }, 723 | }, 724 | }, 725 | { 726 | description: "1 node changing ip, reused by another node resolve reused ip to new node", 727 | shouldTraverse: true, 728 | initialState: testStep{ 729 | shouldWait: true, 730 | newNodes: []nodeDescriptor{ 731 | {"Node1", "1.1.1.0", types.UID("1")}, 732 | }, 733 | expectedResolves: map[string]k8s.Workload{}, 734 | }, 735 | updateSteps: []testStep{ 736 | { 737 | shouldWait: true, 738 | modifiedNodes: []nodeDescriptor{ 739 | {"Node1", "1.1.2.0", types.UID("1")}, 740 | }, 741 | expectedResolves: map[string]k8s.Workload{}, 742 | }, 743 | { 744 | shouldWait: true, 745 | newNodes: []nodeDescriptor{ 746 | {"Node2", "1.1.1.0", types.UID("2")}, 747 | }, 748 | modifiedNodes: []nodeDescriptor{ 749 | {"Node1", "1.1.2.0", types.UID("1")}, 750 | }, 751 | expectedResolves: map[string]k8s.Workload{ 752 | "1.1.1.0": { 753 | Name: "Node2", 754 | Namespace: "node", 755 | Kind: "node", 756 | }, 757 | "1.1.2.0": { 758 | Name: "Node1", 759 | Namespace: "node", 760 | Kind: "node", 761 | }, 762 | }, 763 | }, 764 | }, 765 | }, 766 | { 767 | description: "pod with hostip wont override node", 768 | shouldTraverse: true, 769 | initialState: testStep{ 770 | shouldWait: false, 771 | newNodes: []nodeDescriptor{ 772 | {"Node1", "1.1.1.0", types.UID(uuid.NewString())}, 773 | }, 774 | expectedResolves: map[string]k8s.Workload{}, 775 | }, 776 | updateSteps: []testStep{ 777 | { 778 | shouldWait: true, 779 | newPods: []podDescriptor{ 780 | {"pod1", "namespaceA", "1.1.1.0", v1.PodRunning, types.UID(uuid.New().String()), nil}, 781 | }, 782 | expectedResolves: map[string]k8s.Workload{ 783 | "1.1.1.0": { 784 | Name: "Node1", 785 | Namespace: "node", 786 | Kind: "node", 787 | }, 788 | }, 789 | }, 790 | }, 791 | }, 792 | } 793 | for _, test := range tests { 794 | t.Run(test.description, func(t *testing.T) { 795 | runTest(t, test) 796 | }) 797 | } 798 | } 799 | 800 | func TestControllersResolving(t *testing.T) { 801 | var controllersTests = []testScenario{ 802 | { 803 | description: "initial snapshot 1 pod controlled by deployment resolve to deployment", 804 | shouldTraverse: true, 805 | initialState: testStep{ 806 | shouldWait: false, 807 | newPods: []podDescriptor{ 808 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testDeployment}, 809 | }, 810 | newWorkloadResource: []workloadResourceDescriptor{testDeployment}, 811 | expectedResolves: map[string]k8s.Workload{ 812 | "1.1.1.1": { 813 | Name: testDeployment.Name, 814 | Namespace: testDeployment.Namespace, 815 | Kind: testDeployment.Kind, 816 | }, 817 | }, 818 | }, 819 | }, 820 | { 821 | description: "initial snapshot 1 pod controlled by replicaset resolve to replicaset", 822 | shouldTraverse: true, 823 | initialState: testStep{ 824 | shouldWait: false, 825 | newPods: []podDescriptor{ 826 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testReplicaSet}, 827 | }, 828 | newWorkloadResource: 
[]workloadResourceDescriptor{testReplicaSet}, 829 | expectedResolves: map[string]k8s.Workload{ 830 | "1.1.1.1": { 831 | Name: testReplicaSet.Name, 832 | Namespace: testReplicaSet.Namespace, 833 | Kind: testReplicaSet.Kind, 834 | }, 835 | }, 836 | }, 837 | }, 838 | { 839 | description: "initial snapshot 1 pod controlled by daemonset resolve to daemonset", 840 | shouldTraverse: true, 841 | initialState: testStep{ 842 | shouldWait: false, 843 | newPods: []podDescriptor{ 844 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testDaemonSet}, 845 | }, 846 | newWorkloadResource: []workloadResourceDescriptor{testDaemonSet}, 847 | expectedResolves: map[string]k8s.Workload{ 848 | "1.1.1.1": { 849 | Name: testDaemonSet.Name, 850 | Namespace: testDaemonSet.Namespace, 851 | Kind: testDaemonSet.Kind, 852 | }, 853 | }, 854 | }, 855 | }, 856 | { 857 | description: "initial snapshot 1 pod controlled by statefulset resolve to statefulset", 858 | shouldTraverse: true, 859 | initialState: testStep{ 860 | shouldWait: false, 861 | newPods: []podDescriptor{ 862 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testStatefulSet}, 863 | }, 864 | newWorkloadResource: []workloadResourceDescriptor{testStatefulSet}, 865 | expectedResolves: map[string]k8s.Workload{ 866 | "1.1.1.1": { 867 | Name: testStatefulSet.Name, 868 | Namespace: testStatefulSet.Namespace, 869 | Kind: testStatefulSet.Kind, 870 | }, 871 | }, 872 | }, 873 | }, 874 | { 875 | description: "initial snapshot 1 pod controlled by job resolve to job", 876 | shouldTraverse: true, 877 | initialState: testStep{ 878 | shouldWait: false, 879 | newPods: []podDescriptor{ 880 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testJob}, 881 | }, 882 | newWorkloadResource: []workloadResourceDescriptor{testJob}, 883 | expectedResolves: map[string]k8s.Workload{ 884 | "1.1.1.1": { 885 | Name: testJob.Name, 886 | Namespace: testJob.Namespace, 887 | Kind: testJob.Kind, 888 | }, 889 | }, 890 | }, 891 | }, 892 | { 893 | description: "initial snapshot 1 pod controlled by cronjob resolve to cronjob", 894 | shouldTraverse: true, 895 | initialState: testStep{ 896 | shouldWait: false, 897 | newPods: []podDescriptor{ 898 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testCronjob}, 899 | }, 900 | newWorkloadResource: []workloadResourceDescriptor{testCronjob}, 901 | expectedResolves: map[string]k8s.Workload{ 902 | "1.1.1.1": { 903 | Name: testCronjob.Name, 904 | Namespace: testCronjob.Namespace, 905 | Kind: testCronjob.Kind, 906 | }, 907 | }, 908 | }, 909 | }, 910 | { 911 | description: "initial snapshot 1 pod controlled by deployment owned by deployment", 912 | shouldTraverse: false, 913 | initialState: testStep{ 914 | shouldWait: false, 915 | newPods: []podDescriptor{ 916 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testDeployment}, 917 | }, 918 | newWorkloadResource: []workloadResourceDescriptor{testDeployment}, 919 | expectedResolves: map[string]k8s.Workload{ 920 | "1.1.1.1": { 921 | Name: "pod1", 922 | Namespace: "namespaceA", 923 | Kind: "pod", 924 | Owner: testDeployment.Name, 925 | }, 926 | }, 927 | }, 928 | }, 929 | { 930 | description: "initial snapshot 1 pod controlled by replicaset owned by replicaset", 931 | shouldTraverse: false, 932 | initialState: testStep{ 933 | shouldWait: false, 934 | newPods: []podDescriptor{ 935 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), 
&testReplicaSet}, 936 | }, 937 | newWorkloadResource: []workloadResourceDescriptor{testReplicaSet}, 938 | expectedResolves: map[string]k8s.Workload{ 939 | "1.1.1.1": { 940 | Name: "pod1", 941 | Namespace: "namespaceA", 942 | Kind: "pod", 943 | Owner: testReplicaSet.Name, 944 | }, 945 | }, 946 | }, 947 | }, 948 | { 949 | description: "initial snapshot 1 pod controlled by daemonset owned by daemonset", 950 | shouldTraverse: false, 951 | initialState: testStep{ 952 | shouldWait: false, 953 | newPods: []podDescriptor{ 954 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testDaemonSet}, 955 | }, 956 | newWorkloadResource: []workloadResourceDescriptor{testDaemonSet}, 957 | expectedResolves: map[string]k8s.Workload{ 958 | "1.1.1.1": { 959 | Name: "pod1", 960 | Namespace: "namespaceA", 961 | Kind: "pod", 962 | Owner: testDaemonSet.Name, 963 | }, 964 | }, 965 | }, 966 | }, 967 | { 968 | description: "initial snapshot 1 pod controlled by statefulset owned by statefulset", 969 | shouldTraverse: false, 970 | initialState: testStep{ 971 | shouldWait: false, 972 | newPods: []podDescriptor{ 973 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testStatefulSet}, 974 | }, 975 | newWorkloadResource: []workloadResourceDescriptor{testStatefulSet}, 976 | expectedResolves: map[string]k8s.Workload{ 977 | "1.1.1.1": { 978 | Name: "pod1", 979 | Namespace: "namespaceA", 980 | Kind: "pod", 981 | Owner: testStatefulSet.Name, 982 | }, 983 | }, 984 | }, 985 | }, 986 | { 987 | description: "initial snapshot 1 pod controlled by job owned by job", 988 | shouldTraverse: false, 989 | initialState: testStep{ 990 | shouldWait: false, 991 | newPods: []podDescriptor{ 992 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testJob}, 993 | }, 994 | newWorkloadResource: []workloadResourceDescriptor{testJob}, 995 | expectedResolves: map[string]k8s.Workload{ 996 | "1.1.1.1": { 997 | Name: "pod1", 998 | Namespace: "namespaceA", 999 | Kind: "pod", 1000 | Owner: testJob.Name, 1001 | }, 1002 | }, 1003 | }, 1004 | }, 1005 | { 1006 | description: "initial snapshot 1 pod controlled by cronjob owned by cronjob", 1007 | shouldTraverse: false, 1008 | initialState: testStep{ 1009 | shouldWait: false, 1010 | newPods: []podDescriptor{ 1011 | {"pod1", "namespaceA", "1.1.1.1", v1.PodRunning, types.UID(uuid.NewString()), &testCronjob}, 1012 | }, 1013 | newWorkloadResource: []workloadResourceDescriptor{testCronjob}, 1014 | expectedResolves: map[string]k8s.Workload{ 1015 | "1.1.1.1": { 1016 | Name: "pod1", 1017 | Namespace: "namespaceA", 1018 | Kind: "pod", 1019 | Owner: testCronjob.Name, 1020 | }, 1021 | }, 1022 | }, 1023 | }, 1024 | } 1025 | for _, test := range controllersTests { 1026 | t.Run(test.description, func(t *testing.T) { 1027 | runTest(t, test) 1028 | }) 1029 | } 1030 | } 1031 | -------------------------------------------------------------------------------- /pkg/metrics/prometheus.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | 7 | "github.com/prometheus/client_golang/prometheus/promhttp" 8 | ) 9 | 10 | func StartMetricsServer(endpoint string, port string) *http.Server { 11 | http.Handle(endpoint, promhttp.Handler()) 12 | server := &http.Server{Addr: port} 13 | go func() { 14 | err := server.ListenAndServe() 15 | if err != nil { 16 | log.Fatalf("Error starting prometheus server on port %v", port) 17 | } 18 | }() 19 | return server 20 | } 21 | 
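
A note on StartMetricsServer above: net/http's ListenAndServe always returns a non-nil error, so a graceful Shutdown() of the returned server lands in the log.Fatalf branch via http.ErrServerClosed and kills the process. The sketch below is one shutdown-tolerant variant, not caretta's actual API: the function name and the dedicated ServeMux (instead of the global http.Handle, which also panics if the endpoint is ever registered twice) are assumptions for illustration.

package metrics

import (
	"errors"
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// StartMetricsServerGraceful is a hypothetical variant of StartMetricsServer:
// it treats http.ErrServerClosed as a normal shutdown so callers can call
// server.Shutdown() without crashing the process.
func StartMetricsServerGraceful(endpoint string, port string) *http.Server {
	mux := http.NewServeMux()
	mux.Handle(endpoint, promhttp.Handler())
	server := &http.Server{Addr: port, Handler: mux}
	go func() {
		if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			log.Fatalf("error running prometheus server on %v: %v", port, err)
		}
	}()
	return server
}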
-------------------------------------------------------------------------------- /pkg/tracing/ebpf/arm_support.h: --------------------------------------------------------------------------------
1 | #ifndef __ARM_SUPPORT_H__
2 | #define __ARM_SUPPORT_H__
3 |
4 | struct user_pt_regs {
5 |   __u64 regs[31];
6 |   __u64 sp;
7 |   __u64 pc;
8 |   __u64 pstate;
9 | };
10 |
11 | #endif
-------------------------------------------------------------------------------- /pkg/tracing/ebpf/caretta.bpf.c: --------------------------------------------------------------------------------
1 | #include "core_structures.h"
2 | #include "arm_support.h"
3 | #include <bpf_core_read.h>
4 | #include <bpf_helpers.h>
5 | #include <bpf_tracing.h>
6 | #include "ebpf_utils.h"
7 | #include "epbf_shared_types.h"
8 | #include "ebpf_internal_types.h"
9 |
10 | char __license[] SEC("license") = "Dual MIT/GPL";
11 |
12 | // internal kernel-only map to hold state for each sock observed.
13 | struct bpf_map_def SEC("maps") sock_infos = {
14 |   .type = BPF_MAP_TYPE_HASH,
15 |   .key_size = sizeof(struct sock *),
16 |   .value_size = sizeof(struct sock_info),
17 |   .max_entries = MAX_CONNECTIONS,
18 | };
19 |
20 | // the main product of the tracing - map containing all connections observed,
21 | // with metadata and throughput stats.
22 | // key is a whole identifier struct and not a single id to split the constant
23 | // and dynamic values and to resemble as closely as possible the end result in
24 | // the userspace code.
25 | struct bpf_map_def SEC("maps") connections = {
26 |   .type = BPF_MAP_TYPE_HASH,
27 |   .key_size = sizeof(struct connection_identifier),
28 |   .value_size = sizeof(struct connection_throughput_stats),
29 |   .max_entries = MAX_CONNECTIONS,
30 | };
31 |
32 | // helper to convert short int from BE to LE
33 | static inline u16 be_to_le(__be16 be) { return (be >> 8) | (be << 8); }
34 |
35 | static inline u32 get_unique_id() {
36 |   return bpf_ktime_get_ns() % __UINT32_MAX__; // no reason to use 64 bit for this
37 | }
38 |
39 | // function for parsing the struct sock
40 | static inline int
41 | parse_sock_data(struct sock *sock, struct connection_tuple *out_tuple,
42 |                 struct connection_throughput_stats *out_throughput) {
43 |
44 |   if (sock == NULL) {
45 |     return BPF_ERROR;
46 |   }
47 |
48 |   // struct sock wraps struct tcp_sock and struct inet_sock as its first member
49 |   struct tcp_sock *tcp = (struct tcp_sock *)sock;
50 |   struct inet_sock *inet = (struct inet_sock *)sock;
51 |
52 |   // initialize variables. IP addresses and ports are read originally
53 |   // big-endian, and we will convert the ports to little-endian.
54 | __be16 src_port_be = 0; 55 | __be16 dst_port_be = 0; 56 | 57 | // read connection tuple 58 | 59 | if (0 != bpf_core_read(&out_tuple->src_ip, sizeof(out_tuple->src_ip), 60 | &inet->inet_saddr)) { 61 | return BPF_ERROR; 62 | } 63 | 64 | if (0 != bpf_core_read(&out_tuple->dst_ip, sizeof(out_tuple->dst_ip), 65 | &inet->inet_daddr)) { 66 | return BPF_ERROR; 67 | } 68 | 69 | if (0 != bpf_core_read(&src_port_be, sizeof(src_port_be), &inet->inet_sport)) { 70 | return BPF_ERROR; 71 | } 72 | out_tuple->src_port = be_to_le(src_port_be); 73 | 74 | if (0 != bpf_core_read(&dst_port_be, sizeof(dst_port_be), &inet->inet_dport)) { 75 | return BPF_ERROR; 76 | } 77 | out_tuple->dst_port = be_to_le(dst_port_be); 78 | 79 | // read throughput data 80 | 81 | if (0 != bpf_core_read(&out_throughput->bytes_received, 82 | sizeof(out_throughput->bytes_received), 83 | &tcp->bytes_received)) { 84 | return BPF_ERROR; 85 | } 86 | if (0 != bpf_core_read(&out_throughput->bytes_sent, 87 | sizeof(out_throughput->bytes_sent), &tcp->bytes_sent)) { 88 | return BPF_ERROR; 89 | } 90 | 91 | return BPF_SUCCESS; 92 | }; 93 | 94 | static inline enum connection_role get_sock_role(struct sock* sock) { 95 | // the max_ack_backlog holds the limit for the accept queue 96 | // if it is a server, it will not be 0 97 | int max_ack_backlog = 0; 98 | if (0 != bpf_core_read(&max_ack_backlog, sizeof(max_ack_backlog), 99 | &sock->sk_max_ack_backlog)) { 100 | return CONNECTION_ROLE_UNKNOWN; 101 | } 102 | 103 | return max_ack_backlog == 0 ? CONNECTION_ROLE_CLIENT : CONNECTION_ROLE_SERVER; 104 | } 105 | 106 | // probing the tcp_data_queue kernel function, and adding the connection 107 | // observed to the map. 108 | SEC("kprobe/tcp_data_queue") 109 | static int handle_tcp_data_queue(struct pt_regs *ctx) { 110 | // first argument to tcp_data_queue is a struct sock* 111 | struct sock *sock = (struct sock *)PT_REGS_PARM1(ctx); 112 | 113 | struct connection_identifier conn_id = {}; 114 | struct connection_throughput_stats throughput = {}; 115 | 116 | if (parse_sock_data(sock, &conn_id.tuple, &throughput) == BPF_ERROR) { 117 | return BPF_ERROR; 118 | } 119 | 120 | // skip unconnected sockets 121 | if (conn_id.tuple.dst_port == 0 && conn_id.tuple.dst_ip == BPF_SUCCESS) { 122 | return BPF_SUCCESS; 123 | } 124 | 125 | // fill the conn_id extra details from sock_info map entry, or create one 126 | struct sock_info *sock_info = bpf_map_lookup_elem(&sock_infos, &sock); 127 | if (sock_info == NULL) { 128 | // first time we encounter this sock 129 | // check if server or client and insert to the maps 130 | 131 | enum connection_role role = get_sock_role(sock); 132 | 133 | struct sock_info info = { 134 | .pid = 0, // can't associate to pid anyway 135 | .role = role, 136 | .is_active = true, 137 | .id = get_unique_id(), 138 | }; 139 | bpf_map_update_elem(&sock_infos, &sock, &info, BPF_ANY); 140 | 141 | conn_id.pid = info.pid; 142 | conn_id.id = info.id; 143 | conn_id.role = info.role; 144 | throughput.is_active = true; 145 | 146 | bpf_map_update_elem(&connections, &conn_id, &throughput, BPF_ANY); 147 | 148 | return BPF_SUCCESS; 149 | 150 | } 151 | 152 | conn_id.pid = sock_info->pid; 153 | conn_id.id = sock_info->id; 154 | conn_id.role = sock_info->role; 155 | if (!sock_info->is_active) { 156 | return -1; 157 | } 158 | throughput.is_active = sock_info->is_active; 159 | 160 | bpf_map_update_elem(&connections, &conn_id, &throughput, BPF_ANY); 161 | 162 | return BPF_SUCCESS; 163 | }; 164 | 165 | static inline int handle_set_tcp_syn_sent(struct sock* sock) { 166 | 
// start of a client session 167 | u32 pid = bpf_get_current_pid_tgid() >> 32; 168 | 169 | struct sock_info info = { 170 | .pid = pid, 171 | .role = CONNECTION_ROLE_CLIENT, 172 | .is_active = true, 173 | .id = get_unique_id(), 174 | }; 175 | 176 | bpf_map_update_elem(&sock_infos, &sock, &info, BPF_ANY); 177 | 178 | return BPF_SUCCESS; 179 | } 180 | 181 | static inline int handle_set_tcp_syn_recv(struct sock* sock) { 182 | // this is a server getting syn after listen 183 | struct connection_identifier conn_id = {}; 184 | struct connection_throughput_stats throughput = {}; 185 | 186 | if (parse_sock_data(sock, &conn_id.tuple, &throughput) == BPF_ERROR) { 187 | return BPF_ERROR; 188 | } 189 | 190 | struct sock_info info = { 191 | .pid = 0, // can't associate to process 192 | .role = CONNECTION_ROLE_SERVER, 193 | .is_active = true, 194 | .id = get_unique_id(), 195 | }; 196 | 197 | bpf_map_update_elem(&sock_infos, &sock, &info, BPF_ANY); 198 | 199 | // probably the dst ip will still be uninitialized 200 | if (conn_id.tuple.dst_ip == 0) { 201 | return BPF_SUCCESS; 202 | } 203 | 204 | conn_id.pid = info.pid; 205 | conn_id.id = info.id; 206 | conn_id.role = info.role; 207 | 208 | bpf_map_update_elem(&connections, &conn_id, &throughput, BPF_ANY); 209 | 210 | return BPF_SUCCESS; 211 | } 212 | 213 | static inline int handle_set_tcp_close(struct sock* sock) { 214 | // mark as inactive 215 | struct connection_identifier conn_id = {}; 216 | struct connection_throughput_stats throughput = {}; 217 | 218 | if (parse_sock_data(sock, &conn_id.tuple, &throughput) == BPF_ERROR) { 219 | return BPF_ERROR; 220 | } 221 | 222 | struct sock_info *info = bpf_map_lookup_elem(&sock_infos, &sock); 223 | if (info == NULL) { 224 | conn_id.id = get_unique_id(); 225 | conn_id.pid = 0; // cannot associate to PID in this state 226 | conn_id.role = get_sock_role(sock); 227 | } else { 228 | conn_id.id = info->id; 229 | conn_id.pid = info->pid; 230 | conn_id.role = info->role; 231 | bpf_map_delete_elem(&sock_infos, &sock); 232 | } 233 | 234 | throughput.is_active = false; 235 | bpf_map_update_elem(&connections, &conn_id, &throughput, BPF_ANY); 236 | 237 | return BPF_SUCCESS; 238 | } 239 | 240 | SEC("tracepoint/sock/inet_sock_set_state") 241 | static int handle_sock_set_state(struct set_state_args *args) { 242 | struct sock *sock = (struct sock *)args->skaddr; 243 | 244 | switch(args->newstate) { 245 | case TCP_SYN_RECV: { 246 | return handle_set_tcp_syn_recv(sock) == BPF_ERROR; 247 | } 248 | case TCP_SYN_SENT: { 249 | return handle_set_tcp_syn_sent(sock) == BPF_ERROR; 250 | } 251 | case TCP_CLOSE: { 252 | return handle_set_tcp_close(sock); 253 | } 254 | } 255 | 256 | return BPF_SUCCESS; 257 | } -------------------------------------------------------------------------------- /pkg/tracing/ebpf/core_structures.h: -------------------------------------------------------------------------------- 1 | #ifndef __CORE_STRUCTURES_H__ 2 | #define __CORE_STRUCTURES_H__ 3 | 4 | #include 5 | 6 | /* 7 | * All structs and unions in this file should have a "preserve access index" 8 | * attribute. The following attaches this attribute to all records (structs, 9 | * unions, classes). 
10 | * @see https://clang.llvm.org/docs/LanguageExtensions.html 11 | */ 12 | #pragma clang attribute push 13 | #pragma clang attribute(__attribute__((preserve_access_index)), \ 14 | apply_to = record) 15 | 16 | // this is not core structure per se, but it would have been defined in a full 17 | // vmlinux.h 18 | enum { 19 | false = 0, 20 | true = 1, 21 | }; 22 | 23 | enum { 24 | TCP_ESTABLISHED = 1, 25 | TCP_SYN_SENT = 2, 26 | TCP_SYN_RECV = 3, 27 | TCP_FIN_WAIT1 = 4, 28 | TCP_FIN_WAIT2 = 5, 29 | TCP_TIME_WAIT = 6, 30 | TCP_CLOSE = 7, 31 | TCP_CLOSE_WAIT = 8, 32 | TCP_LAST_ACK = 9, 33 | TCP_LISTEN = 10, 34 | TCP_CLOSING = 11, 35 | TCP_NEW_SYN_RECV = 12, 36 | TCP_MAX_STATES = 13, 37 | }; 38 | 39 | 40 | typedef u16 sa_family_t; 41 | typedef u32 socklen_t; 42 | 43 | struct in_addr { 44 | __be32 s_addr; 45 | }; 46 | 47 | struct in6_addr { 48 | union { 49 | __u8 u6_addr8[16]; 50 | } in6_u; 51 | }; 52 | 53 | struct sockaddr_in { 54 | sa_family_t sin_family; 55 | __be16 sin_port; 56 | struct in_addr sin_addr; 57 | }; 58 | 59 | struct sockaddr_in6 { 60 | sa_family_t sin6_family; 61 | __be16 sin6_port; 62 | struct in6_addr sin6_addr; 63 | }; 64 | 65 | struct sockaddr { 66 | sa_family_t sa_family; 67 | }; 68 | 69 | struct sock_common { 70 | struct { 71 | __be32 skc_daddr; 72 | __be32 skc_rcv_saddr; 73 | }; 74 | struct { 75 | __be16 skc_dport; 76 | __u16 skc_num; 77 | }; 78 | unsigned short skc_family; 79 | struct in6_addr skc_v6_daddr; 80 | }; 81 | 82 | struct sock { 83 | struct sock_common __sk_common; 84 | unsigned int sk_shutdown : 2, sk_no_check_tx : 1, sk_no_check_rx : 1, 85 | sk_userlocks : 4, sk_protocol : 8, sk_type : 16; 86 | u32 sk_max_ack_backlog; 87 | }; 88 | 89 | struct socket { 90 | struct sock *sk; 91 | }; 92 | 93 | struct ipv6_pinfo { 94 | struct in6_addr saddr; 95 | }; 96 | 97 | struct inet_sock { 98 | struct sock sk; 99 | struct ipv6_pinfo *pinet6; 100 | __be32 inet_saddr; 101 | __be16 inet_sport; 102 | }; 103 | 104 | struct tcp_sock { 105 | u64 bytes_received; 106 | u64 bytes_sent; 107 | }; 108 | 109 | typedef u8 u_int8_t; 110 | typedef u16 u_int16_t; 111 | 112 | #pragma clang attribute pop 113 | 114 | #endif // __KERNEL_CORE_STRUCTURES_H__ 115 | -------------------------------------------------------------------------------- /pkg/tracing/ebpf/ebpf_internal_types.h: -------------------------------------------------------------------------------- 1 | #include "epbf_shared_types.h" 2 | 3 | #define MAX_CONNECTIONS 1000000 4 | 5 | // internal kernel-only struct to hold socket information which can't be parsed 6 | // from struct sock. 7 | struct sock_info { 8 | u32 pid; 9 | enum connection_role role; 10 | u32 is_active; 11 | u32 id; 12 | }; 13 | 14 | // partial struct of args for tcp_set_state 15 | struct set_state_args { 16 | u64 padding; 17 | struct sock *skaddr; 18 | u32 oldstate; 19 | u32 newstate; 20 | // more... 21 | }; 22 | 23 | 24 | -------------------------------------------------------------------------------- /pkg/tracing/ebpf/ebpf_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_UTILS_H__ 2 | #define __EBPF_UTILS_H__ 3 | 4 | #define BPF_SUCCESS 0 5 | #define BPF_ERROR -1 6 | 7 | #endif -------------------------------------------------------------------------------- /pkg/tracing/ebpf/epbf_shared_types.h: -------------------------------------------------------------------------------- 1 | #ifndef __EBPF_SHARED_TYPES_H__ 2 | #define __EBPF_SHARED_TYPES_H__ 3 | #include "vmlinux.h" 4 | 5 | // helper defs for inet_sock. 
--------------------------------------------------------------------------------
/pkg/tracing/ebpf/epbf_shared_types.h:
--------------------------------------------------------------------------------
1 | #ifndef __EBPF_SHARED_TYPES_H__
2 | #define __EBPF_SHARED_TYPES_H__
3 | #include "vmlinux.h"
4 | 
5 | // helper defs for inet_sock. These are defined in inet_sock.h, but are not
6 | // copied automatically to vmlinux.h
7 | #define inet_daddr sk.__sk_common.skc_daddr
8 | #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
9 | #define inet_dport sk.__sk_common.skc_dport
10 | #define inet_num sk.__sk_common.skc_num
11 | 
12 | 
13 | enum connection_role {
14 |   CONNECTION_ROLE_UNKNOWN = 0,
15 |   CONNECTION_ROLE_CLIENT,
16 |   CONNECTION_ROLE_SERVER,
17 | };
18 | 
19 | // describes the two sides of a connection; constant for each connection.
20 | struct connection_tuple {
21 |   __be32 src_ip;
22 |   __be32 dst_ip;
23 |   u16 src_port;
24 |   u16 dst_port;
25 | };
26 | 
27 | // all information needed to identify a specific connection.
28 | // due to socket reuse, each of the members (besides id) may change while
29 | // the others stay the same.
30 | struct connection_identifier {
31 |   u32 id; // uniquely generated id
32 |   u32 pid;
33 |   struct connection_tuple tuple;
34 |   enum connection_role role;
35 | };
36 | 
37 | // dynamic information about the state of a connection.
38 | struct connection_throughput_stats {
39 |   u64 bytes_sent;
40 |   u64 bytes_received;
41 |   u64 is_active; // u64 because it will be padded anyway; should be revisited
42 |                  // if new members are added
43 | };
44 | 
45 | #endif
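These shared structs define the key/value layout of the connections map that both the kernel and userspace sides read. The repository keeps its real Go equivalents in pkg/caretta/types.go (see the tree above); the sketch below shows one plausible mirror, with field order and widths chosen to match the C definitions so that cilium/ebpf can decode map entries directly. The Go type and field names here are illustrative assumptions, not the repository's actual declarations.

    // types_sketch.go - illustrative only; the repository's real equivalents
    // live in pkg/caretta/types.go. Field order and widths must match the
    // C structs in epbf_shared_types.h.
    package caretta

    // ConnectionTuple mirrors struct connection_tuple. The IPs are __be32,
    // i.e. network byte order.
    type ConnectionTuple struct {
        SrcIp   uint32
        DstIp   uint32
        SrcPort uint16
        DstPort uint16
    }

    // ConnectionIdentifier mirrors struct connection_identifier (24 bytes,
    // naturally aligned).
    type ConnectionIdentifier struct {
        Id    uint32
        Pid   uint32
        Tuple ConnectionTuple
        Role  uint32 // enum connection_role
    }

    // ConnectionThroughputStats mirrors struct connection_throughput_stats.
    type ConnectionThroughputStats struct {
        BytesSent     uint64
        BytesReceived uint64
        IsActive      uint64 // u64 to match the padded C layout
    }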
%v", err) 33 | } 34 | log.Printf("BPF objects loaded") 35 | 36 | // attach a kprobe and tracepoint 37 | kp, err := link.Kprobe("tcp_data_queue", objs.bpfPrograms.HandleTcpDataQueue, nil) 38 | if err != nil { 39 | return Probes{}, nil, fmt.Errorf("error attaching kprobe: %v", err) 40 | } 41 | log.Printf("Kprobe attached successfully") 42 | 43 | tp, err := link.Tracepoint("sock", "inet_sock_set_state", objs.bpfPrograms.HandleSockSetState, nil) 44 | if err != nil { 45 | return Probes{}, nil, fmt.Errorf("error attaching tracepoint: %v", err) 46 | } 47 | log.Printf("Tracepoint attached successfully") 48 | 49 | // We are done with loading kprobes - clear the btf cache 50 | btf.FlushKernelSpec() 51 | 52 | return Probes{ 53 | Kprobe: kp, 54 | Tracepoint: tp, 55 | BpfObjs: objs, 56 | }, objs.Connections, nil 57 | } 58 | 59 | func (objs *Probes) UnloadProbes() error { 60 | // if any close operation fails, will continue to try closing the rest of the struct, 61 | // and return the first error 62 | var resultErr error 63 | resultErr = nil 64 | 65 | err := objs.Kprobe.Close() 66 | if err != nil { 67 | resultErr = err 68 | } 69 | err = objs.Tracepoint.Close() 70 | if err != nil && resultErr == nil { 71 | resultErr = err 72 | } 73 | err = objs.BpfObjs.Close() 74 | if err != nil && resultErr == nil { 75 | resultErr = err 76 | } 77 | 78 | return resultErr 79 | } 80 | -------------------------------------------------------------------------------- /scripts/build/download_libbpf_headers.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This downloads the libbpf headers we need to compile eBPF code. 4 | # The script is based on cilium's update headers script, 5 | # https://github.com/cilium/ebpf/blob/4420605496c54a45653a7f1d277896e71e6705e2/examples/headers/update.sh#L1 6 | 7 | # Version of libbpf to fetch headers from 8 | LIBBPF_VERSION=0.6.1 9 | 10 | # Version of cilium ebpf repository to fetch vmlinux from 11 | CILIUM_VMLINUX_VERSION=0.10.0 12 | 13 | HEADERS_DIRECTORY="/tmp/caretta_extra/libbpf_headers" 14 | 15 | # The headers we want 16 | prefix=libbpf-"$LIBBPF_VERSION" 17 | headers=( 18 | "$prefix"/src/bpf_endian.h 19 | "$prefix"/src/bpf_helper_defs.h 20 | "$prefix"/src/bpf_helpers.h 21 | "$prefix"/src/bpf_tracing.h 22 | "$prefix"/src/bpf_core_read.h 23 | ) 24 | 25 | if [ ! -d "pkg" ] ; then 26 | echo "Run this scripts from the repository's root directory." 1>&2 27 | exit 1 28 | fi 29 | 30 | if [ ! -d "$HEADERS_DIRECTORY" ]; then 31 | mkdir -p "$HEADERS_DIRECTORY" 32 | if [ "$?" -ne 0 ]; then 33 | echo "Failed to create libbpf headers directory \""$HEADERS_DIRECTORY"\"." 1>&2 34 | exit 1 35 | fi 36 | fi 37 | 38 | # Fetch libbpf release and extract the desired headers 39 | curl -sL --connect-timeout 10 --max-time 10 \ 40 | "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" | \ 41 | tar -xz --xform='s#.*/##' -C "$HEADERS_DIRECTORY" "${headers[@]}" 42 | if [ "$?" -ne 0 ]; then 43 | echo "Failed to download and extract the needed libbpf headers." 1>&2 44 | exit 1 45 | fi 46 | 47 | # Fetch compact vmlinux file from cilium's ebpf repository. 48 | # This is not a libbpf header per-se, but it's close enough that we put it in the same location. 49 | curl -s -o "$HEADERS_DIRECTORY"/vmlinux.h \ 50 | https://raw.githubusercontent.com/cilium/ebpf/v${CILIUM_VMLINUX_VERSION}/examples/headers/common.h 51 | if [ "$?" -ne 0 ]; then 52 | echo "Failed to download vmlinux compact version from cilium's repository." 
--------------------------------------------------------------------------------
/scripts/build/download_libbpf_headers.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # This downloads the libbpf headers we need to compile eBPF code.
4 | # The script is based on cilium's update-headers script,
5 | # https://github.com/cilium/ebpf/blob/4420605496c54a45653a7f1d277896e71e6705e2/examples/headers/update.sh#L1
6 | 
7 | # Version of libbpf to fetch headers from
8 | LIBBPF_VERSION=0.6.1
9 | 
10 | # Version of the cilium ebpf repository to fetch vmlinux.h from
11 | CILIUM_VMLINUX_VERSION=0.10.0
12 | 
13 | HEADERS_DIRECTORY="/tmp/caretta_extra/libbpf_headers"
14 | 
15 | # The headers we want
16 | prefix=libbpf-"$LIBBPF_VERSION"
17 | headers=(
18 |     "$prefix"/src/bpf_endian.h
19 |     "$prefix"/src/bpf_helper_defs.h
20 |     "$prefix"/src/bpf_helpers.h
21 |     "$prefix"/src/bpf_tracing.h
22 |     "$prefix"/src/bpf_core_read.h
23 | )
24 | 
25 | if [ ! -d "pkg" ] ; then
26 |     echo "Run this script from the repository's root directory." 1>&2
27 |     exit 1
28 | fi
29 | 
30 | if [ ! -d "$HEADERS_DIRECTORY" ]; then
31 |     mkdir -p "$HEADERS_DIRECTORY"
32 |     if [ "$?" -ne 0 ]; then
33 |         echo "Failed to create libbpf headers directory \"$HEADERS_DIRECTORY\"." 1>&2
34 |         exit 1
35 |     fi
36 | fi
37 | 
38 | # Fetch the libbpf release and extract the desired headers
39 | curl -sL --connect-timeout 10 --max-time 10 \
40 |     "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" | \
41 |     tar -xz --xform='s#.*/##' -C "$HEADERS_DIRECTORY" "${headers[@]}"
42 | if [ "$?" -ne 0 ]; then
43 |     echo "Failed to download and extract the needed libbpf headers." 1>&2
44 |     exit 1
45 | fi
46 | 
47 | # Fetch the compact vmlinux.h from cilium's ebpf repository.
48 | # This is not a libbpf header per se, but it's close enough that we put it in the same location.
49 | curl -s -o "$HEADERS_DIRECTORY"/vmlinux.h \
50 |     https://raw.githubusercontent.com/cilium/ebpf/v${CILIUM_VMLINUX_VERSION}/examples/headers/common.h
51 | if [ "$?" -ne 0 ]; then
52 |     echo "Failed to download the compact vmlinux.h from cilium's repository." 1>&2
53 |     exit 1
54 | fi
55 | 
56 | echo "Successfully downloaded libbpf headers." 1>&2
57 | 
--------------------------------------------------------------------------------
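The headers staged in /tmp/caretta_extra/libbpf_headers are consumed when the eBPF C source is compiled into Go bindings; the bpfObjects, bpfPrograms, and loadBpfObjects symbols used by probes.go are the shape of bpf2go output generated with the identifier "bpf". A hypothetical generate directive wiring the script's output directory into that compilation could look like the following; the repository actually drives the build from its Makefile, and the exact flags may differ.

    // generate_sketch.go - hypothetical; the repository drives compilation
    // from its Makefile, and the exact flags may differ.
    package tracing

    // Compile pkg/tracing/ebpf/caretta.bpf.c into Go bindings (bpfObjects,
    // bpfPrograms, loadBpfObjects) using the headers staged by
    // scripts/build/download_libbpf_headers.sh.
    //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang bpf ebpf/caretta.bpf.c -- -I/tmp/caretta_extra/libbpf_headers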