├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ └── feature-request.yml ├── scripts │ ├── check_and_install_stub.py │ └── scan_org_licenses_licensee.py └── workflows │ ├── dependency-scan.yml │ ├── license-cla-check.yml │ ├── manage-cla-stubs.yml │ ├── old │ └── license-cal-check.yml │ ├── python_licensee_scan.yml │ ├── reusable-cla-check.yml │ ├── scan-licenses.yml │ └── secrets-scanning-report.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── SECURITY.md ├── actions └── check-license │ └── action.yml ├── permissive_licenses.json ├── profile ├── README.md └── image1.jpg ├── reports ├── secret_report_20250217_002349.csv ├── secret_report_20250224_002324.csv ├── secret_report_20250303_002412.csv ├── secret_report_20250310_002003.csv ├── secret_report_20250317_002435.csv ├── secret_report_20250324_002442.csv ├── secret_report_20250331_002538.csv ├── secret_report_20250407_002459.csv ├── secret_report_20250414_002556.csv ├── secret_report_20250421_002953.csv ├── secret_report_20250428_002604.csv ├── vulnerability_report_20250218_185722.csv ├── vulnerability_report_20250224_000546.csv ├── vulnerability_report_20250303_000551.csv ├── vulnerability_report_20250310_000510.csv ├── vulnerability_report_20250317_000607.csv ├── vulnerability_report_20250324_000606.csv ├── vulnerability_report_20250331_000603.csv ├── vulnerability_report_20250407_000559.csv ├── vulnerability_report_20250414_000614.csv ├── vulnerability_report_20250421_000559.csv └── vulnerability_report_20250428_000622.csv ├── scripts ├── check_licenses.py ├── dependency_scanner.py ├── github_secret_scanner.py └── requirements.txt ├── signatures └── CLA.json └── templates └── cla-trigger-template.yml /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug report 2 | description: Create a report to help us improve 3 | labels: bug 4 | 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | 
Thanks for taking the time to fill out this bug report! 10 | 11 | - type: textarea 12 | id: describe-bug 13 | attributes: 14 | label: Describe the bug 15 | description: A clear and concise description of what the bug is. 16 | validations: 17 | required: true 18 | 19 | - type: textarea 20 | id: reproduction-steps 21 | attributes: 22 | label: Reproduction steps 23 | description: Steps to reproduce the behavior 24 | value: | 25 | 1. 26 | 2. 27 | 3. 28 | ... 29 | validations: 30 | required: true 31 | 32 | - type: textarea 33 | id: expected-behavior 34 | attributes: 35 | label: Expected behavior 36 | description: A clear and concise description of what you expected to happen. 37 | validations: 38 | required: true 39 | 40 | - type: textarea 41 | id: additional-context 42 | attributes: 43 | label: Additional context 44 | description: Add any other context about the problem here. 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Suggest an idea for this project 3 | labels: enhancement 4 | 5 | body: 6 | - type: textarea 7 | id: describe-problem 8 | attributes: 9 | label: Is your feature request related to a problem? Please describe. 10 | description: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | validations: 12 | required: true 13 | 14 | - type: textarea 15 | id: describe-solution 16 | attributes: 17 | label: Describe the solution you'd like 18 | description: A clear and concise description of what you want to happen. 
19 | validations: 20 | required: true 21 | 22 | - type: textarea 23 | id: describe-alternatives 24 | attributes: 25 | label: Describe alternatives you've considered 26 | description: A clear and concise description of any alternative solutions or features you've considered. 27 | validations: 28 | required: false 29 | 30 | - type: textarea 31 | id: additional-context 32 | attributes: 33 | label: Additional context 34 | description: Add any other context or screenshots about the feature request here. 35 | validations: 36 | required: false 37 | -------------------------------------------------------------------------------- /.github/scripts/check_and_install_stub.py: -------------------------------------------------------------------------------- 1 | # File: .github/scripts/check_and_install_stub.py 2 | # Purpose: Determines a repository's license, classifies it, and manages a CLA trigger workflow stub. 3 | # The stub now triggers on both pull_request_target and issue_comment events. 4 | 5 | import os 6 | import json 7 | import subprocess 8 | import base64 9 | import time 10 | import logging 11 | from github import Github, GithubException, UnknownObjectException 12 | 13 | # --- Configuration --- 14 | ORG_NAME = os.environ.get("GITHUB_REPOSITORY_OWNER") 15 | # IMPORTANT: Increment this version due to changes in the stub template's trigger logic. 16 | TARGET_STUB_VERSION = "1.1.0" # Example: Major.Minor.Patch -> new trigger is significant 17 | STUB_WORKFLOW_PATH = ".github/workflows/cla-check-trigger.yml" 18 | 19 | # Get the Licensee Docker image tag from an environment variable set by the workflow. 20 | LICENSEE_DOCKER_IMAGE_TAG = os.environ.get("LICENSEE_DOCKER_IMAGE", "local-org-licensee:latest") 21 | 22 | if not ORG_NAME: 23 | logging.critical("CRITICAL: GITHUB_REPOSITORY_OWNER environment variable not set. Cannot proceed.") 24 | exit(1) 25 | 26 | # Default URL to your CLA document stored within the .github repository itself. 
27 | # This will be embedded in the stub workflow. 28 | DEFAULT_CLA_DOCUMENT_URL_IN_STUB = f"https://github.com/{ORG_NAME}/.github/blob/main/.github/CONTRIBUTOR_LICENSE_AGREEMENT.md" 29 | # If you prefer to configure this via an environment variable from manage-cla-stubs.yml: 30 | # CLA_DOCUMENT_URL_FOR_STUB_FINAL = os.environ.get("CLA_DOCUMENT_URL_FOR_STUBS_ENV_VAR", DEFAULT_CLA_DOCUMENT_URL_IN_STUB) 31 | 32 | 33 | # Define the content of the stub workflow file. 34 | # This stub triggers the reusable workflow for both PR events and relevant PR comments. 35 | STUB_WORKFLOW_CONTENT_TEMPLATE = f"""\ 36 | # This file is auto-generated and managed by the organization's .github repository. 37 | # Do not modify manually. Version: {TARGET_STUB_VERSION} 38 | name: CLA Check Trigger 39 | 40 | on: 41 | # Trigger on pull request events (opened, new commits pushed, reopened) 42 | pull_request_target: 43 | types: [opened, synchronize, reopened] 44 | # Trigger on issue comments (pull requests are also 'issues' in GitHub's model) 45 | issue_comment: 46 | types: [created] # Only when a new comment is made 47 | 48 | jobs: 49 | call_cla_check: 50 | # This job will run if: 51 | # 1. The event is 'pull_request_target'. 52 | # OR 53 | # 2. The event is 'issue_comment' AND the comment was made on a pull request. 54 | # The 'contributor-assistant/github-action' in the reusable workflow will then 55 | # determine if the comment body is relevant for CLA signing or rechecking. 56 | if: > 57 | github.event_name == 'pull_request_target' || 58 | (github.event_name == 'issue_comment' && github.event.issue.pull_request) 59 | 60 | # Call the organization's centralized reusable CLA checking workflow. 61 | # Pinning to a specific versioned tag (e.g., @v1.0.0) or commit SHA of the reusable workflow is highly recommended for stability. 62 | uses: {ORG_NAME}/.github/.github/workflows/reusable-cla-check.yml@main 63 | secrets: 64 | # Pass the PAT required by contributor-assistant. 
This PAT needs permissions for: 65 | # - PR interactions (comments, labels, statuses) on THIS repository (where the stub runs). 66 | # - Contents Read & Write on the {ORG_NAME}/.github repository to manage the CLA.csv signature file. 67 | CONTRIBUTOR_ASSISTANT_PAT: ${{{{ secrets.CLA_ASSISTANT_PAT }}}} # Note: Secret name is still CLA_ASSISTANT_PAT as per previous setup 68 | # Can be renamed if desired, but ensure consistency. 69 | with: 70 | # Provide the URL to the CLA document. 71 | cla_document_url: {DEFAULT_CLA_DOCUMENT_URL_IN_STUB} # Using the default determined in Python script 72 | # Optional overrides for signature file path and branch if defaults in reusable workflow are not suitable: 73 | # signature_file_path: '.github/signatures/CLA.csv' 74 | # signature_branch: 'main' 75 | """ 76 | 77 | # --- Logging Setup --- 78 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 79 | 80 | # --- get_license_info function --- 81 | # (This function remains unchanged from the previous correct version that builds licensee locally) 82 | def get_license_info(repo_full_name, gh_token, temp_base_dir="temp_license_check"): 83 | g = Github(gh_token) 84 | try: 85 | repo = g.get_repo(repo_full_name) 86 | except UnknownObjectException: 87 | logging.warning(f"Repository {repo_full_name} not found or PAT lacks access.") 88 | return "REPO_NOT_FOUND", "non-permissive" 89 | except Exception as e: 90 | logging.error(f"Error accessing repository {repo_full_name} object: {e}") 91 | return "REPO_ACCESS_ERROR", "non-permissive" 92 | 93 | license_content = None; license_filename = "LICENSE" 94 | common_license_files = ["LICENSE", "LICENSE.MD", "LICENSE.TXT", "COPYING", "COPYING.MD", "UNLICENSE"] 95 | try: 96 | contents = repo.get_contents("") 97 | for content_file in contents: 98 | if content_file.name.upper() in common_license_files: 99 | license_filename = content_file.name 100 | license_content_b64 = repo.get_contents(content_file.path).content 
101 | license_content = base64.b64decode(license_content_b64).decode('utf-8', errors='replace') 102 | logging.info(f"Found license file '{content_file.path}' in {repo_full_name}.") 103 | break 104 | except Exception as e: 105 | logging.warning(f"Could not list root contents or read license file from root for {repo_full_name}: {e}. Will try specific paths.") 106 | for fname in common_license_files: 107 | try: 108 | license_content_b64 = repo.get_contents(fname).content 109 | license_content = base64.b64decode(license_content_b64).decode('utf-8', errors='replace') 110 | license_filename = fname; logging.info(f"Found license file '{fname}' directly in {repo_full_name}."); break 111 | except UnknownObjectException: continue 112 | except Exception as e_inner: logging.warning(f"Error fetching specific license file {fname} for {repo_full_name}: {e_inner}"); continue 113 | if not license_content: 114 | logging.info(f"No common license file found for {repo_full_name} via API. Classifying as non-permissive.") 115 | return "NO_LICENSE_FILE", "non-permissive" 116 | 117 | repo_temp_dir = os.path.join(temp_base_dir, repo_full_name.replace("/", "_")) 118 | os.makedirs(repo_temp_dir, exist_ok=True) 119 | temp_license_filepath = os.path.join(repo_temp_dir, license_filename) 120 | try: 121 | with open(temp_license_filepath, "w", encoding="utf-8") as f: f.write(license_content) 122 | cmd = [ "docker", "run", "--rm", "-v", f"{os.path.abspath(repo_temp_dir)}:/scan_dir", LICENSEE_DOCKER_IMAGE_TAG, "detect", "/scan_dir", "--json" ] 123 | logging.info(f"Running licensee for {repo_full_name} using image {LICENSEE_DOCKER_IMAGE_TAG}: {' '.join(cmd)}") 124 | result = subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=90) 125 | if result.returncode != 0: 126 | if "command not found" in result.stderr.lower() or "No such file or directory" in result.stderr.lower(): 127 | logging.error(f"Licensee Docker command failed for {repo_full_name}. 
Docker image '{LICENSEE_DOCKER_IMAGE_TAG}' might not be available or command inside is wrong. Stderr: {result.stderr[:500]}") 128 | else: logging.error(f"Licensee Docker command failed for {repo_full_name}. Exit: {result.returncode}, Stderr: {result.stderr[:500]}") 129 | return "LICENSEE_ERROR", "non-permissive" 130 | license_data = json.loads(result.stdout); spdx_id = "OTHER" 131 | if license_data.get("licenses") and isinstance(license_data["licenses"], list) and license_data["licenses"]: 132 | best_license = max(license_data["licenses"], key=lambda lic: lic.get("confidence", 0), default=None) 133 | if best_license and best_license.get("spdx_id"): spdx_id = best_license["spdx_id"] 134 | elif license_data["licenses"][0].get("spdx_id"): spdx_id = license_data["licenses"][0].get("spdx_id", "OTHER") 135 | elif license_data.get("matched_files") and license_data["matched_files"][0].get("license") and license_data["matched_files"][0]["license"].get("spdx_id"): 136 | spdx_id = license_data["matched_files"][0]["license"]["spdx_id"] 137 | logging.info(f"License for {repo_full_name} determined by licensee as: {spdx_id}") 138 | return spdx_id, classify_license(spdx_id) 139 | except subprocess.TimeoutExpired: logging.error(f"Licensee Docker command timed out for {repo_full_name}."); return "LICENSEE_TIMEOUT", "non-permissive" 140 | except json.JSONDecodeError as e: logging.error(f"Failed to parse licensee JSON for {repo_full_name}. Output: {result.stdout[:300]}. 
Error: {e}"); return "LICENSEE_JSON_ERROR", "non-permissive" 141 | except Exception as e: logging.error(f"Unexpected error during licensee processing for {repo_full_name}: {e}"); return "UNKNOWN_ERROR_LICENSEE_PROCESSING", "non-permissive" 142 | finally: 143 | if os.path.exists(temp_license_filepath): os.remove(temp_license_filepath) 144 | if os.path.exists(repo_temp_dir) and not os.listdir(repo_temp_dir): os.rmdir(repo_temp_dir) 145 | if os.path.exists(temp_base_dir) and not os.listdir(temp_base_dir): 146 | try: os.rmdir(temp_base_dir) 147 | except OSError: pass 148 | 149 | # --- classify_license function --- 150 | # (This function remains unchanged) 151 | def classify_license(spdx_id): 152 | permissive_spdx_ids_str = os.environ.get("PERMISSIVE_SPDX_IDS", "MIT,Apache-2.0,BSD-3-Clause,ISC,BSD-2-Clause,CC0-1.0,Unlicense") 153 | permissive_ids = {pid.strip().upper() for pid in permissive_spdx_ids_str.split(',')} 154 | if spdx_id is None: logging.warning("classify_license received None SPDX ID, defaulting to non-permissive."); return "non-permissive" 155 | if spdx_id.upper() in permissive_ids: return "permissive" 156 | return "non-permissive" 157 | 158 | # --- manage_stub function --- 159 | # (This function's core logic for creating/updating/deleting files remains unchanged. 160 | # It will use the updated STUB_WORKFLOW_CONTENT_TEMPLATE.) 
161 | def manage_stub(repo_full_name, gh_token): 162 | g = Github(gh_token) 163 | try: 164 | repo = g.get_repo(repo_full_name) 165 | if repo.archived: logging.info(f"Skipping archived repository: {repo_full_name}"); return "skipped_archived" 166 | except UnknownObjectException: logging.warning(f"Repository {repo_full_name} not found or PAT lacks access during stub management."); return "error_repo_not_found_stub_mgmt" 167 | except Exception as e: logging.error(f"Error accessing repository {repo_full_name} object for stub management: {e}"); return "error_repo_access_stub_mgmt" 168 | 169 | logging.info(f"Managing stub for repository: {repo_full_name}") 170 | spdx_id, license_type = get_license_info(repo_full_name, gh_token) 171 | logging.info(f" License classification for {repo_full_name}: {license_type} (SPDX/Code: {spdx_id or 'N/A'})") 172 | 173 | action_taken = "no_action_default" 174 | if license_type == "non-permissive": 175 | logging.info(f" Non-permissive license. Ensuring CLA stub workflow exists for {repo_full_name}.") 176 | try: 177 | existing_stub_file = None; existing_content = "" 178 | try: 179 | existing_stub_file = repo.get_contents(STUB_WORKFLOW_PATH, ref=repo.default_branch) 180 | existing_content = base64.b64decode(existing_stub_file.content).decode('utf-8') 181 | except UnknownObjectException: logging.info(f" No existing stub found at {STUB_WORKFLOW_PATH} in {repo_full_name}.") 182 | current_version_str = "0.0.0" 183 | if existing_content: 184 | for line in existing_content.splitlines(): 185 | if "# Version:" in line: current_version_str = line.split("# Version:")[1].strip(); break 186 | if existing_stub_file and current_version_str == TARGET_STUB_VERSION and existing_content.strip() == STUB_WORKFLOW_CONTENT_TEMPLATE.strip(): 187 | logging.info(f" CLA stub '{STUB_WORKFLOW_PATH}' is up-to-date (v{TARGET_STUB_VERSION}) in {repo_full_name}.") 188 | action_taken = "skipped_stub_up_to_date" 189 | elif existing_stub_file: 190 | commit_message = f"ci: 
Update CLA trigger workflow to v{TARGET_STUB_VERSION}" 191 | logging.info(f" Updating existing CLA stub '{STUB_WORKFLOW_PATH}' (Old: v{current_version_str}) in {repo_full_name}.") 192 | repo.update_file(STUB_WORKFLOW_PATH, commit_message, STUB_WORKFLOW_CONTENT_TEMPLATE, existing_stub_file.sha, branch=repo.default_branch) 193 | action_taken = "stub_updated" 194 | else: 195 | commit_message = f"ci: Add CLA trigger workflow v{TARGET_STUB_VERSION}" 196 | logging.info(f" CLA stub '{STUB_WORKFLOW_PATH}' not found. Creating in {repo_full_name}.") 197 | repo.create_file(STUB_WORKFLOW_PATH, commit_message, STUB_WORKFLOW_CONTENT_TEMPLATE, branch=repo.default_branch) 198 | action_taken = "stub_created" 199 | except GithubException as e: logging.error(f" GitHub API error (stub for non-permissive {repo_full_name}): Status {e.status}, Data {e.data}"); action_taken = f"error_api_non_permissive_{e.status}" 200 | except Exception as e: logging.error(f" Unexpected error (stub for non-permissive {repo_full_name}): {e}"); action_taken = "error_unknown_non_permissive" 201 | elif license_type == "permissive": 202 | logging.info(f" Permissive license ({spdx_id}). Ensuring CLA stub does NOT exist for {repo_full_name}.") 203 | try: 204 | existing_stub_file = repo.get_contents(STUB_WORKFLOW_PATH, ref=repo.default_branch) 205 | commit_message = f"ci: Remove CLA trigger workflow (license: {spdx_id} is permissive)" 206 | logging.info(f" Permissive license; removing existing CLA stub '{STUB_WORKFLOW_PATH}' from {repo_full_name}.") 207 | repo.delete_file(STUB_WORKFLOW_PATH, commit_message, existing_stub_file.sha, branch=repo.default_branch) 208 | action_taken = "stub_removed_permissive" 209 | except UnknownObjectException: logging.info(f" Permissive license; CLA stub '{STUB_WORKFLOW_PATH}' does not exist. 
No action needed."); action_taken = "skipped_permissive_no_stub" 210 | except GithubException as e: logging.error(f" GitHub API error (removing stub for permissive {repo_full_name}): Status {e.status}, Data {e.data}"); action_taken = f"error_api_permissive_{e.status}" 211 | except Exception as e: logging.error(f" Unexpected error (removing stub for permissive {repo_full_name}): {e}"); action_taken = "error_unknown_permissive" 212 | else: logging.warning(f" Unknown license type '{license_type}' for {repo_full_name} (SPDX/Code: {spdx_id}). No action on stub."); action_taken = "skipped_unknown_license_type" 213 | return action_taken 214 | 215 | # --- __main__ function --- 216 | # (This function remains unchanged) 217 | if __name__ == "__main__": 218 | repo_to_process = os.environ.get("TARGET_REPO_FULL_NAME") 219 | org_pat = os.environ.get("ORG_PAT") 220 | if not repo_to_process: logging.critical("CRITICAL: TARGET_REPO_FULL_NAME not set."); exit(1) 221 | if not org_pat: logging.critical("CRITICAL: ORG_PAT not set."); exit(1) 222 | max_retries = 1; final_status = "error_unknown_initial" 223 | for attempt in range(max_retries + 1): 224 | try: 225 | final_status = manage_stub(repo_to_process, org_pat) 226 | if not final_status.startswith("error_"): break 227 | except Exception as e: 228 | logging.error(f"Attempt {attempt+1} for {repo_to_process} failed with unhandled exception: {e}") 229 | final_status = f"error_unhandled_exception_attempt_{attempt+1}" 230 | if attempt < max_retries and final_status.startswith("error_"): 231 | sleep_duration = (attempt + 1) * 10 232 | logging.info(f"Retrying {repo_to_process} in {sleep_duration}s after status: {final_status}"); time.sleep(sleep_duration) 233 | elif attempt == max_retries: 234 | logging.error(f"All {max_retries+1} attempts failed for {repo_to_process}. 
Final status: {final_status}") 235 | print(f"REPO_PROCESSED_NAME={repo_to_process}") 236 | print(f"REPO_PROCESSED_STATUS={final_status}") 237 | if final_status.startswith("error_"): exit(1) 238 | 239 | 240 | -------------------------------------------------------------------------------- /.github/scripts/scan_org_licenses_licensee.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import json 4 | import tempfile 5 | import shutil 6 | import time 7 | import sys 8 | from github import Github, GithubException, RateLimitExceededException, UnknownObjectException 9 | 10 | MAX_RETRIES_CLONE = 3 11 | RETRY_DELAY_SECONDS = 10 12 | LICENSEE_CONFIDENCE_THRESHOLD = "70" # Lowered confidence threshold 13 | 14 | def run_command_robust(command_args, cwd=None, check_return_code=True, an_input=None): 15 | """ 16 | Runs a shell command, captures its output, and handles errors robustly. 17 | Returns a tuple: (success, stdout, stderr) 18 | """ 19 | try: 20 | process = subprocess.Popen( 21 | command_args, 22 | cwd=cwd, 23 | stdin=subprocess.PIPE if an_input else None, 24 | stdout=subprocess.PIPE, 25 | stderr=subprocess.PIPE, 26 | text=True, 27 | env=os.environ.copy() 28 | ) 29 | stdout, stderr = process.communicate(input=an_input) 30 | 31 | if check_return_code and process.returncode != 0: 32 | print(f"Command failed with exit code {process.returncode}: {' '.join(command_args)}") 33 | print(f"Stderr: {stderr.strip()}") 34 | return False, stdout.strip(), stderr.strip() 35 | return True, stdout.strip(), stderr.strip() 36 | 37 | except FileNotFoundError: 38 | print(f"Error: Command not found - {command_args[0]}. 
Ensure it's installed and in PATH.") 39 | return False, "", f"Command not found: {command_args[0]}" 40 | except Exception as e: 41 | print(f"An unexpected error occurred while running command {' '.join(command_args)}: {e}") 42 | return False, "", str(e) 43 | 44 | def extract_license_from_entry(license_entry_obj): 45 | """Helper to extract SPDX ID or name from a license object/dictionary.""" 46 | if not isinstance(license_entry_obj, dict): 47 | return None 48 | 49 | spdx_id = license_entry_obj.get("spdx_id") 50 | if spdx_id and spdx_id != "NOASSERTION": 51 | return spdx_id 52 | 53 | name = license_entry_obj.get("name") 54 | if name: 55 | return name 56 | return None 57 | 58 | 59 | def detect_license_with_licensee_cli(repo_dir_path): 60 | """Runs licensee detect in the given directory and parses the output.""" 61 | command = ["licensee", "detect", "--json", ".", f"--confidence={LICENSEE_CONFIDENCE_THRESHOLD}"] 62 | success, stdout_raw, stderr_raw = run_command_robust(command, cwd=repo_dir_path, check_return_code=False) 63 | 64 | if not stdout_raw and not success: 65 | print(f"Licensee CLI failed to execute in {repo_dir_path}. Stderr: {stderr_raw}") 66 | return "LICENSEE_EXECUTION_ERROR" 67 | 68 | json_output_str = stdout_raw.strip() 69 | if not json_output_str or json_output_str == "null": 70 | if "No license found" in stderr_raw.lower(): 71 | return "NONE_FOUND_BY_LICENSEE" 72 | print(f"Licensee produced empty or null JSON output for {repo_dir_path} with confidence {LICENSEE_CONFIDENCE_THRESHOLD}. 
Stderr: {stderr_raw}") 73 | return "LICENSEE_EMPTY_OUTPUT" 74 | 75 | try: 76 | license_data = json.loads(json_output_str) 77 | if not license_data: 78 | return "NONE_FOUND_BY_LICENSEE" # Empty JSON object means no license 79 | 80 | # print(f"DEBUG: Full licensee JSON for {repo_dir_path} (Confidence: {LICENSEE_CONFIDENCE_THRESHOLD}): {json_output_str}") 81 | 82 | # Attempt 1: "matched_license" - This is usually licensee's primary determination 83 | matched_license_obj = license_data.get("matched_license") 84 | license_id_from_matched = extract_license_from_entry(matched_license_obj) 85 | if license_id_from_matched: 86 | # print(f"Found via 'matched_license': {license_id_from_matched} for {repo_dir_path}") 87 | return license_id_from_matched 88 | 89 | # Attempt 2: "licenses" array - If matched_license is null or not conclusive 90 | licenses_array = license_data.get("licenses") 91 | if isinstance(licenses_array, list) and licenses_array: 92 | # print(f"DEBUG: 'matched_license' was not conclusive for {repo_dir_path}. 
Examining 'licenses' array (length {len(licenses_array)}).") 93 | 94 | def get_confidence(lic_entry_dict): # Renamed for clarity 95 | conf = lic_entry_dict.get("confidence") 96 | try: return float(conf) if conf is not None else 0.0 97 | except (ValueError, TypeError): return 0.0 98 | 99 | # Filter out entries that are not dictionaries before sorting 100 | valid_license_entries = [entry for entry in licenses_array if isinstance(entry, dict)] 101 | 102 | if not valid_license_entries: 103 | # print(f"DEBUG: 'licenses' array for {repo_dir_path} contained no valid dictionary entries.") 104 | # Fall through to check matched_files or return NONE_FOUND 105 | pass 106 | else: 107 | sorted_licenses = sorted( 108 | valid_license_entries, 109 | key=lambda lic_entry_dict: (lic_entry_dict.get("featured") is True, get_confidence(lic_entry_dict)), 110 | reverse=True 111 | ) 112 | 113 | if sorted_licenses: # Should always be true if valid_license_entries was not empty 114 | best_license_entry = sorted_licenses[0] 115 | license_id_from_array = extract_license_from_entry(best_license_entry) 116 | if license_id_from_array: 117 | # print(f"Found via 'licenses' array (best after sort): {license_id_from_array} for {repo_dir_path}") 118 | return license_id_from_array 119 | 120 | # Attempt 3: Check "matched_files" array as a fallback if the above didn't yield anything. 121 | # This is less ideal for a single repository-wide license but can be an indicator. 122 | # We'll take the first usable license found in any matched file. 123 | matched_files_array = license_data.get("matched_files") 124 | if isinstance(matched_files_array, list) and matched_files_array: 125 | # print(f"DEBUG: No clear license from 'matched_license' or 'licenses'. 
Checking 'matched_files' for {repo_dir_path}.") 126 | for file_match_entry in matched_files_array: 127 | if isinstance(file_match_entry, dict): 128 | license_in_file_obj = file_match_entry.get("license") # The 'license' object within a matched_file 129 | license_id_from_file = extract_license_from_entry(license_in_file_obj) 130 | if license_id_from_file: 131 | # print(f"Found via 'matched_files[x].license': {license_id_from_file} from file {file_match_entry.get('filename')} for {repo_dir_path}") 132 | return license_id_from_file 133 | # print(f"DEBUG: 'matched_files' array was present for {repo_dir_path} but no usable ID found within its entries' 'license' objects.") 134 | 135 | 136 | # If none of the above parsing strategies yielded a result 137 | print(f"DEBUG: No conclusive license found after all parsing strategies for {repo_dir_path} (Confidence: {LICENSEE_CONFIDENCE_THRESHOLD}).") 138 | print(f"DEBUG: Full licensee JSON for {repo_dir_path}: {json_output_str}") # This log is CRITICAL 139 | return "NONE_FOUND_BY_LICENSEE" 140 | 141 | except json.JSONDecodeError: 142 | print(f"Error decoding JSON from licensee output for {repo_dir_path}: {json_output_str}") 143 | return "LICENSEE_JSON_ERROR" 144 | except Exception as e: 145 | print(f"Unexpected error parsing licensee output for {repo_dir_path}: {e}. JSON was: {json_output_str}") 146 | return "LICENSEE_PARSE_ERROR" 147 | 148 | 149 | 150 | def main(): 151 | organization_name = os.environ.get("ORGANIZATION_TO_SCAN") 152 | github_token = os.environ.get("GH_TOKEN_FOR_SCAN") 153 | output_filename = os.environ.get("OUTPUT_FILENAME_TO_USE", "organization_public_licenses_licensee.json") 154 | 155 | if not organization_name: 156 | print("Error: ORGANIZATION_TO_SCAN environment variable is not set.") 157 | sys.exit(1) 158 | if not github_token: 159 | print("Error: GH_TOKEN_FOR_SCAN environment variable is not set. 
Token is needed for API and git operations.") 160 | sys.exit(1) 161 | 162 | print(f"Python script starting scan for organization: {organization_name} using licensee CLI with confidence >= {LICENSEE_CONFIDENCE_THRESHOLD}%") 163 | print(f"Output file will be: {output_filename}") 164 | 165 | g = None 166 | try: 167 | g = Github(github_token) 168 | try: 169 | user = g.get_user() 170 | print(f"Authenticated to GitHub API as: {user.login}") 171 | rate_limit_info = g.get_rate_limit().core 172 | reset_time_str = rate_limit_info.reset.strftime('%Y-%m-%d %H:%M:%S UTC') if rate_limit_info.reset else 'N/A' 173 | print(f"API Rate limit: {rate_limit_info.remaining}/{rate_limit_info.limit}, Resets at: {reset_time_str}") 174 | except GithubException as ge_user: 175 | is_integration_error = ge_user.status == 403 and "integration" in str(ge_user.data).lower() 176 | is_forbidden_generic = ge_user.status == 403 177 | 178 | if is_integration_error: 179 | print(f"Warning (non-critical): Could not get authenticated user info (g.get_user()): {ge_user.status} - {ge_user.data}. This can happen with GITHUB_TOKEN. Proceeding...") 180 | elif is_forbidden_generic: 181 | print(f"Warning (potentially critical): GET /user failed with 403 Forbidden: {ge_user.data}. The provided token may lack 'read:user' or similar scope if it's a PAT. Proceeding cautiously...") 182 | else: 183 | print(f"Error during g.get_user() call: {ge_user.status} - {ge_user.data}") 184 | if ge_user.status == 401: 185 | print("This is a 401 Unauthorized error. The token is likely invalid or expired. 
Exiting.") 186 | sys.exit(1) 187 | print("Proceeding, but initial user verification failed with an unexpected error.") 188 | 189 | if g: 190 | try: 191 | rate_limit_info = g.get_rate_limit().core 192 | reset_time_str = rate_limit_info.reset.strftime('%Y-%m-%d %H:%M:%S UTC') if rate_limit_info.reset else 'N/A' 193 | print(f"API Rate limit (fetched separately): {rate_limit_info.remaining}/{rate_limit_info.limit}, Resets at: {reset_time_str}") 194 | except Exception as e_rl: 195 | print(f"Warning: Could not fetch rate limit information separately: {e_rl}") 196 | except Exception as e_user_other: 197 | print(f"Unexpected error during g.get_user() or initial rate limit check: {e_user_other}") 198 | print("Proceeding despite this initial error.") 199 | 200 | except Exception as e_init: 201 | print(f"CRITICAL Error initializing PyGithub object with token: {e_init}. This usually means the token is malformed or there's a fundamental issue with PyGithub or network.") 202 | sys.exit(1) 203 | 204 | if not g: 205 | print("CRITICAL: PyGithub object (g) could not be initialized. 
Exiting.") 206 | sys.exit(1) 207 | 208 | all_licenses_info = [] 209 | repo_count = 0 210 | processed_repo_count = 0 211 | 212 | try: 213 | org = g.get_organization(organization_name) 214 | print(f"Successfully fetched organization object for: {org.login}") 215 | 216 | repos_paginator = org.get_repos(type="public") 217 | print("Starting to iterate through public repositories...") 218 | 219 | for repo in repos_paginator: 220 | repo_count += 1 221 | print("-----------------------------------------------------") 222 | print(f"Processing repository: {repo.full_name} (Discovered: {repo_count})") 223 | 224 | if repo.archived: 225 | print(f"Skipping archived repository: {repo.full_name}") 226 | all_licenses_info.append({"repository_name": repo.name, "license": "ARCHIVED_REPO_SKIPPED"}) 227 | continue 228 | 229 | if repo.size == 0: 230 | print(f"Skipping potentially empty repository (size 0 KB): {repo.full_name}") 231 | all_licenses_info.append({"repository_name": repo.name, "license": "EMPTY_REPO_SKIPPED"}) 232 | continue 233 | 234 | current_license_info = {"repository_name": repo.name, "license": "ERROR_PROCESSING_REPO"} 235 | temp_clone_dir = tempfile.mkdtemp(prefix=f"repo_licensee_{repo.name.replace('/', '_')}_") 236 | 237 | cloned_successfully = False 238 | for attempt in range(1, MAX_RETRIES_CLONE + 1): 239 | clone_command = ["git", "clone", "--depth", "1", "--quiet", repo.clone_url, temp_clone_dir] 240 | 241 | if attempt > 1: 242 | for item_name in os.listdir(temp_clone_dir): 243 | item_path = os.path.join(temp_clone_dir, item_name) 244 | try: 245 | if os.path.isdir(item_path) and not os.path.islink(item_path): 246 | shutil.rmtree(item_path) 247 | else: 248 | os.unlink(item_path) 249 | except Exception as e_rm: 250 | print(f"Warning: Failed to remove {item_path} for retry: {e_rm}") 251 | 252 | success, stdout_clone, stderr_clone = run_command_robust(clone_command, check_return_code=True) 253 | 254 | if success: 255 | cloned_successfully = True 256 | break 257 | else: 
258 | print(f"Clone failed for {repo.full_name} (attempt {attempt}). Stderr: {stderr_clone}") 259 | if attempt < MAX_RETRIES_CLONE: 260 | time.sleep(RETRY_DELAY_SECONDS) 261 | else: 262 | print(f"Max retries reached for cloning {repo.full_name}.") 263 | current_license_info["license"] = "ERROR_CLONING" 264 | 265 | if cloned_successfully: 266 | license_id = detect_license_with_licensee_cli(temp_clone_dir) 267 | current_license_info["license"] = license_id 268 | print(f"License for {repo.name}: {license_id}") 269 | 270 | all_licenses_info.append(current_license_info) 271 | processed_repo_count +=1 272 | 273 | try: 274 | shutil.rmtree(temp_clone_dir) 275 | except Exception as e_clean: 276 | print(f"Error cleaning up temp directory {temp_clone_dir}: {e_clean}") 277 | 278 | except UnknownObjectException: 279 | print(f"Error: Organization '{organization_name}' not found or not accessible via API.") 280 | except RateLimitExceededException as rle: 281 | print(f"Error: GitHub API rate limit exceeded during repository processing. {rle.data}") 282 | except GithubException as ge: 283 | print(f"GitHub API error during repository processing: {ge.status} - {ge.data}") 284 | except Exception as e: 285 | print(f"An unexpected error occurred during main processing loop: {e}") 286 | finally: 287 | with open(output_filename, "w") as f_out: 288 | json.dump(all_licenses_info, f_out, indent=2) 289 | print(f"Output file '{output_filename}' written with {len(all_licenses_info)} entries (discovered {repo_count} repos, attempted to process {processed_repo_count}).") 290 | 291 | print("-----------------------------------------------------") 292 | print(f"Python + Licensee CLI: Public license scan finished. 
Report: {output_filename}") 293 | if repo_count == 0: 294 | print("No public repositories were discovered for this organization.") 295 | elif processed_repo_count == 0 and repo_count > 0: 296 | print(f"Discovered {repo_count} repositories, but none were processed (e.g., all archived/empty or errors before processing).") 297 | 298 | if __name__ == "__main__": 299 | main() 300 | 301 | -------------------------------------------------------------------------------- /.github/workflows/dependency-scan.yml: -------------------------------------------------------------------------------- 1 | name: Dependency Scan 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * 1' # Run weekly on Monday at midnight UTC 6 | workflow_dispatch: 7 | inputs: 8 | org_name: 9 | description: 'GitHub Organization Name (optional)' 10 | required: false 11 | repo_list: 12 | description: 'Comma-separated list of repositories (optional)' 13 | required: false 14 | log_level: 15 | description: 'Logging level' 16 | required: false 17 | type: choice 18 | options: 19 | - INFO 20 | - DEBUG 21 | - WARNING 22 | - ERROR 23 | default: 'INFO' 24 | vulnerability_threshold: 25 | description: 'Number of vulnerabilities to trigger issue creation' 26 | required: false 27 | type: number 28 | default: 10 29 | 30 | permissions: 31 | security-events: read 32 | contents: write # Needed for committing the report 33 | issues: write # Needed for creating issues 34 | 35 | jobs: 36 | scan: 37 | runs-on: ubuntu-latest 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | with: 43 | repository: ${{ github.repository_owner }}/.github # Checkout .github repo 44 | ref: main # Or your default branch 45 | token: ${{ secrets.DEPENDENCY_SCAN_TOKEN }} # Use a PAT or GitHub App token with appropriate permissions! 
46 | 47 | - name: Set up Python 48 | uses: actions/setup-python@v5 49 | with: 50 | python-version: '3.10' 51 | cache: 'pip' 52 | cache-dependency-path: scripts/requirements.txt 53 | 54 | - name: Install dependencies 55 | run: | 56 | python -m pip install --upgrade pip 57 | pip install requests 58 | 59 | - name: Generate timestamp 60 | id: timestamp 61 | run: echo "timestamp=$(date +%Y%m%d_%H%M%S)" >> $GITHUB_OUTPUT 62 | 63 | - name: Run dependency scan 64 | id: run-scan 65 | env: 66 | GITHUB_TOKEN: ${{ secrets.DEPENDENCY_SCAN_TOKEN }} 67 | ORG_NAME: ${{ github.event.inputs.org_name || github.repository_owner }} 68 | REPO_LIST: ${{ github.event.inputs.repo_list }} 69 | REPORT_FILE: "vulnerability_report_${{ steps.timestamp.outputs.timestamp }}.csv" 70 | run: | 71 | 72 | COMMAND="python scripts/dependency_scanner.py \ 73 | --token $GITHUB_TOKEN \ 74 | --output $REPORT_FILE \ 75 | --log-level ${{ github.event.inputs.log_level || 'INFO' }} \ 76 | --max-workers ${{ github.event.inputs.max_workers || 10 }} \ 77 | --max-retries 3" 78 | 79 | if [[ -n "$ORG_NAME" ]]; then 80 | COMMAND="$COMMAND --org $ORG_NAME" 81 | fi 82 | 83 | if [[ -n "$REPO_LIST" ]]; then 84 | COMMAND="$COMMAND --repo-list $REPO_LIST" 85 | fi 86 | 87 | $COMMAND 88 | echo "report_path=reports/$REPORT_FILE" >> $GITHUB_OUTPUT 89 | 90 | 91 | - name: Check for No Repositories 92 | id: check-repos 93 | if: success() 94 | run: | 95 | if grep -q "__NO_REPOS__" ${{ steps.run-scan.outputs.report_path }}/../output.txt; then 96 | echo "No repositories found in the organization. Exiting." 
97 | exit 1 98 | fi 99 | 100 | - name: Process report statistics (inline) 101 | id: stats 102 | if: success() && steps.check-repos.outcome == 'success' 103 | run: | 104 | STATS=$(grep "__STATS_START__" ${{ steps.run-scan.outputs.report_path }}/../output.txt | sed 's/__STATS_START__//' | sed 's/__STATS_END__//') 105 | echo "total_vulnerabilities=$(echo $STATS | cut -d',' -f1 | cut -d'=' -f2)" >> $GITHUB_OUTPUT 106 | echo "processed_repos=$(echo $STATS | cut -d',' -f2 | cut -d'=' -f2)" >> $GITHUB_OUTPUT 107 | echo "Total vulnerabilities found: $(echo $STATS | cut -d',' -f1 | cut -d'=' -f2)" 108 | echo "Processed repos: $(echo $STATS | cut -d',' -f2 | cut -d'=' -f2)" 109 | 110 | 111 | - name: Create summary issue (using github-script) 112 | if: success() && steps.check-repos.outcome == 'success' && steps.stats.outputs.total_vulnerabilities > inputs.vulnerability_threshold 113 | uses: actions/github-script@v7 114 | with: 115 | script: | 116 | const stats = { 117 | total: '${{ steps.stats.outputs.total_vulnerabilities }}', 118 | processedRepos: '${{ steps.stats.outputs.processed_repos }}', 119 | }; 120 | 121 | const now = new Date(); 122 | const formattedDate = now.toLocaleDateString('en-US', { 123 | year: 'numeric', 124 | month: 'long', 125 | day: 'numeric' 126 | }); 127 | 128 | const body = ` 129 | # Dependency Vulnerability Report Summary 130 | 131 | Report generated on: ${now.toISOString()} 132 | 133 | ## Statistics 134 | - Total vulnerabilities found: ${stats.total} 135 | - Repositories processed: ${stats.processedRepos} 136 | 137 | ## Details 138 | - Report artifact: [Download report](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 139 | - Workflow run: [View details](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 140 | 141 | ## Configuration 142 | - Log level: ${{ github.event.inputs.log_level || 'INFO' }} 143 | - Vulnerability threshold: ${{ 
github.event.inputs.vulnerability_threshold || '10'}} 144 | `; 145 | 146 | await github.rest.issues.create({ 147 | owner: context.repo.owner, 148 | repo: context.repo.repo, 149 | title: \`⚠️ Dependency Vulnerability Report - \${formattedDate}\`, 150 | body: body, 151 | labels: ['dependency-vulnerability', 'report'] 152 | }); 153 | 154 | - name: Commit and Push Report 155 | if: success() && steps.check-repos.outcome == 'success' 156 | uses: stefanzweifel/git-auto-commit-action@v5 157 | with: 158 | commit_message: "Add dependency vulnerability report: ${{ steps.timestamp.outputs.timestamp }}" 159 | repository: ./ # Commit to the root of the checked-out repo 160 | file_pattern: reports/*.csv 161 | commit_user_name: GitHub Actions 162 | commit_user_email: actions@github.com 163 | commit_author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> 164 | push_options: '--force' 165 | token: ${{ secrets.DEPENDENCY_SCAN_TOKEN }} 166 | 167 | - name: Notify on failure 168 | if: failure() 169 | uses: actions/github-script@v7 170 | with: 171 | script: | 172 | const body = ` 173 | # 🚨 Dependency Vulnerability Report Generation Failed 174 | 175 | Workflow run failed at ${new Date().toISOString()} 176 | 177 | ## Details 178 | - Run ID: \`${context.runId}\` 179 | - Trigger: ${context.eventName} 180 | - Actor: @${context.actor} 181 | 182 | ## Links 183 | - [View run details](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 184 | - [View workflow file](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/blob/main/.github/workflows/dependency-scan.yml) 185 | 186 | Please check the workflow logs for detailed error information. 
187 | `; 188 | 189 | await github.rest.issues.create({ 190 | owner: context.repo.owner, 191 | repo: context.repo.repo, 192 | title: '🚨 Dependency Vulnerability Report Generation Failed', 193 | body: body, 194 | labels: ['dependency-vulnerability', 'failed'] 195 | }); 196 | 197 | - name: Clean up 198 | if: always() 199 | run: | 200 | echo "No clean up required." 201 | 202 | concurrency: 203 | group: ${{ github.workflow }}-${{ github.ref }} 204 | cancel-in-progress: true 205 | 206 | -------------------------------------------------------------------------------- /.github/workflows/license-cla-check.yml: -------------------------------------------------------------------------------- 1 | name: License Compliance & CLA Gate 2 | 3 | on: 4 | pull_request_target: 5 | types: [opened, reopened, synchronize] 6 | issue_comment: 7 | types: [created] 8 | 9 | permissions: 10 | contents: read 11 | pull-requests: write 12 | 13 | jobs: 14 | license-check: 15 | # Run on PR events or CLA-related comments 16 | if: | 17 | github.event_name == 'pull_request_target' || 18 | (github.event_name == 'issue_comment' && 19 | github.event.issue.pull_request && 20 | (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA')) 21 | runs-on: ubuntu-latest 22 | steps: 23 | - name: Debug Event Payload 24 | run: | 25 | echo "Event Name: ${{ github.event_name }}" 26 | echo "Repository: ${{ github.repository }}" 27 | echo "Enabled Repos: ${{ vars.ENABLED_REPOS }}" 28 | echo "Excluded Repos: ${{ vars.EXCLUDED_REPOS }}" 29 | 30 | - name: Check if repository is enabled or excluded 31 | id: repo_filter 32 | shell: python 33 | env: 34 | ENABLED_REPOS: ${{ vars.ENABLED_REPOS || '[]' }} 35 | EXCLUDED_REPOS: ${{ vars.EXCLUDED_REPOS || '[]' }} 36 | CURRENT_REPO: ${{ github.repository }} 37 | run: | 38 | import os 39 | import json 40 | 41 | try: 42 | # Load repository lists from environment variables 43 | enabled_repos = 
json.loads(os.environ['ENABLED_REPOS']) 44 | excluded_repos = json.loads(os.environ['EXCLUDED_REPOS']) 45 | current_repo = os.environ['CURRENT_REPO'] 46 | 47 | # Check enabled repos first if specified 48 | if enabled_repos: 49 | if current_repo not in enabled_repos: 50 | print(f"⚠️ Skipping {current_repo} - not in enabled list") 51 | print(f"Enabled repos: {', '.join(enabled_repos)}") 52 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 53 | fh.write('should_run=false') 54 | exit(0) 55 | 56 | # Then check excluded repos 57 | if current_repo in excluded_repos: 58 | print(f"⚠️ Skipping {current_repo} - in excluded list") 59 | print(f"Excluded repos: {', '.join(excluded_repos)}") 60 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 61 | fh.write('should_run=false') 62 | exit(0) 63 | 64 | # Repository is allowed to run 65 | print(f"✅ Running checks for {current_repo}") 66 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 67 | fh.write('should_run=true') 68 | 69 | except json.JSONDecodeError as e: 70 | print(f"❌ Error parsing repository lists: {e}") 71 | print(f"ENABLED_REPOS: {os.environ['ENABLED_REPOS']}") 72 | print(f"EXCLUDED_REPOS: {os.environ['EXCLUDED_REPOS']}") 73 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 74 | fh.write('should_run=false') 75 | exit(1) 76 | 77 | - name: Get PR SHA 78 | id: pr_sha 79 | if: steps.repo_filter.outputs.should_run == 'true' 80 | shell: python 81 | env: 82 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 83 | EVENT_NAME: ${{ github.event_name }} 84 | EVENT_PATH: ${{ github.event_path }} 85 | REPOSITORY: ${{ github.repository }} 86 | run: | 87 | import os 88 | import json 89 | from github import Github 90 | 91 | try: 92 | event_name = os.environ['EVENT_NAME'] 93 | 94 | # For pull_request_target events, get SHA directly from event payload 95 | if event_name == 'pull_request_target': 96 | with open(os.environ['EVENT_PATH']) as f: 97 | event_data = json.load(f) 98 | sha = event_data['pull_request']['head']['sha'] 99 | 
print(f"✅ Got SHA from pull_request_target event: {sha}") 100 | 101 | # For issue_comment events, need to fetch PR details 102 | else: 103 | g = Github(os.environ['GITHUB_TOKEN']) 104 | repo = g.get_repo(os.environ['REPOSITORY']) 105 | with open(os.environ['EVENT_PATH']) as f: 106 | event_data = json.load(f) 107 | pr_number = event_data['issue']['number'] 108 | 109 | pr = repo.get_pull(pr_number) 110 | sha = pr.head.sha 111 | print(f"✅ Got SHA from PR #{pr_number}: {sha}") 112 | 113 | # Write SHA to output 114 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 115 | fh.write(f'sha={sha}') 116 | 117 | except Exception as e: 118 | print(f"❌ Error getting PR SHA: {e}") 119 | exit(1) 120 | 121 | - name: Checkout PR Head 122 | if: steps.repo_filter.outputs.should_run == 'true' 123 | uses: actions/checkout@v4 124 | with: 125 | ref: ${{ steps.pr_sha.outputs.sha }} 126 | fetch-depth: 0 127 | 128 | - name: Fuzzy License Validation 129 | if: steps.repo_filter.outputs.should_run == 'true' 130 | id: license_check 131 | uses: ./.github/actions/check-license 132 | 133 | - name: Enforce CLA 134 | if: steps.license_check.outputs.license_status == 'non-permissive' && steps.repo_filter.outputs.should_run == 'true' 135 | uses: contributor-assistant/github-action@v2.6.1 136 | env: 137 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 138 | with: 139 | path-to-signatures: 'signatures/version1/${{ github.repository }}/cla.json' 140 | path-to-document: 'https://${{ github.repository_owner }}.github.io/cla-docs/GenericCLA.html' 141 | branch: 'main' 142 | allowlist: 'org-admin,bot*' 143 | # Optional inputs (uncomment and customize as needed) 144 | # remote-organization-name: 'your-remote-org' 145 | # remote-repository-name: 'your-remote-repo' 146 | # create-file-commit-message: 'Creating file for storing CLA Signatures' 147 | # signed-commit-message: '$contributorName has signed the CLA in $owner/$repo#$pullRequestNo' 148 | # custom-notsigned-prcomment: 'Please sign the CLA to contribute.' 
149 | # custom-pr-sign-comment: 'I have read the CLA Document and I hereby sign the CLA' 150 | # custom-allsigned-prcomment: 'All contributors have signed the CLA.' 151 | # lock-pullrequest-aftermerge: false 152 | # use-dco-flag: false 153 | -------------------------------------------------------------------------------- /.github/workflows/manage-cla-stubs.yml: -------------------------------------------------------------------------------- 1 | # File: .github/.github/workflows/manage-cla-stubs.yml 2 | # Purpose: Periodically manages CLA trigger stubs across organization repositories. 3 | # Builds the licensee Docker image within each processing job. 4 | 5 | name: Manage CLA Stubs Org-Wide 6 | 7 | on: 8 | # schedule: 9 | # - cron: '0 3 * * *' 10 | workflow_dispatch: 11 | inputs: 12 | specific_repos: 13 | description: 'Comma-separated list of specific repos (owner/repo1,owner/repo2) to process. Overrides INCLUDE/EXCLUDE.' 14 | required: false 15 | default: '' 16 | licensee_version_tag: 17 | description: 'Licensee git tag to build (e.g., v9.16.1). Default is v9.16.1.' 
18 | required: false 19 | default: 'v9.16.1' # Pin to a known stable version of licensee 20 | 21 | permissions: {} 22 | 23 | env: 24 | EXCLUDE_REPOS_CSV: ${{ vars.EXCLUDE_REPOS || '.github' }} 25 | INCLUDE_REPOS_CSV: ${{ vars.INCLUDE_REPOS || '' }} 26 | PERMISSIVE_SPDX_IDS_CSV: ${{ vars.PERMISSIVE_SPDX_IDS || 'MIT,Apache-2.0,BSD-3-Clause,ISC,BSD-2-Clause,CC0-1.0,Unlicense' }} 27 | # Define the image tag to be used consistently 28 | LOCAL_LICENSEE_IMAGE_TAG: 'local-org-licensee:latest' # This tag will be built on each runner 29 | 30 | jobs: 31 | discover_repositories: 32 | name: Discover Repositories 33 | runs-on: ubuntu-latest 34 | outputs: 35 | repositories_json: ${{ steps.get_repos.outputs.repo_list_json }} 36 | repository_count: ${{ steps.get_repos.outputs.repo_count }} 37 | steps: 38 | - name: Get Organization Repositories List 39 | id: get_repos 40 | uses: actions/github-script@v7.0.1 41 | env: 42 | INCLUDE_REPOS_CSV: ${{ env.INCLUDE_REPOS_CSV }} 43 | EXCLUDE_REPOS_CSV: ${{ env.EXCLUDE_REPOS_CSV }} 44 | SPECIFIC_REPOS_INPUT: ${{ github.event.inputs.specific_repos }} 45 | with: 46 | github-token: ${{ secrets.ORG_PAT }} 47 | script: | 48 | // ... (JavaScript for discovery - same as before) ... 
49 | const includeReposList = (process.env.INCLUDE_REPOS_CSV || "").split(',').map(r => r.trim()).filter(r => r); 50 | const excludeReposList = (process.env.EXCLUDE_REPOS_CSV || "").split(',').map(r => r.trim()).filter(r => r); 51 | const specificReposInput = (process.env.SPECIFIC_REPOS_INPUT || "").split(',').map(r => r.trim()).filter(r => r); 52 | let reposToProcess = []; 53 | 54 | if (specificReposInput.length > 0) { 55 | console.log("Processing only specifically provided repositories (from manual input):", specificReposInput); 56 | reposToProcess = specificReposInput.map(repoName => { 57 | if (repoName.includes('/')) return repoName; 58 | return `${context.repo.owner}/${repoName}`; 59 | }); 60 | } else if (includeReposList.length > 0) { 61 | console.log("Processing only explicitly included repositories (from VARS_INCLUDE_REPOS):", includeReposList); 62 | reposToProcess = includeReposList.map(repoName => `${context.repo.owner}/${repoName}`); 63 | } else { 64 | console.log("Fetching all non-archived repositories for organization:", context.repo.owner); 65 | for await (const response of github.paginate.iterator(github.rest.repos.listForOrg, { 66 | org: context.repo.owner, type: 'all', per_page: 100 67 | })) { 68 | for (const repo of response.data) { 69 | if (!repo.archived) { reposToProcess.push(repo.full_name); } 70 | } 71 | } 72 | console.log(`Found ${reposToProcess.length} non-archived repositories in the organization.`); 73 | } 74 | 75 | const finalRepos = reposToProcess.filter(fullName => { 76 | const repoName = fullName.split('/')[1]; 77 | return !excludeReposList.includes(repoName); 78 | }); 79 | 80 | console.log(`Final list of ${finalRepos.length} repositories to process:`, finalRepos); 81 | core.setOutput('repo_list_json', JSON.stringify(finalRepos)); 82 | core.setOutput('repo_count', finalRepos.length); 83 | 84 | await core.summary 85 | .addHeading('Repository Discovery Phase') 86 | .addRaw(`Discovered ${finalRepos.length} repositories to process based 
on include/exclude rules.`) 87 | .addSeparator().write(); 88 | 89 | process_repositories: 90 | name: Process Repository (${{ matrix.repository_full_name }}) 91 | needs: [discover_repositories] # Does not need a separate build job anymore 92 | if: needs.discover_repositories.outputs.repository_count > 0 93 | runs-on: ubuntu-latest 94 | strategy: 95 | matrix: 96 | repository_full_name: ${{ fromJson(needs.discover_repositories.outputs.repositories_json) }} 97 | fail-fast: false 98 | 99 | steps: 100 | - name: Checkout .github repo (for our management scripts) 101 | uses: actions/checkout@v4.1.1 102 | # This checks out the content of THIS .github repository. 103 | 104 | - name: Checkout licensee source code 105 | uses: actions/checkout@v4.1.1 106 | with: 107 | repository: licensee/licensee # The official licensee repository 108 | path: licensee-src # Checkout to a subdirectory to avoid conflicts 109 | # Use the version tag from workflow_dispatch input or a default. 110 | ref: ${{ github.event.inputs.licensee_version_tag || 'v9.16.1' }} 111 | # Pin to a specific tag of licensee for stability (e.g., 'v9.16.1') 112 | 113 | - name: Build licensee Docker image locally 114 | run: | 115 | echo "Building licensee Docker image (${{ env.LOCAL_LICENSEE_IMAGE_TAG }}) from ref: ${{ github.event.inputs.licensee_version_tag || 'v9.16.1' }}" 116 | docker build ./licensee-src --tag ${{ env.LOCAL_LICENSEE_IMAGE_TAG }} 117 | echo "Successfully built Docker image: ${{ env.LOCAL_LICENSEE_IMAGE_TAG }}" 118 | # Verify image exists locally 119 | docker images ${{ env.LOCAL_LICENSEE_IMAGE_TAG }} 120 | 121 | - name: DEBUG - Test ORG_PAT Access to ${{ matrix.repository_full_name }} 122 | if: matrix.repository_full_name == 'vmware/test-cla-gpl2' # Only run for one repo for testing 123 | uses: actions/github-script@v7.0.1 124 | with: 125 | github-token: ${{ secrets.ORG_PAT }} 126 | script: | 127 | const repoFullName = '${{ matrix.repository_full_name }}'; 128 | const owner = 
repoFullName.split('/')[0]; 129 | const repo = repoFullName.split('/')[1]; 130 | core.info(`Attempting to get repo details for ${repoFullName} using ORG_PAT.`); 131 | try { 132 | const { data: repoData } = await github.rest.repos.get({ owner, repo }); 133 | core.info(`Successfully fetched repo data: ${JSON.stringify(repoData.name)}`); 134 | 135 | core.info(`Attempting to get root contents for ${repoFullName}`); 136 | const { data: contents } = await github.rest.repos.getContent({ owner, repo, path: '' }); 137 | core.info(`Successfully fetched root contents. Number of items: ${contents.length}`); 138 | 139 | // Attempt a "safe" write-like check, e.g., trying to get a specific workflow file, 140 | // or even just trying to create a dummy branch (which requires write access). 141 | // For now, just reading content which `ORG_PAT` *should* be able to do. 142 | // The Python script is failing on a repo.create_file() call. 143 | } catch (error) { 144 | core.setFailed(`ORG_PAT test failed for ${repoFullName}: ${error.message}`); 145 | core.error(JSON.stringify(error)); // Log the full error object 146 | } 147 | 148 | - name: Set up Python 149 | uses: actions/setup-python@v5.0.0 150 | with: 151 | python-version: '3.10' 152 | 153 | - name: Install Python dependencies 154 | run: pip install PyGithub==1.59.1 155 | 156 | - name: Check license and manage CLA stub for ${{ matrix.repository_full_name }} 157 | id: manage_stub_step 158 | run: python .github/scripts/check_and_install_stub.py 159 | env: 160 | TARGET_REPO_FULL_NAME: ${{ matrix.repository_full_name }} 161 | ORG_PAT: ${{ secrets.ORG_PAT }} 162 | PERMISSIVE_SPDX_IDS: ${{ env.PERMISSIVE_SPDX_IDS_CSV }} 163 | GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }} 164 | # Pass the locally built image tag to the Python script. 
165 | LICENSEE_DOCKER_IMAGE: ${{ env.LOCAL_LICENSEE_IMAGE_TAG }} 166 | 167 | - name: Record action for ${{ matrix.repository_full_name }} to Job Summary 168 | if: always() # Run even if previous steps fail, to capture status. 169 | run: | 170 | echo "Debug: manage_stub_step stdout was: [${{ steps.manage_stub_step.outputs.stdout }}]" 171 | echo "Debug: manage_stub_step outcome was: [${{ steps.manage_stub_step.outcome }}]" 172 | 173 | # Attempt to parse the status from stdout if available 174 | # Initialize FINAL_STATUS to reflect the outcome if stdout parsing fails 175 | FINAL_STATUS="status_unknown_due_to_step_failure" 176 | if [[ "${{ steps.manage_stub_step.outcome }}" == "success" ]]; then 177 | FINAL_STATUS="success_script_did_not_error" # Default if successful but no specific status 178 | fi 179 | 180 | # Try to get the more specific status if stdout is available 181 | # Use a temporary variable to avoid issues if grep finds nothing 182 | PYTHON_STDOUT_CAPTURE="${{ steps.manage_stub_step.outputs.stdout }}" 183 | STATUS_LINE=$(echo "${PYTHON_STDOUT_CAPTURE}" | grep REPO_PROCESSED_STATUS= || true) # Prevent grep from failing the line if no match 184 | 185 | if [[ -n "${STATUS_LINE}" ]]; then # If STATUS_LINE is not empty 186 | PARSED_STATUS=${STATUS_LINE#*=} 187 | if [[ -n "${PARSED_STATUS}" ]]; then # If PARSED_STATUS is not empty after extraction 188 | FINAL_STATUS="${PARSED_STATUS}" 189 | fi 190 | elif [[ "${{ steps.manage_stub_step.outcome }}" == "failure" ]]; then 191 | FINAL_STATUS="script_failed_see_logs" # If script failed and we couldn't parse specific status 192 | fi 193 | 194 | ICON="ℹ️ Unknown" # Default icon 195 | if [[ "${{ steps.manage_stub_step.outcome }}" == "failure" ]]; then 196 | ICON="❌ Error" 197 | elif [[ "${{ steps.manage_stub_step.outcome }}" == "success" ]]; then 198 | # If successful, refine icon based on parsed status 199 | if [[ "${FINAL_STATUS}" == "skipped_"* ]]; then ICON="⚪ Skipped"; 200 | elif [[ "${FINAL_STATUS}" == *"updated"* 
|| "${FINAL_STATUS}" == *"created"* || "${FINAL_STATUS}" == *"removed"* ]]; then ICON="📝 Action"; 201 | elif [[ "${FINAL_STATUS}" == "success_script_did_not_error" || "${FINAL_STATUS}" == "skipped_stub_up_to_date" || "${FINAL_STATUS}" == "skipped_permissive_no_stub" ]]; then ICON="✅ Success/NoOp"; 202 | else ICON="✅ Success (Status: ${FINAL_STATUS})"; fi # Catch other success statuses 203 | fi 204 | 205 | echo "#### ${{ matrix.repository_full_name }}" >> $GITHUB_STEP_SUMMARY 206 | echo "- Parsed Status: ${FINAL_STATUS}" >> $GITHUB_STEP_SUMMARY # Use the determined FINAL_STATUS 207 | echo "- Step Outcome: ${ICON} (${{ steps.manage_stub_step.outcome }})" >> $GITHUB_STEP_SUMMARY 208 | echo "---" >> $GITHUB_STEP_SUMMARY 209 | 210 | summarize_run: 211 | name: Final Run Summary 212 | if: always() 213 | needs: [discover_repositories, process_repositories] # Removed build_licensee_image from needs here 214 | runs-on: ubuntu-latest 215 | steps: 216 | - name: Create final summary of the run 217 | run: | 218 | echo "### CLA Stub Management Full Run Summary" >> $GITHUB_STEP_SUMMARY 219 | echo "- Total Repositories Discovered for Processing: **${{ needs.discover_repositories.outputs.repository_count || 0 }}**" >> $GITHUB_STEP_SUMMARY 220 | # No separate build job status to report now, as it's part of each matrix job. 221 | # You could infer overall build success if all matrix jobs succeeded, but that's indirect. 222 | 223 | RESULT_MSG="ℹ️ **Overall Repository Processing Result:** Status - ${{ needs.process_repositories.result }}." 224 | if [[ "${{ needs.process_repositories.result }}" == "failure" ]]; then 225 | RESULT_MSG="⚠️ **Overall Repository Processing Result:** At least one repository failed processing. This could be due to image build, license check, or API errors." 226 | elif [[ "${{ needs.process_repositories.result }}" == "success" ]]; then 227 | RESULT_MSG="✅ **Overall Repository Processing Result:** All processed repositories completed without error." 
228 | elif [[ "${{ needs.process_repositories.result }}" == "skipped" ]]; then 229 | RESULT_MSG="⚪ **Overall Repository Processing Result:** Processing job was skipped." 230 | fi 231 | echo "$RESULT_MSG Check individual 'Process Repository' job logs and their summaries for details." >> $GITHUB_STEP_SUMMARY 232 | -------------------------------------------------------------------------------- /.github/workflows/old/license-cal-check.yml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.github/workflows/python_licensee_scan.yml: -------------------------------------------------------------------------------- 1 | name: Python + Licensee CLI Org Public License Scan 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | organization: 7 | description: 'GitHub organization name (e.g., "my-org")' 8 | required: true 9 | type: string 10 | github_token: 11 | description: 'Optional GitHub PAT. If empty for manual run, secrets.GITHUB_TOKEN will be used.' 
12 | required: false 13 | type: string 14 | output_filename: 15 | description: 'Name of the output JSON file' 16 | required: false 17 | default: 'organization_public_licenses_licensee.json' 18 | type: string 19 | schedule: 20 | - cron: '0 2 * * 1' # Example: Run every Monday at 2 AM UTC 21 | 22 | jobs: 23 | scan_licenses_with_licensee_cli: 24 | runs-on: ubuntu-latest 25 | permissions: 26 | contents: read # For checkout and GITHUB_TOKEN to read public repo data & clone 27 | 28 | steps: 29 | - name: Checkout repository (to get the Python script) 30 | uses: actions/checkout@v4 31 | 32 | - name: Set up Python 33 | uses: actions/setup-python@v5 34 | with: 35 | python-version: '3.10' 36 | 37 | - name: Install Python dependencies 38 | run: pip install PyGithub 39 | 40 | - name: Set up Ruby and Bundler 41 | uses: ruby/setup-ruby@v1 42 | with: 43 | ruby-version: '3.1' 44 | 45 | - name: Install licensee CLI 46 | run: gem install licensee 47 | 48 | - name: Install/Ensure GitHub CLI (for auth setup) 49 | run: | 50 | echo "Installing/Ensuring GitHub CLI..." 51 | if ! type -p gh &>/dev/null; then 52 | echo "GitHub CLI not found, installing..." 53 | sudo apt-get update -qq && sudo apt-get install -y -qq curl 54 | curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ 55 | && sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ 56 | && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ 57 | && sudo apt-get update -qq \ 58 | && sudo apt-get install -y -qq gh 59 | else 60 | echo "GitHub CLI already installed." 
61 | fi 62 | echo "GitHub CLI version:" 63 | gh --version 64 | echo "--- End GitHub CLI Install ---" 65 | 66 | - name: Prepare environment variables for Python script 67 | id: prep_env 68 | run: | 69 | echo "--- Start Prepare Environment Variables ---" 70 | _ORG_SOURCE="" 71 | _TOKEN_SOURCE="" 72 | _OUTPUT_FILENAME_VAL="" 73 | 74 | if [[ "${{ github.event_name }}" == "schedule" ]]; then 75 | if [ -z "${{ secrets.ORG_NAME_FOR_SCAN }}" ]; then 76 | echo "Error: ORG_NAME_FOR_SCAN secret is not set for scheduled run." 77 | exit 1 78 | fi 79 | echo "ORGANIZATION_TO_SCAN=${{ secrets.ORG_NAME_FOR_SCAN }}" >> $GITHUB_ENV 80 | _ORG_SOURCE="secrets.ORG_NAME_FOR_SCAN" 81 | 82 | echo "GH_TOKEN_FOR_SCAN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV 83 | _TOKEN_SOURCE="secrets.GITHUB_TOKEN (scheduled)" 84 | 85 | _OUTPUT_FILENAME_VAL="organization_public_licenses_licensee_scheduled.json" 86 | else # workflow_dispatch 87 | if [ -z "${{ github.event.inputs.organization }}" ]; then 88 | echo "Error: 'organization' input is not set for manual run." 
89 | exit 1 90 | fi 91 | echo "ORGANIZATION_TO_SCAN=${{ github.event.inputs.organization }}" >> $GITHUB_ENV 92 | _ORG_SOURCE="inputs.organization" 93 | 94 | if [ -n "${{ github.event.inputs.github_token }}" ]; then 95 | echo "GH_TOKEN_FOR_SCAN=${{ github.event.inputs.github_token }}" >> $GITHUB_ENV 96 | _TOKEN_SOURCE="inputs.github_token (manual)" 97 | else 98 | echo "GH_TOKEN_FOR_SCAN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV 99 | _TOKEN_SOURCE="secrets.GITHUB_TOKEN (manual fallback)" 100 | fi 101 | 102 | _OUTPUT_FILENAME_VAL="${{ github.event.inputs.output_filename }}" 103 | if [ -z "$_OUTPUT_FILENAME_VAL" ]; then # Use default if input is empty 104 | _OUTPUT_FILENAME_VAL="organization_public_licenses_licensee.json" 105 | fi 106 | fi 107 | echo "OUTPUT_FILENAME_TO_USE=$_OUTPUT_FILENAME_VAL" >> $GITHUB_ENV 108 | echo "Prepared Env Vars: ORG_FROM='$_ORG_SOURCE', TOKEN_FROM='$_TOKEN_SOURCE', OUTPUT_FILE='$_OUTPUT_FILENAME_VAL'" 109 | echo "Checking GITHUB_ENV content (first few lines):" 110 | head -n 5 $GITHUB_ENV || echo "GITHUB_ENV not found or empty." 111 | echo "--- End Prepare Environment Variables ---" 112 | 113 | - name: Authenticate GitHub CLI (for git clone) 114 | env: 115 | # This GH_TOKEN_FROM_WORKFLOW is the token we want to use. 116 | # We will pass it via stdin to gh auth login. 117 | GH_TOKEN_FROM_WORKFLOW: ${{ env.GH_TOKEN_FOR_SCAN }} 118 | run: | 119 | echo "--- Start Authenticate GitHub CLI ---" 120 | echo "Value of GH_TOKEN_FOR_SCAN (from previous step GITHUB_ENV): ${{ env.GH_TOKEN_FOR_SCAN }}" 121 | echo "Value of GH_TOKEN_FROM_WORKFLOW (this step's env): $GH_TOKEN_FROM_WORKFLOW" 122 | 123 | if [ -n "$GH_TOKEN_FROM_WORKFLOW" ]; then 124 | echo "Attempting to authenticate GitHub CLI with token (for git)..." 125 | 126 | echo "DEBUG: Current GH_TOKEN before unset: '${GH_TOKEN:-not set}'" 127 | # Temporarily unset GH_TOKEN for the gh auth login command itself, 128 | # so it doesn't complain about it already being set. 
129 | # Pipe the token from our workflow variable into its stdin. 130 | if (unset GH_TOKEN; echo "Token is being piped to gh auth login" ; echo "$GH_TOKEN_FROM_WORKFLOW" | gh auth login --with-token --hostname github.com); then 131 | echo "gh auth login command completed successfully." 132 | else 133 | echo "ERROR: gh auth login command failed. Exit code: $?" 134 | # exit 1 # Optionally exit immediately 135 | fi 136 | echo "DEBUG: Current GH_TOKEN after gh auth login attempt: '${GH_TOKEN:-not set}'" 137 | 138 | echo "Attempting gh auth setup-git..." 139 | # gh auth setup-git should now pick up the token stored by gh auth login 140 | if gh auth setup-git --hostname github.com; then 141 | echo "gh auth setup-git command completed successfully." 142 | else 143 | echo "ERROR: gh auth setup-git command failed. Exit code: $?" 144 | # exit 1 # Optionally exit immediately 145 | fi 146 | echo "gh auth login and gh auth setup-git process finished." 147 | else 148 | echo "Warning: No GitHub token available (GH_TOKEN_FROM_WORKFLOW is empty)." 149 | echo "Public repo clones might work, but API access by Python will fail without a token." 150 | fi 151 | 152 | echo "Verifying gh auth status:" 153 | # Temporarily unset GH_TOKEN here too, so gh auth status checks stored creds 154 | (unset GH_TOKEN; gh auth status -h github.com) || echo "gh auth status indicated not logged in or failed to check." 155 | 156 | echo "Verifying git config for github.com:" 157 | git config --global --get-all http.https://github.com/.extraheader || echo "Git extraheader for github.com not found." 158 | 159 | echo "Listing relevant git config settings:" 160 | git config --global --list | grep -E 'http\.extraheader|credential\.helper' || echo "No relevant git config found." 
161 | echo "--- End Authenticate GitHub CLI ---" 162 | 163 | - name: Run Python script with Licensee CLI 164 | env: 165 | ORGANIZATION_TO_SCAN: ${{ env.ORGANIZATION_TO_SCAN }} 166 | # Python script uses GH_TOKEN_FOR_SCAN for PyGithub 167 | GH_TOKEN_FOR_SCAN: ${{ env.GH_TOKEN_FOR_SCAN }} 168 | OUTPUT_FILENAME_TO_USE: ${{ env.OUTPUT_FILENAME_TO_USE }} 169 | # For debugging git within Python's subprocess: 170 | GIT_TRACE: "1" 171 | GIT_CURL_VERBOSE: "1" 172 | run: | 173 | echo "--- Start Run Python script ---" 174 | echo "Environment variables for Python script:" 175 | echo "ORGANIZATION_TO_SCAN: $ORGANIZATION_TO_SCAN" 176 | echo "OUTPUT_FILENAME_TO_USE: $OUTPUT_FILENAME_TO_USE" 177 | # GH_TOKEN_FOR_SCAN will be masked by GitHub Actions if it's a secret 178 | 179 | python .github/scripts/scan_org_licenses_licensee.py # Ensure this path is correct 180 | echo "--- End Run Python script ---" 181 | 182 | 183 | - name: Upload license report 184 | if: always() 185 | uses: actions/upload-artifact@v4 186 | with: 187 | name: python-licensee-cli-report 188 | path: ${{ env.OUTPUT_FILENAME_TO_USE }} 189 | if-no-files-found: warn 190 | -------------------------------------------------------------------------------- /.github/workflows/reusable-cla-check.yml: -------------------------------------------------------------------------------- 1 | # File: .github/.github/workflows/reusable-cla-check.yml 2 | # Purpose: Centralized, reusable workflow for performing the CLA check. 3 | # Signatures are read from and committed to a CSV file within this .github repository. 4 | 5 | name: Reusable CLA Check (Repo Signatures) 6 | 7 | on: 8 | 9 | workflow_call: 10 | secrets: 11 | # This PAT is used by cla-assistant/github-action for: 12 | # 1. PR interactions (comments, labels, statuses) on the TARGET repository. 13 | # 2. Reading and committing signatures to the CLA.csv file in THIS .github repository. 
14 | # It requires: 15 | # - Pull requests: R/W, Issues: R/W, Commit statuses: R/W (for all target repos). 16 | # - Contents: R/W (for THIS .github repository to manage CLA.csv). 17 | CONTRIBUTOR_ASSISTANT_PAT: 18 | description: 'PAT for CLA Assistant Lite (PR interaction on target repos AND signature commits to this .github repo).' 19 | required: true 20 | inputs: 21 | # URL or path to the CLA document text. 22 | # If a path, it's relative to the root of THIS .github repository after checkout. 23 | cla_document_url: 24 | description: 'URL or path to the CLA document text.' 25 | required: false # Made optional, can default 26 | type: string 27 | default: 'https://vmware.github.io/cla-docs/GenericCLA.html' # Dynamic default 28 | # default: 'https://github.com/${{ github.repository_owner }}/.github/blob/main/.github/CONTRIBUTOR_LICENSE_AGREEMENT.md' # Dynamic default 29 | # Path to the CSV signature file within THIS .github repository. 30 | signature_file_path: 31 | description: 'Path to the CSV signature file within this .github repository.' 32 | required: false 33 | type: string 34 | default: 'signatures/CLA.json' # Default path 35 | # Branch in THIS .github repository where signatures are stored and committed. 36 | signature_branch: 37 | description: 'Branch in this .github repository where signatures are stored and committed.' 38 | required: false 39 | type: string 40 | default: 'main' # Default branch (e.g., main or your .github repo's default) 41 | 42 | # Permissions this reusable workflow's GITHUB_TOKEN needs in the context of the CALLER's repository (target repo). 43 | # cla-assistant/github-action primarily uses its own CONTRIBUTOR_ASSISTANT_PAT for its operations. 44 | permissions: 45 | pull-requests: write # For commenting/labeling on the target PR. 46 | issues: write # For commenting on issues on the target PR. 47 | statuses: write # For setting commit statuses on the target PR. 
48 | # Contents: write on THIS .github repo is handled by the CONTRIBUTOR_ASSISTANT_PAT PAT, 49 | # not this workflow's GITHUB_TOKEN. 50 | 51 | jobs: 52 | cla_check: 53 | runs-on: ubuntu-latest 54 | # The 'if' condition to run this job is now primarily handled by the CALLER (the stub workflow). 55 | # Run only for relevant PR actions. 56 | steps: 57 | # Checkout THIS .github repository. 58 | # This makes the signature file (e.g., .github/signatures/CLA.csv) and potentially the 59 | # CLA document (if stored locally) available to the cla-assistant-lite action. 60 | - name: Checkout .github repository (for signature file access) 61 | uses: actions/checkout@v4.1.1 62 | with: 63 | # Explicitly checkout THIS .github repository where the workflow runs and signatures are stored. 64 | repository: ${{ github.repository }} # e.g., YOUR_ORG/.github 65 | # Checkout the branch where the signature file (CLA.csv) is located and where new signatures will be committed. 66 | ref: ${{ inputs.signature_branch }} 67 | # The default GITHUB_TOKEN of this job is sufficient for checking out its own repository. 68 | 69 | - name: CLA Assistant Lite (Signatures in Repo) 70 | 71 | # Use the official cla-assistant/github-action. Pin to a specific version. 72 | uses: contributor-assistant/github-action@v2.6.1 73 | env: 74 | # Provide the dedicated PAT. This PAT needs permissions for: 75 | # 1. PR interactions (comments, labels, statuses) on the *target repository* (where the PR was opened). 76 | # 2. Contents Read & Write permissions on *this .github repository* to read/commit CLA.csv. 77 | GITHUB_TOKEN: ${{ secrets.CONTRIBUTOR_ASSISTANT_PAT }} 78 | # PERSONAL_ACCESS_TOKEN is REQUIRED for writing signatures to a remote repository. 79 | # We can use the same PAT if it has the necessary permissions for the .github repo. 
80 | PERSONAL_ACCESS_TOKEN: ${{ secrets.CONTRIBUTOR_ASSISTANT_PAT }} 81 | with: 82 | # --- Configuration for Centralized Signatures --- 83 | # Organization where the .github repository (hosting signatures) resides. 84 | remote-organization-name: ${{ github.repository_owner }} # e.g., 'vmware' 85 | # Name of the repository hosting the signatures (i.e., '.github'). 86 | remote-repository-name: '.github' # The name of your .github repository 87 | 88 | # Path to the signature CSV file within the checked-out .github repository. 89 | path-to-signatures: ${{ inputs.signature_file_path }} 90 | # Path or URL to the CLA document text. 91 | path-to-document: ${{ inputs.cla_document_url }} 92 | # Branch in THIS .github repository where new signatures should be committed. 93 | branch: ${{ inputs.signature_branch }} 94 | # List of users/bots to ignore for CLA checks. 95 | allowlist: bot*,dependabot[bot],github-actions[bot],renovate[bot] 96 | 97 | # --- Optional configurations for cla-assistant-lite --- 98 | # Custom commit message when a new signature is added to CLA.csv. 99 | # Uses GitHub context variables available to cla-assistant-lite. 100 | create-file-commit-message: 'chore(CLA): Add signature for @${{ github.event.sender.login }} for PR #${{ github.event.pull_request.number }} on ${{ github.event.repository.full_name }}' 101 | # This tells the action what comment triggers a signature. 102 | # The README image you provided had 'custom-pr-sign-comment'. 103 | custom-pr-sign-comment: "I have read the CLA Document and I hereby sign the CLA" 104 | 105 | # User to attribute commits to if the action commits signatures. 106 | # github-actions[bot] is a good default if you want to distinguish these commits. 
107 | # commit-author-name: 'CLA Automation' 108 | # commit-author-email: 'cla-bot@users.noreply.github.com' # Or a dedicated bot user email 109 | 110 | # Lock PR from non-members until CLA is signed 111 | # lock-pullrequest-after: true 112 | # Custom message when CLA is required 113 | # sigRequiredComment: 'Thanks for your contribution! Please sign our CLA to proceed.' 114 | -------------------------------------------------------------------------------- /.github/workflows/scan-licenses.yml: -------------------------------------------------------------------------------- 1 | name: Organization Public License Scan 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | organization: 7 | description: 'GitHub organization name (e.g., "my-org")' 8 | required: true 9 | type: string 10 | github_token: 11 | description: 'Optional GitHub PAT. If empty for a manual run, secrets.GITHUB_TOKEN will be used.' 12 | required: false 13 | type: string 14 | output_filename: 15 | description: 'Name of the output JSON file' 16 | required: false 17 | default: 'organization_public_licenses.json' 18 | type: string 19 | schedule: 20 | # Example: Run every Monday at 2 AM UTC 21 | - cron: '0 2 * * 1' 22 | 23 | jobs: 24 | scan_licenses: 25 | runs-on: ubuntu-latest 26 | permissions: 27 | contents: read # For checkout 28 | 29 | steps: 30 | - name: Checkout code (optional) 31 | uses: actions/checkout@v4 32 | 33 | - name: Set up Ruby 34 | uses: ruby/setup-ruby@v1 35 | with: 36 | ruby-version: '3.1' 37 | bundler-cache: true 38 | 39 | - name: Install licensee 40 | run: gem install licensee 41 | 42 | - name: Install jq 43 | run: sudo apt-get update && sudo apt-get install -y jq 44 | 45 | - name: Install/Ensure GitHub CLI 46 | run: | 47 | if ! type -p gh &>/dev/null; then 48 | echo "GitHub CLI not found, installing..." 
49 | sudo apt-get update 50 | sudo apt-get install -y curl 51 | curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \ 52 | && sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ 53 | && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ 54 | && sudo apt update \ 55 | && sudo apt install -y gh 56 | else 57 | echo "GitHub CLI already installed." 58 | fi 59 | gh --version 60 | 61 | - name: Prepare environment variables 62 | id: prep_env 63 | run: | 64 | if [[ "${{ github.event_name }}" == "schedule" ]]; then 65 | if [ -z "${{ secrets.ORG_NAME_FOR_SCAN }}" ]; then 66 | echo "Error: ORG_NAME_FOR_SCAN secret is not set for scheduled run." 67 | exit 1 68 | fi 69 | echo "ORGANIZATION_TO_SCAN=${{ secrets.ORG_NAME_FOR_SCAN }}" >> $GITHUB_ENV 70 | echo "GH_TOKEN_FOR_SCAN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV 71 | echo "OUTPUT_FILENAME_TO_USE=organization_public_licenses_scheduled.json" >> $GITHUB_ENV 72 | echo "Scheduled run: Using secrets.ORG_NAME_FOR_SCAN and secrets.GITHUB_TOKEN." 73 | else # workflow_dispatch 74 | if [ -z "${{ github.event.inputs.organization }}" ]; then 75 | echo "Error: 'organization' input is not set for manual run." 76 | exit 1 77 | fi 78 | echo "ORGANIZATION_TO_SCAN=${{ github.event.inputs.organization }}" >> $GITHUB_ENV 79 | 80 | if [ -n "${{ github.event.inputs.github_token }}" ]; then 81 | echo "Manual run: Using provided github_token input." 82 | echo "GH_TOKEN_FOR_SCAN=${{ github.event.inputs.github_token }}" >> $GITHUB_ENV 83 | else 84 | echo "Manual run: github_token input is empty. Using secrets.GITHUB_TOKEN as fallback." 
85 | echo "GH_TOKEN_FOR_SCAN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV 86 | fi 87 | echo "OUTPUT_FILENAME_TO_USE=${{ github.event.inputs.output_filename }}" >> $GITHUB_ENV 88 | echo "Manual run: Using inputs for organization and output_filename." 89 | fi 90 | # Mask the token value if it's set and passed to the script via GH_TOKEN_FOR_SCAN 91 | # Note: $GITHUB_ENV variables are automatically masked if they look like secrets. 92 | # This explicit masking is for the variable if it were used directly in `run` scripts. 93 | # For GH_TOKEN_FOR_SCAN being put into GITHUB_ENV, it should be auto-masked by Actions. 94 | # However, if we were to echo it directly: 95 | # TOKEN_VALUE_TO_MASK=$(echo "$GH_TOKEN_FOR_SCAN" | sed 's/./*/g') # Example of how you might get it 96 | # echo "::add-mask::${TOKEN_VALUE_TO_MASK}" # This isn't quite right as we don't have it here directly 97 | # The best way is to rely on Actions auto-masking for values from secrets and GITHUB_ENV. 98 | # If GH_TOKEN_FOR_SCAN is set, it will be used by the next step. 99 | echo "Token for scan will be (masked if secret): $GH_TOKEN_FOR_SCAN" 100 | 101 | 102 | - name: Authenticate GitHub CLI and Git 103 | env: 104 | GH_TOKEN: ${{ env.GH_TOKEN_FOR_SCAN }} # This correctly picks up from GITHUB_ENV 105 | run: | 106 | if [ -z "$ORGANIZATION_TO_SCAN" ]; then 107 | echo "Error: Organization name (ORGANIZATION_TO_SCAN) is not available." 108 | exit 1 109 | fi 110 | 111 | if [ -n "$GH_TOKEN" ]; then 112 | echo "Authenticating GitHub CLI and Git with the determined token." 113 | echo "$GH_TOKEN" | gh auth login --with-token 114 | gh auth setup-git 115 | else 116 | echo "Warning: No GitHub token available (GH_TOKEN is empty). Proceeding with unauthenticated access (lowest rate limits)." 
117 | git config --global credential.helper '' 118 | fi 119 | 120 | - name: Scan public repositories and generate report 121 | run: | 122 | set -e 123 | set -o pipefail 124 | 125 | echo "Scanning public repositories in organization: $ORGANIZATION_TO_SCAN" 126 | echo "Output file will be: $OUTPUT_FILENAME_TO_USE" 127 | 128 | TEMP_LICENSE_FILE="license_lines.temp.jsonl" 129 | FINAL_OUTPUT_FILE="$OUTPUT_FILENAME_TO_USE" 130 | > "$TEMP_LICENSE_FILE" 131 | 132 | echo "Fetching public repository list for $ORGANIZATION_TO_SCAN..." 133 | repo_names_json=$(gh repo list "$ORGANIZATION_TO_SCAN" --visibility public --limit 2000 --json name --jq '.[].name') 134 | 135 | if [ -z "$repo_names_json" ]; then 136 | echo "No public repositories found in organization $ORGANIZATION_TO_SCAN or failed to list them." 137 | echo "[]" > "$FINAL_OUTPUT_FILE" 138 | echo "Workflow finished: No public repositories to scan." 139 | exit 0 140 | fi 141 | 142 | MAX_RETRIES=3 143 | RETRY_DELAY_SECONDS=10 144 | mapfile -t repo_array < <(echo "$repo_names_json") 145 | 146 | for repo_name in "${repo_array[@]}"; do 147 | if [ -z "$repo_name" ]; then 148 | continue 149 | fi 150 | 151 | repo_full_name="$ORGANIZATION_TO_SCAN/$repo_name" 152 | echo "-----------------------------------------------------" 153 | echo "Processing repository: $repo_full_name" 154 | CLONE_DIR=$(mktemp -d -t "repo_${repo_name//\//_}_XXXXXX") 155 | echo "Cloning to temporary directory: $CLONE_DIR" 156 | current_attempt=1 157 | license_id="ERROR_PROCESSING" 158 | 159 | while [ $current_attempt -le $MAX_RETRIES ]; do 160 | echo "Attempt $current_attempt/$MAX_RETRIES to clone and analyze $repo_full_name..." 161 | if GIT_TERMINAL_PROMPT=0 git clone --depth 1 --quiet "https://github.com/$repo_full_name.git" "$CLONE_DIR"; then 162 | echo "Clone successful." 163 | cd "$CLONE_DIR" 164 | license_output=$(licensee detect --json . 2>/dev/null || echo "LICENSEE_CLI_ERROR") 165 | cd .. 
166 | 167 | if [[ "$license_output" == "LICENSEE_CLI_ERROR" ]]; then 168 | license_id="LICENSEE_CLI_ERROR" 169 | echo "Licensee CLI failed for $repo_full_name." 170 | elif [[ "$license_output" == "null" ]] || [[ -z "$license_output" ]]; then 171 | license_id="NONE_FOUND" 172 | echo "No license found by licensee in $repo_full_name." 173 | else 174 | license_id_raw=$(echo "$license_output" | jq -r '.matched_license.spdx_id // .matched_license.name // "UNKNOWN_OR_NO_MATCH"') 175 | # JQ's // operator already handles null, so extra check for "null" string is less critical but fine 176 | if [[ "$license_id_raw" == "null" ]] || [[ -z "$license_id_raw" ]] || [[ "$license_id_raw" == "UNKNOWN_OR_NO_MATCH" && "$license_output" != *"UNKNOWN_OR_NO_MATCH"* ]]; then 177 | license_id="UNKNOWN_OR_NO_MATCH" # Ensure this if jq itself returned the fallback literal 178 | echo "Licensee ran, but license SPDX ID or name was effectively null/empty in $repo_full_name." 179 | else 180 | license_id="$license_id_raw" 181 | echo "License found for $repo_full_name: $license_id" 182 | fi 183 | fi 184 | break 185 | else 186 | echo "Clone failed for $repo_full_name (attempt $current_attempt)." 187 | if [ $current_attempt -lt $MAX_RETRIES ]; then 188 | echo "Retrying in $RETRY_DELAY_SECONDS seconds..." 189 | sleep $RETRY_DELAY_SECONDS 190 | else 191 | echo "Max retries reached for $repo_full_name. Marking as clone error." 192 | license_id="ERROR_CLONING" 193 | fi 194 | fi 195 | current_attempt=$((current_attempt + 1)) 196 | done 197 | jq -n --arg repo_name "$repo_name" --arg license_id "$license_id" \ 198 | '{ "repository_name": $repo_name, "license": $license_id }' >> "$TEMP_LICENSE_FILE" 199 | rm -rf "$CLONE_DIR" 200 | echo "Cleaned up $CLONE_DIR." 201 | done 202 | 203 | if [ -s "$TEMP_LICENSE_FILE" ]; then 204 | jq -s '.' 
"$TEMP_LICENSE_FILE" > "$FINAL_OUTPUT_FILE" 205 | else 206 | echo "[]" > "$FINAL_OUTPUT_FILE" 207 | fi 208 | rm "$TEMP_LICENSE_FILE" 209 | 210 | echo "-----------------------------------------------------" 211 | echo "Public license report generated: $FINAL_OUTPUT_FILE" 212 | echo "Content of $FINAL_OUTPUT_FILE:" 213 | cat "$FINAL_OUTPUT_FILE" 214 | 215 | - name: Upload license report 216 | uses: actions/upload-artifact@v4 217 | with: 218 | name: public-license-report 219 | path: ${{ env.OUTPUT_FILENAME_TO_USE }} 220 | -------------------------------------------------------------------------------- /.github/workflows/secrets-scanning-report.yml: -------------------------------------------------------------------------------- 1 | name: Centralized Secret Scanning Report 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | include_inactive: 7 | description: 'Include inactive alerts in report' 8 | required: false 9 | type: boolean 10 | default: false 11 | max_workers: 12 | description: 'Maximum number of concurrent workers' 13 | required: false 14 | type: number 15 | default: 10 16 | log_level: 17 | description: 'Logging level' 18 | required: false 19 | type: choice 20 | options: 21 | - INFO 22 | - DEBUG 23 | - WARNING 24 | - ERROR 25 | default: 'INFO' 26 | alert_threshold: 27 | description: 'Number of active alerts to trigger issue creation' 28 | required: false 29 | type: number 30 | default: 10 31 | schedule: 32 | - cron: '0 0 * * 1' 33 | 34 | permissions: 35 | security-events: read 36 | contents: write 37 | actions: write 38 | issues: write 39 | 40 | jobs: 41 | generate-report: 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - name: Checkout .github repo 46 | uses: actions/checkout@v4 47 | with: 48 | repository: ${{ github.repository_owner }}/.github 49 | ref: main 50 | token: ${{ secrets.SECRET_SCANNING_TOKEN }} 51 | 52 | - name: Set up Python 53 | uses: actions/setup-python@v5 54 | with: 55 | python-version: '3.11' 56 | cache: 'pip' 57 | cache-dependency-path: 
scripts/requirements.txt # Now correct 58 | 59 | - name: Install dependencies 60 | run: | 61 | python -m pip install --upgrade pip 62 | pip install -r scripts/requirements.txt # Now correct 63 | 64 | - name: Generate timestamp 65 | id: timestamp 66 | run: echo "timestamp=$(date +%Y%m%d_%H%M%S)" >> $GITHUB_OUTPUT 67 | 68 | - name: Generate Secret Report 69 | id: generate-report 70 | env: 71 | GITHUB_TOKEN: ${{ secrets.SECRET_SCANNING_TOKEN }} 72 | ORGANIZATION: ${{ github.repository_owner }} 73 | REPORT_FILE: "secret_report_${{ steps.timestamp.outputs.timestamp }}.csv" 74 | run: | 75 | # Create the reports directory (now in the correct location) 76 | mkdir -p reports 77 | 78 | # Construct the command string explicitly (now simpler paths) 79 | COMMAND="python scripts/github_secret_scanner.py \ 80 | --org $ORGANIZATION \ 81 | --token $GITHUB_TOKEN \ 82 | --output reports/$REPORT_FILE \ 83 | --log-level ${{ inputs.log_level || 'INFO' }} \ 84 | --max-workers ${{ inputs.max_workers || 10 }} \ 85 | --max-retries 3" 86 | 87 | # Add the conditional --include-inactive flag 88 | if [[ "${{ inputs.include_inactive }}" == "true" ]]; then 89 | COMMAND="$COMMAND --include-inactive" 90 | fi 91 | 92 | # Execute the command 93 | $COMMAND 94 | 95 | echo "report_path=reports/$REPORT_FILE" >> $GITHUB_OUTPUT 96 | 97 | - name: Check for No Repositories 98 | id: check-repos 99 | if: success() 100 | run: | 101 | if grep -q "__NO_REPOS__" ${{ steps.generate-report.outputs.report_path }}/../output.txt; then 102 | echo "No repositories found in the organization. Exiting." 
103 | exit 1 104 | fi 105 | 106 | - name: Process report statistics (inline) 107 | id: stats 108 | if: success() && steps.check-repos.outcome == 'success' 109 | run: | 110 | STATS=$(grep "__STATS_START__" ${{ steps.generate-report.outputs.report_path }}/../output.txt | sed 's/__STATS_START__//' | sed 's/__STATS_END__//') 111 | echo "total_alerts=$(echo $STATS | cut -d',' -f1 | cut -d'=' -f2)" >> $GITHUB_OUTPUT 112 | echo "active_alerts=$(echo $STATS | cut -d',' -f2 | cut -d'=' -f2)" >> $GITHUB_OUTPUT 113 | echo "inactive_alerts=$(echo $STATS | cut -d',' -f3 | cut -d'=' -f2)" >> $GITHUB_OUTPUT 114 | echo "Total alerts found: $(echo $STATS | cut -d',' -f1 | cut -d'=' -f2)" 115 | echo "Active alerts: $(echo $STATS | cut -d',' -f2 | cut -d'=' -f2)" 116 | echo "Inactive alerts: $(echo $STATS | cut -d',' -f3 | cut -d'=' -f2)" 117 | 118 | - name: Create summary issue (using github-script) 119 | if: success() && steps.check-repos.outcome == 'success' && steps.stats.outputs.active_alerts > inputs.alert_threshold 120 | uses: actions/github-script@v7 121 | with: 122 | script: | 123 | const stats = { 124 | total: '${{ steps.stats.outputs.total_alerts }}', 125 | active: '${{ steps.stats.outputs.active_alerts }}', 126 | inactive: '${{ steps.stats.outputs.inactive_alerts }}', 127 | }; 128 | 129 | const now = new Date(); 130 | const formattedDate = now.toLocaleDateString('en-US', { 131 | year: 'numeric', 132 | month: 'long', 133 | day: 'numeric' 134 | }); 135 | 136 | const body = ` 137 | # Secret Scanning Report Summary 138 | 139 | Report generated on: ${now.toISOString()} 140 | 141 | ## Statistics 142 | - Total alerts analyzed: ${stats.total} 143 | - Active alerts found: ${stats.active} 144 | - Inactive alerts found: ${stats.inactive} 145 | 146 | ## Details 147 | - Report artifact: [Download report](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 148 | - Workflow run: [View 
details](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 149 | 150 | ## Configuration 151 | - Include inactive alerts: ${{ inputs.include_inactive || 'false' }} 152 | - Max workers: ${{ inputs.max_workers || '10' }} 153 | - Log level: ${{ inputs.log_level || 'INFO' }} 154 | - Alert threshold: ${{ inputs.alert_threshold || '10'}} 155 | `; 156 | 157 | await github.rest.issues.create({ 158 | owner: context.repo.owner, 159 | repo: context.repo.repo, 160 | title: `📊 Secret Scanning Report - ${formattedDate}`, 161 | body: body, 162 | labels: ['secret-scanning', 'report'] 163 | }); 164 | 165 | - name: Commit and Push Report 166 | if: success() && steps.check-repos.outcome == 'success' 167 | uses: stefanzweifel/git-auto-commit-action@v5 168 | with: 169 | commit_message: "Add secret scanning report: ${{ steps.timestamp.outputs.timestamp }}" 170 | repository: ./ # Commit to the root of the checked-out repo 171 | file_pattern: reports/*.csv 172 | commit_user_name: GitHub Actions 173 | commit_user_email: actions@github.com 174 | commit_author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> 175 | push_options: '--force' 176 | token: ${{ secrets.SECRET_SCANNING_TOKEN }} # Use the PAT with write access!
177 | 178 | - name: Notify on failure 179 | if: failure() 180 | uses: actions/github-script@v7 181 | with: 182 | script: | 183 | const body = ` 184 | # 🚨 Secret Scanning Report Generation Failed 185 | 186 | Workflow run failed at ${new Date().toISOString()} 187 | 188 | ## Details 189 | - Run ID: \`${context.runId}\` 190 | - Trigger: ${context.eventName} 191 | - Actor: @${context.actor} 192 | 193 | ## Links 194 | - [View run details](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) 195 | - [View workflow file](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/blob/main/.github/workflows/secrets-scanning-report.yml) 196 | 197 | Please check the workflow logs for detailed error information. 198 | `; 199 | 200 | await github.rest.issues.create({ 201 | owner: context.repo.owner, 202 | repo: context.repo.repo, 203 | title: '🚨 Secret Scanning Report Generation Failed', 204 | body: body, 205 | labels: ['secret-scanning', 'failed'] 206 | }); 207 | 208 | - name: Clean up 209 | if: always() 210 | run: | 211 | echo "No clean up required." 212 | 213 | concurrency: 214 | group: ${{ github.workflow }}-${{ github.ref }} 215 | cancel-in-progress: true 216 | 217 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation.
11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official email address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | [opensource@broadcom.com][enforcement]. All complaints will be reviewed and 64 | investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][cc], version 118 | [v2.1][cc-v2.1]. 119 | 120 | Community Impact Guidelines were inspired by Mozilla's 121 | [Code of Conduct Enforcement][mozilla-coce] consequence ladder. 122 | 123 | For answers to common questions about this code of conduct, please refer to the 124 | [Frequently Asked Questions][cc-faq]. Translations are available in 125 | [additional languages][cc-translations]. 
126 | 127 | [cc]: https://www.contributor-covenant.org 128 | [cc-faq]: https://www.contributor-covenant.org/faq/ 129 | [cc-translations]: https://www.contributor-covenant.org/translations/ 130 | [cc-v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct/ 131 | [enforcement]: mailto:opensource@broadcom.com?subject=Open%20Source%20Code%20of%20Conduct 132 | [mozilla-coce]: https://github.com/mozilla/inclusion/ 133 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to {{ project }} 2 | 3 | _NOTE: This is a template document that requires editing before it is ready to use!_ 4 | 5 | We welcome contributions from the community and first want to thank you for taking the time to contribute! 6 | 7 | Please familiarize yourself with the [Code of Conduct](https://github.com/vmware/.github/blob/main/CODE_OF_CONDUCT.md) before contributing. 8 | 9 | _TO BE EDITED: Depending on the open source license that governs the project, leave only one of the options below:_ 10 | 11 | * _DCO: Before you start working with {{ project }}, please read our [Developer Certificate of Origin](https://cla.vmware.com/dco). All contributions to this repository must be signed as described on that page. Your signature certifies that you wrote the patch or have the right to pass it on as an open-source patch._ 12 | * _CLA: Before you start working with {{ project }}, please read and sign our Contributor License Agreement [CLA](https://cla.vmware.com/cla/1/preview). If you wish to contribute code and you have not signed our contributor license agreement (CLA), our bot will update the issue when you open a Pull Request. 
For any questions about the CLA process, please refer to our [FAQ](https://cla.vmware.com/faq)._ 13 | 14 | ## Ways to contribute 15 | 16 | We welcome many different types of contributions and not all of them need a Pull request. Contributions may include: 17 | 18 | * New features and proposals 19 | * Documentation 20 | * Bug fixes 21 | * Issue Triage 22 | * Answering questions and giving feedback 23 | * Helping to onboard new contributors 24 | * Other related activities 25 | 26 | ## Getting started 27 | 28 | _TO BE EDITED: This section explains how to build the project from source, including Development Environment Setup, Build, Run and Test._ 29 | 30 | _Provide information about how someone can find your project, get set up, build the code, test it, and submit a pull request successfully without having to ask any questions. Also include common errors people run into, or useful scripts they should run._ 31 | 32 | _List any tests that the contributor should run / or testing processes to follow before submitting. Describe any automated and manual checks performed by reviewers._ 33 | 34 | 35 | ## Contribution Flow 36 | 37 | This is a rough outline of what a contributor's workflow looks like: 38 | 39 | * Make a fork of the repository within your GitHub account 40 | * Create a topic branch in your fork from where you want to base your work 41 | * Make commits of logical units 42 | * Make sure your commit messages follow the proper format, quality and descriptiveness (see below) 43 | * Push your changes to the topic branch in your fork 44 | * Create a pull request containing that commit 45 | 46 | We follow the GitHub workflow and you can find more details on the [GitHub flow documentation](https://docs.github.com/en/get-started/quickstart/github-flow). 47 | 48 | Before submitting your pull request, we advise you to use the following: 49 | 50 | 51 | ### Pull Request Checklist 52 | 53 | 1. 
Check if your code changes will pass both code linting checks and unit tests. 54 | 2. Ensure your commit messages are descriptive. We follow the conventions on [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/). Be sure to include any related GitHub issue references in the commit message. See [GFM syntax](https://guides.github.com/features/mastering-markdown/#GitHub-flavored-markdown) for referencing issues and commits. 55 | 3. Check the commits and commit messages and ensure they are free from typos. 56 | 57 | ## Reporting Bugs and Creating Issues 58 | 59 | For specifics on what to include in your report, please follow the guidelines in the issue and pull request templates when available. 60 | 61 | _TO BE EDITED: Add additional information if needed._ 62 | 63 | 64 | ## Ask for Help 65 | 66 | _TO BE EDITED: Provide information about the channels you use to communicate (e.g. Slack, IRC, Discord, etc)_ 67 | 68 | The best way to reach us with a question when contributing is to ask on: 69 | 70 | * The original GitHub issue 71 | * The developer mailing list 72 | * Our Slack channel 73 | 74 | 75 | ## Additional Resources 76 | 77 | _Optional_ 78 | 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | © Broadcom. All Rights Reserved. 2 | The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. 3 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Release Process 2 | 3 | The community has adopted this security disclosure and response policy to ensure we responsibly handle critical issues. 
4 | 5 | 6 | ## Supported Versions 7 | 8 | For a list of supported versions that this project will potentially create security fixes for, please refer to the Releases page on this project's GitHub and/or project related documentation on release cadence and support. 9 | 10 | 11 | ## Reporting a Vulnerability - Private Disclosure Process 12 | 13 | Security is of the highest importance and all security vulnerabilities or suspected security vulnerabilities should be reported to this project privately, to minimize attacks against current users before they are fixed. Vulnerabilities will be investigated and patched on the next patch (or minor) release as soon as possible. This information could be kept entirely internal to the project. 14 | 15 | If you know of a publicly disclosed security vulnerability for this project, please **IMMEDIATELY** contact the maintainers of this project privately. The use of encrypted email is encouraged. 16 | 17 | 18 | **IMPORTANT: Do not file public issues on GitHub for security vulnerabilities** 19 | 20 | To report a vulnerability or a security-related issue, please contact the maintainers with enough details through one of the following channels: 21 | * Directly via their individual email addresses 22 | * Open a [GitHub Security Advisory](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability). This allows for anyone to report security vulnerabilities directly and privately to the maintainers via GitHub. Note that this option may not be present for every repository. 23 | 24 | The report will be fielded by the maintainers who have committer and release permissions. Feedback will be sent within 3 business days, including a detailed plan to investigate the issue and any potential workarounds to perform in the meantime. 25 | 26 | Do not report non-security-impacting bugs through this channel. Use GitHub issues for all non-security-impacting bugs. 
27 | 28 | 29 | ## Proposed Report Content 30 | 31 | Provide a descriptive title and in the description of the report include the following information: 32 | 33 | * Basic identity information, such as your name and your affiliation or company. 34 | * Detailed steps to reproduce the vulnerability (POC scripts, screenshots, and logs are all helpful to us). 35 | * Description of the effects of the vulnerability on this project and the related hardware and software configurations, so that the maintainers can reproduce it. 36 | * How the vulnerability affects this project's usage and an estimation of the attack surface, if there is one. 37 | * List other projects or dependencies that were used in conjunction with this project to produce the vulnerability. 38 | 39 | 40 | ## When to report a vulnerability 41 | 42 | * When you think this project has a potential security vulnerability. 43 | * When you suspect a potential vulnerability but you are unsure that it impacts this project. 44 | * When you know of or suspect a potential vulnerability on another project that is used by this project. 45 | 46 | 47 | ## Patch, Release, and Disclosure 48 | 49 | The maintainers will respond to vulnerability reports as follows: 50 | 51 | 1. The maintainers will investigate the vulnerability and determine its effects and criticality. 52 | 2. If the issue is not deemed to be a vulnerability, the maintainers will follow up with a detailed reason for rejection. 53 | 3. The maintainers will initiate a conversation with the reporter within 3 business days. 54 | 4. If a vulnerability is acknowledged and the timeline for a fix is determined, the maintainers will work on a plan to communicate with the appropriate community, including identifying mitigating steps that affected users can take to protect themselves until the fix is rolled out. 55 | 5. 
The maintainers will also create a [Security Advisory](https://docs.github.com/en/code-security/repository-security-advisories/publishing-a-repository-security-advisory) using the [CVSS Calculator](https://www.first.org/cvss/calculator/3.0), if it is not created yet. The maintainers make the final call on the calculated CVSS; it is better to move quickly than to make the CVSS perfect. Issues may also be reported to [Mitre](https://cve.mitre.org/) using this [scoring calculator](https://nvd.nist.gov/vuln-metrics/cvss/v3-calculator). The draft advisory will initially be set to private. 56 | 6. The maintainers will work on fixing the vulnerability and perform internal testing before preparing to roll out the fix. 57 | 7. Once the fix is confirmed, the maintainers will patch the vulnerability in the next patch or minor release, and backport a patch release into all earlier supported releases. 58 | 59 | 60 | ## Public Disclosure Process 61 | 62 | The maintainers publish the public advisory to this project's community via GitHub. In most cases, additional communication via Slack, Twitter, mailing lists, blog, and other channels will assist in educating the project's users and rolling out the patched release to affected users. 63 | 64 | The maintainers will also publish any mitigating steps users can take until the fix can be applied to their instances. This project's distributors will handle creating and publishing their own security advisories. 65 | 66 | 67 | ## Confidentiality, integrity and availability 68 | 69 | We consider vulnerabilities leading to the compromise of data confidentiality, elevation of privilege, or integrity to be our highest priority concerns. Availability, in particular in areas relating to DoS and resource exhaustion, is also a serious security concern. The maintainer team takes all vulnerabilities, potential vulnerabilities, and suspected vulnerabilities seriously and will investigate them in an urgent and expeditious manner. 
70 | 71 | Note that we do not currently consider the default settings for this project to be secure-by-default. It is necessary for operators to explicitly configure settings, role based access control, and other resource related features in this project to provide a hardened environment. We will not act on any security disclosure that relates to a lack of safe defaults. Over time, we will work towards improved safe-by-default configuration, taking into account backwards compatibility. 72 | -------------------------------------------------------------------------------- /actions/check-license/action.yml: -------------------------------------------------------------------------------- 1 | name: License Check 2 | description: Fuzzy license matching with GPL exclusion 3 | runs: 4 | using: composite 5 | steps: 6 | - name: Setup Python 7 | uses: actions/setup-python@v4 8 | with: 9 | python-version: '3.x' 10 | 11 | - name: Install dependencies 12 | run: pip install PyGithub python-Levenshtein 13 | 14 | - name: Fuzzy License Check 15 | shell: python 16 | env: 17 | ORG_TOKEN: ${{ secrets.ORG_TOKEN }} 18 | ENABLED_REPOS: ${{ vars.ENABLED_REPOS || '[]' }} 19 | EXCLUDED_REPOS: ${{ vars.EXCLUDED_REPOS || '[]' }} 20 | CURRENT_REPO: ${{ github.repository }} 21 | run: | 22 | import os 23 | import re 24 | import json 25 | from difflib import SequenceMatcher 26 | from github import Github, GithubException 27 | 28 | # Check if the repository is enabled or excluded 29 | enabled_repos = json.loads(os.environ['ENABLED_REPOS']) 30 | excluded_repos = json.loads(os.environ['EXCLUDED_REPOS']) 31 | current_repo = os.environ['CURRENT_REPO'] 32 | 33 | if enabled_repos and current_repo not in enabled_repos: 34 | print(f"Skipping repository {current_repo} (not in enabled list)") 35 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 36 | fh.write('license_status=skipped') 37 | exit(0) 38 | 39 | if current_repo in excluded_repos: 40 | print(f"Skipping repository {current_repo} (excluded)") 41 | 
with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 42 | fh.write('license_status=skipped') 43 | exit(0) 44 | 45 | # Configuration 46 | EXCLUDED_KEYWORDS = { 47 | 'gpl', 'gnu', 'general', 'public', 'version', '2', '3', 48 | 'agpl', 'lgpl', 'lesser', 'copying', 'affero', 'copyleft', 49 | 'copyright', 'foundation', 'franklin', 'street', 'patent' 50 | } 51 | SIMILARITY_THRESHOLD = 0.75 52 | 53 | g = Github(os.environ['ORG_TOKEN']) 54 | repo = g.get_repo(os.environ['GITHUB_REPOSITORY']) 55 | 56 | # Load permitted licenses 57 | org_repo = repo.organization.get_repo(".github") 58 | licenses_file = org_repo.get_contents("permissive_licenses.json") 59 | permitted_licenses = json.loads(licenses_file.decoded_content)['permissive'] 60 | 61 | # Get license file content 62 | license_text = "" 63 | try: 64 | for f in repo.get_contents(""): 65 | if f.name.lower().startswith(('license', 'copying')): 66 | license_text = f.decoded_content.decode().lower() 67 | break 68 | except GithubException: 69 | pass 70 | 71 | # Preprocess text 72 | if license_text: 73 | lines = license_text.split('\n')[:20] 74 | clean_text = ' '.join([ 75 | word for line in lines 76 | for word in re.findall(r'\w+', line) 77 | if word not in EXCLUDED_KEYWORDS 78 | ]) 79 | 80 | # Generate candidate phrases 81 | candidates = re.findall(r'\w+', clean_text) 82 | phrases = [' '.join(candidates[i:i+3]) for i in range(len(candidates)-2)] 83 | all_candidates = set(candidates + phrases) 84 | 85 | # Fuzzy match against permitted licenses 86 | is_permissive = False 87 | for license_name in permitted_licenses: 88 | license_lower = license_name.lower() 89 | for candidate in all_candidates: 90 | ratio = SequenceMatcher(None, candidate, license_lower).ratio() 91 | if ratio >= SIMILARITY_THRESHOLD: 92 | is_permissive = True 93 | break 94 | if is_permissive: 95 | break 96 | else: 97 | # No license file found 98 | is_permissive = False 99 | 100 | # Set output 101 | print(f"license_status={'permissive' if is_permissive else 
'non-permissive'}") 102 | with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: 103 | fh.write(f'license_status={"permissive" if is_permissive else "non-permissive"}') 104 | 105 | 106 | -------------------------------------------------------------------------------- /permissive_licenses.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissive": [ 3 | "0BSD", 4 | "Adobe-2006 License", 5 | "AFL-1.1", 6 | "AFL-1.2", 7 | "AFL-2.1", 8 | "AFL-3.0", 9 | "Apache-1.0", 10 | "Apache-1.1", 11 | "Apache-2.0", 12 | "Artistic-1.0", 13 | "Artistic-1.0-cl8", 14 | "Artistic-1.0-Perl", 15 | "Artistic-2.0", 16 | "BlueOak-1.0.0", 17 | "Broadcom_Proprietary", 18 | "BSD-1-Clause", 19 | "BSD-2-Clause", 20 | "BSD-2-Clause-FreeBSD", 21 | "BSD-2-Clause-NetBSD", 22 | "BSD-2-Clause-Patent", 23 | "BSD-3-Clause", 24 | "BSD-3-Clause-Attribution", 25 | "BSD-3-Clause-Clear", 26 | "BSD-4-Clause", 27 | "BSD-4-Clause-UC", 28 | "BSL-1.0", 29 | "bzip2-1.0.5", 30 | "bzip2-1.0.6", 31 | "CC-BY-1.0", 32 | "CC-BY-2.0", 33 | "CC-BY-2.5", 34 | "CC-BY-3.0", 35 | "CC-BY-4.0", 36 | "CC0-1.0", 37 | "CNRI-Python", 38 | "curl License", 39 | "EDL-1.0", 40 | "FTL", 41 | "GFDL 1.2", 42 | "GFDL 1.3", 43 | "GFDL v1.1", 44 | "HPND", 45 | "ICU License", 46 | "ImageMagick License", 47 | "ISC License", 48 | "Jam STAPL Software License", 49 | "Lattice Diamond Programmer Embedded Software", 50 | "libpng License", 51 | "libpng-2.0", 52 | "MIT", 53 | "MIT License", 54 | "MIT-advertising License", 55 | "MIT-CMU License", 56 | "MIT-enna License", 57 | "MIT-feh License", 58 | "MITNFA", 59 | "Ms-PL", 60 | "NCSA", 61 | "NVIDIA_Proprietary", 62 | "OLDAP-2.8", 63 | "OpenSSL License", 64 | "PDDL-1.0", 65 | "PHP-3.01 License", 66 | "PostgreSQL License", 67 | "PSF-1.6a2", 68 | "PSF-1.6b1", 69 | "PSF-2.0", 70 | "PSF-2.1", 71 | "PSF-2.1.1", 72 | "PSF-2.3", 73 | "Public Domain", 74 | "Ruby License", 75 | "Sax Public Domain Notice", 76 | "SGI-B-1.0", 77 | "SGI-B-1.1", 78 | "TCL", 79 | 
"Unlicense", 80 | "UPL-1.0", 81 | "W3C", 82 | "VMW_Proprietary", 83 | "VMW_Proprietary_Component", 84 | "WTFPL", 85 | "X.Net", 86 | "X11", 87 | "zlib License", 88 | "zlib-acknowledgment License", 89 | "Expat", 90 | "Microsoft-Proprietary-SDK-License", 91 | "IJG", 92 | "ECL-2.0", 93 | "SSH-OpenSSH", 94 | "MIT-Modern-Variant", 95 | "libtiff", 96 | "X11", 97 | "BSD-3-Clause-acpica", 98 | "Unicode-DFS-2016", 99 | "NTP", 100 | "Cryptoki-Proprietary", 101 | "MinIO-Proprietary", 102 | "Highcharts-Proprietary", 103 | "Adobe-Glyph", 104 | "ADSL", 105 | "AML-glslang", 106 | "AML", 107 | "any-OSI", 108 | "Beerware", 109 | "Bison-exception-1.24", 110 | "Bison-exception-2.2", 111 | "Bitstream-Charter", 112 | "Bitstream-Vera", 113 | "blessing", 114 | "Boehm-GC", 115 | "Brian-Gladman-2-Clause", 116 | "BSD-2-Clause-Darwin", 117 | "BSD-2-Clause-first-lines", 118 | "BSD-2-Clause-Views", 119 | "BSD-3-Clause-flex", 120 | "BSD-3-Clause-LBNL", 121 | "BSD-3-Clause-Modification", 122 | "BSD-3-Clause-No-Military-License", 123 | "BSD-3-Clause-No-Nuclear-License", 124 | "BSD-3-Clause-No-Nuclear-Warranty", 125 | "BSD-3-Clause-Open-MPI", 126 | "BSD-3-Clause-Sun", 127 | "BSD-4-Clause-Shortened", 128 | "BSD-4.3RENO", 129 | "BSD-4.3TAHOE", 130 | "BSD-Attribution-HPND-disclaimer", 131 | "BSD-Source-beginning-file", 132 | "BSD-Source-Code", 133 | "BSD-Systemics", 134 | "Caldera-no-preamble", 135 | "CC-PDDC", 136 | "checkmk", 137 | "CMU-Mach-nodoc", 138 | "CMU-Mach", 139 | "Cronyx", 140 | "DOC", 141 | "DocBook-Schema", 142 | "dtoa", 143 | "EFL-1.0", 144 | "EFL-2.0", 145 | "etalab-2.0", 146 | "FreeBSD-DOC", 147 | "FSFAP-no-warranty-disclaimer", 148 | "FSFAP", 149 | "FSFUL", 150 | "FSFULLR", 151 | "FSFULLRWD", 152 | "Furuseth", 153 | "GCR-docs", 154 | "GD", 155 | "gtkbook", 156 | "HaskellReport", 157 | "hdparm", 158 | "HP-1986", 159 | "HP-1989", 160 | "HPND-doc-sell", 161 | "HPND-doc", 162 | "HPND-export-US-acknowledgement", 163 | "HPND-export-US-modify", 164 | "HPND-export-US", 165 | 
"HPND-export2-US", 166 | "HPND-Fenneberg-Livingston", 167 | "HPND-Kevlin-Henney", 168 | "HPND-Markus-Kuhn", 169 | "HPND-Pbmplus", 170 | "HPND-sell-variant-MIT-disclaimer", 171 | "HPND-sell-variant", 172 | "HPND-UC", 173 | "IBM-pibs", 174 | "Info-ZIP", 175 | "Inner-Net-2.0", 176 | "Intel", 177 | "ISC-Veillard", 178 | "Jam", 179 | "JPNIC", 180 | "JSON", 181 | "Kazlib", 182 | "libselinux-1.0", 183 | "LiLiQ-P-1.1", 184 | "Linux-man-pages-1-para", 185 | "Linux-OpenIB", 186 | "LLVM-exception", 187 | "LPD-document", 188 | "lsof", 189 | "Lucida-Bitmap-Fonts", 190 | "LZMA-SDK-9.22", 191 | "mailprio", 192 | "Martin-Birgmeier", 193 | "metamail", 194 | "MIT-0", 195 | "MIT-Festival", 196 | "MIT-Khronos-old", 197 | "MIT-open-group", 198 | "MIT-testregex", 199 | "MIT-Wu", 200 | "MPEG-SSG", 201 | "mplus", 202 | "Multics", 203 | "NAIST-2003", 204 | "NCGL-UK-2.0", 205 | "NCL", 206 | "NIST-PD-fallback", 207 | "NIST-PD", 208 | "NTP-0", 209 | "OFL-1.1-no-RFN", 210 | "OGC-1.0", 211 | "OGL-Canada-2.0", 212 | "OGL-UK-1.0", 213 | "OGL-UK-2.0", 214 | "OGL-UK-3.0", 215 | "OLDAP-2.0.1", 216 | "OML", 217 | "OpenSSL-standalone", 218 | "PADL", 219 | "PHP-3.0", 220 | "pkgconf", 221 | "radvd", 222 | "Rdisc", 223 | "RSA-MD", 224 | "SchemeReport", 225 | "Sendmail", 226 | "SGI-B-2.0", 227 | "SISSL", 228 | "SMLNJ", 229 | "snprintf", 230 | "softSurfer", 231 | "Spencer-86", 232 | "Spencer-94", 233 | "Spencer-99", 234 | "ssh-keyscan", 235 | "SSH-short", 236 | "SSLeay-standalone", 237 | "SunPro", 238 | "Swift-exception", 239 | "TCP-wrappers", 240 | "TermReadKey", 241 | "TMate", 242 | "TTWL", 243 | "TU-Berlin-1.0", 244 | "TU-Berlin-2.0", 245 | "Unicode-3.0", 246 | "Unicode-DFS-2015", 247 | "Unicode-TOU", 248 | "W3C-19980720", 249 | "W3C-20150513", 250 | "X11-distribute-modifications-variant", 251 | "Xfig", 252 | "XFree86-1.1", 253 | "xinetd", 254 | "xlock", 255 | "Zimbra-1.3", 256 | "Zimbra-1.4", 257 | "ZPL-2.1", 258 | "bcrypt-Solar-Designer", 259 | "MirOS", 260 | "DocBook-XML", 261 | 
"Genivia-Proprietary-License" 262 | ] 263 | } 264 | -------------------------------------------------------------------------------- /profile/README.md: -------------------------------------------------------------------------------- 1 | ![VMware Collaboration Image](https://github.com/vmware/.github/blob/main/profile/image1.jpg) 2 | 3 | # Welcome! 4 | 5 | Collaboration, community and curiosity - all essential to a vibrant open source ethos and part of VMware’s culture. You’ll find us throughout the open source community, contributing to projects like Kubernetes, Linux, and TensorFlow. But we’re also hard at work solving technical challenges with innovative approaches and releasing those ideas as new open source projects. We’re proud of the creativity and contributions of VMware employees - from small but mighty open source projects to large, headline-grabbing community projects. 6 | 7 | # Finding your way 8 | 9 | If you’re looking for open source projects that serve our technologies such as VMware vSphere® or VMware NSX®, start in the [vmware](https://github.com/vmware) org; you’ll also find projects that address emerging technologies such as blockchain, machine learning, AI and data science. For smaller sample projects and code snippets browse our [vmware-labs](https://github.com/vmware-labs) org and the aptly named [vmware-samples](https://github.com/vmware-samples) org. 10 | 11 | Throughout these collections you’ll discover scripts, libraries, APIs, templates as well as complete solutions such as [Versatile Data Kit](https://github.com/vmware/versatile-data-kit) or [VMware Event Broker Appliance](https://github.com/vmware-samples/vcenter-event-broker-appliance). 12 | 13 | Browse the [vmware-tanzu](https://github.com/vmware-tanzu/) or the [vmware-tanzu-labs](https://github.com/vmware-tanzu-labs/) org to discover cloud native and modern applications related open source - think of Kubernetes and all its surrounding ecosystem. 
Recommended projects to explore include [Sonobuoy](https://github.com/vmware-tanzu/sonobuoy), [Carvel](https://github.com/vmware-tanzu/carvel), [Pinniped](https://github.com/vmware-tanzu/pinniped), and the newest member, [Cartographer](https://github.com/vmware-tanzu/cartographer). 14 | 15 | There’s even more VMware-backed open source to experience in [Clarity](https://github.com/vmware/clarity), [Spring](https://github.com/spring-projects), [RabbitMQ](https://github.com/rabbitmq), [Project Salt](https://github.com/saltstack/salt), and [Greenplum](https://github.com/greenplum-db). 16 | 17 | # Get Started 18 | Join our open source community: explore, experiment, ask questions, and contribute. Follow us [on Twitter](https://twitter.com/vmwopensource) and check in on the latest news and project updates [at our blog](https://blogs.vmware.com/opensource/). 19 | 20 |
21 | GPL Commitment 22 |
23 | Before filing or continuing to prosecute any legal proceeding or claim (other than a Defensive Action) arising from termination of a Covered License, VMware commits to extend to the person or entity ("you") accused of violating the Covered License the following provisions regarding cure and reinstatement, taken from GPL version 3. As used here, the term 'this License' refers to the specific Covered License being enforced.
24 |
25 | However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.
26 |
27 | Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.
28 |
29 | VMware intends this Commitment to be irrevocable, and binding and enforceable. 30 |
31 | 32 | ## Definitions 33 | 34 |
35 | 'Covered License' means the GNU General Public License, version 2 (GPLv2), the GNU Lesser General Public License, version 2.1 (LGPLv2.1), or the GNU Library General Public License, version 2 (LGPLv2), all as published by the Free Software Foundation.
36 |
37 | 'Defensive Action' means a legal proceeding or claim that VMware brings against you in response to a prior proceeding or claim initiated by you or your affiliate. 38 |
39 | -------------------------------------------------------------------------------- /profile/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vmware/.github/82221468ce824ec9393dcae082e80ab5150f8556/profile/image1.jpg -------------------------------------------------------------------------------- /reports/secret_report_20250217_002349.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | photon,5,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/5,2023-07-28T02:40:07Z,2024-09-12T07:52:46Z, 3 | photon,4,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/4,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 4 | photon,3,Amazon AWS Secret Access Key,open,https://github.com/vmware/photon/security/secret-scanning/3,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 5 | photon,2,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/2,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 6 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 7 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 8 | vic,1,Slack Incoming Webhook URL,open,https://github.com/vmware/vic/security/secret-scanning/1,2023-10-02T02:22:38Z,2024-09-12T07:29:37Z, 9 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 10 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 11 | vic-ui,1,Slack Incoming Webhook 
URL,open,https://github.com/vmware/vic-ui/security/secret-scanning/1,2023-08-22T07:16:27Z,2024-09-12T07:29:37Z, 12 | mangle,1,Dynatrace API Token,open,https://github.com/vmware/mangle/security/secret-scanning/1,2023-10-03T11:20:20Z,2024-09-16T08:20:23Z, 13 | versatile-data-kit,5,Bitbucket Server Personal Access Token,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/5,2024-01-31T16:11:01Z,2024-01-31T16:11:01Z, 14 | versatile-data-kit,4,GitLab Access Token,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/4,2023-10-20T14:13:51Z,2023-10-20T14:13:51Z, 15 | versatile-data-kit,3,Slack Incoming Webhook URL,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/3,2023-06-28T14:08:44Z,2023-06-28T14:08:44Z, 16 | cloud-director-extension-standard-library,1,Slack Incoming Webhook URL,open,https://github.com/vmware/cloud-director-extension-standard-library/security/secret-scanning/1,2024-07-14T14:47:43Z,2024-09-11T22:06:23Z, 17 | -------------------------------------------------------------------------------- /reports/secret_report_20250224_002324.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | photon,5,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/5,2023-07-28T02:40:07Z,2024-09-12T07:52:46Z, 3 | photon,4,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/4,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 4 | photon,3,Amazon AWS Secret Access Key,open,https://github.com/vmware/photon/security/secret-scanning/3,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 5 | photon,2,Amazon AWS Access Key ID,open,https://github.com/vmware/photon/security/secret-scanning/2,2023-07-28T02:40:07Z,2024-12-27T10:23:04Z, 6 | workflowTools,2,Slack API 
Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 7 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 8 | vic,1,Slack Incoming Webhook URL,open,https://github.com/vmware/vic/security/secret-scanning/1,2023-10-02T02:22:38Z,2024-09-12T07:29:37Z, 9 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 10 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 11 | vic-ui,1,Slack Incoming Webhook URL,open,https://github.com/vmware/vic-ui/security/secret-scanning/1,2023-08-22T07:16:27Z,2024-09-12T07:29:37Z, 12 | mangle,1,Dynatrace API Token,open,https://github.com/vmware/mangle/security/secret-scanning/1,2023-10-03T11:20:20Z,2024-09-16T08:20:23Z, 13 | versatile-data-kit,5,Bitbucket Server Personal Access Token,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/5,2024-01-31T16:11:01Z,2024-01-31T16:11:01Z, 14 | versatile-data-kit,4,GitLab Access Token,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/4,2023-10-20T14:13:51Z,2023-10-20T14:13:51Z, 15 | versatile-data-kit,3,Slack Incoming Webhook URL,open,https://github.com/vmware/versatile-data-kit/security/secret-scanning/3,2023-06-28T14:08:44Z,2023-06-28T14:08:44Z, 16 | cloud-director-extension-standard-library,1,Slack Incoming Webhook URL,open,https://github.com/vmware/cloud-director-extension-standard-library/security/secret-scanning/1,2024-07-14T14:47:43Z,2024-09-11T22:06:23Z, 17 | -------------------------------------------------------------------------------- /reports/secret_report_20250303_002412.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret 
Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | mangle,1,Dynatrace API Token,open,https://github.com/vmware/mangle/security/secret-scanning/1,2023-10-03T11:20:20Z,2024-09-16T08:20:23Z, 7 | -------------------------------------------------------------------------------- /reports/secret_report_20250310_002003.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | mangle,1,Dynatrace API Token,open,https://github.com/vmware/mangle/security/secret-scanning/1,2023-10-03T11:20:20Z,2024-09-16T08:20:23Z, 7 | -------------------------------------------------------------------------------- /reports/secret_report_20250317_002435.csv: 
-------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250324_002442.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250331_002538.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | 
workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250407_002459.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250414_002556.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API 
Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250421_002953.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access Key,open,https://github.com/vmware/alb-sdk/security/secret-scanning/2,2023-07-18T09:03:11Z,2024-12-27T10:45:58Z, 5 | alb-sdk,1,Amazon AWS Access Key ID,open,https://github.com/vmware/alb-sdk/security/secret-scanning/1,2023-07-18T09:03:10Z,2024-12-27T10:45:58Z, 6 | -------------------------------------------------------------------------------- /reports/secret_report_20250428_002604.csv: -------------------------------------------------------------------------------- 1 | Repository,Alert ID,Secret Type,State,Alert URL,Created At,Updated At,Resolved Reason 2 | workflowTools,2,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/2,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 3 | workflowTools,1,Slack API Token,open,https://github.com/vmware/workflowTools/security/secret-scanning/1,2023-08-20T22:15:10Z,2024-12-27T10:15:13Z, 4 | alb-sdk,2,Amazon AWS Secret Access 
import os
import json
import re
from github import Github, GithubException


def check_license(repo):
    """Check whether *repo* has a permissive license.

    The list of permissive license identifiers is loaded from the
    organization's ``.github`` repository
    (``.github/permissive_licenses.json``), and the first 20 lines of the
    repository's license file are matched against it.

    Returns:
        tuple[bool, str]: ``(is_permissive, human_readable_reason)``.
    """
    try:
        # Load permissive licenses from the organization's .github repository.
        org_github_repo = repo.organization.get_repo(".github")
        license_file = org_github_repo.get_contents(".github/permissive_licenses.json")
        permissive_licenses = json.loads(license_file.decoded_content.decode())['permissive']
        # FIX: escape each identifier before building the alternation.
        # License IDs such as "Apache-2.0" or "GPL-2.0+" contain regex
        # metacharacters ('.', '+') that previously changed the match.
        pattern = '|'.join(re.escape(lic) for lic in permissive_licenses)

        # Look for a LICENSE/COPYING file at the repository root.
        license_content = None
        try:
            contents = repo.get_contents("")
            for content in contents:
                if content.name.lower().startswith(('license', 'copying')):
                    license_content = content.decoded_content.decode()
                    break
        except GithubException as e:
            if e.status == 404:
                return False, "No license file found"
            raise

        if not license_content:
            return False, "No license file found"

        # Only the first 20 lines are scanned; common license texts name
        # the license in their header.
        first_20_lines = '\n'.join(license_content.split('\n')[:20])
        if re.search(fr'\b({pattern})\b', first_20_lines, re.IGNORECASE):
            return True, "Permissive license found"

        return False, "Non-permissive license found"

    except Exception as e:
        # Broad by design: any failure is reported to the caller, not raised.
        return False, f"Error checking license: {str(e)}"


def install_trigger(repo):
    """Install the CLA trigger workflow from the org-level template.

    Copies ``.github/templates/cla-trigger-template.yml`` from the
    organization's ``.github`` repository into
    ``.github/workflows/cla-trigger.yml`` of *repo*, creating or updating
    the file as needed.

    Returns:
        bool: True on success, False on any error (logged to stdout).
    """
    try:
        # Get the template from the organization's .github repository.
        org_github_repo = repo.organization.get_repo(".github")
        template = org_github_repo.get_contents(".github/templates/cla-trigger-template.yml")
        workflow_content = template.decoded_content.decode()

        # Ensure the workflows directory exists (GitHub cannot hold empty
        # directories, so a .gitkeep file is committed to materialize it).
        try:
            repo.get_contents('.github/workflows')
        except GithubException as e:
            if e.status == 404:
                repo.create_file('.github/workflows/.gitkeep', 'Create workflows directory', '')
            else:
                raise

        # Create or update the workflow file.
        workflow_path = '.github/workflows/cla-trigger.yml'
        try:
            existing_file = repo.get_contents(workflow_path)
            repo.update_file(workflow_path, 'Update CLA trigger', workflow_content, existing_file.sha)
        except GithubException as e:
            if e.status == 404:
                repo.create_file(workflow_path, 'Add CLA trigger', workflow_content)
            else:
                raise

        return True
    except Exception as e:
        print(f"Error installing trigger in {repo.full_name}: {str(e)}")
        return False


def main():
    """Scan org repositories for license compliance and install CLA triggers.

    Reads ORG_TOKEN (required) plus EXCLUDED_REPOS / INCLUDED_REPOS
    (comma-separated full names, optional) from the environment, writes
    ``scan_results.json``, and returns a process exit code
    (0 = clean, 1 = findings or errors).
    """
    # Authenticate with the bot's token.
    token = os.environ.get('ORG_TOKEN')
    if not token:
        print("Error: ORG_TOKEN environment variable not set.")
        return 1

    g = Github(token)
    user = g.get_user()

    # Get included/excluded repos from environment variables.
    excluded_repos = [r.strip() for r in os.environ.get('EXCLUDED_REPOS', '').split(',') if r.strip()]
    included_repos = [r.strip() for r in os.environ.get('INCLUDED_REPOS', '').split(',') if r.strip()]

    # Repositories accessible to the bot via org membership.
    repos = user.get_repos(affiliation="organization_member", visibility="all")

    results = {
        'non_permissive': [],
        'trigger_installed': [],
        'excluded': excluded_repos,
        'errors': []
    }

    for repo in repos:
        repo_full_name = repo.full_name

        # Apply include/exclude filters.
        if repo_full_name in excluded_repos:
            continue
        if included_repos and repo_full_name not in included_repos:
            continue

        print(f"Processing: {repo_full_name}")

        try:
            # License check.
            is_permissive, msg = check_license(repo)
            if not is_permissive:
                results['non_permissive'].append({'repo': repo_full_name, 'reason': msg})

            # Install workflow.
            if install_trigger(repo):
                results['trigger_installed'].append(repo_full_name)
            else:
                results['errors'].append(f"Failed to install trigger in {repo_full_name}")

        except Exception as e:
            results['errors'].append(f"{repo_full_name}: {str(e)}")
            continue

    # Save results for the calling workflow.
    with open('scan_results.json', 'w') as f:
        json.dump(results, f, indent=2)

    if results['non_permissive'] or results['errors']:
        print("Scan completed with findings:")
        print(json.dumps(results, indent=2))
        return 1

    print("Scan completed successfully - no issues found")
    return 0


if __name__ == '__main__':
    exit(main())
class GitHubClient:
    """Minimal GitHub REST API client with retries, timeouts and
    rate-limit awareness, used by the dependency scanner."""

    def __init__(self, token, max_retries=3, timeout=10):
        """Create a client.

        Args:
            token: GitHub token, sent as a Bearer credential.
            max_retries: retry budget for transient GET failures.
            timeout: per-request timeout in seconds.
        """
        self.token = token
        self.base_url = "https://api.github.com"
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github+json",  # Required by the SBOM API.
            "X-GitHub-Api-Version": "2022-11-28",
            "User-Agent": "dependency-alerts-report-script"
        }
        self.max_retries = max_retries
        self.timeout = timeout
        self.session = self._create_session()
        self.logger = Logger()
        # Populated lazily from response headers / validate_token().
        self.rate_limit_remaining = None
        self.rate_limit_reset = None

    def _create_session(self):
        """Build a requests session that retries transient GET failures."""
        session = requests.Session()
        session.headers.update(self.headers)
        retry_strategy = Retry(
            total=self.max_retries,
            backoff_factor=2,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET"]  # Only retry GET requests.
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("https://", adapter)
        session.mount("http://", adapter)
        return session

    def _handle_rate_limit(self):
        """Sleep until the rate-limit window resets when nearly exhausted."""
        if self.rate_limit_remaining is None:
            self.validate_token()

        if self.rate_limit_remaining < 50:  # Conservative threshold.
            wait_time = (self.rate_limit_reset - datetime.now()).total_seconds() + 5
            if wait_time > 0:
                logging.info(f"Rate limit approaching. Waiting for {wait_time:.0f} seconds.")
                time.sleep(wait_time)
                self.validate_token()  # Re-validate after waiting.

    def _request(self, method, url, **kwargs):
        """Issue one HTTP request, tracking rate-limit headers.

        Raises:
            requests.exceptions.RequestException: on timeout or HTTP error.
        """
        self._handle_rate_limit()
        try:
            response = self.session.request(method, url, timeout=self.timeout, **kwargs)
            response.raise_for_status()  # Raise HTTPError for 4xx/5xx.

            if 'X-RateLimit-Remaining' in response.headers:
                self.rate_limit_remaining = int(response.headers['X-RateLimit-Remaining'])
                self.rate_limit_reset = datetime.fromtimestamp(int(response.headers['X-RateLimit-Reset']))
            return response

        except requests.exceptions.Timeout:
            # FIX: this clause must come BEFORE RequestException -- Timeout
            # is a subclass of RequestException, so with the old ordering
            # this branch was unreachable.
            logging.error(f"Request to {url} timed out after {self.timeout} seconds.")
            raise
        except requests.exceptions.RequestException as e:
            logging.exception(f"Request failed: {e}")
            raise

    def validate_token(self):
        """Validates the GitHub token and retrieves initial rate limit information."""
        url = f"{self.base_url}/rate_limit"
        try:
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            rate_limit = response.json()['resources']['core']
            self.rate_limit_remaining = rate_limit['remaining']
            self.rate_limit_reset = datetime.fromtimestamp(rate_limit['reset'])

            logging.info(f"Rate Limit: {self.rate_limit_remaining} remaining. Reset at {self.rate_limit_reset}")
        except requests.exceptions.RequestException as e:
            logging.exception(f"Token validation failed: {e}")
            raise

    def get_repositories(self, org_name, repo_list=None):
        """Retrieves a list of repositories to scan. Prioritizes org, then list.

        Returns:
            list[dict]: entries of the form {"name": ..., "owner": ...}.
        """
        repositories = []
        if org_name:
            # Fetch all repos in the organization (with pagination).
            url = f"{self.base_url}/orgs/{org_name}/repos?per_page=100"
            while url:
                response = self._request("GET", url)
                for repo in response.json():
                    repositories.append({"name": repo["name"], "owner": repo["owner"]["login"]})
                url = response.links.get("next", {}).get("url")

        elif repo_list:
            # Use the provided comma-separated list; bare names default to
            # the owner of the current GITHUB_REPOSITORY.
            for repo_name in repo_list.split(","):
                parts = repo_name.strip().split("/")
                if len(parts) == 2:
                    owner, repo = parts
                else:
                    owner = os.environ.get("GITHUB_REPOSITORY", "/").split("/")[0]
                    repo = parts[0]
                repositories.append({"name": repo, "owner": owner})
        else:
            # Default to the current repository.
            full_repo = os.environ.get("GITHUB_REPOSITORY")
            if not full_repo:
                raise ValueError("GITHUB_REPOSITORY environment variable is not set.")
            owner, repo = full_repo.split("/")
            repositories.append({"name": repo, "owner": owner})

        return repositories

    def get_dependabot_alerts(self, owner, repo_name):
        """Retrieves open Dependabot alerts for one repository (paginated).

        Returns [] when the endpoint responds 404 (Dependabot disabled or
        repository not found).
        """
        alerts = []
        url = f"{self.base_url}/repos/{owner}/{repo_name}/dependabot/alerts?per_page=100&state=open"
        try:
            while url:
                response = self._request("GET", url)
                alerts.extend(response.json())
                url = response.links.get("next", {}).get("url")
        except requests.exceptions.HTTPError as e:
            # FIX: _request() already raises on 404 via raise_for_status(),
            # so the old post-hoc `status_code == 404` check was dead code
            # and 404s crashed the worker instead of being skipped.
            if e.response is not None and e.response.status_code == 404:
                logging.info(f"Dependabot alerts not available or repo not found for {owner}/{repo_name}.")
                return []
            raise
        return alerts

    def get_sbom_dependencies(self, owner, repo_name):
        """Return {package_name: version} extracted from the repo's SPDX SBOM.

        Returns an empty dict on any request failure so a missing SBOM does
        not abort the scan.
        """
        url = f"{self.base_url}/repos/{owner}/{repo_name}/dependency-graph/sbom"
        try:
            response = self._request("GET", url)
            sbom_data = response.json()
            dependencies = {}
            # Extract dependency information from the SBOM packages array.
            for package in sbom_data.get("sbom", {}).get("packages", []):
                if "name" in package and "versionInfo" in package:
                    dependencies[package["name"]] = package["versionInfo"]
            return dependencies
        except requests.exceptions.RequestException as e:
            logging.exception(f"Failed to get SBOM for {owner}/{repo_name}: {e}")
            return {}  # Return empty dict in case of failure.
class DependencyScanner:
    """
    Scans GitHub repositories for vulnerable dependencies using the
    Dependabot alerts API and writes the findings to a CSV report.
    """

    def __init__(self, github_token, org_name=None, repo_list=None, log_level='INFO', max_workers=10, max_retries=3):
        """
        Initializes the DependencyScanner.
        """
        self.github_token = github_token
        self.org_name = org_name
        self.repo_list = repo_list
        self.max_workers = max_workers
        self.client = GitHubClient(github_token, max_retries)
        self.logger = Logger(log_level)  # Use the custom Logger class
        self.total_vulnerabilities = 0
        self.processed_repos = 0

    def _resolve_report_path(self, filename):
        """Return reports/<filename>, generating a timestamped default name
        and creating the reports directory when needed."""
        if filename is None:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"vulnerability_report_{stamp}.csv"
        reports_dir = "reports"
        os.makedirs(reports_dir, exist_ok=True)
        return os.path.join(reports_dir, filename)

    def _alert_to_row(self, repo, alert, current_versions):
        """Build a single CSV row dict from one Dependabot alert payload."""
        package_name = alert.get("dependency", {}).get("package", {}).get("name", "N/A")
        # Current version comes from the SBOM snapshot fetched per repo.
        current_version = current_versions.get(package_name, "N/A")

        advisory = alert.get("security_advisory", {})
        # security_vulnerability (not the vulnerabilities array) carries the
        # per-package range and patch info.
        vulnerability = alert.get("security_vulnerability", {})
        vulnerable_range = vulnerability.get("vulnerable_version_range", "N/A")
        severity = advisory.get("severity", "N/A")
        alert_url = alert.get("html_url", "N/A")
        # Excel hyperlink formula so the severity cell links to the alert.
        severity_link = f'=HYPERLINK("{alert_url}", "{severity}")'

        first_patched = vulnerability.get("first_patched_version", {})
        update_available = first_patched.get("identifier", "N/A") if first_patched else "N/A"

        return {
            "Repository Name": f"{repo['owner']}/{repo['name']}",
            "Package Name": package_name,
            "Current Version": current_version,
            "Vulnerable Versions": vulnerable_range,
            "Severity": severity_link,  # Use the hyperlink formula
            "Update Available": update_available
        }

    def _write_csv(self, filepath, rows):
        """Write the collected rows to *filepath* and log the location."""
        fieldnames = [
            "Repository Name",
            "Package Name",
            "Current Version",
            "Vulnerable Versions",
            "Severity",
            "Update Available",
        ]
        with open(filepath, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        logging.info(f"CSV report generated: {filepath}")

    def generate_csv_report(self, filename=None):
        """Generates a CSV report of vulnerable dependencies."""
        filepath = self._resolve_report_path(filename)

        repositories = self.client.get_repositories(self.org_name, self.repo_list)
        if not repositories:
            logging.warning("No repositories found to scan.")
            print("__NO_REPOS__")
            return

        collected = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            pending = {
                executor.submit(self.client.get_dependabot_alerts, repo["owner"], repo["name"]): repo
                for repo in repositories
            }

            for future in as_completed(pending):
                repo = pending[future]
                try:
                    alerts = future.result()
                    self.processed_repos += 1
                    logging.info(f"Processed {repo['owner']}/{repo['name']}: Found {len(alerts)} alerts.")

                    # Fetch all current dependency versions from the SBOM
                    # exactly once per repository.
                    current_versions = self.client.get_sbom_dependencies(repo['owner'], repo['name'])

                    for alert in alerts:
                        try:
                            collected.append(self._alert_to_row(repo, alert, current_versions))
                            self.total_vulnerabilities += 1
                        except KeyError as e:
                            logging.warning(f"Missing key in alert data for repo {repo['owner']}/{repo['name']}: {e}. Skipping.")
                            print(f"KeyError: {e}")
                            continue
                        except Exception as e:
                            logging.exception(f"Error processing alert data for repo {repo['owner']}/{repo['name']}: {e}. Skipping.")
                            print(f"Other Exception: {e}")
                            continue
                except Exception as e:
                    logging.exception(f"Error processing repo {repo['owner']}/{repo['name']}: {e}")

        if not collected:
            logging.info("No vulnerabilities found.")
            return

        self._write_csv(filepath, collected)

    def run_scan(self, filename=None):
        """Runs the complete scan and report generation."""
        self.generate_csv_report(filename)

    def get_stats(self):
        """Return scan counters for the calling workflow."""
        return {"total": self.total_vulnerabilities, "processed_repos": self.processed_repos}


def main():
    """CLI entry point for the dependency scanner."""
    parser = argparse.ArgumentParser(description="GitHub Dependency Scanner")
    parser.add_argument("--token", required=True, help="GitHub token")
    parser.add_argument("--output", required=True, help="Output CSV file path")
    parser.add_argument("--org", help="GitHub organization name (optional)")
    parser.add_argument("--repo-list", help="Comma-separated list of repositories (optional)")
    parser.add_argument("--log-level", default="INFO", help="Logging level (default: INFO)")
    parser.add_argument("--max-workers", type=int, default=10, help="Maximum concurrent workers (default: 10)")
    parser.add_argument("--max-retries", type=int, default=3, help="Maximum retries for API requests (default: 3)")

    args = parser.parse_args()

    if not args.org and not args.repo_list and not os.environ.get("GITHUB_REPOSITORY"):
        print("Error: Must specify either --org, --repo-list, or run within a GitHub Actions context.", file=sys.stderr)
        sys.exit(1)

    scanner = DependencyScanner(
        github_token=args.token,
        org_name=args.org,
        repo_list=args.repo_list,
        log_level=args.log_level,
        max_workers=args.max_workers,
        max_retries=args.max_retries
    )
    scanner.run_scan(args.output)
    stats = scanner.get_stats()
    print(f"__STATS_START__total={stats['total']},processed_repos={stats['processed_repos']}__STATS_END__")


if __name__ == "__main__":
    main()
class GitHubClient:
    """GitHub REST API client used by the secret scanner, with retries
    and rate-limit handling."""

    def __init__(self, token, max_retries=3, timeout=10):
        """Create a client.

        Args:
            token: GitHub token (sent via the `token` authorization scheme).
            max_retries: retry budget for transient GET failures.
            timeout: per-request timeout in seconds. New, defaulted parameter
                (backward compatible) mirroring dependency_scanner's client.
        """
        self.token = token
        self.base_url = "https://api.github.com"
        self.headers = {"Authorization": f"token {self.token}", "Accept": "application/vnd.github.v3+json"}
        self.max_retries = max_retries
        self.timeout = timeout
        self.session = self._create_session()
        self.logger = Logger()
        # Populated lazily from response headers / validate_token().
        self.rate_limit_remaining = None
        self.rate_limit_reset = None

    def _create_session(self):
        """Build a requests session that retries transient GET failures."""
        session = requests.Session()
        session.headers.update(self.headers)
        retry_strategy = Retry(
            total=self.max_retries,
            backoff_factor=2,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET"]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("https://", adapter)
        session.mount("http://", adapter)
        return session

    def _handle_rate_limit(self):
        """Sleep until the rate-limit window resets when nearly exhausted."""
        if self.rate_limit_remaining is None:
            self.validate_token()

        if self.rate_limit_remaining < 10:
            wait_time = (self.rate_limit_reset - datetime.now()).total_seconds() + 5
            if wait_time > 0:
                logging.info(f"Rate limit approaching. Waiting for {wait_time:.0f} seconds.")
                time.sleep(wait_time)
                self.validate_token()

    def _request(self, method, url, **kwargs):
        """Issue one HTTP request, tracking rate-limit headers.

        Raises:
            requests.exceptions.RequestException: on timeout or HTTP error.
        """
        self._handle_rate_limit()
        try:
            # FIX: pass an explicit timeout -- without one, a stalled
            # connection could hang the whole scan indefinitely.
            response = self.session.request(method, url, timeout=self.timeout, **kwargs)
            response.raise_for_status()

            if 'X-RateLimit-Remaining' in response.headers:
                self.rate_limit_remaining = int(response.headers['X-RateLimit-Remaining'])
                self.rate_limit_reset = datetime.fromtimestamp(int(response.headers['X-RateLimit-Reset']))

            return response
        except requests.exceptions.RequestException as e:
            logging.exception(f"Request failed: {e}")
            raise

    def validate_token(self):
        """Validate the token and refresh rate-limit bookkeeping."""
        url = f"{self.base_url}/rate_limit"
        try:
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            rate_limit = response.json()['resources']['core']
            self.rate_limit_remaining = rate_limit['remaining']
            self.rate_limit_reset = datetime.fromtimestamp(rate_limit['reset'])

            logging.info(f"Rate Limit: {self.rate_limit_remaining} remaining. Reset at {self.rate_limit_reset}")
        except requests.exceptions.RequestException as e:
            logging.exception(f"Token validation failed: {e}")
            raise

    def fetch_repositories(self, org):
        """Return all repositories of *org* (follows pagination links)."""
        repos = []
        url = f"{self.base_url}/orgs/{org}/repos?per_page=100"
        try:
            while url:
                response = self._request("GET", url)
                repos.extend(response.json())
                url = response.links.get('next', {}).get('url')
        except requests.exceptions.RequestException as e:
            logging.exception(f"Failed to fetch repositories for {org}: {e}")
            raise
        return repos

    def fetch_secret_alerts(self, org, repo, state="open"):
        """Return secret-scanning alerts of *repo* in the given state
        (follows pagination links)."""
        alerts = []
        url = f"{self.base_url}/repos/{org}/{repo}/secret-scanning/alerts?per_page=100&state={state}"
        try:
            while url:
                response = self._request("GET", url)
                alerts.extend(response.json())
                url = response.links.get('next', {}).get('url')
        except requests.exceptions.RequestException as e:
            logging.exception(f"Failed to fetch {state} alerts for {repo}: {e}")
            raise
        return alerts
class SecretScanner:
    """Generates a CSV report of secret-scanning alerts across an organization."""

    def __init__(self, org, token, output_file, include_inactive=False, log_level='INFO', max_workers=10, max_retries=3):
        self.org = org
        self.token = token
        self.output_file = output_file  # Relative to the .github repo checkout.
        self.include_inactive = include_inactive
        self.max_workers = max_workers
        self.client = GitHubClient(self.token, max_retries)
        self.logger = Logger(log_level)
        # Counters reported via get_stats() after the run.
        self.total_alerts = 0
        self.inactive_alerts = 0
        self.active_alerts = 0

    def generate_report(self):
        """Fetch alerts for every repo in the org and write the CSV report.

        Exits the process with status 1 on unrecoverable failure.
        """
        try:
            self.client.validate_token()
            repos = self.client.fetch_repositories(self.org)

            if not repos:
                logging.warning("No repositories found in the organization.")
                print("__NO_REPOS__")
                return

            with open(self.output_file, mode='w', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                writer.writerow(["Repository", "Alert ID", "Secret Type", "State", "Alert URL", "Created At", "Updated At", "Resolved Reason"])

                with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                    future_to_repo = {}
                    for repo in repos:
                        future_to_repo[executor.submit(self.client.fetch_secret_alerts, self.org, repo['name'], "open")] = (repo, "open")

                    if self.include_inactive:
                        # FIX: the secret-scanning alerts API only accepts
                        # state=open or state=resolved; the former extra pass
                        # with state="fixed" could never succeed and only
                        # produced API errors in the log.
                        for repo in repos:
                            future_to_repo[executor.submit(self.client.fetch_secret_alerts, self.org, repo['name'], "resolved")] = (repo, "resolved")

                    for future in as_completed(future_to_repo):
                        (repo, state) = future_to_repo[future]
                        try:
                            alerts = future.result()
                            logging.info(f"Processing {repo['name']} ({state} alerts): Found {len(alerts)} alerts.")
                            for alert in alerts:
                                self.total_alerts += 1
                                if state == "open":
                                    self.active_alerts += 1
                                else:
                                    self.inactive_alerts += 1

                                # FIX: resolution_comment may be null in the
                                # payload; coerce to '' so the CSV never
                                # contains the literal string "None".
                                resolved_reason = (alert.get('resolution_comment') or '') if state == 'resolved' else ''

                                writer.writerow([
                                    repo['name'],
                                    alert['number'],
                                    alert.get('secret_type_display_name', alert.get('secret_type', 'Unknown')),
                                    alert['state'],
                                    alert['html_url'],
                                    alert['created_at'],
                                    alert['updated_at'],
                                    resolved_reason
                                ])
                        except Exception as e:
                            logging.exception(f"Error processing alerts for {repo['name']}: {e}")

            logging.info(f"Report generated: {self.output_file}")
            logging.info(f"Total alerts found: {self.total_alerts}")
            logging.info(f"Active alerts: {self.active_alerts}")
            logging.info(f"Inactive alerts: {self.inactive_alerts}")

        except Exception as e:
            logging.exception(f"Failed to generate report: {e}")
            sys.exit(1)

    def get_stats(self):
        """Return alert counters for the calling workflow."""
        return {"total": self.total_alerts, "active": self.active_alerts, "inactive": self.inactive_alerts}


def main():
    """CLI entry point for the secret scanner."""
    parser = argparse.ArgumentParser(description="GitHub Secret Scanner")
    parser.add_argument("--org", required=True, help="GitHub organization name")
    parser.add_argument("--token", required=True, help="GitHub token")
    parser.add_argument("--output", required=True, help="Output CSV file path")
    parser.add_argument("--include-inactive", action='store_true', help="Include inactive alerts in the report")
    parser.add_argument("--log-level", default="INFO", help="Logging level")
    parser.add_argument("--max-workers", type=int, default=10, help="Maximum concurrent workers")
    parser.add_argument("--max-retries", type=int, default=3, help="Maximum retries for API requests")

    args = parser.parse_args()

    try:
        scanner = SecretScanner(args.org, args.token, args.output, args.include_inactive, args.log_level, args.max_workers, args.max_retries)
        scanner.generate_report()
        stats = scanner.get_stats()
        print(f"__STATS_START__total={stats['total']},active={stats['active']},inactive={stats['inactive']}__STATS_END__")

    except Exception as e:
        logging.exception(f"An error occurred: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
| "created_at": "2025-05-20T12:58:48Z", 8 | "repoId": 921107922, 9 | "pullRequestNo": 14 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /templates/cla-trigger-template.yml: -------------------------------------------------------------------------------- 1 | name: CLA Check Trigger Template 2 | 3 | on: 4 | pull_request_target: 5 | types: [opened, synchronize, closed] 6 | issue_comment: 7 | types: [created] 8 | jobs: 9 | cla-check-trigger: 10 | uses: ${{ github.repository_owner }}/.github/.github/workflows/cla-workflow.yml@main 11 | secrets: inherit 12 | --------------------------------------------------------------------------------