├── COPYING ├── README.md └── githump.sh /COPYING: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # githump 2 | 3 | Simple bash script that loots email addresses from commit entries. Email addresses are set via user config when pushing changes up to github. Running `git log` against a repository shows a list of commits, from which email addresses can be parsed. The `githump` script enumerates all repositories for a target organization or user and then extracts email addresses from the commit logs of each repository. Finally, all unique emails are extracted from the intermediary results and saved off in the `results` directory. 4 | 5 | # Usage 6 | 7 | Usage is easy: `./githump.sh TARGET` where `TARGET` is the github account username. For example, `./githump.sh SalesforceEng` to target everything at https://github.com/SalesforceEng. 8 | 9 | # TODO 10 | 11 | Future improvements to be enumerated here. 12 | 13 | ## Historical Aggregation 14 | 15 | This is more of a documentation and calling issue. Write up instructions for running as a repeated task and collecting email addresses historically. This could be done by pushing result commits up to bitbucket or another version control repository. 16 | 17 | ## New Results Only 18 | 19 | The JSON results from the github API include an `updated_at` key-value pair.
# -----------------------------------------------------------
# README.md, "New Results Only" (continued):
#   If running `githump` daily and aggregating results, a check should be
#   added to only clone and search repositories that have been updated
#   since the previous run.
# -----------------------------------------------------------
# /githump.sh:
# -----------------------------------------------------------
#!/bin/bash
# -----------------------------------------------------------
# githump clones all repositories for a specified user/org
# then extracts all unique authors from the commit history.
#
# Usage:  ./githump.sh TARGET [TARGET ...]
# Output: ./results/TARGET.txt with one unique email address
#         per line, for every TARGET that passes validation.
# Needs:  curl, git, grep, awk, sort, find, mktemp
# -----------------------------------------------------------


# -----------------------------------------------------------
# color configuration
# (tput fails when TERM is unset or unknown; fall back to
# empty strings instead of printing tput's error)
# -----------------------------------------------------------
blue=$(tput setaf 4 2>/dev/null || true)
green=$(tput setaf 2 2>/dev/null || true)
red=$(tput setaf 1 2>/dev/null || true)
rst=$(tput sgr0 2>/dev/null || true)

error="$red-$rst"
info="$blue*$rst"
success="$green+$rst"


# -----------------------------------------------------------
# runtime configuration
# -----------------------------------------------------------
count=0
temp_dir=""          # created with mktemp once arguments are validated
per_page=100         # page size requested from the GitHub API
max_pages=50         # hard stop so a misbehaving response cannot loop forever
email_regex='\b[a-zA-Z0-9_.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b'


# -----------------------------------------------------------
# utility logging functions
# each takes one argument: the message to print
# -----------------------------------------------------------
function log_error() {
  echo "[$error] ${1}"
}

function log_info() {
  echo "[$info] ${1}"
}

function log_success() {
  echo "[$success] ${1}"
}


# -----------------------------------------------------------
# welcome banner
# -----------------------------------------------------------
function welcome() {
  log_success "githump: Loaded at $(date)"
}


# -----------------------------------------------------------
# print usage and exit
# -----------------------------------------------------------
function usage() {
  log_error "Missing required target organization or user."
  log_error "Org or user is the account name from https://github.com/"
  log_error "Example: $0 rapid7 (for https://github.com/rapid7)"
  log_error "Usage: $0 TARGET [TARGET ...]"
  exit 1
}


# -----------------------------------------------------------
# remove the scratch directory; installed as the EXIT trap so
# it runs on every exit path once the directory exists
# -----------------------------------------------------------
function cleanup() {
  [[ -n "${temp_dir}" ]] && rm -rf -- "${temp_dir}"
}


# -----------------------------------------------------------
# succeed when $1 looks like a GitHub account name:
# 1-39 characters, alphanumerics separated by single hyphens,
# no leading or trailing hyphen
# -----------------------------------------------------------
function is_valid_target() {
  local name="${1}"
  local pattern='^[A-Za-z0-9](-?[A-Za-z0-9])*$'

  (( ${#name} <= 39 )) && [[ "${name}" =~ ${pattern} ]]
}


# -----------------------------------------------------------
# print the repository URLs of an account, one per line
#   $1 - api collection: "orgs" or "users"
#   $2 - account name
# Pages through the listing until a short or empty page.
# Only html_url values shaped like https://github.com/OWNER/REPO
# are kept, which leaves out the owner's own profile URL.
# An API error body contains no such value, so it simply
# yields no output.
# -----------------------------------------------------------
function list_repos() {
  local collection="${1}"
  local account="${2}"
  local page=1
  local urls
  local found

  while (( page <= max_pages )); do
    urls=$(curl -s "https://api.github.com/${collection}/${account}/repos?per_page=${per_page}&page=${page}" \
      | grep -E -o '"html_url": *"https://github\.com/[^/"]+/[^/"]+"' \
      | awk -F '"' '{print $4}' \
      | LC_ALL=C sort -u)

    [[ -z "${urls}" ]] && break
    printf '%s\n' "${urls}"

    # a page that is not full is the last one; stop here to
    # avoid spending another API request on an empty page
    found=$(printf '%s\n' "${urls}" | grep -c '')
    (( found < per_page )) && break

    page=$((page + 1))
  done
}


# -----------------------------------------------------------
# clone every repository of an account and store the email
# addresses found in its commit authors
#   $1 - api collection: "orgs" or "users"
#   $2 - account name
# Writes ${temp_dir}/ACCOUNT/REPO/REPO.results per repository.
# A repository that already has a results file (harvested via
# the other collection) is skipped; the final output is
# de-duplicated anyway, so this only saves a second clone.
# -----------------------------------------------------------
function collect_emails() {
  local collection="${1}"
  local account="${2}"
  local repo repo_dir output_dir output_file clone_dir emails total

  while IFS= read -r repo; do
    # -----------------------------------------------------------
    # set up the results directory and file
    # -----------------------------------------------------------
    repo_dir=$(basename "${repo}")
    output_dir="${temp_dir}/${account}/${repo_dir}"
    output_file="${output_dir}/${repo_dir}.results"
    clone_dir="${temp_dir}/clones/${account}/${repo_dir}"

    [[ -e "${output_file}" ]] && continue

    # -----------------------------------------------------------
    # clone the repo into the scratch area (never the caller's
    # working directory). GIT_TERMINAL_PROMPT=0 makes git fail
    # instead of asking for credentials, and stdin is detached
    # so git cannot consume the repository list feeding this loop.
    # -----------------------------------------------------------
    if ! GIT_TERMINAL_PROMPT=0 git clone -q --no-checkout --filter=blob:none \
        "${repo}" "${clone_dir}" < /dev/null; then
      log_error "Unable to clone ${repo}, skipping."
      rm -rf -- "${clone_dir}"
      continue
    fi

    # -----------------------------------------------------------
    # extract email addresses from the unique author lines; the
    # explicit format keeps this independent of the user's git
    # log configuration, and the log is only read once
    # -----------------------------------------------------------
    emails=$(git -C "${clone_dir}" log --all --format='%an %ae' \
      | LC_ALL=C sort -u \
      | grep -E -o "${email_regex}")

    # -----------------------------------------------------------
    # remove the repo
    # -----------------------------------------------------------
    rm -rf -- "${clone_dir}"

    # -----------------------------------------------------------
    # store the results and update user with status
    # -----------------------------------------------------------
    mkdir -p "${output_dir}"
    if [[ -n "${emails}" ]]; then
      printf '%s\n' "${emails}" > "${output_file}"
      total=$(printf '%s\n' "${emails}" | grep -c '')
      log_success "Dumped ${total} email addresses to ${output_file}"
    else
      # empty marker so the repository is not cloned a second time
      : > "${output_file}"
    fi
  done < <(list_repos "${collection}" "${account}")
}


# -----------------------------------------------------------
# grab the list of repos via the /orgs api
# -----------------------------------------------------------
function get_org_emails() {
  collect_emails "orgs" "${1}"
}


# -----------------------------------------------------------
# grab the list of repos via the /users api
# -----------------------------------------------------------
function get_user_emails() {
  collect_emails "users" "${1}"
}


# -----------------------------------------------------------
# display welcome message
# -----------------------------------------------------------
welcome

# -----------------------------------------------------------
# print usage and exit if no targets specified
# -----------------------------------------------------------
if [[ $# -eq 0 ]]; then
  usage
fi

# -----------------------------------------------------------
# create a private scratch directory and make sure it is
# removed however the script ends
# -----------------------------------------------------------
temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/githump.XXXXXX") || {
  log_error "Unable to create a temporary working directory."
  exit 1
}
trap cleanup EXIT


# -----------------------------------------------------------
# begin the acquisition
# -----------------------------------------------------------
mkdir -p results
# "$@" keeps the targets in the order given and intact as words;
# BASH_ARGV is reverse-ordered and not reliably populated
for target in "$@"; do
  # counted up front so skipped targets do not inflate "remaining"
  count=$((count + 1))

  # -----------------------------------------------------------
  # sanity check the target name format
  # -----------------------------------------------------------
  if ! is_valid_target "${target}"; then
    log_error "${target} did not match the GitHub username format requirement."
    continue
  fi

  # -----------------------------------------------------------
  # collect the emails from the repositories
  # -----------------------------------------------------------
  log_info "Beginning collection for $target. This may take a while."
  get_org_emails "${target}"
  get_user_emails "${target}"

  # -----------------------------------------------------------
  # accumulate all the unique emails
  # (the per-target directory is absent when no repository
  # could be listed or cloned; the result is then an empty file)
  # -----------------------------------------------------------
  results_file="./results/${target}.txt"
  if [[ -d "${temp_dir}/${target}" ]]; then
    find "${temp_dir}/${target}" -name "*.results" -type f -exec cat "{}" + \
      | LC_ALL=C sort -u > "${results_file}"
    rm -rf -- "${temp_dir:?}/${target}"
  else
    : > "${results_file}"
  fi
  # arithmetic expansion strips the padding some wc builds emit
  address_count=$(( $(wc -l < "${results_file}") ))
  log_success "Collected ${address_count} emails for $target, stored in ./results/${target}.txt"

  # -----------------------------------------------------------
  # update user with number of targets remaining
  # -----------------------------------------------------------
  log_info "$(($# - count)) remaining."
done

# -----------------------------------------------------------
# the EXIT trap cleans up the working directory
# -----------------------------------------------------------
exit 0