├── Logs ├── importedDBS.log └── DELETE_ME.txt ├── data └── DELETE_ME.txt ├── OutputFiles └── DELETE_ME.txt ├── PutYourDataBasesHere └── example.txt ├── dependencies.sh ├── LICENSE.md ├── query.sh ├── decompress.sh ├── compress.sh ├── folderPrimer.py ├── benchmark.py ├── Import.sh ├── README.md ├── run.sh ├── search.sh └── pysort.py /Logs/importedDBS.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Logs/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /data/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /OutputFiles/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /PutYourDataBasesHere/example.txt: -------------------------------------------------------------------------------- 1 | test@example.com:Password1 2 | admin@example.com:SUperSeCreTPassswrd1 3 | Michael@dundermifflin.com:Sc0tt 4 | Password17:Mark@facebook.com 5 | -------------------------------------------------------------------------------- /dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/18/2019 6 | #Usage: ./dependencies.sh 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 
| NC='\033[0m' # No Color 11 | 12 | if [ "${PWD##*/}" == "BaseQuery" ];then 13 | sudo chmod 755 -R $(pwd) 14 | sudo apt-get update -y 15 | sudo apt-get install python3.7 -y 16 | sudo apt-get install tar -y 17 | sudo apt-get install zstd -y 18 | sudo apt-get install xterm -y 19 | echo 20 | printf "${GREEN}[+]${NC} Finished downloading!\n" 21 | else 22 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 23 | fi 24 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 https://github.com/g666gle 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /query.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./query.sh test@example.com 7 | #Usage: ./query.sh test@ 8 | #Usage: ./query.sh @example.com 9 | #Usage: ./query.sh /home/user/Desktop/file.txt 10 | 11 | RED='\033[0;31m' 12 | YELLOW='\033[1;33m' 13 | GREEN='\033[0;32m' 14 | NC='\033[0m' # No Color 15 | 16 | # Checks to see if the user forgot to enter input 17 | if [ $# -eq 1 ];then 18 | if [ "${PWD##*/}" == "BaseQuery" ];then 19 | # Checks to see if the file exists in the working directory 20 | if ! [ -e "$1" ];then 21 | # Only one email 22 | ./search.sh "$1" 23 | else 24 | # A file was inputed 25 | filename="$(echo $1 | rev | cut -f 1 -d "/" | rev)" #test.txt 26 | printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/""$(echo $filename | cut -f 1 -d "." 
)""_output.txt${NC}\n" 27 | cat "$1" | while read -r email;do 28 | #echo 29 | # The first param is the email address the second is telling ./search that it 30 | # is a file so the user is not prompted 31 | ./search.sh "$email" "$filename" 32 | 33 | done 34 | fi 35 | else 36 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 37 | fi 38 | else 39 | printf "${YELLOW}[!]${NC} Please enter one email address or a file with one email address per line\n" 40 | fi 41 | -------------------------------------------------------------------------------- /decompress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/16/2019 6 | #Usage: ./decompress.sh 7 | #Usage: ./decompress.sh 8 | #Example: ./decompress.sh 0.tar.zst 9 | 10 | RED='\033[0;31m' 11 | YELLOW='\033[1;33m' 12 | NC='\033[0m' # No Color 13 | 14 | # Make sure the user is in the BaseQuery directory 15 | if [ "${PWD##*/}" == "BaseQuery" ];then 16 | # Check if no args were passed in; then decompress everything 17 | if [ $# -eq 0 ];then 18 | # Iterate through all the compressed files 19 | find data/ -type f -name "*.tar.zst" | sort | while read -r compressed_file; do 20 | # check to make sure you dont decompress the working directory 21 | if [ "$compressed_file" != "data/" ];then 22 | # Grabs the name of the file from the path 23 | name="$(echo $compressed_file | cut -f 2- -d "/" | cut -f 1 -d ".")" 24 | # decompress the .tar.zst files 25 | #tar --use-compress-program=zstd -xf ./data/0.tar.zst 26 | tar --use-compress-program=zstd -xf ./data/$name.tar.zst 27 | # remove the old compressed files 28 | rm -rf data/"$name".tar.zst 29 | fi 30 | done 31 | elif [ $# -eq 1 ];then 32 | # make sur you dont try and decompress the working directory 33 | if [ "$1" != "data/" ];then 34 | # decompress the .tar.zst files 35 | #tar 
--use-compress-program=zstd -xf ./data/0.tar.zst 36 | tar --use-compress-program=zstd -xf ./data/"$1" 37 | # remove the old compressed files 38 | rm -rf data/"$1" 39 | fi 40 | else # Wrong input 41 | printf "${RED}[!]${NC} Usage Error: ./decompress.sh \n" 42 | printf "${RED}[!]${NC} Usage Error: ./decompress.sh \n" 43 | 44 | printf "[!] Usage Error: ./decompress.sh \n" >> ./Logs/ActivityLogs.log 45 | printf "[!] Usage Error: ./decompress.sh \n" >> ./Logs/ActivityLogs.log 46 | 47 | fi 48 | else 49 | # If the users working directory is not BaseQuery while trying to run the script 50 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 51 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 52 | fi 53 | -------------------------------------------------------------------------------- /compress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/16/2019 6 | #Usage: ./compress.sh 7 | 8 | RED='\033[0;31m' 9 | YELLOW='\033[1;33m' 10 | NC='\033[0m' # No Color 11 | 12 | # Make sure the user is in the BaseQuery directory 13 | if [ "${PWD##*/}" == "BaseQuery" ];then 14 | let orig_bytes=0 15 | declare -a arr 16 | # Find all of the uncompressed directories 17 | while read -r uncompressed_dir; do 18 | arr=(${uncompressed_dir}) 19 | if [ "$uncompressed_dir" != "data/" ];then 20 | file_bytes=$(du -sb "$uncompressed_dir"/ | cut -f 1) 21 | let orig_bytes=$orig_bytes+$file_bytes 22 | name="$(echo $uncompressed_dir | cut -f 2- -d "/")" 23 | #tar --use-compress-program=zstd -cf data/0.tar.zst data/0 24 | tar --use-compress-program=zstd -cf data/"$name".tar.zst "$uncompressed_dir" 25 | rm -rf "$uncompressed_dir" 26 | fi 27 | _constr+="${arr[2]}" 28 | done< <(find data/ -maxdepth 1 -type d | sort) 29 | 30 | compressed_bytes=$(du -sb data/ | 
cut -f 1) 31 | if [[ $orig_bytes -ne 0 && $compressed_bytes -ne 0 ]];then 32 | comp_div_ori=$( awk -v orig=$orig_bytes -v comp=$compressed_bytes 'BEGIN{printf("%.2f\n",comp/orig*100)}' ) 33 | multiples_compressed=$(( $orig_bytes/$compressed_bytes )) 34 | echo 35 | printf "${RED}[*] Your data is $multiples_compressed""x times smaller! (~$comp_div_ori%% of the original size)${NC}\n" 36 | printf "${YELLOW}[!] Original number of bytes $orig_bytes${NC}\n" 37 | printf "${YELLOW}[!] Compressed number of bytes $compressed_bytes${NC}\n" 38 | 39 | printf "[*] Your data is $multiples_compressed""x times smaller! (~$percentage_compressed%% of the original size)\n" >> ./Logs/ActivityLogs.log 40 | printf "[!] Original number of bytes $orig_bytes\n" >> ./Logs/ActivityLogs.log 41 | printf "[!] Compressed number of bytes $compressed_bytes\n" >> ./Logs/ActivityLogs.log 42 | fi 43 | 44 | else 45 | # If the users working directory is not BaseQuery while trying to run the script 46 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 47 | 48 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 49 | fi 50 | 51 | 52 | -------------------------------------------------------------------------------- /folderPrimer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import time 4 | 5 | """ 6 | Author Github: https://github.com/g666gle 7 | Author Twitter: https://twitter.com/g666gle1 8 | Date: 1/29/2019 9 | Description: This file creates a triple nested [A-Z] and [0-9] directories so the data from the databases are easily 10 | accessible. The reason to do this before instead of while pysort.py is placing all the files is due to 11 | the fact that creating directories is slightly time consuming and by creating them all at once instead 12 | of on the fly. We can expect to see a small efficiency improvement. Also.... 
organization 13 | Usage: python3 folderPrimer.py 14 | Version: 1.5.0 15 | Python Version: 3.7.1 16 | """ 17 | 18 | path = os.getcwd() 19 | 20 | 21 | def folder_spam(): 22 | """ 23 | This function creates all the nested files needed to store the data. [A-Z][0-9] 24 | :return: N/A 25 | """ 26 | first_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 27 | second_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 28 | third_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 29 | 30 | for char in first_nest: # Creating the first nesting of the folders 31 | if not os.path.isdir(path + "/data/" + char.strip()): 32 | os.makedirs(path + "/data/" + char.strip()) 33 | for char in first_nest: # Creating the second nesting of the folders 34 | for char2 in second_nest: 35 | if not os.path.isdir(path + "/data/" + char.strip() + "/" + char2.strip()): 36 | os.makedirs(path + "/data/" + char.strip() + "/" + char2.strip()) 37 | for char in first_nest: # Creating the third nesting of the folders 38 | for char2 in second_nest: 39 | for char3 in third_nest: 40 | if not os.path.isdir(path + "/data/" + char.strip() + "/" + char2.strip() + "/" + char3.strip()): 41 | os.makedirs(path + "/data/" + char.strip() + "/" + char2.strip() + "/" + char3.strip()) 42 | 43 | 44 | if __name__ == '__main__': 45 | GREEN = '\033[0;32m' 46 | YELLOW = '\033[1;33m' 47 | NC = '\033[0m' # No Color 48 | 49 | print() 50 | print(GREEN + "[+]" + NC + "Priming the data directory") 51 | start_time = time.time() 52 | folder_spam() 53 | end_time = time.time() 54 | print(GREEN + "[+]" + NC + " Data directory 
finished being primed!") 55 | print(YELLOW + "[!]" + NC + " Action took " + str(int(end_time - start_time)) + " seconds") 56 | print() 57 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import time 6 | from pysort import place_data 7 | 8 | """ 9 | Author Github: https://github.com/g666gle 10 | Author Twitter: https://twitter.com/g666gle1 11 | Date: 1/29/2019 12 | Description: This file is used by run.sh to calculate the amount of time it will take the current hardware to process 13 | the user specified amount of lines. This is important because each user has different hardware. Import 14 | mainly relies on the type of CPU and how many cores it has. On an Intel i7-7700 processor and 16GB of 15 | RAM you can expect results of around 18000 lines per second while your laptop is plugged in and 12000 16 | lines per second while your laptop is not plugged in. 
17 | 18 | Usage: python3 folderPrimer.py 19 | Usage: python3 folderPrimer.py file.txt 1000000 20 | Version: 1.5.0 21 | Python Version: 3.7.1 22 | """ 23 | 24 | if __name__ == '__main__': 25 | # the arguments passed in 26 | args = sys.argv 27 | path = os.getcwd() 28 | written_lines = 0 29 | # The amount of lines written in 1 Second 30 | total_lines = 0 31 | RED = '\033[0;31m' 32 | GREEN = '\033[0;32m' 33 | YELLOW = '\033[1;33m' 34 | NC = '\033[0m' # No Color 35 | start_time = time.time() 36 | 37 | # Check to see if the arguments are correct 38 | if len(args) == 3 and args[1] != "" and args[2] != "": 39 | # Grab the users imputed amount of lines 40 | amt_lines = args[2] 41 | # Directory guaranteed to exist from previous check in Import.sh 42 | with open(path + "/PutYourDataBasesHere/" + args[1], 'r') as fp: 43 | try: 44 | for line in fp: 45 | # go through as many lines as you can in 2 second 46 | if (time.time() - start_time) <= 2: 47 | written_lines += place_data(line.strip(), path) 48 | total_lines += 1 49 | else: 50 | break 51 | except Exception as e: 52 | print(RED + "Exception: " + str(e) + NC) 53 | 54 | # The seconds is ( ( 2 * ( user imputed lines ) ) / ( amount of lines processed in 2 seconds ) ) 55 | secs = (2 * int(amt_lines)) / int(total_lines) 56 | mins = secs / 60 57 | hours = mins / 60 58 | days = hours / 24 59 | years = days / 364 60 | os.system('cls' if os.name == 'nt' else 'clear') 61 | print(YELLOW + "[!]" + NC + " Your computer can process " + RED + str(total_lines / 2) + NC + " lines per second!") 62 | print(YELLOW + "[!] For the best results make sure your laptop is pluged into a power source!" 
+ NC) 63 | print() 64 | print(GREEN + "[+]" + NC + " To import " + GREEN + amt_lines + NC + " lines") 65 | print(GREEN + "[+]" + NC + " You can expect an import time of around " + GREEN + "{}".format('%.2f' % secs) + NC + " seconds which is...") 66 | print(" " + GREEN + "{}".format('%.2f' % mins) + NC + " minutes which is...") 67 | print(" " + GREEN + "{}".format('%.2f' % hours) + NC + " hours which is...") 68 | print(" " + GREEN + "{}".format('%.2f' % days) + NC + " days which is...") 69 | print(" " + GREEN + "{}".format('%.2f' % years) + NC + " years") 70 | 71 | 72 | else: 73 | print(YELLOW + "[!]" + NC + " Invalid arguments provided") 74 | -------------------------------------------------------------------------------- /Import.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./Import 7 | 8 | RED='\033[0;31m' 9 | YELLOW='\033[1;33m' 10 | GREEN='\033[0;32m' 11 | NC='\033[0m' # No Color 12 | 13 | printf "${RED}[*]${NC} Starting at $(date)\n" 14 | 15 | #Checks to see if the user is working out of the BaseQuery directory 16 | if [ "${PWD##*/}" == "BaseQuery" ];then 17 | # Checks to see if the Import directory is there 18 | if [ -d ./PutYourDataBasesHere ];then 19 | dataDir="$(pwd)" 20 | 21 | # This loop is checking to see if any new files are in the PutYourDataBasesHere 22 | # directory.If not then there is no reason to decompress and compress everything 23 | let i=0 # used to count the amount of files not already imported 24 | declare -a arr 25 | while read -r inputfile;do 26 | arr=(${inputfile}) 27 | file_SHA_sum="$(sha256sum "$dataDir"/PutYourDataBasesHere/"$inputfile" | awk '{print$1}')" 28 | # check to see if the database has already been imported 29 | if [ "$(grep "$file_SHA_sum" -c < ./Logs/importedDBS.log)" == "0" ];then 30 | let i=i+1 31 | fi 32 | _constr+="${arr[2]}" 33 | 
done< <(find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/") 34 | 35 | # if there are files that need to be imported 36 | if [ $i -ne 0 ];then 37 | # decompress all of the folders before priming and import 38 | printf "${YELLOW}[!]${NC} Decompressing all stored data\n" 39 | printf "[!] Decompressing all stored data\n" >> ./Logs/ActivityLogs.log 40 | ./decompress.sh 41 | printf "${GREEN}[+]${NC} Finished decompressing!\n" 42 | printf "[+] Finished decompressing!\n" >> ./Logs/ActivityLogs.log 43 | 44 | # Prime the data folder 45 | python3 folderPrimer.py 46 | 47 | # Read each file in the input files, in sorted order 48 | find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/" | while read -r inputfile;do 49 | file_SHA_sum="$(sha256sum "$dataDir"/PutYourDataBasesHere/"$inputfile" | awk '{print$1}')" 50 | # check to see if the database has already been imported 51 | if [ "$(grep "$file_SHA_sum" -c < ./Logs/importedDBS.log)" == "0" ];then 52 | # Call a python script to iterate through the file and sort them 53 | python3 pysort.py "$inputfile" 54 | printf "${YELLOW}[!] Adding $inputfile to importedDBS.log${NC}\n" 55 | echo "$file_SHA_sum" "$(date)" "$inputfile" >> "$dataDir"/Logs/importedDBS.log 56 | echo 57 | else 58 | printf "${YELLOW}[!]${NC} $inputfile SHASUM found in importedDBS.log\n" 59 | printf "[!] $inputfile SHASUM found in importedDBS.log\n" >> ./Logs/ActivityLogs.log 60 | fi 61 | done 62 | printf "${YELLOW}[!]${NC} Compressing all data\n" 63 | printf "[!] Compressing all data\n" >> ./Logs/ActivityLogs.log 64 | # All data is stored. 
Time to compress 65 | ./compress.sh 66 | printf "${GREEN}[+]${NC} Finished compressing!\n" 67 | printf "[+] Finished compressing!\n" >> ./Logs/ActivityLogs.log 68 | 69 | else # No new files found 70 | echo 71 | printf "${RED}ERROR:${NC} No new files found in the 'PutYourDataBasesHere' directory \n" 72 | printf "ERROR: No new files found in the 'PutYourDataBasesHere' directory \n" >> ./Logs/ActivityLogs.log 73 | 74 | fi # check for imported files 75 | 76 | else # If the Import directory doesn't exist 77 | dataDir=$(pwd) 78 | printf "${RED}ERROR: Please make a directory called 'PutYourDataBasesHere' in $dataDir${NC}\n" 79 | printf "ERROR: Please make a directory called 'PutYourDataBasesHere' in $dataDir\n" >> ./Logs/ActivityLogs.log 80 | fi 81 | else 82 | # If the users working directory is not BaseQuery while trying to run the script 83 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 84 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 85 | fi 86 | echo 87 | printf "${RED}[*]${NC} Completed\n" 88 | printf "[*] Completed\n" >> ./Logs/ActivityLogs.log 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | BaseQuery V1.5 3 |

4 | 5 |

6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |

19 | 20 | Your private data is being traded and sold all over the internet as we speak. Tons of leaks come out on a daily basis which can make you feel powerless. The majority of user-passwords and other sensitive information have been posted somewhere on the internet/darknet for any prying eyes to see, whether you like it or not. To take more control of what personal info is out there you can use [Haveibeenpwned](https://haveibeenpwned.com/) to narrow down which breaches your information has been exposed in. This is a great start but what if you want to know exactly what information of yours other people have access to? BaseQuery is an all in one program that makes importing and searching through thousands of data-breaches easy. 21 | 22 | 23 | ![basequery_banner](https://user-images.githubusercontent.com/47184892/53661764-272e8380-3c2f-11e9-8303-763cf00c27ab.png) 24 | ### Features Included: 25 | * A 4x nested storage structure 26 | * Average import speeds of 12,000+ entries per second (Intel Core i7-7700HQ CPU @ 2.8GHz) 27 | * Instantaneous querying system 28 | * Facebook's zstd lossless compression algorithm to reduce the size of the data (On average reduces the data to less than 10% of the original size) 29 | * Calculate the time all your files will take to import based on your specific hardware 30 | * Duplicate data protection 31 | * Output all of your findings in a standard format 32 | * Email harvesting built-in 33 | 34 | ## Installing 35 | 36 | To Install BaseQuery type the following commands 37 | 38 | ``` 39 | git clone https://github.com/g666gle/BaseQuery.git 40 | sudo chmod 755 -R BaseQuery/ 41 | cd BaseQuery 42 | ./dependencies.sh 43 | ./run.sh 44 | ``` 45 | 46 | 47 | ## Getting Started 48 | 1. 
Place any databases that you have into the "PutYourDataBasesHere" folder 49 | - As of right now, BaseQuery can only accept files in the format where each line is colon separated "test@example.com:password" or "password:test@example.com" 50 | - It doesn't matter if the line formats are mixed up within the same file. Ex) The first line may be "email:password" and the second line can be "password:email" 51 | - One entry per line!! 52 | - If you need a better visual there is an example.txt file in the folder "PutYourDataBasesHere" 53 | - You should delete the example file before running the program. 54 | 1. Now that you have all of your files in the correct folder 55 | - Open up a terminal in the BaseQuery directory. 56 | - Type ./dependencies.sh to install all of the resources needed ( You only need to do this once ) 57 | - Type ./run.sh to start the program 58 | - **Note that if you are using a laptop make sure it is plugged in. Importing databases uses A LOT of processing power and will make the import 4 times faster on average!** 59 | 1. Follow the instructions on the screen 60 | - That's it, enjoy! 61 | - Contact me with any issues. 
62 | 63 | ### Import Times Based on Hardware Specifics 64 | 65 | 66 | 67 | 68 | ### Query Options 69 | 70 | ![basequery_query](https://user-images.githubusercontent.com/47184892/53662460-f0596d00-3c30-11e9-8ac6-f0b154ad22b7.PNG) 71 | 72 | *** 73 | ## Prerequisites 74 | **Note: All of these are automatically installed using the 'dependencies.sh' script** 75 | 76 | ``` 77 | Update packages: (sudo apt-get update) 78 | 79 | Python Version 3.6+ (sudo apt-get install python3.7) 80 | Bash 4+ 81 | tar (sudo apt-get install tar) 82 | zstd (sudo apt-get install zstd) 83 | xterm (sudo apt-get install xterm) 84 | ``` 85 | 86 | 87 | ## Built With 88 | 89 | * Ubuntu 18.04 bionic 90 | 91 | * Bash Version: 92 | GNU bash, version 4.4.19(1)-release (x86_64-pc-linux-gnu) 93 | 94 | * Python Version: 95 | 3.7.1 96 | 97 | ## Authors 98 | 99 | * **G666gle** - [Github](https://github.com/G666gle), [Twitter](https://twitter.com/g666g1e) 100 | 101 | 102 | ## License 103 | 104 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details 105 | 106 | ## Disclaimer 107 | 108 | **READ UP ON YOUR LOCAL LAWS FIRST BEFORE USING THIS PROGRAM. I TAKE NO RESPONSIBILITY FOR ANYTHING YOU DO WITH BASEQUERY. 
UNDER NO CIRCUMSTANCE SHOULD BASEQUERY BE USED FOR ILLEGAL PURPOSES.** 109 | 110 | 111 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./run.sh 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 | YELLOW='\033[1;33m' 11 | NC='\033[0m' # No Color 12 | 13 | #change the window size to fit the art 14 | # requires xterm as a dependency 15 | resize -s 25 134 16 | 17 | #ctrl+C 18 | trap finish INT 19 | 20 | function finish { 21 | printf "[*] Exit Trap Reached (CTRL+C)\n" >> ./Logs/ActivityLogs.log 22 | clear 23 | exit 24 | } 25 | 26 | clear 27 | echo " _ _ _ _ _ _ _ _ _ _ " 28 | echo " / /\ / /\ / /\ /\ \ /\ \ /\_\ /\ \ /\ \ /\ \ /\_\ " 29 | echo " / / \ / / \ / / \ / \ \ / \ \ / / / _ / \ \ / \ \\\\ \ \ / / / " 30 | echo " / / /\ \ / / /\ \ / / /\ \__ / /\ \ \ / /\ \ \ \ \ \__ /\_\ / /\ \ \ / /\ \ \\\\ \ \_/ / / " 31 | echo " / / /\ \ \ / / /\ \ \ / / /\ \___\ / / /\ \_\ / / /\ \ \ \ \___\ / / // / /\ \_\ / / /\ \_\\\\ \___/ / " 32 | echo " / / /\ \_\ \ / / / \ \ \ \ \ \ \/___// /_/_ \/_/ / / / \ \_\ \__ / / / // /_/_ \/_/ / / /_/ / / \ \ \_/ " 33 | echo " / / /\ \ \___\ / / /___/ /\ \ \ \ \ / /____/\ / / / _ / / / / / / / / // /____/\ / / /__\/ / \ \ \ " 34 | echo " / / / \ \ \__/ / / /_____/ /\ \ _ \ \ \ / /\____\/ / / / /\ \/ / / / / / / // /\____\/ / / /_____/ \ \ \ " 35 | echo " / / /____\_\ \ / /_________/\ \ \ /_/\__/ / / / / /______ / / /__\ \ \/ / / /___/ / // / /______ / / /\ \ \ \ \ \ " 36 | echo " / / /__________\/ / /_ __\ \_\\\\ \/___/ / / / /_______\ / / /____\ \ \/ / /____\/ // / /_______\/ / / \ \ \ \ \_\ " 37 | echo " \/_____________/\_\___\ /____/_/ \_____\/ \/__________/ \/________\_\/\/_________/ \/__________/\/_/ \_\/ \/_/ " 38 | echo 39 | 40 | 41 | #Make sure that the user is in 
the BaseQuery directory 42 | if [ "${PWD##*/}" == "BaseQuery" ];then 43 | 44 | #Log entry 45 | echo "[*] Executed run.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 46 | 47 | while true;do 48 | echo 49 | echo "Options:" 50 | echo " [1] Import Your data" 51 | echo " [2] Calculate Import Time" 52 | echo " [3] Query" 53 | echo " [4] Harvest Email Addresses" 54 | echo " [5] Message" 55 | echo " [Q] Quit" 56 | echo 57 | read -p "Option Number-> " answer 58 | 59 | # Check to see if the answer is only letters 60 | if [[ "$answer" =~ ^[a-zA-Z]+$ ]];then 61 | if [[ "$answer" == [Qq] ]];then 62 | # Log entry 63 | echo "[*] run.sh COMMAND 'q' [ $(date) ]" >> ./Logs/ActivityLogs.log 64 | echo >> ./Logs/ActivityLogs.log 65 | clear 66 | exit 67 | fi 68 | 69 | # Check to see if the answer is only numbers 70 | elif [[ "$answer" =~ ^[0-9]+$ ]];then 71 | 72 | if [ "$answer" -eq 1 ];then 73 | #Log entry 74 | echo "[+] run.sh COMMAND '1' [ $(date) ]" >> ./Logs/ActivityLogs.log 75 | echo "[!] Executing ./Import.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 76 | start=$SECONDS 77 | ./Import.sh 78 | stop=$SECONDS 79 | difference=$(( stop - start )) 80 | printf "${GREEN}[!]${NC} The entire import including compression and decompression took $difference seconds\n" 81 | echo 82 | 83 | elif [ "$answer" -eq 2 ];then 84 | echo 85 | printf "${YELLOW}Make sure you have at least one file with at least 20,000 lines in PutYourDataBasesHere/${NC}\n" 86 | echo 87 | 88 | while true;do 89 | echo "Please enter the number of lines you wish to import... 
" 90 | read -p "('q' to quit) Lines>> " num_lines 91 | echo 92 | if [[ $num_lines != [Qq] ]]; then 93 | if [[ "$num_lines" =~ ^[0-9]+$ ]];then 94 | # Log Entry 95 | echo "[+] run.sh COMMAND '2' arg [$num_lines] [ $(date) ]" >> ./Logs/ActivityLogs.log 96 | path="$(pwd)" 97 | while read -r inputfile;do 98 | num="$(wc -c < "$path"/PutYourDataBasesHere/"$inputfile")" 99 | if [ "$num" -ge 20000 ];then 100 | printf "${GREEN}[+]${NC} Decompressing files\n" 101 | ./decompress.sh 102 | printf "${GREEN}[+]${NC} Starting Benchmark!\n" 103 | python3 benchmark.py "$inputfile" "$num_lines" 104 | break 105 | fi 106 | 107 | done< <(find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/") 108 | 109 | else 110 | printf "${YELLOW}[!]${NC} Invalid input\n" 111 | fi 112 | echo 113 | 114 | else # If the user enters q or Q 115 | printf "${YELLOW}[!]${NC} Exiting Calculate Import Time\n" 116 | printf "${GREEN}[+]${NC} Compressing files\n" 117 | ./compress.sh 118 | echo 119 | printf "${GREEN}[!]${NC} Compression completed!\n" 120 | break 121 | fi 122 | done 123 | 124 | 125 | elif [ "$answer" -eq 3 ];then 126 | echo 127 | printf "Please enter enter an email address in one of the following formats \n" 128 | printf " ex) test@example.com [ Searches for all passwords associated with this address ]\n" 129 | printf " ex) test@ [ Searches for all passwords for any email addresses starting with this username ]\n" 130 | printf " ex) @example.com [ Searches for all passwords for any email addresses ending with this domain name ]\n" 131 | printf " ex) /home/user/Desktop/email_list.txt [ Searches line by line through the file for all passwords for each email address ]\n\n" 132 | while true;do 133 | read -p "('q' to quit) Email>> " email 134 | if [[ $email != [Qq] ]]; then 135 | if [ "$email" != "" ];then 136 | # Log Entry 137 | echo "[+] run.sh COMMAND '3' arg [$email] [ $(date) ]" >> ./Logs/ActivityLogs.log 138 | echo "[!] 
Executing query.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 139 | ./query.sh "$email" 140 | echo 141 | else 142 | continue 143 | fi 144 | else 145 | echo 146 | printf "${YELLOW}[!]${NC} Exiting Query\n" 147 | break 148 | fi 149 | done 150 | 151 | # Compress all of the data 152 | echo 153 | printf "${YELLOW}[!]${NC} Compressing all stored data\n" 154 | ./compress.sh 155 | printf "${GREEN}[+]${NC} Finished compressing!\n" 156 | echo 157 | 158 | elif [ "$answer" -eq 4 ];then 159 | echo 160 | echo "[+] run.sh COMMAND '4' [ $(date) ]" >> ./Logs/ActivityLogs.log 161 | printf "${GREEN}Code taken from https://github.com/laramies/theHarvester${NC}\n" 162 | printf "${GREEN} Go check him out${NC}\n" 163 | # Check if theHarvester is already installed 164 | if ! [ -d ./theHarvester ];then 165 | printf "${YELLOW}[!]${NC} Installing theHarvester\n" 166 | git clone https://github.com/laramies/theHarvester.git #&> /dev/null 167 | fi 168 | 169 | # Install all of the requirements 170 | printf "${YELLOW}[!]${NC} Updating requirements\n" 171 | sudo python3 -m pip install -r ./theHarvester/requirements.txt #&> /dev/null 172 | 173 | printf "${YELLOW}[!] PLACE ANY API KEYS IN $(pwd)/theHarvester/api-keys.yaml${NC}\n" 174 | echo "Domain name? ex) google.com" 175 | read -p "> " domain 176 | echo "Limit for the amount of email addresses? ex) 500" 177 | read -p "> " limit 178 | printf " 179 | ${RED}source:${NC} baidu, bing, bingapi, censys, crtsh, cymon, 180 | dogpile, duckduckgo, google, googleCSE, google- 181 | certificates, google-profiles, hunter, intelx, 182 | linkedin, netcraft, pgp, securityTrails, threatcrowd, 183 | trello, twitter, vhost, virustotal, yahoo, all\n" 184 | echo 185 | echo "Source? 
ex) all" 186 | read -p "> " source 187 | sudo python3 ./theHarvester/theHarvester.py -d "$domain" -l "$limit" -b "$source" 188 | echo 189 | printf "${RED}COPY ONLY THE EMAIL ADDRESSES AND SAVE THEM TO A .TXT FILE${NC}\n" 190 | printf "${RED}YOU CAN USE THE TEXT FILE AS INPUT TO QUERY ALL OF THEM AT ONCE${NC}\n" 191 | echo 192 | 193 | elif [ "$answer" -eq 5 ];then 194 | # Log Entry 195 | echo "[+] run.sh COMMAND '5' [ $(date) ]" >> ./Logs/ActivityLogs.log 196 | echo 197 | echo "Hey... thanks for downloading Base Query, I've spent way too many hours coding this" 198 | echo "Base Query is a OSINT tool to help you organize and query all those pesky databases you have laying around" 199 | echo "With a quadruple nested structure and a careful design your querys should be INSTANTANEOUS! Or ya know like really fast." 200 | echo "Something broken? Check the logs and then message me!" 201 | echo "For more information regarding use check the README.md file" 202 | echo "Found a bug? Just want to talk? Message me on GitHub or Twitter https://github.com/g666gle" 203 | echo " https://twitter.com/g666gle1" 204 | echo " V1.5" 205 | echo 206 | 207 | fi 208 | 209 | fi 210 | read -sp "Press Enter to continue..." 211 | clear 212 | 213 | done 214 | else 215 | #Log entry 216 | echo "[!] 
#!/bin/bash

#Author Github: https://github.com/g666gle
#Author Twitter: https://twitter.com/g666gle1
#Date: 1/29/2019
#Usage: ./search.sh test@example.com
#Usage: ./search.sh test@
#Usage: ./search.sh @example.com


RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# The raw query exactly as the user typed it ($1) and the optional output-name
# override used when a whole file of queries is being run ($2). Saved up front
# because "$1"/"$2" mean something else inside the helper functions below.
query="$1"
file_query="$2"

# emit_matches <file> <grep-pattern>
# Case-insensitively greps <file> for <grep-pattern> and emits every matching
# "username:password" line, honoring the global $out_to_file setting:
#   y/Y  -> append to ./OutputFiles/<query>_output.txt   (interactive query)
#   YF   -> append to ./OutputFiles/<file_query>_output.txt (query came from a file)
#   else -> colorized console output
# Missing files are silently skipped so callers don't need existence checks.
emit_matches() {
	local target_file="$1" pattern="$2" Line user_part pass_part
	[ -f "$target_file" ] || return 0
	grep -i "$pattern" "$target_file" | while read -r Line;do
		user_part="$(echo "$Line" | cut -f 1 -d ":")"
		pass_part="$(echo "$Line" | cut -f 2- -d ":")"
		if [[ "$out_to_file" == [Yy] ]];then
			echo "$user_part:$pass_part" >> ./OutputFiles/"$query"_output.txt
		elif [ "$out_to_file" == "YF" ];then
			echo "$user_part:$pass_part" >> ./OutputFiles/"$file_query"_output.txt
		else # Send the output to the console
			# %s placeholders keep credential data out of the printf FORMAT
			# string; a raw "$user_part..." format breaks on '%' characters
			printf "%s${RED}:%s${NC}\n" "$user_part" "$pass_part"
		fi
	done
}

# emit_dir <dir> <grep-pattern>
# Runs emit_matches over every file directly inside <dir> (used for the
# 0UTLIERS / NOTVALID directories at each nesting level).
emit_dir() {
	local f
	for f in "$1"/*;do
		emit_matches "$f" "$2"
	done
}

# Prints the "Email Address:" banner, but only for console output
# (suppressed whenever results are going to a file).
print_header() {
	if [[ "$out_to_file" == [Nn] ]];then
		printf "${GREEN}Email Address: %s${NC}\n" "$email"
	fi
}

# Searches the NOTVALID bucket for the current $email (entries that failed
# the format test at import time are still queryable).
search_notvalid() {
	if [[ -d ./data/NOTVALID && -e ./data/NOTVALID/FAILED_TEST.txt ]];then
		emit_matches ./data/NOTVALID/FAILED_TEST.txt "^$email"
	fi
}

# Makes sure the user is in the BaseQuery dir
if [ "${PWD##*/}" == "BaseQuery" ];then
	# Grab everything before the @ sign
	user_name=$(echo "$1" | cut -d @ -f 1 | awk '{print tolower($0)}')
	email=$(echo "$1" | cut -d : -f 1 | awk '{print tolower($0)}')
	check_for_at=${1:0:1}

	# Check to see if the user entered in a domain ex) @google.com
	if [ "$check_for_at" == "@" ];then
		read -p "Are you sure you want to find every possible $1 address? This might take a while! [y/n] " answer
		# Checks input
		while [[ "$answer" != [YyNn] ]];do
			printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
			read -p "Are you sure you want to find every possible $1 address? This might take a while! [y/n] " answer
		done
		if [[ "$answer" == [Yy] ]];then # Checks if the user is sure
			read -p "Output to a file? [y/n] " out_to_file
			# Checks input
			while [[ "$out_to_file" != [YyNn] ]];do
				printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
				read -p "Output to a file? [y/n] " out_to_file
			done
			# Decompress all files
			printf "${GREEN}[+]${NC} Decompressing files\n"
			./decompress.sh

			printf "${GREEN}[+]${NC} Starting search!\n"
			if [[ "$out_to_file" == [Yy] ]];then
				if ! [ -d ./OutputFiles ];then
					mkdir OutputFiles
				fi
				printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/$1_output.txt${NC}\n"
				printf "${GREEN}[+]${NC} Please wait this could take a few minutes!\n"
			fi
			# Walk the 4-deep data tree. Regular dirs descend one level;
			# 0UTLIERS (and top-level NOTVALID) dirs hold their entries
			# directly, so they are grepped in place via emit_dir.
			for first_nest_dir in data/*;do # data/0
				check="$(echo "$first_nest_dir" | cut -f 2 -d "/")"
				if [[ "$(ls -A "$first_nest_dir")" ]];then
					if [[ $check != "0UTLIERS" && $check != "NOTVALID" ]];then
						for second_nest_dir in "$first_nest_dir"/*;do # data/0/0
							if [[ "$(ls -A "$second_nest_dir")" ]];then
								if [[ "$(echo "$second_nest_dir" | cut -f 3 -d "/")" != "0UTLIERS" ]];then
									for third_nest_dir in "$second_nest_dir"/*;do # data/0/0/0
										if [[ "$(ls -A "$third_nest_dir")" ]];then
											if [[ "$(echo "$third_nest_dir" | cut -f 4 -d "/")" != "0UTLIERS" ]];then
												for fourth_dir in "$third_nest_dir"/*;do # data/0/0/0/a.txt
													if [[ "$(ls -A "$fourth_dir")" ]];then
														if [[ "$(echo "$fourth_dir" | cut -f 5 -d "/")" != "0UTLIERS" ]];then
															emit_matches "$fourth_dir" "$query"
														else # data/0/0/0/0UTLIERS/
															emit_dir "$fourth_dir" "$query"
														fi
													fi
												done
											else # If the third dir is the 0UTLIERS dir
												emit_dir "$third_nest_dir" "$query"
											fi
										fi
									done
								else # If the second dir is the 0UTLIERS dir
									emit_dir "$second_nest_dir" "$query"
								fi
							fi
						done
					else # Top-level 0UTLIERS or NOTVALID dir
						emit_dir "$first_nest_dir" "$query"
					fi
				fi
			done
		else
			printf "${YELLOW}[!]${NC} Aborting!\n"
		fi

		printf "${YELLOW}[!]${NC} Finished search!\n"
		exit #end and exit
	fi # End of checking for domain

	#########################################################################
	#  The above code deals with querying every file for a specific domain  #
	#  The below code deals with querying a specific username or file       #
	#########################################################################

	# Deals with all the cases of having a file vs stdin
	out_to_file="N"
	# Check to see if the user is running a file or just commandline input
	if [ $# -ge 2 ];then
		out_to_file="YF" # Yes implicit from entering a file
	else # The user is not running a file so ask them if they want to output to a file
		read -p "Output to a file? [y/n] " out_to_file
		# Checks input
		while [[ "$out_to_file" != [YyNn] ]];do
			printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
			read -p "Output to a file? [y/n] " out_to_file
		done
		# Informing the user
		printf "${GREEN}[+]${NC} Starting search!\n"
		if [[ "$out_to_file" == [Yy] ]];then
			# Make the dir if it doesn't exist
			if ! [ -d ./OutputFiles ];then
				mkdir OutputFiles
			fi
			printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/$1_output.txt${NC}\n"
		fi
	fi


	# Check to make sure the user name is at least 4 and the email has a @
	if [[ ${#user_name} -ge 4 ]] && [[ "$email" == *"@"* ]];then
		# Grab each individual character
		first_char=${user_name:0:1} # {variable name: starting position : how many letters}
		second_char=${user_name:1:1}
		third_char=${user_name:2:1}
		fourth_char=${user_name:3:1}

		# Check to see if the folder is compressed
		if [ -e ./data/"$first_char".tar.zst ];then
			# Decompress the data
			./decompress.sh "$first_char".tar.zst > /dev/null
		fi

		print_header
		# Descend as deep as the on-disk tree allows; at the deepest existing
		# level either the exact <fourth_char>.txt bucket or that level's
		# 0UTLIERS file holds the entry. emit_matches tolerates missing files.
		base=./data/"$first_char"
		if [ -d "$base" ];then
			if [ -d "$base/$second_char" ];then
				if [ -d "$base/$second_char/$third_char" ];then
					if [ -e "$base/$second_char/$third_char/$fourth_char".txt ];then
						emit_matches "$base/$second_char/$third_char/$fourth_char".txt "^$email"
					else # The exact file does not exist; fall back to 0UTLIERS
						emit_matches "$base/$second_char/$third_char"/0UTLIERS/0utliers.txt "^$email"
					fi
				else # The third letter directory does not exist
					emit_matches "$base/$second_char"/0UTLIERS/0utliers.txt "^$email"
				fi
			else # The second letter directory does not exist
				emit_matches "$base"/0UTLIERS/0utliers.txt "^$email"
			fi
		else # The first letter directory does not exist
			emit_matches ./data/0UTLIERS/0utliers.txt "^$email"
		fi
		# Every branch also consults the NOTVALID bucket
		search_notvalid

	else # If not a valid address
		first_char=${user_name:0:1} # {variable name: starting position : how many letters}

		# Check to see if the folder is compressed
		if [ -e ./data/"$first_char".tar.zst ];then
			./decompress.sh "$first_char".tar.zst > /dev/null
		fi
		# Uncompresses NOTVALID
		if [ -e ./data/NOTVALID.tar.zst ];then
			./decompress.sh NOTVALID.tar.zst > /dev/null
		fi
		print_header

		# Checks if the email has an @
		if [[ $email == *"@"* ]];then
			# The username is either not >= 4 or malformed; it can only
			# live in the NOT VALID file
			search_notvalid
		else
			printf "${YELLOW}[!]${NC} Please enter one email address or a file with one email address per line\n"
		fi
	fi

else
	printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n"
fi
Description: Takes in one file at a time as command line input. Processes each line in the file and places the 12 | information into the correct subdirectory of the data folder. 13 | Usage: python3 pysort.py file.txt 14 | Version: 1.5.0 15 | Python Version: 3.7.1 16 | """ 17 | 18 | args = sys.argv 19 | 20 | # Need TODO 21 | # Option to choose certain dirs to check for @gmail.com 22 | # Make everything check NOTVALID just in case 23 | # Automatically find the amount of lines in import using wc 24 | # Change theharvester out for hunter.io api 25 | # TODO make an option for counting the number of files 26 | # Make check import time, import into a separate folder and then delete it for more accurate results so you don't have to compress or decompress 27 | # TODO fix the percentage in compress ( * by 100) 28 | # TODO PutYourDataBasesHere for an optional HDD 29 | # TODO and store your data on a HDD 30 | # TODO Add support for SQL vbull CSV Json 31 | # TODO take the SHA256 hash of the data folder before compression and after decompression 32 | 33 | 34 | 35 | # FIXED 36 | # Check bash script . when inputting a file make sure @gmail.com exports results to the file 37 | # TODO skip .sql .csv .json files 38 | # TODO make an output to a file option for query 39 | # Fixed the harvester (option 4) 40 | # Take out the second option for the email harvesting 41 | # TODO make lookup by company name.
def check_duplicate(full_file_path, line):
    """
    Decide whether `line` should be appended to the file at `full_file_path`.

    The file is memory-mapped and scanned for an exact, full-line occurrence of
    `line`. The previous implementation used a raw substring search, which
    wrongly treated a NEW credential as a duplicate whenever it happened to be
    a substring of an EXISTING line (e.g. "a@b.com:pass" vs the stored
    "a@b.com:pass123"), silently dropping data. The needle is therefore
    anchored to line boundaries: "\\n" on both sides, or start-of-file.
    :param full_file_path: Path to the file to check (written by place_data,
                           one "email:password" entry per line, newline-terminated)
    :param line: The candidate line, without a trailing newline
    :return: True if the line should be written to the file; else False
    """
    # A file that is missing or empty cannot contain a duplicate (the original
    # crashed with FileNotFoundError when the 0utliers file was absent)
    if not os.path.exists(full_file_path) or os.stat(full_file_path).st_size == 0:
        return True
    # Every stored line ends with "\n", so a full-line match is either
    # "\n" + line + "\n" somewhere in the file, or line + "\n" at offset 0
    needle = str.encode(line) + b"\n"
    # Open the file as a binary file and scan it through a mmap obj
    with open(full_file_path, 'rb', 0) as fp, mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as s:
        if s.find(b"\n" + needle) != -1 or s[:len(needle)] == needle:
            return False  # exact line is already in the file so do not re-write it
    return True  # line is not in the file so write it
in emailPaswd[1]: 104 | # Switches the position of the username and password 105 | temp = emailPaswd[0] 106 | emailPaswd[0] = emailPaswd[1].lower() 107 | emailPaswd[1] = temp 108 | else: 109 | # Change all of the email usernames to be lowercase; to be uniform 110 | emailPaswd[0] = emailPaswd[0].lower() 111 | 112 | try: 113 | # check to see if you have a valid email address and the username is >= 4; also checks if there is a '@' in the username 114 | if '@' in emailPaswd[0].strip() and len(emailPaswd[0].strip().split('@')[0]) >= 4 and len(emailPaswd) >= 2 and emailPaswd[0].strip().count('@') == 1: 115 | first_letter = emailPaswd[0][0] 116 | second_letter = emailPaswd[0][1] 117 | third_letter = emailPaswd[0][2] 118 | fourth_letter = emailPaswd[0][3] 119 | 120 | # Check to see if the username has an invalid character and at what spot 121 | if str(first_letter).isalnum(): 122 | folder_depth = 1 123 | if str(second_letter).isalnum(): 124 | folder_depth = 2 125 | if str(third_letter).isalnum(): 126 | folder_depth = 3 127 | if str(fourth_letter).isalnum(): 128 | folder_depth = 4 129 | else: 130 | folder_depth = 0 131 | 132 | # Check to see if the first letter doesn't have a directory 133 | if not os.path.isdir(path + "/data/" + first_letter): 134 | # Check to see if we start with at least one valid char 135 | if folder_depth >= 1: 136 | # Make the directory 137 | os.makedirs(path + "/data/" + first_letter) 138 | else: 139 | # If the outlier dir doesn't exist; make it and start the file 140 | if not os.path.isdir(path + "/data/0UTLIERS"): 141 | os.makedirs(path + "/data/0UTLIERS") 142 | # Don't need to check for duplicates because its a new file 143 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 144 | length = len(emailPaswd) 145 | # Iterate through each index of the list and write it to the file 146 | for index in range(length): 147 | if index != length - 1: 148 | fp.write(emailPaswd[index] + ":") 149 | else: # Don't add a ':' at the end of the line 150 | 
fp.write(emailPaswd[index]) 151 | fp.write("\n") 152 | return 1 153 | else: # If the outlier dir already exists append the line to the file 154 | # Get the new line from the emailPasswd list 155 | length = len(emailPaswd) 156 | new_line = "" 157 | # Iterate through each index and add it to new_line 158 | for index in range(length): 159 | if index != length - 1: 160 | new_line += emailPaswd[index] + ":" 161 | else: 162 | new_line += emailPaswd[index] 163 | 164 | if check_duplicate(path + "/data/0UTLIERS/0utliers.txt", new_line): 165 | # Checks to see if there are duplicates already in the file, returns true if there isn't 166 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 167 | fp.write(new_line + "\n") 168 | return 1 169 | return 0 170 | else: # The directory already exists 171 | if folder_depth == 0: # There is NOT at least one consecutive valid char 172 | # If the outlier dir doesn't exist; make it and start the file 173 | if not os.path.isdir(path + "/data/0UTLIERS"): 174 | os.makedirs("mkdir " + path + "/data/0UTLIERS") 175 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 176 | length = len(emailPaswd) 177 | # Iterate through each index of the list and write it to the file 178 | for index in range(length): 179 | if index != length - 1: 180 | fp.write(emailPaswd[index] + ":") 181 | else: # Don't add a ':' at the end of the line 182 | fp.write(emailPaswd[index]) 183 | fp.write("\n") 184 | return 1 185 | else: # If the outlier dir already exists append the line to the file 186 | # Get the new line from the emailPasswd list 187 | length = len(emailPaswd) 188 | new_line = "" 189 | # Iterate through each index and add it to new_line 190 | for index in range(length): 191 | if index != length - 1: 192 | new_line += emailPaswd[index] + ":" 193 | else: 194 | new_line += emailPaswd[index] 195 | 196 | if check_duplicate(path + "/data/0UTLIERS/0utliers.txt", new_line): 197 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 198 | # 
Write to the file 199 | fp.write(new_line + "\n") 200 | return 1 201 | return 0 202 | 203 | # Check to see if the second letter doesn't have a directory 204 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter): 205 | # Check to see if we start with at least two valid char 206 | if folder_depth >= 2: 207 | # Make the directory 208 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter) 209 | else: 210 | # If the outlier dir doesn't exist; make it and start the file 211 | if not os.path.isdir(path + "/data/" + first_letter + "/0UTLIERS"): 212 | os.makedirs(path + "/data/" + first_letter + "/0UTLIERS") 213 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 214 | length = len(emailPaswd) 215 | # Iterate through each index of the list and write it to the file 216 | for index in range(length): 217 | if index != length - 1: 218 | fp.write(emailPaswd[index] + ":") 219 | else: # Don't add a ':' at the end of the line 220 | fp.write(emailPaswd[index]) 221 | fp.write("\n") 222 | return 1 223 | else: 224 | # Get the new line from the emailPasswd list 225 | length = len(emailPaswd) 226 | new_line = "" 227 | # Iterate through each index and add it to new_line 228 | for index in range(length): 229 | if index != length - 1: 230 | new_line += emailPaswd[index] + ":" 231 | else: 232 | new_line += emailPaswd[index] 233 | 234 | # Check for duplicates 235 | if check_duplicate(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", new_line): 236 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 237 | fp.write(new_line + "\n") 238 | return 1 239 | return 0 240 | else: # The directory already exists 241 | if folder_depth <= 1: # There is not at least two consecutive valid char 242 | # If the outlier dir doesn't exist; make it and start the file 243 | if not os.path.isdir(path + "/data/" + first_letter + "/0UTLIERS"): 244 | os.makedirs(path + "/data/" + first_letter + "/0UTLIERS") 245 | 
with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 246 | length = len(emailPaswd) 247 | # Iterate through each index of the list and write it to the file 248 | for index in range(length): 249 | if index != length - 1: 250 | fp.write(emailPaswd[index] + ":") 251 | else: # Don't add a ':' at the end of the line 252 | fp.write(emailPaswd[index]) 253 | fp.write("\n") 254 | return 1 255 | else: # If the outlier dir already exists append the line to the file 256 | # Get the new line from the emailPasswd list 257 | length = len(emailPaswd) 258 | new_line = "" 259 | # Iterate through each index and add it to new_line 260 | for index in range(length): 261 | if index != length - 1: 262 | new_line += emailPaswd[index] + ":" 263 | else: 264 | new_line += emailPaswd[index] 265 | 266 | if check_duplicate(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", new_line): 267 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 268 | fp.write(new_line + "\n") 269 | return 1 270 | return 0 271 | 272 | # Check to see if the third letter doesn't have a directory 273 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter): 274 | # Check to see if we start with at least three valid char 275 | if folder_depth >= 3: 276 | # Make the directory 277 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter) 278 | else: 279 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS"): 280 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS") 281 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 282 | length = len(emailPaswd) 283 | # Iterate through each index of the list and write it to the file 284 | for index in range(length): 285 | if index != length - 1: 286 | fp.write(emailPaswd[index] + ":") 287 | else: # Don't add a ':' at the end of the 
line 288 | fp.write(emailPaswd[index]) 289 | fp.write("\n") 290 | return 1 291 | else: # If the outlier dir already exists append the line to the file 292 | # Get the new line from the emailPasswd list 293 | length = len(emailPaswd) 294 | new_line = "" 295 | # Iterate through each index and add it to new_line 296 | for index in range(length): 297 | if index != length - 1: 298 | new_line += emailPaswd[index] + ":" 299 | else: 300 | new_line += emailPaswd[index] 301 | 302 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", new_line): 303 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 304 | fp.write(new_line + "\n") 305 | return 1 306 | return 0 307 | else: # The directory already exists 308 | if folder_depth <= 2: # There is not at least three consecutive valid char 309 | # If the outlier dir doesn't exist; make it and start the file 310 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS"): 311 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS") 312 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 313 | length = len(emailPaswd) 314 | # Iterate through each index of the list and write it to the file 315 | for index in range(length): 316 | if index != length - 1: 317 | fp.write(emailPaswd[index] + ":") 318 | else: # Don't add a ':' at the end of the line 319 | fp.write(emailPaswd[index]) 320 | fp.write("\n") 321 | return 1 322 | else: # If the outlier dir already exists append the line to the file 323 | # Get the new line from the emailPasswd list 324 | length = len(emailPaswd) 325 | new_line = "" 326 | # Iterate through each index and add it to new_line 327 | for index in range(length): 328 | if index != length - 1: 329 | new_line += emailPaswd[index] + ":" 330 | else: 331 | new_line += emailPaswd[index] 332 | 333 | if check_duplicate(path 
+ "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", new_line): 334 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 335 | fp.write(new_line + "\n") 336 | return 1 337 | return 0 338 | 339 | # Checks to see if the file in the third directory doesn't exists 340 | if not os.path.isfile(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt"): 341 | if folder_depth == 4: # The file doesn't exist in the third dir but there is 4 valid chars 342 | # Make the file 343 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", 'a') as output_file: 344 | length = len(emailPaswd) 345 | # Iterate through each index of the list and write it to the file 346 | for index in range(length): 347 | if index != length-1: 348 | output_file.write(emailPaswd[index] + ":") 349 | else: # Don't add a ':' at the end of the line 350 | output_file.write(emailPaswd[index]) 351 | output_file.write("\n") 352 | return 1 353 | elif folder_depth == 3: # Check to see if the fourth letter is an outlier EX) exa!mple@example.com 354 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS"): 355 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS") 356 | # Make the 0UTLIERS file 357 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", 'a') as output_file: 358 | # Get the new line from the emailPasswd list 359 | length = len(emailPaswd) 360 | new_line = "" 361 | # Iterate through each index and add it to new_line 362 | for index in range(length): 363 | if index != length - 1: 364 | new_line += emailPaswd[index] + ":" 365 | else: 366 | new_line += emailPaswd[index] 367 | 368 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + 
third_letter + "/0UTLIERS/0utliers.txt", new_line): 369 | output_file.write(new_line + "\n") 370 | return 1 371 | return 0 372 | else: # The file exists 373 | if folder_depth == 4: # The file does exist in the third dir but there is 4 valid chars 374 | # Get the new line from the emailPasswd list 375 | length = len(emailPaswd) 376 | new_line = "" 377 | # Iterate through each index and add it to new_line 378 | for index in range(length): 379 | if index != length - 1: 380 | new_line += emailPaswd[index] + ":" 381 | else: 382 | new_line += emailPaswd[index] 383 | 384 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", new_line): 385 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", 'a') as output_file: 386 | output_file.write(new_line + "\n") 387 | return 1 388 | return 0 389 | elif folder_depth == 3: # The file does exist in the third dir but there is only 3 valid chars 390 | # Check to see if you need to make the 0UTLIERS dir 391 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS"): 392 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS") 393 | 394 | # Get the new line from the emailPasswd list 395 | length = len(emailPaswd) 396 | new_line = "" 397 | # Iterate through each index and add it to new_line 398 | for index in range(length): 399 | if index != length - 1: 400 | new_line += emailPaswd[index] + ":" 401 | else: 402 | new_line += emailPaswd[index] 403 | 404 | # Check for duplicates and then write to the file 405 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", new_line): 406 | # Append the 0UTLIERS file 407 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", 'a') as output_file: 
if __name__ == '__main__':
    # ANSI color codes for terminal output.
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    NC = '\033[0m'  # No Color

    path = os.getcwd()

    # Validate the arguments BEFORE touching args[1].  The original code
    # checked args[1].endswith(...) first, which raised IndexError when the
    # script was launched without a filename argument.
    # NOTE(review): `args` is defined earlier in this file (presumably
    # sys.argv) -- confirm against the file head.
    if len(args) == 2 and args[1] != "":
        # There is currently no support for these file extensions; skip them
        # to speed up the import.  A single tuple-argument endswith() replaces
        # the original or-chain, which tested ".sql" twice.
        if args[1].endswith((".sql", ".csv", ".json", ".xlsx")):
            exit()

        start_time = time.time()
        total_lines = 0    # The amount of lines that are not white-space
        written_lines = 0  # The amount of lines actually written by place_data()

        print(GREEN + "[+]" + NC + " Opening file " + GREEN + args[1] + NC)
        # Directory guaranteed to exist from previous check in Import.sh
        with open(path + "/PutYourDataBasesHere/" + args[1], 'r') as fp:
            try:
                for line in fp:
                    # Periodic progress indicator every 10,000 processed lines.
                    if total_lines % 10000 == 0 and total_lines != 0:
                        print(GREEN + "[+]" + NC + " Processing line number: " + str(total_lines) + "\nLine: " + line)
                    if line.strip() != "":
                        written_lines += place_data(line.strip(), path)
                        total_lines += 1
            except Exception as e:
                # Report and fall through so totals are still printed/logged.
                print(RED + "Exception: " + str(e) + NC)
        stop_time = time.time()

        # Output to stdout.  Line counts are integers, so print them as such;
        # the original formatted them with "%.2f" (e.g. "1234.00").
        print()
        print(GREEN + "[+]" + NC + " Total time: " + ("%.2f" % (stop_time - start_time)) + " seconds")
        print(GREEN + "[+]" + NC + " Total lines: " + str(total_lines))
        print(GREEN + "[+]" + NC + " Written lines: " + str(written_lines))

        # Log times
        with open(path + "/Logs/ActivityLogs.log", 'a') as log:
            log.write("[+] Total time: " + ("%.2f" % (stop_time - start_time)) + " seconds" + "\n")
            log.write("[+] Total lines: " + str(total_lines) + "\n")
            log.write("[+] Written lines: " + str(written_lines) + "\n")
    else:
        print(YELLOW + "[!]" + NC + " Invalid arguments provided")