├── Logs ├── importedDBS.log └── DELETE_ME.txt ├── data └── DELETE_ME.txt ├── OutputFiles └── DELETE_ME.txt ├── PutYourDataBasesHere └── example.txt ├── dependencies.sh ├── LICENSE.md ├── query.sh ├── decompress.sh ├── compress.sh ├── folderPrimer.py ├── benchmark.py ├── Import.sh ├── README.md ├── run.sh ├── search.sh └── pysort.py /Logs/importedDBS.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Logs/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /data/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /OutputFiles/DELETE_ME.txt: -------------------------------------------------------------------------------- 1 | This file is just here so github doesn't delete the directory from the repository -------------------------------------------------------------------------------- /PutYourDataBasesHere/example.txt: -------------------------------------------------------------------------------- 1 | test@example.com:Password1 2 | admin@example.com:SUperSeCreTPassswrd1 3 | Michael@dundermifflin.com:Sc0tt 4 | Password17:Mark@facebook.com 5 | -------------------------------------------------------------------------------- /dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/18/2019 6 | #Usage: ./dependencies.sh 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 
| NC='\033[0m' # No Color 11 | 12 | if [ "${PWD##*/}" == "BaseQuery" ];then 13 | sudo chmod 755 -R $(pwd) 14 | sudo apt-get update -y 15 | sudo apt-get install python3.7 -y 16 | sudo apt-get install tar -y 17 | sudo apt-get install zstd -y 18 | sudo apt-get install xterm -y 19 | echo 20 | printf "${GREEN}[+]${NC} Finished downloading!\n" 21 | else 22 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 23 | fi 24 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 https://github.com/g666gle 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /query.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./query.sh test@example.com 7 | #Usage: ./query.sh test@ 8 | #Usage: ./query.sh @example.com 9 | #Usage: ./query.sh /home/user/Desktop/file.txt 10 | 11 | RED='\033[0;31m' 12 | YELLOW='\033[1;33m' 13 | GREEN='\033[0;32m' 14 | NC='\033[0m' # No Color 15 | 16 | # Checks to see if the user forgot to enter input 17 | if [ $# -eq 1 ];then 18 | if [ "${PWD##*/}" == "BaseQuery" ];then 19 | # Checks to see if the file exists in the working directory 20 | if ! [ -e "$1" ];then 21 | # Only one email 22 | ./search.sh "$1" 23 | else 24 | # A file was inputed 25 | filename="$(echo $1 | rev | cut -f 1 -d "/" | rev)" #test.txt 26 | printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/""$(echo $filename | cut -f 1 -d "." 
)""_output.txt${NC}\n" 27 | cat "$1" | while read -r email;do 28 | #echo 29 | # The first param is the email address the second is telling ./search that it 30 | # is a file so the user is not prompted 31 | ./search.sh "$email" "$filename" 32 | 33 | done 34 | fi 35 | else 36 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 37 | fi 38 | else 39 | printf "${YELLOW}[!]${NC} Please enter one email address or a file with one email address per line\n" 40 | fi 41 | -------------------------------------------------------------------------------- /decompress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/16/2019 6 | #Usage: ./decompress.sh 7 | #Usage: ./decompress.sh 8 | #Example: ./decompress.sh 0.tar.zst 9 | 10 | RED='\033[0;31m' 11 | YELLOW='\033[1;33m' 12 | NC='\033[0m' # No Color 13 | 14 | # Make sure the user is in the BaseQuery directory 15 | if [ "${PWD##*/}" == "BaseQuery" ];then 16 | # Check if no args were passed in; then decompress everything 17 | if [ $# -eq 0 ];then 18 | # Iterate through all the compressed files 19 | find data/ -type f -name "*.tar.zst" | sort | while read -r compressed_file; do 20 | # check to make sure you dont decompress the working directory 21 | if [ "$compressed_file" != "data/" ];then 22 | # Grabs the name of the file from the path 23 | name="$(echo $compressed_file | cut -f 2- -d "/" | cut -f 1 -d ".")" 24 | # decompress the .tar.zst files 25 | #tar --use-compress-program=zstd -xf ./data/0.tar.zst 26 | tar --use-compress-program=zstd -xf ./data/$name.tar.zst 27 | # remove the old compressed files 28 | rm -rf data/"$name".tar.zst 29 | fi 30 | done 31 | elif [ $# -eq 1 ];then 32 | # make sur you dont try and decompress the working directory 33 | if [ "$1" != "data/" ];then 34 | # decompress the .tar.zst files 35 | #tar 
--use-compress-program=zstd -xf ./data/0.tar.zst 36 | tar --use-compress-program=zstd -xf ./data/"$1" 37 | # remove the old compressed files 38 | rm -rf data/"$1" 39 | fi 40 | else # Wrong input 41 | printf "${RED}[!]${NC} Usage Error: ./decompress.sh \n" 42 | printf "${RED}[!]${NC} Usage Error: ./decompress.sh \n" 43 | 44 | printf "[!] Usage Error: ./decompress.sh \n" >> ./Logs/ActivityLogs.log 45 | printf "[!] Usage Error: ./decompress.sh \n" >> ./Logs/ActivityLogs.log 46 | 47 | fi 48 | else 49 | # If the users working directory is not BaseQuery while trying to run the script 50 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 51 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 52 | fi 53 | -------------------------------------------------------------------------------- /compress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 2/16/2019 6 | #Usage: ./compress.sh 7 | 8 | RED='\033[0;31m' 9 | YELLOW='\033[1;33m' 10 | NC='\033[0m' # No Color 11 | 12 | # Make sure the user is in the BaseQuery directory 13 | if [ "${PWD##*/}" == "BaseQuery" ];then 14 | let orig_bytes=0 15 | declare -a arr 16 | # Find all of the uncompressed directories 17 | while read -r uncompressed_dir; do 18 | arr=(${uncompressed_dir}) 19 | if [ "$uncompressed_dir" != "data/" ];then 20 | file_bytes=$(du -sb "$uncompressed_dir"/ | cut -f 1) 21 | let orig_bytes=$orig_bytes+$file_bytes 22 | name="$(echo $uncompressed_dir | cut -f 2- -d "/")" 23 | #tar --use-compress-program=zstd -cf data/0.tar.zst data/0 24 | tar --use-compress-program=zstd -cf data/"$name".tar.zst "$uncompressed_dir" 25 | rm -rf "$uncompressed_dir" 26 | fi 27 | _constr+="${arr[2]}" 28 | done< <(find data/ -maxdepth 1 -type d | sort) 29 | 30 | compressed_bytes=$(du -sb data/ | 
cut -f 1) 31 | if [[ $orig_bytes -ne 0 && $compressed_bytes -ne 0 ]];then 32 | comp_div_ori=$( awk -v orig=$orig_bytes -v comp=$compressed_bytes 'BEGIN{printf("%.2f\n",comp/orig*100)}' ) 33 | multiples_compressed=$(( $orig_bytes/$compressed_bytes )) 34 | echo 35 | printf "${RED}[*] Your data is $multiples_compressed""x times smaller! (~$comp_div_ori%% of the original size)${NC}\n" 36 | printf "${YELLOW}[!] Original number of bytes $orig_bytes${NC}\n" 37 | printf "${YELLOW}[!] Compressed number of bytes $compressed_bytes${NC}\n" 38 | 39 | printf "[*] Your data is $multiples_compressed""x times smaller! (~$percentage_compressed%% of the original size)\n" >> ./Logs/ActivityLogs.log 40 | printf "[!] Original number of bytes $orig_bytes\n" >> ./Logs/ActivityLogs.log 41 | printf "[!] Compressed number of bytes $compressed_bytes\n" >> ./Logs/ActivityLogs.log 42 | fi 43 | 44 | else 45 | # If the users working directory is not BaseQuery while trying to run the script 46 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 47 | 48 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 49 | fi 50 | 51 | 52 | -------------------------------------------------------------------------------- /folderPrimer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import time 4 | 5 | """ 6 | Author Github: https://github.com/g666gle 7 | Author Twitter: https://twitter.com/g666gle1 8 | Date: 1/29/2019 9 | Description: This file creates a triple nested [A-Z] and [0-9] directories so the data from the databases are easily 10 | accessible. The reason to do this before instead of while pysort.py is placing all the files is due to 11 | the fact that creating directories is slightly time consuming and by creating them all at once instead 12 | of on the fly. We can expect to see a small efficiency improvement. Also.... 
organization 13 | Usage: python3 folderPrimer.py 14 | Version: 1.5.0 15 | Python Version: 3.7.1 16 | """ 17 | 18 | path = os.getcwd() 19 | 20 | 21 | def folder_spam(): 22 | """ 23 | This function creates all the nested files needed to store the data. [A-Z][0-9] 24 | :return: N/A 25 | """ 26 | first_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 27 | second_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 28 | third_nest = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') 29 | 30 | for char in first_nest: # Creating the first nesting of the folders 31 | if not os.path.isdir(path + "/data/" + char.strip()): 32 | os.makedirs(path + "/data/" + char.strip()) 33 | for char in first_nest: # Creating the second nesting of the folders 34 | for char2 in second_nest: 35 | if not os.path.isdir(path + "/data/" + char.strip() + "/" + char2.strip()): 36 | os.makedirs(path + "/data/" + char.strip() + "/" + char2.strip()) 37 | for char in first_nest: # Creating the third nesting of the folders 38 | for char2 in second_nest: 39 | for char3 in third_nest: 40 | if not os.path.isdir(path + "/data/" + char.strip() + "/" + char2.strip() + "/" + char3.strip()): 41 | os.makedirs(path + "/data/" + char.strip() + "/" + char2.strip() + "/" + char3.strip()) 42 | 43 | 44 | if __name__ == '__main__': 45 | GREEN = '\033[0;32m' 46 | YELLOW = '\033[1;33m' 47 | NC = '\033[0m' # No Color 48 | 49 | print() 50 | print(GREEN + "[+]" + NC + "Priming the data directory") 51 | start_time = time.time() 52 | folder_spam() 53 | end_time = time.time() 54 | print(GREEN + "[+]" + NC + " Data directory 
finished being primed!") 55 | print(YELLOW + "[!]" + NC + " Action took " + str(int(end_time - start_time)) + " seconds") 56 | print() 57 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import time 6 | from pysort import place_data 7 | 8 | """ 9 | Author Github: https://github.com/g666gle 10 | Author Twitter: https://twitter.com/g666gle1 11 | Date: 1/29/2019 12 | Description: This file is used by run.sh to calculate the amount of time it will take the current hardware to process 13 | the user specified amount of lines. This is important because each user has different hardware. Import 14 | mainly relies on the type of CPU and how many cores it has. On an Intel i7-7700 processor and 16GB of 15 | RAM you can expect results of around 18000 lines per second while your laptop is plugged in and 12000 16 | lines per second while your laptop is not plugged in. 
17 | 18 | Usage: python3 folderPrimer.py 19 | Usage: python3 folderPrimer.py file.txt 1000000 20 | Version: 1.5.0 21 | Python Version: 3.7.1 22 | """ 23 | 24 | if __name__ == '__main__': 25 | # the arguments passed in 26 | args = sys.argv 27 | path = os.getcwd() 28 | written_lines = 0 29 | # The amount of lines written in 1 Second 30 | total_lines = 0 31 | RED = '\033[0;31m' 32 | GREEN = '\033[0;32m' 33 | YELLOW = '\033[1;33m' 34 | NC = '\033[0m' # No Color 35 | start_time = time.time() 36 | 37 | # Check to see if the arguments are correct 38 | if len(args) == 3 and args[1] != "" and args[2] != "": 39 | # Grab the users imputed amount of lines 40 | amt_lines = args[2] 41 | # Directory guaranteed to exist from previous check in Import.sh 42 | with open(path + "/PutYourDataBasesHere/" + args[1], 'r') as fp: 43 | try: 44 | for line in fp: 45 | # go through as many lines as you can in 2 second 46 | if (time.time() - start_time) <= 2: 47 | written_lines += place_data(line.strip(), path) 48 | total_lines += 1 49 | else: 50 | break 51 | except Exception as e: 52 | print(RED + "Exception: " + str(e) + NC) 53 | 54 | # The seconds is ( ( 2 * ( user imputed lines ) ) / ( amount of lines processed in 2 seconds ) ) 55 | secs = (2 * int(amt_lines)) / int(total_lines) 56 | mins = secs / 60 57 | hours = mins / 60 58 | days = hours / 24 59 | years = days / 364 60 | os.system('cls' if os.name == 'nt' else 'clear') 61 | print(YELLOW + "[!]" + NC + " Your computer can process " + RED + str(total_lines / 2) + NC + " lines per second!") 62 | print(YELLOW + "[!] For the best results make sure your laptop is pluged into a power source!" 
+ NC) 63 | print() 64 | print(GREEN + "[+]" + NC + " To import " + GREEN + amt_lines + NC + " lines") 65 | print(GREEN + "[+]" + NC + " You can expect an import time of around " + GREEN + "{}".format('%.2f' % secs) + NC + " seconds which is...") 66 | print(" " + GREEN + "{}".format('%.2f' % mins) + NC + " minutes which is...") 67 | print(" " + GREEN + "{}".format('%.2f' % hours) + NC + " hours which is...") 68 | print(" " + GREEN + "{}".format('%.2f' % days) + NC + " days which is...") 69 | print(" " + GREEN + "{}".format('%.2f' % years) + NC + " years") 70 | 71 | 72 | else: 73 | print(YELLOW + "[!]" + NC + " Invalid arguments provided") 74 | -------------------------------------------------------------------------------- /Import.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./Import 7 | 8 | RED='\033[0;31m' 9 | YELLOW='\033[1;33m' 10 | GREEN='\033[0;32m' 11 | NC='\033[0m' # No Color 12 | 13 | printf "${RED}[*]${NC} Starting at $(date)\n" 14 | 15 | #Checks to see if the user is working out of the BaseQuery directory 16 | if [ "${PWD##*/}" == "BaseQuery" ];then 17 | # Checks to see if the Import directory is there 18 | if [ -d ./PutYourDataBasesHere ];then 19 | dataDir="$(pwd)" 20 | 21 | # This loop is checking to see if any new files are in the PutYourDataBasesHere 22 | # directory.If not then there is no reason to decompress and compress everything 23 | let i=0 # used to count the amount of files not already imported 24 | declare -a arr 25 | while read -r inputfile;do 26 | arr=(${inputfile}) 27 | file_SHA_sum="$(sha256sum "$dataDir"/PutYourDataBasesHere/"$inputfile" | awk '{print$1}')" 28 | # check to see if the database has already been imported 29 | if [ "$(grep "$file_SHA_sum" -c < ./Logs/importedDBS.log)" == "0" ];then 30 | let i=i+1 31 | fi 32 | _constr+="${arr[2]}" 33 | 
done< <(find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/") 34 | 35 | # if there are files that need to be imported 36 | if [ $i -ne 0 ];then 37 | # decompress all of the folders before priming and import 38 | printf "${YELLOW}[!]${NC} Decompressing all stored data\n" 39 | printf "[!] Decompressing all stored data\n" >> ./Logs/ActivityLogs.log 40 | ./decompress.sh 41 | printf "${GREEN}[+]${NC} Finished decompressing!\n" 42 | printf "[+] Finished decompressing!\n" >> ./Logs/ActivityLogs.log 43 | 44 | # Prime the data folder 45 | python3 folderPrimer.py 46 | 47 | # Read each file in the input files, in sorted order 48 | find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/" | while read -r inputfile;do 49 | file_SHA_sum="$(sha256sum "$dataDir"/PutYourDataBasesHere/"$inputfile" | awk '{print$1}')" 50 | # check to see if the database has already been imported 51 | if [ "$(grep "$file_SHA_sum" -c < ./Logs/importedDBS.log)" == "0" ];then 52 | # Call a python script to iterate through the file and sort them 53 | python3 pysort.py "$inputfile" 54 | printf "${YELLOW}[!] Adding $inputfile to importedDBS.log${NC}\n" 55 | echo "$file_SHA_sum" "$(date)" "$inputfile" >> "$dataDir"/Logs/importedDBS.log 56 | echo 57 | else 58 | printf "${YELLOW}[!]${NC} $inputfile SHASUM found in importedDBS.log\n" 59 | printf "[!] $inputfile SHASUM found in importedDBS.log\n" >> ./Logs/ActivityLogs.log 60 | fi 61 | done 62 | printf "${YELLOW}[!]${NC} Compressing all data\n" 63 | printf "[!] Compressing all data\n" >> ./Logs/ActivityLogs.log 64 | # All data is stored. 
Time to compress 65 | ./compress.sh 66 | printf "${GREEN}[+]${NC} Finished compressing!\n" 67 | printf "[+] Finished compressing!\n" >> ./Logs/ActivityLogs.log 68 | 69 | else # No new files found 70 | echo 71 | printf "${RED}ERROR:${NC} No new files found in the 'PutYourDataBasesHere' directory \n" 72 | printf "ERROR: No new files found in the 'PutYourDataBasesHere' directory \n" >> ./Logs/ActivityLogs.log 73 | 74 | fi # check for imported files 75 | 76 | else # If the Import directory doesn't exist 77 | dataDir=$(pwd) 78 | printf "${RED}ERROR: Please make a directory called 'PutYourDataBasesHere' in $dataDir${NC}\n" 79 | printf "ERROR: Please make a directory called 'PutYourDataBasesHere' in $dataDir\n" >> ./Logs/ActivityLogs.log 80 | fi 81 | else 82 | # If the users working directory is not BaseQuery while trying to run the script 83 | printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n" 84 | printf "ERROR: Please change directories to the BaseQuery root directory\n" >> ./Logs/ActivityLogs.log 85 | fi 86 | echo 87 | printf "${RED}[*]${NC} Completed\n" 88 | printf "[*] Completed\n" >> ./Logs/ActivityLogs.log 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | BaseQuery V1.5 3 |

4 | 5 |

6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |

19 | 20 | Your private data is being traded and sold all over the internet as we speak. Tons of leaks come out on a daily basis which can make you feel powerless. The majority of user-passwords and other sensitive information have been posted somewhere on the internet/darknet for any prying eyes to see, whether you like it or not. To take more control of what personal info is out there you can use [Haveibeenpwned](https://haveibeenpwned.com/) to narrow down which breaches your information has been exposed in. This is a great start but what if you want to know exactly what information of yours other people have access to? BaseQuery is an all in one program that makes importing and searching through thousands of data-breaches easy. 21 | 22 | 23 | ![basequery_banner](https://user-images.githubusercontent.com/47184892/53661764-272e8380-3c2f-11e9-8303-763cf00c27ab.png) 24 | ### Features Included: 25 | * A 4x nested storage structure 26 | * Average import speeds of 12,000+ entries per second (Intel Core i7-7700HQ CPU @ 2.8GHz) 27 | * Instantaneous querying system 28 | * Facebook's zstd lossless compression algorithm to reduce the size of the data (On average reduces the data to less than 10% of the original size) 29 | * Calculate the time all your files will take to import based on your specific hardware 30 | * Duplicate data protection 31 | * Output all of your findings in a standard format 32 | * Email harvesting built-in 33 | 34 | ## Installing 35 | 36 | To Install BaseQuery type the following commands 37 | 38 | ``` 39 | git clone https://github.com/g666gle/BaseQuery.git 40 | sudo chmod 755 -R BaseQuery/ 41 | cd BaseQuery 42 | ./dependencies.sh 43 | ./run.sh 44 | ``` 45 | 46 | 47 | ## Getting Started 48 | 1. 
Place any databases that you have into the "PutYourDataBasesHere" folder 49 | - As of right now, BaseQuery can only accept files in the format where each line is colon separated "test@example.com:password" or "password:test@example.com" 50 | - It doesn't matter if the line formats are mixed up within the same file. Ex) The first line may be "email:password" and the second line can be "password:email" 51 | - One entry per line!! 52 | - If you need a better visual there is an example.txt file in the folder "PutYourDataBasesHere" 53 | - You should delete the example file before running the program. 54 | 1. Now that you have all of your files in the correct folder 55 | - Open up a terminal in the BaseQuery directory. 56 | - Type ./dependencies.sh to install all of the resources needed ( You only need to do this once ) 57 | - Type ./run.sh to start the program 58 | - **Note that if you are using a laptop make sure it is plugged in. Importing databases uses A LOT of processing power and will make the import 4 times faster on average!** 59 | 1. Follow the instructions on the screen 60 | - That's it, enjoy! 61 | - Contact me with any issues. 
62 | 63 | ### Import Times Based on Hardware Specifics 64 | 65 | 66 | 67 | 68 | ### Query Options 69 | 70 | ![basequery_query](https://user-images.githubusercontent.com/47184892/53662460-f0596d00-3c30-11e9-8ac6-f0b154ad22b7.PNG) 71 | 72 | *** 73 | ## Prerequisites 74 | **Note: All of these are automatically installed using the 'dependencies.sh' script** 75 | 76 | ``` 77 | Update packages: (sudo apt-get update) 78 | 79 | Python Version 3.6+ (sudo apt-get install python3.7) 80 | Bash 4+ 81 | tar (sudo apt-get install tar) 82 | zstd (sudo apt-get install zstd) 83 | xterm (sudo apt-get install xterm) 84 | ``` 85 | 86 | 87 | ## Built With 88 | 89 | * Ubuntu 18.04 bionic 90 | 91 | * Bash Version: 92 | GNU bash, version 4.4.19(1)-release (x86_64-pc-linux-gnu) 93 | 94 | * Python Version: 95 | 3.7.1 96 | 97 | ## Authors 98 | 99 | * **G666gle** - [Github](https://github.com/G666gle), [Twitter](https://twitter.com/g666g1e) 100 | 101 | 102 | ## License 103 | 104 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details 105 | 106 | ## Disclaimer 107 | 108 | **READ UP ON YOUR LOCAL LAWS FIRST BEFORE USING THIS PROGRAM. I TAKE NO RESPONSIBILITY FOR ANYTHING YOU DO WITH BASEQUERY. 
UNDER NO CIRCUMSTANCE SHOULD BASEQUERY BE USED FOR ILLEGAL PURPOSES.** 109 | 110 | 111 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Author Github: https://github.com/g666gle 4 | #Author Twitter: https://twitter.com/g666gle1 5 | #Date: 1/29/2019 6 | #Usage: ./run.sh 7 | 8 | RED='\033[0;31m' 9 | GREEN='\033[0;32m' 10 | YELLOW='\033[1;33m' 11 | NC='\033[0m' # No Color 12 | 13 | #change the window size to fit the art 14 | # requires xterm as a dependency 15 | resize -s 25 134 16 | 17 | #ctrl+C 18 | trap finish INT 19 | 20 | function finish { 21 | printf "[*] Exit Trap Reached (CTRL+C)\n" >> ./Logs/ActivityLogs.log 22 | clear 23 | exit 24 | } 25 | 26 | clear 27 | echo " _ _ _ _ _ _ _ _ _ _ " 28 | echo " / /\ / /\ / /\ /\ \ /\ \ /\_\ /\ \ /\ \ /\ \ /\_\ " 29 | echo " / / \ / / \ / / \ / \ \ / \ \ / / / _ / \ \ / \ \\\\ \ \ / / / " 30 | echo " / / /\ \ / / /\ \ / / /\ \__ / /\ \ \ / /\ \ \ \ \ \__ /\_\ / /\ \ \ / /\ \ \\\\ \ \_/ / / " 31 | echo " / / /\ \ \ / / /\ \ \ / / /\ \___\ / / /\ \_\ / / /\ \ \ \ \___\ / / // / /\ \_\ / / /\ \_\\\\ \___/ / " 32 | echo " / / /\ \_\ \ / / / \ \ \ \ \ \ \/___// /_/_ \/_/ / / / \ \_\ \__ / / / // /_/_ \/_/ / / /_/ / / \ \ \_/ " 33 | echo " / / /\ \ \___\ / / /___/ /\ \ \ \ \ / /____/\ / / / _ / / / / / / / / // /____/\ / / /__\/ / \ \ \ " 34 | echo " / / / \ \ \__/ / / /_____/ /\ \ _ \ \ \ / /\____\/ / / / /\ \/ / / / / / / // /\____\/ / / /_____/ \ \ \ " 35 | echo " / / /____\_\ \ / /_________/\ \ \ /_/\__/ / / / / /______ / / /__\ \ \/ / / /___/ / // / /______ / / /\ \ \ \ \ \ " 36 | echo " / / /__________\/ / /_ __\ \_\\\\ \/___/ / / / /_______\ / / /____\ \ \/ / /____\/ // / /_______\/ / / \ \ \ \ \_\ " 37 | echo " \/_____________/\_\___\ /____/_/ \_____\/ \/__________/ \/________\_\/\/_________/ \/__________/\/_/ \_\/ \/_/ " 38 | echo 39 | 40 | 41 | #Make sure that the user is in 
the BaseQuery directory 42 | if [ "${PWD##*/}" == "BaseQuery" ];then 43 | 44 | #Log entry 45 | echo "[*] Executed run.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 46 | 47 | while true;do 48 | echo 49 | echo "Options:" 50 | echo " [1] Import Your data" 51 | echo " [2] Calculate Import Time" 52 | echo " [3] Query" 53 | echo " [4] Harvest Email Addresses" 54 | echo " [5] Message" 55 | echo " [Q] Quit" 56 | echo 57 | read -p "Option Number-> " answer 58 | 59 | # Check to see if the answer is only letters 60 | if [[ "$answer" =~ ^[a-zA-Z]+$ ]];then 61 | if [[ "$answer" == [Qq] ]];then 62 | # Log entry 63 | echo "[*] run.sh COMMAND 'q' [ $(date) ]" >> ./Logs/ActivityLogs.log 64 | echo >> ./Logs/ActivityLogs.log 65 | clear 66 | exit 67 | fi 68 | 69 | # Check to see if the answer is only numbers 70 | elif [[ "$answer" =~ ^[0-9]+$ ]];then 71 | 72 | if [ "$answer" -eq 1 ];then 73 | #Log entry 74 | echo "[+] run.sh COMMAND '1' [ $(date) ]" >> ./Logs/ActivityLogs.log 75 | echo "[!] Executing ./Import.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 76 | start=$SECONDS 77 | ./Import.sh 78 | stop=$SECONDS 79 | difference=$(( stop - start )) 80 | printf "${GREEN}[!]${NC} The entire import including compression and decompression took $difference seconds\n" 81 | echo 82 | 83 | elif [ "$answer" -eq 2 ];then 84 | echo 85 | printf "${YELLOW}Make sure you have at least one file with at least 20,000 lines in PutYourDataBasesHere/${NC}\n" 86 | echo 87 | 88 | while true;do 89 | echo "Please enter the number of lines you wish to import... 
" 90 | read -p "('q' to quit) Lines>> " num_lines 91 | echo 92 | if [[ $num_lines != [Qq] ]]; then 93 | if [[ "$num_lines" =~ ^[0-9]+$ ]];then 94 | # Log Entry 95 | echo "[+] run.sh COMMAND '2' arg [$num_lines] [ $(date) ]" >> ./Logs/ActivityLogs.log 96 | path="$(pwd)" 97 | while read -r inputfile;do 98 | num="$(wc -c < "$path"/PutYourDataBasesHere/"$inputfile")" 99 | if [ "$num" -ge 20000 ];then 100 | printf "${GREEN}[+]${NC} Decompressing files\n" 101 | ./decompress.sh 102 | printf "${GREEN}[+]${NC} Starting Benchmark!\n" 103 | python3 benchmark.py "$inputfile" "$num_lines" 104 | break 105 | fi 106 | 107 | done< <(find PutYourDataBasesHere -type f -exec echo {} \; | cut -f 2- -d "/") 108 | 109 | else 110 | printf "${YELLOW}[!]${NC} Invalid input\n" 111 | fi 112 | echo 113 | 114 | else # If the user enters q or Q 115 | printf "${YELLOW}[!]${NC} Exiting Calculate Import Time\n" 116 | printf "${GREEN}[+]${NC} Compressing files\n" 117 | ./compress.sh 118 | echo 119 | printf "${GREEN}[!]${NC} Compression completed!\n" 120 | break 121 | fi 122 | done 123 | 124 | 125 | elif [ "$answer" -eq 3 ];then 126 | echo 127 | printf "Please enter enter an email address in one of the following formats \n" 128 | printf " ex) test@example.com [ Searches for all passwords associated with this address ]\n" 129 | printf " ex) test@ [ Searches for all passwords for any email addresses starting with this username ]\n" 130 | printf " ex) @example.com [ Searches for all passwords for any email addresses ending with this domain name ]\n" 131 | printf " ex) /home/user/Desktop/email_list.txt [ Searches line by line through the file for all passwords for each email address ]\n\n" 132 | while true;do 133 | read -p "('q' to quit) Email>> " email 134 | if [[ $email != [Qq] ]]; then 135 | if [ "$email" != "" ];then 136 | # Log Entry 137 | echo "[+] run.sh COMMAND '3' arg [$email] [ $(date) ]" >> ./Logs/ActivityLogs.log 138 | echo "[!] 
Executing query.sh [ $(date) ]" >> ./Logs/ActivityLogs.log 139 | ./query.sh "$email" 140 | echo 141 | else 142 | continue 143 | fi 144 | else 145 | echo 146 | printf "${YELLOW}[!]${NC} Exiting Query\n" 147 | break 148 | fi 149 | done 150 | 151 | # Compress all of the data 152 | echo 153 | printf "${YELLOW}[!]${NC} Compressing all stored data\n" 154 | ./compress.sh 155 | printf "${GREEN}[+]${NC} Finished compressing!\n" 156 | echo 157 | 158 | elif [ "$answer" -eq 4 ];then 159 | echo 160 | echo "[+] run.sh COMMAND '4' [ $(date) ]" >> ./Logs/ActivityLogs.log 161 | printf "${GREEN}Code taken from https://github.com/laramies/theHarvester${NC}\n" 162 | printf "${GREEN} Go check him out${NC}\n" 163 | # Check if theHarvester is already installed 164 | if ! [ -d ./theHarvester ];then 165 | printf "${YELLOW}[!]${NC} Installing theHarvester\n" 166 | git clone https://github.com/laramies/theHarvester.git #&> /dev/null 167 | fi 168 | 169 | # Install all of the requirements 170 | printf "${YELLOW}[!]${NC} Updating requirements\n" 171 | sudo python3 -m pip install -r ./theHarvester/requirements.txt #&> /dev/null 172 | 173 | printf "${YELLOW}[!] PLACE ANY API KEYS IN $(pwd)/theHarvester/api-keys.yaml${NC}\n" 174 | echo "Domain name? ex) google.com" 175 | read -p "> " domain 176 | echo "Limit for the amount of email addresses? ex) 500" 177 | read -p "> " limit 178 | printf " 179 | ${RED}source:${NC} baidu, bing, bingapi, censys, crtsh, cymon, 180 | dogpile, duckduckgo, google, googleCSE, google- 181 | certificates, google-profiles, hunter, intelx, 182 | linkedin, netcraft, pgp, securityTrails, threatcrowd, 183 | trello, twitter, vhost, virustotal, yahoo, all\n" 184 | echo 185 | echo "Source? 
ex) all" 186 | read -p "> " source 187 | sudo python3 ./theHarvester/theHarvester.py -d "$domain" -l "$limit" -b "$source" 188 | echo 189 | printf "${RED}COPY ONLY THE EMAIL ADDRESSES AND SAVE THEM TO A .TXT FILE${NC}\n" 190 | printf "${RED}YOU CAN USE THE TEXT FILE AS INPUT TO QUERY ALL OF THEM AT ONCE${NC}\n" 191 | echo 192 | 193 | elif [ "$answer" -eq 5 ];then 194 | # Log Entry 195 | echo "[+] run.sh COMMAND '5' [ $(date) ]" >> ./Logs/ActivityLogs.log 196 | echo 197 | echo "Hey... thanks for downloading Base Query, I've spent way too many hours coding this" 198 | echo "Base Query is a OSINT tool to help you organize and query all those pesky databases you have laying around" 199 | echo "With a quadruple nested structure and a careful design your querys should be INSTANTANEOUS! Or ya know like really fast." 200 | echo "Something broken? Check the logs and then message me!" 201 | echo "For more information regarding use check the README.md file" 202 | echo "Found a bug? Just want to talk? Message me on GitHub or Twitter https://github.com/g666gle" 203 | echo " https://twitter.com/g666gle1" 204 | echo " V1.5" 205 | echo 206 | 207 | fi 208 | 209 | fi 210 | read -sp "Press Enter to continue..." 211 | clear 212 | 213 | done 214 | else 215 | #Log entry 216 | echo "[!] 
#!/bin/bash

#Author Github: https://github.com/g666gle
#Author Twitter: https://twitter.com/g666gle1
#Date: 1/29/2019
#Usage: ./search.sh test@example.com
#Usage: ./search.sh test@
#Usage: ./search.sh @example.com


RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# The raw query exactly as the user typed it ($1) and the optional output-name
# override used when a whole file of queries is being run ($2). Saved up front
# because "$1"/"$2" mean something else inside the helper functions below.
query="$1"
file_query="$2"

# emit_matches <file> <grep-pattern>
# Case-insensitively greps <file> for <grep-pattern> and emits every matching
# "username:password" line, honoring the global $out_to_file setting:
#   y/Y  -> append to ./OutputFiles/<query>_output.txt   (interactive query)
#   YF   -> append to ./OutputFiles/<file_query>_output.txt (query came from a file)
#   else -> colorized console output
# Missing files are silently skipped so callers don't need existence checks.
emit_matches() {
	local target_file="$1" pattern="$2" Line user_part pass_part
	[ -f "$target_file" ] || return 0
	grep -i "$pattern" "$target_file" | while read -r Line;do
		user_part="$(echo "$Line" | cut -f 1 -d ":")"
		pass_part="$(echo "$Line" | cut -f 2- -d ":")"
		if [[ "$out_to_file" == [Yy] ]];then
			echo "$user_part:$pass_part" >> ./OutputFiles/"$query"_output.txt
		elif [ "$out_to_file" == "YF" ];then
			echo "$user_part:$pass_part" >> ./OutputFiles/"$file_query"_output.txt
		else # Send the output to the console
			# %s placeholders keep credential data out of the printf FORMAT
			# string; a raw "$user_part..." format breaks on '%' characters
			printf "%s${RED}:%s${NC}\n" "$user_part" "$pass_part"
		fi
	done
}

# emit_dir <dir> <grep-pattern>
# Runs emit_matches over every file directly inside <dir> (used for the
# 0UTLIERS / NOTVALID directories at each nesting level).
emit_dir() {
	local f
	for f in "$1"/*;do
		emit_matches "$f" "$2"
	done
}

# Prints the "Email Address:" banner, but only for console output
# (suppressed whenever results are going to a file).
print_header() {
	if [[ "$out_to_file" == [Nn] ]];then
		printf "${GREEN}Email Address: %s${NC}\n" "$email"
	fi
}

# Searches the NOTVALID bucket for the current $email (entries that failed
# the format test at import time are still queryable).
search_notvalid() {
	if [[ -d ./data/NOTVALID && -e ./data/NOTVALID/FAILED_TEST.txt ]];then
		emit_matches ./data/NOTVALID/FAILED_TEST.txt "^$email"
	fi
}

# Makes sure the user is in the BaseQuery dir
if [ "${PWD##*/}" == "BaseQuery" ];then
	# Grab everything before the @ sign
	user_name=$(echo "$1" | cut -d @ -f 1 | awk '{print tolower($0)}')
	email=$(echo "$1" | cut -d : -f 1 | awk '{print tolower($0)}')
	check_for_at=${1:0:1}

	# Check to see if the user entered in a domain ex) @google.com
	if [ "$check_for_at" == "@" ];then
		read -p "Are you sure you want to find every possible $1 address? This might take a while! [y/n] " answer
		# Checks input
		while [[ "$answer" != [YyNn] ]];do
			printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
			read -p "Are you sure you want to find every possible $1 address? This might take a while! [y/n] " answer
		done
		if [[ "$answer" == [Yy] ]];then # Checks if the user is sure
			read -p "Output to a file? [y/n] " out_to_file
			# Checks input
			while [[ "$out_to_file" != [YyNn] ]];do
				printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
				read -p "Output to a file? [y/n] " out_to_file
			done
			# Decompress all files
			printf "${GREEN}[+]${NC} Decompressing files\n"
			./decompress.sh

			printf "${GREEN}[+]${NC} Starting search!\n"
			if [[ "$out_to_file" == [Yy] ]];then
				if ! [ -d ./OutputFiles ];then
					mkdir OutputFiles
				fi
				printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/$1_output.txt${NC}\n"
				printf "${GREEN}[+]${NC} Please wait this could take a few minutes!\n"
			fi
			# Walk the 4-deep data tree. Regular dirs descend one level;
			# 0UTLIERS (and top-level NOTVALID) dirs hold their entries
			# directly, so they are grepped in place via emit_dir.
			for first_nest_dir in data/*;do # data/0
				check="$(echo "$first_nest_dir" | cut -f 2 -d "/")"
				if [[ "$(ls -A "$first_nest_dir")" ]];then
					if [[ $check != "0UTLIERS" && $check != "NOTVALID" ]];then
						for second_nest_dir in "$first_nest_dir"/*;do # data/0/0
							if [[ "$(ls -A "$second_nest_dir")" ]];then
								if [[ "$(echo "$second_nest_dir" | cut -f 3 -d "/")" != "0UTLIERS" ]];then
									for third_nest_dir in "$second_nest_dir"/*;do # data/0/0/0
										if [[ "$(ls -A "$third_nest_dir")" ]];then
											if [[ "$(echo "$third_nest_dir" | cut -f 4 -d "/")" != "0UTLIERS" ]];then
												for fourth_dir in "$third_nest_dir"/*;do # data/0/0/0/a.txt
													if [[ "$(ls -A "$fourth_dir")" ]];then
														if [[ "$(echo "$fourth_dir" | cut -f 5 -d "/")" != "0UTLIERS" ]];then
															emit_matches "$fourth_dir" "$query"
														else # data/0/0/0/0UTLIERS/
															emit_dir "$fourth_dir" "$query"
														fi
													fi
												done
											else # If the third dir is the 0UTLIERS dir
												emit_dir "$third_nest_dir" "$query"
											fi
										fi
									done
								else # If the second dir is the 0UTLIERS dir
									emit_dir "$second_nest_dir" "$query"
								fi
							fi
						done
					else # Top-level 0UTLIERS or NOTVALID dir
						emit_dir "$first_nest_dir" "$query"
					fi
				fi
			done
		else
			printf "${YELLOW}[!]${NC} Aborting!\n"
		fi

		printf "${YELLOW}[!]${NC} Finished search!\n"
		exit #end and exit
	fi # End of checking for domain

	#########################################################################
	#  The above code deals with querying every file for a specific domain  #
	#  The below code deals with querying a specific username or file       #
	#########################################################################

	# Deals with all the cases of having a file vs stdin
	out_to_file="N"
	# Check to see if the user is running a file or just commandline input
	if [ $# -ge 2 ];then
		out_to_file="YF" # Yes implicit from entering a file
	else # The user is not running a file so ask them if they want to output to a file
		read -p "Output to a file? [y/n] " out_to_file
		# Checks input
		while [[ "$out_to_file" != [YyNn] ]];do
			printf "${YELLOW}[!]${NC} Please enter either \"y\" or \"n\"!\n"
			read -p "Output to a file? [y/n] " out_to_file
		done
		# Informing the user
		printf "${GREEN}[+]${NC} Starting search!\n"
		if [[ "$out_to_file" == [Yy] ]];then
			# Make the dir if it doesn't exist
			if ! [ -d ./OutputFiles ];then
				mkdir OutputFiles
			fi
			printf "${GREEN}[+]${NC} Outputting all results to ${GREEN}./OutputFiles/$1_output.txt${NC}\n"
		fi
	fi


	# Check to make sure the user name is at least 4 and the email has a @
	if [[ ${#user_name} -ge 4 ]] && [[ "$email" == *"@"* ]];then
		# Grab each individual character
		first_char=${user_name:0:1} # {variable name: starting position : how many letters}
		second_char=${user_name:1:1}
		third_char=${user_name:2:1}
		fourth_char=${user_name:3:1}

		# Check to see if the folder is compressed
		if [ -e ./data/"$first_char".tar.zst ];then
			# Decompress the data
			./decompress.sh "$first_char".tar.zst > /dev/null
		fi

		print_header
		# Descend as deep as the on-disk tree allows; at the deepest existing
		# level either the exact <fourth_char>.txt bucket or that level's
		# 0UTLIERS file holds the entry. emit_matches tolerates missing files.
		base=./data/"$first_char"
		if [ -d "$base" ];then
			if [ -d "$base/$second_char" ];then
				if [ -d "$base/$second_char/$third_char" ];then
					if [ -e "$base/$second_char/$third_char/$fourth_char".txt ];then
						emit_matches "$base/$second_char/$third_char/$fourth_char".txt "^$email"
					else # The exact file does not exist; fall back to 0UTLIERS
						emit_matches "$base/$second_char/$third_char"/0UTLIERS/0utliers.txt "^$email"
					fi
				else # The third letter directory does not exist
					emit_matches "$base/$second_char"/0UTLIERS/0utliers.txt "^$email"
				fi
			else # The second letter directory does not exist
				emit_matches "$base"/0UTLIERS/0utliers.txt "^$email"
			fi
		else # The first letter directory does not exist
			emit_matches ./data/0UTLIERS/0utliers.txt "^$email"
		fi
		# Every branch also consults the NOTVALID bucket
		search_notvalid

	else # If not a valid address
		first_char=${user_name:0:1} # {variable name: starting position : how many letters}

		# Check to see if the folder is compressed
		if [ -e ./data/"$first_char".tar.zst ];then
			./decompress.sh "$first_char".tar.zst > /dev/null
		fi
		# Uncompresses NOTVALID
		if [ -e ./data/NOTVALID.tar.zst ];then
			./decompress.sh NOTVALID.tar.zst > /dev/null
		fi
		print_header

		# Checks if the email has an @
		if [[ $email == *"@"* ]];then
			# The username is either not >= 4 or malformed; it can only
			# live in the NOT VALID file
			search_notvalid
		else
			printf "${YELLOW}[!]${NC} Please enter one email address or a file with one email address per line\n"
		fi
	fi

else
	printf "${RED}ERROR: Please change directories to the BaseQuery root directory${NC}\n"
fi
Description: Takes in one file at a time as command line input. Processes each line in the file and places the 12 | information into the correct subdirectory of the data folder. 13 | Usage: python3 pysort.py file.txt 14 | Version: 1.5.0 15 | Python Version: 3.7.1 16 | """ 17 | 18 | args = sys.argv 19 | 20 | # Need TODO 21 | # Option to choose certain dirs to check for @gmail.com 22 | # Make everything check NOTVALID just in case 23 | # Automatically find the amount of lines in import using wc 24 | # Change theharvester out for hunter.io api 25 | # TODO make an option for counting the number of files 26 | # Make check import time, import into a separate folder and then delete it for more accurate results so you don't have to compress or decompress 27 | # TODO fix the percentage in compress ( * by 100) 28 | # TODO PutYourDataBasesHere for an optional HDD 29 | # TODO and store your data on a HDD 30 | # TODO Add support for SQL vbull CSV Json 31 | # TODO take the SHA256 hash of the data folder before compression and after decompression 32 | 33 | 34 | 35 | # FIXED 36 | # Check bash script . when inputting a file make sure @gmail.com exports results to the file 37 | # TODO skip .sql .csv .json files 38 | # TODO make an output to a file option for query 39 | # Fixed the harvester (option 4) 40 | # Take out the second option for the email harvesting 41 | # TODO make lookup by company name.
def check_duplicate(full_file_path, line):
    """
    Decide whether `line` should be appended to the file at `full_file_path`.

    The file is memory-mapped and scanned for an exact, full-line occurrence of
    `line`. The previous implementation used a raw substring search, which
    wrongly treated a NEW credential as a duplicate whenever it happened to be
    a substring of an EXISTING line (e.g. "a@b.com:pass" vs the stored
    "a@b.com:pass123"), silently dropping data. The needle is therefore
    anchored to line boundaries: "\\n" on both sides, or start-of-file.
    :param full_file_path: Path to the file to check (written by place_data,
                           one "email:password" entry per line, newline-terminated)
    :param line: The candidate line, without a trailing newline
    :return: True if the line should be written to the file; else False
    """
    # A file that is missing or empty cannot contain a duplicate (the original
    # crashed with FileNotFoundError when the 0utliers file was absent)
    if not os.path.exists(full_file_path) or os.stat(full_file_path).st_size == 0:
        return True
    # Every stored line ends with "\n", so a full-line match is either
    # "\n" + line + "\n" somewhere in the file, or line + "\n" at offset 0
    needle = str.encode(line) + b"\n"
    # Open the file as a binary file and scan it through a mmap obj
    with open(full_file_path, 'rb', 0) as fp, mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as s:
        if s.find(b"\n" + needle) != -1 or s[:len(needle)] == needle:
            return False  # exact line is already in the file so do not re-write it
    return True  # line is not in the file so write it
in emailPaswd[1]: 104 | # Switches the position of the username and password 105 | temp = emailPaswd[0] 106 | emailPaswd[0] = emailPaswd[1].lower() 107 | emailPaswd[1] = temp 108 | else: 109 | # Change all of the email usernames to be lowercase; to be uniform 110 | emailPaswd[0] = emailPaswd[0].lower() 111 | 112 | try: 113 | # check to see if you have a valid email address and the username is >= 4; also checks if there is a '@' in the username 114 | if '@' in emailPaswd[0].strip() and len(emailPaswd[0].strip().split('@')[0]) >= 4 and len(emailPaswd) >= 2 and emailPaswd[0].strip().count('@') == 1: 115 | first_letter = emailPaswd[0][0] 116 | second_letter = emailPaswd[0][1] 117 | third_letter = emailPaswd[0][2] 118 | fourth_letter = emailPaswd[0][3] 119 | 120 | # Check to see if the username has an invalid character and at what spot 121 | if str(first_letter).isalnum(): 122 | folder_depth = 1 123 | if str(second_letter).isalnum(): 124 | folder_depth = 2 125 | if str(third_letter).isalnum(): 126 | folder_depth = 3 127 | if str(fourth_letter).isalnum(): 128 | folder_depth = 4 129 | else: 130 | folder_depth = 0 131 | 132 | # Check to see if the first letter doesn't have a directory 133 | if not os.path.isdir(path + "/data/" + first_letter): 134 | # Check to see if we start with at least one valid char 135 | if folder_depth >= 1: 136 | # Make the directory 137 | os.makedirs(path + "/data/" + first_letter) 138 | else: 139 | # If the outlier dir doesn't exist; make it and start the file 140 | if not os.path.isdir(path + "/data/0UTLIERS"): 141 | os.makedirs(path + "/data/0UTLIERS") 142 | # Don't need to check for duplicates because its a new file 143 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 144 | length = len(emailPaswd) 145 | # Iterate through each index of the list and write it to the file 146 | for index in range(length): 147 | if index != length - 1: 148 | fp.write(emailPaswd[index] + ":") 149 | else: # Don't add a ':' at the end of the line 150 | 
fp.write(emailPaswd[index]) 151 | fp.write("\n") 152 | return 1 153 | else: # If the outlier dir already exists append the line to the file 154 | # Get the new line from the emailPasswd list 155 | length = len(emailPaswd) 156 | new_line = "" 157 | # Iterate through each index and add it to new_line 158 | for index in range(length): 159 | if index != length - 1: 160 | new_line += emailPaswd[index] + ":" 161 | else: 162 | new_line += emailPaswd[index] 163 | 164 | if check_duplicate(path + "/data/0UTLIERS/0utliers.txt", new_line): 165 | # Checks to see if there are duplicates already in the file, returns true if there isn't 166 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 167 | fp.write(new_line + "\n") 168 | return 1 169 | return 0 170 | else: # The directory already exists 171 | if folder_depth == 0: # There is NOT at least one consecutive valid char 172 | # If the outlier dir doesn't exist; make it and start the file 173 | if not os.path.isdir(path + "/data/0UTLIERS"): 174 | os.makedirs("mkdir " + path + "/data/0UTLIERS") 175 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 176 | length = len(emailPaswd) 177 | # Iterate through each index of the list and write it to the file 178 | for index in range(length): 179 | if index != length - 1: 180 | fp.write(emailPaswd[index] + ":") 181 | else: # Don't add a ':' at the end of the line 182 | fp.write(emailPaswd[index]) 183 | fp.write("\n") 184 | return 1 185 | else: # If the outlier dir already exists append the line to the file 186 | # Get the new line from the emailPasswd list 187 | length = len(emailPaswd) 188 | new_line = "" 189 | # Iterate through each index and add it to new_line 190 | for index in range(length): 191 | if index != length - 1: 192 | new_line += emailPaswd[index] + ":" 193 | else: 194 | new_line += emailPaswd[index] 195 | 196 | if check_duplicate(path + "/data/0UTLIERS/0utliers.txt", new_line): 197 | with open(path + "/data/0UTLIERS/0utliers.txt", 'a') as fp: 198 | # 
Write to the file 199 | fp.write(new_line + "\n") 200 | return 1 201 | return 0 202 | 203 | # Check to see if the second letter doesn't have a directory 204 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter): 205 | # Check to see if we start with at least two valid char 206 | if folder_depth >= 2: 207 | # Make the directory 208 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter) 209 | else: 210 | # If the outlier dir doesn't exist; make it and start the file 211 | if not os.path.isdir(path + "/data/" + first_letter + "/0UTLIERS"): 212 | os.makedirs(path + "/data/" + first_letter + "/0UTLIERS") 213 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 214 | length = len(emailPaswd) 215 | # Iterate through each index of the list and write it to the file 216 | for index in range(length): 217 | if index != length - 1: 218 | fp.write(emailPaswd[index] + ":") 219 | else: # Don't add a ':' at the end of the line 220 | fp.write(emailPaswd[index]) 221 | fp.write("\n") 222 | return 1 223 | else: 224 | # Get the new line from the emailPasswd list 225 | length = len(emailPaswd) 226 | new_line = "" 227 | # Iterate through each index and add it to new_line 228 | for index in range(length): 229 | if index != length - 1: 230 | new_line += emailPaswd[index] + ":" 231 | else: 232 | new_line += emailPaswd[index] 233 | 234 | # Check for duplicates 235 | if check_duplicate(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", new_line): 236 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 237 | fp.write(new_line + "\n") 238 | return 1 239 | return 0 240 | else: # The directory already exists 241 | if folder_depth <= 1: # There is not at least two consecutive valid char 242 | # If the outlier dir doesn't exist; make it and start the file 243 | if not os.path.isdir(path + "/data/" + first_letter + "/0UTLIERS"): 244 | os.makedirs(path + "/data/" + first_letter + "/0UTLIERS") 245 | 
with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 246 | length = len(emailPaswd) 247 | # Iterate through each index of the list and write it to the file 248 | for index in range(length): 249 | if index != length - 1: 250 | fp.write(emailPaswd[index] + ":") 251 | else: # Don't add a ':' at the end of the line 252 | fp.write(emailPaswd[index]) 253 | fp.write("\n") 254 | return 1 255 | else: # If the outlier dir already exists append the line to the file 256 | # Get the new line from the emailPasswd list 257 | length = len(emailPaswd) 258 | new_line = "" 259 | # Iterate through each index and add it to new_line 260 | for index in range(length): 261 | if index != length - 1: 262 | new_line += emailPaswd[index] + ":" 263 | else: 264 | new_line += emailPaswd[index] 265 | 266 | if check_duplicate(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", new_line): 267 | with open(path + "/data/" + first_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 268 | fp.write(new_line + "\n") 269 | return 1 270 | return 0 271 | 272 | # Check to see if the third letter doesn't have a directory 273 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter): 274 | # Check to see if we start with at least three valid char 275 | if folder_depth >= 3: 276 | # Make the directory 277 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter) 278 | else: 279 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS"): 280 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS") 281 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 282 | length = len(emailPaswd) 283 | # Iterate through each index of the list and write it to the file 284 | for index in range(length): 285 | if index != length - 1: 286 | fp.write(emailPaswd[index] + ":") 287 | else: # Don't add a ':' at the end of the 
line 288 | fp.write(emailPaswd[index]) 289 | fp.write("\n") 290 | return 1 291 | else: # If the outlier dir already exists append the line to the file 292 | # Get the new line from the emailPasswd list 293 | length = len(emailPaswd) 294 | new_line = "" 295 | # Iterate through each index and add it to new_line 296 | for index in range(length): 297 | if index != length - 1: 298 | new_line += emailPaswd[index] + ":" 299 | else: 300 | new_line += emailPaswd[index] 301 | 302 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", new_line): 303 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 304 | fp.write(new_line + "\n") 305 | return 1 306 | return 0 307 | else: # The directory already exists 308 | if folder_depth <= 2: # There is not at least three consecutive valid char 309 | # If the outlier dir doesn't exist; make it and start the file 310 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS"): 311 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS") 312 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 313 | length = len(emailPaswd) 314 | # Iterate through each index of the list and write it to the file 315 | for index in range(length): 316 | if index != length - 1: 317 | fp.write(emailPaswd[index] + ":") 318 | else: # Don't add a ':' at the end of the line 319 | fp.write(emailPaswd[index]) 320 | fp.write("\n") 321 | return 1 322 | else: # If the outlier dir already exists append the line to the file 323 | # Get the new line from the emailPasswd list 324 | length = len(emailPaswd) 325 | new_line = "" 326 | # Iterate through each index and add it to new_line 327 | for index in range(length): 328 | if index != length - 1: 329 | new_line += emailPaswd[index] + ":" 330 | else: 331 | new_line += emailPaswd[index] 332 | 333 | if check_duplicate(path 
+ "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", new_line): 334 | with open(path + "/data/" + first_letter + "/" + second_letter + "/0UTLIERS/0utliers.txt", 'a') as fp: 335 | fp.write(new_line + "\n") 336 | return 1 337 | return 0 338 | 339 | # Checks to see if the file in the third directory doesn't exists 340 | if not os.path.isfile(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt"): 341 | if folder_depth == 4: # The file doesn't exist in the third dir but there is 4 valid chars 342 | # Make the file 343 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", 'a') as output_file: 344 | length = len(emailPaswd) 345 | # Iterate through each index of the list and write it to the file 346 | for index in range(length): 347 | if index != length-1: 348 | output_file.write(emailPaswd[index] + ":") 349 | else: # Don't add a ':' at the end of the line 350 | output_file.write(emailPaswd[index]) 351 | output_file.write("\n") 352 | return 1 353 | elif folder_depth == 3: # Check to see if the fourth letter is an outlier EX) exa!mple@example.com 354 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS"): 355 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS") 356 | # Make the 0UTLIERS file 357 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", 'a') as output_file: 358 | # Get the new line from the emailPasswd list 359 | length = len(emailPaswd) 360 | new_line = "" 361 | # Iterate through each index and add it to new_line 362 | for index in range(length): 363 | if index != length - 1: 364 | new_line += emailPaswd[index] + ":" 365 | else: 366 | new_line += emailPaswd[index] 367 | 368 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + 
third_letter + "/0UTLIERS/0utliers.txt", new_line): 369 | output_file.write(new_line + "\n") 370 | return 1 371 | return 0 372 | else: # The file exists 373 | if folder_depth == 4: # The file does exist in the third dir but there is 4 valid chars 374 | # Get the new line from the emailPasswd list 375 | length = len(emailPaswd) 376 | new_line = "" 377 | # Iterate through each index and add it to new_line 378 | for index in range(length): 379 | if index != length - 1: 380 | new_line += emailPaswd[index] + ":" 381 | else: 382 | new_line += emailPaswd[index] 383 | 384 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", new_line): 385 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/" + fourth_letter + ".txt", 'a') as output_file: 386 | output_file.write(new_line + "\n") 387 | return 1 388 | return 0 389 | elif folder_depth == 3: # The file does exist in the third dir but there is only 3 valid chars 390 | # Check to see if you need to make the 0UTLIERS dir 391 | if not os.path.isdir(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS"): 392 | os.makedirs(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS") 393 | 394 | # Get the new line from the emailPasswd list 395 | length = len(emailPaswd) 396 | new_line = "" 397 | # Iterate through each index and add it to new_line 398 | for index in range(length): 399 | if index != length - 1: 400 | new_line += emailPaswd[index] + ":" 401 | else: 402 | new_line += emailPaswd[index] 403 | 404 | # Check for duplicates and then write to the file 405 | if check_duplicate(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", new_line): 406 | # Append the 0UTLIERS file 407 | with open(path + "/data/" + first_letter + "/" + second_letter + "/" + third_letter + "/0UTLIERS/0utliers.txt", 'a') as output_file: 
if __name__ == '__main__':
    # ANSI color codes for terminal output.
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    NC = '\033[0m'  # No Color

    path = os.getcwd()

    # Validate the arguments BEFORE touching args[1].  The original code
    # checked args[1].endswith(...) first, which raised IndexError when the
    # script was launched without a filename argument.
    # NOTE(review): `args` is defined earlier in this file (presumably
    # sys.argv) -- confirm against the file head.
    if len(args) == 2 and args[1] != "":
        # There is currently no support for these file extensions; skip them
        # to speed up the import.  A single tuple-argument endswith() replaces
        # the original or-chain, which tested ".sql" twice.
        if args[1].endswith((".sql", ".csv", ".json", ".xlsx")):
            exit()

        start_time = time.time()
        total_lines = 0    # The amount of lines that are not white-space
        written_lines = 0  # The amount of lines actually written by place_data()

        print(GREEN + "[+]" + NC + " Opening file " + GREEN + args[1] + NC)
        # Directory guaranteed to exist from previous check in Import.sh
        with open(path + "/PutYourDataBasesHere/" + args[1], 'r') as fp:
            try:
                for line in fp:
                    # Periodic progress indicator every 10,000 processed lines.
                    if total_lines % 10000 == 0 and total_lines != 0:
                        print(GREEN + "[+]" + NC + " Processing line number: " + str(total_lines) + "\nLine: " + line)
                    if line.strip() != "":
                        written_lines += place_data(line.strip(), path)
                        total_lines += 1
            except Exception as e:
                # Report and fall through so totals are still printed/logged.
                print(RED + "Exception: " + str(e) + NC)
        stop_time = time.time()

        # Output to stdout.  Line counts are integers, so print them as such;
        # the original formatted them with "%.2f" (e.g. "1234.00").
        print()
        print(GREEN + "[+]" + NC + " Total time: " + ("%.2f" % (stop_time - start_time)) + " seconds")
        print(GREEN + "[+]" + NC + " Total lines: " + str(total_lines))
        print(GREEN + "[+]" + NC + " Written lines: " + str(written_lines))

        # Log times
        with open(path + "/Logs/ActivityLogs.log", 'a') as log:
            log.write("[+] Total time: " + ("%.2f" % (stop_time - start_time)) + " seconds" + "\n")
            log.write("[+] Total lines: " + str(total_lines) + "\n")
            log.write("[+] Written lines: " + str(written_lines) + "\n")
    else:
        print(YELLOW + "[!]" + NC + " Invalid arguments provided")