├── whoisxmlapi_download_whois_data ├── whois_utils │ ├── __init__.py │ ├── whois_user_interaction.py │ └── whois_web_download_utils.py ├── requirements.txt ├── requirements_legacy.txt ├── requirements_windows.txt ├── HOWTO_add_a_new_feed.txt ├── new_generation_plans.dat ├── install_p12.py ├── README_python2.txt ├── CHANGELOG.txt ├── README.SSL.txt ├── FAQ.txt └── SPECIFICATIONS.txt ├── whoisxmlapi_whoisdownload_bash ├── supported_gtlds ├── BUGS ├── install_p12pack.sh ├── README.SSL ├── supported_ngtlds └── README ├── UsingScriptsOnWindows10.pdf ├── legacy_scripts ├── get_whois_info-0.0.4.zip ├── get_whois_info-0.0.5.zip ├── get_whois_info-0.0.6.zip ├── get_whois_info_python-0.0.6.zip └── README ├── whoisxmlapi_bash_csv_to_mysqldb ├── countries ├── CHANGELOG.txt ├── load_csv_file_into_db_old.sh ├── loader_schema_simple.sql ├── loader_schema_regular_daily_only.sql ├── loader_schema_regular.sql ├── loader_schema_full.sql ├── load_csv_file_into_db.sh └── README ├── netblocks_csv_to_mysqldb ├── NetblocksRDB_Diagram.png ├── load_netblocks_data_to_mysql.odt ├── load_netblocks_data_to_mysql.pdf └── README.md ├── website_contactscats_to_mysqldb ├── website_categories_schema.png ├── website_contats_categories_schema.png ├── load_contactscategories_jsonl_to_mysql.odt ├── load_contactscategories_jsonl_to_mysql.pdf ├── website_categories.ddl ├── README.md ├── website_contacts_categories.ddl ├── load_contactscategories_jsonl_to_mysql.py └── load_contactscategories_jsonl_to_mysql.txt ├── whoisxmlapi_mysqldump_loaders ├── legacy │ ├── load_mysql_data_all_for_all_tlds.sh │ ├── load_mysql_data_per_tables_for_all_tlds.sh │ ├── load_mysql_schema.sh │ ├── load_mysql_data_all.sh │ ├── load_mysql_data_per_tables_for_tld.sh │ ├── load_mysql_data_all_for_tld.sh │ └── load_mysql_data_per_tables.sh ├── load_mysql_utils.sh ├── README ├── load_mysql_data_all.sh └── load_mysql_data_per_tables.sh ├── whoisxmlapi_percona_loader_scripts ├── load_mysql_utils.sh ├── legacy │ └── restore_db.sh ├── README.txt ├── whoiscrawler_mysql_schema.sql └── load_whois_percona.sh ├── .gitignore ├── README.md ├── whoisxmlapi_csv2json ├── transform_json.py ├── transform_json_verbose.py └── README └── whoisxmlapi_flexible_csv_to_mysqldb ├── field_types.csv └── README /whoisxmlapi_download_whois_data/whois_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whoisxmlapi_whoisdownload_bash/supported_gtlds: -------------------------------------------------------------------------------- 1 | com,net,org,info,us,biz,mobi,pro,asia,aero,tel,name 2 | -------------------------------------------------------------------------------- /UsingScriptsOnWindows10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/UsingScriptsOnWindows10.pdf -------------------------------------------------------------------------------- /legacy_scripts/get_whois_info-0.0.4.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/legacy_scripts/get_whois_info-0.0.4.zip -------------------------------------------------------------------------------- /legacy_scripts/get_whois_info-0.0.5.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/legacy_scripts/get_whois_info-0.0.5.zip -------------------------------------------------------------------------------- /legacy_scripts/get_whois_info-0.0.6.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/legacy_scripts/get_whois_info-0.0.6.zip -------------------------------------------------------------------------------- /whoisxmlapi_bash_csv_to_mysqldb/countries: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/whoisxmlapi_bash_csv_to_mysqldb/countries -------------------------------------------------------------------------------- /legacy_scripts/get_whois_info_python-0.0.6.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/legacy_scripts/get_whois_info_python-0.0.6.zip -------------------------------------------------------------------------------- /netblocks_csv_to_mysqldb/NetblocksRDB_Diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/netblocks_csv_to_mysqldb/NetblocksRDB_Diagram.png -------------------------------------------------------------------------------- /legacy_scripts/README: -------------------------------------------------------------------------------- 1 | These are previous versions of downloader scripts which are not 2 | developed anymore. They are kept here for clients who still use them 3 | for compatibility purposes. 
4 | -------------------------------------------------------------------------------- /netblocks_csv_to_mysqldb/load_netblocks_data_to_mysql.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/netblocks_csv_to_mysqldb/load_netblocks_data_to_mysql.odt -------------------------------------------------------------------------------- /netblocks_csv_to_mysqldb/load_netblocks_data_to_mysql.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/netblocks_csv_to_mysqldb/load_netblocks_data_to_mysql.pdf -------------------------------------------------------------------------------- /website_contactscats_to_mysqldb/website_categories_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/website_contactscats_to_mysqldb/website_categories_schema.png -------------------------------------------------------------------------------- /website_contactscats_to_mysqldb/website_contats_categories_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/website_contactscats_to_mysqldb/website_contats_categories_schema.png -------------------------------------------------------------------------------- /website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.odt -------------------------------------------------------------------------------- /website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whois-api-llc/whois_database_download_support/HEAD/website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.pdf -------------------------------------------------------------------------------- /whoisxmlapi_whoisdownload_bash/BUGS: -------------------------------------------------------------------------------- 1 | -When downloading from whois_database 2 | there is no way to download sql schema + separate tables. 3 | This should be a format like sqlschematables, which downloads 4 | whoiscrawler_$version_$tld_mysql_schema.sql.gz 5 | and the tables subdirectory. 6 | 7 | -Sample data cannot be downloaded. 
8 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi>=2019.11.28
2 | cffi>=1.13.2
3 | chardet>=3.0.4
4 | configobj>=5.0.6
5 | configparser>=4.0.2
6 | cryptography>=3.4
7 | easygui>=0.98.1
8 | idna>=2.8
9 | pycparser>=2.19
10 | pycryptodome>=3.7.0
11 | pyOpenSSL>=19.1.0
12 | recordtype>=1.3
13 | requests>=2.22.0
14 | six>=1.13.0
15 | urllib3>=1.26.7
16 | urlparse2>=1.1.1
17 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/requirements_legacy.txt:
--------------------------------------------------------------------------------
1 | certifi>=2019.11.28
2 | cffi>=1.13.2
3 | chardet>=3.0.4
4 | configobj>=5.0.6
5 | configparser>=4.0.2
6 | cryptography>=2.8
7 | easygui>=0.98.1
8 | idna>=2.8
9 | pycparser>=2.19
10 | pycrypto>=2.6.1
11 | pyOpenSSL>=19.1.0
12 | recordtype>=1.3
13 | requests>=2.22.0
14 | six>=1.13.0
15 | urllib3>=1.25.7
16 | urlparse2>=1.1.1
17 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/requirements_windows.txt:
--------------------------------------------------------------------------------
1 | certifi>=2019.11.28
2 | cffi>=1.13.2
3 | chardet>=3.0.4
4 | configobj>=5.0.6
5 | configparser>=4.0.2
6 | cryptography>=2.8
7 | easygui>=0.98.1
8 | idna>=2.8
9 | pycparser>=2.19
10 | pycryptodome>=3.7.0
11 | pyOpenSSL>=19.1.0
12 | recordtype>=1.3
13 | requests>=2.22.0
14 | six>=1.13.0
15 | urllib3>=1.25.7
16 | urlparse2>=1.1.1
17 |
--------------------------------------------------------------------------------
/whoisxmlapi_bash_csv_to_mysqldb/CHANGELOG.txt:
--------------------------------------------------------------------------------
1 | Changelog for WhoisXML API
2 |
3 | CSV importing scripts and schema
4 |
5 | -0.0.2: 2018-01-10:
6 |    o initial release on GitHub
7 |
8 | -0.0.3: 2018-11-01:
9 |    o a critical bug fixed: the script now handles both
10 |      UNIX-style (LF) and Windows-style (CRLF) terminated
11 |      input csv files.
12 |    o changelog added
13 |    o unnecessary files removed
14 |
15 |
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_all_for_all_tlds.sh:
--------------------------------------------------------------------------------
1 | src_root_dir="$1"
2 | version="$2"
3 | db_username="$3"
4 | db_password="$4"
5 |
6 | if [ ! -d "$src_root_dir" ]; then
7 |     echo "src_root_dir $src_root_dir is not valid"
8 |     exit
9 | fi
10 | if [ -z "$version" ]; then
11 |     echo "version is missing"
12 |     exit
13 | fi
14 |
15 | tlds="asia us biz mobi info org net com"
16 | for tld in $tlds; do
17 |     ./load_mysql_data_all_for_tld.sh $src_root_dir $tld $version $db_username $db_password
18 | done
19 |
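A minimal invocation sketch for the wrapper above; the source tree, quarterly version tag, and credentials are hypothetical:

    # Expects per-TLD dumps such as /data/whois_v20/com/whoiscrawler_v20_com_mysql.sql.gz;
    # loads each TLD in the hard-coded list via load_mysql_data_all_for_tld.sh.
    ./load_mysql_data_all_for_all_tlds.sh /data/whois_v20 v20 dbuser dbpass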
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_per_tables_for_all_tlds.sh:
--------------------------------------------------------------------------------
1 | src_root_dir="$1"
2 | version="$2"
3 | db_username="$3"
4 | db_password="$4"
5 |
6 | if [ ! -d "$src_root_dir" ]; then
7 |     echo "src_root_dir $src_root_dir is not valid"
8 |     exit
9 | fi
10 | if [ -z "$version" ]; then
11 |     echo "version is missing"
12 |     exit
13 | fi
14 |
15 | tlds="pro coop asia us biz mobi info org net com"
16 | for tld in $tlds; do
17 |     ./load_mysql_data_per_tables_for_tld.sh $src_root_dir $tld $version $db_username $db_password
18 | done
19 |
--------------------------------------------------------------------------------
/website_contactscats_to_mysqldb/website_categories.ddl:
--------------------------------------------------------------------------------
1 | /*
2 | Sample schema file for the Website Contacts and Categories MySQL database
3 | Categories-only version
4 | v 0.0
5 | (c) WhoisXML API, Inc.
6 | */
7 |
8 | CREATE TABLE category(
9 |     category VARCHAR(255) PRIMARY KEY
10 | );
11 |
12 | CREATE TABLE domain(
13 |     domainID INTEGER PRIMARY KEY AUTO_INCREMENT,
14 |     domainName VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
15 |     countryCode VARCHAR(2)
16 | );
17 |
18 | CREATE TABLE domain_category(
19 |     categoryID VARCHAR(255),
20 |     domainID INTEGER,
21 |     PRIMARY KEY (categoryID, domainID)
22 | );
23 |
24 |
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_schema.sh:
--------------------------------------------------------------------------------
1 | schema_file="$1"
2 | if [ ! -f "$schema_file" ]; then
3 |     echo "invalid schema file $schema_file";
4 |     exit
5 | fi
6 | db="$2"
7 | if [ -z "$db" ]; then
8 |     echo "db is missing"
9 |     exit
10 | fi
11 | db_username="$3"
12 | if [ -z "$db_username" ]; then
13 |     echo "db username is missing"
14 |     exit
15 | fi
16 | db_password="$4"
17 | if [ -z "$db_password" ]; then
18 |     echo "db_password is missing"
19 |     exit
20 | fi
21 | mysql -u$db_username -p$db_password -e "create database $db"
22 | if [ ${schema_file: -3} == ".gz" ]; then
23 |
24 |     gunzip<$schema_file | mysql -u$db_username -p$db_password $db
25 | else
26 |     mysql -u$db_username -p$db_password $db <$schema_file
27 | fi
28 |
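A minimal invocation sketch for the schema loader above (database name and credentials are hypothetical; the script creates the database and accepts both plain and gzipped schema files):

    ./load_mysql_schema.sh whoiscrawler_v20_com_mysql_schema.sql.gz whoiscrawler_v20_com dbuser dbpass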
--------------------------------------------------------------------------------
/netblocks_csv_to_mysqldb/README.md:
--------------------------------------------------------------------------------
1 | A Python script to create and maintain a MySQL netblocks database
2 |
3 | using csv format netblocks data downloaded
4 |
5 | from a Whois XML netblocks database subscription
6 |
7 | (c) Whois XML API, Inc. 2019.
8 |
9 | ver. 0.0.3
10 |
11 | Contents
12 | --------
13 |
14 | load_netblocks_data_to_mysql.py - the script
15 |
16 | Documentation:
17 |
18 | README.md - this file
19 |
20 | load_netblocks_data_to_mysql.txt - Documentation - UTF-8 text format
21 | load_netblocks_data_to_mysql.pdf - Documentation - pdf format
22 | load_netblocks_data_to_mysql.odt - Documentation - OpenDocument text format
23 | load_netblocks_data_to_mysql.org - Documentation - emacs .org format
24 |
25 | Database diagram (to supplement the text-format Documentation):
26 |
27 | NetblocksRDB_Diagram.svg - svg format
28 | NetblocksRDB_Diagram.png - png format
29 |
30 |
31 | Consult the Documentation for further details
32 |
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/load_mysql_utils.sh:
--------------------------------------------------------------------------------
1 | #Utilities to be included into mysql loader scripts
2 |
3 | #
4 | # Prints the version number and exits.
5 | #
6 | function printVersionAndExit()
7 | {
8 |     echo "$MYNAME Version $VERSION"
9 |     echo ""
10 |     exit 0
11 | }
12 | #
13 | #
14 | # Prints all the arguments but only if the program is in the verbose mode.
15 | #
16 | function printVerbose()
17 | {
18 |     if [ "$VERBOSE" == "true" ]; then
19 |         echo $* >&2
20 |     fi
21 | }
22 |
23 | #
24 | # Prints an error message to the standard error. The text will not be mixed up with
25 | # the data that is printed to the standard output.
26 | #
27 | function printError()
28 | {
29 |     echo "$*" >&2
30 | }
31 |
32 | function printMessage()
33 | {
34 |     echo -n "$*" >&2
35 | }
36 |
37 | function printMessageNl()
38 | {
39 |     echo "$*" >&2
40 | }
41 |
42 | function printDebug()
43 | {
44 |     if [ "$DEBUG" == "yes" ]; then
45 |         echo "$*" >&2
46 |     fi
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/whoisxmlapi_percona_loader_scripts/load_mysql_utils.sh:
--------------------------------------------------------------------------------
1 | #Utilities to be included into mysql loader scripts
2 |
3 | #
4 | # Prints the version number and exits.
5 | #
6 | function printVersionAndExit()
7 | {
8 |     echo "$MYNAME Version $VERSION"
9 |     echo ""
10 |     exit 0
11 | }
12 | #
13 | #
14 | # Prints all the arguments but only if the program is in the verbose mode.
15 | #
16 | function printVerbose()
17 | {
18 |     if [ "$VERBOSE" == "true" ]; then
19 |         echo $* >&2
20 |     fi
21 | }
22 |
23 | #
24 | # Prints an error message to the standard error. The text will not be mixed up with
25 | # the data that is printed to the standard output.
26 | #
27 | function printError()
28 | {
29 |     echo "$*" >&2
30 | }
31 |
32 | function printMessage()
33 | {
34 |     echo -n "$*" >&2
35 | }
36 |
37 | function printMessageNl()
38 | {
39 |     echo "$*" >&2
40 | }
41 |
42 | function printDebug()
43 | {
44 |     if [ "$DEBUG" == "yes" ]; then
45 |         echo "$*" >&2
46 |     fi
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_all.sh:
--------------------------------------------------------------------------------
1 | schema_file="$1"
2 | if [ ! -f "$schema_file" ]; then
3 |     echo "invalid schema file $schema_file";
4 |     exit
5 | fi
6 | dump_file="$2"
7 | if [ ! -f "$dump_file" ]; then
8 |     echo "please specify a valid mysqldump file"
9 |     exit
10 | fi
11 | db="$3"
12 | if [ -z "$db" ]; then
13 |     echo "db is missing"
14 |     exit
15 | fi
16 | db_username="$4"
17 | if [ -z "$db_username" ]; then
18 |     echo "db username is missing"
19 |     exit
20 | fi
21 | db_password="$5"
22 | if [ -z "$db_password" ]; then
23 |     echo "db_password is missing"
24 |     exit
25 | fi
26 | ./load_mysql_schema.sh $schema_file $db $db_username $db_password
27 | time=`date +%s`
28 | echo "loading data from file $dump_file"
29 | if [ ${dump_file: -3} == ".gz" ]; then
30 |
31 |     zcat "$dump_file" |mysql -u$db_username -p$db_password $db
32 | else
33 |
34 |     mysql -u$db_username -p$db_password $db <$dump_file
35 | fi
36 |
37 | time2=`date +%s`
38 | dur=`expr $time2 - $time`
39 | echo "took $dur seconds"
40 |
41 |
--------------------------------------------------------------------------------
/website_contactscats_to_mysqldb/README.md:
--------------------------------------------------------------------------------
1 | A script to load Website Contacts & Categories data
2 |
3 | in jsonl format, downloaded from the
4 |
5 | Website Contacts & Categorization database by WhoisXML API, Inc.
6 |
7 | (https://website-contacts-database.whoisxmlapi.com)
8 |
9 | into a MySQL database.
10 |
11 | (c) Whois XML API, Inc. 2019.
12 |
13 | ver. 0.0.1
14 |
15 | (first beta release)
16 |
17 | CONTENTS:
18 |
19 | load_contactscategories_jsonl_to_mysql.py - the script
20 |
21 | recommended DB schema SQL-s:
22 |
23 | website_categories.ddl - categories only
24 | website_contacts_categories.ddl - categories and contacts
25 |
26 | Documentation:
27 |
28 | README.md - this file
29 |
30 | load_contactscategories_jsonl_to_mysql.txt - Documentation - UTF-8 text format
31 | load_contactscategories_jsonl_to_mysql.pdf - Documentation - pdf format
32 | load_contactscategories_jsonl_to_mysql.odt - Documentation - OpenDocument text format
33 |
34 |
35 | Database diagrams (to supplement the text-format Documentation):
36 |
37 | website_categories_schema.png
38 | website_contats_categories_schema.png
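A minimal setup sketch for the loader above; the database name and credentials are hypothetical, and the loader script itself is then pointed at this database as described in the documentation:

    # Create the target database and load the recommended contacts+categories schema.
    mysql -udbuser -pdbpass -e "CREATE DATABASE contactsdb"
    mysql -udbuser -pdbpass contactsdb < website_contacts_categories.ddl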
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_per_tables_for_tld.sh:
--------------------------------------------------------------------------------
1 | src_root_dir="$1"
2 | tld="$2"
3 | version="$3"
4 | db_username="$4"
5 | db_password="$5"
6 |
7 | if [ ! -d "$src_root_dir" ]; then
8 |     echo "src_root_dir $src_root_dir is not valid"
9 |     exit
10 | fi
11 | if [ -z "$tld" ]; then
12 |     echo "tld is missing"
13 |     exit
14 | fi
15 | if [ -z "$version" ]; then
16 |     echo "version is missing"
17 |     exit
18 | fi
19 |
20 | schema_file="$src_root_dir/$tld/whoiscrawler_"$version"_$tld"_mysql_schema.sql.gz
21 | schema_file2="$src_root_dir/$tld/whoiscrawler_$tld"_mysql_schema.sql.gz
22 |
23 | if [ ! -f "$schema_file" ] && [ ! -f "$schema_file2" ]; then
24 |     echo "invalid schema file $schema_file";
25 |     exit
26 | fi
27 | tables_dir=$src_root_dir/$tld/tables
28 |
29 | if [ ! -d "$tables_dir" ]; then
30 |     echo "no valid tables dir $tables_dir"
31 |     exit
32 | fi
33 | db=whoiscrawler_$version"_$tld"
34 | if [ ! -f "$schema_file" ]; then
35 |     schema_file=$schema_file2
36 | fi
37 | if [ ! -f "$dump_file" ]; then
38 |     dump_file="$dump_file2"
39 | fi
40 | ./load_mysql_data_per_tables.sh $schema_file $tables_dir $db $db_username $db_password
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/HOWTO_add_a_new_feed.txt:
--------------------------------------------------------------------------------
1 | For developers/testers:
2 |
3 | how to add a new feed
4 |
5 | 1. Open feeds.ini with your favourite programming text editor
6 |
7 | 2. Add a new section (to a logical place) with the section name
8 |
9 |    feed__dataformat
10 |
11 | Note: as ini files do not support hierarchy, you will need a record
12 | for each dataformat. Take care that the common data should be similar.
13 |
14 | See the examples already there.
15 |
16 | 3. Fill in the required fields according to the already added feeds. In
17 | file names you can use the following strings, which will be substituted:
18 |    $dbversion: quarterly database version, e.g. 'v6' or 'v20'
19 |    $date: date string for daily feeds, e.g. '2017_08_20'
20 |    $minusdate: date string for daily feeds in the format e.g. '2017-08-20'
21 |    $_date: date string preceded by an underscore, e.g. '_2017_08_20'
22 |    $tld: tld, e.g. 'com' or 'ac.at'
23 |    $tldunderscore: tld string, dots replaced by underscores, e.g. 'ac_at' for
24 |      'ac.at'
25 |    $filename: a file name. Used in file masks which are derived from files such as md5mask
26 |    $ALLFILES: all files of the directory. Typically for subdirectories with csvs.
27 | If you need more, let us know.
28 |
29 | 4. Give it a try.
30 |
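To make step 3 above concrete, a purely illustrative sketch of such a section follows; the section name pattern comes from step 2, but the key names and values here are made up and must be copied from a real section already present in feeds.ini:

    [example_feed__regular_csv]
    # Hypothetical keys -- use an existing section of feeds.ini as the template.
    download_mask = example_$tld$_date.csv.gz
    md5_mask = $filename.md5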
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_all_for_tld.sh:
--------------------------------------------------------------------------------
1 | src_root_dir="$1"
2 | tld="$2"
3 | version="$3"
4 | db_username="$4"
5 | db_password="$5"
6 |
7 | if [ ! -d "$src_root_dir" ]; then
8 |     echo "src_root_dir $src_root_dir is not valid"
9 |     exit
10 | fi
11 | if [ -z "$tld" ]; then
12 |     echo "tld is missing"
13 |     exit
14 | fi
15 | if [ -z "$version" ]; then
16 |     echo "version is missing"
17 |     exit
18 | fi
19 |
20 | schema_file="$src_root_dir/$tld/whoiscrawler_"$version"_$tld"_mysql_schema.sql.gz
21 | schema_file2="$src_root_dir/$tld/whoiscrawler_$tld"_mysql_schema.sql.gz
22 |
23 | if [ ! -f "$schema_file" ] && [ ! -f "$schema_file2" ]; then
24 |     echo "invalid schema file $schema_file";
25 |     exit
26 | fi
27 | dump_file=$src_root_dir/$tld/whoiscrawler_"$tld"_mysql.sql.gz
28 | dump_file2=$src_root_dir/$tld/whoiscrawler_$version"_$tld"_mysql.sql.gz
29 |
30 | if [ ! -f "$dump_file" ] && [ ! -f "$dump_file2" ]; then
31 |     echo "no valid mysqldump file $dump_file or $dump_file2 found"
32 |     exit
33 | fi
34 | db=whoiscrawler_$version"_$tld"
35 | if [ ! -f "$schema_file" ]; then
36 |     schema_file=$schema_file2
37 | fi
38 | if [ ! -f "$dump_file" ]; then
39 |     dump_file="$dump_file2"
40 | fi
41 | ./load_mysql_data_all.sh $schema_file $dump_file $db $db_username $db_password
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/new_generation_plans.dat:
--------------------------------------------------------------------------------
1 | pro:domain_names_dropped,domain_names_new,ngtlds_domain_names_dropped,ngtlds_domain_names_new
2 | enterprise:domain_names_dropped,domain_names_dropped_whois,domain_names_new,domain_names_whois,ngtlds_domain_names_dropped,ngtlds_domain_names_dropped_whois,ngtlds_domain_names_new,ngtlds_domain_names_whois
3 | custom1:domain_names_dropped,domain_names_dropped_whois,domain_names_new,domain_names_whois,domain_names_whois_archive,domain_names_whois_filtered_reg_country,ngtlds_domain_names_dropped,ngtlds_domain_names_dropped_whois,ngtlds_domain_names_new,ngtlds_domain_names_whois,ngtlds_domain_names_whois_archive,ngtlds_domain_names_whois_filtered_reg_country,ngtlds_domain_names_whois_filtered_reg_country_archive
4 | custom2:domain_names_dropped,domain_names_dropped_whois,domain_names_new,domain_names_whois,domain_names_whois_archive,domain_names_whois_filtered_reg_country,domain_names_whois_filtered_reg_country_noproxy,domain_names_whois_filtered_reg_country_noproxy_archive,ngtlds_domain_names_dropped,ngtlds_domain_names_dropped_whois,ngtlds_domain_names_new,ngtlds_domain_names_whois,ngtlds_domain_names_whois_archive,ngtlds_domain_names_whois_filtered_reg_country,ngtlds_domain_names_whois_filtered_reg_country_archive,ngtlds_domain_names_whois_filtered_reg_country_noproxy,ngtlds_domain_names_whois_filtered_reg_country_noproxy_archive
5 | lite:domain_names_new,ngtlds_domain_names_new
6 |
--------------------------------------------------------------------------------
/whoisxmlapi_whoisdownload_bash/install_p12pack.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #A simple utility to convert pkcs12 files to certificates to be used
3 | #with the ssl-auth version of whoisdownload.bash
4 |
5 |
6 | if [[ $1 == "" || $1 == "--help" ]];then
7 |     cat >&2 <&2
28 |     exit 1
29 | fi
30 | if [ -z $2 ];then
31 |     echo "ERROR: Your password is needed" >&2
32 |     exit 1
33 | fi
34 |
35 |
36 | IN_PKCS="$1"
37 | IN_PW="$2"
38 |
39 | openssl pkcs12 -clcerts -nokeys -in "$IN_PKCS" -out client.crt -password pass:"$IN_PW" -passin pass:"$IN_PW"
40 | openssl pkcs12 -cacerts -nokeys -in "$IN_PKCS" -out whoisxmlapi.ca -password pass:"$IN_PW" -passin pass:"$IN_PW"
41 | openssl pkcs12 -nocerts -in "$IN_PKCS" -out private.key -password pass:"$IN_PW" -passin pass:"$IN_PW" -passout pass:"$IN_PW"
42 | openssl rsa -in private.key -out "client.key" -passin pass:"$IN_PW"
43 | rm private.key
44 | chmod 400 client.* whoisxmlapi.ca
45 |
46 | echo "All done. Now you can use the downloader script in this directory."
47 |
--------------------------------------------------------------------------------
/whoisxmlapi_bash_csv_to_mysqldb/load_csv_file_into_db_old.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | USAGE="USAGE:\n\t$0 db csv_file schema mode"
4 |
5 | db="$1"
6 | csv_file="$2"
7 | schema="$3"
8 | mode="$4"
9 |
10 | if [[ $# -ne 4 ]]
11 | then
12 |     echo -e "${USAGE}"
13 |     exit 1
14 | fi
15 |
16 | if [ -z "$db" ]; then
17 |     echo -e "db is missing\n${USAGE}"
18 |     exit 1
19 | fi
20 | if [ ! -f "$csv_file" ]; then
21 |     echo -e "csv_file $csv_file doesn't exist\n${USAGE}"
22 |     exit 1
23 | fi
24 | if [ ! -f "$schema" ]; then
25 |     echo -e "schema file $schema doesn't exist\n${USAGE}"
26 |     exit 1
27 | fi
28 | csv_file=`readlink -e $csv_file`
29 | schema=`readlink -e $schema`
30 | case ${mode} in
31 |     simple|regular )
32 |         table="whois_record_flat_${mode}"
33 |         ;;
34 |     full )
35 |         table="whois_record_flat"
36 |         ;;
37 |     * )
38 |         echo "mode must be specified(simple, regular, or full)"
39 |         exit 1
40 |         ;;
41 | esac
42 |
43 | if [[ -z $(mysql -A --skip-column-names ${db} <<< "SHOW TABLES LIKE \"${table}\";") ]]
44 | then
45 |     mysql ${db} --verbose <${schema}
46 | fi
47 |
48 | fields=$(head -n 1 ${csv_file}|sed 's/"//g')
49 | #nfields=$(echo ${fields}|awk -F\, '{print NF}')
50 | #ncolumns=$(mysql -A --skip-column-names ${db} <<< "SHOW COLUMNS FROM ${table};"|wc -l)
51 | #if [[ ${nfields} -ne ${ncolumns} ]]
52 | #then
53 | #    echo "Fatal: number of fields ${nfields} not equal to number of columns ${ncolumns} in table ${table}"
54 | #    exit 1
55 | #fi
56 |
57 | mysql ${db} --verbose -e "load data infile \"${csv_file}\" IGNORE into table $table
58 | fields terminated by ',' enclosed by '\"' LINES TERMINATED BY '\n' IGNORE 1 LINES
59 | (${fields})"
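A minimal invocation sketch for the csv loader above (database name and paths are hypothetical; since LOAD DATA INFILE runs on the server side, the MySQL server must be permitted to read the csv file, e.g. by its secure_file_priv setting):

    # Load one csv into table whois_record_flat_regular of database whoisdb,
    # creating the table from the schema file first if it does not exist yet.
    ./load_csv_file_into_db_old.sh whoisdb /data/csvs/1.csv loader_schema_regular.sql regular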
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/whois_utils/whois_user_interaction.py:
--------------------------------------------------------------------------------
1 | # User interaction module of Whois API LLC end user scripts
2 | #
3 | #Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
4 | #
5 |
6 | import sys
7 | import os
8 |
9 | import argparse
10 | from argparse import RawTextHelpFormatter
11 |
12 | import easygui as g
13 |
14 | import re
15 |
16 | #Logging functions
17 | def print_error_and_exit(message):
18 |     global DIALOG_COMMUNICATION
19 |     if DIALOG_COMMUNICATION:
20 |         _ = g.msgbox('Error. \n ' + message +'\nExiting.','WhoisXML API MySQL loader script')
21 |         exit(1)
22 |     else:
23 |         sys.stderr.write('\nError: ' + message+'\n')
24 |         sys.stderr.flush()
25 |         exit(1)
26 | def print_verbose(message):
27 |     global VERBOSE
28 |     global DEBUG
29 |     if VERBOSE or DEBUG:
30 |         sys.stderr.write(message + '\n')
31 |         sys.stderr.flush()
32 | def print_debug(message):
33 |     global DEBUG
34 |     if DEBUG:
35 |         sys.stderr.write(message + '\n')
36 |         sys.stderr.flush()
37 | #File and directory utilities
38 | def get_file(path, message):
39 |     """Given an arbitrary path, verifies that it points to a file.
40 |     If not, prints the error message and the path.
41 |     If yes, returns the normalized file path."""
42 |     thefile = os.path.normpath(path)
43 |     if not os.path.isfile(thefile):
44 |         print_error_and_exit(message +'\n (File specified: %s)' %(path))
45 |     else:
46 |         return(thefile)
47 |
48 | def get_directory(path, message):
49 |     """Given an arbitrary path, verifies that it points to a directory.
50 |     If not, prints the error message and the path.
51 |     If yes, returns the normalized directory path."""
52 |     thefile = os.path.normpath(path)
53 |     if not os.path.isdir(thefile):
54 |         print_error_and_exit(message +'\n (Directory specified: %s)' %(path))
55 |     else:
56 |         return(thefile)
57 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | These are scripts provided by WhoisXML API, Inc.,
2 | for clients using WHOIS data feeds to obtain bulk WHOIS data
3 | or to set up a WHOIS database.
4 |
5 | The contents of the subdirectories and files are:
6 |
7 | UsingScriptsOnWindows10.pdf : a brief blog post describing how to use
8 |                               all features of all our tools on
9 |                               Windows 10 systems.
10 |
11 | whoisxmlapi_download_whois_data: a Python script for downloading
12 |                               bulk data from daily and quarterly WHOIS
13 |                               data feeds in various formats.
14 |                               It can be used from command line,
15 |                               but also supports a simple GUI.
16 |                               For all platforms.
17 |
18 | whoisxmlapi_whoisdownload_bash: a bash script for downloading bulk
19 |                               data from daily and quarterly WHOIS
20 |                               data feeds.
21 |
22 | whoisxmlapi_csv2json: a Python3 script which converts WhoisXML API
23 |                               csv files to json files.
24 |
25 | whoisxmlapi_bash_csv_to_mysqldb: bash scripts to create and maintain
26 |                               WHOIS databases in MySQL
27 |                               based on csv files downloaded from
28 |                               WhoisXML API.
29 |                               If you do not insist on bash,
30 |                               check also
31 |                               whoisxmlapi_flexible_csv_to_mysqldb
32 |                               which is in Python 3
33 |                               and provides extended functionality.
34 |
35 | whoisxmlapi_flexible_csv_to_mysqldb:
36 |                               a flexible and portable script in Python
37 |                               to create and maintain
38 |                               WHOIS databases in MySQL
39 |                               based on csv files downloaded from
40 |                               WhoisXML API.
41 |
42 | whoisxmlapi_mysqldump_loaders: Python2 and bash scripts to set up a
43 |                               WHOIS database in MySQL,
44 |                               using the data obtained from
45 |                               WhoisXML API quarterly data feeds.
46 |
47 | whoisxmlapi_percona_loader_scripts: bash scripts for loading binary MySQL
48 |                               dumps of quarterly releases where available
49 |
50 | legacy_scripts: miscellaneous legacy scripts, no longer developed,
51 |                               published for compatibility reasons.
52 |
53 |
54 |
--------------------------------------------------------------------------------
/whoisxmlapi_percona_loader_scripts/legacy/restore_db.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
3 | cd "$DIR" || exit 1
4 |
5 | DB=$1
6 | RESTORE_DIR=$2
7 | DB_DATA_DIR=${3:-/var/lib/mysql}
8 | SCHEMA_FILE=${4:-$DIR/whoiscrawler_mysql_schema.sql}
9 |
10 | TABLES="contact domain_names_whoisdatacollector registry_data whois_record whois_record_ids_whoisdatacollector"
11 |
12 | if [ -z "$DB" ]; then
13 |     echo "db is missing"
14 |     exit
15 | fi
16 |
17 | if [ ! -d "$RESTORE_DIR" ]; then
18 |     echo "restore_dir $RESTORE_DIR must be valid, we will copy data from this directory to your database data directory"
19 |     exit
20 | fi
21 |
22 | if [ ! -d "$DB_DATA_DIR" ]; then
23 |     echo "db_data_dir $DB_DATA_DIR must be valid, this should probably be /var/lib/mysql/ we will copy data from restore_dir to this directory/db_name"
24 |     exit
25 | fi
26 |
27 | if [ ! -f "$SCHEMA_FILE" ]; then
28 |     echo "schema_file $SCHEMA_FILE is missing"
29 |     exit
30 | fi
31 |
32 |
33 | echo "creating database $DB"
34 | mysql -e "create database $DB"
35 | mysql "$DB" < "$SCHEMA_FILE"
36 |
37 |
38 | if [ ! -d "$DB_DATA_DIR/$DB" ]; then
39 |     echo "$DB_DATA_DIR/$DB doesn't exist!"
40 |     exit
41 | fi
42 |
43 |
44 | echo "importing tablespaces"
45 | G_START_TIME=$(date +%s)
46 |
47 | for table in $TABLES; do
48 |     START_TIME=$(date +%s)
49 |     q="set FOREIGN_KEY_CHECKS=0;ALTER TABLE $DB.$table DISCARD TABLESPACE;"
50 |     echo "$q"
51 |     mysql -e "$q"
52 |
53 |     file="$table.ibd"
54 |     echo "copy table file $file from $RESTORE_DIR/$DB to $DB_DATA_DIR/$DB"
55 |     cp "$RESTORE_DIR/$DB/$file" "$DB_DATA_DIR/$DB/"
56 |
57 |     chown -R mysql:mysql "$DB_DATA_DIR/$DB"
58 |
59 |     q="ALTER TABLE $DB.$table IMPORT TABLESPACE"
60 |     echo "$q"
61 |     mysql -e "$q"
62 |
63 |     END_TIME=$(date +%s)
64 |     DUR=$((END_TIME-START_TIME))
65 |     echo "import table $table took $DUR seconds"
66 | done
67 |
68 | G_END_TIME=$(date +%s)
69 | GDUR=$((G_END_TIME-G_START_TIME))
70 | echo "import tables took $GDUR seconds"
71 |
72 |
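A minimal invocation sketch for the restore script above; the database name and restore directory are hypothetical, the script expects the .ibd files under $RESTORE_DIR/$DB, and it needs privileges to chown the copied files to mysql:mysql:

    # Discard, copy, and re-import the tablespaces of a quarterly binary dump.
    sudo ./restore_db.sh whoiscrawler_v20_com /data/percona_restore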
--------------------------------------------------------------------------------
/whoisxmlapi_bash_csv_to_mysqldb/loader_schema_simple.sql:
--------------------------------------------------------------------------------
1 | create table whois_record_flat_simple (
2 | `whois_record_flat_id` bigint(20) NOT NULL AUTO_INCREMENT,
3 | `domainName` varchar(256),
4 | `registrarName` varchar(512),
5 | `contactEmail` varchar(256),
6 | `whoisServer` varchar(512),
7 | `nameServers` varchar(256),
8 | `createdDate` varchar(256),
9 | `updatedDate` varchar(256),
10 | `expiresDate` varchar(256),
11 | `standardRegCreatedDate` varchar(256),
12 | `standardRegUpdatedDate` varchar(256),
13 | `standardRegExpiresDate` varchar(256),
14 | `status` text,
15 | `Audit_auditUpdatedDate` varchar(256),
16 | `registrant_email` varchar(256),
17 | `registrant_name` varchar(256),
18 | `registrant_organization` varchar(256),
19 | `registrant_street1` varchar(256),
20 | `registrant_street2` varchar(256),
21 | `registrant_street3` varchar(256),
22 | `registrant_street4` varchar(256),
23 | `registrant_city` varchar(64),
24 | `registrant_state` varchar(256),
25 | `registrant_postalCode` varchar(45),
26 | `registrant_country` varchar(45),
27 | `registrant_fax` varchar(45),
28 | `registrant_faxExt` varchar(45),
29 | `registrant_telephone` varchar(45),
30 | `registrant_telephoneExt` varchar(45),
31 | `administrativeContact_email` varchar(256),
32 | `administrativeContact_name` varchar(256),
33 | `administrativeContact_organization` varchar(256),
34 | `administrativeContact_street1` varchar(256),
35 | `administrativeContact_street2` varchar(256),
36 | `administrativeContact_street3` varchar(256),
37 | `administrativeContact_street4` varchar(256),
38 | `administrativeContact_city` varchar(64),
39 | `administrativeContact_state` varchar(256),
40 | `administrativeContact_postalCode` varchar(45),
41 | `administrativeContact_country` varchar(45),
42 | `administrativeContact_fax` varchar(45),
43 | `administrativeContact_faxExt` varchar(45),
44 | `administrativeContact_telephone` varchar(45),
45 | `administrativeContact_telephoneExt` varchar(45),
46 | `registrarIANAID` varchar(45),
47 | primary key (`whois_record_flat_id`)
48 | )ENGINE=InnoDB ROW_FORMAT=COMPRESSED AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
49 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/install_p12.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #This utility extracts a p12 pack obtained from WhoisXML API Inc.
3 | #into files that can be used with downloader scripts.
4 |
5 | import sys
6 | import os
7 | from OpenSSL import crypto as c
8 | try:
9 |     from Crypto.PublicKey import RSA
10 |     newcryptolib = False
11 | except ModuleNotFoundError:
12 |     newcryptolib = True
13 |     from Cryptodome.PublicKey import RSA
14 | import easygui as g
15 |
16 | windowtitle = 'WhoisXML API SSL pack converter'
17 | infile = g.fileopenbox('Choose the pack.p12 file obtained from WhoisXML API Inc.',
18 |                        windowtitle)
19 | password = g.passwordbox('Enter the password supplied with your pack',
20 |                          windowtitle)
21 |
22 | if newcryptolib:
23 |     password = bytes(password, encoding='utf-8')
24 | try:
25 |     p12 = c.load_pkcs12(open(infile, 'rb').read(), password)
26 | except:
27 |     g.msgbox('Error: invalid pack or password. Exiting.')
28 |     exit(6)
29 |
30 | try:
31 |     cert = c.dump_certificate(c.FILETYPE_PEM, p12.get_certificate())
32 |     certfile = open('client.crt','wb')
33 |     certfile.write(cert)
34 |     certfile.close()
35 |
36 |     key = c.dump_privatekey(c.FILETYPE_PEM, p12.get_privatekey())
37 |     rsakey = RSA.importKey(key)
38 |     keyfile = open('client.key','wb')
39 |     keyfile.write(rsakey.exportKey())
40 |     keyfile.close()
41 |     os.chmod('client.key', 0o400)  # octal 0o400: owner read-only
42 |
43 |     cacert = c.dump_certificate(c.FILETYPE_PEM, p12.get_ca_certificates()[0])
44 |     cacertfile = open('whoisxmlapi.ca','wb')
45 |     cacertfile.write(cacert)
46 |     cacertfile.close()
47 | except:
48 |     g.msgbox('Error: could not overwrite one of the files.\nEnsure that the following files do not exist or can be overwritten:\n whoisxmlapi.ca\n client.crt\n client.key\n')
49 |     exit(1)
50 |
51 | g.msgbox('The files needed for authentication:\n whoisxmlapi.ca\n client.crt\n client.key\n have been created.\nNow you can use ssl authentication.\n\nIMPORTANT: keep client.key secret!', windowtitle)
52 |
--------------------------------------------------------------------------------
/website_contactscats_to_mysqldb/website_contacts_categories.ddl:
--------------------------------------------------------------------------------
1 | /*
2 | Sample schema file for the Website Contacts and Categories MySQL database
3 | v 0.0
4 | (c) WhoisXML API, Inc.
5 | */ 6 | 7 | CREATE TABLE category( 8 | category VARCHAR(255) PRIMARY KEY 9 | ); 10 | 11 | CREATE TABLE domain( 12 | domainID INTEGER PRIMARY KEY AUTO_INCREMENT, 13 | domainName VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 14 | countryCode VARCHAR(2), 15 | meta_title LONGBLOB, 16 | meta_description LONGBLOB, 17 | socialLinks_facebook TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 18 | socialLinks_googlePlus TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 19 | socialLinks_instagram TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 20 | socialLinks_twitter TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 21 | socialLinks_linkedIn TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci 22 | ); 23 | 24 | CREATE TABLE domain_category( 25 | categoryID VARCHAR(255), 26 | domainID INTEGER, 27 | PRIMARY KEY (categoryID, domainID) 28 | ); 29 | 30 | CREATE TABLE email( 31 | emailID INTEGER PRIMARY KEY AUTO_INCREMENT, 32 | domainID INTEGER, 33 | description LONGBLOB, 34 | email TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 35 | CONSTRAINT FK_email_domain FOREIGN KEY(domainID) REFERENCES domain(domainID) 36 | ); 37 | 38 | CREATE TABLE phone( 39 | phoneID INTEGER PRIMARY KEY AUTO_INCREMENT, 40 | domainID INTEGER, 41 | description LONGBLOB, 42 | phoneNumber TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, 43 | callHours LONGBLOB, 44 | CONSTRAINT FK_phone_domain FOREIGN KEY(domainID) REFERENCES domain(domainID) 45 | ); 46 | 47 | CREATE TABLE postalAddress( 48 | postalAddressID INTEGER PRIMARY KEY AUTO_INCREMENT, 49 | domainID INTEGER, 50 | postalAddress LONGBLOB, 51 | CONSTRAINT FK_postalAddress_domain FOREIGN KEY(domainID) REFERENCES domain(domainID) 52 | ); 53 | 54 | CREATE TABLE companyName( 55 | companyNameID INTEGER PRIMARY KEY AUTO_INCREMENT, 56 | domainID INTEGER, 57 | companyName LONGBLOB, 58 | CONSTRAINT FK_company_domain FOREIGN KEY(domainID) REFERENCES domain(domainID) 59 | ); 60 | -------------------------------------------------------------------------------- /whoisxmlapi_mysqldump_loaders/legacy/load_mysql_data_per_tables.sh: -------------------------------------------------------------------------------- 1 | schema_file="$1" 2 | if [ ! -f "$schema_file" ]; then 3 | echo "invalid schema file $schema_file"; 4 | exit 5 | fi 6 | table_files_dir="$2" 7 | if [ ! 
-d "$table_files_dir" ]; then
8 |     echo "please specify a valid directory where all table files reside in"
9 |     exit
10 | fi
11 | db="$3"
12 | if [ -z "$db" ]; then
13 |     echo "db is missing"
14 |     exit
15 | fi
16 | db_username="$4"
17 | if [ -z "$db_username" ]; then
18 |     echo "db username is missing"
19 |     exit
20 | fi
21 | db_password="$5"
22 | if [ -z "$db_password" ]; then
23 |     echo "db_password is missing"
24 |     exit
25 | fi
26 | ./load_mysql_schema.sh $schema_file $db $db_username $db_password
27 |
28 | tables="whois_record registry_data contact domain_names_whoisdatacollector"
29 |
30 | mysql -u$db_username -p$db_password $db -e "alter table whois_record drop index domain_name_index;alter table whois_record drop index domain_name;"
31 | mysql -u$db_username -p$db_password $db -e "alter table registry_data drop index domain_name_index;alter table registry_data drop index domain_name;"
32 |
33 | table_files_dir=$table_files_dir/*
34 |
35 | for file in $table_files_dir; do
36 |
37 |     time=`date +%s`
38 |     if [ -f "$file" ]; then
39 |         time=`date +%s`
40 |
41 |         echo "loading data from file $file"
42 |         if [ ${file: -3} == ".gz" ]; then
43 |             { echo "SET autocommit = 0;"
44 |               zcat "$file"
45 |               echo "commit;" ; } | mysql -u$db_username -p$db_password --force $db
46 |         elif [ ${file: -4} == ".sql" ]; then
47 |             { echo "SET autocommit = 0;"
48 |               cat "$file"
49 |               echo "commit;" ; } | mysql -u$db_username -p$db_password --force $db
50 |         fi
51 |
52 |     fi
53 |
54 |     time2=`date +%s`
55 |     dur=`expr $time2 - $time`
56 |     echo " loading $table from file $file took $dur seconds"
57 |
58 | done
59 | time=`date +%s`
60 | mysql --force -u$db_username -p$db_password $db -e "alter table whois_record add index domain_name_index(domain_name)"
61 | mysql --force -u$db_username -p$db_password $db -e "alter table registry_data add index domain_name_index(domain_name)"
62 | time2=`date +%s`
63 | dur=`expr $time2 - $time`
64 | echo " add indices took $dur seconds"
65 |
66 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/README_python2.txt:
--------------------------------------------------------------------------------
1 | Supplement for the documentation of the WhoisXML API
2 |
3 | Whois data download utility
4 |
5 | download_whois_data.py
6 |
7 | Release version 1.0.0 dated 2019-12-10.
8 |
9 | Copyright (c) 2010-2021 Whois API, Inc. http://www.whoisxmlapi.com
10 | -------------------------------------------------------------------
11 |
12 | The present file supplements README.txt as a replacement for steps 1
13 | and 2 in Section 2, Installation, intended for legacy Python 2 users
14 | as opposed to the Python 3-based description of the main README. It is
15 | recommended to switch to Python 3, as the support of Python 2 ends on
16 | 2020-01-01.
17 |
18 | Step 1: Install Python
19 |
20 | The script has been tested with Python 2.7.15. If for some reason you
21 | have an earlier main version of Python 2, such as Python 2.6, you
22 | will have compatibility issues. (This is the case when you use the
23 | default Python on certain releases of CentOS or RHEL ver. 6.) It is
24 | always possible on those systems to set up Python 2.7.x in parallel;
25 | consult the documentation of your system.
26 |
27 | -On Linux systems, use your package manager, e.g. "apt-get install python".
28 |
29 | -On Windows systems, download the installer from
30 | www.python.org, series 2 (2.7.x) for your platform, then start and
31 | go through the installation procedure. Be careful to install with
32 | the following options enabled:
33 |     -"Install pip" (this is the default)
34 |     -"Add Python to path"
35 |
36 | Step 2: Install Dependencies
37 |
38 | Additional required python packages are:
39 |
40 |     argparse
41 |     easygui
42 |     requests
43 |
44 | On both Windows and Linux you can install them by the
45 | (root/administrator) command-line:
46 |
47 |     pip install <package>
48 |
49 | where <package> is one of the above three packages. Alternatively,
50 | you may find these as software packages for your system (e.g. "apt-get
51 | install python-easygui").
52 |
53 | Once these steps are done, the script's dependencies are met and it is
54 | ready for use.
55 |
56 | The script supports the access of the data via ssl-encrypted pages
57 | using ssl key-based authentication. Those clients who want to use this
58 | possibility should read the file
59 |
60 | README.SSL.txt
61 |
62 | to do the necessary steps for configuring this kind of access.
63 |
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/CHANGELOG.txt:
--------------------------------------------------------------------------------
1 | Changelog for the WhoisXML API
2 |
3 | Whois data download utility
4 |
5 | download_whois_data.py
6 |
7 | 2023-06-07
8 | ----------
9 |
10 | - Fixed a bug affecting newer versions of urllib3.
11 |   (Modification tested on newer Linux environments and python3 only.)
12 | - Notes made on obsoleting sslauth in docs.
13 |
14 | 2022-03-30
15 | ----------
16 |
17 | - Added the "domains", "verified_domains", "missing_domains", and
18 |   "reserved_domains" data formats (domain lists) to the quarterly
19 |   database data feeds.
20 |
21 | 2021-08-18
22 | ----------
23 |
24 | - Added the option --only-changed to use added/dropped tlds files
25 |   instead of supported_tlds. Works only for feeds where these files
26 |   are available.
27 | - Changed the default value of --maxtries to 3 upon users' request
28 |
29 | 2021-07-14
30 | ----------
31 | - The broken --maxtries option has been fixed, the default value has
32 |   been changed from 5 to 1 for performance reasons
33 | - Fixed a bug in the removal of empty supported tld
34 |
35 | 2021-06-01
36 | ----------
37 | - A minor bug fixed in interactive mode.
38 |
39 | 2021-04-19
40 | ----------
41 | - Added support for new-generation access: subscription plan-dependent
42 |   URLs, at the moment typically at newly-registered-domains.whoisxmlapi.com
43 |
44 | 2021-03-19
45 | ----------
46 | - Fixed an issue in the GUI mode affecting easygui >= 0.98.2
47 | - Added the temporary workaround of sslauth to GUI mode
48 |
49 | 2021-03-10
50 | ----------
51 | - Recovered the temporary option for sslauth, corrected http to https in urls.
52 |
53 | 2021-02-22
54 | ----------
55 | - Fixed a bug in downloading supported tlds lists, affecting --all-tlds option
56 |
57 | 2020-05-13
58 | ----------
59 | - Introduced a mechanism to detect premature daily downloads.
60 |
61 | 2019-12-10
62 | ----------
63 |
64 | - The script is made Python 3 compatible, the documentation has been
65 |   amended accordingly.
66 |
67 | 2019-06-20
68 | ----------
69 | - The option --list-feeds shows brief feed descriptions.
70 | - The --describe-feed option has been introduced.
71 | - A bug has been fixed which affected tld-independent feeds.
72 | - An error message is generated if feeds.ini cannot be read.
73 |
74 | 2018-11-27
75 | ----------
76 | -Added support for the feed whois_record_delta_domain_names_change_archive
77 | -Fixed a bug of not reporting unavailable files for whole directory downloads
78 |
79 | 2018-11-12:
80 | ----------
81 | -CHANGELOG.txt introduced
82 | -Added support for the data feed "gtlds_domain_names_whois_archive"
83 | -Fixed issues related to mixing tld dependent and independent data
84 |  formats within the same session
85 | -Introduced the support for determining list of supported tlds
86 |  for archive feeds with year-named subdirectories
87 | -Refined error message for non-existing resources
88 |
--------------------------------------------------------------------------------
/whoisxmlapi_whoisdownload_bash/README.SSL:
--------------------------------------------------------------------------------
1 | Setting up SSL authentication to use with whoisdownload.sh
2 |
3 | install_p12pack.sh
4 |
5 | Provided by WhoisXML API, Inc.
6 |
7 | dated: 2018-02-01
8 |
9 | CONTENTS:
10 | ---------
11 |
12 | 1. Brief summary
13 |
14 | 2. Installing the auth credentials
15 |
16 | 3. Using the script
17 |
18 |
19 | 1. Brief summary
20 | ----------------
21 |
22 | The goal is to set up your authentication credentials so that you can
23 | use whoisdownload.sh with the option --auth-type=ssl instead of
24 | password authentication.
25 |
26 | 2. Installing the auth credentials
27 | ----------------------------------
28 |
29 | As a starting point we assume that you have obtained a p12 pack with
30 | certificates and keys, and a password for this file from WhoisXML API,
31 | Inc. (If this is not the case, you will not be able to use this script.)
32 |
33 | Let us call the obtained file pack.p12, and the password in the
34 | examples will be "YourPassword".
35 |
36 | You need to do the following just once:
37 |
38 | -Put pack.p12 into the directory where the download scripts (and this
39 | file) reside.
40 |
41 | -Make sure you have openssl installed. If you do not have it, install
42 | it (e.g. "apt-get install openssl" on Debian-flavor systems: Ubuntu,
43 | Mint, Ubuntu on Windows).
44 |
45 | -Run the following command in the directory:
46 |
47 | ./install_p12pack.sh pack.p12 YourPassword
48 |
49 | which gives the following output:
50 |
51 | MAC verified OK
52 | MAC verified OK
53 | MAC verified OK
54 | writing RSA key
55 | All done. Now you can use the downloader script in this directory.
56 |
57 | Also, it will generate the files client.crt, client.key and whoisxmlapi.ca
58 | needed for the authentication. These will be only readable by you.
59 |
60 | IMPORTANT: keep the generated client.key confidential.
61 |
62 | Once this is done, you will be able to use the script.
63 |
64 | 3. Using the script
65 | -------------------
66 |
67 | To use ssl authentication instead of password authentication, add
68 |
69 | --auth-type=ssl
70 |
71 | to the options of the script when you use it. You will not need the
72 | --user and --password options then.
73 |
74 | The default location of the files client.crt, client.key and
75 | whoisxmlapi.ca is the same directory as your script.
76 |
77 | You may move these files elsewhere, and specify their location with
78 | the respective options, e.g.
79 |
80 | --cacert=/home/myuser/mycustomdir/whoisxmlapi.ca
81 | --sslcert=/home/myuser/mycustomdir/client.crt
82 | --sslkey=/home/myuser/mycustomdir/client.key
83 |
84 | You may also rename these files and specify them with their new names
85 | using the above options.
As an alternative to the command-line 86 | options, you may specify the location of the respective files by 87 | setting the variables 88 | AUTHTYPE="ssl" 89 | and the variables 90 | CACERTFILE 91 | CERTFILE 92 | KEYFILE 93 | to the location of the auth files in your 94 | ~/.whoisdownload.sh 95 | file if you prefer this approach. 96 | IMPORTANT: the string providing file locations as well as those 97 | possibly given as values to variables specifying full path should be 98 | preferably full absolute paths and they should resolve properly by the 99 | "realpath" command on your system. Hence, for instance, do not use "~" 100 | to refer to your home directory in these path strings. 101 | -------------------------------------------------------------------------------- /whoisxmlapi_csv2json/transform_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | import csv 5 | import multiprocessing 6 | import json 7 | import sys 8 | import os 9 | from platform import system 10 | 11 | VERSION = "0.0.2" 12 | MYNAME = sys.argv[0].replace('./', '') 13 | 14 | # Preparing arguments 15 | argparser = ArgumentParser(description='Convert CSV to JSON format') 16 | argparser.add_argument('--version', 17 | help='Print version information and exit.', 18 | action='version', 19 | version=MYNAME + ' ver. ' + VERSION + '\n(c) WhoisXML API LLC.') 20 | argparser.add_argument('-i', '--path', 21 | help='input directory with uncompressed CSVs or single CSV file', 22 | type=str, required=True) 23 | argparser.add_argument('--key', help='primary key field for records, default "domainName"', 24 | type=str, default='domainName') 25 | argparser.add_argument('--threads', 26 | help='number of threads, default 1', 27 | type=int, default=1) 28 | argparser.add_argument('--force', help='overwrite existent files', action='store_true') 29 | argparser.add_argument('--human-readable', 30 | help='generate human readable output', 31 | action='store_true') 32 | args = argparser.parse_args() 33 | 34 | # increase max size of the field 35 | if system() == 'Windows': 36 | csv.field_size_limit(2147483647) 37 | else: 38 | from sys import maxsize as csv_maxsize 39 | csv.field_size_limit(csv_maxsize) 40 | 41 | 42 | def convert_json(csv_queue): 43 | while not csv_queue.empty(): 44 | csv_file = csv_queue.get() 45 | json_file = os.path.join( 46 | os.path.dirname(csv_file), 47 | os.path.basename(csv_file).replace('.csv', '.json')) 48 | if args.force or not os.path.isfile(json_file): 49 | out_data = dict() 50 | with open(csv_file, 'rt') as infile: 51 | infile_csv = csv.DictReader(infile) 52 | for in_row in infile_csv: 53 | out_data.update({in_row[args.key]: {}}) 54 | for field in infile_csv.fieldnames: 55 | if field != args.key and in_row[field] != '': 56 | out_data[in_row[args.key]].update({field: in_row[field]}) 57 | with open(json_file, 'wt') as json_file_obj: 58 | if args.human_readable: 59 | json_file_obj.write(json.dumps(out_data, indent=4)) 60 | else: 61 | json_file_obj.write(json.dumps(out_data)) 62 | del out_data 63 | 64 | 65 | if __name__ == '__main__': 66 | # populating queue 67 | csvQueue = multiprocessing.Queue() 68 | 69 | if os.path.isdir(args.path): 70 | for csv_f in os.listdir(args.path): 71 | if csv_f.endswith('.csv'): 72 | csvQueue.put(os.path.join(args.path, csv_f)) 73 | elif os.path.isfile(args.path) and args.path.endswith('.csv'): 74 | csvQueue.put(args.path) 75 | else: 76 | exit(1) 77 | 78 | threads = [] 79 | for t in range(0, 
args.threads): 80 | convert_thread = multiprocessing.Process(target=convert_json, args=(csvQueue, )) 81 | convert_thread.start() 82 | threads.append(convert_thread) 83 | for convert_thread in threads: 84 | convert_thread.join() 85 | -------------------------------------------------------------------------------- /whoisxmlapi_flexible_csv_to_mysqldb/field_types.csv: -------------------------------------------------------------------------------- 1 | domainName,varchar(256) 2 | registrarName,varchar(512) 3 | contactEmail,varchar(256) 4 | whoisServer,varchar(512) 5 | nameServers,varchar(256) 6 | createdDate,varchar(256) 7 | updatedDate,varchar(256) 8 | expiresDate,varchar(256) 9 | standardRegCreatedDate,varchar(256) 10 | standardRegUpdatedDate,varchar(256) 11 | standardRegExpiresDate,varchar(256) 12 | status,text 13 | RegistryData_rawText,longtext 14 | WhoisRecord_rawText,longtext 15 | Audit_auditUpdatedDate,varchar(256) 16 | registrant_rawText,longtext 17 | registrant_email,varchar(256) 18 | registrant_name,varchar(256) 19 | registrant_organization,varchar(256) 20 | registrant_street1,varchar(256) 21 | registrant_street2,varchar(256) 22 | registrant_street3,varchar(256) 23 | registrant_street4,varchar(256) 24 | registrant_city,varchar(64) 25 | registrant_state,varchar(256) 26 | registrant_postalCode,varchar(45) 27 | registrant_country,varchar(45) 28 | registrant_fax,varchar(45) 29 | registrant_faxExt,varchar(45) 30 | registrant_telephone,varchar(45) 31 | registrant_telephoneExt,varchar(45) 32 | administrativeContact_rawText,longtext 33 | administrativeContact_email,varchar(256) 34 | administrativeContact_name,varchar(256) 35 | administrativeContact_organization,varchar(256) 36 | administrativeContact_street1,varchar(256) 37 | administrativeContact_street2,varchar(256) 38 | administrativeContact_street3,varchar(256) 39 | administrativeContact_street4,varchar(256) 40 | administrativeContact_city,varchar(64) 41 | administrativeContact_state,varchar(256) 42 | administrativeContact_postalCode,varchar(45) 43 | administrativeContact_country,varchar(45) 44 | administrativeContact_fax,varchar(45) 45 | administrativeContact_faxExt,varchar(45) 46 | administrativeContact_telephone,varchar(45) 47 | administrativeContact_telephoneExt,varchar(45) 48 | billingContact_rawText,longtext 49 | billingContact_email,varchar(256) 50 | billingContact_name,varchar(256) 51 | billingContact_organization,varchar(256) 52 | billingContact_street1,varchar(256) 53 | billingContact_street2,varchar(256) 54 | billingContact_street3,varchar(256) 55 | billingContact_street4,varchar(256) 56 | billingContact_city,varchar(64) 57 | billingContact_state,varchar(256) 58 | billingContact_postalCode,varchar(45) 59 | billingContact_country,varchar(45) 60 | billingContact_fax,varchar(45) 61 | billingContact_faxExt,varchar(45) 62 | billingContact_telephone,varchar(45) 63 | billingContact_telephoneExt,varchar(45) 64 | technicalContact_rawText,longtext 65 | technicalContact_email,varchar(256) 66 | technicalContact_name,varchar(256) 67 | technicalContact_organization,varchar(256) 68 | technicalContact_street1,varchar(256) 69 | technicalContact_street2,varchar(256) 70 | technicalContact_street3,varchar(256) 71 | technicalContact_street4,varchar(256) 72 | technicalContact_city,varchar(64) 73 | technicalContact_state,varchar(256) 74 | technicalContact_postalCode,varchar(45) 75 | technicalContact_country,varchar(45) 76 | technicalContact_fax,varchar(45) 77 | technicalContact_faxExt,varchar(45) 78 | technicalContact_telephone,varchar(45) 79 | 
technicalContact_telephoneExt,varchar(45) 80 | zoneContact_rawText,longtext 81 | zoneContact_email,varchar(256) 82 | zoneContact_name,varchar(256) 83 | zoneContact_organization,varchar(256) 84 | zoneContact_street1,varchar(256) 85 | zoneContact_street2,varchar(256) 86 | zoneContact_street3,varchar(256) 87 | zoneContact_street4,varchar(256) 88 | zoneContact_city,varchar(64) 89 | zoneContact_state,varchar(256) 90 | zoneContact_postalCode,varchar(45) 91 | zoneContact_country,varchar(45) 92 | zoneContact_fax,varchar(45) 93 | zoneContact_faxExt,varchar(45) 94 | zoneContact_telephone,varchar(45) 95 | zoneContact_telephoneExt,varchar(45) 96 | registrarIANAID,varchar(45) 97 | -------------------------------------------------------------------------------- /whoisxmlapi_bash_csv_to_mysqldb/loader_schema_regular_daily_only.sql: -------------------------------------------------------------------------------- 1 | create table whois_record_flat_regular ( 2 | `whois_record_flat_id` bigint(20) NOT NULL AUTO_INCREMENT, 3 | `domainName` varchar(256), 4 | `registrarName` varchar(512), 5 | `contactEmail` varchar(256), 6 | `whoisServer` varchar(512), 7 | `nameServers` varchar(256), 8 | `createdDate` varchar(256), 9 | `updatedDate` varchar(256), 10 | `expiresDate` varchar(256), 11 | `standardRegCreatedDate` varchar(256), 12 | `standardRegUpdatedDate` varchar(256), 13 | `standardRegExpiresDate` varchar(256), 14 | `status` text, 15 | `Audit_auditUpdatedDate` varchar(256), 16 | `registrant_email` varchar(256), 17 | `registrant_name` varchar(256), 18 | `registrant_organization` varchar(256), 19 | `registrant_street1` varchar(256), 20 | `registrant_street2` varchar(256), 21 | `registrant_street3` varchar(256), 22 | `registrant_street4` varchar(256), 23 | `registrant_city` varchar(64), 24 | `registrant_state` varchar(256), 25 | `registrant_postalCode` varchar(45), 26 | `registrant_country` varchar(45), 27 | `registrant_fax` varchar(45), 28 | `registrant_faxExt` varchar(45), 29 | `registrant_telephone` varchar(45), 30 | `registrant_telephoneExt` varchar(45), 31 | `administrativeContact_email` varchar(256), 32 | `administrativeContact_name` varchar(256), 33 | `administrativeContact_organization` varchar(256), 34 | `administrativeContact_street1` varchar(256), 35 | `administrativeContact_street2` varchar(256), 36 | `administrativeContact_street3` varchar(256), 37 | `administrativeContact_street4` varchar(256), 38 | `administrativeContact_city` varchar(64), 39 | `administrativeContact_state` varchar(256), 40 | `administrativeContact_postalCode` varchar(45), 41 | `administrativeContact_country` varchar(45), 42 | `administrativeContact_fax` varchar(45), 43 | `administrativeContact_faxExt` varchar(45), 44 | `administrativeContact_telephone` varchar(45), 45 | `administrativeContact_telephoneExt` varchar(45), 46 | `billingContact_email` varchar(256), 47 | `billingContact_name` varchar(256), 48 | `billingContact_organization` varchar(256), 49 | `billingContact_street1` varchar(256), 50 | `billingContact_street2` varchar(256), 51 | `billingContact_street3` varchar(256), 52 | `billingContact_street4` varchar(256), 53 | `billingContact_city` varchar(64), 54 | `billingContact_state` varchar(256), 55 | `billingContact_postalCode` varchar(45), 56 | `billingContact_country` varchar(45), 57 | `billingContact_fax` varchar(45), 58 | `billingContact_faxExt` varchar(45), 59 | `billingContact_telephone` varchar(45), 60 | `billingContact_telephoneExt` varchar(45), 61 | `technicalContact_email` varchar(256), 62 | `technicalContact_name` 
varchar(256), 63 | `technicalContact_organization` varchar(256), 64 | `technicalContact_street1` varchar(256), 65 | `technicalContact_street2` varchar(256), 66 | `technicalContact_street3` varchar(256), 67 | `technicalContact_street4` varchar(256), 68 | `technicalContact_city` varchar(64), 69 | `technicalContact_state` varchar(256), 70 | `technicalContact_postalCode` varchar(45), 71 | `technicalContact_country` varchar(45), 72 | `technicalContact_fax` varchar(45), 73 | `technicalContact_faxExt` varchar(45), 74 | `technicalContact_telephone` varchar(45), 75 | `technicalContact_telephoneExt` varchar(45), 76 | `zoneContact_email` varchar(256), 77 | `zoneContact_name` varchar(256), 78 | `zoneContact_organization` varchar(256), 79 | `zoneContact_street1` varchar(256), 80 | `zoneContact_street2` varchar(256), 81 | `zoneContact_street3` varchar(256), 82 | `zoneContact_street4` varchar(256), 83 | `zoneContact_city` varchar(64), 84 | `zoneContact_state` varchar(256), 85 | `zoneContact_postalCode` varchar(45), 86 | `zoneContact_country` varchar(45), 87 | `zoneContact_fax` varchar(45), 88 | `zoneContact_faxExt` varchar(45), 89 | `zoneContact_telephone` varchar(45), 90 | `zoneContact_telephoneExt` varchar(45), 91 | `registrarIANAID` varchar(45), 92 | primary key (`whois_record_flat_id`) 93 | )ENGINE=InnoDB ROW_FORMAT=COMPRESSED AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; 94 | -------------------------------------------------------------------------------- /whoisxmlapi_bash_csv_to_mysqldb/loader_schema_regular.sql: -------------------------------------------------------------------------------- 1 | create table whois_record_flat_regular ( 2 | `whois_record_flat_id` bigint(20) NOT NULL AUTO_INCREMENT, 3 | `domainName` varchar(256), 4 | `registrarName` varchar(512), 5 | `contactEmail` varchar(256), 6 | `whoisServer` varchar(512), 7 | `nameServers` varchar(256), 8 | `createdDate` varchar(256), 9 | `updatedDate` varchar(256), 10 | `expiresDate` varchar(256), 11 | `standardRegCreatedDate` varchar(256), 12 | `standardRegUpdatedDate` varchar(256), 13 | `standardRegExpiresDate` varchar(256), 14 | `status` text, 15 | `Audit_auditUpdatedDate` varchar(256), 16 | `registrant_rawText` longtext, 17 | `registrant_email` varchar(256), 18 | `registrant_name` varchar(256), 19 | `registrant_organization` varchar(256), 20 | `registrant_street1` varchar(256), 21 | `registrant_street2` varchar(256), 22 | `registrant_street3` varchar(256), 23 | `registrant_street4` varchar(256), 24 | `registrant_city` varchar(64), 25 | `registrant_state` varchar(256), 26 | `registrant_postalCode` varchar(45), 27 | `registrant_country` varchar(45), 28 | `registrant_fax` varchar(45), 29 | `registrant_faxExt` varchar(45), 30 | `registrant_telephone` varchar(45), 31 | `registrant_telephoneExt` varchar(45), 32 | `administrativeContact_rawText` longtext, 33 | `administrativeContact_email` varchar(256), 34 | `administrativeContact_name` varchar(256), 35 | `administrativeContact_organization` varchar(256), 36 | `administrativeContact_street1` varchar(256), 37 | `administrativeContact_street2` varchar(256), 38 | `administrativeContact_street3` varchar(256), 39 | `administrativeContact_street4` varchar(256), 40 | `administrativeContact_city` varchar(64), 41 | `administrativeContact_state` varchar(256), 42 | `administrativeContact_postalCode` varchar(45), 43 | `administrativeContact_country` varchar(45), 44 | `administrativeContact_fax` varchar(45), 45 | `administrativeContact_faxExt` varchar(45), 46 | `administrativeContact_telephone` varchar(45), 47 | 
`administrativeContact_telephoneExt` varchar(45), 48 | `billingContact_rawText` longtext, 49 | `billingContact_email` varchar(256), 50 | `billingContact_name` varchar(256), 51 | `billingContact_organization` varchar(256), 52 | `billingContact_street1` varchar(256), 53 | `billingContact_street2` varchar(256), 54 | `billingContact_street3` varchar(256), 55 | `billingContact_street4` varchar(256), 56 | `billingContact_city` varchar(64), 57 | `billingContact_state` varchar(256), 58 | `billingContact_postalCode` varchar(45), 59 | `billingContact_country` varchar(45), 60 | `billingContact_fax` varchar(45), 61 | `billingContact_faxExt` varchar(45), 62 | `billingContact_telephone` varchar(45), 63 | `billingContact_telephoneExt` varchar(45), 64 | `technicalContact_rawText` longtext, 65 | `technicalContact_email` varchar(256), 66 | `technicalContact_name` varchar(256), 67 | `technicalContact_organization` varchar(256), 68 | `technicalContact_street1` varchar(256), 69 | `technicalContact_street2` varchar(256), 70 | `technicalContact_street3` varchar(256), 71 | `technicalContact_street4` varchar(256), 72 | `technicalContact_city` varchar(64), 73 | `technicalContact_state` varchar(256), 74 | `technicalContact_postalCode` varchar(45), 75 | `technicalContact_country` varchar(45), 76 | `technicalContact_fax` varchar(45), 77 | `technicalContact_faxExt` varchar(45), 78 | `technicalContact_telephone` varchar(45), 79 | `technicalContact_telephoneExt` varchar(45), 80 | `zoneContact_rawText` longtext, 81 | `zoneContact_email` varchar(256), 82 | `zoneContact_name` varchar(256), 83 | `zoneContact_organization` varchar(256), 84 | `zoneContact_street1` varchar(256), 85 | `zoneContact_street2` varchar(256), 86 | `zoneContact_street3` varchar(256), 87 | `zoneContact_street4` varchar(256), 88 | `zoneContact_city` varchar(64), 89 | `zoneContact_state` varchar(256), 90 | `zoneContact_postalCode` varchar(45), 91 | `zoneContact_country` varchar(45), 92 | `zoneContact_fax` varchar(45), 93 | `zoneContact_faxExt` varchar(45), 94 | `zoneContact_telephone` varchar(45), 95 | `zoneContact_telephoneExt` varchar(45), 96 | `registrarIANAID` varchar(45), 97 | primary key (`whois_record_flat_id`) 98 | )ENGINE=InnoDB ROW_FORMAT=COMPRESSED AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; 99 | -------------------------------------------------------------------------------- /whoisxmlapi_bash_csv_to_mysqldb/loader_schema_full.sql: -------------------------------------------------------------------------------- 1 | create table whois_record_flat ( 2 | `whois_record_flat_id` bigint(20) NOT NULL AUTO_INCREMENT, 3 | `domainName` varchar(256), 4 | `registrarName` varchar(512), 5 | `contactEmail` varchar(256), 6 | `whoisServer` varchar(512), 7 | `nameServers` varchar(256), 8 | `createdDate` varchar(256), 9 | `updatedDate` varchar(256), 10 | `expiresDate` varchar(256), 11 | `standardRegCreatedDate` varchar(256), 12 | `standardRegUpdatedDate` varchar(256), 13 | `standardRegExpiresDate` varchar(256), 14 | `status` text, 15 | `RegistryData_rawText` longtext, 16 | `WhoisRecord_rawText` longtext, 17 | `Audit_auditUpdatedDate` varchar(256), 18 | `registrant_rawText` longtext, 19 | `registrant_email` varchar(256), 20 | `registrant_name` varchar(256), 21 | `registrant_organization` varchar(256), 22 | `registrant_street1` varchar(256), 23 | `registrant_street2` varchar(256), 24 | `registrant_street3` varchar(256), 25 | `registrant_street4` varchar(256), 26 | `registrant_city` varchar(64), 27 | `registrant_state` varchar(256), 28 | `registrant_postalCode` 
varchar(45), 29 | `registrant_country` varchar(45), 30 | `registrant_fax` varchar(45), 31 | `registrant_faxExt` varchar(45), 32 | `registrant_telephone` varchar(45), 33 | `registrant_telephoneExt` varchar(45), 34 | `administrativeContact_rawText` longtext, 35 | `administrativeContact_email` varchar(256), 36 | `administrativeContact_name` varchar(256), 37 | `administrativeContact_organization` varchar(256), 38 | `administrativeContact_street1` varchar(256), 39 | `administrativeContact_street2` varchar(256), 40 | `administrativeContact_street3` varchar(256), 41 | `administrativeContact_street4` varchar(256), 42 | `administrativeContact_city` varchar(64), 43 | `administrativeContact_state` varchar(256), 44 | `administrativeContact_postalCode` varchar(45), 45 | `administrativeContact_country` varchar(45), 46 | `administrativeContact_fax` varchar(45), 47 | `administrativeContact_faxExt` varchar(45), 48 | `administrativeContact_telephone` varchar(45), 49 | `administrativeContact_telephoneExt` varchar(45), 50 | `billingContact_rawText` longtext, 51 | `billingContact_email` varchar(256), 52 | `billingContact_name` varchar(256), 53 | `billingContact_organization` varchar(256), 54 | `billingContact_street1` varchar(256), 55 | `billingContact_street2` varchar(256), 56 | `billingContact_street3` varchar(256), 57 | `billingContact_street4` varchar(256), 58 | `billingContact_city` varchar(64), 59 | `billingContact_state` varchar(256), 60 | `billingContact_postalCode` varchar(45), 61 | `billingContact_country` varchar(45), 62 | `billingContact_fax` varchar(45), 63 | `billingContact_faxExt` varchar(45), 64 | `billingContact_telephone` varchar(45), 65 | `billingContact_telephoneExt` varchar(45), 66 | `technicalContact_rawText` longtext, 67 | `technicalContact_email` varchar(256), 68 | `technicalContact_name` varchar(256), 69 | `technicalContact_organization` varchar(256), 70 | `technicalContact_street1` varchar(256), 71 | `technicalContact_street2` varchar(256), 72 | `technicalContact_street3` varchar(256), 73 | `technicalContact_street4` varchar(256), 74 | `technicalContact_city` varchar(64), 75 | `technicalContact_state` varchar(256), 76 | `technicalContact_postalCode` varchar(45), 77 | `technicalContact_country` varchar(45), 78 | `technicalContact_fax` varchar(45), 79 | `technicalContact_faxExt` varchar(45), 80 | `technicalContact_telephone` varchar(45), 81 | `technicalContact_telephoneExt` varchar(45), 82 | `zoneContact_rawText` longtext, 83 | `zoneContact_email` varchar(256), 84 | `zoneContact_name` varchar(256), 85 | `zoneContact_organization` varchar(256), 86 | `zoneContact_street1` varchar(256), 87 | `zoneContact_street2` varchar(256), 88 | `zoneContact_street3` varchar(256), 89 | `zoneContact_street4` varchar(256), 90 | `zoneContact_city` varchar(64), 91 | `zoneContact_state` varchar(256), 92 | `zoneContact_postalCode` varchar(45), 93 | `zoneContact_country` varchar(45), 94 | `zoneContact_fax` varchar(45), 95 | `zoneContact_faxExt` varchar(45), 96 | `zoneContact_telephone` varchar(45), 97 | `zoneContact_telephoneExt` varchar(45), 98 | `registrarIANAID` varchar(45), 99 | primary key (`whois_record_flat_id`) 100 | )ENGINE=InnoDB ROW_FORMAT=COMPRESSED AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; 101 | -------------------------------------------------------------------------------- /whoisxmlapi_csv2json/transform_json_verbose.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Python3 script for converting WhoisXML API csv files 4 | 
into JSON. 5 | This is the less portable version for python3 with verbose output. 6 | """ 7 | 8 | from argparse import ArgumentParser 9 | import csv 10 | import multiprocessing 11 | import json 12 | import sys 13 | import os 14 | from platform import system 15 | import tqdm 16 | 17 | VERSION = "0.0.2" 18 | MYNAME = sys.argv[0].replace('./', '') 19 | 20 | # Preparing arguments 21 | argparser = ArgumentParser(description='Convert WhoisXML API CSV files to JSON format.') 22 | argparser.add_argument('--version', 23 | help='Print version information and exit.', 24 | action='version', 25 | version=MYNAME + ' ver. ' + VERSION + '\n(c) WhoisXML API LLC.') 26 | argparser.add_argument('-i', '--path', 27 | help='input directory with uncompressed CSVs or single CSV file', 28 | type=str, required=True) 29 | argparser.add_argument('--key', help='primary key field for records, default "domainName"', 30 | type=str, default='domainName') 31 | argparser.add_argument('--threads', 32 | help='number of threads, default 1', 33 | type=int, default=1) 34 | argparser.add_argument('--force', help='overwrite existent files', action='store_true') 35 | argparser.add_argument('--human-readable', 36 | help='generate human readable output', 37 | action='store_true') 38 | argparser.add_argument('--no-progress', help='disable progress indicator', action='store_true') 39 | argparser.add_argument('--quiet', help='suppress all output', action='store_true') 40 | 41 | args = argparser.parse_args() 42 | 43 | # increase max size of the field 44 | if system() == 'Windows': 45 | csv.field_size_limit(2147483647) 46 | else: 47 | from sys import maxsize as csv_maxsize 48 | csv.field_size_limit(csv_maxsize) 49 | 50 | def print_verbose(text): 51 | """print messages if not in quiet mode""" 52 | if not args.quiet: 53 | print(text) 54 | 55 | def convert_json(csv_queue): 56 | """the actual job, done by each thread""" 57 | while not csv_queue.empty(): 58 | csv_file = csv_queue.get() 59 | json_file = os.path.join( 60 | os.path.dirname(csv_file), 61 | os.path.basename(csv_file).replace('.csv', '.json')) 62 | if args.force or not os.path.isfile(json_file): 63 | out_data = dict() 64 | with tqdm.tqdm(0, unit=' records', disable=args.no_progress) as pbar: 65 | pbar.set_description("Processing %s" % csv_file) 66 | with open(csv_file, 'rt') as infile: 67 | infile_csv = csv.DictReader(infile) 68 | for in_row in infile_csv: 69 | pbar.update(1) 70 | out_data.update({in_row[args.key]: {}}) 71 | for field in infile_csv.fieldnames: 72 | if field != args.key and in_row[field] != '': 73 | out_data[in_row[args.key]].update({field: in_row[field]}) 74 | with open(json_file, 'wt') as json_file_obj: 75 | print_verbose("Writing %s" % (json_file)) 76 | if args.human_readable: 77 | json_file_obj.write(json.dumps(out_data, indent=4)) 78 | else: 79 | json_file_obj.write(json.dumps(out_data)) 80 | del out_data 81 | 82 | 83 | if __name__ == '__main__': 84 | # populating queue 85 | csvQueue = multiprocessing.Queue() 86 | 87 | if os.path.isdir(args.path): 88 | for csv_f in os.listdir(args.path): 89 | if csv_f.endswith('.csv'): 90 | csvQueue.put(os.path.join(args.path, csv_f)) 91 | elif os.path.isfile(args.path) and args.path.endswith('.csv'): 92 | csvQueue.put(args.path) 93 | else: 94 | exit(1) 95 | 96 | threads = [] 97 | for t in range(0, args.threads): 98 | convert_thread = multiprocessing.Process(target=convert_json, args=(csvQueue, )) 99 | convert_thread.start() 100 | threads.append(convert_thread) 101 | for convert_thread in threads: 102 | convert_thread.join() 103 
| 
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/README.SSL.txt:
--------------------------------------------------------------------------------
1 | Important note 2023-06-07
2 | 
3 | SSL authentication is obsolete and no longer supported.
4 | With --disable-ssl-verification your credentials may still work, but use this at your own risk.
5 | 
6 | It is recommended to use regular password authentication instead.
7 | 
8 | ---------------------------legacy document follows
9 | install_p12.py
10 | 
11 | Setting up ssl authentication to use with download_whois_data.py
12 | 
13 | Provided by WhoisXML API, Inc.
14 | 
15 | dated: 2018-12-10
16 | updated: 2020-10-02
17 | 
18 | Contents:
19 | ---------
20 | 
21 | 1. Brief summary
22 | 
23 | 2. Prerequisites
24 | 
25 | 3. Installing the auth credentials
26 | 
27 | 4. Using the downloader script
28 | 
29 | 1. Brief summary
30 | ----------------
31 | 
32 | The goal is to set up your authentication credentials so that you can
33 | use download_whois_data.py with the option --sslauth instead of
34 | password authentication.
35 | 
36 | 2. Prerequisites
37 | ----------------
38 | 
39 | You will need a p12 pack, a file provided to you by WhoisXML API,
40 | Inc. You need a password you have received in conjunction with this
41 | file. (This is the file imported to your system or your browser in
42 | order to facilitate ssl auth.)
43 | 
44 | To be used with the downloader script, this file has to be converted
45 | into three other files suitable for the script.
46 | 
47 | We remark that the files are the same as those used by the bash
48 | downloader (in the whoisxmlapi_whoisdownload_bash subdirectory of the git repo
49 | https://github.com/whois-api-llc/whois_database_download_support). So
50 | you may choose to run the bash script for the conversion, which is
51 | described in the file "README.SSL" in that project. You may choose to
52 | copy the files whoisxmlapi.ca, client.crt and client.key generated
53 | with the bash converter next to download_whois_data.py. If you have
54 | done so, you do not need to read further; you can use SSL auth
55 | readily.
56 | 
57 | So let us assume that given your p12 pack and password, you want to
58 | generate the three required files with the Python script
59 | 
60 | install_p12.py.
61 | 
62 | In addition to the prerequisites of download_whois_data.py (described
63 | in the main README of the project), you will need two additional Python
64 | packages. So as root, or in an administrator command-prompt on
65 | native Windows systems (prepared to run download_whois_data.py), do
66 | 
67 | pip3 install pyOpenSSL pycryptodome
68 | 
69 | 
70 | if you are using series 3 Python.
71 | 
72 | On older Python3 versions the legacy pycrypto package may also work:
73 | 
74 | pip3 install pyOpenSSL pycrypto
75 | 
76 | whereas if you use series 2 Python, do
77 | 
78 | pip2 install pycrypto pyopenssl
79 | 
80 | to install the requirements. Alternatively you may search for the
81 | required packages in your operating system's package manager, or
82 | create a Python virtual environment.
83 | 
84 | 
85 | 3. Installing the auth credentials
86 | ----------------------------------
87 | 
88 | You need to do the following just once:
89 | 
90 | -Put your p12 pack file into the directory where the download scripts
91 | (and this file) reside.
92 | 
93 | -Run the following command in the directory:
94 | 
95 | install_p12.py
96 | 
97 | (either from a shell command-line or by double-clicking it on
98 | Windows).
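If nothing happens this way (e.g. the file is not marked as executable, or no file association is set up for Python scripts), you can equally start it explicitly through the interpreter; for instance, assuming Python 3 is on your PATH:

python3 install_p12.py
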
A series of dialog windows will guide you through the
99 | simple process:
100 | 
101 | -You have to select the pack.p12 file
102 | 
103 | -Then you will be prompted for the password.
104 | 
105 | If everything went right, a dialog window will appear informing you
106 | that the files are prepared and you can use the script. If an error
107 | occurs, you will be informed about it in a window; in this case, check
108 | the files and the password and run the script again.
109 | 
110 | The files generated by the script are client.crt, client.key and
111 | whoisxmlapi.ca, needed for the authentication; they are placed
112 | next to the script by default. The key file will be readable only by
113 | you.
114 | 
115 | IMPORTANT: keep the generated client.key confidential.
116 | 
117 | 4. Using the script
118 | -------------------
119 | 
120 | To use ssl authentication instead of password authentication, add
121 | 
122 | --sslauth
123 | 
124 | to the options of the script when you use it. You will not need the
125 | --user and --password options then, and the script will ignore the
126 | password configuration ini file.
127 | 
--------------------------------------------------------------------------------
/whoisxmlapi_csv2json/README:
--------------------------------------------------------------------------------
1 | Documentation for the WhoisXML API
2 | 
3 | WHOIS CSV to JSON converter scripts
4 | 
5 | Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
6 | -------------------------------------------------------------------
7 | 
8 | The scripts are provided for our subscribers.
9 | 
10 | The aim of the scripts is to convert WHOIS data downloaded in CSV
11 | format to JSON.
12 | 
13 | It is a cross-platform solution for end-users. It can be used on
14 | Windows and Linux/Unix-type systems and does not require special
15 | experience.
16 | 
17 | The user should be familiar, however, with the feeds and data formats,
18 | which are described in the reference manuals of the respective feeds.
19 | 
20 | Script availability:
21 | --------------------
22 | 
23 | The primary location of this script is the public GitHub repository
24 | 
25 | https://github.com/whois-api-llc/whois_database_download_support
26 | 
27 | The scripts are located in the subdirectory
28 | 
29 | whoisxmlapi_csv2json
30 | 
31 | Contents
32 | --------
33 | 
34 | 1. Quickstart
35 | 2. Options of transform_json.py
36 | 3. A less portable verbose version: transform_json_verbose.py
37 | 4. Output file format
38 | 
39 | 1. Quickstart
40 | -------------
41 | 
42 | The fastest way of using the script is the following:
43 | 
44 | - Make sure you have either Python 2 (tested with 2.7.10) or Python 3
45 |   installed and working on your system.
46 | 
47 | - Install the following Python packages:
48 | 
49 |   argparse, csv, multiprocessing, json
50 | 
51 |   (These are part of the standard library in recent Python versions.)
52 |   If one is missing, you can install it with pip, Python's package manager
53 |   ("pip install <package>" in a command-line), or with the package manager of your system.
54 | 
55 | - Download "simple", "regular" or "full" CSV files from WhoisXML API
56 |   data feeds. You can use this script with files from quarterly
57 |   databases as well as daily feeds.
58 | 
59 |   Consult the documentation of WhoisXML API data download products
60 |   for more information.
The manuals are available from
61 | 
62 | http://www.domainwhoisdatabase.com/docs/
63 | 
64 | - Having your CSV files in a given directory, say "foo/", you can
65 |   convert them to JSON by using the script in the command line the
66 |   following way:
67 | 
68 |   transform_json.py -i foo/
69 | 
70 |   The script will not produce any console output.
71 |   Depending on the number of files, the process may take a
72 |   long time.
73 | 
74 | The JSON files will be next to their CSV counterparts with the same
75 | basename.
76 | 
77 | 2. Options of transform_json.py
78 | -------------------------------
79 | 
80 | The script is self-documenting; you can obtain the description with
81 | 
82 | transform_json.py --help
83 | 
84 | The output lists the options:
85 | ---
86 | usage: transform_json.py [-h] [--version] -i PATH [--key KEY]
87 |                          [--threads THREADS] [--force] [--human-readable]
88 | 
89 | Convert CSV to JSON format
90 | 
91 | optional arguments:
92 |   -h, --help            show this help message and exit
93 |   --version             Print version information and exit.
94 |   -i PATH, --path PATH  input directory with uncompressed CSVs or single CSV
95 |                         file
96 |   --key KEY             primary key field for records, default "domainName"
97 |   --threads THREADS     number of threads, default 1
98 |   --force               overwrite existent files
99 |   --human-readable      generate human readable output
100 | 
101 | --
102 | Comments:
103 | 
104 | - The script supports multi-threaded operation with the --threads option.
105 | - The --human-readable option results in JSON files easily readable as text.
106 | - See also the description of the output file format in Section 4.
107 | 
108 | 3. A less portable verbose version: transform_json_verbose.py
109 | -------------------------------------------------------------
110 | 
111 | If you want to follow the progress of the conversion, there is another
112 | script available under the name "transform_json_verbose.py".
113 | 
114 | It requires Python 3 and an additional python package, "tqdm", to work.
115 | 
116 | In addition to the options of transform_json.py, this script produces
117 | a verbose output by default:
118 | 
119 | -A progress bar showing the status of the reading of files.
120 | 
121 | -A message about writing JSON files.
122 | 
123 | These can be suppressed by the
124 | 
125 | --no-progress
126 | 
127 | and
128 | 
129 | --quiet
130 | 
131 | options respectively.
132 | 
133 | Apart from this, the operation of the script is the same as that of
134 | transform_json.py.
135 | 
136 | 4. Output file format
137 | ---------------------
138 | 
139 | The resulting file contains a single JSON string. Within this, at the first
140 | level, each record appears as a value in a key-value pair, where the
141 | key is the field specified by the --key option of the scripts, the
142 | domain name by default. In the value, the non-empty fields of the
143 | record appear as key-value pairs.
144 | 
145 | If the --human-readable option was set, the file contains proper
146 | indentations and newlines to be easily readable as plain text. Without
147 | the option, a JSON file for machine processing is obtained.
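As an illustration (a hypothetical record, not actual feed data), a CSV file with the header line

domainName,registrarName,contactEmail

and the single data line

example.com,Example Registrar LLC,admin@example.com

is converted, with the default --key and the --human-readable option, into

{
    "example.com": {
        "registrarName": "Example Registrar LLC",
        "contactEmail": "admin@example.com"
    }
}

Empty fields would simply be omitted from the record's key-value pairs.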
148 | 
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/FAQ.txt:
--------------------------------------------------------------------------------
1 | FAQ.txt for
2 | 
3 | download_whois_data.py
4 | 
5 | Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
6 | -------------------------------------------------------------------
7 | 
8 | Q1: I obtained an error stating that my login and password are
9 | invalid. I can access the page with my browser. What happened?
10 | 
11 | A1: It might be the case that you have specified a wrong date or
12 | database version. E.g. you chose v20 of quarterly cctld feeds when the
13 | last version is v8. As the script does not have data on the available
14 | db versions, it derives file locations from the data you have
15 | specified, but these locations do not exist on our server. For
16 | security reasons the server will report invalid login credentials, so
17 | it will appear as if there was something wrong with your password.
18 | 
19 | Please double-check the feed name and the parameters you are using.
20 | --------------------------------------------------------------------
21 | 
22 | Q2: The script says it cannot determine the list of supported
23 | tlds. Why?
24 | 
25 | A2: In spite of all of our efforts it may happen that the necessary
26 | supported_tlds file is missing. Please contact support in this case.
27 | 
28 | --------------------------------------------------------------------
29 | 
30 | Q3: The script reports files which could not be
31 | downloaded. When will they be available?
32 | 
33 | A3: It may happen that some files derived according to the naming
34 | logic of a given feed do not exist at the time of downloading. One
35 | reason might be that the file is not yet prepared when you run the
36 | script. If you re-run the script later with the same parameters, it
37 | will not redownload files which are already there and have not been
38 | changed, but it shall find the missing ones. It may also occur that
39 | the file will never exist. In feeds devoted to changes
40 | (domain_names_new, domain_names_dropped), for instance, it might
41 | happen that there were no changes in the data on the given day. We do not
42 | store empty files, so these files will be reported as unavailable, but
43 | this is normal.
44 | 
45 | --------------------------------------------------------------------
46 | 
47 | Q4: I do not want the script to check all supported TLDs with the
48 | --all-tlds option in case of daily feeds; I want it to try downloading
49 | only for those TLDs in which there was a change on the given day and
50 | thus the data file exists.
51 | 
52 | A4: Using the --only-changed option will result in the desired
53 | behavior. It does not work for all daily feeds, e.g. "delta" feeds do
54 | not support it, but "new" and "dropped" feeds do. If downloading for
55 | multiple days, an attempt will be made to download data for all the
56 | TLDs which had a change on at least one of the days.
57 | 
58 | --------------------------------------------------------------------
59 | 
60 | Q5: So far I have been using "download_whois_info.py" which I had
61 | downloaded from a release a few years ago. I've just realized that it
62 | is not supported anymore and it has been replaced by
63 | "download_whois_data". I decided to switch to the new script, but the
64 | options of the script are different and not compatible. Why? Can you
65 | make it compatible?
66 | 
67 | A5: We decided to redesign the python downloader in 2017 because
68 | "download_whois_info.py" was not scalable; initially it was intended
69 | as a small example script, but the requirements for a downloader
70 | script went far beyond the original idea, so a redesign was
71 | unavoidable. The current script has many options and many more
72 | capabilities than the legacy one. So supporting the legacy and rather
73 | illogical options would lead to an extremely large number of
74 | options. In addition, the operation logic behind the new script is also
75 | different; it is not always possible to map the new options to the old
76 | ones. So it will not be made compatible, but the command-line can be
77 | easily rewritten along the following lines:
78 | 
79 | - old script option: -c
80 |   use --feed instead
81 | 
82 | - old script option: -l or --login:
83 |   use --username instead
84 | 
85 | - old script option: -p or --password:
86 |   use --password
87 | 
88 | - old script option: -d or --date, with YYYY-MM-DD:
89 |   use --startdate with YYYYMMDD
90 | 
91 | - old script option: --end-date, with YYYY-MM-DD:
92 |   use --enddate with YYYYMMDD
93 | 
94 | - old script option: -v or --version:
95 |   use --dbversion instead
96 | 
97 | - old script option: -t, --tld, possibly with a space-separated list
98 |   use --tlds; for more tlds, use a comma-separated list.
99 |   It has no default value. For all tlds, use the --all-tlds option
100 | 
101 | - old script option: -f
102 |   Use --dataformats instead, a comma-separated list for all formats.
103 |   There is no "all" possibility. Use the --list-dataformats option to list
104 |   the available formats for each feed.
105 | 
106 | - old script option: -odir or --output-dir
107 |   Use --output-dir. It has no default value; hence, it is a mandatory
108 |   argument now.
109 | 
110 | - old script option: --interactive
111 |   Now it invokes the GUI mode of the script.
112 |   The old function of --interactive is not supported.
113 |   Consult the documentation on how to influence
114 |   redownloading behavior, especially the --maxtries option
115 | 
116 | - old script option: --no-override
117 |   No such option. Currently if a file is there,
118 |   it will not be downloaded again unless its md5sum
119 |   does not match the file. To redownload,
120 |   delete the respective file.
121 | 
122 | 
123 | 
--------------------------------------------------------------------------------
/whoisxmlapi_percona_loader_scripts/README.txt:
--------------------------------------------------------------------------------
1 | Documentation for WhoisXML API
2 | MySQL binary dump loader scripts
3 | 
4 | Document version 1.0 dated 2017-07-24
5 | 
6 | Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
7 | 
8 | The scripts are provided for our subscribers to load binary mysql dumps
9 | obtained from our quarterly feeds into MySQL databases. The scripts
10 | can also be used as an example to create custom loader scripts.
11 | 
12 | Script availability:
13 | --------------------
14 | 
15 | The primary location of this script is the public GitHub repository
16 | 
17 | https://github.com/whois-api-llc/whois_database_download_support
18 | 
19 | The script is located in the subdirectory
20 | 
21 | whoisxmlapi_percona_loader_scripts
22 | 
23 | Contents:
24 | ---------
25 | 
26 | 1. List of files
27 | 
28 | 2. Obtaining data
29 | 
30 | 3. Software environment
31 | 
32 | 4. Using the script
33 | 
34 | 1.
List of files:
35 | ----------------
36 | 
37 | README : this file
38 | load_mysql_utils.sh : utility functions used in all scripts.
39 |                       This should be in the same
40 |                       directory as the script itself.
41 | load_whois_percona.sh : The script to run.
42 | whoiscrawler_mysql_schema.sql : The schema file needed by the script.
43 |                       By default it should be in the same directory
44 |                       as the script.
45 | legacy : a directory containing legacy versions of
46 |          the script which were in use before July 2017.
47 | 
48 | 2. Obtaining data
49 | -----------------
50 | 
51 | Data files which can be loaded by these scripts can be obtained from
52 | 
53 | http://domainwhoisdatabase.com/whois_database/v20/database_dump/percona
54 | 
55 | (replace v20 by the actual version)
56 | 
57 | and for cctlds from
58 | 
59 | http://www.domainwhoisdatabase.com/domain_list_quarterly/v6/database_dump/percona/
60 | 
61 | (replace v6 by the actual version)
62 | 
63 | 3. Software environment
64 | -----------------------
65 | 
66 | The present version was tested with
67 | 
68 | mysql Ver 14.14 Distrib 5.7.18
69 | 
70 | and
71 | 
72 | GNU bash, 4.3.48(1)-release
73 | 
74 | on a machine running Ubuntu Linux 16.04.2 LTS.
75 | 
76 | The scripts are standard ones which should also work with earlier versions
77 | of bash and on other systems (Linux, Mac OS X, and Windows). They
78 | should be compatible with other versions of MySQL, too.
79 | 
80 | If you run into an incompatibility, please contact our support.
81 | 
82 | 4. Using the script
83 | -------------------
84 | 
85 | Step 1. : obtain data
86 | .....................
87 | We assume that you are working in the directory where this script and
88 | the files listed in Section 1 reside.
89 | 
90 | Create a subdirectory for the data to be downloaded, say "whois_data".
91 | 
92 | Download the data from
93 | 
94 | http://domainwhoisdatabase.com/whois_database/v20/database_dump/percona/
95 | 
96 | (please replace v20 with the database version you are using)
97 | 
98 | into this directory. You need the files $tld.7z for the tld-s you are
99 | interested in. You can use the provided md5 and sha sums to verify
100 | your downloads.
101 | 
102 | Assume now that you are interested in the domains "aaa" and "aarp",
103 | so you have "aaa.7z" and "aarp.7z" in the directory "whois_data".
104 | 
105 | Step 2. Verify your files
106 | .........................
107 | This step can be omitted, but it is recommended to do it.
108 | Run the following command-line in the script's directory:
109 | 
110 | ./load_whois_percona.sh --import-data-dir=whois_data --tlds=aaa,aarp --db-version=v20 --verbose --dry-run
111 | 
112 | (--tlds should be replaced by the comma-separated list of tld-s you
113 | are interested in, and you have to provide the version, v20 in our case.
114 | --dry-run ensures that the script will not yet do anything with MySQL.)
115 | 
116 | If the script does not report any error, you have all the required
117 | data files. Notice also that the script has extracted the 7zipped data.
118 | 
119 | Step 3. Verify your database
120 | ............................
121 | 
122 | Please verify that the databases named "whoiscrawler_$dbver_$tld" do
123 | not yet exist in your MySQL server. If they exist, please drop them.
124 | 
125 | Verify that you have a user in MySQL who can create tables, etc. The
126 | easiest way is to use the root user. If you set up ~/.my.cnf so that
127 | the root user logs in without a password when issuing the "mysql"
128 | command, you will not need to specify the mysql user and password.
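For reference, a minimal ~/.my.cnf for this purpose might look like the following sketch (the password value is a placeholder; keep the file private, e.g. with "chmod 600 ~/.my.cnf"):

[client]
user=root
password=YOUR_MYSQL_ROOT_PASSWORD
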
129 | 
130 | In case of large domains, it is also recommended to apply the
131 | fine-tuning settings on your database as described in the Reference Manual
132 | of the database release.
133 | 
134 | Make sure that your mysql server stores its data in /var/lib/mysql .
135 | If it stores them in some other directory, you will have to add the
136 | --mysql-data-dir=DIRECTORY option to the command-line in the next
137 | step, where DIRECTORY is the respective directory of your server.
138 | 
139 | During the load process the script has to restart your mysql server
140 | several times. You are supposed to run it with a superuser so this
141 | should be possible. The mysql stop and start commands are configured
142 | in lines 25 and 26 of the script. By default we provide a standard
143 | Linux setting which is the default of most Linux systems (and other
144 | System V type UNIX-like systems). If you use another platform, please
145 | customize these lines (e.g. "net stop MySQL57" and "net start MySQL57"
146 | on Windows).
147 | 
148 | Step 4. Load the data
149 | .....................
150 | 
151 | To load your data, do
152 | 
153 | sudo ./load_whois_percona.sh --import-data-dir=whois_data --tlds=aaa,aarp --db-version=v20 --verbose
154 | 
155 | You need to have write permission to MySQL's data directory to succeed. The
156 | easiest way is to run your script as root, e.g. with sudo, as above.
157 | You may set up some less risky way to do it without sudo, however.
158 | 
159 | If your ~/.my.cnf is not set up to enable the root user (or some other
160 | user with database creation permissions), please use the --mysql-user
161 | and --mysql-password options, too, in order to specify the required
162 | username and password.
163 | 
164 | You will now have the data loaded into the respective databases.
165 | 
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/README:
--------------------------------------------------------------------------------
1 | Documentation for WhoisXML API
2 | MySQL ASCII dump loader scripts
3 | (BASH version)
4 | 
5 | (Note: there is a separate file named README_Python.txt for Python
6 | scripts. If you do not plan to work with huge domains such as .com or
7 | you are using a Windows system, we recommend first checking the Python
8 | scripts, which contain a simple GUI and are easier to use than the
9 | bash shell scripts.)
10 | 
11 | Document version 1.0 dated 2017-07-14
12 | 
13 | Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
14 | 
15 | The scripts are provided for our subscribers to load ASCII mysql dumps
16 | obtained from our quarterly feeds into MySQL RDBMS to set up a WHOIS
17 | database. The scripts can also be used as an example to create custom
18 | loader scripts.
19 | 
20 | 
21 | Script availability:
22 | --------------------
23 | 
24 | The primary location of this script is the public GitHub repository
25 | 
26 | https://github.com/whois-api-llc/whois_database_download_support
27 | 
28 | The script is located in the subdirectory
29 | 
30 | whoisxmlapi_mysqldump_loaders
31 | 
32 | 
33 | Contents:
34 | ---------
35 | 
36 | 1. List of files
37 | 
38 | 2. Obtaining data
39 | 
40 | 3. Software environment
41 | 
42 | 4. Loading schema and table data from separate files
43 | 
44 | 5. Loading schema and table data from a single file
45 | 
46 | 6. Loading multiple tlds and other usage examples
47 | 
48 | 1.
List of files:
51 | ----------------
52 | 
53 | README : this file
54 | load_mysql_utils.sh : utility functions used in all scripts.
55 |                       This should be in the same
56 |                       directory as the scripts themselves
57 | load_mysql_data_per_tables.sh : script to load the schema first, then the data
58 |                       from separate files
59 | load_mysql_data_all.sh : script to load single-file backups
60 | 
61 | legacy : a directory containing legacy versions of
62 |          these scripts which were in use before July 2017.
63 | README_Python.txt : Documentation of the Python scripts for the same task
64 | load_mysql_data.py : Python (series 2) script for the same task.
65 |          Documented separately in the file README_Python.txt
66 | 
67 | 2. Obtaining data
68 | -----------------
69 | 
70 | Sample data can be obtained from
71 | 
72 | http://domainwhoisdatabase.com/whois_database/sample/gtlds/v20/mysqldump_sample
73 | 
74 | (replace v20 by the actual version)
75 | 
76 | Production data can be obtained from
77 | 
78 | http://domainwhoisdatabase.com/whois_database/v20/database_dump/mysqldump/
79 | 
80 | (replace v20 by the actual version)
81 | 
82 | Single-file backups of production data can also be downloaded with our downloader scripts. Example:
83 | 
84 | ./whoisdownload.sh --verbose --user my_username --password my_password --db-version v20 --data-feeds whois_database --tld "aaa" --file-format sql --output-dir=testdir
85 | 
86 | We refer to the documentation of the downloader scripts for further details.
87 | 
88 | 3. Software environment
89 | -----------------------
90 | 
91 | The present version was tested with
92 | 
93 | mysql Ver 14.14 Distrib 5.7.18
94 | 
95 | and
96 | 
97 | GNU bash, 4.3.48(1)-release
98 | 
99 | on a machine running Ubuntu Linux 16.04.2 LTS.
100 | 
101 | The scripts are standard ones which should also work with earlier versions
102 | of bash and on other systems (Linux, Mac OS X, and Windows). They
103 | should be compatible with other versions of MySQL, too.
104 | 
105 | If you run into an incompatibility, please contact our support.
106 | 
107 | 4. Loading schema and table data from separate files
108 | ----------------------------------------------------
109 | 
110 | Note: in the tasks described here and in Section 5, the syntax of the
111 | scripts used is the same if you use the --tld option.
112 | 
113 | In the following example we plan to create our MySQL table for the tld
114 | "aaa", loading the schema first, then the data.
115 | 
116 | This approach is recommended for large tlds such as "com". In such
117 | cases we also recommend using the --show-progress option, which draws
118 | a progress bar showing the loading status of each file and an
119 | estimated completion time. Note that e.g. loading the
120 | data of the "com" domain will take several days, so it is important to
121 | follow what is going on.
122 | 
123 | As an input we need the files
124 | 
125 | whoiscrawler_$version_$tld_mysql_schema.sql.gz
126 | 
127 | (e.g. whoiscrawler_v20_aaa_mysql_schema.sql.gz) and the "tables"
128 | subdirectory in the same directory as this file.
129 | 
130 | The script to be used is load_mysql_data_per_tables.sh.
131 | 
132 | Run the script with the --help option to see the parameters and
133 | examples for loading your data (the examples in the scripts' help messages
134 | are for loading the data of the "aaa" domain).
135 | 
136 | Note: the script has three options which can be used to do the job
137 | partially, or the whole job in multiple steps:
138 | 
139 | --no-create-db skips the step of creating a new database.
140 |                In this case the script assumes that the MySQL database
141 |                to be used already exists.
142 | --data-only    skips the loading of the schema,
143 |                only loads data into the database.
144 |                The database is assumed to exist in this case, too.
145 | --schema-only  Loads the schema only (and creates the database, unless
146 |                --no-create-db is also given). The data are not loaded.
147 | 
148 | 
149 | 5. Loading single-file backups
150 | ------------------------------
151 | 
152 | Note: in the tasks described here and in Section 4, the syntax of the
153 | scripts used is the same if you use the --tld option.
154 | 
155 | In the following example we plan to create our MySQL table for the tld
156 | "aaa", loading its single-file backup.
157 | 
158 | As an input we need the file
159 | 
160 | whoiscrawler_$version_$tld_mysql.sql.gz
161 | 
162 | E.g. whoiscrawler_v20_aaa_mysql.sql.gz .
163 | 
164 | The script to be used is load_mysql_data_all.sh.
165 | 
166 | Run the script with the --help option to see the parameters and
167 | examples for loading your data (the examples in the scripts' help messages
168 | are for loading the data of the "aaa" domain).
169 | 
170 | 6. Loading data for multiple tlds
171 | ---------------------------------
172 | 
173 | Assume you have downloaded data for the following tlds from the v20
174 | quarterly release:
175 | 
176 | asia,us,biz,mobi,info,org,net,com
177 | 
178 | and you have placed the data into the subdirectories
179 | 
180 | database_dump/mysqldump/$tld
181 | 
182 | of the directory where the scripts reside.
183 | 
184 | Assume you want to load them into databases named "production_db_$tld"
185 | with your mysql user "whoisuser" who has the password "whoispassword".
186 | 
187 | To load them all so that the schema is loaded first, then the data from
188 | tables, the following command-line will do the job in bash:
189 | 
190 | for tld in asia us biz mobi info org net com; do ./load_mysql_data_per_tables.sh --mysql-database=production_db_$tld --mysql-user=whoisuser --mysql-password=whoispassword --schema-files-dir=database_dump/mysqldump --tld=$tld --db-version=v20;done
191 | 
192 | Alternatively, you may load the data from single files:
193 | 
194 | for tld in asia us biz mobi info org net com; do ./load_mysql_data_all.sh --mysql-database=production_db_$tld --mysql-user=whoisuser --mysql-password=whoispassword --dump-files-dir=database_dump/mysqldump --tld=$tld --db-version=v20;done
195 | 
196 | (Note that we have changed the name of the script only.)
197 | 
198 | The above examples can be used as a template to manage various
199 | situations in bash.
200 | 
--------------------------------------------------------------------------------
/whoisxmlapi_bash_csv_to_mysqldb/load_csv_file_into_db.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #Sample script to load the downloaded csv into a database
4 | #Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
5 | #
6 | #Note: IF YOU ARE READING THIS SCRIPT JUST TO COLLECT IDEAS FOR YOUR OWN LOADER,
7 | #      VISIT THE END OF THE FILE WHERE THE REAL WORK IS DONE
8 | #
9 | # Global variables.
10 | #
11 | LANG=C
12 | LC_ALL=C
13 | VERSION="0.0.3"
14 | VERBOSE="no"
15 | DEBUG="no"
16 | MYNAME=$(basename $0)
17 | 
18 | 
19 | 
20 | #No mysql stuff by default. This is set by mandatory args.
21 | unset MYSQL_USER
22 | unset MYSQL_PASSWORD
23 | unset MYSQL_DATABASE
24 | 
25 | #
26 | # Prints the version number and exits.
27 | # 28 | function printVersionAndExit() 29 | { 30 | echo "$MYNAME Version $VERSION" 31 | echo "" 32 | exit 0 33 | } 34 | 35 | function printHelpAndExit() 36 | { 37 | echo "Usage: $MYNAME [OPTION]..." 38 | echo "$MYNAME -- loads data from a csv file downloaded from WhoisXML API feeds" 39 | echo " into a table in a mysql database." 40 | echo "" 41 | echo " -h, --help Print this help and exit." 42 | echo " -v, --version Print version information and exit." 43 | echo " --verbose Print more messages." 44 | echo " --mysql-user=USERNAME User name to login to the mysql database (optional)." 45 | echo " --mysql-password=PASSWORD Password to login to the data source (optional)." 46 | echo " --mysql-database=DATABASE The name of the mysql database to load data into." 47 | echo " --csv-format=FORMAT The format of the csv file to be loaded. Must be one of 'regular', 'simple' or 'full'." 48 | echo " --schema-file=SCHEMAFILE The schema file to be used when loading. These are provided with the script." 49 | echo " --csv-file=CSVFILE The csv file to be loaded." 50 | echo "" 51 | echo "Example:" 52 | echo "$MYNAME --mysql-user=whoisuser --mysql-password=whoispassword --mysql-database=whoisdatabase --schema-file=loader_schema_simple.sql --csv-file=1.csv --csv-format=simple" 53 | echo "" 54 | echo 55 | echo "" 56 | echo "The table into which data are loaded is " 57 | echo " whois_record_flat for 'full' csv-s, " 58 | echo " whois_record_flat_simple for 'simple' csv-s, and" 59 | echo " whois_record_flat_regular for 'regular' csv-s." 60 | echo "" 61 | echo "Note: record id-s are auto incremented," 62 | echo " so each record is loaded again when the script is run." 63 | echo " This can lead to repetitions." 64 | echo "" 65 | exit 1 66 | } 67 | 68 | # 69 | # 70 | # Prints all the arguments but only if the program is in the verbose mode. 71 | # 72 | function printVerbose() 73 | { 74 | if [ "$VERBOSE" == "true" ]; then 75 | echo $* >&2 76 | fi 77 | } 78 | 79 | # 80 | # Prints an error message to the standard error. The text will not mixed up with 81 | # the data that is printed to the standard output. 82 | # 83 | function printError() 84 | { 85 | echo "$*" >&2 86 | } 87 | 88 | function printMessage() 89 | { 90 | echo -n "$*" >&2 91 | } 92 | 93 | function printMessageNl() 94 | { 95 | echo "$*" >&2 96 | } 97 | 98 | function printDebug() 99 | { 100 | if [ "$DEBUG" == "yes" ]; then 101 | echo "$*" >&2 102 | fi 103 | } 104 | 105 | 106 | ARGS=$(\ 107 | getopt -o hv \ 108 | -l "help,verbose,debug,version,mysql-database:,mysql-user:,mysql-password:,csv-format:,\ 109 | csv-file:,schema-file:" \ 110 | -- "$@") 111 | 112 | 113 | if [ $? -ne 0 ]; then 114 | exit 6 115 | fi 116 | 117 | eval set -- "$ARGS" 118 | 119 | while true; do 120 | case "$1" in 121 | -h|--help) 122 | shift 123 | printHelpAndExit 124 | ;; 125 | 126 | --verbose) 127 | shift 128 | VERBOSE="true" 129 | VERBOSEARG="--verbose" 130 | ;; 131 | 132 | --debug) 133 | shift 134 | DEBUG="yes" 135 | VERBOSEARG="--verbose" 136 | ;; 137 | 138 | -v|--version) 139 | shift 140 | printVersionAndExit 141 | ;; 142 | 143 | --mysql-user) 144 | shift 145 | MYSQL_USER=$1 146 | shift 147 | ;; 148 | 149 | --mysql-password) 150 | shift 151 | export MYSQL_PWD=$1 152 | shift 153 | ;; 154 | 155 | --mysql-database) 156 | shift 157 | MYSQL_DATABASE=$1 158 | shift 159 | ;; 160 | 161 | 162 | --csv-format) 163 | shift 164 | if echo $1 | grep --quiet -e "simple\|regular\|full"; then 165 | FORMAT=$1 166 | else 167 | printError "Supported csv formats are: simple, regular, and full." 
168 |             exit 1
169 |         fi
170 | 
171 |         shift
172 |         ;;
173 | 
174 |     --csv-file)
175 |         shift
176 |         CSV_FILE=$(readlink -e "$1")
177 |         if ! [ -f "$CSV_FILE" ]; then
178 |             printError "The csv file $CSV_FILE is not found."
179 |             exit 111
180 |         fi
181 |         shift
182 |         ;;
183 | 
184 |     --schema-file)
185 |         shift
186 |         SCHEMA_FILE=$(readlink -e "$1")
187 |         if ! [ -f "$SCHEMA_FILE" ]; then
188 |             printError "The schema file $SCHEMA_FILE is not found."
189 |             exit 1
190 |         fi
191 |         shift
192 |         ;;
193 |     --)
194 |         shift
195 |         break
196 |         ;;
197 | 
198 |     *)
199 |         ;;
200 | esac
201 | done
202 | 
203 | #some verification before doing the real job
204 | 
205 | #Set up mysql login credentials if needed
206 | if [ -n "$MYSQL_USER" ]; then
207 |     MYSQL_ARGUMENTS="--user=$MYSQL_USER"
208 | #    if [ -n "$MYSQL_PASSWORD" ];then
209 | #        MYSQL_ARGUMENTS="$MYSQL_ARGUMENTS --password=$MYSQL_PASSWORD"
210 | #    fi;
211 | fi;
212 | 
213 | printDebug "Mysql arguments: $MYSQL_ARGUMENTS"
214 | printDebug "Mysql password: $MYSQL_PWD"
215 | 
216 | if [ -z "$MYSQL_DATABASE" ]; then
217 |     printError "Mysql database not specified. See $MYNAME --help"
218 |     exit 1
219 | fi
220 | if [ ! -f "$CSV_FILE" ]; then
221 |     printError "Input csv file not specified or does not exist. See $MYNAME --help"
222 |     exit 1
223 | fi
224 | if [ ! -f "$SCHEMA_FILE" ]; then
225 |     printError "Schema file not specified or does not exist. See $MYNAME --help"
226 |     exit 1
227 | fi
228 | CSV_FILE=`readlink -e $CSV_FILE`
229 | SCHEMA_FILE=`readlink -e $SCHEMA_FILE`
230 | case ${FORMAT} in
231 |     simple|regular )
232 |         table="whois_record_flat_${FORMAT}"
233 |         ;;
234 |     full )
235 |         table="whois_record_flat"
236 |         ;;
237 |     * )
238 |         echo "FORMAT must be specified (simple, regular, or full)"
239 |         exit 1
240 |         ;;
241 | esac
242 | 
243 | #HERE WE DO THE REAL WORK.
244 | #IF YOU USE THIS SCRIPT JUST TO COLLECT IDEAS, START READING HERE
245 | 
246 | if [[ -z $(mysql $(eval echo "$MYSQL_ARGUMENTS") -A --skip-column-names ${MYSQL_DATABASE} <<< "SHOW TABLES LIKE \"${table}\";") ]]
247 | then
248 |     printVerbose "Loading schema for table $table."
249 |     mysql $(eval echo "$MYSQL_ARGUMENTS") ${MYSQL_DATABASE} ${VERBOSEARG} <${SCHEMA_FILE}
250 | else
251 |     printVerbose "Not loading schema, $table exists."
252 | fi
253 | 
254 | #Determining the line terminator of the csv file
255 | line_terminator="\\n"
256 | if file ${CSV_FILE} | grep -q CRLF ; then
257 |     line_terminator="\\r\\n"
258 |     printVerbose "Windows-style CRLF terminated input file detected."
259 | else
260 |     printVerbose "UNIX-style LF terminated input file detected."
261 | fi
262 | 
263 | fields=$(head -n 1 ${CSV_FILE}|sed 's/"//g')
264 | 
265 | mysql $(eval echo "$MYSQL_ARGUMENTS") ${MYSQL_DATABASE} ${VERBOSEARG} -e "load data local infile \"${CSV_FILE}\" IGNORE into table $table
266 | fields terminated by ',' enclosed by '\"' LINES TERMINATED BY '${line_terminator}' IGNORE 1 LINES (${fields})"
267 | 
--------------------------------------------------------------------------------
/whoisxmlapi_flexible_csv_to_mysqldb/README:
--------------------------------------------------------------------------------
1 | flexible_csv_to_mysqldb.py -- a Python3 script to create and maintain a
2 |                               WHOIS database in MySQL, using csv files
3 |                               obtained from WhoisXML API.
4 | 
5 | ver. 0.0.2
6 | 
7 | Changelog:
8 | 
9 | ver. 0.0.1, dated 2018.01.09.
10 |     - initial release
11 | 
12 | ver. 0.0.2, dated 2019.04.17.
13 | - introduced field_types.csv, fixed field types
14 | - added the --all-fields-as-text option
15 | - fixed file read with utf8 encoding on Windows platforms
16 |
17 | Contents:
18 |
19 | 1. INSTALLATION
20 | 2. OBTAINING DATA
21 | 3. USING THE SCRIPT
22 |
23 | 1. INSTALLATION
24 |
25 | This is a Python3 script, so you need Python 3 to be installed. Also,
26 | as it deals with mysql connections, you need the mysql.connector
27 | package for python3, too.
28 |
29 | Important: at the time of writing this README, mysql.connector was not
30 | yet available for Python versions newer than 3.5. Hence please
31 | do not use a newer version of Python 3 on any platform.
32 |
33 | For Linux/Mac OS X users, you typically need os packages named
34 |
35 | python3
36 |
37 | and
38 |
39 | python3-mysql.connector
40 |
41 | On Ubuntu and its derivatives, you can install them by running
42 |
43 | apt-get install python3 python3-mysql.connector
44 |
45 | as root. The mysql connector for Python is also available from its
46 | official webpage for your architecture:
47 |
48 | http://dev.mysql.com/downloads/connector/python
49 |
50 | On other Linux platforms, you can install these requirements with the
51 | appropriate package manager.
52 |
53 | Windows users have two options:
54 |
55 | If you install Bash on Ubuntu on Windows for your Windows 10 system
56 | (it is very easy, see our short blog on the topic), you can do
57 | everything in the same way as if you were doing it under Ubuntu Linux.
58 | Another benefit of this approach is that you can also use our
59 | shell-script based solutions on your Windows system.
60 |
61 | If you prefer using native Python on your Windows system, this is also
62 | possible. In this case you need to install Python from
63 |
64 | http://www.python.org
65 |
66 | and mysql.connector, available from
67 |
68 | https://dev.mysql.com/downloads/connector/python
69 |
70 | Having installed these, the script should work flawlessly from the DOS
71 | command line or PowerShell.
72 |
73 |
74 | 2. OBTAINING DATA
75 |
76 | This script is for data downloaded from data feeds of WhoisXML
77 | API. You may use it with any data, including those from daily and
78 | quarterly feeds, and for all formats, including "simple", "regular",
79 | and "full".
80 |
81 | Please consult the manuals of the data feeds regarding the format of
82 | the data.
83 |
84 | You can download data very simply by using our Python download
85 | script, load_mysql_data.py.
86 |
87 | Our scripts are available on github under
88 |
89 | https://github.com/whois-api-llc/whois_database_download_support
90 |
91 |
92 | 3. USING THE SCRIPT
93 |
94 | The script is self-documenting; please see the details of its options
95 | in the output of the --help option:
96 |
97 | ./flexible_csv_to_mysqldb.py --help
98 |
99 | (in DOS command-line, please omit "./")
100 |
101 | We elucidate the use of the script by providing two simple examples,
102 | and outline additional functionality in their description.
103 |
104 | In all examples we use Linux/UNIX style subdirectory specifications,
105 | that is, the path elements are separated with "/". On Windows systems
106 | in DOS command-line or Powershell, you should use backslashes ("\")
107 | instead.
108 |
109 | Note: the script normally reads the data types of the fields for
110 | creating tables from the file "field_types.csv" supplied with the
111 | script. It should be next to the script, i.e. in the same
112 | directory. When using the --all-fields-as-text option, all the fields
113 | apart from the record id "id" and the "domainName" will be of SQL type
114 | "text". The "field_types.csv" is ignored in this case. This option,
115 | however, leads to less efficient data storage and limited indexing
116 | opportunities.
117 |
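For instance, an illustrative call loading everything as text (the table
name here is made up for this illustration; the other options are explained
in the examples below) could be:

./flexible_csv_to_mysqldb.py --all-fields-as-text --mysql-user root --mysql-password MYSQLROOTPASSWORD --mysql-database csvload_test --mysql-table text_test 2017_11_12_eu.csv.gz
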
118 | Example 1.
119 | ----------
120 |
121 | We have downloaded data from the daily feed "cctld_discovered_domain_names_whois" into csv files in a local directory
122 |
123 | /scratch/whois_data/cctld_discovered_domain_names_whois/
124 |
125 | Our files are named *.csv.gz, e.g. 2017_11_12_eu.csv.gz. First we load
126 | all the csv.gz files in the directory into the database "csvload_test",
127 | into a newly created table "daily_test".
128 |
129 | This can be done with the following command:
130 |
131 | ./flexible_csv_to_mysqldb.py --mysql-user root --mysql-password MYSQLROOTPASSWORD --mysql-database csvload_test --overwrite-mysql-table --mysql-table daily_test --threads 4 /scratch/whois_data/cctld_discovered_domain_names_whois/*.csv.gz
132 |
133 | Some important comments and tips:
134 |
135 | - The script uses the downloaded csv.gz and tar.gz files. It
136 | uncompresses them first to a temporary directory, which is "tmpcsv"
137 | in the working directory of the script, but an alternative can be
138 | specified with the --temp-dir command-line option.
139 |
140 | - The data to be loaded can be huge.
141 |
142 | For instance, if you download "regular" csv files for all gtlds from
143 | the quarterly release, the uncompressed files will need more than
144 | 1.4 terabytes. Also, the MySQL database itself will need a space of
145 | similar size in addition.
146 |
147 | Bear this in mind not only when specifying the temporary directory
148 | but also when designing your infrastructure and estimating the time
149 | needed for loading data.
150 |
151 | - The script will first test if all the csv-s are of the same
152 | structure. This structure is deduced from the file headers. It
153 | starts manipulating the database if and only if this is the case.
154 |
155 | - To test if the loading is doable without actually making any changes
156 | to the data, just add the --dry-run option.
157 |
158 | - The script's execution can be made faster on multi-core systems by
159 | using multiple threads.
160 |
161 | E.g. for 4 threads, use the --threads 4 option as in the
162 | example. Note: the uncompression of the files is single-threaded; the
163 | option affects the loading procedure only. Each thread processes one
164 | csv file, so multithreading is useful for multiple csv-s.
165 |
166 | - The database will be created if it does not exist.
167 |
168 | - If you prefer another MySQL user than root, you need to ensure that
169 | the user has sufficient privileges to do the operations.
170 |
171 | - You can add additional data with the same structure to your existing
172 | database.
173 |
174 | E.g. for newly downloaded csv-s, you just need to use the same
175 | command-line without the --overwrite-mysql-table option.
176 |
177 | - However, be careful when using --overwrite-mysql-table: if the
178 | table already exists, the script will drop it, and create an empty
179 | table again.
180 |
181 | Example 2.
182 | ----------
183 |
184 | Our data originate now from the cctld quarterly data feed v6.
We have 185 | downloaded the data for the ".eu" tld into the file 186 | 187 | /scratch/whois_data/v6/csv/tlds/simple/csvs.eu.simple.tar.gz 188 | 189 | We will load these data into the table "quarterly_test" of the 190 | database "csvload_test". The command is: 191 | 192 | ./flexible_csv_to_mysqldb.py --mysql-user root --mysql-password MYSQLROOTPASSWORD --mysql-database csvload_test --mysql-table quarterly_test --overwrite-mysql-table /scratch/whois_data/v6/csv/tlds/simple/csvs.eu.simple.tar.gz 193 | 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /whoisxmlapi_bash_csv_to_mysqldb/README: -------------------------------------------------------------------------------- 1 | README document for 2 | 3 | CSV importing scripts and schema 4 | 5 | Document version: 1.1 6 | Dated: 30-11-2018. 7 | 8 | Contents: 9 | --------- 10 | 1. List of files 11 | 2. Script specification 12 | 3. Example of use 13 | 14 | 1. List of files 15 | ---------------- 16 | 17 | load_csv_file_into_db.sh -- loader script in Bash, this is the one documented here 18 | 19 | load_csv_file_into_db_old.sh -- legacy version of loader script in 20 | Bash, used till July 2017. 21 | 22 | loader_schema_full.sql -- schema file for full csv-s 23 | (daily and quarterly feeds) 24 | loader_schema_regular.sql -- schema file for regular csv-s for both 25 | quarterly and daily feeds. 26 | The fields registrant_rawText, 27 | administrativeContact_rawText, 28 | billingContact_rawText, 29 | technicalContact_rawText, 30 | and zoneContact_rawText 31 | will remain empty when csv-s 32 | from daily feeds are imported. 33 | 34 | loader_schema_regular_daily_only.sql -- schema file for regular csv-s 35 | for data from daily feeds only. 36 | Does not contain the 37 | raw text fields 38 | which are only present 39 | in quarterly data. 40 | 41 | loader_schema_simple.sql -- schema file for simple csv-s 42 | (quarterly feeds only) 43 | README -- this file 44 | 45 | 2. Script specification 46 | ----------------------- 47 | 48 | Name: load_csv_file_into_db.sh -- loads data from a csv file downloaded from WhoisXML API feeds 49 | into a table in a mysql database. 50 | 51 | Synopsis: load_csv_file_into_db.sh [OPTION]... 52 | 53 | Description: The script loads a csv file with Whois information, 54 | downloaded from a subscription to WhoisXML API feeds into a 55 | database table. The type of the csv file (simple, regular, or 56 | full, see the manual of the feeds) should be specified and the 57 | appropriate schema file supplied here should be used. 58 | The table into which data are loaded is 59 | whois_record_flat for 'full' csv-s, 60 | whois_record_flat_simple for 'simple' csv-s, and 61 | whois_record_flat_regular for 'regular' csv-s. 62 | 63 | The command-line options of the script are: 64 | 65 | -h, --help Print this help and exit. 66 | -v, --version Print version information and exit. 67 | --verbose Print more messages. 68 | --mysql-user=USERNAME User name to login to the mysql database (optional). 69 | --mysql-password=PASSWORD Password to login to the data source (optional). 70 | --mysql-database=DATABASE The name of the mysql database to load data into. 71 | --csv-format=FORMAT The format of the csv file to be loaded. Must be one of 'regular', 'simple' or 'full'. 72 | --schema-file=SCHEMAFILE The schema file to be used when loading. These are provided with the script. 73 | --csv-file=CSVFILE The csv file to be loaded. 74 | 75 | 3. 
Example of use
76 | -----------------
77 |
78 | Here we describe a simple and complete workflow.
79 |
80 | Assume we want to have the quarterly data from the v19 version of
81 | quarterly databases, for the ``aeg'' and ``active'' tlds in a mysql
82 | database. We intend to do this by downloading csv files and importing
83 | them. Here is what to do, from scratch.
84 |
85 | Step 1. Download some simple csv-s
86 |
87 | Download the latest version of whoisdownload_bash from the docs
88 | subdirectory of quarterly releases or from
89 | http://bestwhois.org/domain_name_data/docs/scripts, depending on your
90 | subscription.
91 |
92 | The actual version was whoisdownload_bash-0.0.16.tar.gz when this
93 | manual was written, but it may be newer; please download the latest
94 | available one and use its name in the command-line below. Untar the
95 | file, and change into its directory:
96 |
97 | tar zxvf whoisdownload_bash-0.0.16.tar.gz;cd whoisdownload_bash
98 |
99 | In order to download the required csv-s, we use the download script
100 | in the following form (please substitute the CAPITALIZED words with
101 | your username and password):
102 |
103 | ./whoisdownload.sh --verbose --user USERNAME --password PASSWORD --db-version v19 --data-feeds whois_database --tld "aeg active" --file-format simple
104 |
105 | This will have the following output to stdout:
106 |
107 | whois_database/v19/csv/tlds/simple/csvs.aeg.simple.tar.gz [OK]
108 | whois_database/v19/csv/tlds/simple/csvs.active.simple.tar.gz [OK]
109 |
110 | (If instead of [OK] you get something else, then it is likely that
111 | there is something wrong with your password.)
112 |
113 | The resulting files are in the subdirectory
114 |
115 | whois_database/v19/csv/tlds/simple/
116 |
117 | (You may modify this behavior with the command-line argument
118 | --output-dir of whoisdownload.sh, see also
119 | whoisdownload.sh --help
120 | )
121 |
122 | Move the files, csvs.active.simple.tar.gz and csvs.aeg.simple.tar.gz,
123 | to a suitable directory, and uncompress them:
124 |
125 | tar zxvf csvs.active.simple.tar.gz
126 | tar zxvf csvs.aeg.simple.tar.gz
127 |
128 | Now you have a simple subdirectory, in which there are the aeg and
129 | active subdirectories, in which there are the csv files (in the
130 | example, only one of them, named 1.csv).
131 |
132 | Step 2 (optional).
133 | Prepare your mysql database. We assume that mysql is already
134 | installed and you can administer it. Were this not the case, please
135 | consult the documentation of mysql.
136 |
137 | Important: in order to load data into mysql from files, you need to
138 | disable the ``secure-file-priv'' option of mysql. This can be done by
139 | adding the following line:
140 |
141 | secure-file-priv = ""
142 |
143 | to your mysqld configuration file in the section [mysqld]. (The
144 | location of the configuration file varies with the
145 | installation. Traditionally it is called my.cnf. In Ubuntu systems,
146 | for instance, you will find this section in
147 | /etc/mysql/mysql.conf.d/mysqld.cnf.) After editing the config you
148 | need to restart the mysql service.
149 |
150 | If you already have access credentials for the appropriate user and a
151 | database you want to use exists, you may omit the next steps. In our
152 | example we shall use the username ``whoisuser'' who will have the
153 | password ``whoispassword''.
154 |
155 | Otherwise create the respective user: as the mysql administrator do
156 |
157 | CREATE USER whoisuser IDENTIFIED BY 'whoispassword';
158 |
159 | Create a database (``whoisdatabase'' in this example) which we shall
160 | use:
161 |
162 | CREATE DATABASE whoisdatabase;
163 |
164 | Grant all privileges on this database to this user:
165 |
166 | GRANT ALL ON whoisdatabase.* to whoisuser;
167 |
168 | Finally, as you want to load data from files, the user needs to have
169 | the required privileges, so do:
170 |
171 | GRANT file ON *.* to whoisuser;
172 |
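As a quick optional check (a generic mysql client invocation, not part of
the supplied scripts), you can verify that the credentials work:

mysql --user=whoisuser --password=whoispassword whoisdatabase -e "SELECT 1;"

If this prints a one-element table, the user and the database are ready.
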
173 | Step 3. Load your data into the database
174 |
175 | Assume that you have uncompressed the downloaded files in the same
176 | subdirectory where the script resides. To load the files you have to
177 | do
178 |
179 | ./load_csv_file_into_db.sh --mysql-user=whoisuser --mysql-password=whoispassword --mysql-database=whoisdatabase --schema-file=loader_schema_simple.sql --csv-file=simple/aeg/1.csv --csv-format=simple
180 |
181 | and for the other domain:
182 |
183 | ./load_csv_file_into_db.sh --mysql-user=whoisuser --mysql-password=whoispassword --mysql-database=whoisdatabase --schema-file=loader_schema_simple.sql --csv-file=simple/active/1.csv --csv-format=simple
184 |
185 | Logging in to the database, the data will be there in the table
186 | "whois_record_flat_simple"; you can now work with them.
187 |
188 | Note again that the script inserts every record found in the input
189 | csv; it does not check whether a record already exists. This may lead
190 | to duplicates, e.g. when the script is run twice with the same
191 | arguments.
192 |
193 | -------end of document-------
--------------------------------------------------------------------------------
/whoisxmlapi_whoisdownload_bash/supported_ngtlds:
--------------------------------------------------------------------------------
1 | 
aaa,aarp,abarth,abb,abbott,abbvie,abc,able,abogado,abudhabi,academy,accenture,accountant,accountants,aco,active,actor,adac,ads,adult,aeg,aetna,afamilycompany,afl,agakhan,agency,aig,aigo,airbus,airforce,airtel,akdn,alfaromeo,alibaba,alipay,allfinanz,allstate,ally,alsace,americanexpress,americanfamily,amex,amfam,amica,amsterdam,analytics,android,anquan,anz,aol,apartments,app,apple,aquarelle,aramco,archi,army,art,arte,asda,associates,athleta,attorney,auction,audi,audible,audio,auspost,author,auto,autos,avianca,aws,axa,azure,baby,baidu,banamex,bananarepublic,band,bank,bar,barcelona,barclaycard,barclays,barefoot,bargains,baseball,basketball,bauhaus,bayern,bbc,bbt,bbva,bcg,bcn,beats,beauty,beer,bentley,berlin,best,bestbuy,bet,bharti,bible,bid,bike,bing,bingo,bio,black,blackfriday,blanco,blockbuster,blog,bloomberg,blue,bms,bmw,bnl,boats,boehringer,bofa,bom,bond,boo,book,booking,boots,bosch,bostik,boston,bot,boutique,box,bradesco,bridgestone,broadway,broker,brother,brussels,budapest,bugatti,build,builders,business,buy,buzz,bzh,cab,cafe,cal,call,calvinklein,cam,camera,camp,cancerresearch,canon,capetown,capital,capitalone,car,caravan,cards,care,career,careers,cars,cartier,casa,case,caseih,cash,casino,cat,catering,catholic,cba,cbn,cbre,cbs,ceb,center,ceo,cern,cfa,cfd,chanel,channel,chase,chat,cheap,chintai,chloe,christmas,chrome,chrysler,church,cipriani,circle,cisco,citadel,citi,citic,city,cityeats,claims,cleaning,click,clinic,clinique,clothing,cloud,club,clubmed,coach,codes,coffee,college,cologne,comcast,commbank,community,company,compare,computer,comsec,condos,construction,consulting,contact,contractors,cooking,cookingchannel,cool,coop,corsica,country,coupon,coupons,courses,credit,creditcard,creditunion,cricket,crown,crs,cruise,cruises,csc,cuisinella,cymru,cyou,dabur,dad,dance,date,dating,datsun,day,dclk,dds,deal,dealer,deals,degree,delivery,dell,deloitte,delta,democrat,dental,dentist,desi,design,dev,dhl,diamonds,diet,digital,direct,directory,discount,discover,dish,diy,dnp,docs,doctor,dodge,dog,doha,domains,dot,download,drive,dtv,dubai,duck,dunlop,duns,dupont,durban,dvag,dvr,earth,eat,eco,edeka,education,email,emerck,energy,engineer,engineering,enterprises,epost,epson,equipment,ericsson,erni,esq,estate,esurance,eurovision,eus,events,everbank,exchange,expert,exposed,express,extraspace,fage,fail,fairwinds,faith,family,fan,fans,farm,farmers,fashion,fast,fedex,feedback,ferrari,ferrero,fiat,fidelity,fido,film,final,finance,financial,fire,firestone,firmdale,fish,fishing,fit,fitness,flickr,flights,flir,florist,flowers,fly,foo,food,foodnetwork,football,ford,forex,forsale,forum,foundation,fox,free,fresenius,frl,frogans,frontdoor,frontier,ftr,fujitsu,fujixerox,fund,furniture,futbol,fyi,gal,gallery,gallo,gallup,game,games,gap,garden,gbiz,gdn,gea,gent,genting,george,ggee,gift,gifts,gives,giving,glade,glass,gle,global,globo,gmail,gmbh,gmo,gmx,godaddy,gold,goldpoint,golf,goo,goodhands,goodyear,goog,google,gop,got,grainger,graphics,gratis,green,gripe,group,guardian,gucci,guge,guide,guitars,guru,hamburg,hangout,haus,hbo,hdfc,hdfcbank,health,healthcare,help,helsinki,here,hermes,hgtv,hiphop,hisamitsu,hitachi,hiv,hkt,hockey,holdings,holiday,homedepot,homegoods,homes,homesense,honda,honeywell,horse,hospital,host,hosting,hot,hoteles,hotmail,house,how,hsbc,hughes,hyatt,hyundai,ibm,icbc,ice,icu,ieee,ifm,iinet,ikano,imamat,imdb,immo,immobilien,industries,infiniti,ing,ink,institute,insurance,insure,intel,international,intuit,investments,ipiranga,irish,iselect,ismaili,ist,istanbul,itau,itv,iveco,iwc,jaguar,java,jcb,jcp,jeep,
jetzt,jewelry,jio,jlc,jll,jmp,jnj,jobs,joburg,jot,joy,jpmorgan,jprs,juegos,juniper,kaufen,kddi,kerryhotels,kerrylogistics,kerryproperties,kfh,kia,kim,kinder,kindle,kitchen,kiwi,koeln,komatsu,kosher,kpmg,kpn,krd,kred,kuokgroup,kyoto,lacaixa,ladbrokes,lamborghini,lamer,lancaster,lancia,lancome,land,landrover,lanxess,lasalle,lat,latino,latrobe,law,lawyer,lds,lease,leclerc,lefrak,legal,lego,lexus,lgbt,liaison,lidl,life,lifeinsurance,lifestyle,lighting,like,lilly,limited,limo,lincoln,linde,link,lipsy,live,living,lixil,loan,loans,locker,locus,loft,lol,london,lotte,lotto,love,lpl,lplfinancial,ltd,ltda,lundbeck,lupin,luxe,luxury,macys,madrid,maif,maison,makeup,man,management,mango,market,marketing,markets,marriott,marshalls,maserati,mattel,mba,mcd,mcdonalds,mckinsey,med,media,meet,melbourne,meme,memorial,men,menu,meo,metlife,miami,microsoft,mini,mint,mit,mitsubishi,mlb,mls,mma,mobily,moda,moe,moi,mom,monash,money,monster,montblanc,mopar,mormon,mortgage,moscow,moto,motorcycles,mov,movie,movistar,msd,mtn,mtpc,mtr,museum,mutual,mutuelle,nab,nadex,nagoya,nationwide,natura,navy,nba,nec,netbank,netflix,network,neustar,new,newholland,news,next,nextdirect,nexus,nfl,ngo,nhk,nico,nike,nikon,ninja,nissan,nissay,nokia,northwesternmutual,norton,now,nowruz,nowtv,nra,nrw,ntt,nyc,obi,observer,off,office,okinawa,olayan,olayangroup,oldnavy,ollo,omega,one,ong,onl,online,onyourside,ooo,open,oracle,orange,organic,orientexpress,origins,osaka,otsuka,ott,ovh,page,pamperedchef,panasonic,panerai,paris,pars,partners,parts,party,passagens,pay,pccw,pet,pfizer,pharmacy,philips,photo,photography,photos,physio,piaget,pics,pictet,pictures,pid,pin,ping,pink,pioneer,pizza,place,play,playstation,plumbing,plus,pnc,pohl,poker,politie,porn,pramerica,praxi,press,prime,pro,prod,productions,prof,progressive,promo,properties,property,protection,pru,prudential,pub,pwc,qpon,quebec,quest,qvc,racing,radio,raid,read,realestate,realtor,realty,recipes,red,redstone,redumbrella,rehab,reise,reisen,reit,reliance,ren,rent,rentals,repair,report,republican,rest,restaurant,review,reviews,rexroth,rich,richardli,ricoh,rightathome,ril,rio,rip,rmit,rocher,rocks,rodeo,rogers,room,rsvp,ruhr,run,rwe,ryukyu,saarland,safe,safety,sakura,sale,salon,samsclub,samsung,sandvik,sandvikcoromant,sanofi,sap,sapo,sarl,sas,save,saxo,sbi,sbs,sca,scb,schaeffler,schmidt,scholarships,school,schule,schwarz,science,scjohnson,scor,scot,seat,secure,security,seek,select,sener,services,ses,seven,sew,sex,sexy,sfr,shangrila,sharp,shell,shia,shiksha,shoes,shop,shopping,shouji,show,showtime,shriram,silk,sina,singles,site,ski,skin,sky,skype,sling,smart,smile,sncf,soccer,social,softbank,software,sohu,solar,solutions,song,sony,soy,space,spiegel,spot,spreadbetting,srl,srt,stada,staples,star,starhub,statebank,statefarm,statoil,stc,stcgroup,stockholm,storage,store,stream,studio,study,style,sucks,supplies,supply,support,surf,surgery,suzuki,swatch,swiftcover,swiss,sydney,symantec,systems,tab,taipei,talk,taobao,target,tatamotors,tatar,tattoo,tax,taxi,tci,tdk,team,tech,technology,telecity,telefonica,temasek,tennis,teva,thd,theater,theatre,tiaa,tickets,tienda,tiffany,tips,tires,tirol,tjmaxx,tjx,tkmaxx,tmall,today,tokyo,tools,top,toray,toshiba,total,tours,town,toyota,toys,trade,trading,training,travel,travelchannel,travelers,travelersinsurance,trust,trv,tube,tui,tunes,tushu,tvs,ubank,ubs,uconnect,university,uno,uol,ups,vacations,vana,vanguard,vegas,ventures,verisign,versicherung,vet,viajes,video,vig,viking,villas,vin,vip,virgin,visa,vision,vista,vistaprint,viva,vivo,vlaanderen,vodka,volkswagen,volvo,v
ote,voto,voyage,vuelos,wales,walmart,walter,wang,wanggou,warman,watch,watches,weather,weatherchannel,webcam,weber,website,wed,wedding,weibo,weir,whoswho,wien,wiki,williamhill,win,windows,wine,winners,wme,wolterskluwer,woodside,work,works,world,wow,wtc,wtf,xbox,xerox,xfinity,xihuan,xin,xn--11b4c3d,xn--1ck2e1b,xn--1qqw23a,xn--30rr7y,xn--3bst00m,xn--3ds443g,xn--3oq18vl8pn36a,xn--3pxu8k,xn--42c2d9a,xn--45q11c,xn--4gbrim,xn--55qw42g,xn--55qx5d,xn--5su34j936bgsg,xn--5tzm5g,xn--6frz82g,xn--6qq986b3xl,xn--80adxhks,xn--80aqecdr1a,xn--80asehdb,xn--80aswg,xn--9dbq2a,xn--9et52u,xn--9krt00a,xn--b4w605ferd,xn--bck1b9a5dre4c,xn--c1avg,xn--c2br7g,xn--cck2b3b,xn--cg4bki,xn--czr694b,xn--czrs0t,xn--czru2d,xn--d1acj3b,xn--eckvdtc9d,xn--efvy88h,xn--estv75g,xn--fct429k,xn--fhbei,xn--fiq228c5hs,xn--fiq64b,xn--fjq720a,xn--flw351e,xn--fzys8d69uvgm,xn--g2xx48c,xn--gckr3f0f,xn--gk3at1e,xn--hxt814e,xn--i1b6b1a6a2e,xn--imr513n,xn--io0a7i,xn--j1aef,xn--jlq61u9w7b,xn--jvr189m,xn--kcrx77d1x4a,xn--kpu716f,xn--kput3i,xn--mgba3a3ejt,xn--mgba7c0bbn0a,xn--mgbab2bd,xn--mgbb9fbpob,xn--mgbca7dzdo,xn--mgbi4ecexp,xn--mgbt3dhd,xn--mk1bu44c,xn--ngbc5azd,xn--ngbe9e0a,xn--nqv7f,xn--nqv7fs00ema,xn--nyqy26a,xn--p1acf,xn--pbt977c,xn--pssy2u,xn--q9jyb4c,xn--qcka1pmc,xn--rhqv96g,xn--rovu88b,xn--ses554g,xn--t60b56a,xn--tckwe,xn--tiq49xqyj,xn--unup4y,xn--vermgensberater-ctb,xn--vermgensberatung-pwb,xn--vhquv,xn--vuq861b,xn--w4r85el8fhu5dnra,xn--w4rs40l,xn--xhq521b,xn--zfr164b,xperia,xxx,xyz,yachts,yahoo,yamaxun,yandex,yodobashi,yoga,yokohama,you,youtube,yun,zappos,zara,zero,zip,zippo,zone,zuerich, 2 | -------------------------------------------------------------------------------- /whoisxmlapi_mysqldump_loaders/load_mysql_data_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #Sample script to load ASCII mysql dumps for a tld 4 | #This loads schema+data from a single backup file 5 | #Recommended for smaller tlds. 6 | #Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com 7 | # 8 | #Note: IF YOU ARE READING THIS SCRIPT JUST TO COLLECT IDEAS FOR YOUR OWN LOADER, 9 | # VISIT THE END OF THE FILE WHERE THE REAL WORK IS DONE 10 | # 11 | # Global variables. 12 | # 13 | LANG=C 14 | LC_ALL=C 15 | VERSION="0.0.3" 16 | VERBOSE="yes" 17 | DEBUG="no" 18 | SHOWPROGRESS="no" 19 | MYNAME=$(basename $0) 20 | CATCOMMAND="cat" 21 | 22 | #No mysql stuff by default. This is set by mandatory args. 23 | unset MYSQL_USER 24 | unset MYSQL_PASSWORD 25 | unset MYSQL_DATABASE 26 | 27 | #Importing generic utilities 28 | 29 | source load_mysql_utils.sh 30 | 31 | function printHelpAndExit() 32 | { 33 | echo "Usage: $MYNAME [OPTION]..." 34 | echo "$MYNAME -- loads data for a given tld" 35 | echo "from a schema file and separate table files " 36 | echo " into a table in a mysql database." 37 | echo "" 38 | echo " -h, --help Print this help and exit." 39 | echo " -v, --version Print version information and exit." 40 | echo " --verbose Print more messages." 41 | echo " --show-progress Display progress bars when loading data from dumps." 42 | echo " Recommended, especially for large domains." 43 | echo " --mysql-user=USERNAME User name to login to the mysql database (optional)." 44 | echo " --mysql-password=PASSWORD Password to login to the data source (optional)." 45 | echo " --mysql-database=DATABASE The name of the mysql database to load data into. " 46 | echo " This database is created by the script, so should not exist" 47 | echo " --dump-file=DUMPFILE The file to be loaded. 
If this is provided,"
48 | echo " the rest of the options are invalid"
49 | echo " --tld=TLD load data for this tld"
50 | echo " --dump-files-dir=DIRECTORY The dump files for the tld-s are in this directory. Only for --tld"
51 | echo " --db-version=STRING The version to download. Required for --tld. Format: vNN, e.g. v19"
52 | echo ""
53 | echo "Examples:"
54 | echo ""
55 | echo " -loading sample data downloaded into a directory mysqldump_sample from "
56 | echo " http://domainwhoisdatabase.com/whois_database/sample/gtlds/v20/mysqldump_sample/aaa"
57 | echo ""
58 | echo "$MYNAME --mysql-database=sample_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --dump-files-dir=mysqldump_sample --db-version=v20 --tld=aaa --verbose --show-progress"
59 | echo ""
60 | echo " or the same task specifying the file name and path directly:"
61 | echo ""
62 | echo "$MYNAME --mysql-database=sample_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --dump-file=mysqldump_sample/aaa/whoiscrawler_v20_aaa_mysql.sql.gz --verbose --show-progress"
63 | echo ""
64 | echo ""
65 | echo " -loading production data quietly, downloaded into a directory database_dump/mysqldump/aaa from"
66 | echo " http://www.domainwhoisdatabase.com/whois_database/v20/database_dump/mysqldump/aaa"
67 | echo ""
68 | echo "$MYNAME --mysql-database=production_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --dump-files-dir=database_dump/mysqldump --tld=aaa --db-version=v20"
69 | echo ""
70 | echo " or the same task verbosely, specifying the file name and path directly:"
71 | echo ""
72 | echo "$MYNAME --mysql-database=production_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --dump-file=database_dump/mysqldump/aaa/whoiscrawler_v20_aaa_mysql.sql.gz --verbose --show-progress"
73 |
74 | exit 1
75 | }
76 |
77 | ARGS=$(\
78 | getopt -o hv \
79 | -l "help,verbose,debug,show-progress,version,v,mysql-database:,mysql-user:,mysql-password:,\
80 | dump-file:,tld:,dump-files-dir:,db-version:" \
81 | -- "$@")
82 |
83 |
84 | if [ $? -ne 0 ]; then
85 | exit 6
86 | fi
87 |
88 | eval set -- "$ARGS"
89 |
90 | while true; do
91 | case "$1" in
92 | -h|--help)
93 | shift
94 | printHelpAndExit
95 | ;;
96 |
97 | --verbose)
98 | shift
99 | VERBOSE="true"
100 | ;;
101 |
102 | --debug)
103 | shift
104 | DEBUG="yes"
105 | VERBOSEARG="--verbose"
106 | ;;
107 |
108 | --show-progress)
109 | shift
110 | if which pv > /dev/null; then
111 | CATCOMMAND="pv"
112 | else
113 | printError "The show-progress argument needs pv to be installed (e.g. apt-get install pv)"
114 | exit 1
115 | fi
116 | ;;
117 |
118 | -v|--version)
119 | shift
120 | printVersionAndExit
121 | ;;
122 |
123 | --mysql-user)
124 | shift
125 | db_username=$1
126 | shift
127 | ;;
128 |
129 | --mysql-password)
130 | shift
131 | export MYSQL_PWD=$1
132 | shift
133 | ;;
134 |
135 | --mysql-database)
136 | shift
137 | db=$1
138 | shift
139 | ;;
140 |
141 | --dump-file)
142 | shift
143 | dump_file=$(readlink -e "$1")
144 | if ! [ -f "$dump_file" ]; then
145 | printError "The specified mysql file $dump_file was not found."
146 | exit 1
147 | fi
148 | shift
149 | ;;
150 |
151 | --tld)
152 | shift
153 | TLD=$1
154 | shift
155 | ;;
156 |
157 | --dump-files-dir)
158 | shift
159 | DUMP_FILES_DIR=$1
160 | if ! [ -d "$DUMP_FILES_DIR" ]; then
161 | printError "The specified dump file directory does not exist."
162 | exit 1
163 | fi
164 | shift
165 | ;;
166 |
167 | --db-version)
168 | shift
169 | #format check
170 | if echo "$1" | grep --quiet -e "^v[0-9][0-9]*$"; then
171 | DATABASE_VERSION=$1
172 | else
173 | printError "Invalid db-version specification. It should be like v19 or v6"
174 | exit 1
175 | fi
176 | shift
177 | ;;
178 |
179 | --)
180 | shift
181 | break
182 | ;;
183 |
184 | *)
185 | ;;
186 | esac
187 | done
188 |
189 | #some verification before doing the real job
190 |
191 | if [ -n "$dump_file" ] && [ -n "$TLD" -o -n "$DUMP_FILES_DIR" -o -n "$DATABASE_VERSION" ]; then
192 | printError "Conflicting arguments. Please use either --dump-file or --tld + --dump-files-dir + --db-version."
193 | exit 1
194 | fi
195 |
196 | #Set up mysql login credentials if needed
197 | if [ -n "$db_username" ]; then
198 | MYSQL_ARGUMENTS="--user=$db_username"
199 | fi
200 |
201 | printDebug "Mysql arguments: $MYSQL_ARGUMENTS"
202 | printDebug "Mysql password: $MYSQL_PWD"
203 |
204 | if [ -z "$db" ]; then
205 | printError "Mysql database not specified. See $MYNAME --help"
206 | exit 1
207 | fi
208 |
209 | #If the tld is specified, we find out the dumpfile name.
210 | if [ -z "$dump_file" ]; then
211 | dump_file="$DUMP_FILES_DIR"/"$TLD"/whoiscrawler_"$TLD"_mysql.sql.gz
212 | if [ ! -f "$dump_file" ]; then
213 | dump_file="$DUMP_FILES_DIR"/"$TLD"/whoiscrawler_"$DATABASE_VERSION"_"$TLD"_mysql.sql.gz
214 | fi
215 | #Quarterly feeds case
216 | if [ ! -f "$dump_file" ]; then
217 | TLDUNDERSCORE=$(echo $TLD | sed -e "s/\./_/g")
218 | dump_file="$DUMP_FILES_DIR"/"$TLD"/domains_whoiscrawler_"$DATABASE_VERSION"_"$TLDUNDERSCORE"_mysql.sql.gz
219 | fi
220 | fi
221 |
222 | printVerbose "Dump file: $dump_file"
223 |
224 | if [ ! -f "$dump_file" ]; then
225 | printError "Database dump to be loaded is not specified or it does not exist."
226 | printError "See $MYNAME --help"
227 | exit 1
228 | fi
229 |
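#(Editor's sketch -- an optional addition, not part of the original script:
# fail early if the MySQL server cannot be reached with the credentials
# collected above.)
#if ! mysql ${MYSQL_ARGUMENTS} -e "SELECT 1;" > /dev/null 2>&1; then
#    printError "Cannot connect to the MySQL server with the given credentials."
#    exit 1
#fi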
230 | #THE REAL WORK STARTS HERE
231 | printVerbose "Creating database $db"
232 | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} -e "create database $db"
233 | time=$(date +%s)
234 | echo "loading data from file $dump_file"
235 | if [ "${dump_file: -3}" == ".gz" ]; then
236 |
237 | $CATCOMMAND "$dump_file" | gunzip -c | mysql ${MYSQL_ARGUMENTS} $db
238 | else
239 |
240 | $CATCOMMAND "$dump_file" | mysql ${MYSQL_ARGUMENTS} $db
241 | fi
242 |
243 | time2=$(date +%s)
244 | dur=$(expr $time2 - $time)
245 | echo "took $dur seconds."
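#(Editor's note -- a hedged, optional way to verify the result; the table
# names depend on the dump that was loaded, so this just lists them:)
#mysql ${MYSQL_ARGUMENTS} $db -e "SHOW TABLES;"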
246 | 247 | -------------------------------------------------------------------------------- /whoisxmlapi_percona_loader_scripts/whoiscrawler_mysql_schema.sql: -------------------------------------------------------------------------------- 1 | -- MySQL dump 10.13 Distrib 5.5.8, for Linux (x86_64) 2 | -- 3 | -- Host: localhost Database: whoiscrawler 4 | -- ------------------------------------------------------ 5 | -- Server version 5.5.8-log 6 | 7 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 8 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 9 | /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; 12 | /*!40103 SET TIME_ZONE='+00:00' */; 13 | /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; 14 | /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; 15 | /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; 16 | /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; 17 | 18 | -- 19 | -- Table structure for table `contact` 20 | -- 21 | 22 | /*!40101 SET @saved_cs_client = @@character_set_client */; 23 | /*!40101 SET character_set_client = utf8 */; 24 | CREATE TABLE IF NOT EXISTS `contact` ( 25 | `contact_id` bigint(20) NOT NULL AUTO_INCREMENT, 26 | `name` varchar(256) DEFAULT NULL, 27 | `organization` varchar(256) DEFAULT NULL, 28 | `street1` varchar(256) DEFAULT NULL, 29 | `street2` varchar(256) DEFAULT NULL, 30 | `street3` varchar(256) DEFAULT NULL, 31 | `street4` varchar(256) DEFAULT NULL, 32 | `city` varchar(64) DEFAULT NULL, 33 | `state` varchar(45) DEFAULT NULL, 34 | `postal_code` varchar(45) DEFAULT NULL, 35 | `country` varchar(45) DEFAULT NULL, 36 | `email` varchar(256) DEFAULT NULL, 37 | `telephone` varchar(45) DEFAULT NULL, 38 | `telephone_ext` varchar(45) DEFAULT NULL, 39 | `fax` varchar(45) DEFAULT NULL, 40 | `fax_ext` varchar(45) DEFAULT NULL, 41 | `parse_code` smallint(6) DEFAULT NULL, 42 | `raw_text` longtext, 43 | `unparsable` longtext, 44 | `audit_created_date` varchar(45) DEFAULT NULL, 45 | `audit_updated_date` varchar(45) DEFAULT NULL, 46 | PRIMARY KEY (`contact_id`), 47 | KEY `audit_updated_date` (`audit_updated_date`) 48 | ) ENGINE=InnoDB AUTO_INCREMENT=1 ROW_FORMAT=COMPRESSED DEFAULT CHARSET=utf8; 49 | /*!40101 SET character_set_client = @saved_cs_client */; 50 | 51 | -- 52 | -- Table structure for table `domain_names_whoisdatacollector` 53 | -- 54 | 55 | /*!40101 SET @saved_cs_client = @@character_set_client */; 56 | /*!40101 SET character_set_client = utf8 */; 57 | CREATE TABLE IF NOT EXISTS `domain_names_whoisdatacollector` ( 58 | `domain_id` bigint(20) NOT NULL AUTO_INCREMENT, 59 | `domain_name` varchar(256) CHARACTER SET latin1 NOT NULL, 60 | `reshoot` smallint(6) DEFAULT '0', 61 | processed int default 0, 62 | registrar_name varchar(512), 63 | registrar_raw_text longtext, 64 | registry_raw_text longtext, 65 | processed_time timestamp, 66 | PRIMARY KEY (`domain_id`), 67 | UNIQUE KEY `domain_name` (`domain_name`), 68 | KEY `reshoot` (`reshoot`), 69 | KEY `processed` (`processed`) 70 | ) ENGINE=InnoDB AUTO_INCREMENT=1 ROW_FORMAT=COMPRESSED DEFAULT CHARSET=utf8; 71 | /*!40101 SET character_set_client = @saved_cs_client */; 72 | -- 73 | -- Table structure for table `registry_data` 74 | -- 75 | 76 | /*!40101 SET @saved_cs_client = @@character_set_client */; 77 | /*!40101 SET character_set_client = utf8 */; 78 | CREATE TABLE IF NOT EXISTS `registry_data` ( 79 | 
`registry_data_id` bigint(20) NOT NULL AUTO_INCREMENT, 80 | `created_date` varchar(200) DEFAULT NULL, 81 | `updated_date` varchar(200) DEFAULT NULL, 82 | `expires_date` varchar(200) DEFAULT NULL, 83 | `admin_contact_id` bigint(11) DEFAULT NULL, 84 | `registrant_id` bigint(11) DEFAULT NULL, 85 | `technical_contact_id` bigint(11) DEFAULT NULL, 86 | `zone_contact_id` bigint(11) DEFAULT NULL, 87 | `billing_contact_id` bigint(11) DEFAULT NULL, 88 | `domain_name` varchar(256) CHARACTER SET latin1 DEFAULT NULL, 89 | `name_servers` text, 90 | `status` text, 91 | `raw_text` longtext, 92 | `audit_created_date` timestamp NULL DEFAULT NULL, 93 | `audit_updated_date` timestamp NULL DEFAULT NULL, 94 | `unparsable` longtext, 95 | `parse_code` smallint(6) DEFAULT NULL, 96 | `header_text` longtext, 97 | `clean_text` longtext, 98 | `footer_text` longtext, 99 | `registrar_name` varchar(512) DEFAULT NULL, 100 | `whois_server` varchar(512) DEFAULT NULL, 101 | `referral_url` varchar(512) DEFAULT NULL, 102 | `data_error` smallint(6) DEFAULT '0', 103 | PRIMARY KEY (`registry_data_id`), 104 | KEY `domain_name_index` (`domain_name`), 105 | UNIQUE KEY `audit_updated_date` (`audit_updated_date`), 106 | KEY `FK68C3166C7B556202` (`technical_contact_id`), 107 | KEY `FK68C3166C79B00024` (`billing_contact_id`), 108 | KEY `FK68C3166CB8CF12D0` (`admin_contact_id`), 109 | KEY `FK68C3166CD0C7A375` (`registrant_id`), 110 | KEY `FK68C3166C20710653` (`zone_contact_id`), 111 | KEY `data_error` (`data_error`), 112 | CONSTRAINT `FK68C3166C20710653` FOREIGN KEY (`zone_contact_id`) REFERENCES `contact` (`contact_id`), 113 | CONSTRAINT `FK68C3166C79B00024` FOREIGN KEY (`billing_contact_id`) REFERENCES `contact` (`contact_id`), 114 | CONSTRAINT `FK68C3166C7B556202` FOREIGN KEY (`technical_contact_id`) REFERENCES `contact` (`contact_id`), 115 | CONSTRAINT `FK68C3166CB8CF12D0` FOREIGN KEY (`admin_contact_id`) REFERENCES `contact` (`contact_id`), 116 | CONSTRAINT `FK68C3166CD0C7A375` FOREIGN KEY (`registrant_id`) REFERENCES `contact` (`contact_id`) 117 | ) ENGINE=InnoDB AUTO_INCREMENT=1 ROW_FORMAT=COMPRESSED DEFAULT CHARSET=utf8; 118 | /*!40101 SET character_set_client = @saved_cs_client */; 119 | 120 | -- 121 | -- Table structure for table `whois_record` 122 | -- 123 | 124 | /*!40101 SET @saved_cs_client = @@character_set_client */; 125 | /*!40101 SET character_set_client = utf8 */; 126 | CREATE TABLE IF NOT EXISTS `whois_record` ( 127 | `whois_record_id` bigint(20) NOT NULL AUTO_INCREMENT, 128 | `created_date` varchar(200) DEFAULT NULL, 129 | `updated_date` varchar(200) DEFAULT NULL, 130 | `expires_date` varchar(200) DEFAULT NULL, 131 | `admin_contact_id` bigint(11) DEFAULT NULL, 132 | `registrant_id` bigint(11) DEFAULT NULL, 133 | `technical_contact_id` bigint(11) DEFAULT NULL, 134 | `zone_contact_id` bigint(11) DEFAULT NULL, 135 | `billing_contact_id` bigint(11) DEFAULT NULL, 136 | `domain_name` varchar(256) CHARACTER SET latin1 DEFAULT NULL, 137 | `name_servers` text, 138 | `registry_data_id` bigint(11) DEFAULT NULL, 139 | `status` text, 140 | `raw_text` longtext, 141 | `audit_created_date` timestamp NULL DEFAULT NULL, 142 | `audit_updated_date` timestamp NULL DEFAULT NULL, 143 | `unparsable` longtext, 144 | `parse_code` smallint(6) DEFAULT NULL, 145 | `header_text` longtext, 146 | `clean_text` longtext, 147 | `footer_text` longtext, 148 | `registrar_name` varchar(512) DEFAULT NULL, 149 | `data_error` smallint(6) DEFAULT '0', 150 | PRIMARY KEY (`whois_record_id`), 151 | UNIQUE KEY `domain_name_index` (`domain_name`), 152 | KEY 
`audit_updated_date` (`audit_updated_date`),
153 | KEY `audit_created_date` (`audit_created_date`),
154 | KEY `FKE043A3087B556202` (`technical_contact_id`),
155 | KEY `FKE043A30879B00024` (`billing_contact_id`),
156 | KEY `FKE043A308C7212EEF` (`registry_data_id`),
157 | KEY `FKE043A308B8CF12D0` (`admin_contact_id`),
158 | KEY `FKE043A308D0C7A375` (`registrant_id`),
159 | KEY `FKE043A30820710653` (`zone_contact_id`),
160 | KEY `data_error` (`data_error`),
161 | CONSTRAINT `FKE043A30820710653` FOREIGN KEY (`zone_contact_id`) REFERENCES `contact` (`contact_id`),
162 | CONSTRAINT `FKE043A30879B00024` FOREIGN KEY (`billing_contact_id`) REFERENCES `contact` (`contact_id`),
163 | CONSTRAINT `FKE043A3087B556202` FOREIGN KEY (`technical_contact_id`) REFERENCES `contact` (`contact_id`),
164 | CONSTRAINT `FKE043A308B8CF12D0` FOREIGN KEY (`admin_contact_id`) REFERENCES `contact` (`contact_id`),
165 | CONSTRAINT `FKE043A308C7212EEF` FOREIGN KEY (`registry_data_id`) REFERENCES `registry_data` (`registry_data_id`),
166 | CONSTRAINT `FKE043A308D0C7A375` FOREIGN KEY (`registrant_id`) REFERENCES `contact` (`contact_id`)
167 | ) ENGINE=InnoDB AUTO_INCREMENT=1 ROW_FORMAT=COMPRESSED DEFAULT CHARSET=utf8;
168 | /*!40101 SET character_set_client = @saved_cs_client */;
169 | /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
170 |
171 | /*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
172 | /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
173 | /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
174 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
175 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
176 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
177 | /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
178 |
179 | -- Dump completed on 2012-01-10 5:12:00
180 |
--------------------------------------------------------------------------------
/website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | A sample script to populate a Website Contacts and Categories
4 | MySQL database from WhoisXML API data.
5 | (c) WhoisXML API, Inc. 2019.
6 | """
7 |
8 | import sys
9 | import os
10 | import binascii
11 | import datetime
12 | import argparse
13 | import gzip
14 | import pandas as pd
15 | import mysql.connector as my
16 |
17 | VERSION = "0.0.1"
18 | MYNAME = sys.argv[0].replace('./', '')
19 |
20 | parser = argparse.ArgumentParser(description='''
21 | A sample script to populate a Website Contacts and Categories
22 | MySQL database from WhoisXML API data.
23 | See usage examples in the supplied README file.''',
24 | prog=MYNAME,
25 | formatter_class=argparse.RawTextHelpFormatter)
26 |
27 | # Mysql setup
28 | parser.add_argument('--version',
29 | help='Print version information and exit.',
30 | action='version',
31 | version=MYNAME + ' ver. ' + VERSION + '\n(c) WhoisXML API Inc.')
32 | parser.add_argument('--quiet', action='store_true', help='Suppress all informative messages.')
33 | parser.add_argument('--mysql-host', default='127.0.0.1', type=str,
34 | help='Host name or IP address to reach MySQL server (optional). Default: localhost.')
35 | parser.add_argument('--mysql-port', default='3306', type=str,
36 | help='Port of MySQL database (optional). Default: 3306')
37 | parser.add_argument('--mysql-user', type=str, required=False, default='',
38 | help='User name to login to the MySQL database. 
Default: system user.') 39 | parser.add_argument('--mysql-password', type=str, required=True, default='', 40 | help='Password to login to the MySQL database') 41 | parser.add_argument('--mysql-database', type=str, required=True, 42 | help='The name of the MySQL database to load data into.') 43 | parser.add_argument('--mysql-errors', action='store_true', help='Print wrong SQL inserts.') 44 | parser.add_argument('--chunksize', type=int, help= 45 | 'Maximum size of chunks to be read from the file and committed into the DB at once. Default=100.000', 46 | default=100000) 47 | parser.add_argument('--nchunksmax', type=int, help= 48 | 'Number of chunks to load. Default=0, stands for all. Change for testing purposes.', 49 | default=0) 50 | parser.add_argument('--jsonl-file', type=str, required=True, 51 | help='The jsonl file to load') 52 | parser.add_argument('--categories-only', action='store_true', help='Categories only file. No contact info included.') 53 | args = parser.parse_args() 54 | 55 | 56 | 57 | def print_verbose(message): 58 | #Function to give some feedback 59 | if not args.quiet: 60 | sys.stderr.write( 61 | MYNAME + ' ' + datetime.datetime.now().strftime( 62 | '%Y-%m-%d %H:%M:%S') + ': ' + message + '\n') 63 | sys.stderr.flush() 64 | 65 | def is_gz_file(filepath): 66 | #Check if the file is gzipped by checking it magic number 67 | with open(filepath, 'rb') as test_f: 68 | return binascii.hexlify(test_f.read(2)) == b'1f8b' 69 | 70 | def getfield(dataobj, field_name): 71 | """Get the field from a data object 72 | or return None if it does not exist 73 | or is an empty string.""" 74 | try: 75 | result = dataobj.__getattribute__(field_name) 76 | except AttributeError: 77 | result = None 78 | try: 79 | if result.isspace(): 80 | result = None 81 | except AttributeError: 82 | pass 83 | return(result) 84 | 85 | def getdictval(dictionary, key): 86 | """Get a value from a dictionary or None if 87 | does not exist or is an empty string.""" 88 | try: 89 | result = dictionary[key] 90 | except KeyError: 91 | result = None 92 | if not result or not result.strip(): 93 | result = None 94 | return(result) 95 | 96 | #Some minor checks: if any files 97 | if args.jsonl_file is not None: 98 | if not os.path.isfile(args.jsonl_file): 99 | raise ValueError( 100 | 'the specified file "%s" does not exist'%args.jsonl_file) 101 | print_verbose('Will load data from %s\n'%args.jsonl_file) 102 | 103 | 104 | #Here we connect the database 105 | print_verbose("Opening db connection.") 106 | cnx = my.connect(user=args.mysql_user, password=args.mysql_password, 107 | host=args.mysql_host, database=args.mysql_database, 108 | port=args.mysql_port) 109 | cnx.set_charset_collation(charset='utf8mb4', collation='utf8mb4_unicode_ci') 110 | #Defining the cursor 111 | cursor = cnx.cursor(dictionary=True) 112 | print_verbose("Turning off foreign key checks.") 113 | cursor.execute("SET foreign_key_checks = 0") 114 | 115 | #Main job: inserting data 116 | nerrors = 0 117 | nchunks = 0 118 | nrecords = 0 119 | #Opening input file 120 | if is_gz_file(args.jsonl_file): 121 | print_verbose('Opening gzipped input file %s'%args.jsonl_file) 122 | infile = gzip.open(args.jsonl_file, 'rt', encoding='utf-8') 123 | else: 124 | print_verbose('Opening input file %s'%args.jsonl_file) 125 | infile = open(args.jsonl_file, 'rt') 126 | for chunk in pd.read_json(infile, chunksize=args.chunksize, lines=True, encoding='UTF-8'): 127 | records = [r for r in chunk.itertuples()] 128 | for r in records: 129 | #Main fields 130 | main_data = [getfield(r, 
'domainName'), getfield(r, 'countryCode')]
131 | if main_data[0] is None:
132 | print_verbose("Record error: undefined domain name in " + str(r))
133 | nerrors += 1
134 | continue
135 | if args.categories_only:
136 | cursor.execute(
137 | 'INSERT INTO domain(domainName, countryCode) values(%s,%s)',
138 | main_data)
139 | recordid = cursor.lastrowid
140 | else:
141 | #Meta fields
142 | main_data.append(getdictval(getfield(r, 'meta'), 'title'))
143 | main_data.append(getdictval(getfield(r, 'meta'), 'description'))
144 | #socialLinks fields
145 | main_data.append(getdictval(getfield(r, 'socialLinks'), 'facebook'))
146 | main_data.append(getdictval(getfield(r, 'socialLinks'), 'googlePlus'))
147 | main_data.append(getdictval(getfield(r, 'socialLinks'), 'instagram'))
148 | main_data.append(getdictval(getfield(r, 'socialLinks'), 'twitter'))
149 | main_data.append(getdictval(getfield(r, 'socialLinks'), 'linkedIn'))
150 | cursor.execute(
151 | 'INSERT INTO domain(domainName, countryCode, meta_title, meta_description, socialLinks_facebook, socialLinks_googlePlus, socialLinks_instagram, socialLinks_twitter, socialLinks_linkedIn) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)',
152 | main_data)
153 | recordid = cursor.lastrowid
154 | #Child records
155 | #emails
156 | for child in getfield(r, 'emails') or []:
157 | cursor.execute(
158 | 'INSERT INTO email(domainID, description, email) VALUES(%s,%s,%s)', (
159 | recordid,
160 | getdictval(child, 'description'),
161 | getdictval(child, 'email')))
162 | #phone numbers
163 | for child in getfield(r, 'phones') or []:
164 | cursor.execute(
165 | 'INSERT INTO phone(domainID, description, phoneNumber, callHours) VALUES(%s,%s,%s,%s)', (
166 | recordid,
167 | getdictval(child, 'description'),
168 | getdictval(child, 'phoneNumber'),
169 | getdictval(child, 'callHours')))
170 | #postal addresses
171 | for child in getfield(r, 'postalAddresses') or []:
172 | cursor.execute(
173 | 'INSERT INTO postalAddress(domainID, postalAddress) VALUES(%s,%s)', (
174 | recordid,
175 | child))
176 | #company names
177 | for child in getfield(r, 'companyNames') or []:
178 | cursor.execute(
179 | 'INSERT INTO companyName(domainID, companyName) VALUES(%s,%s)', (
180 | recordid,
181 | child))
182 | #Now upserting category
183 | for child in getfield(r, 'categories') or []:
184 | cursor.execute(
185 | 'INSERT IGNORE INTO category(category) VALUES(%s)',
186 | (child,))
187 | cursor.execute(
188 | 'INSERT INTO domain_category(categoryID, domainID) VALUES(%s, %s)',
189 | (child, recordid))
190 | nrecords += 1
191 | cnx.commit()
192 | print_verbose(
193 | 'Committed chunk %d (%d records of max %d)\n' % (
194 | nchunks + 1, len(records), args.chunksize))
195 | nchunks += 1
196 | if nchunks == args.nchunksmax:
197 | break
198 |
199 | print_verbose("Committed %d records into domains" % nrecords)
200 | print_verbose("Total number of errors: %d " % nerrors)
201 | print_verbose("Closing input file")
202 | infile.close()
203 | print_verbose("Turning on foreign key checks.")
204 | cursor.execute("SET foreign_key_checks = 1")
205 | print_verbose("Closing db connection.")
206 | cnx.close()
207 |
208 |
--------------------------------------------------------------------------------
/whoisxmlapi_whoisdownload_bash/README:
--------------------------------------------------------------------------------
1 | Changes
2 | ------
3 |
4 | 0.0.26.
5 | o Added the "domains", "verified_domains", "reserved_domains", and
6 | "missing_domains" data formats to download domain lists from
7 | quarterly ccTLD and gTLD db releases
8 | o Modified base URLs to use https instead of http.
9 |
10 | 0.0.25.
11 | o Removed the use of "realpath" as it is not available on Mac OS X by default.
12 |
13 | 0.0.24.
14 | o Added support for data feeds domain_names_dropped_whois_archive
15 | and ngtlds_domain_names_dropped_whois_archive
16 |
17 | 0.0.23.
18 | o Messages and return codes for the whois_database_combined feed
19 | revised again
20 | o Introduced the "thin" option to download data for tlds com and net
21 | from whois_database
22 |
23 | 0.0.22.
24 | o More consistent messages when downloading multipart archives,
25 | especially from the whois_database_combined feed.
26 | If there is just one file in the feed, the return code will now be 2,
27 | otherwise it is zero.
28 | o Fixed to give a return code of 1 (instead of 0) for an unhandled feed
29 | o Introduced a --show-progress option to have progress bars for the downloads
30 |
31 | 0.0.21.
32 | o More informative error messages
33 | o Minor code simplification
34 | o Fixed a bug affecting explicit specification of ssl auth files
35 | in command-line or in variables
36 |
37 | 0.0.20.
38 | o Fixed some minor bugs
39 | o Fixed some return codes, to be more coherent with the python version:
40 | Now
41 | - return code 2 is for a file which is not found on the server (previously returned 0 or 2 sometimes)
42 | - in case of success return code 0 is returned (in case of daily feeds it was buggy)
43 | - return code 1 and those greater than 2 are for abnormal termination
44 |
45 | 0.0.19.
46 | o Fixed the following archive feeds to download data
47 | from the year-named directories of past years:
48 | -domain_names_whois_archive
49 | -domain_names_whois_filtered_reg_country_archive
50 | -domain_names_whois_filtered_reg_country_noproxy_archive
51 | -ngtlds_domain_names_whois_archive
52 | -ngtlds_domain_names_whois_filtered_reg_country_archive
53 | -ngtlds_domain_names_whois_filtered_reg_country_noproxy_archive
54 | o Added support for the following data feeds:
55 | -domain_names_diff_whois_filtered_reg_country2
56 | -cctld_discovered_domain_names_whois_archive
57 | -reported_for_removal
58 | o Implemented a "--list-supported-tlds" option
59 |
60 | 0.0.18.
61 | o Added support for ssl key authentication.
62 |
63 | 0.0.17:
64 | o Added domain_names_whois2 target.
65 |
66 | 0.0.16:
67 | o Handling 403: Forbidden errors in wget
68 | o Dry run emulates 10 part-files when downloading multipart archives
69 | (Can be configured with the DRY_RUN_MULTIFILE_LIMIT variable)
70 | o Quarterly downloads require explicit database specifications
71 |
72 | 0.0.15:
73 | o Added daily ngtlds feeds such as:
74 | - ngtlds_domain_names_whois_filtered_reg_country
75 | - ngtlds_domain_names_whois_filtered_reg_country_noproxy
76 | - ngtlds_domain_names_whois_archive
77 | - ngtlds_domain_names_whois_filtered_reg_country_archive
78 | - ngtlds_domain_names_whois_filtered_reg_country_noproxy_archive
79 |
80 | 0.0.14:
81 | o Cctld newly discovered daily data domain_names_new and domain_names_whois added.
82 | o Cctld newly registered daily data domain_names_new, domain_names_whois, domain_names_dropped,
83 | and domain_names_dropped_whois added.
84 | o Fixed a bug with whois_database_combined not working exactly as intended.
85 | o domain_list_quarterly sql feed fixed
86 |
87 | 0.0.13:
88 | o Fixed bug where the ngtlds feed was getting an error when downloading supported tlds
89 |
90 | 0.0.12:
91 | o Fixed bug where "bad feeds" were not being handled properly.
92 | o Added the file format "all", which downloads all available file formats for a given feed.
93 | o Added full and sql file formats to the domain_names_whois_archive feed.
94 |
95 | 0.0.11:
96 | o Added data feed support for domain_names_dropped_whois and ngtlds_domain_names_dropped_whois
97 |
98 | 0.0.10:
99 | o Added download support for legacy gtld quarterly data v1 and v2
100 |
101 | 0.0.9:
102 | o Input date format fixed.
103 |
104 | 0.0.8:
105 | o Added the feed whois_database_combined. Could not test because auth failed.
106 |
107 | 0.0.7:
108 | o Removed the --dry command-line options from the tests. There are now two tests:
109 | ut_whoisdownload with dry run and ft_whoisdownload with full download.
110 |
111 | 0.0.6:
112 | o Added the --tld-file command-line option. If it is provided, the script will expect a
113 | list of domains (e.g. asia,us,tel) in the file and will use that instead of
114 | the downloadable tld files.
115 | o Modified and enhanced the tests to check the new tld download methods.
116 | o If needed, the supported_gtlds and supported_ngtlds files are loaded from the
117 | directory where the script was started. This was wrong in the previous
118 | version.
119 | o Downloading of tlds for the whois_database feed is fixed.
120 |
121 | 0.0.5:
122 | o Added the new ways to download the tld list. Need to be tested.
123 |
124 | 0.0.4:
125 | o All the date formats that are supported by the date(1) utility are now
126 | supported by the --date option.
127 | o Because of this I had to re-organize the code, and so multiple dates are now
128 | available by using the --date option more than once (e.g.
129 | --date="2015-10-20" --date="2015-10-21"). This date change proved to be more
130 | of an annoyance than I thought, sorry about that.
131 | o It is now possible to run the program without the --date option if the
132 | data feed needs no date to be set.
133 | o Added the long file format name versions, so from now on the regular_csv,
134 | simple_csv, full_csv and mysqldump file formats are also accepted.
135 | o Some data sources dropped some older dates I used in tests, so I had to
136 | modify my tests and re-run them.
137 |
138 | SSL authentication setup
139 | ------------------------
140 | Consult README.SSL on how to set up ssl authentication if you have
141 | obtained the required files from WhoisXML API, Inc.
142 |
143 | If you have set up ssl authentication, you can use
144 |
145 | --auth-type=ssl
146 |
147 | instead of the --user and --password options in the examples below.
148 |
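For instance, Example 1 below would read as follows with ssl authentication
(an illustrative variant, assuming the certificate files are installed as
described in README.SSL):

./whoisdownload.sh \
	--auth-type=ssl \
	--date=2018-01-10 \
	--output-dir=./tmp \
	--data-feeds=domain_names_new
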
149 | Examples
150 | --------
151 | Example 1
152 | Downloading the domain_names_new data source for all the top level domains at
153 | one specific date.
154 |
155 | ./whoisdownload.sh \
156 | --user=demo \
157 | --password=XXXXXX \
158 | --date=2018-01-10 \
159 | --output-dir=./tmp \
160 | --data-feeds=domain_names_new
161 |
162 |
163 | Example 2
164 | Download the ngtlds_domain_names_new feed for three consecutive days for the
165 | abc and actor domains.
166 |
167 | ./whoisdownload.sh \
168 | --user=demo \
169 | --password=XXXXXX \
170 | --tld="abc actor" \
171 | --date=2018-01-10 \
172 | --n=3 \
173 | --output-dir=./tmp \
174 | --data-feeds=ngtlds_domain_names_new
175 |
176 |
177 | Example 3
178 | Downloading the domain_names_new data source of the .aero domain for 14 days
179 | starting at a specific date.
180 |
181 | ./whoisdownload.sh \
182 | --user=demo \
183 | --password=XXXXXX \
184 | --date=2018-01-10 \
185 | --output-dir=./tmp \
186 | --tld=aero \
187 | --n=14 \
188 | --data-feeds=domain_names_new
189 |
190 |
191 | Example 4
192 | Downloading the domain_names_whois data source for all the supported tlds at a
193 | specific date.
194 |
195 | ./whoisdownload.sh \
196 | --user=demo \
197 | --password=XXXXXX \
198 | --date=2018-01-10 \
199 | --output-dir=./tmp \
200 | --data-feeds=domain_names_whois
201 |
202 |
203 | Example 5
204 | Downloading files for two data sources, two domains and two dates, six
205 | downloads altogether.
206 |
207 | ./whoisdownload.sh \
208 | --user=demo \
209 | --password=XXXXXX \
210 | --output-dir=./tmp \
211 | --date="2018-01-20 2018-02-10" \
212 | --tld="org info" \
213 | --data-feeds="domain_names_new domain_names_dropped"
214 |
215 |
216 | Example 6
217 |
218 | Download the v20 version of the whois_database for one tld. (The date
219 | argument is required but ignored.)
220 |
221 | ./whoisdownload.sh \
222 | --verbose \
223 | --user=demo \
224 | --password=XXXXXX \
225 | --file-format=simple \
226 | --db-version=v20 \
227 | --date=2018-01-01 \
228 | --tld=tel \
229 | --output-dir=./tmp \
230 | --data-feeds=whois_database
231 |
232 |
233 | Example 7
234 | Download the v19 version of the whois_database for all tlds.
235 | (The date argument is required but ignored.)
236 |
237 | ./whoisdownload.sh \
238 | --verbose \
239 | --user=demo \
240 | --password=XXXXXX \
241 | --file-format=simple \
242 | --db-version=v19 \
243 | --date=2018-01-01 \
244 | --output-dir=./tmp \
245 | --data-feeds=whois_database
246 |
247 | Example 8
248 | Download the v6 version of the cctld whois_database for all tlds.
249 | (The date argument is required but ignored.)
250 |
251 | ./whoisdownload.sh \
252 | --verbose \
253 | --user=demo \
254 | --password=XXXXXXX \
255 | --file-format=simple \
256 | --date=2018-01-01 \
257 | --db-version=v6 \
258 | --output-dir=./tmp \
259 | --data-feeds=domain_list_quarterly
260 |
261 | Example 9
262 | Download the v6 version of the cctld whois_database for one tld.
263 | (The date argument is required but ignored.)
264 |
265 | ./whoisdownload.sh \
266 | --verbose \
267 | --user=demo \
268 | --password=XXXXXX \
269 | --file-format=simple \
270 | --date=2018-01-01 \
271 | --db-version=v6 \
272 | --tld=uk \
273 | --output-dir=./tmp \
274 | --data-feeds=domain_list_quarterly
--------------------------------------------------------------------------------
/website_contactscats_to_mysqldb/load_contactscategories_jsonl_to_mysql.txt:
--------------------------------------------------------------------------------
1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
2 | LOAD_CONTACTSCATEGORIES_JSONL_TO_MYSQL.PY - A
3 | SCRIPT TO LOAD WEBSITE CONTACTS &
4 | CATEGORIES DATA INTO MYSQL
5 |
6 |
7 | WhoisXML API, Inc. 2019.
8 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
9 |
10 |
11 | Table of Contents
12 | ─────────────────
13 |
14 | 1 About the script
15 | .. 1.1 A usable demonstration script
16 | .. 1.2 Cross-platform
17 | 2 Prerequisites
18 | .. 2.1 Python libraries
19 | .. 
20 | 3 How to use
21 | .. 3.1 Setting up a mysql database
22 | 4 Loading data with the script
23 | 5 Limitations
24 | 6 Performance notes
25 | 
26 | 
27 | version 0.0.1
28 | 
29 | 
30 | 1 About the script
31 | ══════════════════
32 | 
33 | This script is intended for subscribers of WhoisXML API's Website
34 | Contacts & Categories database download product
35 | ([http://website-contacts-database.whoisxmlapi.com]) to help load
36 | downloaded jsonl files into a MySQL database.
37 | 
38 | 
39 | 1.1 A usable demonstration script
40 | ─────────────────────────────────
41 | 
42 | The script is intended to be a programming example of how to
43 | accomplish this task, and also to be useful as it is. For this reason
44 | the script is not perfectly robust: the validity of the input files
45 | is not checked, and the exception handling is not very detailed
46 | either, in order to avoid complex and less readable code. If used
47 | properly, however, it can be used efficiently.
48 | 
49 | 
50 | 1.2 Cross-platform
51 | ──────────────────
52 | 
53 | The script should work on any system where Python and the necessary
54 | libraries are available. It has been tested on Ubuntu Linux and
55 | Microsoft Windows, but it should work on other platforms such as Mac
56 | OS X, too.
57 | 
58 | 
59 | 2 Prerequisites
60 | ═══════════════
61 | 
62 | 2.1 Python libraries
63 | ────────────────────
64 | 
65 | The loader script is written in Python 3; it was tested with
66 | Python 3.6.7 on Linux and Python 3.7.2 on Windows. It uses the
67 | following libraries:
68 | 
69 | • Pandas ([https://pandas.pydata.org/]), a data analysis library,
70 | used to efficiently load jsonl files in chunks.
71 | • MySQL connector: the library to access MySQL databases, provided by
72 | Oracle.
73 | 
74 | While Pandas can simply be installed with the package manager ("pip
75 | install pandas"), the vanilla MySQL connector is available from its
76 | download web-page (dev.mysql.com/downloads/connector/python/). On
77 | some systems you may install both with the package manager of your OS
78 | (e.g. with "apt" on Debian-flavor Linuxes, including Ubuntu and Mint).
79 | 
80 | 
81 | 2.2 MySQL settings
82 | ──────────────────
83 | 
84 | As the data can contain Unicode characters stored on 4 bytes,
85 | while MySQL uses a 3-byte encoding by default, it is recommended to
86 | enable 4-byte Unicode system-wide by adding the lines
87 | ┌────
88 | │ character-set-server = utf8mb4
89 | │ collation-server = utf8mb4_unicode_ci
90 | └────
91 | to the configuration file of the MySQL server
92 | (e.g. /etc/mysql/mysql.conf.d/mysqld.cnf on Ubuntu systems) and
93 | restarting the service. Please consult the documentation of MySQL if
94 | you prefer not to modify this setting system-wide.
95 | 
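To check that the setting is in effect after the restart, one can
query the server (a quick sanity check; the variable name is a
standard MySQL one):
┌────
│ mysql -e "SHOW VARIABLES LIKE 'character_set_server';"
└────
which should report utf8mb4.
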
In this demonstration we 112 | assume that the two sample files: categories_database_sample.jsonl 113 | (domain names only) and contacts_database_sample.jsonl (full data) are 114 | to be loaded into a MySQL database. In a production environment the 115 | desired files should be used instead. 116 | 117 | The script automatically detects and decompresses gzip compressed 118 | files, so if you have downloaded a large file in this format, there is 119 | no need to uncompress it in advance. 120 | 121 | The following description has been prepared on a Ubuntu Linux 122 | system. The script works on Windows, too, provided that Python, MySQL 123 | and the necessary packages are installed. In the Windows command-line 124 | the "./" before the script's name is not needed. 125 | 126 | 127 | 3.1 Setting up a mysql database 128 | ─────────────────────────────── 129 | 130 | We create a user and a database for the purpose: as a root user we do 131 | ┌──── 132 | │ create user websitecc identified by 'websitecc'; 133 | │ create database websitecc; 134 | │ grant all on websitecc.* to websitecc; 135 | └──── 136 | Next we create the schema for loading data. For this reason we run the 137 | appropriate ddl file supplied for this script: 138 | ┌──── 139 | │ mysql --user=websitecc --password=websitecc --database=websitecc < website_categories.ddl 140 | └──── 141 | for domain-names only data or 142 | ┌──── 143 | │ mysql --user=websitecc --password=websitecc --database=websitecc < website_contacts_categories.ddl 144 | └──── 145 | for complete data. This will create a simple schema with an n-m 146 | connection in the domains only case: [./website_categories_schema.png] 147 | 148 | and a similar schema with a more detailed "domain" table having more 149 | children when contact data are also included: 150 | [./website_contats_categories_schema.png] 151 | 152 | (Note: we use TEXT fields because of the unpredictable lengths and 153 | LONGBLOBs because of the unpredictable character sets. This should be 154 | taken into account when putting indices on these fields.) 155 | 156 | 157 | 4 Loading data with the script 158 | ══════════════════════════════ 159 | 160 | Once the database has been prepared as described, and the data have 161 | been downloaded, they can be loaded into the database with the 162 | script. The script is self-documenting, it supports the –help option: 163 | ┌──── 164 | │ ./contactscategories_jsonl_to_mysql.py --help 165 | └──── 166 | gives information about the syntax and a full list of currently 167 | supported options. 168 | 169 | A typical way of loading domain names only is 170 | ┌──── 171 | │ ./load_contactscategories_jsonl_to_mysql.py \ 172 | │ --jsonl-file categories_database_sample.jsonl --domain-names-only \ 173 | │ --mysql-password websitecc --mysql-database websitecc \ 174 | │ --mysql-user websitecc 175 | └──── 176 | whereas loading full data can be done with 177 | ┌──── 178 | │ ./load_contactscategories_jsonl_to_mysql.py \ 179 | │ --jsonl-file contacts_database_sample.jsonl \ 180 | │ --mysql-password websitecc --mysql-database websitecc \ 181 | │ --mysql-user websitecc 182 | └──── 183 | Notes: 184 | • The –domain-names-only option can be used with files having full 185 | information and/or schemata with full information. In this case the 186 | contact information shall be ignored. The other way around, trying 187 | to load domain names only files without this option will result in 188 | error messages and malfunction. 189 | 190 | • The script loads lines of the files in chunks. 
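In particular, MySQL cannot index the full length of a TEXT or
LONGBLOB column, so a prefix index has to be used. A hypothetical
example (the table and column names are illustrative only):
┌────
│ ALTER TABLE domain ADD INDEX domain_name_idx (domain_name(64));
└────
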
156 | 
157 | 4 Loading data with the script
158 | ══════════════════════════════
159 | 
160 | Once the database has been prepared as described, and the data have
161 | been downloaded, they can be loaded into the database with the
162 | script. The script is self-documenting; it supports the --help option:
163 | ┌────
164 | │ ./load_contactscategories_jsonl_to_mysql.py --help
165 | └────
166 | gives information about the syntax and a full list of currently
167 | supported options.
168 | 
169 | A typical way of loading domain names only is
170 | ┌────
171 | │ ./load_contactscategories_jsonl_to_mysql.py \
172 | │ --jsonl-file categories_database_sample.jsonl --domain-names-only \
173 | │ --mysql-password websitecc --mysql-database websitecc \
174 | │ --mysql-user websitecc
175 | └────
176 | whereas loading full data can be done with
177 | ┌────
178 | │ ./load_contactscategories_jsonl_to_mysql.py \
179 | │ --jsonl-file contacts_database_sample.jsonl \
180 | │ --mysql-password websitecc --mysql-database websitecc \
181 | │ --mysql-user websitecc
182 | └────
183 | Notes:
184 | • The --domain-names-only option can be used with files having full
185 | information and/or schemata with full information. In this case the
186 | contact information will be ignored. The other way around, trying
187 | to load domain-names-only files without this option will result in
188 | error messages and malfunction.
189 | 
190 | • The script loads lines of the files in chunks. Commits occur after
191 | each chunk; this is a typical approach in relational data
192 | population. The size of the chunks can be tuned with the --chunksize
193 | option, and the number of chunks to be loaded can be limited by the
194 | --nchunks option. (A sketch of this chunked approach follows this list.)
195 | 
196 | • Foreign key checks are turned off at the beginning and turned on at
197 | the end for better performance.
198 | 
199 | • The categories are inserted and updated dynamically.
200 | 
201 | • If you encounter a "Memory error", decreasing the chunk size can
202 | help. This error occurs when you have a huge file to load.
203 | 
204 | 
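The essence of the chunked approach is roughly the following (a
minimal sketch, not the script itself; the table, column and jsonl
field names are illustrative only):
┌────
│ import pandas
│ import mysql.connector
│ 
│ connection = mysql.connector.connect(
│     user='websitecc', password='websitecc', database='websitecc')
│ cursor = connection.cursor()
│ 
│ # Read the jsonl file lazily, 1000 records at a time.
│ for chunk in pandas.read_json('categories_database_sample.jsonl',
│                               lines=True, chunksize=1000):
│     for record in chunk.itertuples():
│         # The real loader fills several related tables here.
│         cursor.execute('INSERT INTO domain (domain_name) VALUES (%s)',
│                        (record.domainName,))
│     # One commit per chunk keeps the transactions reasonably sized.
│     connection.commit()
│ connection.close()
└────
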
205 | 5 Limitations
206 | ═════════════
207 | 
208 | The script is mainly for demonstration purposes, but it can be used
209 | in practice, too. It has, however, the following limitations:
210 | 
211 | • The schema is hard-coded; the table names should be the same as in the
212 | provided ddl-s. This can easily be customized by rewriting the script.
213 | 
214 | • The script can only populate databases: apart from the categories, it
215 | does not verify whether a record already exists.
216 | 
217 | • It reads records with children line by line; thus e.g. e-mails
218 | belonging to multiple records will be duplicated. This violates
219 | normalization. In the future the script will have a mode to
220 | overcome this, but it will be optional for performance reasons:
221 | in many cases it is more efficient to load the data as they are and
222 | take the unnormalized nature into account.
223 | 
224 | • The script does not yet support multi-threaded operation.
225 | 
226 | 
227 | 6 Performance notes
228 | ═══════════════════
229 | 
230 | During testing with a large file we had the following experience.
231 | 
232 | The test was run on Ubuntu 18.04.1 LTS, mysqld Ver
233 | 5.7.24-0ubuntu0.18.04.1 for Linux on x86_64 (Ubuntu), Python 3.6.7,
234 | pandas 0.23.4, mysql.connector 2.1.6; on a machine with an Intel(R) Core(TM)
235 | i7-7700 CPU @ 3.60GHz and 4 gigabytes of RAM, running in a VirtualBox
236 | environment hosted on the same version of Linux, on a Dell Precision
237 | 3620 Mini Tower workstation.
238 | 
239 | Loading 12,921,323 records of contacts and categories from a gzipped
240 | jsonl file of size 951M took about 3 hours.
241 | 
242 | We also remark that under Windows 10 on the same (virtual) hardware we
243 | encountered a "Memory error", so a Windows system probably needs more
244 | memory for this task.
245 | 
--------------------------------------------------------------------------------
/whoisxmlapi_percona_loader_scripts/load_whois_percona.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #Sample script to load binary mysql dumps for a tld
4 | #
5 | #Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
6 | #
7 | #Note: IF YOU ARE READING THIS SCRIPT JUST TO COLLECT IDEAS FOR YOUR OWN LOADER,
8 | # VISIT THE END OF THE FILE WHERE THE REAL WORK IS DONE
9 | #
10 | # Global variables.
11 | #
12 | LANG=C
13 | LC_ALL=C
14 | VERSION="0.0.3"
15 | VERBOSE="no"
16 | DEBUG="no"
17 | MYNAME=$(basename $0)
18 | 
19 | #No mysql stuff by default. This is set by mandatory args.
20 | unset MYSQL_USER
21 | unset MYSQL_PASSWORD
22 | #Default mysql data directory
23 | MYSQL_DATA_DIR=/var/lib/mysql
24 | #Default mysql start and stop commands
25 | MYSQL_STOP_COMMAND="/etc/init.d/mysql stop"
26 | MYSQL_START_COMMAND="/etc/init.d/mysql start"
27 | #Dry run: do not touch the database
28 | DRY_RUN="No"
29 | #Importing generic utilities
30 | 
31 | source load_mysql_utils.sh
32 | 
33 | function printHelpAndExit()
34 | {
35 |     echo "Usage: $MYNAME [OPTION]..."
36 |     echo "$MYNAME -- loads data for given tlds"
37 |     echo "from binary mysql dumps"
38 |     echo " into a mysql database."
39 |     echo ""
40 |     echo " -h, --help Print this help and exit."
41 |     echo " -v, --version Print version information and exit."
42 |     echo " --verbose Print more messages. Recommended."
43 |     echo " --debug Print extensive debug messages."
44 |     echo " --dry-run Dry run: do not touch the db, just verify and extract data."
45 |     echo " --mysql-user=USERNAME User name to login to the mysql database (optional)."
46 |     echo " --mysql-password=PASSWORD Password to login to the data source (optional)."
47 |     echo " --mysql-data-dir=DIRECTORY The directory where mysql stores its data."
48 |     echo "     default: /var/lib/mysql"
49 |     echo "     You should have write permission on it."
50 |     echo " --import-data-dir=DIRECTORY The dump files for the tld-s are in this directory."
51 |     echo " --schema-file=SCHEMAFILE The schema file to be used when loading."
52 |     echo "     Defaults to the file whoiscrawler_mysql_schema.sql"
53 |     echo "     in the same directory where the script is."
54 |     echo " --tlds Comma-separated list of tlds to load."
55 |     echo " --db-version=STRING The version of the database to load. Required. Format: vNN, e.g. v19"
56 |     echo ""
57 |     echo "Consult the supplied README.txt for a detailed description."
58 |     echo ""
59 |     exit 1
60 | }
61 | 
62 | ARGS=$(\
63 |     getopt -o hv \
64 |     -l "help,verbose,debug,version,v,dry-run,mysql-user:,mysql-password:,\
65 | mysql-start-command:,mysql-stop-command:,mysql-data-dir:,import-data-dir:,schema-file:,tlds:,db-version:" \
66 |     -- "$@")
67 | 
68 | if [ $? -ne 0 ]; then
69 |     exit 6
70 | fi
71 | 
72 | eval set -- "$ARGS"
73 | 
74 | while true; do
75 |     case "$1" in
76 |         -h|--help)
77 |             shift
78 |             printHelpAndExit
79 |             ;;
80 | 
81 |         --verbose)
82 |             shift
83 |             VERBOSE="true"
84 |             ;;
85 | 
86 |         --debug)
87 |             shift
88 |             DEBUG="yes"
89 |             MYSQL_VERB_ARG="--verbose"
90 |             ;;
91 | 
92 |         --dry-run)
93 |             shift
94 |             DRY_RUN="Yes"
95 |             ;;
96 | 
97 |         -v|--version)
98 |             shift
99 |             printVersionAndExit
100 |             ;;
101 | 
102 |         --mysql-user)
103 |             shift
104 |             db_username=$1
105 |             shift
106 |             ;;
107 | 
108 |         --mysql-password)
109 |             shift
110 |             export MYSQL_PWD=$1
111 |             shift
112 |             ;;
113 | 
114 |         --mysql-start-command)
115 |             shift
116 |             export MYSQL_START_COMMAND=$1
117 |             shift
118 |             ;;
119 | 
120 |         --mysql-stop-command)
121 |             shift
122 |             export MYSQL_STOP_COMMAND=$1
123 |             shift
124 |             ;;
125 | 
126 |         --mysql-data-dir)
127 |             shift
128 |             MYSQL_DATA_DIR=$1
129 |             if ! [ -d "$MYSQL_DATA_DIR" ]; then
130 |                 printError "The specified mysql data directory does not exist."
131 |                 exit 1
132 |             fi
133 |             shift
134 |             ;;
135 | 
136 |         --tlds)
137 |             shift
138 |             TLDS=$1
139 |             shift
140 |             ;;
141 | 
142 |         --import-data-dir)
143 |             shift
144 |             IMPORT_DATA_DIR=$1
145 |             if ! [ -d "$IMPORT_DATA_DIR" ]; then
146 |                 printError "The specified data directory does not exist."
147 |                 exit 1
148 |             fi
149 |             shift
150 |             ;;
151 | 
152 |         --schema-file)
153 |             shift
154 |             export SCHEMA_FILE=$1
155 |             shift
156 |             ;;
157 | 
158 |         --db-version)
159 |             shift
160 |             #format check
161 |             if echo $1 | grep --quiet -E "^v[0-9]+$"; then
162 |                 DATABASE_VERSION=$1
163 |             else
164 |                 printError "Invalid db-version specification. It should be like v19 or v6."
165 |                 exit 1
166 |             fi
167 |             shift
168 |             ;;
169 | 
170 |         --)
171 |             shift
172 |             break
173 |             ;;
174 | 
175 |         *)
176 |             ;;
177 |     esac
178 | done
179 | #preliminary checks
180 | #Check if we can write mysql's data directory
181 | if ! [ -w "$MYSQL_DATA_DIR" ] && [ "$DRY_RUN" == "No" ];then
182 |     printError "You cannot write the mysql data directory. Perhaps you should run this script as root."
183 |     exit 1
184 | fi
185 | 
186 | #We need the database version
187 | if [ -z "$DATABASE_VERSION" ];then
188 |     printError "Please specify --db-version, e.g. --db-version=v20."
189 |     printError "See also the output of "
190 |     printError "$MYNAME --help"
191 |     exit 1
192 | fi
193 | 
194 | #Set up mysql login credentials if needed
195 | 
196 | if [ -n "$db_username" ]; then
197 |     MYSQL_ARGUMENTS="--user=$db_username"
198 | fi
199 | if [ -n "$MYSQL_PWD" ]; then
200 |     MYSQL_ARGUMENTS="$MYSQL_ARGUMENTS --password=$MYSQL_PWD"
201 | fi
202 | printDebug "Mysql arguments: $MYSQL_ARGUMENTS"
203 | 
204 | 
205 | if [ $DRY_RUN == "Yes" ];then
206 |     if [ $DEBUG == "yes" -o $VERBOSE == "true" ];then
207 |         mysql_here="echo MySQL would do mysql"
208 |     else
209 |         mysql_here="true"
210 |     fi
211 | else
212 |     mysql_here="mysql $MYSQL_ARGUMENTS $MYSQL_VERB_ARG"
213 | fi
214 | printDebug "Mysql: $mysql_here"
215 | 
216 | #Check for the schema file
217 | 
218 | if [ -z $SCHEMA_FILE ];then
219 |     SCHEMA_FILE="whoiscrawler_mysql_schema.sql"
220 | fi;
221 | 
222 | if ! [ -f $SCHEMA_FILE ];then
223 |     printError "The schema file $SCHEMA_FILE is not found."
224 |     exit 1
225 | else
226 |     printVerbose "Schema file: $SCHEMA_FILE"
227 | fi
228 | 
229 | #Parse the list of tlds
230 | TLDS=$(echo $TLDS | sed -e s/,/\ /g | sed -e "s/\\./_/g")
231 | printDebug "TLDS: $TLDS"
232 | 
233 | FILELIST="contact.ibd registry_data.ibd whois_record.ibd domain_names_whoisdatacollector.ibd"
234 | 
235 | for TLD in $TLDS;do
236 |     printVerbose "Checking data for tld: $TLD"
237 |     TLDDOT=$(echo $TLD | tr _ .)
238 |     if [ -f $IMPORT_DATA_DIR/$TLDDOT.7z ];then
239 |         printVerbose "Compressed data found for domain $TLD, uncompressing"
240 |         wd=$(pwd)
241 |         cd $IMPORT_DATA_DIR
242 |         p7zip -d $TLDDOT.7z
243 |         cd $wd
244 |     fi
245 |     #For gtlds and cctlds we have different naming conventions
246 |     if [ -d "$IMPORT_DATA_DIR"/whoiscrawler_"$DATABASE_VERSION"_"$TLD" ];then
247 |         TLDDIR="$IMPORT_DATA_DIR"/whoiscrawler_"$DATABASE_VERSION"_"$TLD"
248 |     else
249 |         TLDDIR="$IMPORT_DATA_DIR"/domains_whoiscrawler_"$DATABASE_VERSION"_"$TLD"
250 |     fi
251 |     printDebug "TLD subdirectory:" $TLDDIR
252 | 
253 |     printVerbose "Checking files in $TLDDIR"
254 |     for FILE in $FILELIST;do
255 |         printDebug "Checking $TLDDIR/$FILE"
256 |         if ! [ -f "$TLDDIR/$FILE" ];then
257 |             printError "File $TLDDIR/$FILE is missing."
258 |             exit 1
259 |         fi
260 |     done
261 |     printVerbose "Files for $TLD are found, OK."
262 | done
263 | 
264 | #At this point we have all the files and all the information to load the database,
265 | #so we can do the
266 | #REAL JOB:
267 | TABLES="contact domain_names_whoisdatacollector registry_data whois_record"
268 | G_START_TIME=$(date +%s)
269 | 
270 | for TLD in $TLDS;do
271 |     printVerbose "Loading data for tld: $TLD"
272 |     #For gtlds and cctlds we have different naming conventions
273 |     if [ -d "$IMPORT_DATA_DIR"/whoiscrawler_"$DATABASE_VERSION"_"$TLD" ];then
274 |         TLDDIR="$IMPORT_DATA_DIR"/whoiscrawler_"$DATABASE_VERSION"_"$TLD"
275 |         DB=whoiscrawler_"$DATABASE_VERSION"_"$TLD"
276 |     else
277 |         TLDDIR="$IMPORT_DATA_DIR"/domains_whoiscrawler_"$DATABASE_VERSION"_"$TLD"
278 |         DB=domains_whoiscrawler_"$DATABASE_VERSION"_"$TLD"
279 |     fi
280 |     printVerbose "Creating database $DB."
281 |     $mysql_here -e "CREATE DATABASE $DB"
282 |     printVerbose "Loading schema."
283 |     $mysql_here "$DB" < "$SCHEMA_FILE"
284 | 
285 |     printVerbose "importing tablespaces"
286 |     #Each table's empty tablespace is discarded, so that the dumped .ibd file can replace it.
287 |     for table in $TABLES; do
288 |         START_TIME=$(date +%s)
289 |         q="set FOREIGN_KEY_CHECKS=0;ALTER TABLE $DB.$table DISCARD TABLESPACE;"
290 |         printDebug "$q"
291 |         $mysql_here -e "$q"
292 |         file="$table.ibd"
293 |         printVerbose "Copying table file $file from $TLDDIR to $MYSQL_DATA_DIR/$DB"
294 |         if [ $DRY_RUN == "No" ];then
295 |             printVerbose "Stopping MySQL server before copying."
296 |             $MYSQL_STOP_COMMAND
297 |             cp "$TLDDIR/$file" "$MYSQL_DATA_DIR/$DB/."
298 |             chown -R mysql:mysql "$MYSQL_DATA_DIR/$DB"
299 |             printVerbose "Starting MySQL again."
300 |             $MYSQL_START_COMMAND
301 |         fi
302 |         printVerbose "Importing tablespace"
303 |         q="ALTER TABLE $DB.$table IMPORT TABLESPACE"
304 |         printDebug "$q"
305 |         $mysql_here -e "$q"
306 | 
307 |         END_TIME=$(date +%s)
308 |         DUR=$((END_TIME-START_TIME))
309 |         printVerbose "Import of the table $table for the tld $TLD took $DUR seconds"
310 |     done
311 | done
312 | 
313 | G_END_TIME=$(date +%s)
314 | GDUR=$((G_END_TIME-G_START_TIME))
315 | printVerbose "$MYNAME has finished in $GDUR seconds."
316 | 
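#An illustrative invocation (the credentials, directory and tlds below
#are examples only; the script needs write access to the mysql data
#directory, so it is typically run as root):
#
#  ./load_whois_percona.sh --verbose \
#      --mysql-user=root --mysql-password=secret \
#      --import-data-dir=/data/whois_dumps \
#      --db-version=v20 \
#      --tlds=aero,tel
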
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/SPECIFICATIONS.txt:
--------------------------------------------------------------------------------
1 | SPECIFICATIONS.txt for
2 | 
3 | download_whois_data.py
4 | 
5 | Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
6 | -------------------------------------------------------------------
7 | 
8 | This document is intended for developers and advanced users.
9 | 
10 | It provides a declarative specification. The requirements which the
11 | script is supposed to meet by design are described. The business logic
12 | of the download process is outlined. The return codes of the script
13 | are listed and their possible causes are defined.
14 | 
15 | For a brief guide on how to use the program consult "README.txt".
16 | 
17 | Contents:
18 | ---------
19 | 
20 | 1. Requirements met by the downloader script
21 | 
22 | 2. The download process
23 | 
24 | 3. Exit codes
25 | 
26 | 1. Requirements met by the downloader script
27 | --------------------------------------------
28 | 
29 | The script is designed to be a simple utility to support http-based
30 | downloading of data from the feeds provided by WhoisXML API, Inc.
31 | 
32 | It is designed to meet the following requirements:
33 | 
34 | -The script is cross-platform. It is supposed to run on any platform
35 | on which Python >= 3.6.x (or the legacy Python >= 2.7.x) and the
36 | dependencies of the script are available.
37 | 
38 | -All its functions are available from the command-line, using command-line
39 | arguments.
40 | 
41 | -It is possible to operate the script with a series of GUI dialogs
42 | instead of command-line parameters.
43 | 
44 | -It is subscription-independent: all possible data feeds and formats
45 | are offered, regardless of the type of the subscription used for the
46 | authentication. The script does not verify permissions; it reports an
47 | error if access to the given resource is denied.
48 | 
49 | -It supports plain http access with simple http authentication as well
50 | as https access with ssl key-based authentication.
51 | 
52 | -The available data are specified in a config file (feeds.ini)
53 | provided with the scripts. The feeds.ini file is part of the
54 | distribution; the end-users are not supposed to modify it.
55 | 
56 | -The script determines the list of files to be downloaded offline, based
57 | on the feeds' configuration and the parameters provided (in
58 | command-line arguments or in the dialog utility), before the actual
59 | download process.
60 | 
61 | -The files in the target directory are arranged in the same directory
62 | structure as on the server.
63 | 
64 | -The script reports, at the end of its operation, the list of files
65 | which could not be downloaded. This is not necessarily an error,
66 | as the predetermined list may contain files which
67 | only exist under certain circumstances.
68 | 
69 | -The script does not verify the dates and quarterly database versions
70 | specified. A wrong specification results in error messages or in
71 | reports on files which could not be downloaded.
72 | 
73 | -It downloads md5 checksums before each file whenever available. Files
74 | which already exist in the target directory are downloaded anew if
75 | and only if their checksum differs from the checksum on the
76 | server. In this way the script can be used to verify or synchronize a
77 | local file set.
78 | 
79 | -The downloading of already existing (partial) files is resumed by
80 | default. This can be overridden by the --no-resume option: in this
81 | case, existing files not matching their md5 sums are downloaded again
82 | from scratch.
83 | 
84 | 
85 | 2. The download process
86 | -----------------------
87 | 
88 | The script follows a streamlined procedure for downloading all the needed
89 | data. It is outlined in this section.
90 | 
91 | Phase I: Preparation
92 | 
93 | The target directory is not modified in this phase.
94 | 
95 | The parameters provided by the user are read, either by parsing the
96 | command-line arguments or through a sequence of dialog windows,
97 | depending on the mode of operation. During and after this process
98 | there are some consistency checks. Upon the failure of any of these
99 | checks the script terminates with an error: it exits with an error
100 | message and an error code; see Section 3 for the list of error codes.
101 | 
102 | In command-line mode the following procedure occurs after parsing and
103 | checking the arguments. (In interactive mode these steps are part of
104 | the verification process during the interaction with the user.)
105 | 
106 | Based on this information the feed downloader components of the
107 | script are initialized. (These are objects belonging to the
108 | "WhoisDataFeed" class.)
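For orientation, an entry of feeds.ini has roughly the following shape
(purely illustrative: only the per-feed attributes mentioned in this
document are shown, and the values, which ship with the distribution,
are elided):

    [domain_names_new]
    access_test_file = ...
    supported_tlds_url = ...
    alt_tlds_url = ...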
109 | 
110 | The supplied login credentials are verified by downloading the access
111 | test file of each of the feeds (specified as the "access_test_file"
112 | attribute in feeds.ini). Failure of any of these checks results in
113 | termination with an error.
114 | 
115 | The list of available tlds for each feed is determined by downloading
116 | the files specifying the actual list of supported tlds (specified in
117 | the "supported_tlds_url" attribute in feeds.ini). In the case of daily
118 | feeds, it is also possible to determine the list based on the list of
119 | tlds that contain changes on the given day. The --only-changed option
120 | results in this behavior. In this case, the list in the "alt_tlds_url"
121 | attribute of the feeds.ini file will be used. When trying to download
122 | with the --only-changed option from a feed that has no "alt_tlds_url"
123 | specified, an error will occur. If the list of supported tlds is date
124 | dependent, the supported tlds list is considered to be the union of
125 | all of these. The tlds for which the download will be carried out will
126 | be the intersection of this set with the set specified by the user.
127 | 
128 | By the end of this phase the list of files to be downloaded is
129 | determined.
130 | 
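Schematically (a toy illustration with made-up tld sets):

    supported_tlds = {'com', 'net', 'org', 'info'}  # union over the dates involved
    requested_tlds = {'com', 'org', 'aero'}         # specified by the user
    tlds_to_download = supported_tlds & requested_tlds  # {'com', 'org'}
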
131 | Phase II: downloading the files
132 | 
133 | The script loops through the list of files to be downloaded.
134 | 
135 | For each file, the following logic is followed:
136 | 
137 | 0. In the case of daily feeds which have a mechanism indicating whether
138 | the data of the feed for the given day in the given format are complete,
139 | the script checks the completeness of the data. If the data are
140 | incomplete, a warning will be given. If the --no-premature option is
141 | used, the download of the data in the given format from the given feed
142 | for the given day will be skipped.
143 | 
144 | 1. The md5 checksum of the file is downloaded from the server. (Most
145 | feeds support it; this step is skipped if the feed does not support
146 | md5 checksums.)
147 | 
148 | 2. The consistency of the local file with this md5 checksum is
149 | verified. If the file is there and consistent with its md5 checksum,
150 | the downloading of the file is considered complete and the process
151 | is finished for this file.
152 | 
153 | 3. If the file does not exist, there is no md5 file, or the md5 check
154 | fails, it is verified that the number of download attempts for this
155 | file does not exceed 3 (or the number specified in the --maxtries
156 | option). If it is exceeded, the file is considered unavailable and the
157 | process is finished for the file.
158 | 
159 | 4. The downloading of the file is initiated. After the download attempt
160 | (regardless of its success), the process is repeated from step 1 for
161 | the file. By default the downloading of an existing, possibly broken
162 | partial download is resumed.
163 | 
164 | Phase III: report and exit.
165 | 
166 | In verbose mode a list of the files which were unavailable is reported.
167 | The script terminates with exit code 0 if all files were checked
168 | and found O.K. or have been downloaded as necessary. A return code 2
169 | is generated if there were unavailable files.
170 | 
171 | 3. Exit codes
172 | -------------
173 | 
174 | 0: Normal termination.
175 | 
176 | 1: Abnormal termination.
177 |    This is the error code given upon most errors:
178 |    - No feed is specified for downloading.
179 |    - The feed specified for downloading does not exist (invalid feed).
180 |    - No data format is specified for downloading.
181 |    - The chosen feed does not support the chosen data format.
182 |    - The database version of a quarterly feed is not specified or
183 |      it is not of the expected form
184 |      (character "v" followed by a number, e.g. v20).
185 |    - The start date for downloading from a daily feed is not specified
186 |      or it is not in the format "YYYYMMDD".
187 |    - The end date for downloading from a daily feed is given but
188 |      not in the format "YYYYMMDD".
189 |    - The end date of the interval for daily feed downloading is earlier than
190 |      the start date.
191 |    - Login failed due to a bad login name or password, or bad SSL credentials.
192 |    - The specified quarterly database does not exist
193 |      (results in a "Login failed" message).
194 |    - The list of tlds to be downloaded is not specified.
195 |    - None of the tlds specified for downloading are supported by the feed.
196 |    - The output directory is not specified or does not exist.
197 |    - Invalid feed configuration file: one or more feeds are ill-defined.
198 |    - Password auth chosen, no password given, and ~/.whoisxmlapi_login.ini does
199 |      not exist.
200 |    - The SSL credentials for authentication are invalid.
201 |    - The download session is not open when downloading
202 |      (internal issue, should not occur).
203 |    - Database version specified for a daily feed
204 |      (internal issue, should not occur).
205 |    - Time interval specified for a quarterly feed
206 |      (internal issue, should not occur).
207 |    - The supported_tlds file for the feed cannot be downloaded.
208 | 
209 | 2: Normal termination, but some files which can or should be there according
210 |    to the specification were not there.
211 |    (Possible causes: no file on the server because it is not yet generated,
212 |    no file on the server because there was no change in the tld on
213 |    the given day, etc.)
214 | 
215 | 3: Premature daily data. Some daily data in some format for some days
216 |    were not yet finalized on the server.
217 | 
218 | 6: Informational or canceled action.
219 |    Possible causes are:
220 |    - In interactive mode, "Cancel" was pressed in a dialog window.
221 |    - The program was invoked with the --list-feeds option.
222 |    - The program was invoked with the --list-dataformats option.
223 |    - The program was invoked with the --list-tlds option.
224 | 
225 | 
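A calling script can branch on these codes. A minimal sketch in the
shell (the actual download arguments are elided):

    ./download_whois_data.py ...
    case $? in
        0) echo "All files verified or downloaded." ;;
        2) echo "Finished, but some expected files were unavailable." ;;
        3) echo "Some daily data were not yet finalized on the server." ;;
        *) echo "The download failed or was canceled." ;;
    esac
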
--------------------------------------------------------------------------------
/whoisxmlapi_download_whois_data/whois_utils/whois_web_download_utils.py:
--------------------------------------------------------------------------------
1 | # Web download module of Whois API LLC end user scripts
2 | #
3 | # Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
4 | #
5 | 
6 | from __future__ import print_function
7 | 
8 | try:
9 |     from urllib.parse import urlparse
10 | except ImportError:
11 |     from urlparse import urlparse
12 | try:
13 |     from HTMLParser import HTMLParser
14 | except ImportError:
15 |     from html.parser import HTMLParser
16 | 
17 | import requests
18 | import os, sys, hashlib, re
19 | import datetime
20 | import time
21 | 
22 | import whois_utils.whois_user_interaction as whois_user_interaction
23 | from whois_utils.whois_user_interaction import *
24 | whois_user_interaction.VERBOSE = True
25 | whois_user_interaction.DEBUG = True
26 | 
27 | class Indexparser(HTMLParser):
28 |     """This parser parses an autoindexed directory listing and collects the
29 |     file names: the href attributes of links not containing a slash."""
30 |     FileList = []
31 |     def reset_filelist(self):
32 |         self.FileList = []
33 |     def handle_starttag(self, tag, attrs):
34 |         if tag == 'a':
35 |             for attr in attrs:
36 |                 if not re.search(r'/', attr[1]):
37 |                     self.FileList.append(attr[1])
38 | 
39 | Index_Parser = Indexparser()
40 | 
41 | def md5_check( path_filename, md5_file_path ):
42 |     """ Determines if the md5 checksum checks out.
43 | 
44 |     Return:
45 |     Returns True if the checksum is correct,
46 |     False if it is wrong or either the file or the checksum does not exist.
47 |     """
48 |     try:
49 |         calc_check_sum = calc_md5( path_filename )
50 |         with open( md5_file_path ) as md5_file:
51 |             correct_check_sum = md5_file.readline().split()[0].strip()
52 |         if( calc_check_sum == correct_check_sum ):
53 |             print_verbose("MD5 check passed for %s"%path_filename)
54 |             return True
55 |         print_verbose("MD5 check failed for %s"%path_filename)
56 |         return False
57 |     except Exception as e:
58 |         print_verbose("Exception in MD5 check for %s:\n%s"%(path_filename,str(e)))
59 |         return False
60 | 
61 | def calc_md5( path_filename ):
62 |     """ Calculates the md5 of a file.
63 | 
64 |     Return:
65 |     Returns the hex digits in string form representing the md5 of the file.
66 |     """
67 |     hash_md5 = hashlib.md5()
68 |     with open( path_filename , "rb") as f:
69 |         for chunk in iter(lambda: f.read(4096), b""):
70 |             hash_md5.update(chunk)
71 |     return hash_md5.hexdigest()
72 | 
73 | 
74 | def web_download_and_check_file(url, md5url, session, output_dir, maxtries, no_resume):
75 |     """Given a session, downloads the file and its md5. If it fails according to the md5, retries maxtries times."""
76 |     filename = os.path.basename(urlparse(url).path)
77 |     filename = os.path.abspath(os.path.join(output_dir, filename))
78 |     if md5url is not None:
79 |         md5filename = os.path.basename(urlparse(md5url).path)
80 |         md5filename = os.path.abspath(os.path.join(output_dir, md5filename))
81 |     else:
82 |         md5filename = None
83 | 
84 |     gotit = False
85 |     force = False
86 |     giveup = False
87 |     resume = not no_resume
88 |     ntries = 0
89 |     while not gotit and not giveup and ntries < maxtries:
90 |         print_verbose('Verified download of %s: attempt #%d' % (url, ntries+1))
91 |         gotfile = web_download_file(url, session, output_dir, maxtries, force, resume=resume)
92 |         if md5url is not None:
93 |             gotmd5 = web_download_file(md5url, session, output_dir, maxtries, True, resume=False)
94 |         else:
95 |             gotmd5 = False
96 |         if gotfile and gotmd5:
97 |             gotit = md5_check(filename, md5filename)
98 |             if not gotit:
99 |                 print_verbose('Verified download: attempt #%d failed, md5 does not match. Redownloading.' % (ntries+1))
100 |         elif gotfile and not gotmd5:
101 |             print_verbose('File downloaded but no md5 sum. Unverified. This can be normal.')
102 |             gotit = True
103 |         else:
104 |             print_verbose('File not found, it may not exist on the server.')
105 |             gotit = False
106 |             giveup = True
107 |         ntries += 1
108 |         #from the second attempt on, we redownload anyway
109 |         force = True
110 |     return gotit
111 | 
112 | 
113 | def web_download_file(url, session, output_dir, maxtries, force, resume=True):
114 |     """Given a session, downloads the file into the directory. Creates the directory if it does not exist.
115 |     If force is set, the file is downloaded even if it is already there."""
116 | 
117 |     filename = os.path.basename(urlparse(url).path)
118 |     filename = os.path.abspath(os.path.join(output_dir, filename))
119 |     print_debug('File to download: %s' % (filename))
120 |     # Make the dir to output files to if it does not exist yet
121 |     if not os.path.exists(output_dir):
122 |         os.makedirs(output_dir)
123 | 
124 |     url_print = os.path.basename(url)
125 |     # Redownload the file if a problem occurs with the network
126 |     ntries = 0
127 |     if os.path.isfile(filename) and (not force) and (not resume):
128 |         print_verbose('File %s exists.' % (filename))
129 |         return(True)
130 |     else:
131 |         while ntries < maxtries:
132 |             print_debug('Try #%d' % (ntries + 1))
133 |             resume_header = None
134 |             already_have = 0
135 |             if resume:
136 |                 file_open_mode='ab'
137 |                 try:
138 |                     already_have = os.path.getsize(filename)
139 |                     print_verbose("Already have: %d bytes of the file, trying to resume." % already_have)
140 |                 except:
141 |                     print_verbose("No partial file to resume.")
142 |                 resume_header = {'Range': 'bytes=%d-' % already_have}
143 |             else:
144 |                 file_open_mode='wb'
145 |             try:
146 |                 r = session.get(url, stream=True, timeout=30, headers=resume_header)
147 |                 print_debug('Status code: %s' % r.status_code)
148 |                 if r.status_code in set([200, 206]):
149 |                     with open(filename, file_open_mode) as out:
150 |                         if( 'content-length' in (r.headers) ):
151 |                             dl_total_length = int(r.headers.get('content-length')) + already_have
152 |                             print_debug('Total length: %s' % (str(dl_total_length)))
153 |                         dl_size=already_have
154 |                         dl_start_chunk = datetime.datetime.now()
155 | 
156 |                         sys.stdout.write("\r ")
157 |                         sys.stdout.flush()
158 |                         for chunk in r.iter_content(chunk_size=(1024*1024)):
159 |                             out.write(chunk)
160 |                             dl_end_chunk = datetime.datetime.now()
161 |                             dl_size += len(chunk)
162 |                             if 'content-length' in (r.headers):
163 |                                 #Report the progress as a fraction of the total length.
164 |                                 dl_done = float(dl_size) / dl_total_length
165 |                                 dl_dtdelta = ( dl_end_chunk - dl_start_chunk ).microseconds
166 |                                 sys.stdout.write("\r{0} Progress: {1:.2%}".format(url_print, dl_done))
167 |                                 sys.stdout.flush()
168 |                                 dl_start_chunk = datetime.datetime.now()
169 | 
170 |                         # Clear the progress line.
171 |                         sys.stdout.write("\r{0} [OK] ".format(url_print))
172 |                         sys.stdout.flush()
173 |                 elif r.status_code == 416:
174 |                     print("File of correct size already there")
175 |                 elif r.status_code == 401:
176 |                     print("HTTP %s Unauthorized. Login credentials are wrong." % r.status_code)
177 |                     return False
178 |                 elif r.status_code == 404:
179 |                     print("HTTP 404: %s does not exist." % (url_print))
180 |                     ntries = maxtries + 1
181 |                     return False
182 |                 else:
183 |                     sys.stdout.write("\r%s [Failed] Status code: %s \n" % (str(url_print), str(r.status_code)))
184 |                     sys.stdout.flush()
185 |                     return False
186 |             except (requests.exceptions.Timeout,
187 |                     requests.exceptions.ConnectionError):
188 |                 sys.stdout.write("\rNetwork timed out. Attempting redownload or resume.")
189 |                 sys.stdout.flush()
190 |                 time.sleep(4)
191 |                 ntries += 1
192 |                 continue
193 |             except requests.exceptions.ChunkedEncodingError:
194 |                 sys.stdout.write("\rChunked Encoding Error. Redownloading or resuming.")
195 |                 sys.stdout.flush()
196 |                 time.sleep(4)
197 |                 ntries += 1
198 |                 continue
199 |             sys.stdout.write('\n')
200 |             sys.stdout.flush()
201 |             return(True)
202 | 
203 | def webdir_ls(url, session):
204 |     """Given the session and the URL, return the list of all files in the directory
205 |     as a list of filenames to be appended to the URL.
206 |     The URL MUST point to an autoindexed directory (not verified by the function).
207 |     An empty list is returned if something goes wrong.
208 |     """
209 |     rawdirlist = session.get(url, stream=True, timeout=30)
210 |     print_debug("Getting url: %s" % (url))
211 |     print_debug("Result: " + str(rawdirlist))
212 |     if rawdirlist.status_code == 200:
213 |         Index_Parser.reset_filelist()
214 |         Index_Parser.feed(rawdirlist.text)
215 |         return(Index_Parser.FileList)
216 |     else:
217 |         return([])
--------------------------------------------------------------------------------
/whoisxmlapi_mysqldump_loaders/load_mysql_data_per_tables.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #Sample script to load ASCII mysql dumps for a tld.
4 | #This loads the schema first, then loads each table's data separately.
5 | #Recommended for a large database such as .com
6 | #Copyright (c) 2010-2021 Whois API LLC, http://www.whoisxmlapi.com
7 | #
8 | #Note: IF YOU ARE READING THIS SCRIPT JUST TO COLLECT IDEAS FOR YOUR OWN LOADER,
9 | # VISIT THE END OF THE FILE WHERE THE REAL WORK IS DONE
10 | #
11 | # Global variables.
12 | #
13 | LANG=C
14 | LC_ALL=C
15 | VERSION="0.0.3"
16 | VERBOSE="no"
17 | DEBUG="no"
18 | SHOWPROGRESS="no"
19 | MYNAME=$(basename $0)
20 | CATCOMMAND="cat"
21 | 
22 | #No mysql stuff by default. This is set by mandatory args.
23 | unset MYSQL_USER
24 | unset MYSQL_PASSWORD
25 | unset MYSQL_DATABASE
26 | 
27 | #Importing generic utilities
28 | 
29 | source load_mysql_utils.sh
30 | 
31 | function printHelpAndExit()
32 | {
33 |     echo "Usage: $MYNAME [OPTION]..."
34 |     echo "$MYNAME -- loads data for a given tld"
35 |     echo "from a schema file and separate table files"
36 |     echo " into a mysql database."
37 |     echo ""
38 |     echo " -h, --help Print this help and exit."
39 |     echo " -v, --version Print version information and exit."
40 |     echo " --verbose Print more messages."
41 |     echo " --show-progress Display progress bars when loading data from dumps."
42 |     echo "     Recommended, especially for large domains."
43 |     echo " --mysql-user=USERNAME User name to login to the mysql database (optional)."
44 |     echo " --mysql-password=PASSWORD Password to login to the data source (optional)."
45 |     echo " --mysql-database=DATABASE The name of the mysql database to load data into."
46 |     echo "     This database is created by the script by default,"
47 |     echo "     so it should not exist yet; otherwise use --no-create-db."
48 |     echo " --schema-file=SCHEMAFILE The schema file to be used when loading."
49 |     echo "     IMPORTANT: should be schema only, should not contain data."
50 |     echo "     Not to be used with --tld."
51 |     echo " --schema-only If specified, the table files are not loaded."
52 |     echo " --data-only If specified, only table data are loaded into an existing database"
53 |     echo "     with an already loaded schema."
54 |     echo " --no-create-db Does not create the database newly, supposes that it already exists."
55 |     echo " --table-files-directory=TABLEFILESDIR The directory where the dumps of all tables reside."
56 |     echo "     This contains the files with the actual data."
57 |     echo "     Not to be used with --tld."
58 |     echo " --tld=TLD Load data for this tld."
59 |     echo " --schema-files-dir=DIRECTORY The schema files for the tld-s are in this directory. The table files have to be in its subdirectory named 'tables'. Only for --tld."
60 |     echo " --db-version=STRING The version of the database to load. Required for --tld. Format: vNN, e.g. v19"
v19" 61 | echo "" 62 | echo "Examples:" 63 | echo "" 64 | echo " -loading sample data downloaded into a directory mysqldump_sample from " 65 | echo " http://domainwhoisdatabase.com/whois_database/sample/gtlds/v20/mysqldump_sample/aaa" 66 | echo "" 67 | echo "$MYNAME --mysql-database=sample_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --schema-files-dir=mysqldump_sample --db-version=v20 --tld=aaa --verbose --show-progress" 68 | echo "" 69 | echo " or the same task quietly, specifying the file names and paths directly:" 70 | echo "" 71 | echo "$MYNAME --schema-file=mysqldump_sample/aaa/whoiscrawler_v20_aaa_mysql_schema.sql.gz --table-files-directory=mysqldump_sample/aaa/tables --mysql-database=sample_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword" 72 | echo "" 73 | echo " -loading production data downloaded into a directory database_dump/mysqldump/aaa from" 74 | echo " http://www.domainwhoisdatabase.com/whois_database/v20/database_dump/mysqldump/aaa" 75 | echo "" 76 | echo "$MYNAME --mysql-database=production_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --schema-files-dir=database_dump/mysqldump --tld=aaa --db-version=v20 --verbose --show-progress" 77 | echo "" 78 | echo " or the same task specifying the file names and paths directly:" 79 | echo "" 80 | echo "$MYNAME --schema-file=database_dump/mysqldump/aaa/whoiscrawler_v20_aaa_mysql_schema.sql.gz --table-files-directory=database_dump/mysqldump/aaa/tables --mysql-database=prod_db_aaa --mysql-user=whoisuser --mysql-password=whoispassword --verbose --show-progress" 81 | exit 1 82 | } 83 | 84 | ARGS=$(\ 85 | getopt -o hv \ 86 | -l "help,verbose,debug,show-progress,version,v,mysql-database:,mysql-user:,mysql-password:,table-files-directory:,\ 87 | schema-file:,schema-only,data-only,no-create-db,tld:,schema-files-dir:,db-version:" \ 88 | -- "$@") 89 | 90 | 91 | if [ $? -ne 0 ]; then 92 | exit 6 93 | fi 94 | 95 | eval set -- "$ARGS" 96 | 97 | while true; do 98 | case "$1" in 99 | -h|--help) 100 | shift 101 | printHelpAndExit 102 | ;; 103 | 104 | --verbose) 105 | shift 106 | VERBOSE="true" 107 | ;; 108 | 109 | --debug) 110 | shift 111 | DEBUG="yes" 112 | VERBOSEARG="--verbose" 113 | ;; 114 | 115 | --show-progress) 116 | shift 117 | if which pv > /dev/null;then 118 | CATCOMMAND="pv" 119 | else 120 | printError "The show-progress argument needs pv to be installed (e.g. apt-get install pv)" 121 | exit 1 122 | fi 123 | ;; 124 | 125 | -v|--version) 126 | shift 127 | printVersionAndExit 128 | ;; 129 | 130 | --mysql-user) 131 | shift 132 | db_username=$1 133 | shift 134 | ;; 135 | 136 | --mysql-password) 137 | shift 138 | export MYSQL_PWD=$1 139 | shift 140 | ;; 141 | 142 | --mysql-database) 143 | shift 144 | db=$1 145 | shift 146 | ;; 147 | 148 | --schema-only) 149 | shift 150 | SCHEMAONLY="True" 151 | ;; 152 | 153 | --data-only) 154 | shift 155 | DONTCREATEDB="True" 156 | DATAONLY="True" 157 | ;; 158 | 159 | --no-create-db) 160 | shift 161 | DONTCREATEDB="True" 162 | ;; 163 | 164 | --table-files-directory) 165 | shift 166 | table_files_dir=$1 167 | if ! [ -d "$table_files_dir" ]; then 168 | printError "The directory $schema_file in which the table files should reside is not found." 169 | exit 1 170 | fi 171 | shift 172 | ;; 173 | 174 | --schema-file) 175 | shift 176 | schema_file=$(readlink -e "$1") 177 | if ! [ -f "$schema_file" ]; then 178 | printError "The specified schema file $schema_file is not found." 179 | exit 1 180 | fi 181 | #IF we have zgrep, we verify if the schema file is really schema-only. 
182 |             if [ -x "$(which zgrep)" ]; then
183 |                 if zgrep -q "Dumping data for table" $schema_file; then
184 |                     printError "The specified schema file $schema_file contains actual data."
185 |                     printError "Please specify a schema-only file."
186 |                     exit 1
187 |                 fi
188 |             fi
189 |             shift
190 |             ;;
191 | 
192 |         --tld)
193 |             shift
194 |             TLD=$1
195 |             shift
196 |             ;;
197 | 
198 |         --schema-files-dir)
199 |             shift
200 |             SCHEMA_FILES_DIR=$1
201 |             if ! [ -d "$SCHEMA_FILES_DIR" ]; then
202 |                 printError "The specified dump file directory does not exist."
203 |                 exit 1
204 |             fi
205 |             shift
206 |             ;;
207 | 
208 |         --db-version)
209 |             shift
210 |             #format check
211 |             if echo $1 | grep --quiet -E "^v[0-9]+$"; then
212 |                 DATABASE_VERSION=$1
213 |             else
214 |                 printError "Invalid db-version specification. It should be like v19 or v6."
215 |                 exit 1
216 |             fi
217 |             shift
218 |             ;;
219 | 
220 |         --)
221 |             shift
222 |             break
223 |             ;;
224 | 
225 |         *)
226 |             ;;
227 |     esac
228 | done
229 | 
230 | #some verification before doing the real job
231 | 
232 | #Set up mysql login credentials if needed
233 | if [ -n "$db_username" ]; then
234 |     MYSQL_ARGUMENTS="--user=$db_username"
235 | fi;
236 | 
237 | printDebug "Mysql arguments: $MYSQL_ARGUMENTS"
238 | printDebug "Mysql Password: $MYSQL_PWD"
239 | 
240 | if [ -n "$table_files_dir" -o -n "$schema_file" ] && [ -n "$TLD" -o -n "$SCHEMA_FILES_DIR" -o -n "$DATABASE_VERSION" ]; then
241 |     printError "Conflicting arguments. Please use either --table-files-directory + --schema-file or --tld + --schema-files-dir + --db-version."
242 |     exit 1
243 | fi
244 | 
245 | if [ -z "$db" ]; then
246 |     printError "Mysql database not specified. See $MYNAME --help"
247 |     exit 1
248 | fi
249 | 
250 | #If the tld is specified, we find out the schema file name and the tables dir.
251 | if [ -z "$schema_file" ]; then
252 |     schema_file="$SCHEMA_FILES_DIR"/"$TLD"/whoiscrawler_"$TLD"_mysql_schema.sql.gz
253 |     if [ ! -f "$schema_file" ]; then
254 |         schema_file="$SCHEMA_FILES_DIR"/"$TLD"/whoiscrawler_"$DATABASE_VERSION"_"$TLD"_mysql_schema.sql.gz
255 |     fi
256 |     #Quarterly feeds case
257 |     if [ ! -f "$schema_file" ]; then
258 |         TLDUNDERSCORE=$(echo "$TLD" | sed -e "s/\./_/g")
259 |         schema_file="$SCHEMA_FILES_DIR"/"$TLD"/domains_whoiscrawler_"$DATABASE_VERSION"_"$TLDUNDERSCORE"_mysql_schema.sql.gz
260 |     fi
261 |     if [ -z "$SCHEMAONLY" ]; then
262 |         table_files_dir="$SCHEMA_FILES_DIR"/"$TLD"/tables
263 |     fi
264 | fi
265 | 
266 | printVerbose "Schema file: $schema_file"
267 | printVerbose "Tables dir: $table_files_dir"
268 | 
269 | if [ ! -f "$schema_file" ]; then
270 |     printError "Schema file not specified or does not exist. See $MYNAME --help"
271 |     exit 1
272 | fi
273 | if [ -z $SCHEMAONLY ] && [ ! -d "$table_files_dir" ]; then
274 |     printError "The directory in which the table files should reside is not specified or does not exist."
275 |     printError "See $MYNAME --help"
276 |     exit 1
277 | fi
278 | 
279 | #THE REAL WORK STARTS HERE
280 | if [ -z "$DONTCREATEDB" ]; then
281 |     printVerbose "Creating database $db"
282 |     mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} -e "create database $db"
283 |     printVerbose "Loading mysql schema"
284 | else
285 |     printVerbose "Not creating database, --no-create-db was specified."
286 | fi
287 | 
288 | if [ -z "$DATAONLY" ]; then
289 |     if [ ${schema_file: -3} == ".gz" ]; then
290 | 
291 |         $CATCOMMAND $schema_file | gunzip -c | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db
292 |     else
293 |         $CATCOMMAND $schema_file | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db
294 |     fi
295 |     printVerbose "Mysql schema loaded."
296 | fi
297 | 
298 | if [ -n "$SCHEMAONLY" ]; then
299 |     printVerbose "Schema-only loading, so we are ready."
300 |     exit 0
301 | fi
302 | 
303 | tables="whois_record registry_data contact domain_names_whoisdatacollector"
304 | 
305 | printVerbose "Trying to drop some unnecessary indices to load faster."
306 | printVerbose " They may not exist, so mysql errors are normal here."
307 | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db -e "alter table whois_record drop index domain_name_index;alter table whois_record drop index domain_name;" >/dev/null 2>&1
308 | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db -e "alter table registry_data drop index domain_name_index;alter table registry_data drop index domain_name;">/dev/null 2>&1
309 | printVerbose "Unnecessary indices, if any, have been dropped."
310 | table_files_dir=$table_files_dir/*.sql.gz
311 | 
312 | printVerbose "Loading data from table files"
313 | for file in $table_files_dir; do
314 | 
315 |     #Time the loading of each table file.
316 |     if [ -f "$file" ]; then
317 |         time=`date +%s`
318 | 
319 |         printVerbose "loading data from file $file"
320 |         #No verbose mysql here as the echoed output can be huge.
321 |         if [ ${file: -3} == ".gz" ]; then
322 |             { echo "SET autocommit = 0;"
323 |               $CATCOMMAND "$file" | gunzip -c
324 |               echo "commit;" ; } | mysql ${MYSQL_ARGUMENTS} --force $db
325 |         elif [ ${file: -4} == ".sql" ]; then
326 |             { echo "SET autocommit = 0;"
327 |               $CATCOMMAND "$file"
328 |               echo "commit;" ; } | mysql ${MYSQL_ARGUMENTS} --force $db
329 |         fi
330 | 
331 |     fi
332 | 
333 |     time2=`date +%s`
334 |     dur=`expr $time2 - $time`
335 |     printVerbose " loading data from file $file took $dur seconds"
336 | 
337 | done
338 | printVerbose "Creating new indices"
339 | time=`date +%s`
340 | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db -e "alter table whois_record add index domain_name_index(domain_name)"
341 | mysql ${MYSQL_ARGUMENTS} ${VERBOSEARG} $db -e "alter table registry_data add index domain_name_index(domain_name)"
342 | time2=`date +%s`
343 | dur=`expr $time2 - $time`
344 | printVerbose " adding indices took $dur seconds."
345 | 
--------------------------------------------------------------------------------