├── .gitignore ├── README.md ├── data ├── config_sample.json └── last_modified.json └── src ├── ftp_aioftp.py ├── ftp_concurrency.py ├── ftp_multiprocessing.py └── get_changed_files.py /.gitignore: -------------------------------------------------------------------------------- 1 | /data/config.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ftp_async 2 | Download/upload/check changes to all files from ftp server to a specified file path concurrently, which greatly reduces the waiting time. 3 | -------------------------------------------------------------------------------- /data/config_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "FTP_HOST": "ftp.example_company.com", 3 | "FTP_USER": "foo", 4 | "FTP_PASS": "bar" 5 | } -------------------------------------------------------------------------------- /data/last_modified.json: -------------------------------------------------------------------------------- 1 | { 2 | "australia.txt": "2023-01-05 09:15:27", 3 | "australia.txt.md5": "2023-01-05 09:15:27", 4 | "austria.txt": "2023-01-05 09:15:28", 5 | "austria.txt.md5": "2023-01-05 09:15:28", 6 | "belgium.txt": "2023-01-05 09:15:28", 7 | "belgium.txt.md5": "2023-01-05 09:15:28", 8 | "british.txt": "2023-01-05 09:15:29", 9 | "british.txt.md5": "2023-01-05 09:15:29", 10 | "canada.txt": "2023-01-05 09:15:29", 11 | "canada.txt.md5": "2023-01-05 09:15:29", 12 | "dutch.txt": "2023-01-05 09:15:30", 13 | "dutch.txt.md5": "2023-01-05 09:15:30", 14 | "france.txt": "2023-01-05 09:15:30", 15 | "france.txt.md5": "2023-01-05 09:15:30", 16 | "germany.txt": "2023-01-05 09:15:31", 17 | "germany.txt.md5": "2023-01-05 09:15:31", 18 | "hongkong.txt": "2023-01-05 09:15:31", 19 | "hongkong.txt.md5": "2023-01-05 09:15:31", 20 | "india.txt": "2023-01-05 09:15:32", 21 | "india.txt.md5": "2023-01-05 
09:15:32", 22 | "italy.txt": "2023-01-05 09:15:32", 23 | "italy.txt.md5": "2023-01-05 09:15:32", 24 | "japan.txt": "2023-01-05 09:15:33", 25 | "japan.txt.md5": "2023-01-05 09:15:33", 26 | "mexico.txt": "2023-01-05 09:15:33", 27 | "mexico.txt.md5": "2023-01-05 09:15:33", 28 | "spain.txt": "2023-01-05 09:15:34", 29 | "spain.txt.md5": "2023-01-05 09:15:34", 30 | "stockmargin_final_dtls.IB-CAN.dat": "2023-01-05 09:15:34", 31 | "stockmargin_final_dtls.IB-CAN.dat.md5": "2023-01-05 09:15:35", 32 | "stockmargin_final_dtls.IB-HK.dat": "2023-01-05 09:15:35", 33 | "stockmargin_final_dtls.IB-HK.dat.md5": "2023-01-05 09:15:35", 34 | "stockmargin_final_dtls.IB-IN.dat": "2023-01-05 09:15:36", 35 | "stockmargin_final_dtls.IB-IN.dat.md5": "2023-01-05 09:15:36", 36 | "stockmargin_final_dtls.IB-JP.dat": "2023-01-05 09:15:36", 37 | "stockmargin_final_dtls.IB-JP.dat.md5": "2023-01-05 09:15:36", 38 | "stockmargin_final_dtls.IBLLC-US.dat": "2023-01-05 09:15:37", 39 | "stockmargin_final_dtls.IBLLC-US.dat.md5": "2023-01-05 09:15:37", 40 | "swedish.txt": "2023-01-05 09:15:37", 41 | "swedish.txt.md5": "2023-01-05 09:15:37", 42 | "swiss.txt": "2023-01-05 09:15:38", 43 | "swiss.txt.md5": "2023-01-05 09:15:38", 44 | "usa.txt": "2023-01-05 09:15:38", 45 | "usa.txt.md5": "2023-01-05 09:15:38" 46 | } 47 | -------------------------------------------------------------------------------- /src/ftp_aioftp.py: -------------------------------------------------------------------------------- 1 | import asyncio, aioftp 2 | import time, json, logging, sys 3 | import ftplib 4 | 5 | # setting up logger 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.DEBUG) 8 | fh = logging.FileHandler('./output.log') 9 | sh = logging.StreamHandler(sys.stdout) 10 | formatter = logging.Formatter('[%(asctime)s] - %(funcName)s - %(message)s',datefmt='%a, %d %b %Y %H:%M:%S') 11 | fh.setFormatter(formatter) 12 | sh.setFormatter(formatter) 13 | logger.addHandler(fh) 14 | logger.addHandler(sh) 15 | 16 | time_now 
= time.time() 17 | with open("./data/config.json") as file: 18 | data = json.load(file) 19 | FTP_HOST = data["FTP_HOST"] 20 | FTP_USER = data["FTP_USER"] 21 | FTP_PASS = data["FTP_PASS"] 22 | 23 | 24 | async def download_file(file): 25 | async with aioftp.Client.context(FTP_HOST, user=FTP_USER, password=FTP_PASS) as client: 26 | await client.download(file) 27 | logger.info(f"{file} is written") 28 | 29 | async def main(): 30 | #getting files from ftp server 31 | ftp = ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) 32 | files = ftp.nlst() 33 | #start downloading 34 | tasks = [download_file(file) for file in files] 35 | await asyncio.gather(*tasks) 36 | 37 | asyncio.run(main()) 38 | logger.info(f"Time taken: {time.time() - time_now} seconds") -------------------------------------------------------------------------------- /src/ftp_concurrency.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import ftplib 3 | import time 4 | import json 5 | import logging 6 | import os 7 | import sys 8 | 9 | # setting up logger 10 | logger = logging.getLogger(__name__) 11 | logger.setLevel(logging.DEBUG) 12 | fh = logging.FileHandler('./output.log') 13 | sh = logging.StreamHandler(sys.stdout) 14 | formatter = logging.Formatter('[%(asctime)s] - %(funcName)s - %(message)s',datefmt='%a, %d %b %Y %H:%M:%S') 15 | fh.setFormatter(formatter) 16 | sh.setFormatter(formatter) 17 | logger.addHandler(fh) 18 | logger.addHandler(sh) 19 | 20 | logging.info("getting configurations from config file") 21 | with open("./data/config.json") as file: 22 | data = json.load(file) 23 | FTP_HOST = data["FTP_HOST"] 24 | FTP_USER = data["FTP_USER"] 25 | FTP_PASS = data["FTP_PASS"] 26 | 27 | logging.info("getting files from ftp") 28 | ftp = ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) 29 | files = ftp.nlst() 30 | 31 | #starting time 32 | time_now = time.time() 33 | 34 | #download file 35 | def download_file(ftp_client, file_path): 36 | ftp_client = 
ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) 37 | with open(file_path, 'wb') as f: 38 | ftp_client.retrbinary(f'RETR {file_path}', f.write) 39 | logging.info(f"{f} is written") 40 | 41 | #download asynchronously 42 | def main(): 43 | with concurrent.futures.ThreadPoolExecutor() as executor: 44 | futures = [executor.submit(download_file, ftp, file) for file in files] 45 | for future in futures: 46 | future.result() 47 | logging.info(f"Time taken: {time.time() - time_now} seconds") 48 | os.startfile(".") 49 | 50 | main() -------------------------------------------------------------------------------- /src/ftp_multiprocessing.py: -------------------------------------------------------------------------------- 1 | import ftplib 2 | from multiprocessing import Process 3 | import time 4 | import os, json 5 | import logging 6 | 7 | time_now = time.time() 8 | 9 | # setting up logger 10 | logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s") 11 | 12 | 13 | # loading config file 14 | with open("./data/config.json") as file: 15 | data = json.load(file) 16 | FTP_HOST = data["FTP_HOST"] 17 | FTP_USER = data["FTP_USER"] 18 | FTP_PASS = data["FTP_PASS"] 19 | ftp = ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) 20 | 21 | 22 | def download_file(file): 23 | """Download a file from the FTP server""" 24 | with open(file, "wb") as f: 25 | ftp.retrbinary("RETR " + file, f.write) 26 | logging.info(f"{f} is written") 27 | 28 | 29 | def main(): 30 | """Download all files from the FTP server in parallel""" 31 | files = ftp.nlst() 32 | processes = [Process(target=download_file, args=(file,)) for file in files] 33 | for process in processes: 34 | process.start() 35 | for process in processes: 36 | process.join() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | logging.info(f"Time taken: {time.time() - time_now} seconds") 42 | os.startfile(".") 43 | -------------------------------------------------------------------------------- /src/get_changed_files.py: 
"""Report which files on the FTP server changed since the last recorded check.

Connection settings are read from ``./data/config.json`` and the previously
recorded modification times from ``./data/last_modified.json``.
"""
import ftplib
from datetime import datetime, timedelta
import json

# Timestamp layout used for the values stored in data/last_modified.json.
RECORD_FORMAT = "%Y-%m-%d %H:%M:%S"
# Timestamp layout of the time-val carried by an MDTM reply.
MDTM_FORMAT = "%Y%m%d%H%M%S"
# Maps the period selector typed by the user to the matching timedelta keyword.
PERIOD_UNITS = {"d": "days", "h": "hours", "m": "minutes"}


def _server_mtime(client, file_name):
    """Return the modification time the server reports for *file_name*."""
    reply = client.sendcmd(f"MDTM {file_name}")
    # The time-val is the last token of the reply; it may carry fractional
    # seconds ("20230105091527.123"), which strptime would reject.
    stamp = reply.split()[-1].split(".")[0]
    return datetime.strptime(stamp, MDTM_FORMAT)


def _ask_period():
    """Prompt for the period selector and its amount; return ``(signal, amount)``."""
    signal = input(
        "Check if file hasn't been changed in the last day(d), hour(h), minute(m) or press enter if you only want to check if there is any change, no matter the duration: "
    )
    if signal == "":
        return signal, 0
    prompts = {
        "d": "How many days(1-30): ",
        "h": "How many hours(1-23): ",
        "m": "How many minutes(1-59): ",
    }
    if signal not in prompts:
        # Previously an unknown selector left `period` undefined and crashed
        # later with a NameError in the middle of the scan.
        raise SystemExit(f"Unknown period {signal!r}: expected d, h, m or empty")
    return signal, int(input(prompts[signal]))


def get_changed(files, *, client=None, last_modified=None, signal=None, amount=None):
    """Return the names in *files* whose server timestamp differs from the record.

    Args:
        files: Iterable of remote file names to check.
        client: Object with an ``ftplib.FTP``-style ``sendcmd`` method.
            Defaults to the script-level ``ftp`` connection.
        last_modified: Mapping of file name to a ``RECORD_FORMAT`` string.
            Defaults to the script-level ``last_modified_data``. Entries of
            changed files are updated in place with the new timestamp.
        signal: ``""`` to report any difference, or ``"d"``/``"h"``/``"m"``
            to report only files whose server timestamp is more than *amount*
            days/hours/minutes newer than the record. Defaults to the
            script-level ``period_signal``.
        amount: Number of units for *signal*. Defaults to the script-level
            ``period``; ignored when *signal* is ``""``.

    Returns:
        List of changed file names, in the order they were checked.

    Raises:
        ValueError: If *signal* is not one of ``""``, ``"d"``, ``"h"``, ``"m"``.
    """
    # Fall back to the script-level globals so get_changed(files) keeps working.
    client = ftp if client is None else client
    last_modified = last_modified_data if last_modified is None else last_modified
    signal = period_signal if signal is None else signal

    if signal == "":
        threshold = None
    elif signal in PERIOD_UNITS:
        amount = period if amount is None else amount
        threshold = timedelta(**{PERIOD_UNITS[signal]: amount})
    else:
        raise ValueError(f"unknown period signal: {signal!r}")

    changed_files = []
    for file_name in files:
        current_file_date = _server_mtime(client, file_name)
        recorded_text = last_modified.get(file_name)

        if recorded_text is None:
            # No previous record for this file: report it instead of raising
            # KeyError.
            is_changed = True
        else:
            last_modified_date = datetime.strptime(recorded_text, RECORD_FORMAT)
            if threshold is None:
                # Compare datetime with datetime; the stored string never
                # equals a datetime object.
                is_changed = current_file_date != last_modified_date
            else:
                # Real elapsed time, so month/day/hour roll-overs are handled.
                is_changed = current_file_date - last_modified_date > threshold

        if not is_changed:
            print(f"{file_name} has not changed")
            continue

        print(f"{file_name} has changed")
        print(f"last changed: {recorded_text}")
        print(f"current change: {current_file_date}")
        if threshold is not None:
            # NOTE(review): wording kept from the original; the test above is
            # "more than <amount> units apart" - confirm the intended meaning.
            print(f"duration changed: within the last {amount}{signal}")
        # Update the timestamp record (not the connection config) and keep the
        # stored string format.
        last_modified[file_name] = current_file_date.strftime(RECORD_FORMAT)
        changed_files.append(file_name)

    return changed_files


if __name__ == "__main__":
    with open("./data/config.json") as file:
        data = json.load(file)
    FTP_HOST = data["FTP_HOST"]
    FTP_USER = data["FTP_USER"]
    FTP_PASS = data["FTP_PASS"]

    with open("./data/last_modified.json") as file:
        last_modified_data = json.load(file)

    # Ask before connecting so the control connection is not left idle while
    # waiting for the user to answer.
    period_signal, period = _ask_period()

    # The context manager closes the connection even if the scan fails.
    with ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) as ftp:
        files = ftp.nlst()
        changed_files = get_changed(files)

# NOTE: disabled upload sketch kept from the original script.
# import ftplib
# ftp = ftplib.FTP("0.0.0.0:2121", "farrel", "farrel123")
# ftp.mkd("changed_files")
# ftp.cwd("changed_files")

# for changed_file in changed_files:
#     with open(changed_file, "rb") as file:
#         # Use the FTP `storbinary` method to upload the file
#         ftp.storbinary(f"STOR {changed_file}", file)

# print(ftp.nlst())