├── .gitignore ├── README.md ├── examples ├── download_files.py ├── download_files_with_subfolder.py ├── download_latest_file.py ├── download_list.py ├── file_properties_from_folder.py ├── upload_files.py └── upload_files_in_chunks.py ├── office365_api.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | env 3 | __* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python SharePoint Office365 API 2 | You will find example on connecting to Office 365 SharePoint using the Office 365 Rest Python Client package. 3 | 4 | ## Environment Variables 5 | 6 | To run this project, you will need to add the following environment variables to your .env file 7 | 8 | `sharepoint_email` 9 | 10 | `sharepoint_password` 11 | 12 | `sharepoint_url_site` 13 | 14 | `sharepoint_site_name` 15 | 16 | `sharepoint_doc_library` 17 | 18 | 19 | ## Installation 20 | 21 | Create Virtual Environment 22 | 23 | ```bash 24 | python -m venv env 25 | ``` 26 | 27 | Activate Environment 28 | ```bash 29 | source env/Scripts/activate 30 | ``` 31 | 32 | Install Packages 33 | ```bash 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | Install Office365 API Package Directly from Github 38 | ```bash 39 | pip install git+https://github.com/vgrem/Office365-REST-Python-Client.git#egg=Office365-REST-Python-Client 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /examples/download_files.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import re 3 | import sys, os 4 | from pathlib import PurePath 5 | 6 | # 1 args = SharePoint folder name. May include subfolders YouTube/2022 7 | FOLDER_NAME = sys.argv[1] 8 | # 2 args = locate or remote folder_dest 9 | FOLDER_DEST = sys.argv[2] 10 | # 3 args = SharePoint file name. This is used when only one file is being downloaded 11 | # If all files will be downloaded, then set this value as "None" 12 | FILE_NAME = sys.argv[3] 13 | # 4 args = SharePoint file name pattern 14 | # If no pattern match files are required to be downloaded, then set this value as "None" 15 | FILE_NAME_PATTERN = sys.argv[4] 16 | 17 | def save_file(file_n, file_obj): 18 | file_dir_path = PurePath(FOLDER_DEST, file_n) 19 | with open(file_dir_path, 'wb') as f: 20 | f.write(file_obj) 21 | 22 | def get_file(file_n, folder): 23 | file_obj = SharePoint().download_file(file_n, folder) 24 | save_file(file_n, file_obj) 25 | 26 | def get_files(folder): 27 | files_list = SharePoint()._get_files_list(folder) 28 | for file in files_list: 29 | get_file(file.name, folder) 30 | 31 | def get_files_by_pattern(keyword, folder): 32 | files_list = SharePoint()._get_files_list(folder) 33 | for file in files_list: 34 | if re.search(keyword, file.name): 35 | get_file(file.name, folder) 36 | 37 | if __name__ == '__main__': 38 | if FILE_NAME != 'None': 39 | get_file(FILE_NAME, FOLDER_NAME) 40 | elif FILE_NAME_PATTERN != 'None': 41 | get_files_by_pattern(FILE_NAME_PATTERN, FOLDER_NAME) 42 | else: 43 | get_files(FOLDER_NAME) -------------------------------------------------------------------------------- /examples/download_files_with_subfolder.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import sys, os 3 | from pathlib import PurePath 4 | 5 | # 1 args = SharePoint folder name. May include subfolders YouTube/2022 6 | FOLDER_NAME = sys.argv[1] 7 | # 2 args = locate or remote folder location 8 | FOLDER_DEST = sys.argv[2] 9 | # 3 args = Determine if all folders/files (subfolders) need to be downloaded 10 | CRAWL_FOLDERS = sys.argv[3] 11 | 12 | # save the file to locate or remote location 13 | def save_file(file_n, file_obj, subfolder): 14 | dir_path = PurePath(FOLDER_DEST, subfolder) 15 | file_dir_path = PurePath(dir_path, file_n) 16 | with open(file_dir_path, 'wb') as f: 17 | f.write(file_obj) 18 | 19 | # create directory if it doesn't exist 20 | def create_dir(path): 21 | dir_path = PurePath(FOLDER_DEST, path) 22 | if not os.path.exists(dir_path): 23 | os.makedirs(dir_path) 24 | 25 | def get_file(file_n, folder): 26 | file_obj = SharePoint().download_file(file_n, folder) 27 | save_file(file_n, file_obj, folder) 28 | 29 | def get_files(folder): 30 | files_list = SharePoint()._get_files_list(folder) 31 | for file in files_list: 32 | get_file(file.name, folder) 33 | 34 | # get back a list of subfolders from specific folder 35 | def get_folders(folder): 36 | l = [] 37 | folder_obj = SharePoint().get_folder_list(folder) 38 | for subfolder_obj in folder_obj: 39 | subfolder = '/'.join([folder, subfolder_obj.name]) 40 | l.append(subfolder) 41 | return l 42 | 43 | if __name__ == '__main__': 44 | if CRAWL_FOLDERS == 'Yes': 45 | folder_list = get_folders(FOLDER_NAME) 46 | for folder in folder_list: 47 | for subfolder in get_folders(folder): 48 | folder_list.append(subfolder) 49 | 50 | folder_list[:0] = [FOLDER_NAME] 51 | print(folder_list) 52 | for folder in folder_list: 53 | # will create folder if it doesn't exist 54 | create_dir(folder) 55 | # get the files for specific folder location in SharePoint 56 | get_files(folder) 57 | else: 58 | get_files(FOLDER_NAME) -------------------------------------------------------------------------------- /examples/download_latest_file.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import sys 3 | from pathlib import PurePath 4 | 5 | # 1 args = SharePoint Folder name 6 | FOLDER_NAME = sys.argv[1] 7 | # 2 args = location or remote folder destintion 8 | FOLDER_DEST = sys.argv[2] 9 | 10 | def save_file(file_name, file_obj, folder_dest): 11 | file_dir_path = PurePath(folder_dest, file_name) 12 | with open(file_dir_path, 'wb') as f: 13 | f.write(file_obj) 14 | 15 | def get_latest_file(folder, folder_dest): 16 | file_name, content = SharePoint().download_latest_file(folder) 17 | save_file(file_name, content, folder_dest) 18 | 19 | if __name__ == '__main__': 20 | get_latest_file(FOLDER_NAME, FOLDER_DEST) 21 | -------------------------------------------------------------------------------- /examples/download_list.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import sys 3 | import csv 4 | from pathlib import PurePath 5 | from openpyxl import Workbook 6 | 7 | # 1 args = The name of the SharePoint List 8 | SHAREPOINT_LIST_NAME = sys.argv[1] 9 | # 2 args = Export Type. it can be "Excel" or "CSV" 10 | EXPORT_TYPE = sys.argv[2] 11 | # 3 args = Local Directory Path to save list dataset 12 | DIR_PATH = sys.argv[3] 13 | # 4 args = The name of the file that will get saved on local directory 14 | FILE_NAME = sys.argv[4] 15 | 16 | def set_file_ext(file_name, export_type): 17 | if export_type == 'Excel': 18 | file_name_with_ext = '.'.join([file_name, '.xlsx']) 19 | elif export_type == 'CSV': 20 | file_name_with_ext = '.'.join([file_name, '.csv']) 21 | else: 22 | file_name_with_ext = file_name 23 | return file_name_with_ext 24 | 25 | def download_list(list_name, export_type, dir_path, file_name): 26 | sp_list = SharePoint().get_list(list_name) 27 | if export_type == 'Excel': 28 | save_to_excel(sp_list, dir_path, file_name) 29 | elif export_type == 'CSV': 30 | save_to_csv(sp_list, dir_path, file_name) 31 | else: 32 | print('Export type is not a value type') 33 | 34 | def save_to_csv(list_items, dir_path, file_name): 35 | dir_file_path = PurePath(dir_path, file_name) 36 | with open(dir_file_path, 'w', newline='\n', encoding='utf-8') as f: 37 | header = list_items[0].properties.keys() 38 | w = csv.DictWriter(f, header) 39 | w.writeheader() 40 | for item in list_items: 41 | w.writerow(item.properties) 42 | 43 | def save_to_excel(list_items, dir_path, file_name): 44 | dir_file_path = PurePath(dir_path, file_name) 45 | wb = Workbook() 46 | ws = wb.active 47 | # list of header name from SharePoint List 48 | header = list_items[0].properties.keys() 49 | # write headers on first row 50 | for idx, name in enumerate(header): 51 | ws.cell(row=1, column=idx + 1, value=name) 52 | # write line items starting on second row 53 | row = 2 54 | for dict_obj in list_items: 55 | for idx, item in enumerate(dict_obj.properties.items()): 56 | ws.cell(row=row, column=idx + 1, value=item[1]) 57 | row += 1 58 | wb.save(dir_file_path) 59 | 60 | if __name__ == '__main__': 61 | file_name = set_file_ext(FILE_NAME, EXPORT_TYPE) 62 | download_list(SHAREPOINT_LIST_NAME, EXPORT_TYPE, DIR_PATH, file_name) -------------------------------------------------------------------------------- /examples/file_properties_from_folder.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import sys 3 | 4 | # 1 args = SharePoint Folder name 5 | FOLDER_NAME = sys.argv[1] 6 | 7 | def get_properties_by_folder(folder): 8 | files_properties = SharePoint().get_file_properties_from_folder(folder) 9 | print('File count:', len(files_properties)) 10 | for file in files_properties: 11 | print(file) 12 | 13 | if __name__ == '__main__': 14 | get_properties_by_folder(FOLDER_NAME) -------------------------------------------------------------------------------- /examples/upload_files.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import re 3 | import sys, os 4 | from pathlib import PurePath 5 | 6 | # 1 args = Root Directory Path of files to upload 7 | ROOT_DIR = sys.argv[1] 8 | # 2 args = SharePoint folder name. May include subfolders to upload to 9 | SHAREPOINT_FOLDER_NAME = sys.argv[2] 10 | # 3 args = File name pattern. Only upload files with this pattern 11 | FILE_NAME_PATTERN = sys.argv[3] 12 | 13 | 14 | def upload_files(folder, keyword=None): 15 | file_list = get_list_of_files(folder) 16 | for file in file_list: 17 | if keyword is None or keyword == 'None' or re.search(keyword, file[0]): 18 | file_content = get_file_content(file[1]) 19 | SharePoint().upload_file(file[0], SHAREPOINT_FOLDER_NAME, file_content) 20 | 21 | def get_list_of_files(folder): 22 | file_list = [] 23 | folder_item_list = os.listdir(folder) 24 | for item in folder_item_list: 25 | item_full_path = PurePath(folder, item) 26 | if os.path.isfile(item_full_path): 27 | file_list.append([item, item_full_path]) 28 | return file_list 29 | 30 | # read files and return the content of files 31 | def get_file_content(file_path): 32 | with open(file_path, 'rb') as f: 33 | return f.read() 34 | 35 | if __name__ == '__main__': 36 | upload_files(ROOT_DIR, FILE_NAME_PATTERN) -------------------------------------------------------------------------------- /examples/upload_files_in_chunks.py: -------------------------------------------------------------------------------- 1 | from office365_api import SharePoint 2 | import re 3 | import sys, os 4 | from pathlib import PurePath 5 | 6 | # 1 args = Root Directory Path of files to upload 7 | ROOT_DIR = sys.argv[1] 8 | # 2 args = SharePoint folder name. May include subfolders to upload to 9 | SHAREPOINT_FOLDER_NAME = sys.argv[2] 10 | # 3 args = File chunk size 11 | CHUNK_SIZE = sys.argv[3] 12 | # 4 args = File name pattern. Only upload files with this pattern 13 | FILE_NAME_PATTERN = sys.argv[4] 14 | 15 | 16 | def upload_files(folder, sharepoint_folder, chunk_size, keyword=None): 17 | file_list = get_list_of_files(folder) 18 | for file in file_list: 19 | if keyword is None or keyword == 'None' or re.search(keyword, file[0]): 20 | file_size = os.path.getsize(file[1]) 21 | data = {'file_size': file_size} 22 | SharePoint().upload_file_in_chunks(file[1], sharepoint_folder, chunk_size, progress_status, **data) 23 | # SharePoint().upload_file_in_chunks(file[1], sharepoint_folder, chunk_size) 24 | 25 | def get_list_of_files(folder): 26 | file_list = [] 27 | folder_item_list = os.listdir(folder) 28 | for item in folder_item_list: 29 | item_full_path = PurePath(folder, item) 30 | if os.path.isfile(item_full_path): 31 | file_list.append([item, item_full_path]) 32 | return file_list 33 | 34 | def progress_status(offset, file_size): 35 | print("Uploaded '{0}' bytes from '{1}' ... '{2}'%".format(offset, file_size, round(offset/file_size * 100, 2))) 36 | 37 | if __name__ == '__main__': 38 | upload_files(ROOT_DIR, SHAREPOINT_FOLDER_NAME, int(CHUNK_SIZE), FILE_NAME_PATTERN) -------------------------------------------------------------------------------- /office365_api.py: -------------------------------------------------------------------------------- 1 | from urllib import response 2 | import environ 3 | from office365.sharepoint.client_context import ClientContext 4 | from office365.runtime.auth.user_credential import UserCredential 5 | from office365.sharepoint.files.file import File 6 | import datetime 7 | 8 | env = environ.Env() 9 | environ.Env.read_env() 10 | 11 | USERNAME = env('sharepoint_email') 12 | PASSWORD = env('sharepoint_password') 13 | SHAREPOINT_SITE = env('sharepoint_url_site') 14 | SHAREPOINT_SITE_NAME = env('sharepoint_site_name') 15 | SHAREPOINT_DOC = env('sharepoint_doc_library') 16 | 17 | class SharePoint: 18 | def _auth(self): 19 | conn = ClientContext(SHAREPOINT_SITE).with_credentials( 20 | UserCredential( 21 | USERNAME, 22 | PASSWORD 23 | ) 24 | ) 25 | return conn 26 | 27 | def _get_files_list(self, folder_name): 28 | conn = self._auth() 29 | target_folder_url = f'{SHAREPOINT_DOC}/{folder_name}' 30 | root_folder = conn.web.get_folder_by_server_relative_url(target_folder_url) 31 | root_folder.expand(["Files", "Folders"]).get().execute_query() 32 | return root_folder.files 33 | 34 | def get_folder_list(self, folder_name): 35 | conn = self._auth() 36 | target_folder_url = f'{SHAREPOINT_DOC}/{folder_name}' 37 | root_folder = conn.web.get_folder_by_server_relative_url(target_folder_url) 38 | root_folder.expand(["Folders"]).get().execute_query() 39 | return root_folder.folders 40 | 41 | def download_file(self, file_name, folder_name): 42 | conn = self._auth() 43 | file_url = f'/sites/{SHAREPOINT_SITE_NAME}/{SHAREPOINT_DOC}/{folder_name}/{file_name}' 44 | file = File.open_binary(conn, file_url) 45 | return file.content 46 | 47 | def download_latest_file(self, folder_name): 48 | date_format = "%Y-%m-%dT%H:%M:%SZ" 49 | files_list = self._get_files_list(folder_name) 50 | file_dict = {} 51 | for file in files_list: 52 | dt_obj = datetime.datetime.strptime(file.time_last_modified, date_format) 53 | file_dict[file.name] = dt_obj 54 | # sort dict object to get the latest file 55 | file_dict_sorted = {key:value for key, value in sorted(file_dict.items(), key=lambda item:item[1], reverse=True)} 56 | latest_file_name = next(iter(file_dict_sorted)) 57 | content = self.download_file(latest_file_name, folder_name) 58 | return latest_file_name, content 59 | 60 | 61 | def upload_file(self, file_name, folder_name, content): 62 | conn = self._auth() 63 | target_folder_url = f'/sites/{SHAREPOINT_SITE_NAME}/{SHAREPOINT_DOC}/{folder_name}' 64 | target_folder = conn.web.get_folder_by_server_relative_path(target_folder_url) 65 | response = target_folder.upload_file(file_name, content).execute_query() 66 | return response 67 | 68 | def upload_file_in_chunks(self, file_path, folder_name, chunk_size, chunk_uploaded=None, **kwargs): 69 | conn = self._auth() 70 | target_folder_url = f'/sites/{SHAREPOINT_SITE_NAME}/{SHAREPOINT_DOC}/{folder_name}' 71 | target_folder = conn.web.get_folder_by_server_relative_path(target_folder_url) 72 | response = target_folder.files.create_upload_session( 73 | source_path=file_path, 74 | chunk_size=chunk_size, 75 | chunk_uploaded=chunk_uploaded, 76 | **kwargs 77 | ).execute_query() 78 | return response 79 | 80 | def get_list(self, list_name): 81 | conn = self._auth() 82 | target_list = conn.web.lists.get_by_title(list_name) 83 | items = target_list.items.get().execute_query() 84 | return items 85 | 86 | def get_file_properties_from_folder(self, folder_name): 87 | files_list = self._get_files_list(folder_name) 88 | properties_list = [] 89 | for file in files_list: 90 | file_dict = { 91 | 'file_id': file.unique_id, 92 | 'file_name': file.name, 93 | 'major_version': file.major_version, 94 | 'minor_version': file.minor_version, 95 | 'file_size': file.length, 96 | 'time_created': file.time_created, 97 | 'time_last_modified': file.time_last_modified 98 | } 99 | properties_list.append(file_dict) 100 | file_dict = {} 101 | return properties_list -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.6.15 2 | cffi==1.15.1 3 | charset-normalizer==2.1.0 4 | cryptography==37.0.4 5 | django-environ==0.9.0 6 | et-xmlfile==1.1.0 7 | idna==3.3 8 | msal==1.18.0 9 | Office365-REST-Python-Client==2.3.13 10 | openpyxl==3.0.10 11 | pycparser==2.21 12 | PyJWT==2.4.0 13 | pytz==2022.1 14 | requests==2.28.1 15 | urllib3==1.26.10 16 | --------------------------------------------------------------------------------