├── .gitignore ├── LICENSE ├── Pipfile ├── README.md ├── extract_vba_source.py └── yaml └── azure-pipelines-extract-vba.yml /.gitignore: -------------------------------------------------------------------------------- 1 | /Pipfile.lock 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Takeru Saso 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | oletools = {file = "https://github.com/kijeong/oletools/archive/refs/heads/master.zip"} 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What's this project? 2 | Python script and Pipeline configuration file to extract VBA source files from MS Office files 3 | and commit/push them to a Git repository. 4 | It helps you manage your MS Office files with VBA macros on Azure DevOps. 5 | 6 | # Supported Git Hosting Services 7 | - Microsoft Azure DevOps Service 8 | 9 | # Usage 10 | ## Settings 11 | ### for Azure DevOps 12 | 1. Add the [**azure-pipelines-extract-vba.yml**](/yaml/azure-pipelines-extract-vba.yml) to your Git repository. 13 | 2. Commit and push your local repository to the Azure DevOps Server. 14 | 3. On the Azure DevOps Server, go to **Pipelines** and create a new pipeline for your repository. 15 | 4. At the **Configure your pipeline** step, select **Existing Azure Pipelines YAML file**, 16 | and then specify the **azure-pipelines-extract-vba.yml** added to your Git repository as the configuration file. 17 | 18 | ### How to run 19 | 1. When you push your changes to your Git repository on Azure DevOps, 20 | the Pipeline runs and extracts the VBA source files into the `/vba-src` directory. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Extract VBA source files from MS Office files that contain macros."""

from shutil import rmtree
from pathlib import Path
from argparse import ArgumentParser
from oletools.olevba import VBA_Parser, VBA_Project, filter_vba

OFFICE_FILE_EXTENSIONS = (
    '.xlsb', '.xls', '.xlsm', '.xla', '.xlt', '.xlam',  # Excel book with macro
)


def get_args():
    """Parse and return the command-line arguments.

    Returns:
        argparse.Namespace with the attributes ``sources``, ``dest``,
        ``use_orig_extension``, ``src_encoding``, ``out_encoding`` and
        ``recursive``.
    """
    parser = ArgumentParser(description='Extract vba source files from an MS Office file with macro.')
    parser.add_argument('sources', metavar='MS_OFFICE_FILE', type=str, nargs='+',
                        help='Paths to source MS Office file or directory.')
    parser.add_argument('--dest', type=str, default='vba_src',
                        help='Destination directory path to output vba source files [default: ./vba_src].')
    parser.add_argument('--orig-extension', dest='use_orig_extension', action='store_true',
                        help='Use an original extension (.bas, .cls, .frm) for extracted vba source files [default: use .vb].')
    parser.add_argument('--src-encoding', dest='src_encoding', type=str, default='cp932',
                        help='Encoding for vba source files in an MS Office file [default: cp932].')
    parser.add_argument('--out-encoding', dest='out_encoding', type=str, default='utf8',
                        help='Encoding for generated vba source files [default: utf8].')
    parser.add_argument('--recursive', action='store_true',
                        help='Find sub directories recursively when a directory is specified as the sources parameter.')
    return parser.parse_args()


def get_source_paths(sources, recursive):
    """Yield the absolute path of every MS Office file named by *sources*.

    Args:
        sources: Iterable of file or directory paths.
        recursive: When true, directories are searched through all of their
            sub directories; otherwise only their direct children are checked.

    Yields:
        pathlib.Path: For a directory, each contained file whose suffix
        (compared case-insensitively) is in ``OFFICE_FILE_EXTENSIONS`` and
        whose name does not start with ``~$``. Any other source is yielded
        as-is, without an extension check.
    """
    pattern = '**/*' if recursive else '*'
    for src in sources:
        p = Path(src)
        if not p.is_dir():
            # An explicitly named file is trusted and returned unfiltered.
            yield p.absolute()
            continue

        for f in p.glob(pattern):
            # Names starting with '~$' are skipped (presumably the temporary
            # owner/lock files Office creates next to an open document).
            # is_file() keeps a directory that merely ends in an Office
            # extension from being handed to the parser.
            if (f.is_file()
                    and not f.name.startswith('~$')
                    and f.suffix.lower() in OFFICE_FILE_EXTENSIONS):
                yield f.absolute()


def get_outputpath(parent_dir: Path, filename: str, use_orig_extension: bool):
    """Return the output path for one VBA source file, creating its directory.

    The file is placed in a sub directory of *parent_dir* chosen from the
    extension of *filename*: ``class`` for ``.cls``, ``form`` for ``.frm`` and
    ``module`` for everything else. The sub directory is created if missing.

    Args:
        parent_dir: Directory that holds the output of one MS Office file.
        filename: Name of the VBA source file, including its extension.
        use_orig_extension: When true the file keeps *filename* unchanged;
            otherwise ``.vb`` is appended to it.

    Returns:
        pathlib.Path of the file to write.
    """
    extension = Path(filename).suffix.lower()
    if extension == '.cls':
        subdir = parent_dir / 'class'
    elif extension == '.frm':
        subdir = parent_dir / 'form'
    else:
        subdir = parent_dir / 'module'

    subdir.mkdir(parents=True, exist_ok=True)

    # The conditional must only select the suffix. Written as
    # ``filename + '.vb' if not use_orig_extension else ''`` the whole name
    # collapses to '' when the original extension is requested, and the
    # returned path is the directory itself.
    return subdir / (filename if use_orig_extension else filename + '.vb')


def extract_macros(parser: VBA_Parser, vba_encoding):
    """Yield ``(vba_filename, code_data)`` for every VBA module found by *parser*.

    When ``parser.ole_file`` is ``None`` the parsers in ``parser.ole_subfiles``
    are processed recursively instead (presumably the case for container
    formats whose OLE parts are exposed as sub files -- confirm against
    oletools). Otherwise each project reported by ``parser.vba_projects`` is
    opened as a ``VBA_Project`` with ``relaxed=False`` and its modules are
    parsed.

    Args:
        parser: The olevba parser of an MS Office file.
        vba_encoding: Codec name assigned to ``VBA_Project.codec`` before the
            project stream is parsed.
    """
    if parser.ole_file is None:
        for subfile in parser.ole_subfiles:
            yield from extract_macros(subfile, vba_encoding)
        return

    parser.find_vba_projects()
    for vba_root, project_path, dir_path in parser.vba_projects:
        project = VBA_Project(parser.ole_file, vba_root, project_path, dir_path, relaxed=False)
        # The codec is overridden before parsing so that the modules are
        # decoded with the encoding chosen by the caller.
        project.codec = vba_encoding
        project.parse_project_stream()

        for _code_path, vba_filename, code_data in project.parse_modules():
            yield (vba_filename, code_data)


if __name__ == '__main__':
    args = get_args()

    # Get the root path of destination (if not exists then make it).
    # With exist_ok=True, mkdir() still raises FileExistsError when the path
    # exists but is not a directory.
    root = Path(args.dest)
    root.mkdir(parents=True, exist_ok=True)

    # Get the source MS Office file where extract the vba source files from.
86 | for source in get_source_paths(args.sources, args.recursive): 87 | src = Path(source) 88 | basename = src.stem 89 | dest = Path(root.joinpath(basename)) 90 | dest.mkdir(parents=True, exist_ok=True) 91 | rmtree(dest.absolute()) 92 | print('Extract vba files from {source} to {dest}'.format(source=source, dest=dest)) 93 | 94 | # Extract vba source files from the MS Office file and save each vba file into the sub directory as of its MS Office file name. 95 | vba_parser = VBA_Parser(src) 96 | for vba_filename, vba_code in extract_macros(vba_parser, args.src_encoding): 97 | vba_file = get_outputpath(dest, vba_filename, args.use_orig_extension) 98 | vba_file.write_text(filter_vba(vba_code), encoding=args.out_encoding) 99 | print('[{basename}] {vba_file} is generated.'.format(basename=basename, vba_file=vba_file)) 100 | -------------------------------------------------------------------------------- /yaml/azure-pipelines-extract-vba.yml: -------------------------------------------------------------------------------- 1 | # Configuration file for Azure DevOps Pipeline 2 | 3 | variables: 4 | TARGET_DIR: '.' 
5 | VBA_DIR: 'vba-src' 6 | 7 | trigger: 8 | branches: 9 | include: 10 | - "*" 11 | paths: 12 | exclude: 13 | - $(VBA_DIR) 14 | 15 | pool: 16 | vmImage: 'ubuntu-latest' 17 | 18 | steps: 19 | - task: UsePythonVersion@0 20 | inputs: 21 | versionSpec: '3.8' 22 | 23 | - checkout: self 24 | submodules: true 25 | persistCredentials: true 26 | clean: true 27 | 28 | - script: | 29 | echo Set Requestor as $(Build.RequestedFor) $(Build.RequestedForEmail) 30 | git config --global user.email $(Build.RequestedForEmail) 31 | git config --global user.name $(Build.RequestedFor) 32 | if ["$(System.PullRequest.SourceBranch)" = ""]; then 33 | echo Checkout for branch $(Build.SourceBranch) 34 | export branchname=$(echo $(Build.SourceBranch) | sed s@refs/heads/@@) 35 | else 36 | echo Checkout for PR source branch $(System.PullRequest.SourceBranch) 37 | export branchname=$(echo $(System.PullRequest.SourceBranch) | sed s@refs/heads/@@) 38 | fi 39 | echo Checkout for branch-name $branchname 40 | git remote update 41 | git fetch origin $branchname 42 | git checkout $branchname 43 | displayName: 'Set the configuration for git commit/push.' 44 | 45 | - script: | 46 | echo Install pipenv 47 | python -m pip install pip --upgrade 48 | pip install pipenv 49 | echo Install extract_vba_source 50 | git clone https://github.com/takeruko/extract_vba_source.git 51 | displayName: 'Install extract_vba_source.py.' 52 | 53 | - script: | 54 | echo Specify the location of the Pipfile and run extract_vba_source.py 55 | export PIPENV_PIPFILE=$(pwd)/extract_vba_source/Pipfile 56 | pipenv install 57 | pipenv run python ./extract_vba_source/extract_vba_source.py \ 58 | --dest $(VBA_DIR) \ 59 | --src-encoding='cp932' \ 60 | --out-encoding='utf8' \ 61 | --recursive \ 62 | $(TARGET_DIR) 63 | displayName: 'Extract vba source files.' 64 | 65 | - script: | 66 | git status 67 | git add $(VBA_DIR) 68 | git status 69 | git commit -m "Extracted vba source files. 
[skip ci]" 70 | git push origin $branchname 71 | displayName: 'Commit vba source files.' 72 | 73 | --------------------------------------------------------------------------------