├── test └── scenario_a │ ├── contents │ ├── bar.tsv │ ├── baz.tsv │ ├── eggs.dat │ ├── foo.txt │ └── spam.list │ └── glob_list.txt ├── LICENSE ├── .gitignore ├── design.txt └── fglob /test/scenario_a/contents/bar.tsv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/scenario_a/contents/baz.tsv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/scenario_a/contents/eggs.dat: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/scenario_a/contents/foo.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/scenario_a/contents/spam.list: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/scenario_a/glob_list.txt: -------------------------------------------------------------------------------- 1 | test/scenario_a/contents/*.tsv 2 | test/scenario_a/contents/*.dat 3 | test/scenario_a/contents/*.txt 4 | test/scenario_a/contents/*.list 5 | test/scenario_a/contents/*.zip 6 | * 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /design.txt: -------------------------------------------------------------------------------- 1 | # Example inputs and outputs 2 | 3 | # input_file.list 4 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB46/SNP/*SNPs_Annotated.vcf 5 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB37/SNP/*SNPs_Annotated.vcf 6 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB29/SNP/*SNPs_Annotated.vcf 7 | 
/hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB99/SNP/*SNPs_Annotated.vcf # multi-hit 8 | 9 | # stdout 10 | LINE: -rw-rw-rw- 2 blah blah /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB46/SNP/foo_SNPs_Annotated.vcf 11 | LINE: -rw-rw-rw- 2 blah blah /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB29/SNP/bar_SNPs_Annotated.vcf 12 | count is: 2 13 | 14 | 15 | # script.sh 16 | #!/bin/bash 17 | 18 | count=0 19 | while read LINE; 20 | do 21 | if [ -f $LINE ]; then 22 | echo -n "LINE: " 23 | ls -lF $LINE 24 | count=`expr $count + 1` 25 | fi 26 | done < ./05_8_2018.resultPath.txt 27 | echo "count is: $count" 28 | 29 | 30 | fglob input_file.list 31 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB46/SNP/foo_SNPs_Annotated.vcf 32 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB29/SNP/bar_SNPs_Annotated.vcf 33 | 34 | fglob --bad input_file.list 35 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB37/SNP/*SNPs_Annotated.vcf 36 | 37 | fglob --multi input_file.list 38 | /hgsccl/next-gen/Illumina/Instruments/D00341/171220_D00341_0575_AH3GFMBCX2/Results/Project_171220_D00341_0575_AH3GFMBCX2/Sample_H3GFMBCX2-1-IDMB99/SNP/*SNPs_Annotated.vcf 39 | 40 | fglob --table input_file.list 41 | ...46/SNP/*SNPs_Annotated.vcf good ...46/SNP/foo_SNPs_Annotated.vcf 42 | ...37/SNP/*SNPs_Annotated.vcf missing (empty) 43 | ...29/SNP/*SNPs_Annotated.vcf good ...29/SNP/bar_SNPs_Annotated.vcf 44 | ...99/SNP/*SNPs_Annotated.vcf multi 
#!/usr/bin/env python3

"""Accepts a list of glob patterns as input, where each glob is expected to
match a single file. Outputs the list of resulting file paths for the cases
where glob does match a single file. Non-default options can show missing
globs or multi-hit globs."""


import argparse
from glob import glob
from os import fsencode
from os.path import isfile, isdir
from pathlib import Path
from sys import stdin, stdout


def main():
    """Parse the command line and print the report for the chosen mode."""
    args = parse_args()
    run(args.input_file, args.mode)


def parse_args():
    """Parse the command line.

    Returns:
        argparse.Namespace with ``input_file`` (a readable stream holding one
        glob pattern per line; standard input when no path is given) and
        ``mode`` (one of 'simple', 'bad', 'multi' or 'table'; 'simple' when
        no mode flag is given).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # argparse does not run a non-string default through ``type``, so the
    # default must already be a binary stream. The text-mode ``stdin``
    # yields str lines, which the bytes-based processing in run() cannot
    # strip or join. Fall back to ``stdin`` itself if it has been replaced
    # by an object without a ``buffer`` attribute.
    parser.add_argument('input_file',
                        type=argparse.FileType('rb'),
                        nargs='?',
                        default=getattr(stdin, 'buffer', stdin))
    parser.add_argument('--simple', action='store_const', dest='mode',
                        const='simple', help='default')
    parser.add_argument('--bad', action='store_const', dest='mode',
                        const='bad')
    parser.add_argument('--multi', action='store_const', dest='mode',
                        const='multi')
    parser.add_argument('--table', action='store_const', dest='mode',
                        const='table')
    args = parser.parse_args()
    if not args.mode:
        args.mode = 'simple'
    return args


def run(input_file, mode, out=None):
    """Glob every pattern read from input_file and report according to mode.

    Args:
        input_file: Iterable of lines, one glob pattern per line. Bytes
            lines are expected; str lines are converted with os.fsencode.
        mode: 'simple' prints the hit of every pattern that matches exactly
            one path, 'bad' prints the patterns without any match, 'multi'
            prints the patterns with more than one match, and 'table'
            prints one tab-separated row (pattern, status, type, hit) per
            hit, or a single row with an empty hit for an unmatched pattern.
        out: Binary stream that receives the report. Defaults to the
            standard output buffer.

    Raises:
        AssertionError: If mode is not one of the four known modes.
    """
    for raw_line in input_file:
        if isinstance(raw_line, str):
            # Tolerate text-mode streams (e.g. a caller passing sys.stdin
            # itself) so the rest of the pipeline can stay bytes-only.
            raw_line = fsencode(raw_line)
        pattern = raw_line.rstrip(b'\r\n')
        hits = glob(pattern)
        if mode == 'simple':
            if len(hits) == 1:
                dump(hits[0], out=out)
        elif mode == 'bad':
            if not hits:
                dump(pattern, out=out)
        elif mode == 'multi':
            if len(hits) > 1:
                dump(pattern, out=out)
        elif mode == 'table':
            if not hits:
                # Emit one row with an empty hit column so that unmatched
                # patterns still show up in the table.
                hits = [b'']
                code = b'missing'
            elif len(hits) == 1:
                code = b'good'
            else:
                code = b'multi'
            for hit in hits:
                dump(pattern, code, get_type(hit), hit, out=out)
        else:
            # Raised explicitly instead of via ``assert`` so the check is
            # not stripped when Python runs with -O.
            raise AssertionError('We have a bug')


def dump(*byte_data, out=None):
    """Write the given bytes fields as one tab-separated line.

    Args:
        *byte_data: Bytes fields making up the output line.
        out: Binary stream to write to. Defaults to the standard output
            buffer.
    """
    if out is None:
        out = stdout.buffer
    out.write(b'\t'.join(byte_data) + b'\n')


def get_type(binary_path):
    """Classify a path as b'file', b'dir', b'missing' or b'other'.

    b'missing' is returned only for the empty path, which run() uses as the
    placeholder hit of an unmatched pattern. Any other path that is neither
    a regular file nor a directory is reported as b'other'.
    """
    if isfile(binary_path):
        result = b'file'
    elif isdir(binary_path):
        result = b'dir'
    elif binary_path == b'':
        result = b'missing'
    else:
        result = b'other'
    return result


if __name__ == '__main__':
    main()