├── .gitignore ├── README.md ├── __init__.py ├── ctakes-install ├── ctakes-server-setup.sh ├── ctakes-user-install.sh ├── log4j.xml └── run.sh ├── ctakes.py ├── databases ├── .gitignore ├── rxnorm.sh └── umls.sh ├── dateutil ├── LICENSE ├── __init__.py ├── easter.py ├── parser.py ├── relativedelta.py ├── rrule.py ├── tz.py └── tzwin.py ├── dbobject.py ├── files.py ├── mngobject.py ├── nlp.py ├── nltktags.py ├── server.py ├── sqlite.py ├── swagger.yml └── umls.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | 4 | # ignore settings 5 | umls.sh 6 | 7 | # ignore cTAKES install 8 | ctakes 9 | ctakes-svn 10 | ctakes-test 11 | apache-ctakes-4.0.0 12 | 13 | # ignore MetaMap install 14 | metamap 15 | metamap-test 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### What you'll be able to do ### 2 | 3 | 1. Interact with the cTAKES `Default Clinical Pipeline` through a Python RESTful API to produce annotations for: 4 | - Anatomical sites, 5 | - Signs/symptoms, 6 | - Procedures, 7 | - Diseases/disorders and 8 | - Medications. 9 | 10 | **Input:** `Plain Text File` **Output:** `XMI File` 11 | 12 | **Original wiki page:** https://cwiki.apache.org/confluence/display/CTAKES/Default+Clinical+Pipeline 13 | 14 | ### cTAKES Install Instructions ### 15 | 16 | 1. Execute `./ctakes-install/ctakes-user-install.sh`, which will: 17 | - Download a copy of cTAKES into `./ctakes-install/tmp` 18 | - Extract cTAKES and copy it into the `ctakes-install` (cTAKES_HOME) directory 19 | - Download `ctakes-resources-4.0-bin.zip` into `./ctakes-install/tmp` 20 | - Unzip `ctakes-resources-4.0-bin.zip` and copy its contents into `apache-ctakes-4.0.0/resources` 21 | - Remove the temporary `tmp` directory from `ctakes-install` 22 | - Prompt you to set your UMLS credentials in `umls.sh` 23 | 24 | Note: If you don't have a UMLS username & password, you'll need to request one at https://uts.nlm.nih.gov/license.html 25 | 26 | ### Setting up Python RESTful API Instructions ### 27 | 28 | COMING SOON! 29 | 30 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/__init__.py -------------------------------------------------------------------------------- /ctakes-install/ctakes-server-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | -------------------------------------------------------------------------------- /ctakes-install/ctakes-user-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO: Add better error handling/troubleshooting.
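# A minimal sketch for the TODO above (an assumption, not part of the original script): fail fast and report the failing line. Uncomment to enable. # set -Eeuo pipefail # trap 'printf "\n\033[91m\u2573\033[0m Install failed at line $LINENO\n" >&2' ERR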
4 | 5 | ### Script Beginning ### 6 | 7 | PWD=$(pwd) 8 | ORIG=$(echo $PWD/$(dirname $0) | sed 's#/\.##') 9 | CTAKES_HOME="$ORIG/apache-ctakes-4.0.0" 10 | 11 | #FIXME: Fix output formatting 12 | progressfilt () 13 | { 14 | local flag=false c count cr=$'\r' nl=$'\n' 15 | while IFS='' read -d '' -rn 1 c 16 | 17 | do 18 | if $flag 19 | then 20 | printf '%c' "$c" 21 | else 22 | if [[ $c != $cr && $c != $nl ]] 23 | then 24 | count=0 25 | else 26 | ((count++)) 27 | if ((count > 1)) 28 | then 29 | flag=true 30 | fi 31 | fi 32 | fi 33 | done 34 | } 35 | 36 | printf "\n\033[92m\u0F36\033[0m Install directory: $CTAKES_HOME \n" 37 | 38 | ### Checking for dependencies ### 39 | 40 | printf "\n\033[92m\u0F36\033[0m Checking for dependencies...\n" 41 | 42 | # Java Check # 43 | 44 | if type -p java >/dev/null 2>&1; then 45 | _java=java 46 | elif [[ -n "$JAVA_HOME" ]] && [[ -x "$JAVA_HOME/bin/java" ]]; then 47 | _java="$JAVA_HOME/bin/java" 48 | else 49 | printf "\n \u2573 Java wasn't found. Please install Java 1.8 or greater and try again!" 50 | exit 1 51 | fi 52 | 53 | if [[ "$_java" ]]; then 54 | version=$("$_java" -version 2>&1 | awk -F '"' '/version/ {print $2}') 55 | if [[ "$version" == "1.8" || "$version" > "1.8" ]]; then 56 | printf "\n \033[92m\u2713\033[0m Java 1.8 or greater is installed!\n" 57 | else 58 | printf "\n \033[91m\u2573\033[0m Current Java version is $version; please upgrade to Java 1.8 or greater!\n" 59 | exit 1 60 | fi 61 | fi 62 | 63 | # Warn if install exists # 64 | 65 | if [ -d "$CTAKES_HOME" ]; then 66 | printf "\n \033[91m\u2573\033[0m cTAKES install already exists!\n\n" 67 | exit 1 68 | fi 69 | 70 | # Download cTAKES user install file (Linux) # 71 | if [ ! -d "$CTAKES_HOME" ]; then 72 | printf "\n\033[92m\u0F36\033[0m Downloading: apache-ctakes-4.0.0-bin.tar.gz\n\n" 73 | 74 | wget --progress=bar:force http://www-eu.apache.org/dist/ctakes/ctakes-4.0.0/apache-ctakes-4.0.0-bin.tar.gz -P "$ORIG/tmp/" 2>&1 | progressfilt 75 | tar -xvf "$ORIG/tmp/apache-ctakes-4.0.0-bin.tar.gz" -C "$ORIG" 76 | fi 77 | 78 | # Get resource files # 79 | 80 | printf "\n\033[92m\u0F36\033[0m Downloading: ctakes-resources-4.0-bin.zip\n\n" 81 | cd "$ORIG/tmp" 82 | wget --progress=bar:force http://sourceforge.net/projects/ctakesresources/files/ctakes-resources-4.0-bin.zip -P "$ORIG/tmp/" 2>&1 | progressfilt 83 | 84 | printf "\033[92m\u0F36\033[0m Unzipping and moving resource files...\n\n" 85 | unzip ctakes-resources-4.0-bin.zip 86 | cp -R "$ORIG/tmp/resources/"* "$ORIG/apache-ctakes-4.0.0/resources" 87 | rm -r "$ORIG/tmp/" 88 | 89 | # Update UMLS Credentials # 90 | if [ ! -f "$ORIG/umls.sh" ]; then 91 | read -r -p " 92 | ༶ Add UMLS credentials? [y/N] " response 93 | response=${response,,} 94 | 95 | cd "$ORIG" 96 | 97 | if [[ "$response" =~ ^(yes|y)$ ]]; 98 | then 99 | touch "$ORIG/umls.sh" 100 | printf "#!/bin/bash \n\nUMLS_USERNAME=\"SAMPLE_USER\"\nUMLS_PASSWORD=\"SAMPLE_PASSWORD\"\n\nexport UMLS_USERNAME\nexport UMLS_PASSWORD" >> "$ORIG/umls.sh" 101 | chmod +x "$ORIG/umls.sh"
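# The generated umls.sh then looks like this before the SAMPLE_* placeholders are replaced below (shown for reference only): # #!/bin/bash # UMLS_USERNAME="SAMPLE_USER" # UMLS_PASSWORD="SAMPLE_PASSWORD" # export UMLS_USERNAME # export UMLS_PASSWORD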
[y/N] " response 93 | response=${response,,} 94 | 95 | cd ../ 96 | 97 | if [[ "$response" =~ ^(yes|y)$ ]]; 98 | then 99 | touch $PWD/umls.sh 100 | printf "#!/bin/bash \n\nUMLS_USERNAME=\"SAMPLE_USER\"\nUMLS_PASSWORD=\"SAMPLE_PASSWORD\"\n\nexport UMLS_USERNAME\nexport UMLS_PASSWORD" >> $PWD/umls.sh 101 | chmod +x $PWD/umls.sh 102 | 103 | read -r -p "༶ Username: `echo $'\n> '`" username 104 | username=${username,,} 105 | 106 | set_password() { 107 | 108 | read -rs -p "༶ Password: `echo $'\n> '`" password_1 109 | password_1=${password_1} 110 | 111 | read -rs -p "`echo $'\r'`༶ Verify Password: `echo $'\n> '`" password_2 112 | password_2=${password_2} 113 | 114 | if [[ $password_1 = $password_2 ]];then 115 | 116 | sed -i -e "s/SAMPLE_USER/$username/g" $PWD/umls.sh 117 | sed -i -e "s/SAMPLE_PASSWORD/$password_1/g" $PWD/umls.sh 118 | 119 | else 120 | printf "\n༶ Password mismatch try again...\n" 121 | set_password 122 | fi 123 | } 124 | set_password 125 | printf "\n\033[92m\u0F36\033[0m UMLS credentials updated!\n" 126 | else 127 | printf "\n\033[92m\u0F36\033[0m No worries you can add them manually later!\n" 128 | fi 129 | fi 130 | printf "\n\u0FC9 DONE!\n\n" -------------------------------------------------------------------------------- /ctakes-install/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /ctakes-install/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Requires JAVA JDK 1.8+ 4 | 5 | # Check for UMLS credentials 6 | if [ ! -f $PWD/ctakes-install/umls.sh ]; then 7 | printf "\033[91mERROR:\033[0m You need to provide UMLS credentials in the file ./umls.sh" 1>&2 8 | exit 1 9 | else 10 | # Source UMLS credentials 11 | printf "\033[92m\u0F36\033[0m UMLS credentials file confirmed!\n\n" 12 | . 
14 | 15 | # Only set CTAKES_HOME if not already set 16 | [ -z "$CTAKES_HOME" ] && CTAKES_HOME="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )/apache-ctakes-4.0.0" 17 | REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." >/dev/null && pwd )" 18 | cd "$CTAKES_HOME" 19 | 20 | # Launch 21 | 22 | bin/runClinicalPipeline.sh -i "$REPO_ROOT/ctakes-test/ctakes_input" --xmiOut "$REPO_ROOT/ctakes-test/ctakes_output" --user "$UMLS_USERNAME" --pass "$UMLS_PASSWORD" 23 | -------------------------------------------------------------------------------- /ctakes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Handling cTAKES 5 | # 6 | # 2013-05-14 Created by Pascal Pfiffner 7 | # 8 | 9 | import os 10 | import logging 11 | import codecs 12 | import inspect 13 | 14 | from xml.dom.minidom import parse 15 | from subprocess import call 16 | 17 | from nlp import NLPProcessing, list_to_sentences 18 | 19 | class cTAKES(NLPProcessing): 20 | def __init__(self, settings): 21 | 22 | # print('(ctakes.py) Settings being used:', settings) 23 | super().__init__() 24 | 25 | self.name = 'ctakes' 26 | self.bin = os.path.dirname(os.path.abspath('%s/../' % inspect.getfile(inspect.currentframe()))) 27 | self.root = settings['root'] 28 | self.cleanup = settings['cleanup'] 29 | 30 | # print('(ctakes.py) Self name:', self.name) 31 | # print('(ctakes.py) Self bin:', self.bin) 32 | # print('(ctakes.py) Self root:', self.root) 33 | # print('(ctakes.py) Self cleanup:', self.cleanup, '\n') 34 | 35 | @property 36 | def _in_dir(self): 37 | return os.path.join(self.root, 'ctakes_input') 38 | 39 | @property 40 | def _out_dir(self): 41 | return os.path.join(self.root, 'ctakes_output') 42 | 43 | def _create_directories_if_needed(self): 44 | in_dir = self._in_dir 45 | out_dir = self._out_dir 46 | if not os.path.exists(in_dir): 47 | os.mkdir(in_dir) 48 | if not os.path.exists(out_dir): 49 | os.mkdir(out_dir) 50 | 51 | def _run(self): 52 | if call(['{}/cTAKES-Python-API/ctakes-install/run.sh'.format(self.bin)]) != 0: 53 | raise Exception('Error running cTAKES') 54 | 55 | def _write_input(self, text, filename): 56 | if text is None \ 57 | or len(text) < 1 \ 58 | or filename is None: 59 | return False 60 | 61 | in_dir = os.path.join( 62 | self.root if self.root is not None else '.', 'ctakes_input') 63 | if not os.path.exists(in_dir): 64 | logging.error( 65 | "The input directory for cTAKES at %s does not exist" % in_dir) 66 | return False 67 | 68 | infile = os.path.join(in_dir, filename) 69 | if os.path.exists(infile): 70 | return False 71 | 72 | # write it 73 | with codecs.open(infile, 'w', 'utf-8') as handle: 74 | handle.write(list_to_sentences(text)) 75 | 76 | return True 77 |
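# Typical round trip (a sketch; prepare() and run() come from the NLPProcessing base class in nlp.py, as exercised in the __main__ block below): # ct = cTAKES({'root': 'ctakes-test', 'cleanup': True}) # ct.prepare() # ct._write_input("Patient denies chest pain.", 'note.txt') # ct.run() # codes = ct._parse_output('note.txt') # -> {'snomed': [...], 'cui': [...], 'rxnorm': [...]}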
78 | def _parse_output(self, filename, **kwargs): 79 | """ Parse cTAKES XML output. """ 80 | 81 | if filename is None: 82 | return None 83 | 84 | # is there cTAKES output? 85 | root = self.root if self.root is not None else '.' 86 | out_dir = os.path.join(root, 'ctakes_output') 87 | if not os.path.exists(out_dir): 88 | logging.error( 89 | "The output directory for cTAKES at %s does not exist" % out_dir) 90 | return None 91 | 92 | outfile = os.path.join(out_dir, "%s.xmi" % filename) 93 | if not os.path.exists(outfile): 94 | # do not log here; fail silently 95 | return None 96 | 97 | snomeds = [] 98 | cuis = [] 99 | rxnorms = [] 100 | 101 | # parse XMI file 102 | dom = parse(outfile).documentElement 103 | 104 | # get all "textsem:EntityMention" nodes, which store negation information 105 | neg_ids = [] 106 | for node in dom.getElementsByTagName('textsem:EntityMention'): 107 | polarity = node.attributes.get('polarity') 108 | if polarity is not None and int(polarity.value) < 0: 109 | ids = node.attributes.get('ontologyConceptArr') 110 | if ids is not None and ids.value: 111 | neg_ids.extend([int(i) for i in ids.value.split()]) 112 | 113 | # pluck apart nodes that carry codified data ("refsem" namespace) 114 | code_nodes = dom.getElementsByTagNameNS( 115 | 'http:///org/apache/ctakes/typesystem/type/refsem.ecore', '*') 116 | if len(code_nodes) > 0: 117 | for node in code_nodes: 118 | # print(node.toprettyxml()) 119 | 120 | # check if this node is negated 121 | is_neg = False 122 | node_id_attr = node.attributes.get('xmi:id') 123 | if node_id_attr is not None: 124 | is_neg = int(node_id_attr.value) in neg_ids 125 | 126 | # extract SNOMED and RxNorm codes 127 | if 'codingScheme' in node.attributes.keys() \ 128 | and 'code' in node.attributes.keys(): 129 | code = node.attributes['code'].value 130 | if is_neg: 131 | code = "-%s" % code 132 | 133 | # extract SNOMED code 134 | if 'SNOMED' == node.attributes['codingScheme'].value: 135 | snomeds.append(code) 136 | 137 | # extract RXNORM code 138 | elif 'RXNORM' == node.attributes['codingScheme'].value: 139 | rxnorms.append(code) 140 | 141 | # extract UMLS CUI 142 | if 'cui' in node.attributes.keys(): 143 | code = node.attributes['cui'].value 144 | if is_neg: 145 | code = "-%s" % code 146 | cuis.append(code) 147 | 148 | # make lists unique 149 | snomeds = list(set(snomeds)) 150 | cuis = list(set(cuis)) 151 | rxnorms = list(set(rxnorms)) 152 | 153 | # clean up if instructed to do so 154 | if self.cleanup: 155 | os.remove(outfile) 156 | 157 | in_dir = os.path.join(root, 'ctakes_input') 158 | infile = os.path.join(in_dir, filename) 159 | if os.path.exists(infile): 160 | os.remove(infile) 161 | 162 | # create and return a dictionary (don't filter empty lists) 163 | ret = { 164 | 'snomed': snomeds, 165 | 'cui': cuis, 166 | 'rxnorm': rxnorms 167 | } 168 | 169 | return ret 170 | 171 | # we can execute this file to do some testing 172 | if '__main__' == __name__: 173 | 174 | ### Define the directory to run in 175 | # print('(ctakes.py) creating directory_obj') 176 | directory_obj = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ctakes-test') 177 | # print('(ctakes.py) directory_obj = ', directory_obj) 178 | 179 | ### Start the cTAKES class, with the root directory set to directory_obj & cleanup set to True 180 | # print('(ctakes.py) Starting my_ctakes') 181 | my_ctakes = cTAKES({'root': directory_obj, 'cleanup': True}) 182 | 183 | my_ctakes.prepare() 184 | 185 | # create a test input file 186 | with open(os.path.join(my_ctakes.root, 'ctakes_input/test.txt'), 'w') as handle: 187 | handle.write("History of clinically significant hypogammaglobulinemia, common variable immunodeficiency, or humoral immunodeficiency") 188 |
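# With a working cTAKES install, _parse_output('test.txt') afterwards returns a dict of the form {'snomed': [...], 'cui': [...], 'rxnorm': [...]}, with negated concepts carrying a leading '-' on their code (shape only; actual values depend on the pipeline run).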
189 | # run 190 | print("\n\033[92m\u0F36 \033[0mStarting cTAKES Java Application...\n") 191 | try: 192 | my_ctakes.run() 193 | print("\n\033[92m\u263A \033[0mDONE!\n") 194 | except Exception as e: 195 | print("\033[91mFAILED:\033[0m {}\n".format(e)) 196 | -------------------------------------------------------------------------------- /databases/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !*.sh 4 | -------------------------------------------------------------------------------- /databases/rxnorm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create an RxNorm SQLite database (and a relations triple store). 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e rxnorm.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/rrf" ]; then 13 | echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 14 | exit 1 15 | fi 16 | 17 | # init the database 18 | cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db 19 | 20 | # convert RRF files (strip the trailing pipe and remove quote (") characters, which give SQLite trouble) 21 | if [ ! -e "$1/rrf/RXNREL.pipe" ]; then 22 | current=$(pwd) 23 | cd "$1/rrf" 24 | echo "-> Converting RRF files for SQLite" 25 | for f in *.RRF; do 26 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 27 | done 28 | cd "$current" 29 | fi 30 | 31 | # import tables 32 | for f in "$1/rrf/"*.pipe; do 33 | table=$(basename ${f%.pipe}) 34 | echo "-> Importing $table" 35 | sqlite3 rxnorm.db ".import '$f' '$table'" 36 | done 37 | 38 | # create an NDC table 39 | echo "-> Creating NDC table" 40 | # sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';" # we do it in 2 steps to create the primary index column 41 | sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);" 42 | sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';" 43 | sqlite3 rxnorm.db "CREATE INDEX X_RXCUI ON NDC (RXCUI);" 44 | sqlite3 rxnorm.db "CREATE INDEX X_NDC ON NDC (NDC);" 45 | 46 | # some SQL gems (MySQL syntax, kept for reference) 47 | ## export NDC to CSV 48 | # SELECT RXCUI, NDC FROM NDC INTO OUTFILE 'ndc.csv' FIELDS TERMINATED BY ',' LINES TERMINATED BY "\n"; 49 | ## export RxNorm-only names with their type (TTY) to CSV 50 | # SELECT RXCUI, TTY, STR FROM RXNCONSO WHERE SAB = 'RXNORM' INTO OUTFILE 'names.csv' FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY "\n"; 51 | fi 52 | 53 | # dump to N-Triples (currently disabled by the exit below) 54 | exit 0 55 | sqlite3 rxnorm.db <<SQLITE_COMMAND 56 | … 60 | SELECT … ." FROM RXNREL WHERE RELA != ''; 61 | SQLITE_COMMAND 62 | -------------------------------------------------------------------------------- /databases/umls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create a UMLS SQLite database. 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e umls.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the UMLS install directory as first argument when invoking this script.
Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html (should check which file is needed)" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/META" ]; then 13 | echo "There is no directory named META in the install directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html" 14 | exit 1 15 | fi 16 | 17 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 18 | if [ ! -e "$1/META/MRDEF.pipe" ]; then 19 | current=$(pwd) 20 | cd "$1/META" 21 | echo "-> Converting RRF files for SQLite" 22 | for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do 23 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 24 | done 25 | cd $current 26 | fi 27 | 28 | # init the database for MRDEF 29 | # table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/ 30 | sqlite3 umls.db "CREATE TABLE MRDEF ( 31 | CUI varchar, 32 | AUI varchar, 33 | ATUI varchar, 34 | SATUI varchar, 35 | SAB varchar, 36 | DEF text, 37 | SUPPRESS varchar, 38 | CVF varchar 39 | )" 40 | 41 | # init the database for MRCONSO 42 | sqlite3 umls.db "CREATE TABLE MRCONSO ( 43 | CUI varchar, 44 | LAT varchar, 45 | TS varchar, 46 | LUI varchar, 47 | STT varchar, 48 | SUI varchar, 49 | ISPREF varchar, 50 | AUI varchar, 51 | SAUI varchar, 52 | SCUI varchar, 53 | SDUI varchar, 54 | SAB varchar, 55 | TTY varchar, 56 | CODE varchar, 57 | STR text, 58 | SRL varchar, 59 | SUPPRESS varchar, 60 | CVF varchar 61 | )" 62 | 63 | # init the database for MRSTY 64 | sqlite3 umls.db "CREATE TABLE MRSTY ( 65 | CUI varchar, 66 | TUI varchar, 67 | STN varchar, 68 | STY text, 69 | ATUI varchar, 70 | CVF varchar 71 | )" 72 | 73 | # import tables 74 | for f in "$1/META/"*.pipe; do 75 | table=$(basename ${f%.pipe}) 76 | echo "-> Importing $table" 77 | sqlite3 umls.db ".import '$f' '$table'" 78 | done 79 | 80 | # create indexes 81 | echo "-> Creating indexes" 82 | sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);" 83 | sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);" 84 | sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);" 85 | sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);" 86 | sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);" 87 | sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);" 88 | sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);" 89 | 90 | # create faster lookup table 91 | echo "-> Creating fast lookup table" 92 | sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'" 93 | sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT" 94 | sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)" 95 | sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)" 96 | else 97 | echo "=> umls.db already exists" 98 | fi 99 | 100 | -------------------------------------------------------------------------------- /dateutil/LICENSE: -------------------------------------------------------------------------------- 1 | A. HISTORY OF THE SOFTWARE 2 | ========================== 3 | 4 | Python was created in the early 1990s by Guido van Rossum at Stichting 5 | Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands 6 | as a successor of a language called ABC. 
Guido remains Python's 7 | principal author, although it includes many contributions from others. 8 | 9 | In 1995, Guido continued his work on Python at the Corporation for 10 | National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) 11 | in Reston, Virginia where he released several versions of the 12 | software. 13 | 14 | In May 2000, Guido and the Python core development team moved to 15 | BeOpen.com to form the BeOpen PythonLabs team. In October of the same 16 | year, the PythonLabs team moved to Digital Creations (now Zope 17 | Corporation, see http://www.zope.com). In 2001, the Python Software 18 | Foundation (PSF, see http://www.python.org/psf/) was formed, a 19 | non-profit organization created specifically to own Python-related 20 | Intellectual Property. Zope Corporation is a sponsoring member of 21 | the PSF. 22 | 23 | All Python releases are Open Source (see http://www.opensource.org for 24 | the Open Source Definition). Historically, most, but not all, Python 25 | releases have also been GPL-compatible; the table below summarizes 26 | the various releases. 27 | 28 | Release Derived Year Owner GPL- 29 | from compatible? (1) 30 | 31 | 0.9.0 thru 1.2 1991-1995 CWI yes 32 | 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 33 | 1.6 1.5.2 2000 CNRI no 34 | 2.0 1.6 2000 BeOpen.com no 35 | 1.6.1 1.6 2001 CNRI yes (2) 36 | 2.1 2.0+1.6.1 2001 PSF no 37 | 2.0.1 2.0+1.6.1 2001 PSF yes 38 | 2.1.1 2.1+2.0.1 2001 PSF yes 39 | 2.2 2.1.1 2001 PSF yes 40 | 2.1.2 2.1.1 2002 PSF yes 41 | 2.1.3 2.1.2 2002 PSF yes 42 | 2.2.1 2.2 2002 PSF yes 43 | 2.2.2 2.2.1 2002 PSF yes 44 | 2.2.3 2.2.2 2003 PSF yes 45 | 2.3 2.2.2 2002-2003 PSF yes 46 | 47 | Footnotes: 48 | 49 | (1) GPL-compatible doesn't mean that we're distributing Python under 50 | the GPL. All Python licenses, unlike the GPL, let you distribute 51 | a modified version without making your changes open source. The 52 | GPL-compatible licenses make it possible to combine Python with 53 | other software that is released under the GPL; the others don't. 54 | 55 | (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, 56 | because its license has a choice of law clause. According to 57 | CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 58 | is "not incompatible" with the GPL. 59 | 60 | Thanks to the many outside volunteers who have worked under Guido's 61 | direction to make these releases possible. 62 | 63 | 64 | B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON 65 | =============================================================== 66 | 67 | PSF LICENSE AGREEMENT FOR PYTHON 2.3 68 | ------------------------------------ 69 | 70 | 1. This LICENSE AGREEMENT is between the Python Software Foundation 71 | ("PSF"), and the Individual or Organization ("Licensee") accessing and 72 | otherwise using Python 2.3 software in source or binary form and its 73 | associated documentation. 74 | 75 | 2. Subject to the terms and conditions of this License Agreement, PSF 76 | hereby grants Licensee a nonexclusive, royalty-free, world-wide 77 | license to reproduce, analyze, test, perform and/or display publicly, 78 | prepare derivative works, distribute, and otherwise use Python 2.3 79 | alone or in any derivative version, provided, however, that PSF's 80 | License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 81 | 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are 82 | retained in Python 2.3 alone or in any derivative version prepared by 83 | Licensee. 84 | 85 | 3. 
In the event Licensee prepares a derivative work that is based on 86 | or incorporates Python 2.3 or any part thereof, and wants to make 87 | the derivative work available to others as provided herein, then 88 | Licensee hereby agrees to include in any such work a brief summary of 89 | the changes made to Python 2.3. 90 | 91 | 4. PSF is making Python 2.3 available to Licensee on an "AS IS" 92 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 93 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 94 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 95 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT 96 | INFRINGE ANY THIRD PARTY RIGHTS. 97 | 98 | 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 99 | 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 100 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, 101 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 102 | 103 | 6. This License Agreement will automatically terminate upon a material 104 | breach of its terms and conditions. 105 | 106 | 7. Nothing in this License Agreement shall be deemed to create any 107 | relationship of agency, partnership, or joint venture between PSF and 108 | Licensee. This License Agreement does not grant permission to use PSF 109 | trademarks or trade name in a trademark sense to endorse or promote 110 | products or services of Licensee, or any third party. 111 | 112 | 8. By copying, installing or otherwise using Python 2.3, Licensee 113 | agrees to be bound by the terms and conditions of this License 114 | Agreement. 115 | 116 | 117 | BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 118 | ------------------------------------------- 119 | 120 | BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 121 | 122 | 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 123 | office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 124 | Individual or Organization ("Licensee") accessing and otherwise using 125 | this software in source or binary form and its associated 126 | documentation ("the Software"). 127 | 128 | 2. Subject to the terms and conditions of this BeOpen Python License 129 | Agreement, BeOpen hereby grants Licensee a non-exclusive, 130 | royalty-free, world-wide license to reproduce, analyze, test, perform 131 | and/or display publicly, prepare derivative works, distribute, and 132 | otherwise use the Software alone or in any derivative version, 133 | provided, however, that the BeOpen Python License is retained in the 134 | Software, alone or in any derivative version prepared by Licensee. 135 | 136 | 3. BeOpen is making the Software available to Licensee on an "AS IS" 137 | basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 138 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 139 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 140 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT 141 | INFRINGE ANY THIRD PARTY RIGHTS. 142 | 143 | 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 144 | SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS 145 | AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY 146 | DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 147 | 148 | 5. This License Agreement will automatically terminate upon a material 149 | breach of its terms and conditions. 150 | 151 | 6. 
This License Agreement shall be governed by and interpreted in all 152 | respects by the law of the State of California, excluding conflict of 153 | law provisions. Nothing in this License Agreement shall be deemed to 154 | create any relationship of agency, partnership, or joint venture 155 | between BeOpen and Licensee. This License Agreement does not grant 156 | permission to use BeOpen trademarks or trade names in a trademark 157 | sense to endorse or promote products or services of Licensee, or any 158 | third party. As an exception, the "BeOpen Python" logos available at 159 | http://www.pythonlabs.com/logos.html may be used according to the 160 | permissions granted on that web page. 161 | 162 | 7. By copying, installing or otherwise using the software, Licensee 163 | agrees to be bound by the terms and conditions of this License 164 | Agreement. 165 | 166 | 167 | CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 168 | --------------------------------------- 169 | 170 | 1. This LICENSE AGREEMENT is between the Corporation for National 171 | Research Initiatives, having an office at 1895 Preston White Drive, 172 | Reston, VA 20191 ("CNRI"), and the Individual or Organization 173 | ("Licensee") accessing and otherwise using Python 1.6.1 software in 174 | source or binary form and its associated documentation. 175 | 176 | 2. Subject to the terms and conditions of this License Agreement, CNRI 177 | hereby grants Licensee a nonexclusive, royalty-free, world-wide 178 | license to reproduce, analyze, test, perform and/or display publicly, 179 | prepare derivative works, distribute, and otherwise use Python 1.6.1 180 | alone or in any derivative version, provided, however, that CNRI's 181 | License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 182 | 1995-2001 Corporation for National Research Initiatives; All Rights 183 | Reserved" are retained in Python 1.6.1 alone or in any derivative 184 | version prepared by Licensee. Alternately, in lieu of CNRI's License 185 | Agreement, Licensee may substitute the following text (omitting the 186 | quotes): "Python 1.6.1 is made available subject to the terms and 187 | conditions in CNRI's License Agreement. This Agreement together with 188 | Python 1.6.1 may be located on the Internet using the following 189 | unique, persistent identifier (known as a handle): 1895.22/1013. This 190 | Agreement may also be obtained from a proxy server on the Internet 191 | using the following URL: http://hdl.handle.net/1895.22/1013". 192 | 193 | 3. In the event Licensee prepares a derivative work that is based on 194 | or incorporates Python 1.6.1 or any part thereof, and wants to make 195 | the derivative work available to others as provided herein, then 196 | Licensee hereby agrees to include in any such work a brief summary of 197 | the changes made to Python 1.6.1. 198 | 199 | 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" 200 | basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 201 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 202 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 203 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT 204 | INFRINGE ANY THIRD PARTY RIGHTS. 205 | 206 | 5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 207 | 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 208 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, 209 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 210 | 211 | 6. This License Agreement will automatically terminate upon a material 212 | breach of its terms and conditions. 213 | 214 | 7. This License Agreement shall be governed by the federal 215 | intellectual property law of the United States, including without 216 | limitation the federal copyright law, and, to the extent such 217 | U.S. federal law does not apply, by the law of the Commonwealth of 218 | Virginia, excluding Virginia's conflict of law provisions. 219 | Notwithstanding the foregoing, with regard to derivative works based 220 | on Python 1.6.1 that incorporate non-separable material that was 221 | previously distributed under the GNU General Public License (GPL), the 222 | law of the Commonwealth of Virginia shall govern this License 223 | Agreement only as to issues arising under or with respect to 224 | Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this 225 | License Agreement shall be deemed to create any relationship of 226 | agency, partnership, or joint venture between CNRI and Licensee. This 227 | License Agreement does not grant permission to use CNRI trademarks or 228 | trade name in a trademark sense to endorse or promote products or 229 | services of Licensee, or any third party. 230 | 231 | 8. By clicking on the "ACCEPT" button where indicated, or by copying, 232 | installing or otherwise using Python 1.6.1, Licensee agrees to be 233 | bound by the terms and conditions of this License Agreement. 234 | 235 | ACCEPT 236 | 237 | 238 | CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 239 | -------------------------------------------------- 240 | 241 | Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 242 | The Netherlands. All rights reserved. 243 | 244 | Permission to use, copy, modify, and distribute this software and its 245 | documentation for any purpose and without fee is hereby granted, 246 | provided that the above copyright notice appear in all copies and that 247 | both that copyright notice and this permission notice appear in 248 | supporting documentation, and that the name of Stichting Mathematisch 249 | Centrum or CWI not be used in advertising or publicity pertaining to 250 | distribution of the software without specific, written prior 251 | permission. 252 | 253 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO 254 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 255 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE 256 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 257 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 258 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 259 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 260 | -------------------------------------------------------------------------------- /dateutil/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | __version__ = "1.5" 10 | -------------------------------------------------------------------------------- /dateutil/easter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | 12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] 13 | 14 | EASTER_JULIAN = 1 15 | EASTER_ORTHODOX = 2 16 | EASTER_WESTERN = 3 17 | 18 | def easter(year, method=EASTER_WESTERN): 19 | """ 20 | This method was ported from the work done by GM Arts, 21 | on top of the algorithm by Claus Tondering, which was 22 | based in part on the algorithm of Ouding (1940), as 23 | quoted in "Explanatory Supplement to the Astronomical 24 | Almanac", P. Kenneth Seidelmann, editor. 25 | 26 | This algorithm implements three different easter 27 | calculation methods: 28 | 29 | 1 - Original calculation in Julian calendar, valid in 30 | dates after 326 AD 31 | 2 - Original method, with date converted to Gregorian 32 | calendar, valid in years 1583 to 4099 33 | 3 - Revised method, in Gregorian calendar, valid in 34 | years 1583 to 4099 as well 35 | 36 | These methods are represented by the constants: 37 | 38 | EASTER_JULIAN = 1 39 | EASTER_ORTHODOX = 2 40 | EASTER_WESTERN = 3 41 | 42 | The default method is method 3. 43 | 44 | More about the algorithm may be found at: 45 | 46 | http://users.chariot.net.au/~gmarts/eastalg.htm 47 | 48 | and 49 | 50 | http://www.tondering.dk/claus/calendar.html 51 | 52 | """ 53 | 54 | if not (1 <= method <= 3): 55 | raise ValueError, "invalid method" 56 | 57 | # g - Golden year - 1 58 | # c - Century 59 | # h - (23 - Epact) mod 30 60 | # i - Number of days from March 21 to Paschal Full Moon 61 | # j - Weekday for PFM (0=Sunday, etc) 62 | # p - Number of days from March 21 to Sunday on or before PFM 63 | # (-6 to 28 methods 1 & 3, to 56 for method 2) 64 | # e - Extra days to add for method 2 (converting Julian 65 | # date to Gregorian date) 66 | 67 | y = year 68 | g = y % 19 69 | e = 0 70 | if method < 3: 71 | # Old method 72 | i = (19*g+15)%30 73 | j = (y+y//4+i)%7 74 | if method == 2: 75 | # Extra dates to convert Julian to Gregorian date 76 | e = 10 77 | if y > 1600: 78 | e = e+y//100-16-(y//100-16)//4 79 | else: 80 | # New method 81 | c = y//100 82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30 83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11)) 84 | j = (y+y//4+i+2-c+c//4)%7 85 | 86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May 87 | # (later dates apply to method 2, although 23 May never actually occurs) 88 | p = i-j+e 89 | d = 1+(p+27+(p+6)//40)%31 90 | m = 3+(p+26)//30 91 | return datetime.date(int(y),int(m),int(d)) 92 | 93 | -------------------------------------------------------------------------------- /dateutil/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/dateutil/parser.py -------------------------------------------------------------------------------- /dateutil/relativedelta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to 
the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | import calendar 12 | 13 | __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"] 14 | 15 | class weekday(object): 16 | __slots__ = ["weekday", "n"] 17 | 18 | def __init__(self, weekday, n=None): 19 | self.weekday = weekday 20 | self.n = n 21 | 22 | def __call__(self, n): 23 | if n == self.n: 24 | return self 25 | else: 26 | return self.__class__(self.weekday, n) 27 | 28 | def __eq__(self, other): 29 | try: 30 | if self.weekday != other.weekday or self.n != other.n: 31 | return False 32 | except AttributeError: 33 | return False 34 | return True 35 | 36 | def __repr__(self): 37 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday] 38 | if not self.n: 39 | return s 40 | else: 41 | return "%s(%+d)" % (s, self.n) 42 | 43 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)]) 44 | 45 | class relativedelta: 46 | """ 47 | The relativedelta type is based on the specification of the excelent 48 | work done by M.-A. Lemburg in his mx.DateTime extension. However, 49 | notice that this type does *NOT* implement the same algorithm as 50 | his work. Do *NOT* expect it to behave like mx.DateTime's counterpart. 51 | 52 | There's two different ways to build a relativedelta instance. The 53 | first one is passing it two date/datetime classes: 54 | 55 | relativedelta(datetime1, datetime2) 56 | 57 | And the other way is to use the following keyword arguments: 58 | 59 | year, month, day, hour, minute, second, microsecond: 60 | Absolute information. 61 | 62 | years, months, weeks, days, hours, minutes, seconds, microseconds: 63 | Relative information, may be negative. 64 | 65 | weekday: 66 | One of the weekday instances (MO, TU, etc). These instances may 67 | receive a parameter N, specifying the Nth weekday, which could 68 | be positive or negative (like MO(+1) or MO(-2). Not specifying 69 | it is the same as specifying +1. You can also use an integer, 70 | where 0=MO. 71 | 72 | leapdays: 73 | Will add given days to the date found, if year is a leap 74 | year, and the date found is post 28 of february. 75 | 76 | yearday, nlyearday: 77 | Set the yearday or the non-leap year day (jump leap days). 78 | These are converted to day/month/leapdays information. 79 | 80 | Here is the behavior of operations with relativedelta: 81 | 82 | 1) Calculate the absolute year, using the 'year' argument, or the 83 | original datetime year, if the argument is not present. 84 | 85 | 2) Add the relative 'years' argument to the absolute year. 86 | 87 | 3) Do steps 1 and 2 for month/months. 88 | 89 | 4) Calculate the absolute day, using the 'day' argument, or the 90 | original datetime day, if the argument is not present. Then, 91 | subtract from the day until it fits in the year and month 92 | found after their operations. 93 | 94 | 5) Add the relative 'days' argument to the absolute day. Notice 95 | that the 'weeks' argument is multiplied by 7 and added to 96 | 'days'. 97 | 98 | 6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds, 99 | microsecond/microseconds. 100 | 101 | 7) If the 'weekday' argument is present, calculate the weekday, 102 | with the given (wday, nth) tuple. wday is the index of the 103 | weekday (0-6, 0=Mon), and nth is the number of weeks to add 104 | forward or backward, depending on its signal. 
Notice that if 105 | the calculated date is already Monday, for example, using 106 | (0, 1) or (0, -1) won't change the day. 107 | """ 108 | 109 | def __init__(self, dt1=None, dt2=None, 110 | years=0, months=0, days=0, leapdays=0, weeks=0, 111 | hours=0, minutes=0, seconds=0, microseconds=0, 112 | year=None, month=None, day=None, weekday=None, 113 | yearday=None, nlyearday=None, 114 | hour=None, minute=None, second=None, microsecond=None): 115 | if dt1 and dt2: 116 | if not isinstance(dt1, datetime.date) or \ 117 | not isinstance(dt2, datetime.date): 118 | raise TypeError, "relativedelta only diffs datetime/date" 119 | if type(dt1) is not type(dt2): 120 | if not isinstance(dt1, datetime.datetime): 121 | dt1 = datetime.datetime.fromordinal(dt1.toordinal()) 122 | elif not isinstance(dt2, datetime.datetime): 123 | dt2 = datetime.datetime.fromordinal(dt2.toordinal()) 124 | self.years = 0 125 | self.months = 0 126 | self.days = 0 127 | self.leapdays = 0 128 | self.hours = 0 129 | self.minutes = 0 130 | self.seconds = 0 131 | self.microseconds = 0 132 | self.year = None 133 | self.month = None 134 | self.day = None 135 | self.weekday = None 136 | self.hour = None 137 | self.minute = None 138 | self.second = None 139 | self.microsecond = None 140 | self._has_time = 0 141 | 142 | months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month) 143 | self._set_months(months) 144 | dtm = self.__radd__(dt2) 145 | if dt1 < dt2: 146 | while dt1 > dtm: 147 | months += 1 148 | self._set_months(months) 149 | dtm = self.__radd__(dt2) 150 | else: 151 | while dt1 < dtm: 152 | months -= 1 153 | self._set_months(months) 154 | dtm = self.__radd__(dt2) 155 | delta = dt1 - dtm 156 | self.seconds = delta.seconds+delta.days*86400 157 | self.microseconds = delta.microseconds 158 | else: 159 | self.years = years 160 | self.months = months 161 | self.days = days+weeks*7 162 | self.leapdays = leapdays 163 | self.hours = hours 164 | self.minutes = minutes 165 | self.seconds = seconds 166 | self.microseconds = microseconds 167 | self.year = year 168 | self.month = month 169 | self.day = day 170 | self.hour = hour 171 | self.minute = minute 172 | self.second = second 173 | self.microsecond = microsecond 174 | 175 | if type(weekday) is int: 176 | self.weekday = weekdays[weekday] 177 | else: 178 | self.weekday = weekday 179 | 180 | yday = 0 181 | if nlyearday: 182 | yday = nlyearday 183 | elif yearday: 184 | yday = yearday 185 | if yearday > 59: 186 | self.leapdays = -1 187 | if yday: 188 | ydayidx = [31,59,90,120,151,181,212,243,273,304,334,366] 189 | for idx, ydays in enumerate(ydayidx): 190 | if yday <= ydays: 191 | self.month = idx+1 192 | if idx == 0: 193 | self.day = yday 194 | else: 195 | self.day = yday-ydayidx[idx-1] 196 | break 197 | else: 198 | raise ValueError, "invalid year day (%d)" % yday 199 | 200 | self._fix() 201 | 202 | def _fix(self): 203 | if abs(self.microseconds) > 999999: 204 | s = self.microseconds//abs(self.microseconds) 205 | div, mod = divmod(self.microseconds*s, 1000000) 206 | self.microseconds = mod*s 207 | self.seconds += div*s 208 | if abs(self.seconds) > 59: 209 | s = self.seconds//abs(self.seconds) 210 | div, mod = divmod(self.seconds*s, 60) 211 | self.seconds = mod*s 212 | self.minutes += div*s 213 | if abs(self.minutes) > 59: 214 | s = self.minutes//abs(self.minutes) 215 | div, mod = divmod(self.minutes*s, 60) 216 | self.minutes = mod*s 217 | self.hours += div*s 218 | if abs(self.hours) > 23: 219 | s = self.hours//abs(self.hours) 220 | div, mod = divmod(self.hours*s, 24) 221 | self.hours = mod*s 
222 | self.days += div*s 223 | if abs(self.months) > 11: 224 | s = self.months//abs(self.months) 225 | div, mod = divmod(self.months*s, 12) 226 | self.months = mod*s 227 | self.years += div*s 228 | if (self.hours or self.minutes or self.seconds or self.microseconds or 229 | self.hour is not None or self.minute is not None or 230 | self.second is not None or self.microsecond is not None): 231 | self._has_time = 1 232 | else: 233 | self._has_time = 0 234 | 235 | def _set_months(self, months): 236 | self.months = months 237 | if abs(self.months) > 11: 238 | s = self.months//abs(self.months) 239 | div, mod = divmod(self.months*s, 12) 240 | self.months = mod*s 241 | self.years = div*s 242 | else: 243 | self.years = 0 244 | 245 | def __radd__(self, other): 246 | if not isinstance(other, datetime.date): 247 | raise TypeError, "unsupported type for add operation" 248 | elif self._has_time and not isinstance(other, datetime.datetime): 249 | other = datetime.datetime.fromordinal(other.toordinal()) 250 | year = (self.year or other.year)+self.years 251 | month = self.month or other.month 252 | if self.months: 253 | assert 1 <= abs(self.months) <= 12 254 | month += self.months 255 | if month > 12: 256 | year += 1 257 | month -= 12 258 | elif month < 1: 259 | year -= 1 260 | month += 12 261 | day = min(calendar.monthrange(year, month)[1], 262 | self.day or other.day) 263 | repl = {"year": year, "month": month, "day": day} 264 | for attr in ["hour", "minute", "second", "microsecond"]: 265 | value = getattr(self, attr) 266 | if value is not None: 267 | repl[attr] = value 268 | days = self.days 269 | if self.leapdays and month > 2 and calendar.isleap(year): 270 | days += self.leapdays 271 | ret = (other.replace(**repl) 272 | + datetime.timedelta(days=days, 273 | hours=self.hours, 274 | minutes=self.minutes, 275 | seconds=self.seconds, 276 | microseconds=self.microseconds)) 277 | if self.weekday: 278 | weekday, nth = self.weekday.weekday, self.weekday.n or 1 279 | jumpdays = (abs(nth)-1)*7 280 | if nth > 0: 281 | jumpdays += (7-ret.weekday()+weekday)%7 282 | else: 283 | jumpdays += (ret.weekday()-weekday)%7 284 | jumpdays *= -1 285 | ret += datetime.timedelta(days=jumpdays) 286 | return ret 287 | 288 | def __rsub__(self, other): 289 | return self.__neg__().__radd__(other) 290 | 291 | def __add__(self, other): 292 | if not isinstance(other, relativedelta): 293 | raise TypeError, "unsupported type for add operation" 294 | return relativedelta(years=other.years+self.years, 295 | months=other.months+self.months, 296 | days=other.days+self.days, 297 | hours=other.hours+self.hours, 298 | minutes=other.minutes+self.minutes, 299 | seconds=other.seconds+self.seconds, 300 | microseconds=other.microseconds+self.microseconds, 301 | leapdays=other.leapdays or self.leapdays, 302 | year=other.year or self.year, 303 | month=other.month or self.month, 304 | day=other.day or self.day, 305 | weekday=other.weekday or self.weekday, 306 | hour=other.hour or self.hour, 307 | minute=other.minute or self.minute, 308 | second=other.second or self.second, 309 | microsecond=other.second or self.microsecond) 310 | 311 | def __sub__(self, other): 312 | if not isinstance(other, relativedelta): 313 | raise TypeError, "unsupported type for sub operation" 314 | return relativedelta(years=other.years-self.years, 315 | months=other.months-self.months, 316 | days=other.days-self.days, 317 | hours=other.hours-self.hours, 318 | minutes=other.minutes-self.minutes, 319 | seconds=other.seconds-self.seconds, 320 | 
microseconds=other.microseconds-self.microseconds, 321 | leapdays=other.leapdays or self.leapdays, 322 | year=other.year or self.year, 323 | month=other.month or self.month, 324 | day=other.day or self.day, 325 | weekday=other.weekday or self.weekday, 326 | hour=other.hour or self.hour, 327 | minute=other.minute or self.minute, 328 | second=other.second or self.second, 329 | microsecond=other.second or self.microsecond) 330 | 331 | def __neg__(self): 332 | return relativedelta(years=-self.years, 333 | months=-self.months, 334 | days=-self.days, 335 | hours=-self.hours, 336 | minutes=-self.minutes, 337 | seconds=-self.seconds, 338 | microseconds=-self.microseconds, 339 | leapdays=self.leapdays, 340 | year=self.year, 341 | month=self.month, 342 | day=self.day, 343 | weekday=self.weekday, 344 | hour=self.hour, 345 | minute=self.minute, 346 | second=self.second, 347 | microsecond=self.microsecond) 348 | 349 | def __nonzero__(self): 350 | return not (not self.years and 351 | not self.months and 352 | not self.days and 353 | not self.hours and 354 | not self.minutes and 355 | not self.seconds and 356 | not self.microseconds and 357 | not self.leapdays and 358 | self.year is None and 359 | self.month is None and 360 | self.day is None and 361 | self.weekday is None and 362 | self.hour is None and 363 | self.minute is None and 364 | self.second is None and 365 | self.microsecond is None) 366 | 367 | def __mul__(self, other): 368 | f = float(other) 369 | return relativedelta(years=self.years*f, 370 | months=self.months*f, 371 | days=self.days*f, 372 | hours=self.hours*f, 373 | minutes=self.minutes*f, 374 | seconds=self.seconds*f, 375 | microseconds=self.microseconds*f, 376 | leapdays=self.leapdays, 377 | year=self.year, 378 | month=self.month, 379 | day=self.day, 380 | weekday=self.weekday, 381 | hour=self.hour, 382 | minute=self.minute, 383 | second=self.second, 384 | microsecond=self.microsecond) 385 | 386 | def __eq__(self, other): 387 | if not isinstance(other, relativedelta): 388 | return False 389 | if self.weekday or other.weekday: 390 | if not self.weekday or not other.weekday: 391 | return False 392 | if self.weekday.weekday != other.weekday.weekday: 393 | return False 394 | n1, n2 = self.weekday.n, other.weekday.n 395 | if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)): 396 | return False 397 | return (self.years == other.years and 398 | self.months == other.months and 399 | self.days == other.days and 400 | self.hours == other.hours and 401 | self.minutes == other.minutes and 402 | self.seconds == other.seconds and 403 | self.leapdays == other.leapdays and 404 | self.year == other.year and 405 | self.month == other.month and 406 | self.day == other.day and 407 | self.hour == other.hour and 408 | self.minute == other.minute and 409 | self.second == other.second and 410 | self.microsecond == other.microsecond) 411 | 412 | def __ne__(self, other): 413 | return not self.__eq__(other) 414 | 415 | def __div__(self, other): 416 | return self.__mul__(1/float(other)) 417 | 418 | def __repr__(self): 419 | l = [] 420 | for attr in ["years", "months", "days", "leapdays", 421 | "hours", "minutes", "seconds", "microseconds"]: 422 | value = getattr(self, attr) 423 | if value: 424 | l.append("%s=%+d" % (attr, value)) 425 | for attr in ["year", "month", "day", "weekday", 426 | "hour", "minute", "second", "microsecond"]: 427 | value = getattr(self, attr) 428 | if value is not None: 429 | l.append("%s=%s" % (attr, `value`)) 430 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) 431 
| 432 | # vim:ts=4:sw=4:et 433 | -------------------------------------------------------------------------------- /dateutil/rrule.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import itertools 11 | import datetime 12 | import calendar 13 | import thread 14 | import sys 15 | 16 | __all__ = ["rrule", "rruleset", "rrulestr", 17 | "YEARLY", "MONTHLY", "WEEKLY", "DAILY", 18 | "HOURLY", "MINUTELY", "SECONDLY", 19 | "MO", "TU", "WE", "TH", "FR", "SA", "SU"] 20 | 21 | # Every mask is 7 days longer to handle cross-year weekly periods. 22 | M366MASK = tuple([1]*31+[2]*29+[3]*31+[4]*30+[5]*31+[6]*30+ 23 | [7]*31+[8]*31+[9]*30+[10]*31+[11]*30+[12]*31+[1]*7) 24 | M365MASK = list(M366MASK) 25 | M29, M30, M31 = range(1,30), range(1,31), range(1,32) 26 | MDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) 27 | MDAY365MASK = list(MDAY366MASK) 28 | M29, M30, M31 = range(-29,0), range(-30,0), range(-31,0) 29 | NMDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) 30 | NMDAY365MASK = list(NMDAY366MASK) 31 | M366RANGE = (0,31,60,91,121,152,182,213,244,274,305,335,366) 32 | M365RANGE = (0,31,59,90,120,151,181,212,243,273,304,334,365) 33 | WDAYMASK = [0,1,2,3,4,5,6]*55 34 | del M29, M30, M31, M365MASK[59], MDAY365MASK[59], NMDAY365MASK[31] 35 | MDAY365MASK = tuple(MDAY365MASK) 36 | M365MASK = tuple(M365MASK) 37 | 38 | (YEARLY, 39 | MONTHLY, 40 | WEEKLY, 41 | DAILY, 42 | HOURLY, 43 | MINUTELY, 44 | SECONDLY) = range(7) 45 | 46 | # Imported on demand. 47 | easter = None 48 | parser = None 49 | 50 | class weekday(object): 51 | __slots__ = ["weekday", "n"] 52 | 53 | def __init__(self, weekday, n=None): 54 | if n == 0: 55 | raise ValueError, "Can't create weekday with n == 0" 56 | self.weekday = weekday 57 | self.n = n 58 | 59 | def __call__(self, n): 60 | if n == self.n: 61 | return self 62 | else: 63 | return self.__class__(self.weekday, n) 64 | 65 | def __eq__(self, other): 66 | try: 67 | if self.weekday != other.weekday or self.n != other.n: 68 | return False 69 | except AttributeError: 70 | return False 71 | return True 72 | 73 | def __repr__(self): 74 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday] 75 | if not self.n: 76 | return s 77 | else: 78 | return "%s(%+d)" % (s, self.n) 79 | 80 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)]) 81 | 82 | class rrulebase: 83 | def __init__(self, cache=False): 84 | if cache: 85 | self._cache = [] 86 | self._cache_lock = thread.allocate_lock() 87 | self._cache_gen = self._iter() 88 | self._cache_complete = False 89 | else: 90 | self._cache = None 91 | self._cache_complete = False 92 | self._len = None 93 | 94 | def __iter__(self): 95 | if self._cache_complete: 96 | return iter(self._cache) 97 | elif self._cache is None: 98 | return self._iter() 99 | else: 100 | return self._iter_cached() 101 | 102 | def _iter_cached(self): 103 | i = 0 104 | gen = self._cache_gen 105 | cache = self._cache 106 | acquire = self._cache_lock.acquire 107 | release = self._cache_lock.release 108 | while gen: 109 | if i == len(cache): 110 | acquire() 111 | if self._cache_complete: 112 | break 113 | try: 114 | for j in range(10): 115 | cache.append(gen.next()) 116 | except StopIteration: 117 | self._cache_gen = gen = None 118 | 
self._cache_complete = True 119 | break 120 | release() 121 | yield cache[i] 122 | i += 1 123 | while i < self._len: 124 | yield cache[i] 125 | i += 1 126 | 127 | def __getitem__(self, item): 128 | if self._cache_complete: 129 | return self._cache[item] 130 | elif isinstance(item, slice): 131 | if item.step and item.step < 0: 132 | return list(iter(self))[item] 133 | else: 134 | return list(itertools.islice(self, 135 | item.start or 0, 136 | item.stop or sys.maxint, 137 | item.step or 1)) 138 | elif item >= 0: 139 | gen = iter(self) 140 | try: 141 | for i in range(item+1): 142 | res = gen.next() 143 | except StopIteration: 144 | raise IndexError 145 | return res 146 | else: 147 | return list(iter(self))[item] 148 | 149 | def __contains__(self, item): 150 | if self._cache_complete: 151 | return item in self._cache 152 | else: 153 | for i in self: 154 | if i == item: 155 | return True 156 | elif i > item: 157 | return False 158 | return False 159 | 160 | # __len__() introduces a large performance penality. 161 | def count(self): 162 | if self._len is None: 163 | for x in self: pass 164 | return self._len 165 | 166 | def before(self, dt, inc=False): 167 | if self._cache_complete: 168 | gen = self._cache 169 | else: 170 | gen = self 171 | last = None 172 | if inc: 173 | for i in gen: 174 | if i > dt: 175 | break 176 | last = i 177 | else: 178 | for i in gen: 179 | if i >= dt: 180 | break 181 | last = i 182 | return last 183 | 184 | def after(self, dt, inc=False): 185 | if self._cache_complete: 186 | gen = self._cache 187 | else: 188 | gen = self 189 | if inc: 190 | for i in gen: 191 | if i >= dt: 192 | return i 193 | else: 194 | for i in gen: 195 | if i > dt: 196 | return i 197 | return None 198 | 199 | def between(self, after, before, inc=False): 200 | if self._cache_complete: 201 | gen = self._cache 202 | else: 203 | gen = self 204 | started = False 205 | l = [] 206 | if inc: 207 | for i in gen: 208 | if i > before: 209 | break 210 | elif not started: 211 | if i >= after: 212 | started = True 213 | l.append(i) 214 | else: 215 | l.append(i) 216 | else: 217 | for i in gen: 218 | if i >= before: 219 | break 220 | elif not started: 221 | if i > after: 222 | started = True 223 | l.append(i) 224 | else: 225 | l.append(i) 226 | return l 227 | 228 | class rrule(rrulebase): 229 | def __init__(self, freq, dtstart=None, 230 | interval=1, wkst=None, count=None, until=None, bysetpos=None, 231 | bymonth=None, bymonthday=None, byyearday=None, byeaster=None, 232 | byweekno=None, byweekday=None, 233 | byhour=None, byminute=None, bysecond=None, 234 | cache=False): 235 | rrulebase.__init__(self, cache) 236 | global easter 237 | if not dtstart: 238 | dtstart = datetime.datetime.now().replace(microsecond=0) 239 | elif not isinstance(dtstart, datetime.datetime): 240 | dtstart = datetime.datetime.fromordinal(dtstart.toordinal()) 241 | else: 242 | dtstart = dtstart.replace(microsecond=0) 243 | self._dtstart = dtstart 244 | self._tzinfo = dtstart.tzinfo 245 | self._freq = freq 246 | self._interval = interval 247 | self._count = count 248 | if until and not isinstance(until, datetime.datetime): 249 | until = datetime.datetime.fromordinal(until.toordinal()) 250 | self._until = until 251 | if wkst is None: 252 | self._wkst = calendar.firstweekday() 253 | elif type(wkst) is int: 254 | self._wkst = wkst 255 | else: 256 | self._wkst = wkst.weekday 257 | if bysetpos is None: 258 | self._bysetpos = None 259 | elif type(bysetpos) is int: 260 | if bysetpos == 0 or not (-366 <= bysetpos <= 366): 261 | raise 
ValueError("bysetpos must be between 1 and 366, " 262 | "or between -366 and -1") 263 | self._bysetpos = (bysetpos,) 264 | else: 265 | self._bysetpos = tuple(bysetpos) 266 | for pos in self._bysetpos: 267 | if pos == 0 or not (-366 <= pos <= 366): 268 | raise ValueError("bysetpos must be between 1 and 366, " 269 | "or between -366 and -1") 270 | if not (byweekno or byyearday or bymonthday or 271 | byweekday is not None or byeaster is not None): 272 | if freq == YEARLY: 273 | if not bymonth: 274 | bymonth = dtstart.month 275 | bymonthday = dtstart.day 276 | elif freq == MONTHLY: 277 | bymonthday = dtstart.day 278 | elif freq == WEEKLY: 279 | byweekday = dtstart.weekday() 280 | # bymonth 281 | if not bymonth: 282 | self._bymonth = None 283 | elif type(bymonth) is int: 284 | self._bymonth = (bymonth,) 285 | else: 286 | self._bymonth = tuple(bymonth) 287 | # byyearday 288 | if not byyearday: 289 | self._byyearday = None 290 | elif type(byyearday) is int: 291 | self._byyearday = (byyearday,) 292 | else: 293 | self._byyearday = tuple(byyearday) 294 | # byeaster 295 | if byeaster is not None: 296 | if not easter: 297 | from dateutil import easter 298 | if type(byeaster) is int: 299 | self._byeaster = (byeaster,) 300 | else: 301 | self._byeaster = tuple(byeaster) 302 | else: 303 | self._byeaster = None 304 | # bymonthay 305 | if not bymonthday: 306 | self._bymonthday = () 307 | self._bynmonthday = () 308 | elif type(bymonthday) is int: 309 | if bymonthday < 0: 310 | self._bynmonthday = (bymonthday,) 311 | self._bymonthday = () 312 | else: 313 | self._bymonthday = (bymonthday,) 314 | self._bynmonthday = () 315 | else: 316 | self._bymonthday = tuple([x for x in bymonthday if x > 0]) 317 | self._bynmonthday = tuple([x for x in bymonthday if x < 0]) 318 | # byweekno 319 | if byweekno is None: 320 | self._byweekno = None 321 | elif type(byweekno) is int: 322 | self._byweekno = (byweekno,) 323 | else: 324 | self._byweekno = tuple(byweekno) 325 | # byweekday / bynweekday 326 | if byweekday is None: 327 | self._byweekday = None 328 | self._bynweekday = None 329 | elif type(byweekday) is int: 330 | self._byweekday = (byweekday,) 331 | self._bynweekday = None 332 | elif hasattr(byweekday, "n"): 333 | if not byweekday.n or freq > MONTHLY: 334 | self._byweekday = (byweekday.weekday,) 335 | self._bynweekday = None 336 | else: 337 | self._bynweekday = ((byweekday.weekday, byweekday.n),) 338 | self._byweekday = None 339 | else: 340 | self._byweekday = [] 341 | self._bynweekday = [] 342 | for wday in byweekday: 343 | if type(wday) is int: 344 | self._byweekday.append(wday) 345 | elif not wday.n or freq > MONTHLY: 346 | self._byweekday.append(wday.weekday) 347 | else: 348 | self._bynweekday.append((wday.weekday, wday.n)) 349 | self._byweekday = tuple(self._byweekday) 350 | self._bynweekday = tuple(self._bynweekday) 351 | if not self._byweekday: 352 | self._byweekday = None 353 | elif not self._bynweekday: 354 | self._bynweekday = None 355 | # byhour 356 | if byhour is None: 357 | if freq < HOURLY: 358 | self._byhour = (dtstart.hour,) 359 | else: 360 | self._byhour = None 361 | elif type(byhour) is int: 362 | self._byhour = (byhour,) 363 | else: 364 | self._byhour = tuple(byhour) 365 | # byminute 366 | if byminute is None: 367 | if freq < MINUTELY: 368 | self._byminute = (dtstart.minute,) 369 | else: 370 | self._byminute = None 371 | elif type(byminute) is int: 372 | self._byminute = (byminute,) 373 | else: 374 | self._byminute = tuple(byminute) 375 | # bysecond 376 | if bysecond is None: 377 | if freq < SECONDLY: 
378 | self._bysecond = (dtstart.second,) 379 | else: 380 | self._bysecond = None 381 | elif type(bysecond) is int: 382 | self._bysecond = (bysecond,) 383 | else: 384 | self._bysecond = tuple(bysecond) 385 | 386 | if self._freq >= HOURLY: 387 | self._timeset = None 388 | else: 389 | self._timeset = [] 390 | for hour in self._byhour: 391 | for minute in self._byminute: 392 | for second in self._bysecond: 393 | self._timeset.append( 394 | datetime.time(hour, minute, second, 395 | tzinfo=self._tzinfo)) 396 | self._timeset.sort() 397 | self._timeset = tuple(self._timeset) 398 | 399 | def _iter(self): 400 | year, month, day, hour, minute, second, weekday, yearday, _ = \ 401 | self._dtstart.timetuple() 402 | 403 | # Some local variables to speed things up a bit 404 | freq = self._freq 405 | interval = self._interval 406 | wkst = self._wkst 407 | until = self._until 408 | bymonth = self._bymonth 409 | byweekno = self._byweekno 410 | byyearday = self._byyearday 411 | byweekday = self._byweekday 412 | byeaster = self._byeaster 413 | bymonthday = self._bymonthday 414 | bynmonthday = self._bynmonthday 415 | bysetpos = self._bysetpos 416 | byhour = self._byhour 417 | byminute = self._byminute 418 | bysecond = self._bysecond 419 | 420 | ii = _iterinfo(self) 421 | ii.rebuild(year, month) 422 | 423 | getdayset = {YEARLY:ii.ydayset, 424 | MONTHLY:ii.mdayset, 425 | WEEKLY:ii.wdayset, 426 | DAILY:ii.ddayset, 427 | HOURLY:ii.ddayset, 428 | MINUTELY:ii.ddayset, 429 | SECONDLY:ii.ddayset}[freq] 430 | 431 | if freq < HOURLY: 432 | timeset = self._timeset 433 | else: 434 | gettimeset = {HOURLY:ii.htimeset, 435 | MINUTELY:ii.mtimeset, 436 | SECONDLY:ii.stimeset}[freq] 437 | if ((freq >= HOURLY and 438 | self._byhour and hour not in self._byhour) or 439 | (freq >= MINUTELY and 440 | self._byminute and minute not in self._byminute) or 441 | (freq >= SECONDLY and 442 | self._bysecond and second not in self._bysecond)): 443 | timeset = () 444 | else: 445 | timeset = gettimeset(hour, minute, second) 446 | 447 | total = 0 448 | count = self._count 449 | while True: 450 | # Get dayset with the right frequency 451 | dayset, start, end = getdayset(year, month, day) 452 | 453 | # Do the "hard" work ;-) 454 | filtered = False 455 | for i in dayset[start:end]: 456 | if ((bymonth and ii.mmask[i] not in bymonth) or 457 | (byweekno and not ii.wnomask[i]) or 458 | (byweekday and ii.wdaymask[i] not in byweekday) or 459 | (ii.nwdaymask and not ii.nwdaymask[i]) or 460 | (byeaster and not ii.eastermask[i]) or 461 | ((bymonthday or bynmonthday) and 462 | ii.mdaymask[i] not in bymonthday and 463 | ii.nmdaymask[i] not in bynmonthday) or 464 | (byyearday and 465 | ((i < ii.yearlen and i+1 not in byyearday 466 | and -ii.yearlen+i not in byyearday) or 467 | (i >= ii.yearlen and i+1-ii.yearlen not in byyearday 468 | and -ii.nextyearlen+i-ii.yearlen 469 | not in byyearday)))): 470 | dayset[i] = None 471 | filtered = True 472 | 473 | # Output results 474 | if bysetpos and timeset: 475 | poslist = [] 476 | for pos in bysetpos: 477 | if pos < 0: 478 | daypos, timepos = divmod(pos, len(timeset)) 479 | else: 480 | daypos, timepos = divmod(pos-1, len(timeset)) 481 | try: 482 | i = [x for x in dayset[start:end] 483 | if x is not None][daypos] 484 | time = timeset[timepos] 485 | except IndexError: 486 | pass 487 | else: 488 | date = datetime.date.fromordinal(ii.yearordinal+i) 489 | res = datetime.datetime.combine(date, time) 490 | if res not in poslist: 491 | poslist.append(res) 492 | poslist.sort() 493 | for res in poslist: 494 | if until and res > 
until: 495 | self._len = total 496 | return 497 | elif res >= self._dtstart: 498 | total += 1 499 | yield res 500 | if count: 501 | count -= 1 502 | if not count: 503 | self._len = total 504 | return 505 | else: 506 | for i in dayset[start:end]: 507 | if i is not None: 508 | date = datetime.date.fromordinal(ii.yearordinal+i) 509 | for time in timeset: 510 | res = datetime.datetime.combine(date, time) 511 | if until and res > until: 512 | self._len = total 513 | return 514 | elif res >= self._dtstart: 515 | total += 1 516 | yield res 517 | if count: 518 | count -= 1 519 | if not count: 520 | self._len = total 521 | return 522 | 523 | # Handle frequency and interval 524 | fixday = False 525 | if freq == YEARLY: 526 | year += interval 527 | if year > datetime.MAXYEAR: 528 | self._len = total 529 | return 530 | ii.rebuild(year, month) 531 | elif freq == MONTHLY: 532 | month += interval 533 | if month > 12: 534 | div, mod = divmod(month, 12) 535 | month = mod 536 | year += div 537 | if month == 0: 538 | month = 12 539 | year -= 1 540 | if year > datetime.MAXYEAR: 541 | self._len = total 542 | return 543 | ii.rebuild(year, month) 544 | elif freq == WEEKLY: 545 | if wkst > weekday: 546 | day += -(weekday+1+(6-wkst))+self._interval*7 547 | else: 548 | day += -(weekday-wkst)+self._interval*7 549 | weekday = wkst 550 | fixday = True 551 | elif freq == DAILY: 552 | day += interval 553 | fixday = True 554 | elif freq == HOURLY: 555 | if filtered: 556 | # Jump to one iteration before next day 557 | hour += ((23-hour)//interval)*interval 558 | while True: 559 | hour += interval 560 | div, mod = divmod(hour, 24) 561 | if div: 562 | hour = mod 563 | day += div 564 | fixday = True 565 | if not byhour or hour in byhour: 566 | break 567 | timeset = gettimeset(hour, minute, second) 568 | elif freq == MINUTELY: 569 | if filtered: 570 | # Jump to one iteration before next day 571 | minute += ((1439-(hour*60+minute))//interval)*interval 572 | while True: 573 | minute += interval 574 | div, mod = divmod(minute, 60) 575 | if div: 576 | minute = mod 577 | hour += div 578 | div, mod = divmod(hour, 24) 579 | if div: 580 | hour = mod 581 | day += div 582 | fixday = True 583 | filtered = False 584 | if ((not byhour or hour in byhour) and 585 | (not byminute or minute in byminute)): 586 | break 587 | timeset = gettimeset(hour, minute, second) 588 | elif freq == SECONDLY: 589 | if filtered: 590 | # Jump to one iteration before next day 591 | second += (((86399-(hour*3600+minute*60+second)) 592 | //interval)*interval) 593 | while True: 594 | second += self._interval 595 | div, mod = divmod(second, 60) 596 | if div: 597 | second = mod 598 | minute += div 599 | div, mod = divmod(minute, 60) 600 | if div: 601 | minute = mod 602 | hour += div 603 | div, mod = divmod(hour, 24) 604 | if div: 605 | hour = mod 606 | day += div 607 | fixday = True 608 | if ((not byhour or hour in byhour) and 609 | (not byminute or minute in byminute) and 610 | (not bysecond or second in bysecond)): 611 | break 612 | timeset = gettimeset(hour, minute, second) 613 | 614 | if fixday and day > 28: 615 | daysinmonth = calendar.monthrange(year, month)[1] 616 | if day > daysinmonth: 617 | while day > daysinmonth: 618 | day -= daysinmonth 619 | month += 1 620 | if month == 13: 621 | month = 1 622 | year += 1 623 | if year > datetime.MAXYEAR: 624 | self._len = total 625 | return 626 | daysinmonth = calendar.monthrange(year, month)[1] 627 | ii.rebuild(year, month) 628 | 629 | class _iterinfo(object): 630 | __slots__ = ["rrule", "lastyear", "lastmonth", 631 
| "yearlen", "nextyearlen", "yearordinal", "yearweekday", 632 | "mmask", "mrange", "mdaymask", "nmdaymask", 633 | "wdaymask", "wnomask", "nwdaymask", "eastermask"] 634 | 635 | def __init__(self, rrule): 636 | for attr in self.__slots__: 637 | setattr(self, attr, None) 638 | self.rrule = rrule 639 | 640 | def rebuild(self, year, month): 641 | # Every mask is 7 days longer to handle cross-year weekly periods. 642 | rr = self.rrule 643 | if year != self.lastyear: 644 | self.yearlen = 365+calendar.isleap(year) 645 | self.nextyearlen = 365+calendar.isleap(year+1) 646 | firstyday = datetime.date(year, 1, 1) 647 | self.yearordinal = firstyday.toordinal() 648 | self.yearweekday = firstyday.weekday() 649 | 650 | wday = datetime.date(year, 1, 1).weekday() 651 | if self.yearlen == 365: 652 | self.mmask = M365MASK 653 | self.mdaymask = MDAY365MASK 654 | self.nmdaymask = NMDAY365MASK 655 | self.wdaymask = WDAYMASK[wday:] 656 | self.mrange = M365RANGE 657 | else: 658 | self.mmask = M366MASK 659 | self.mdaymask = MDAY366MASK 660 | self.nmdaymask = NMDAY366MASK 661 | self.wdaymask = WDAYMASK[wday:] 662 | self.mrange = M366RANGE 663 | 664 | if not rr._byweekno: 665 | self.wnomask = None 666 | else: 667 | self.wnomask = [0]*(self.yearlen+7) 668 | #no1wkst = firstwkst = self.wdaymask.index(rr._wkst) 669 | no1wkst = firstwkst = (7-self.yearweekday+rr._wkst)%7 670 | if no1wkst >= 4: 671 | no1wkst = 0 672 | # Number of days in the year, plus the days we got 673 | # from last year. 674 | wyearlen = self.yearlen+(self.yearweekday-rr._wkst)%7 675 | else: 676 | # Number of days in the year, minus the days we 677 | # left in last year. 678 | wyearlen = self.yearlen-no1wkst 679 | div, mod = divmod(wyearlen, 7) 680 | numweeks = div+mod//4 681 | for n in rr._byweekno: 682 | if n < 0: 683 | n += numweeks+1 684 | if not (0 < n <= numweeks): 685 | continue 686 | if n > 1: 687 | i = no1wkst+(n-1)*7 688 | if no1wkst != firstwkst: 689 | i -= 7-firstwkst 690 | else: 691 | i = no1wkst 692 | for j in range(7): 693 | self.wnomask[i] = 1 694 | i += 1 695 | if self.wdaymask[i] == rr._wkst: 696 | break 697 | if 1 in rr._byweekno: 698 | # Check week number 1 of next year as well 699 | # TODO: Check -numweeks for next year. 700 | i = no1wkst+numweeks*7 701 | if no1wkst != firstwkst: 702 | i -= 7-firstwkst 703 | if i < self.yearlen: 704 | # If week starts in next year, we 705 | # don't care about it. 706 | for j in range(7): 707 | self.wnomask[i] = 1 708 | i += 1 709 | if self.wdaymask[i] == rr._wkst: 710 | break 711 | if no1wkst: 712 | # Check last week number of last year as 713 | # well. If no1wkst is 0, either the year 714 | # started on week start, or week number 1 715 | # got days from last year, so there are no 716 | # days from last year's last week number in 717 | # this year. 
718 | if -1 not in rr._byweekno: 719 | lyearweekday = datetime.date(year-1,1,1).weekday() 720 | lno1wkst = (7-lyearweekday+rr._wkst)%7 721 | lyearlen = 365+calendar.isleap(year-1) 722 | if lno1wkst >= 4: 723 | lno1wkst = 0 724 | lnumweeks = 52+(lyearlen+ 725 | (lyearweekday-rr._wkst)%7)%7//4 726 | else: 727 | lnumweeks = 52+(self.yearlen-no1wkst)%7//4 728 | else: 729 | lnumweeks = -1 730 | if lnumweeks in rr._byweekno: 731 | for i in range(no1wkst): 732 | self.wnomask[i] = 1 733 | 734 | if (rr._bynweekday and 735 | (month != self.lastmonth or year != self.lastyear)): 736 | ranges = [] 737 | if rr._freq == YEARLY: 738 | if rr._bymonth: 739 | for month in rr._bymonth: 740 | ranges.append(self.mrange[month-1:month+1]) 741 | else: 742 | ranges = [(0, self.yearlen)] 743 | elif rr._freq == MONTHLY: 744 | ranges = [self.mrange[month-1:month+1]] 745 | if ranges: 746 | # Weekly frequency won't get here, so we may not 747 | # care about cross-year weekly periods. 748 | self.nwdaymask = [0]*self.yearlen 749 | for first, last in ranges: 750 | last -= 1 751 | for wday, n in rr._bynweekday: 752 | if n < 0: 753 | i = last+(n+1)*7 754 | i -= (self.wdaymask[i]-wday)%7 755 | else: 756 | i = first+(n-1)*7 757 | i += (7-self.wdaymask[i]+wday)%7 758 | if first <= i <= last: 759 | self.nwdaymask[i] = 1 760 | 761 | if rr._byeaster: 762 | self.eastermask = [0]*(self.yearlen+7) 763 | eyday = easter.easter(year).toordinal()-self.yearordinal 764 | for offset in rr._byeaster: 765 | self.eastermask[eyday+offset] = 1 766 | 767 | self.lastyear = year 768 | self.lastmonth = month 769 | 770 | def ydayset(self, year, month, day): 771 | return range(self.yearlen), 0, self.yearlen 772 | 773 | def mdayset(self, year, month, day): 774 | set = [None]*self.yearlen 775 | start, end = self.mrange[month-1:month+1] 776 | for i in range(start, end): 777 | set[i] = i 778 | return set, start, end 779 | 780 | def wdayset(self, year, month, day): 781 | # We need to handle cross-year weeks here. 782 | set = [None]*(self.yearlen+7) 783 | i = datetime.date(year, month, day).toordinal()-self.yearordinal 784 | start = i 785 | for j in range(7): 786 | set[i] = i 787 | i += 1 788 | #if (not (0 <= i < self.yearlen) or 789 | # self.wdaymask[i] == self.rrule._wkst): 790 | # This will cross the year boundary, if necessary. 
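                # Editor's sketch (assumed scenario, not an original source
                # comment): for a WEEKLY rule starting Sat 2003-12-27 with
                # wkst=MO, i runs past self.yearlen-1 into the 7-day padding
                # of the masks, so the week set can finish in the next year.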
791 | if self.wdaymask[i] == self.rrule._wkst: 792 | break 793 | return set, start, i 794 | 795 | def ddayset(self, year, month, day): 796 | set = [None]*self.yearlen 797 | i = datetime.date(year, month, day).toordinal()-self.yearordinal 798 | set[i] = i 799 | return set, i, i+1 800 | 801 | def htimeset(self, hour, minute, second): 802 | set = [] 803 | rr = self.rrule 804 | for minute in rr._byminute: 805 | for second in rr._bysecond: 806 | set.append(datetime.time(hour, minute, second, 807 | tzinfo=rr._tzinfo)) 808 | set.sort() 809 | return set 810 | 811 | def mtimeset(self, hour, minute, second): 812 | set = [] 813 | rr = self.rrule 814 | for second in rr._bysecond: 815 | set.append(datetime.time(hour, minute, second, tzinfo=rr._tzinfo)) 816 | set.sort() 817 | return set 818 | 819 | def stimeset(self, hour, minute, second): 820 | return (datetime.time(hour, minute, second, 821 | tzinfo=self.rrule._tzinfo),) 822 | 823 | 824 | class rruleset(rrulebase): 825 | 826 | class _genitem: 827 | def __init__(self, genlist, gen): 828 | try: 829 | self.dt = gen() 830 | genlist.append(self) 831 | except StopIteration: 832 | pass 833 | self.genlist = genlist 834 | self.gen = gen 835 | 836 | def next(self): 837 | try: 838 | self.dt = self.gen() 839 | except StopIteration: 840 | self.genlist.remove(self) 841 | 842 | def __cmp__(self, other): 843 | return cmp(self.dt, other.dt) 844 | 845 | def __init__(self, cache=False): 846 | rrulebase.__init__(self, cache) 847 | self._rrule = [] 848 | self._rdate = [] 849 | self._exrule = [] 850 | self._exdate = [] 851 | 852 | def rrule(self, rrule): 853 | self._rrule.append(rrule) 854 | 855 | def rdate(self, rdate): 856 | self._rdate.append(rdate) 857 | 858 | def exrule(self, exrule): 859 | self._exrule.append(exrule) 860 | 861 | def exdate(self, exdate): 862 | self._exdate.append(exdate) 863 | 864 | def _iter(self): 865 | rlist = [] 866 | self._rdate.sort() 867 | self._genitem(rlist, iter(self._rdate).next) 868 | for gen in [iter(x).next for x in self._rrule]: 869 | self._genitem(rlist, gen) 870 | rlist.sort() 871 | exlist = [] 872 | self._exdate.sort() 873 | self._genitem(exlist, iter(self._exdate).next) 874 | for gen in [iter(x).next for x in self._exrule]: 875 | self._genitem(exlist, gen) 876 | exlist.sort() 877 | lastdt = None 878 | total = 0 879 | while rlist: 880 | ritem = rlist[0] 881 | if not lastdt or lastdt != ritem.dt: 882 | while exlist and exlist[0] < ritem: 883 | exlist[0].next() 884 | exlist.sort() 885 | if not exlist or ritem != exlist[0]: 886 | total += 1 887 | yield ritem.dt 888 | lastdt = ritem.dt 889 | ritem.next() 890 | rlist.sort() 891 | self._len = total 892 | 893 | class _rrulestr: 894 | 895 | _freq_map = {"YEARLY": YEARLY, 896 | "MONTHLY": MONTHLY, 897 | "WEEKLY": WEEKLY, 898 | "DAILY": DAILY, 899 | "HOURLY": HOURLY, 900 | "MINUTELY": MINUTELY, 901 | "SECONDLY": SECONDLY} 902 | 903 | _weekday_map = {"MO":0,"TU":1,"WE":2,"TH":3,"FR":4,"SA":5,"SU":6} 904 | 905 | def _handle_int(self, rrkwargs, name, value, **kwargs): 906 | rrkwargs[name.lower()] = int(value) 907 | 908 | def _handle_int_list(self, rrkwargs, name, value, **kwargs): 909 | rrkwargs[name.lower()] = [int(x) for x in value.split(',')] 910 | 911 | _handle_INTERVAL = _handle_int 912 | _handle_COUNT = _handle_int 913 | _handle_BYSETPOS = _handle_int_list 914 | _handle_BYMONTH = _handle_int_list 915 | _handle_BYMONTHDAY = _handle_int_list 916 | _handle_BYYEARDAY = _handle_int_list 917 | _handle_BYEASTER = _handle_int_list 918 | _handle_BYWEEKNO = _handle_int_list 919 | _handle_BYHOUR = 
_handle_int_list 920 | _handle_BYMINUTE = _handle_int_list 921 | _handle_BYSECOND = _handle_int_list 922 | 923 | def _handle_FREQ(self, rrkwargs, name, value, **kwargs): 924 | rrkwargs["freq"] = self._freq_map[value] 925 | 926 | def _handle_UNTIL(self, rrkwargs, name, value, **kwargs): 927 | global parser 928 | if not parser: 929 | from dateutil import parser 930 | try: 931 | rrkwargs["until"] = parser.parse(value, 932 | ignoretz=kwargs.get("ignoretz"), 933 | tzinfos=kwargs.get("tzinfos")) 934 | except ValueError: 935 | raise ValueError, "invalid until date" 936 | 937 | def _handle_WKST(self, rrkwargs, name, value, **kwargs): 938 | rrkwargs["wkst"] = self._weekday_map[value] 939 | 940 | def _handle_BYWEEKDAY(self, rrkwargs, name, value, **kwarsg): 941 | l = [] 942 | for wday in value.split(','): 943 | for i in range(len(wday)): 944 | if wday[i] not in '+-0123456789': 945 | break 946 | n = wday[:i] or None 947 | w = wday[i:] 948 | if n: n = int(n) 949 | l.append(weekdays[self._weekday_map[w]](n)) 950 | rrkwargs["byweekday"] = l 951 | 952 | _handle_BYDAY = _handle_BYWEEKDAY 953 | 954 | def _parse_rfc_rrule(self, line, 955 | dtstart=None, 956 | cache=False, 957 | ignoretz=False, 958 | tzinfos=None): 959 | if line.find(':') != -1: 960 | name, value = line.split(':') 961 | if name != "RRULE": 962 | raise ValueError, "unknown parameter name" 963 | else: 964 | value = line 965 | rrkwargs = {} 966 | for pair in value.split(';'): 967 | name, value = pair.split('=') 968 | name = name.upper() 969 | value = value.upper() 970 | try: 971 | getattr(self, "_handle_"+name)(rrkwargs, name, value, 972 | ignoretz=ignoretz, 973 | tzinfos=tzinfos) 974 | except AttributeError: 975 | raise ValueError, "unknown parameter '%s'" % name 976 | except (KeyError, ValueError): 977 | raise ValueError, "invalid '%s': %s" % (name, value) 978 | return rrule(dtstart=dtstart, cache=cache, **rrkwargs) 979 | 980 | def _parse_rfc(self, s, 981 | dtstart=None, 982 | cache=False, 983 | unfold=False, 984 | forceset=False, 985 | compatible=False, 986 | ignoretz=False, 987 | tzinfos=None): 988 | global parser 989 | if compatible: 990 | forceset = True 991 | unfold = True 992 | s = s.upper() 993 | if not s.strip(): 994 | raise ValueError, "empty string" 995 | if unfold: 996 | lines = s.splitlines() 997 | i = 0 998 | while i < len(lines): 999 | line = lines[i].rstrip() 1000 | if not line: 1001 | del lines[i] 1002 | elif i > 0 and line[0] == " ": 1003 | lines[i-1] += line[1:] 1004 | del lines[i] 1005 | else: 1006 | i += 1 1007 | else: 1008 | lines = s.split() 1009 | if (not forceset and len(lines) == 1 and 1010 | (s.find(':') == -1 or s.startswith('RRULE:'))): 1011 | return self._parse_rfc_rrule(lines[0], cache=cache, 1012 | dtstart=dtstart, ignoretz=ignoretz, 1013 | tzinfos=tzinfos) 1014 | else: 1015 | rrulevals = [] 1016 | rdatevals = [] 1017 | exrulevals = [] 1018 | exdatevals = [] 1019 | for line in lines: 1020 | if not line: 1021 | continue 1022 | if line.find(':') == -1: 1023 | name = "RRULE" 1024 | value = line 1025 | else: 1026 | name, value = line.split(':', 1) 1027 | parms = name.split(';') 1028 | if not parms: 1029 | raise ValueError, "empty property name" 1030 | name = parms[0] 1031 | parms = parms[1:] 1032 | if name == "RRULE": 1033 | for parm in parms: 1034 | raise ValueError, "unsupported RRULE parm: "+parm 1035 | rrulevals.append(value) 1036 | elif name == "RDATE": 1037 | for parm in parms: 1038 | if parm != "VALUE=DATE-TIME": 1039 | raise ValueError, "unsupported RDATE parm: "+parm 1040 | rdatevals.append(value) 1041 | 
elif name == "EXRULE": 1042 | for parm in parms: 1043 | raise ValueError, "unsupported EXRULE parm: "+parm 1044 | exrulevals.append(value) 1045 | elif name == "EXDATE": 1046 | for parm in parms: 1047 | if parm != "VALUE=DATE-TIME": 1048 | raise ValueError, "unsupported RDATE parm: "+parm 1049 | exdatevals.append(value) 1050 | elif name == "DTSTART": 1051 | for parm in parms: 1052 | raise ValueError, "unsupported DTSTART parm: "+parm 1053 | if not parser: 1054 | from dateutil import parser 1055 | dtstart = parser.parse(value, ignoretz=ignoretz, 1056 | tzinfos=tzinfos) 1057 | else: 1058 | raise ValueError, "unsupported property: "+name 1059 | if (forceset or len(rrulevals) > 1 or 1060 | rdatevals or exrulevals or exdatevals): 1061 | if not parser and (rdatevals or exdatevals): 1062 | from dateutil import parser 1063 | set = rruleset(cache=cache) 1064 | for value in rrulevals: 1065 | set.rrule(self._parse_rfc_rrule(value, dtstart=dtstart, 1066 | ignoretz=ignoretz, 1067 | tzinfos=tzinfos)) 1068 | for value in rdatevals: 1069 | for datestr in value.split(','): 1070 | set.rdate(parser.parse(datestr, 1071 | ignoretz=ignoretz, 1072 | tzinfos=tzinfos)) 1073 | for value in exrulevals: 1074 | set.exrule(self._parse_rfc_rrule(value, dtstart=dtstart, 1075 | ignoretz=ignoretz, 1076 | tzinfos=tzinfos)) 1077 | for value in exdatevals: 1078 | for datestr in value.split(','): 1079 | set.exdate(parser.parse(datestr, 1080 | ignoretz=ignoretz, 1081 | tzinfos=tzinfos)) 1082 | if compatible and dtstart: 1083 | set.rdate(dtstart) 1084 | return set 1085 | else: 1086 | return self._parse_rfc_rrule(rrulevals[0], 1087 | dtstart=dtstart, 1088 | cache=cache, 1089 | ignoretz=ignoretz, 1090 | tzinfos=tzinfos) 1091 | 1092 | def __call__(self, s, **kwargs): 1093 | return self._parse_rfc(s, **kwargs) 1094 | 1095 | rrulestr = _rrulestr() 1096 | 1097 | # vim:ts=4:sw=4:et 1098 | -------------------------------------------------------------------------------- /dateutil/tz.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | import struct 12 | import time 13 | import sys 14 | import os 15 | 16 | relativedelta = None 17 | parser = None 18 | rrule = None 19 | 20 | __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange", 21 | "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"] 22 | 23 | try: 24 | from dateutil.tzwin import tzwin, tzwinlocal 25 | except (ImportError, OSError): 26 | tzwin, tzwinlocal = None, None 27 | 28 | ZERO = datetime.timedelta(0) 29 | EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal() 30 | 31 | class tzutc(datetime.tzinfo): 32 | 33 | def utcoffset(self, dt): 34 | return ZERO 35 | 36 | def dst(self, dt): 37 | return ZERO 38 | 39 | def tzname(self, dt): 40 | return "UTC" 41 | 42 | def __eq__(self, other): 43 | return (isinstance(other, tzutc) or 44 | (isinstance(other, tzoffset) and other._offset == ZERO)) 45 | 46 | def __ne__(self, other): 47 | return not self.__eq__(other) 48 | 49 | def __repr__(self): 50 | return "%s()" % self.__class__.__name__ 51 | 52 | __reduce__ = object.__reduce__ 53 | 54 | class tzoffset(datetime.tzinfo): 55 | 56 | def __init__(self, name, offset): 57 | self._name = name 58 | self._offset = datetime.timedelta(seconds=offset) 59 | 60 | def utcoffset(self, dt): 61 | return self._offset 62 | 63 | def dst(self, dt): 64 | return ZERO 65 | 66 | def tzname(self, dt): 67 | return self._name 68 | 69 | def __eq__(self, other): 70 | return (isinstance(other, tzoffset) and 71 | self._offset == other._offset) 72 | 73 | def __ne__(self, other): 74 | return not self.__eq__(other) 75 | 76 | def __repr__(self): 77 | return "%s(%s, %s)" % (self.__class__.__name__, 78 | `self._name`, 79 | self._offset.days*86400+self._offset.seconds) 80 | 81 | __reduce__ = object.__reduce__ 82 | 83 | class tzlocal(datetime.tzinfo): 84 | 85 | _std_offset = datetime.timedelta(seconds=-time.timezone) 86 | if time.daylight: 87 | _dst_offset = datetime.timedelta(seconds=-time.altzone) 88 | else: 89 | _dst_offset = _std_offset 90 | 91 | def utcoffset(self, dt): 92 | if self._isdst(dt): 93 | return self._dst_offset 94 | else: 95 | return self._std_offset 96 | 97 | def dst(self, dt): 98 | if self._isdst(dt): 99 | return self._dst_offset-self._std_offset 100 | else: 101 | return ZERO 102 | 103 | def tzname(self, dt): 104 | return time.tzname[self._isdst(dt)] 105 | 106 | def _isdst(self, dt): 107 | # We can't use mktime here. It is unstable when deciding if 108 | # the hour near to a change is DST or not. 
109 | # 110 | # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour, 111 | # dt.minute, dt.second, dt.weekday(), 0, -1)) 112 | # return time.localtime(timestamp).tm_isdst 113 | # 114 | # The code above yields the following result: 115 | # 116 | #>>> import tz, datetime 117 | #>>> t = tz.tzlocal() 118 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 119 | #'BRDT' 120 | #>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname() 121 | #'BRST' 122 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 123 | #'BRST' 124 | #>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname() 125 | #'BRDT' 126 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 127 | #'BRDT' 128 | # 129 | # Here is a more stable implementation: 130 | # 131 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400 132 | + dt.hour * 3600 133 | + dt.minute * 60 134 | + dt.second) 135 | return time.localtime(timestamp+time.timezone).tm_isdst 136 | 137 | def __eq__(self, other): 138 | if not isinstance(other, tzlocal): 139 | return False 140 | return (self._std_offset == other._std_offset and 141 | self._dst_offset == other._dst_offset) 142 | return True 143 | 144 | def __ne__(self, other): 145 | return not self.__eq__(other) 146 | 147 | def __repr__(self): 148 | return "%s()" % self.__class__.__name__ 149 | 150 | __reduce__ = object.__reduce__ 151 | 152 | class _ttinfo(object): 153 | __slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"] 154 | 155 | def __init__(self): 156 | for attr in self.__slots__: 157 | setattr(self, attr, None) 158 | 159 | def __repr__(self): 160 | l = [] 161 | for attr in self.__slots__: 162 | value = getattr(self, attr) 163 | if value is not None: 164 | l.append("%s=%s" % (attr, `value`)) 165 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) 166 | 167 | def __eq__(self, other): 168 | if not isinstance(other, _ttinfo): 169 | return False 170 | return (self.offset == other.offset and 171 | self.delta == other.delta and 172 | self.isdst == other.isdst and 173 | self.abbr == other.abbr and 174 | self.isstd == other.isstd and 175 | self.isgmt == other.isgmt) 176 | 177 | def __ne__(self, other): 178 | return not self.__eq__(other) 179 | 180 | def __getstate__(self): 181 | state = {} 182 | for name in self.__slots__: 183 | state[name] = getattr(self, name, None) 184 | return state 185 | 186 | def __setstate__(self, state): 187 | for name in self.__slots__: 188 | if name in state: 189 | setattr(self, name, state[name]) 190 | 191 | class tzfile(datetime.tzinfo): 192 | 193 | # http://www.twinsun.com/tz/tz-link.htm 194 | # ftp://elsie.nci.nih.gov/pub/tz*.tar.gz 195 | 196 | def __init__(self, fileobj): 197 | if isinstance(fileobj, basestring): 198 | self._filename = fileobj 199 | fileobj = open(fileobj) 200 | elif hasattr(fileobj, "name"): 201 | self._filename = fileobj.name 202 | else: 203 | self._filename = `fileobj` 204 | 205 | # From tzfile(5): 206 | # 207 | # The time zone information files used by tzset(3) 208 | # begin with the magic characters "TZif" to identify 209 | # them as time zone information files, followed by 210 | # sixteen bytes reserved for future use, followed by 211 | # six four-byte values of type long, written in a 212 | # ``standard'' byte order (the high-order byte 213 | # of the value is written first). 214 | 215 | if fileobj.read(4) != "TZif": 216 | raise ValueError, "magic not found" 217 | 218 | fileobj.read(16) 219 | 220 | ( 221 | # The number of UTC/local indicators stored in the file. 
222 | ttisgmtcnt, 223 | 224 | # The number of standard/wall indicators stored in the file. 225 | ttisstdcnt, 226 | 227 | # The number of leap seconds for which data is 228 | # stored in the file. 229 | leapcnt, 230 | 231 | # The number of "transition times" for which data 232 | # is stored in the file. 233 | timecnt, 234 | 235 | # The number of "local time types" for which data 236 | # is stored in the file (must not be zero). 237 | typecnt, 238 | 239 | # The number of characters of "time zone 240 | # abbreviation strings" stored in the file. 241 | charcnt, 242 | 243 | ) = struct.unpack(">6l", fileobj.read(24)) 244 | 245 | # The above header is followed by tzh_timecnt four-byte 246 | # values of type long, sorted in ascending order. 247 | # These values are written in ``standard'' byte order. 248 | # Each is used as a transition time (as returned by 249 | # time(2)) at which the rules for computing local time 250 | # change. 251 | 252 | if timecnt: 253 | self._trans_list = struct.unpack(">%dl" % timecnt, 254 | fileobj.read(timecnt*4)) 255 | else: 256 | self._trans_list = [] 257 | 258 | # Next come tzh_timecnt one-byte values of type unsigned 259 | # char; each one tells which of the different types of 260 | # ``local time'' types described in the file is associated 261 | # with the same-indexed transition time. These values 262 | # serve as indices into an array of ttinfo structures that 263 | # appears next in the file. 264 | 265 | if timecnt: 266 | self._trans_idx = struct.unpack(">%dB" % timecnt, 267 | fileobj.read(timecnt)) 268 | else: 269 | self._trans_idx = [] 270 | 271 | # Each ttinfo structure is written as a four-byte value 272 | # for tt_gmtoff of type long, in a standard byte 273 | # order, followed by a one-byte value for tt_isdst 274 | # and a one-byte value for tt_abbrind. In each 275 | # structure, tt_gmtoff gives the number of 276 | # seconds to be added to UTC, tt_isdst tells whether 277 | # tm_isdst should be set by localtime(3), and 278 | # tt_abbrind serves as an index into the array of 279 | # time zone abbreviation characters that follow the 280 | # ttinfo structure(s) in the file. 281 | 282 | ttinfo = [] 283 | 284 | for i in range(typecnt): 285 | ttinfo.append(struct.unpack(">lbb", fileobj.read(6))) 286 | 287 | abbr = fileobj.read(charcnt) 288 | 289 | # Then there are tzh_leapcnt pairs of four-byte 290 | # values, written in standard byte order; the 291 | # first value of each pair gives the time (as 292 | # returned by time(2)) at which a leap second 293 | # occurs; the second gives the total number of 294 | # leap seconds to be applied after the given time. 295 | # The pairs of values are sorted in ascending order 296 | # by time. 297 | 298 | # Not used, for now 299 | if leapcnt: 300 | leap = struct.unpack(">%dl" % (leapcnt*2), 301 | fileobj.read(leapcnt*8)) 302 | 303 | # Then there are tzh_ttisstdcnt standard/wall 304 | # indicators, each stored as a one-byte value; 305 | # they tell whether the transition times associated 306 | # with local time types were specified as standard 307 | # time or wall clock time, and are used when 308 | # a time zone file is used in handling POSIX-style 309 | # time zone environment variables. 
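        # Editor's note (hedged, added in editing): each indicator is one
        # signed byte, so for ttisstdcnt == 5 the format string built below
        # is ">5b", and struct.calcsize(">5b") == 5.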
310 | 311 | if ttisstdcnt: 312 | isstd = struct.unpack(">%db" % ttisstdcnt, 313 | fileobj.read(ttisstdcnt)) 314 | 315 | # Finally, there are tzh_ttisgmtcnt UTC/local 316 | # indicators, each stored as a one-byte value; 317 | # they tell whether the transition times associated 318 | # with local time types were specified as UTC or 319 | # local time, and are used when a time zone file 320 | # is used in handling POSIX-style time zone envi- 321 | # ronment variables. 322 | 323 | if ttisgmtcnt: 324 | isgmt = struct.unpack(">%db" % ttisgmtcnt, 325 | fileobj.read(ttisgmtcnt)) 326 | 327 | # ** Everything has been read ** 328 | 329 | # Build ttinfo list 330 | self._ttinfo_list = [] 331 | for i in range(typecnt): 332 | gmtoff, isdst, abbrind = ttinfo[i] 333 | # Round to full-minutes if that's not the case. Python's 334 | # datetime doesn't accept sub-minute timezones. Check 335 | # http://python.org/sf/1447945 for some information. 336 | gmtoff = (gmtoff+30)//60*60 337 | tti = _ttinfo() 338 | tti.offset = gmtoff 339 | tti.delta = datetime.timedelta(seconds=gmtoff) 340 | tti.isdst = isdst 341 | tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)] 342 | tti.isstd = (ttisstdcnt > i and isstd[i] != 0) 343 | tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0) 344 | self._ttinfo_list.append(tti) 345 | 346 | # Replace ttinfo indexes for ttinfo objects. 347 | trans_idx = [] 348 | for idx in self._trans_idx: 349 | trans_idx.append(self._ttinfo_list[idx]) 350 | self._trans_idx = tuple(trans_idx) 351 | 352 | # Set standard, dst, and before ttinfos. before will be 353 | # used when a given time is before any transitions, 354 | # and will be set to the first non-dst ttinfo, or to 355 | # the first dst, if all of them are dst. 356 | self._ttinfo_std = None 357 | self._ttinfo_dst = None 358 | self._ttinfo_before = None 359 | if self._ttinfo_list: 360 | if not self._trans_list: 361 | self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0] 362 | else: 363 | for i in range(timecnt-1,-1,-1): 364 | tti = self._trans_idx[i] 365 | if not self._ttinfo_std and not tti.isdst: 366 | self._ttinfo_std = tti 367 | elif not self._ttinfo_dst and tti.isdst: 368 | self._ttinfo_dst = tti 369 | if self._ttinfo_std and self._ttinfo_dst: 370 | break 371 | else: 372 | if self._ttinfo_dst and not self._ttinfo_std: 373 | self._ttinfo_std = self._ttinfo_dst 374 | 375 | for tti in self._ttinfo_list: 376 | if not tti.isdst: 377 | self._ttinfo_before = tti 378 | break 379 | else: 380 | self._ttinfo_before = self._ttinfo_list[0] 381 | 382 | # Now fix transition times to become relative to wall time. 383 | # 384 | # I'm not sure about this. In my tests, the tz source file 385 | # is setup to wall time, and in the binary file isstd and 386 | # isgmt are off, so it should be in wall time. OTOH, it's 387 | # always in gmt time. Let me know if you have comments 388 | # about this. 389 | laststdoffset = 0 390 | self._trans_list = list(self._trans_list) 391 | for i in range(len(self._trans_list)): 392 | tti = self._trans_idx[i] 393 | if not tti.isdst: 394 | # This is std time. 395 | self._trans_list[i] += tti.offset 396 | laststdoffset = tti.offset 397 | else: 398 | # This is dst time. Convert to std. 
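                # Editor's sketch (assumed offsets, not from the source): for
                # a zone like US/Eastern, a DST transition instant gets
                # laststdoffset == -18000 (-5h) added here, yielding the local
                # standard-time wall clock that _find_ttinfo() later compares
                # against its naive timestamp.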
399 | self._trans_list[i] += laststdoffset 400 | self._trans_list = tuple(self._trans_list) 401 | 402 | def _find_ttinfo(self, dt, laststd=0): 403 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400 404 | + dt.hour * 3600 405 | + dt.minute * 60 406 | + dt.second) 407 | idx = 0 408 | for trans in self._trans_list: 409 | if timestamp < trans: 410 | break 411 | idx += 1 412 | else: 413 | return self._ttinfo_std 414 | if idx == 0: 415 | return self._ttinfo_before 416 | if laststd: 417 | while idx > 0: 418 | tti = self._trans_idx[idx-1] 419 | if not tti.isdst: 420 | return tti 421 | idx -= 1 422 | else: 423 | return self._ttinfo_std 424 | else: 425 | return self._trans_idx[idx-1] 426 | 427 | def utcoffset(self, dt): 428 | if not self._ttinfo_std: 429 | return ZERO 430 | return self._find_ttinfo(dt).delta 431 | 432 | def dst(self, dt): 433 | if not self._ttinfo_dst: 434 | return ZERO 435 | tti = self._find_ttinfo(dt) 436 | if not tti.isdst: 437 | return ZERO 438 | 439 | # The documentation says that utcoffset()-dst() must 440 | # be constant for every dt. 441 | return tti.delta-self._find_ttinfo(dt, laststd=1).delta 442 | 443 | # An alternative for that would be: 444 | # 445 | # return self._ttinfo_dst.offset-self._ttinfo_std.offset 446 | # 447 | # However, this class stores historical changes in the 448 | # dst offset, so I belive that this wouldn't be the right 449 | # way to implement this. 450 | 451 | def tzname(self, dt): 452 | if not self._ttinfo_std: 453 | return None 454 | return self._find_ttinfo(dt).abbr 455 | 456 | def __eq__(self, other): 457 | if not isinstance(other, tzfile): 458 | return False 459 | return (self._trans_list == other._trans_list and 460 | self._trans_idx == other._trans_idx and 461 | self._ttinfo_list == other._ttinfo_list) 462 | 463 | def __ne__(self, other): 464 | return not self.__eq__(other) 465 | 466 | 467 | def __repr__(self): 468 | return "%s(%s)" % (self.__class__.__name__, `self._filename`) 469 | 470 | def __reduce__(self): 471 | if not os.path.isfile(self._filename): 472 | raise ValueError, "Unpickable %s class" % self.__class__.__name__ 473 | return (self.__class__, (self._filename,)) 474 | 475 | class tzrange(datetime.tzinfo): 476 | 477 | def __init__(self, stdabbr, stdoffset=None, 478 | dstabbr=None, dstoffset=None, 479 | start=None, end=None): 480 | global relativedelta 481 | if not relativedelta: 482 | from dateutil import relativedelta 483 | self._std_abbr = stdabbr 484 | self._dst_abbr = dstabbr 485 | if stdoffset is not None: 486 | self._std_offset = datetime.timedelta(seconds=stdoffset) 487 | else: 488 | self._std_offset = ZERO 489 | if dstoffset is not None: 490 | self._dst_offset = datetime.timedelta(seconds=dstoffset) 491 | elif dstabbr and stdoffset is not None: 492 | self._dst_offset = self._std_offset+datetime.timedelta(hours=+1) 493 | else: 494 | self._dst_offset = ZERO 495 | if dstabbr and start is None: 496 | self._start_delta = relativedelta.relativedelta( 497 | hours=+2, month=4, day=1, weekday=relativedelta.SU(+1)) 498 | else: 499 | self._start_delta = start 500 | if dstabbr and end is None: 501 | self._end_delta = relativedelta.relativedelta( 502 | hours=+1, month=10, day=31, weekday=relativedelta.SU(-1)) 503 | else: 504 | self._end_delta = end 505 | 506 | def utcoffset(self, dt): 507 | if self._isdst(dt): 508 | return self._dst_offset 509 | else: 510 | return self._std_offset 511 | 512 | def dst(self, dt): 513 | if self._isdst(dt): 514 | return self._dst_offset-self._std_offset 515 | else: 516 | return ZERO 517 | 518 | def 
tzname(self, dt): 519 | if self._isdst(dt): 520 | return self._dst_abbr 521 | else: 522 | return self._std_abbr 523 | 524 | def _isdst(self, dt): 525 | if not self._start_delta: 526 | return False 527 | year = datetime.datetime(dt.year,1,1) 528 | start = year+self._start_delta 529 | end = year+self._end_delta 530 | dt = dt.replace(tzinfo=None) 531 | if start < end: 532 | return dt >= start and dt < end 533 | else: 534 | return dt >= start or dt < end 535 | 536 | def __eq__(self, other): 537 | if not isinstance(other, tzrange): 538 | return False 539 | return (self._std_abbr == other._std_abbr and 540 | self._dst_abbr == other._dst_abbr and 541 | self._std_offset == other._std_offset and 542 | self._dst_offset == other._dst_offset and 543 | self._start_delta == other._start_delta and 544 | self._end_delta == other._end_delta) 545 | 546 | def __ne__(self, other): 547 | return not self.__eq__(other) 548 | 549 | def __repr__(self): 550 | return "%s(...)" % self.__class__.__name__ 551 | 552 | __reduce__ = object.__reduce__ 553 | 554 | class tzstr(tzrange): 555 | 556 | def __init__(self, s): 557 | global parser 558 | if not parser: 559 | from dateutil import parser 560 | self._s = s 561 | 562 | res = parser._parsetz(s) 563 | if res is None: 564 | raise ValueError, "unknown string format" 565 | 566 | # Here we break the compatibility with the TZ variable handling. 567 | # GMT-3 actually *means* the timezone -3. 568 | if res.stdabbr in ("GMT", "UTC"): 569 | res.stdoffset *= -1 570 | 571 | # We must initialize it first, since _delta() needs 572 | # _std_offset and _dst_offset set. Use False in start/end 573 | # to avoid building it two times. 574 | tzrange.__init__(self, res.stdabbr, res.stdoffset, 575 | res.dstabbr, res.dstoffset, 576 | start=False, end=False) 577 | 578 | if not res.dstabbr: 579 | self._start_delta = None 580 | self._end_delta = None 581 | else: 582 | self._start_delta = self._delta(res.start) 583 | if self._start_delta: 584 | self._end_delta = self._delta(res.end, isend=1) 585 | 586 | def _delta(self, x, isend=0): 587 | kwargs = {} 588 | if x.month is not None: 589 | kwargs["month"] = x.month 590 | if x.weekday is not None: 591 | kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week) 592 | if x.week > 0: 593 | kwargs["day"] = 1 594 | else: 595 | kwargs["day"] = 31 596 | elif x.day: 597 | kwargs["day"] = x.day 598 | elif x.yday is not None: 599 | kwargs["yearday"] = x.yday 600 | elif x.jyday is not None: 601 | kwargs["nlyearday"] = x.jyday 602 | if not kwargs: 603 | # Default is to start on first sunday of april, and end 604 | # on last sunday of october. 605 | if not isend: 606 | kwargs["month"] = 4 607 | kwargs["day"] = 1 608 | kwargs["weekday"] = relativedelta.SU(+1) 609 | else: 610 | kwargs["month"] = 10 611 | kwargs["day"] = 31 612 | kwargs["weekday"] = relativedelta.SU(-1) 613 | if x.time is not None: 614 | kwargs["seconds"] = x.time 615 | else: 616 | # Default is 2AM. 617 | kwargs["seconds"] = 7200 618 | if isend: 619 | # Convert to standard time, to follow the documented way 620 | # of working with the extra hour. See the documentation 621 | # of the tzinfo class. 
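            # Editor's worked example (assumed -5h std / -4h dst offsets,
            # added in editing): delta below is +1h, so the default 7200s
            # (2AM) end time becomes 3600s, i.e. 1AM standard time.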
622 | delta = self._dst_offset-self._std_offset 623 | kwargs["seconds"] -= delta.seconds+delta.days*86400 624 | return relativedelta.relativedelta(**kwargs) 625 | 626 | def __repr__(self): 627 | return "%s(%s)" % (self.__class__.__name__, `self._s`) 628 | 629 | class _tzicalvtzcomp: 630 | def __init__(self, tzoffsetfrom, tzoffsetto, isdst, 631 | tzname=None, rrule=None): 632 | self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom) 633 | self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto) 634 | self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom 635 | self.isdst = isdst 636 | self.tzname = tzname 637 | self.rrule = rrule 638 | 639 | class _tzicalvtz(datetime.tzinfo): 640 | def __init__(self, tzid, comps=[]): 641 | self._tzid = tzid 642 | self._comps = comps 643 | self._cachedate = [] 644 | self._cachecomp = [] 645 | 646 | def _find_comp(self, dt): 647 | if len(self._comps) == 1: 648 | return self._comps[0] 649 | dt = dt.replace(tzinfo=None) 650 | try: 651 | return self._cachecomp[self._cachedate.index(dt)] 652 | except ValueError: 653 | pass 654 | lastcomp = None 655 | lastcompdt = None 656 | for comp in self._comps: 657 | if not comp.isdst: 658 | # Handle the extra hour in DST -> STD 659 | compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True) 660 | else: 661 | compdt = comp.rrule.before(dt, inc=True) 662 | if compdt and (not lastcompdt or lastcompdt < compdt): 663 | lastcompdt = compdt 664 | lastcomp = comp 665 | if not lastcomp: 666 | # RFC says nothing about what to do when a given 667 | # time is before the first onset date. We'll look for the 668 | # first standard component, or the first component, if 669 | # none is found. 670 | for comp in self._comps: 671 | if not comp.isdst: 672 | lastcomp = comp 673 | break 674 | else: 675 | lastcomp = comp[0] 676 | self._cachedate.insert(0, dt) 677 | self._cachecomp.insert(0, lastcomp) 678 | if len(self._cachedate) > 10: 679 | self._cachedate.pop() 680 | self._cachecomp.pop() 681 | return lastcomp 682 | 683 | def utcoffset(self, dt): 684 | return self._find_comp(dt).tzoffsetto 685 | 686 | def dst(self, dt): 687 | comp = self._find_comp(dt) 688 | if comp.isdst: 689 | return comp.tzoffsetdiff 690 | else: 691 | return ZERO 692 | 693 | def tzname(self, dt): 694 | return self._find_comp(dt).tzname 695 | 696 | def __repr__(self): 697 | return "" % `self._tzid` 698 | 699 | __reduce__ = object.__reduce__ 700 | 701 | class tzical: 702 | def __init__(self, fileobj): 703 | global rrule 704 | if not rrule: 705 | from dateutil import rrule 706 | 707 | if isinstance(fileobj, basestring): 708 | self._s = fileobj 709 | fileobj = open(fileobj) 710 | elif hasattr(fileobj, "name"): 711 | self._s = fileobj.name 712 | else: 713 | self._s = `fileobj` 714 | 715 | self._vtz = {} 716 | 717 | self._parse_rfc(fileobj.read()) 718 | 719 | def keys(self): 720 | return self._vtz.keys() 721 | 722 | def get(self, tzid=None): 723 | if tzid is None: 724 | keys = self._vtz.keys() 725 | if len(keys) == 0: 726 | raise ValueError, "no timezones defined" 727 | elif len(keys) > 1: 728 | raise ValueError, "more than one timezone available" 729 | tzid = keys[0] 730 | return self._vtz.get(tzid) 731 | 732 | def _parse_offset(self, s): 733 | s = s.strip() 734 | if not s: 735 | raise ValueError, "empty offset" 736 | if s[0] in ('+', '-'): 737 | signal = (-1,+1)[s[0]=='+'] 738 | s = s[1:] 739 | else: 740 | signal = +1 741 | if len(s) == 4: 742 | return (int(s[:2])*3600+int(s[2:])*60)*signal 743 | elif len(s) == 6: 744 | return 
(int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal 745 | else: 746 | raise ValueError, "invalid offset: "+s 747 | 748 | def _parse_rfc(self, s): 749 | lines = s.splitlines() 750 | if not lines: 751 | raise ValueError, "empty string" 752 | 753 | # Unfold 754 | i = 0 755 | while i < len(lines): 756 | line = lines[i].rstrip() 757 | if not line: 758 | del lines[i] 759 | elif i > 0 and line[0] == " ": 760 | lines[i-1] += line[1:] 761 | del lines[i] 762 | else: 763 | i += 1 764 | 765 | tzid = None 766 | comps = [] 767 | invtz = False 768 | comptype = None 769 | for line in lines: 770 | if not line: 771 | continue 772 | name, value = line.split(':', 1) 773 | parms = name.split(';') 774 | if not parms: 775 | raise ValueError, "empty property name" 776 | name = parms[0].upper() 777 | parms = parms[1:] 778 | if invtz: 779 | if name == "BEGIN": 780 | if value in ("STANDARD", "DAYLIGHT"): 781 | # Process component 782 | pass 783 | else: 784 | raise ValueError, "unknown component: "+value 785 | comptype = value 786 | founddtstart = False 787 | tzoffsetfrom = None 788 | tzoffsetto = None 789 | rrulelines = [] 790 | tzname = None 791 | elif name == "END": 792 | if value == "VTIMEZONE": 793 | if comptype: 794 | raise ValueError, \ 795 | "component not closed: "+comptype 796 | if not tzid: 797 | raise ValueError, \ 798 | "mandatory TZID not found" 799 | if not comps: 800 | raise ValueError, \ 801 | "at least one component is needed" 802 | # Process vtimezone 803 | self._vtz[tzid] = _tzicalvtz(tzid, comps) 804 | invtz = False 805 | elif value == comptype: 806 | if not founddtstart: 807 | raise ValueError, \ 808 | "mandatory DTSTART not found" 809 | if tzoffsetfrom is None: 810 | raise ValueError, \ 811 | "mandatory TZOFFSETFROM not found" 812 | if tzoffsetto is None: 813 | raise ValueError, \ 814 | "mandatory TZOFFSETFROM not found" 815 | # Process component 816 | rr = None 817 | if rrulelines: 818 | rr = rrule.rrulestr("\n".join(rrulelines), 819 | compatible=True, 820 | ignoretz=True, 821 | cache=True) 822 | comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto, 823 | (comptype == "DAYLIGHT"), 824 | tzname, rr) 825 | comps.append(comp) 826 | comptype = None 827 | else: 828 | raise ValueError, \ 829 | "invalid component end: "+value 830 | elif comptype: 831 | if name == "DTSTART": 832 | rrulelines.append(line) 833 | founddtstart = True 834 | elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"): 835 | rrulelines.append(line) 836 | elif name == "TZOFFSETFROM": 837 | if parms: 838 | raise ValueError, \ 839 | "unsupported %s parm: %s "%(name, parms[0]) 840 | tzoffsetfrom = self._parse_offset(value) 841 | elif name == "TZOFFSETTO": 842 | if parms: 843 | raise ValueError, \ 844 | "unsupported TZOFFSETTO parm: "+parms[0] 845 | tzoffsetto = self._parse_offset(value) 846 | elif name == "TZNAME": 847 | if parms: 848 | raise ValueError, \ 849 | "unsupported TZNAME parm: "+parms[0] 850 | tzname = value 851 | elif name == "COMMENT": 852 | pass 853 | else: 854 | raise ValueError, "unsupported property: "+name 855 | else: 856 | if name == "TZID": 857 | if parms: 858 | raise ValueError, \ 859 | "unsupported TZID parm: "+parms[0] 860 | tzid = value 861 | elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"): 862 | pass 863 | else: 864 | raise ValueError, "unsupported property: "+name 865 | elif name == "BEGIN" and value == "VTIMEZONE": 866 | tzid = None 867 | comps = [] 868 | invtz = True 869 | 870 | def __repr__(self): 871 | return "%s(%s)" % (self.__class__.__name__, `self._s`) 872 | 873 | if sys.platform != "win32": 874 | TZFILES = 
["/etc/localtime", "localtime"] 875 | TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"] 876 | else: 877 | TZFILES = [] 878 | TZPATHS = [] 879 | 880 | def gettz(name=None): 881 | tz = None 882 | if not name: 883 | try: 884 | name = os.environ["TZ"] 885 | except KeyError: 886 | pass 887 | if name is None or name == ":": 888 | for filepath in TZFILES: 889 | if not os.path.isabs(filepath): 890 | filename = filepath 891 | for path in TZPATHS: 892 | filepath = os.path.join(path, filename) 893 | if os.path.isfile(filepath): 894 | break 895 | else: 896 | continue 897 | if os.path.isfile(filepath): 898 | try: 899 | tz = tzfile(filepath) 900 | break 901 | except (IOError, OSError, ValueError): 902 | pass 903 | else: 904 | tz = tzlocal() 905 | else: 906 | if name.startswith(":"): 907 | name = name[:-1] 908 | if os.path.isabs(name): 909 | if os.path.isfile(name): 910 | tz = tzfile(name) 911 | else: 912 | tz = None 913 | else: 914 | for path in TZPATHS: 915 | filepath = os.path.join(path, name) 916 | if not os.path.isfile(filepath): 917 | filepath = filepath.replace(' ','_') 918 | if not os.path.isfile(filepath): 919 | continue 920 | try: 921 | tz = tzfile(filepath) 922 | break 923 | except (IOError, OSError, ValueError): 924 | pass 925 | else: 926 | tz = None 927 | if tzwin: 928 | try: 929 | tz = tzwin(name) 930 | except OSError: 931 | pass 932 | if not tz: 933 | from dateutil.zoneinfo import gettz 934 | tz = gettz(name) 935 | if not tz: 936 | for c in name: 937 | # name must have at least one offset to be a tzstr 938 | if c in "0123456789": 939 | try: 940 | tz = tzstr(name) 941 | except ValueError: 942 | pass 943 | break 944 | else: 945 | if name in ("GMT", "UTC"): 946 | tz = tzutc() 947 | elif name in time.tzname: 948 | tz = tzlocal() 949 | return tz 950 | 951 | # vim:ts=4:sw=4:et 952 | -------------------------------------------------------------------------------- /dateutil/tzwin.py: -------------------------------------------------------------------------------- 1 | # This code was originally contributed by Jeffrey Harris. 
2 | import datetime 3 | import struct 4 | import _winreg 5 | 6 | __author__ = "Jeffrey Harris & Gustavo Niemeyer " 7 | 8 | __all__ = ["tzwin", "tzwinlocal"] 9 | 10 | ONEWEEK = datetime.timedelta(7) 11 | 12 | TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones" 13 | TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones" 14 | TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation" 15 | 16 | def _settzkeyname(): 17 | global TZKEYNAME 18 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 19 | try: 20 | _winreg.OpenKey(handle, TZKEYNAMENT).Close() 21 | TZKEYNAME = TZKEYNAMENT 22 | except WindowsError: 23 | TZKEYNAME = TZKEYNAME9X 24 | handle.Close() 25 | 26 | _settzkeyname() 27 | 28 | class tzwinbase(datetime.tzinfo): 29 | """tzinfo class based on win32's timezones available in the registry.""" 30 | 31 | def utcoffset(self, dt): 32 | if self._isdst(dt): 33 | return datetime.timedelta(minutes=self._dstoffset) 34 | else: 35 | return datetime.timedelta(minutes=self._stdoffset) 36 | 37 | def dst(self, dt): 38 | if self._isdst(dt): 39 | minutes = self._dstoffset - self._stdoffset 40 | return datetime.timedelta(minutes=minutes) 41 | else: 42 | return datetime.timedelta(0) 43 | 44 | def tzname(self, dt): 45 | if self._isdst(dt): 46 | return self._dstname 47 | else: 48 | return self._stdname 49 | 50 | def list(): 51 | """Return a list of all time zones known to the system.""" 52 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 53 | tzkey = _winreg.OpenKey(handle, TZKEYNAME) 54 | result = [_winreg.EnumKey(tzkey, i) 55 | for i in range(_winreg.QueryInfoKey(tzkey)[0])] 56 | tzkey.Close() 57 | handle.Close() 58 | return result 59 | list = staticmethod(list) 60 | 61 | def display(self): 62 | return self._display 63 | 64 | def _isdst(self, dt): 65 | dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek, 66 | self._dsthour, self._dstminute, 67 | self._dstweeknumber) 68 | dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek, 69 | self._stdhour, self._stdminute, 70 | self._stdweeknumber) 71 | if dston < dstoff: 72 | return dston <= dt.replace(tzinfo=None) < dstoff 73 | else: 74 | return not dstoff <= dt.replace(tzinfo=None) < dston 75 | 76 | 77 | class tzwin(tzwinbase): 78 | 79 | def __init__(self, name): 80 | self._name = name 81 | 82 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 83 | tzkey = _winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name)) 84 | keydict = valuestodict(tzkey) 85 | tzkey.Close() 86 | handle.Close() 87 | 88 | self._stdname = keydict["Std"].encode("iso-8859-1") 89 | self._dstname = keydict["Dlt"].encode("iso-8859-1") 90 | 91 | self._display = keydict["Display"] 92 | 93 | # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm 94 | tup = struct.unpack("=3l16h", keydict["TZI"]) 95 | self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1 96 | self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1 97 | 98 | (self._stdmonth, 99 | self._stddayofweek, # Sunday = 0 100 | self._stdweeknumber, # Last = 5 101 | self._stdhour, 102 | self._stdminute) = tup[4:9] 103 | 104 | (self._dstmonth, 105 | self._dstdayofweek, # Sunday = 0 106 | self._dstweeknumber, # Last = 5 107 | self._dsthour, 108 | self._dstminute) = tup[12:17] 109 | 110 | def __repr__(self): 111 | return "tzwin(%s)" % repr(self._name) 112 | 113 | def __reduce__(self): 114 | return (self.__class__, (self._name,)) 115 | 116 | 117 | class tzwinlocal(tzwinbase): 118 | 119 | def __init__(self): 
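        # Editor's note (hedged observation added in editing): the values
        # read below come from the live TimeZoneInformation registry key, so
        # the instance reflects the zone configured at construction time.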
120 | 121 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 122 | 123 | tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME) 124 | keydict = valuestodict(tzlocalkey) 125 | tzlocalkey.Close() 126 | 127 | self._stdname = keydict["StandardName"].encode("iso-8859-1") 128 | self._dstname = keydict["DaylightName"].encode("iso-8859-1") 129 | 130 | try: 131 | tzkey = _winreg.OpenKey(handle, "%s\%s"%(TZKEYNAME, self._stdname)) 132 | _keydict = valuestodict(tzkey) 133 | self._display = _keydict["Display"] 134 | tzkey.Close() 135 | except OSError: 136 | self._display = None 137 | 138 | handle.Close() 139 | 140 | self._stdoffset = -keydict["Bias"]-keydict["StandardBias"] 141 | self._dstoffset = self._stdoffset-keydict["DaylightBias"] 142 | 143 | 144 | # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm 145 | tup = struct.unpack("=8h", keydict["StandardStart"]) 146 | 147 | (self._stdmonth, 148 | self._stddayofweek, # Sunday = 0 149 | self._stdweeknumber, # Last = 5 150 | self._stdhour, 151 | self._stdminute) = tup[1:6] 152 | 153 | tup = struct.unpack("=8h", keydict["DaylightStart"]) 154 | 155 | (self._dstmonth, 156 | self._dstdayofweek, # Sunday = 0 157 | self._dstweeknumber, # Last = 5 158 | self._dsthour, 159 | self._dstminute) = tup[1:6] 160 | 161 | def __reduce__(self): 162 | return (self.__class__, ()) 163 | 164 | def picknthweekday(year, month, dayofweek, hour, minute, whichweek): 165 | """dayofweek == 0 means Sunday, whichweek 5 means last instance""" 166 | first = datetime.datetime(year, month, 1, hour, minute) 167 | weekdayone = first.replace(day=((dayofweek-first.isoweekday())%7+1)) 168 | for n in xrange(whichweek): 169 | dt = weekdayone+(whichweek-n)*ONEWEEK 170 | if dt.month == month: 171 | return dt 172 | 173 | def valuestodict(key): 174 | """Convert a registry key's values to a dictionary.""" 175 | dict = {} 176 | size = _winreg.QueryInfoKey(key)[1] 177 | for i in range(size): 178 | data = _winreg.EnumValue(key, i) 179 | dict[data[0]] = data[1] 180 | return dict 181 | -------------------------------------------------------------------------------- /dbobject.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Representing an object that can store to SQLite 5 | # 6 | # 2013-04-16 Created by Pascal Pfiffner 7 | # 8 | 9 | import logging 10 | 11 | from sqlite import SQLite 12 | 13 | 14 | class DBObject (object): 15 | """ A superclass for objects that can dehydrate to and hydrate from SQLite. 16 | 17 | Very crude and basic for the time being, but still takes away much of the 18 | cruft for subclasses. 19 | """ 20 | 21 | sqlite_default_db = 'databases/storage.db' 22 | sqlite_handle = None 23 | sqlite_must_commit = False 24 | 25 | table_name = None 26 | table_key = None 27 | 28 | def __init__(self): 29 | self.id = None 30 | self.hydrated = False 31 | 32 | 33 | # -------------------------------------------------------------------------- Dehydration 34 | def should_insert(self): 35 | """ Return True if the receiver should be inserted (i.e. is not already 36 | in the db). """ 37 | return False 38 | 39 | def will_insert(self): 40 | """ Called before the insert query is performed, you can use this as a 41 | hook. """ 42 | pass 43 | 44 | def insert_tuple(self): 45 | """ Cheap solution for now: return the INSERT sql as first and a list 46 | of values as second object. 
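        Example override in a subclass (table and column names are
        illustrative, not part of this project):

            def insert_tuple(self):
                return ("INSERT OR IGNORE INTO docs (title) VALUES (?)",
                        [self.title])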
""" 47 | return None, None 48 | 49 | def did_insert(self): 50 | pass 51 | 52 | def insert(self): 53 | """ Runs an INSERT query for the receiver. 54 | This method will not check with "should_insert()"! """ 55 | self.will_insert() 56 | 57 | sql, params = self.insert_tuple() 58 | if sql is None or params is None: 59 | return False 60 | 61 | cls = self.__class__ 62 | cls.sqlite_assure_handle() 63 | self.id = cls.sqlite_handle.executeInsert(sql, params) 64 | cls.sqlite_must_commit = True 65 | self.did_insert() 66 | 67 | return True 68 | 69 | 70 | def should_update(self): 71 | return True 72 | 73 | def update_tuple(self): 74 | """ Cheap solution for now: return the UPDATE sql as first and a list 75 | of values as second object. """ 76 | return None, None 77 | 78 | def update(self): 79 | """ Runs the UPDATE query on the receiver. """ 80 | 81 | sql, params = self.update_tuple() 82 | if sql is None or params is None: 83 | return False 84 | 85 | cls = self.__class__ 86 | cls.sqlite_assure_handle() 87 | if cls.sqlite_handle.execute(sql, params): 88 | cls.sqlite_must_commit = True 89 | self.hydrated = True 90 | return True 91 | 92 | return False 93 | 94 | def did_store(self): 95 | """ Called after a successful call to self.store(). """ 96 | pass 97 | 98 | def store(self): 99 | """ Stores the receiver's data to SQLite. You must MANUALLY COMMIT! 100 | """ 101 | 102 | # do we need to insert first? 103 | if self.should_insert() and not self.insert(): 104 | logging.warning("Failed to INSERT %s" % self) 105 | 106 | # perform the update 107 | if self.should_update() and not self.update(): 108 | logging.warning("Failed to UPDATE %s" % self) 109 | return False 110 | 111 | self.did_store() 112 | return True 113 | 114 | 115 | # -------------------------------------------------------------------------- Hydration 116 | def load(self, force=False): 117 | """ Hydrate from database. """ 118 | pass 119 | 120 | def from_db(self, data): 121 | """ Fill from an SQLite-retrieved list. """ 122 | pass 123 | 124 | 125 | # -------------------------------------------------------------------------- SQLite Methods 126 | def sqlite_execute(self, sql, params): 127 | """ Executes the given SQL statement with the given parameters. 128 | Returns True on success, False otherwise. """ 129 | 130 | cls = self.__class__ 131 | cls.sqlite_assure_handle() 132 | if cls.sqlite_handle.execute(sql, params): 133 | cls.sqlite_must_commit = True 134 | self.hydrated = True 135 | return True 136 | 137 | return False 138 | 139 | @classmethod 140 | def sqlite_select(cls, sql, params): 141 | """ Executes the SQL statement and returns the response. You can use 142 | this method in an iterator. """ 143 | 144 | cls.sqlite_assure_handle() 145 | return cls.sqlite_handle.execute(sql, params) 146 | 147 | @classmethod 148 | def sqlite_select_one(cls, sql, params): 149 | """ Executes the SQL statement and returns the first response row. 150 | """ 151 | 152 | cls.sqlite_assure_handle() 153 | return cls.sqlite_handle.executeOne(sql, params) 154 | 155 | @classmethod 156 | def add_index(cls, table_column): 157 | """ Adds an index for the given table column if there is none. 
158 | """ 159 | if table_column is None: 160 | return 161 | 162 | cls.sqlite_assure_handle() 163 | idx_name = "%s_index" % table_column 164 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS %s ON %s (%s)" % (idx_name, cls.table_name, table_column)) 165 | 166 | 167 | # -------------------------------------------------------------------------- Class Methods 168 | @classmethod 169 | def sqlite_assure_handle(cls): 170 | if cls.sqlite_handle is None: 171 | cls.sqlite_handle = SQLite.get(cls.sqlite_default_db) 172 | 173 | @classmethod 174 | def sqlite_release_handle(cls): 175 | cls.sqlite_handle = None 176 | 177 | @classmethod 178 | def sqlite_commit_if_needed(cls): 179 | """ Commits to SQLite if the flag had been set. """ 180 | if cls.sqlite_handle is None: 181 | return 182 | 183 | if cls.sqlite_must_commit: 184 | cls.sqlite_must_commit = False 185 | cls.sqlite_handle.commit() 186 | 187 | 188 | # -------------------------------------------------------------------------- Table Setup 189 | @classmethod 190 | def table_structure(cls): 191 | """ Return the table structure here. """ 192 | return None 193 | 194 | @classmethod 195 | def setup_tables(cls, db_path=None): 196 | if db_path is not None: 197 | cls.sqlite_default_db = db_path 198 | 199 | struct = cls.table_structure() 200 | if struct is None: 201 | return False 202 | 203 | cls.sqlite_assure_handle() 204 | if cls.sqlite_handle.create(cls.table_name, struct): 205 | cls.did_setup_tables(db_path) 206 | 207 | @classmethod 208 | def did_setup_tables(cls, db_path): 209 | pass 210 | 211 | # call the table setup to be sure it was set up 212 | # SubClass.setup_tables() 213 | 214 | 215 | -------------------------------------------------------------------------------- /files.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | 4 | def get_timestamp(): 5 | return datetime.now().strftime(("%Y-%m-%d %H:%M:%S")) 6 | 7 | FILES = { 8 | "1": { 9 | "id": "1", 10 | "title": "clinical_ax.txt", 11 | "author": "Dr. John Doe", 12 | "client_name": "Foo Bar", 13 | "timestamp": get_timestamp() 14 | }, 15 | "2": { 16 | "id": "2", 17 | "title": "clinical_review.txt", 18 | "author": "Dr. Scooby Doo", 19 | "client_name": "Foo Bar", 20 | "timestamp": get_timestamp() 21 | }, 22 | "3": { 23 | "id": "3", 24 | "title": "clinical_note.txt", 25 | "author": "Dr. Donald Duck", 26 | "client_name": "Foo Bar", 27 | "timestamp": get_timestamp() 28 | } 29 | } 30 | 31 | def read(): 32 | 33 | # Create the list of people from our data 34 | return [FILES[key] for key in sorted(FILES.keys())] 35 | -------------------------------------------------------------------------------- /mngobject.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Superclass for objects stored in MongoDB 5 | # 6 | # 2013-07-10 Created by Pascal Pfiffner 7 | # 8 | 9 | import logging 10 | import collections 11 | 12 | from pymongo import MongoClient 13 | 14 | 15 | class MNGObject (object): 16 | """ Superclass for an object stored in a MongoDB collection. 
""" 17 | 18 | def __init__(self, id=None): 19 | self.id = id 20 | self.doc = None 21 | self.loaded = False 22 | 23 | 24 | # -------------------------------------------------------------------------- MongoDB 25 | database_uri = "mongodb://localhost:27017" 26 | 27 | # the MongoDB database may be 'None', in which case the default db will be 28 | # used, and if that doesn't work it will fall back to use 'default' 29 | database_name = None 30 | 31 | # the MongoDB collection that holds documents of this class 32 | collection_name = None 33 | 34 | _collection = None 35 | 36 | @classmethod 37 | def collection(cls): 38 | """ Returns a Mongo Collection object, creating it if necessary. """ 39 | if cls._collection is None: 40 | if not cls.collection_name: 41 | raise Exception("No collection has been set for %s" % cls) 42 | 43 | client = MongoClient(cls.database_uri) 44 | if cls.database_name is None: 45 | try: 46 | db = client.get_default_database() 47 | except Exception as e: 48 | logging.debug("Failed to get default database: %s" % e) 49 | db = client['default'] 50 | else: 51 | db = client[cls.database_name] 52 | cls._collection = db[cls.collection_name] 53 | 54 | return cls._collection 55 | 56 | @classmethod 57 | def test_connection(cls): 58 | """ Tests the database by inserting, retrieving and deleting a document. 59 | """ 60 | old_coll = cls.collection_name 61 | cls.collection_name = 'foo' 62 | 63 | obj = MNGObject() 64 | obj.doc = { 65 | 'title': "This is a connection test document" 66 | } 67 | 68 | ret = None 69 | 70 | # try storing 71 | try: 72 | obj.store() 73 | 74 | # try loading 75 | sec = MNGObject(obj.id) 76 | try: 77 | sec.load() 78 | 79 | # compare titles 80 | t1 = obj.doc.get('title') if obj.doc else None 81 | t2 = sec.doc.get('title') if sec.doc else None 82 | if t1 == t2: 83 | 84 | # try removing 85 | try: 86 | if not sec.remove(): 87 | raise Exception('failed to remove') 88 | except Exception as e: 89 | ret = "TEST FAILED with remove() exception: %s" % e 90 | else: 91 | ret = "TEST FAILED, insertion and retrieval do not match (%s != %s)" % (t1, t2) 92 | except Exception as e: 93 | ret = "TEST FAILED with load() exception: %s" % e 94 | except Exception as e: 95 | ret = "TEST FAILED with store() exception: %s" % e 96 | 97 | 98 | # clean up 99 | try: 100 | cls._collection.drop() 101 | cls._collection = None 102 | except: 103 | logging.error("Failed to drop collection: %s" % e) 104 | 105 | cls.connection_name = old_coll 106 | 107 | return ret 108 | 109 | 110 | # -------------------------------------------------------------------------- Document Manipulation 111 | def ensure_doc_id(self): 112 | had_doc = True 113 | if self.doc is None: 114 | had_doc = False 115 | self.doc = {} 116 | 117 | if self.id: 118 | self.doc['_id'] = self.id 119 | elif had_doc: 120 | self.id = self.doc.get('_id') 121 | if self.id is None: 122 | self.id = self.doc.get('id') 123 | self.doc['_id'] = self.id 124 | 125 | def replace_with(self, json): 126 | """ Replaces the document tree with the given JSON tree. 
127 | 128 | The document id is set from the receiver's id if it's there, otherwise 129 | it's being searched in the doc in this order: 130 | - if self.id is not None, the doc's "_id" will be set to self.id 131 | - if doc["_id"] is present, this becomes self.id 132 | - if doc["id"] is present, this becomes self.id and is set as the 133 | docs "_id" 134 | """ 135 | if not self.loaded: 136 | self.load() 137 | 138 | self.doc = json 139 | self.loaded = True 140 | 141 | # set or update our id 142 | self.ensure_doc_id() 143 | self.did_update_doc() 144 | 145 | def update_with(self, json): 146 | """ Updates the document tree by merging it with the given JSON tree. 147 | 148 | The id of the document is automatically set in this order: 149 | - if self.id is not None, the doc's "_id" will be set to self.id 150 | - if doc["_id"] is present, this becomes self.id 151 | - if doc["id"] is present, this becomes self.id and is set as the 152 | docs "_id" 153 | """ 154 | 155 | if not self.loaded: 156 | self.load() 157 | 158 | # set or update contents 159 | if self.doc is None: 160 | self.doc = json 161 | else: 162 | self.doc = deepUpdate(self.doc, json) 163 | self.loaded = True 164 | 165 | # set or update our id 166 | self.ensure_doc_id() 167 | self.did_update_doc() 168 | 169 | def did_update_doc(self): 170 | """ Called when self.doc has been changed programmatically (i.e. NOT 171 | after loading from database). 172 | 173 | You can call this manually if you directly assign self.doc and want 174 | this to trigger. The default implementation does nothing. 175 | """ 176 | pass 177 | 178 | def update_subtree(self, keypath, tree): 179 | assert False, "Not implemented" 180 | 181 | def replace_subtree(self, keypath, tree): 182 | """ replaces the existing tree at keypath with the new tree. """ 183 | 184 | if not self.loaded and self.id: 185 | self.load() 186 | 187 | self.ensure_doc_id() 188 | self.doc = replaceSubtree(self.doc, keypath, tree) 189 | self.loaded = True 190 | 191 | 192 | # -------------------------------------------------------------------------- Dehydration 193 | def store(self, subtree=None): 194 | """ Stores the receiver's data to the collection, letting Mongo decide 195 | between an insert and an update. 196 | If "subtree" is not None, an update is forced only on the given subtree 197 | which should have the format: {'keypath': value}. """ 198 | 199 | # throw up if there is no content and we're not saving a subtree 200 | if self.doc is None and subtree is None: 201 | raise Exception("This object does not have content") 202 | 203 | cls = self.__class__ 204 | 205 | # update if there's a subtree, otherwise use "save" 206 | if subtree is not None: 207 | if self.id is None: 208 | raise Exception("No id is set, cannot update subtree %s" % subtree) 209 | res = cls.collection().update({"_id": self.id}, {"$set": subtree}) 210 | if res is not None: 211 | if res.get('err'): 212 | logging.warning("Error while saving subtree: %s" % res.get('err')) 213 | 214 | # instead of loading again, would be nice to update self.doc 215 | # appropriately 216 | self.doc = None 217 | self.load() 218 | else: 219 | self.id = cls.collection().save(self.doc, manipulate=True) 220 | 221 | self.did_store() 222 | 223 | return True 224 | 225 | def did_store(self): 226 | """ Called after a successful call to "store". """ 227 | pass 228 | 229 | 230 | # -------------------------------------------------------------------------- Hydration 231 | def load(self, force=False): 232 | """ Hydrate from database, if the instance has an id. 
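        Typical use (illustrative id):

            obj = MNGObject('some-id')
            obj.load()
            print obj.doc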
233 | If the document already has an in-memory representation, data loaded 234 | from database will be superseded by the in-memory properties unless 235 | "force" is set to True, in which case all in-memory data is 236 | discarded. 237 | 238 | Arguments: 239 | force -- if True will discard any in-memory changes to self.doc 240 | """ 241 | 242 | if self.id is None: 243 | return 244 | 245 | found = self.__class__.collection().find_one({"_id": self.id}) 246 | if found is not None: 247 | if force or self.doc is None: 248 | self.doc = found 249 | else: 250 | self.doc = deepUpdate(found, self.doc) 251 | 252 | self.loaded = True 253 | 254 | 255 | # -------------------------------------------------------------------------- Multiple 256 | @classmethod 257 | def retrieve(cls, id_list=[]): 258 | """ Retrieves multiple documents by id. """ 259 | 260 | found = [] 261 | for document in cls.collection().find({"_id": {"$in": id_list}}): 262 | obj = cls() 263 | obj.update_with(document) 264 | 265 | found.append(obj) 266 | 267 | return found 268 | 269 | 270 | # -------------------------------------------------------------------------- Deletion 271 | def remove(self): 272 | """ Delete from database. """ 273 | 274 | if self.id is None: 275 | raise Exception("This object does not have an id, cannot remove") 276 | 277 | ret = self.__class__.collection().remove(spec_or_id=self.id) 278 | return ret.get('err') is None if ret else False 279 | 280 | 281 | 282 | def deepUpdate(d, u): 283 | """ Deep merges two dictionaries, overwriting "d"'s values with "u"'s where 284 | present. """ 285 | if u is None: 286 | return d 287 | 288 | # if we have "u" and "d" is not a mapping object, we overwrite it with "u" 289 | if d is None or not isinstance(d, collections.Mapping): 290 | return u 291 | 292 | # iterate over keys and values and update 293 | for k, v in u.iteritems(): 294 | if isinstance(v, collections.Mapping): 295 | old = d.get(k) 296 | d[k] = deepUpdate(old, v) if old else v 297 | else: 298 | d[k] = u[k] 299 | 300 | return d 301 | 302 | def deleteSubtree(tree, keypath): 303 | """ Deletes the content at keypath. """ 304 | if not keypath: 305 | raise Exception("You must provide a keypath") 306 | 307 | existing = tree 308 | path = keypath.split('.') 309 | while len(path) > 1: 310 | p = path.pop(0) 311 | existing = existing.get(p) 312 | 313 | # if we don't have a tree to update it's not there anyway, go home 314 | if existing is None: 315 | return tree 316 | 317 | del existing[path[0]] 318 | 319 | return tree 320 | 321 | 322 | def replaceSubtree(tree, keypath, json): 323 | """ Replaces or creates a subtree at keypath.
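    Example (illustrative):

        tree = {'a': {'b': 1}}
        replaceSubtree(tree, 'a.b', 2)         # -> {'a': {'b': 2}}
        replaceSubtree(tree, 'a.c', {'d': 3})  # creates 'c' under 'a'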
""" 324 | if not keypath: 325 | raise Exception("You must provide a keypath") 326 | if json is None: 327 | return deleteSubtree(tree, keypath) 328 | 329 | existing = tree or {} 330 | path = keypath.split('.') 331 | while len(path) > 1: 332 | p = path.pop(0) 333 | previous = existing 334 | existing = existing.get(p) 335 | if existing is None: 336 | existing = {} 337 | previous[p] = existing 338 | 339 | if existing is None: 340 | existing = {} 341 | existing[path[0]] = json 342 | 343 | return tree 344 | 345 | 346 | if '__main__' == __name__: 347 | a = {'a': 1, 'b': 1, 'c': {'ca': 1, 'cb': 1, 'cc': {'cca': 1, 'ccb': 1}}, 'e': {'ea': 1}} 348 | b = {'a': 2, 'c': {'ca': 2, 'cb': {'cba': 2, 'cbb': 2}, 'cd': {'cda': 2, 'cdb': 2, 'cdc': 2}}, 'e': 2} 349 | 350 | print "replaceSubtree()" 351 | print "before ", a 352 | print "replace 1", replaceSubtree(a, 'c.ca', 3) 353 | print "replace 2", replaceSubtree(a, 'c.cc.cca', 3) 354 | print "replace 3", replaceSubtree(a, 'c.ce.cea', 3) 355 | print 356 | print "deleteSubtree()" 357 | print "before ", a 358 | print "delete 1", deleteSubtree(a, 'c.ce.cea') 359 | print "delete 2", deleteSubtree(a, 'd.da.dda') 360 | print 361 | print "deepUpdate(a, b)" 362 | print "a: ", a 363 | print "b: ", b 364 | print "-> ", deepUpdate(a, b) 365 | 366 | -------------------------------------------------------------------------------- /nlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # cTAKES and RegEx wizardry 4 | # 5 | # 2012-12-14 Created by Pascal Pfiffner 6 | # 7 | 8 | import os 9 | import re 10 | import logging 11 | 12 | class NLPProcessing (object): 13 | """ Abstract base class for handling NLP pipelines. """ 14 | # print('\n(nlp.py) Initializing NLP w/ object:', object, '\n') 15 | 16 | def __init__(self): 17 | # print('\n(nlp.py) Setting definitions for self') 18 | 19 | self.name = 'nlp' 20 | self.bin = '.' 21 | self.root = None 22 | self.cleanup = True 23 | self.did_prepare = False 24 | 25 | # print('(nlp.py) Definitions set as:', '\n(nlp.py) Self Name:', self.name, '\n(nlp.py) Self bin:', self.bin, '\n(nlp.py) Self root:', 26 | # self.root, '\n(nlp.py) Self cleanup:', self.cleanup, '\n(nlp.py) Self did prepare:', self.did_prepare, '\n') 27 | 28 | # -------------------------------------------------------------------------- Preparations 29 | def set_relative_root(self, directory): 30 | self.root = os.path.abspath(directory if directory is not None else '.') 31 | 32 | def prepare(self): 33 | """ Performs steps necessary to setup the pipeline, such as creating 34 | input and output directories or pipes. """ 35 | # print('Preparations started w/:\n','Root =', self.root, '\n') 36 | self._prepare() 37 | self.did_prepare = True 38 | 39 | def _prepare(self): 40 | if self.root is None: 41 | raise Exception("No root directory defined for NLP process %s" % self.name) 42 | 43 | if not os.path.exists(self.root): 44 | os.mkdir(self.root) 45 | 46 | self._create_directories_if_needed() 47 | 48 | if not os.path.exists(self.root): 49 | raise Exception( 50 | "Failed to create root directory for NLP process %s" % self.name) 51 | 52 | def _create_directories_if_needed(self): 53 | """ Override to create directories needed to run the pipeline. """ 54 | pass 55 | 56 | # -------------------------------------------------------------------------- Running 57 | def run(self): 58 | """ Runs the NLP pipeline, raises an exception on error. 
""" 59 | if not self.did_prepare: 60 | self.prepare() 61 | self._run() 62 | 63 | def _run(self): 64 | """ Internal use, subclasses should override this method since it is 65 | called after necessary preparation has been performed. """ 66 | raise Exception("Cannot run an abstract NLP pipeline class instance") 67 | 68 | def write_input(self, text, filename): 69 | if not self.did_prepare: 70 | self.prepare() 71 | 72 | return self._write_input(text, filename) 73 | 74 | def _write_input(self, text, filename): 75 | return False 76 | 77 | def parse_output(self, filename, **kwargs): 78 | if not self.did_prepare: 79 | self.prepare() 80 | 81 | return self._parse_output(filename, **kwargs) 82 | 83 | def _parse_output(self, filename, **kwargs): 84 | """ return a dictionary (or None) like: 85 | { 'snomed': [1, 2, 2], 'rxnorm': [4, 5, 6] } 86 | """ 87 | return None 88 | 89 | 90 | # ------------------------------------------------------------------------------ Helper Functions 91 | def split_inclusion_exclusion(string): 92 | """ Returns a tuple of lists describing inclusion and exclusion criteria. 93 | """ 94 | 95 | if not string or len(string)< 1: 96 | raise Exception('No string given') 97 | 98 | # split on newlines 99 | rows = re.compile(r'(?:\n\s*){2,}').split(string) 100 | 101 | # loop all rows 102 | missed = [] 103 | inc = [] 104 | exc = [] 105 | at_inc = False 106 | at_exc = False 107 | 108 | for string in rows: 109 | if len(string) < 1 or 'none' == string: 110 | continue 111 | 112 | clean = re.sub(r'[\n\s]+', ' ', string).strip() 113 | 114 | # detect switching to inclusion criteria 115 | # exclusion criteria sometimes say "None if patients fulfill inclusion 116 | # criteria.", try to avoid detecting that as header! 117 | if re.search(r'^[^\w]*inclusion criteria', clean, re.IGNORECASE) is not None \ 118 | and re.search(r'exclusion', clean, re.IGNORECASE) is None: 119 | at_inc = True 120 | at_exc = False 121 | 122 | # detect switching to exclusion criteria 123 | elif re.search(r'exclusion criteria', clean, re.IGNORECASE) is not None \ 124 | and re.search(r'inclusion', clean, re.IGNORECASE) is None: 125 | at_inc = False 126 | at_exc = True 127 | 128 | # assign accordingly 129 | elif at_inc: 130 | inc.append(clean) 131 | elif at_exc: 132 | exc.append(clean) 133 | else: 134 | missed.append(clean) 135 | 136 | # if there was no inclusion/exclusion split, we assume the text describes inclusion criteria 137 | if len(inc) < 1 or len(exc) < 1: 138 | logging.debug( 139 | "No explicit separation of inclusion/exclusion criteria found, assuming the text to describe inclusion criteria") 140 | inc.extend(missed) 141 | exc = [] 142 | 143 | return (inc, exc) 144 | 145 | 146 | def list_to_sentences(string): 147 | """ Splits text at newlines and puts it back together after stripping new- 148 | lines and enumeration symbols, joined by a period. 149 | """ 150 | if string is None: 151 | return None 152 | 153 | lines = string.splitlines() 154 | 155 | curr = '' 156 | processed = [] 157 | for line in lines: 158 | stripped = line.strip() 159 | 160 | # empty line 161 | if 0 == len(stripped): 162 | if curr: 163 | processed.append(re.sub(r'\.\s*$', '', curr)) 164 | curr = '' 165 | 166 | # beginning a new fragment 167 | elif not curr or 0 == len(curr): 168 | curr = re.sub(r'^[-\d\.\(\)]+\s*', '', stripped) 169 | 170 | # new line item? true when it starts with "-", "1." 
or "1)" (with 171 | # optional dash) or if the indent level is less than before (simple 172 | # whitespace count) (NO LONGER IMPLEMENTED) 173 | elif re.match(r'^-\s+', stripped) \ 174 | or re.match(r'^\d+\.\s+', stripped) \ 175 | or re.match(r'^(-\s*)?\d+\)\s+', stripped): 176 | 177 | if curr: 178 | processed.append(re.sub(r'\.\s*$', '', curr)) 179 | curr = re.sub(r'^(-|(\d+\.)|((-\s*)?\d+\)))\s*', '', stripped) 180 | 181 | # append to previous fragment 182 | else: 183 | curr = '%s %s' % (curr, stripped) 184 | 185 | if curr: 186 | processed.append(re.sub(r'\.\s*$', '', curr)) 187 | 188 | sentences = '. '.join(processed) if len(processed) > 0 else '' 189 | if len(sentences) > 0: 190 | sentences += '.' 191 | 192 | return sentences 193 | 194 | 195 | def list_trim(string): 196 | """ Trim text phases that are part of the string because the string was 197 | pulled off of a list, e.g. a leading "-" or "1." 198 | """ 199 | 200 | string.strip() 201 | string = re.sub('\s+', ' ', string) # multi-whitespace 202 | string = re.sub('^-\s+', '', string, count=1) # leading "-" 203 | string = re.sub('^\d+\.\s+', '', string, count=1) # leading "1." 204 | string = re.sub('^(-\s*)?\d+\)\s+', '', string, count=1) # leading "1)" with optional dash 205 | 206 | return string 207 | -------------------------------------------------------------------------------- /nltktags.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Handling NLTK to generate tags 5 | # 6 | # 2013-10-25 Created by Pascal Pfiffner 7 | # 8 | 9 | import os 10 | import logging 11 | import codecs 12 | import inspect 13 | import nltk 14 | import operator 15 | 16 | from nlp import NLPProcessing, list_to_sentences 17 | 18 | 19 | class NLTKTags (NLPProcessing): 20 | """ Aggregate handling tasks specifically for NLTK. """ 21 | 22 | def __init__(self): 23 | super(NLTKTags, self).__init__() 24 | self.name = 'nltk-tags' 25 | 26 | 27 | @property 28 | def _in_dir(self): 29 | return os.path.join(self.root, 'nltk-tags-in') 30 | 31 | @property 32 | def _out_dir(self): 33 | return os.path.join(self.root, 'nltk-tags-out') 34 | 35 | def _create_directories_if_needed(self): 36 | in_dir = self._in_dir 37 | out_dir = self._out_dir 38 | if not os.path.exists(in_dir): 39 | os.mkdir(in_dir) 40 | if not os.path.exists(out_dir): 41 | os.mkdir(out_dir) 42 | 43 | def _run(self): 44 | in_dir = self._in_dir 45 | out_dir = self._out_dir 46 | if not os.path.exists(in_dir) or not os.path.exists(out_dir): 47 | return 48 | 49 | # init our simple noun-phrase chunker 50 | grammar = r""" 51 | NUM: 52 | {} # "%" is interpreted as NN... 53 | 54 | NBAR: 55 | {**+} # Nouns and Adjectives, terminated with Nouns 56 | 57 | NP: 58 | {} # An NBAR is also a NP 59 | {} # Above, connected with in/of/etc... 
60 | """ 61 | chunker = nltk.RegexpParser(grammar) 62 | 63 | filelist = os.listdir(in_dir) 64 | tag_count = {} 65 | i = 0 66 | for f in filelist: 67 | i = i + 1 68 | logging.debug(" Reading file %d of %d" % (i, len(filelist))) 69 | with codecs.open(os.path.join(in_dir, f), 'r', 'utf-8') as handle: 70 | text = handle.read() 71 | 72 | # use NLTK to chunk the text 73 | chunks = [] 74 | sentences = nltk.sent_tokenize(text) 75 | if sentences and len(sentences) > 0: 76 | for sentence in sentences: 77 | tokens = nltk.word_tokenize(sentence) 78 | tagged = nltk.pos_tag(tokens) 79 | tree = chunker.parse(tagged) 80 | 81 | # get noun phrases 82 | np = [] 83 | for st in _nltk_find_leaves(tree, 'NP'): 84 | leaves = st.leaves() 85 | if len(leaves) > 0: 86 | tag = ' '.join([noun[0] for noun in leaves]).lower() 87 | np.append(tag) 88 | 89 | # count tags 90 | if tag in tag_count: 91 | tag_count[tag] = tag_count[tag] + 1 92 | else: 93 | tag_count[tag] = 1 94 | 95 | if len(np) > 0: 96 | chunks.extend(np) 97 | 98 | # write to outfile 99 | if len(chunks) > 0: 100 | outfile = os.path.join(out_dir, f) 101 | with codecs.open(outfile, 'w', 'utf-8') as w_handle: 102 | for chunk in chunks: 103 | w_handle.write("%s\n" % unicode(chunk)) 104 | 105 | # tag count 106 | if len(tag_count) > 0: 107 | with codecs.open(os.path.join(out_dir, 'tags.txt'), 'w', 'utf-8') as handle: 108 | for tag in sorted(tag_count.iteritems(), key=operator.itemgetter(1), reverse=True): 109 | handle.write("%s: %d\n" % (tag[0], int(tag[1]))) 110 | 111 | 112 | def _write_input(self, text, filename): 113 | if text is None \ 114 | or len(text) < 1 \ 115 | or filename is None: 116 | return False 117 | 118 | in_dir = self._in_dir 119 | if not os.path.exists(in_dir): 120 | logging.error("The input directory for %s at %s does not exist" % (self.name, in_dir)) 121 | return False 122 | 123 | infile = os.path.join(in_dir, filename) 124 | if os.path.exists(infile): 125 | return False 126 | 127 | # write it 128 | with codecs.open(infile, 'w', 'utf-8') as handle: 129 | # handle.write(unicode(text)) 130 | # handle.write("\n=====\n") 131 | handle.write(unicode(list_to_sentences(text))) 132 | 133 | return True 134 | 135 | 136 | def _parse_output(self, filename, **kwargs): 137 | """ Parse NLTK output. """ 138 | 139 | if filename is None: 140 | return None 141 | 142 | # is there cTAKES output? 143 | out_dir = self._out_dir 144 | if not os.path.exists(out_dir): 145 | logging.error("The output directory for %s at %s does not exist" % (self.name, out_dir)) 146 | return None 147 | 148 | outfile = os.path.join(out_dir, filename) 149 | if not os.path.exists(outfile): 150 | # do not log here and silently fail 151 | return None 152 | 153 | tags = [] 154 | 155 | # read tags 156 | with codecs.open(outfile, 'r', 'utf-8') as handle: 157 | #line = handle.readline(keepends=False) # "keepends" not supported in readline! 
(http://bugs.python.org/issue8630) 158 | lines = handle.readlines() 159 | for line in lines: 160 | tags.append(line.strip()) 161 | 162 | # create and return a dictionary (don't filter empty lists) 163 | ret = { 164 | 'tags': tags, 165 | } 166 | 167 | # clean up 168 | if self.cleanup: 169 | os.remove(outfile) 170 | 171 | in_dir = self._in_dir 172 | infile = os.path.join(in_dir, filename) 173 | if os.path.exists(infile): 174 | os.remove(infile) 175 | 176 | return ret 177 | 178 | 179 | def _nltk_find_leaves(tree, leave_name): 180 | try: 181 | tree.node 182 | except AttributeError: 183 | return [] 184 | 185 | res = [] 186 | if leave_name == tree.node: 187 | res.append(tree) 188 | else: 189 | for child in tree: 190 | leaves = _nltk_find_leaves(child, leave_name) 191 | if len(leaves) > 0: 192 | res.extend(leaves) 193 | 194 | return res 195 | 196 | 197 | # we can execute this file to do some testing 198 | if '__main__' == __name__: 199 | testtext = "History of clincally significant hypogammaglobulinemia, common variable immunodeficiency, or humeral immunodeficiency." 200 | testfile = 'test.txt' 201 | 202 | run_dir = os.path.join(os.path.dirname(__file__), 'nltk-tags-test') 203 | my_nlp = NLTKTags({'root': run_dir, 'cleanup': True}) 204 | my_nlp.prepare() 205 | 206 | # create test input 207 | if not my_nlp.write_input(testtext, testfile): 208 | print "xx> Failed to write test input to file" 209 | 210 | # run 211 | try: 212 | my_nlp.run() 213 | except Exception as e: 214 | print "xx> Failed: %s" % e 215 | 216 | # parse output 217 | ret = my_nlp.parse_output(testfile) 218 | print ret 219 | 220 | # clean up 221 | os.rmdir(my_nlp._in_dir) 222 | os.rmdir(my_nlp._out_dir) 223 | os.rmdir(run_dir) 224 | 225 | print "--> Done" 226 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import connexion 2 | 3 | # Create the application instance 4 | app = connexion.App(__name__, specification_dir='./') 5 | 6 | # Read the swagger.yml file to configure the endpoints 7 | app.add_api('swagger.yml') 8 | 9 | # Create a URL route in our application for "/" 10 | 11 | 12 | @app.route('/') 13 | def home(): 14 | 15 | return "Works!" 16 | 17 | 18 | # If we're running in stand alone mode, run the application 19 | if __name__ == '__main__': 20 | app.run(host='0.0.0.0', port=5000, debug=True) 21 | -------------------------------------------------------------------------------- /sqlite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Simplifying SQLite access 4 | # 5 | # 2012-12-14 Created by Pascal Pfiffner 6 | # 7 | 8 | 9 | import sqlite3 10 | import threading 11 | 12 | 13 | SQLITE_INSTANCES = {} 14 | 15 | 16 | class SQLite (object): 17 | """ SQLite access 18 | """ 19 | 20 | @classmethod 21 | def get(cls, database): 22 | """ Use this to get SQLite instances for a given database. Avoids 23 | creating multiple instances for the same database. 24 | 25 | We keep instances around per thread per database, maybe there should be 26 | a way to turn this off. However, here we always release instances for 27 | threads that are no longer alive. If this is better than just always 28 | creating a new instance should be tested. 
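        Example (illustrative database path and query):

            db = SQLite.get('databases/storage.db')
            for row in db.execute('SELECT * FROM docs WHERE id = ?', (1,)):
                print row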
29 | """ 30 | 31 | global SQLITE_INSTANCES 32 | 33 | # group per thread 34 | thread_id = threading.current_thread().ident 35 | if thread_id not in SQLITE_INSTANCES: 36 | SQLITE_INSTANCES[thread_id] = {} 37 | by_thread = SQLITE_INSTANCES[thread_id] 38 | 39 | # group per database 40 | if database not in by_thread: 41 | sql = SQLite(database) 42 | by_thread[database] = sql 43 | 44 | # free up memory for terminated threads 45 | clean = {} 46 | for alive in threading.enumerate(): 47 | if alive.ident in SQLITE_INSTANCES: 48 | clean[alive.ident] = SQLITE_INSTANCES[alive.ident] 49 | SQLITE_INSTANCES = clean 50 | 51 | return by_thread[database] 52 | 53 | 54 | def __init__(self, database=None): 55 | if database is None: 56 | raise Exception('No database provided') 57 | 58 | self.database = database 59 | self.handle = None 60 | self.cursor = None 61 | 62 | 63 | def execute(self, sql, params=()): 64 | """ Executes an SQL command and returns the cursor.execute, which can 65 | be used as an iterator. 66 | Supply the params as tuple, i.e. (param,) and (param1,param2,...) 67 | """ 68 | if not sql or len(sql) < 1: 69 | raise Exception('No SQL to execute') 70 | if not self.cursor: 71 | self.connect() 72 | 73 | return self.cursor.execute(sql, params) 74 | 75 | 76 | def executeInsert(self, sql, params=()): 77 | """ Executes an SQL command (should be INSERT OR REPLACE) and returns 78 | the last row id, 0 on failure. 79 | """ 80 | if self.execute(sql, params): 81 | return self.cursor.lastrowid if self.cursor.lastrowid else 0 82 | 83 | return 0 84 | 85 | 86 | def executeUpdate(self, sql, params=()): 87 | """ Executes an SQL command (should be UPDATE) and returns the number 88 | of affected rows. 89 | """ 90 | if self.execute(sql, params): 91 | return self.cursor.rowcount 92 | 93 | return 0 94 | 95 | 96 | def executeOne(self, sql, params): 97 | """ Returns the first row returned by executing the command 98 | """ 99 | self.execute(sql, params) 100 | return self.cursor.fetchone() 101 | 102 | 103 | def create(self, table_name, table_structure): 104 | """ Executes a CREATE TABLE IF NOT EXISTS query with the given structure. 105 | Input is NOT sanitized, watch it! 106 | """ 107 | create_query = 'CREATE TABLE IF NOT EXISTS %s %s' % (table_name, table_structure) 108 | self.execute(create_query) 109 | return True 110 | 111 | 112 | def commit(self): 113 | self.handle.commit() 114 | 115 | 116 | def connect(self): 117 | if self.cursor is not None: 118 | return 119 | 120 | self.handle = sqlite3.connect(self.database) 121 | self.cursor = self.handle.cursor() 122 | 123 | 124 | def close(self): 125 | if self.cursor is None: 126 | return 127 | 128 | self.handle.close() 129 | self.cursor = None 130 | self.handle = None 131 | 132 | 133 | # singleton init whack-a-hack 134 | #SQLite = _SQLite() 135 | #del _SQLite 136 | -------------------------------------------------------------------------------- /swagger.yml: -------------------------------------------------------------------------------- 1 | swagger: "2.0" 2 | info: 3 | description: Coonects to the cTAKES Default Clinical Pipeline through a RESTful service! 
4 | version: "1.0.0" 5 | title: cTAKES RESTful API 6 | consumes: 7 | - "application/json" 8 | produces: 9 | - "application/json" 10 | 11 | basePath: "/api" 12 | 13 | paths: 14 | /defaultClinicalPipeline: 15 | get: 16 | operationId: "files.read" 17 | tags: 18 | - "Default Clinical Pipeline" 19 | summary: "Plain text file structure supported by the server application" 20 | description: "Read plain text file" 21 | responses: 22 | 200: 23 | description: "Successful read plain text file operation!" 24 | schema: 25 | type: "array" 26 | items: 27 | properties: 28 | id: 29 | type: "string" 30 | title: 31 | type: "string" 32 | author: 33 | type: "string" 34 | client_name: 35 | type: "string" 36 | timestamp: 37 | type: "string" -------------------------------------------------------------------------------- /umls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # utilities to handle UMLS 5 | # 6 | # 2013-01-01 Created by Pascal Pfiffner 7 | # 8 | 9 | 10 | import csv 11 | import sys 12 | import os.path 13 | import logging 14 | 15 | from sqlite import SQLite 16 | 17 | 18 | class UMLS (object): 19 | """ A class for importing UMLS terminologies into an SQLite database. 20 | """ 21 | 22 | @classmethod 23 | def check_databases(cls): 24 | """ Check if our databases are in place and if not, import them. 25 | Will raise on errors! 26 | 27 | UMLS: (umls.db) 28 | If missing, prompt to use the `umls.sh` script 29 | 30 | SNOMED: (snomed.db) 31 | Read SNOMED CT from tab-separated files and create an SQLite database. 32 | """ 33 | 34 | # UMLS 35 | umls_db = os.path.join('databases', 'umls.db') 36 | if not os.path.exists(umls_db): 37 | raise Exception("The UMLS database at %s does not exist. Run the import script `databases/umls.sh`." % umls_db) 38 | 39 | # SNOMED 40 | SNOMED.sqlite_handle = None 41 | try: 42 | SNOMED.setup_tables() 43 | except Exception as e: 44 | raise Exception("SNOMED setup failed: %s" % e) 45 | 46 | # RxNorm 47 | rxnorm_db = os.path.join('databases', 'rxnorm.db') 48 | if not os.path.exists(rxnorm_db): 49 | raise Exception("The RxNorm database at %s does not exist. Run the import script `databases/rxnorm.sh`." % rxnorm_db) 50 | 51 | else: 52 | rx_map = { 53 | 'descriptions': 'snomed_desc.csv', 54 | 'relationships': 'snomed_rel.csv' 55 | } 56 | 57 | # need to import? 58 | for table, filename in rx_map.iteritems(): 59 | num_query = 'SELECT COUNT(*) FROM %s' % table 60 | num_existing = SNOMED.sqlite_handle.executeOne(num_query, ())[0] 61 | if num_existing > 0: 62 | continue 63 | 64 | snomed_file = os.path.join('databases', filename) 65 | if not os.path.exists(snomed_file): 66 | raise Exception("Need to import SNOMED, but the file %s is not present. Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html" % filename) 67 | 68 | SNOMED.import_csv_into_table(snomed_file, table) 69 | 70 | 71 | 72 | class UMLSLookup (object): 73 | """ UMLS lookup """ 74 | 75 | sqlite_handle = None 76 | did_check_dbs = False 77 | preferred_sources = ['"SNOMEDCT"', '"MTH"'] 78 | 79 | def __init__(self): 80 | self.sqlite = SQLite.get('databases/umls.db') 81 | 82 | def lookup_code(self, cui, preferred=True): 83 | """ Return a list with triples that contain: 84 | - name 85 | - source 86 | - semantic type 87 | by looking it up in our "descriptions" database. 88 | The "preferred" setting has the effect that only names from SNOMED 89 | (SNOMEDCT) and the Metathesaurus (MTH) will be reported.
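        Example result shape (values are illustrative):

            [("Atrial fibrillation", "SNOMEDCT", "Disease or Syndrome")]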
A lookup in 90 | our "descriptions" table is much faster than combing through the full 91 | MRCONSO table. 92 | """ 93 | if cui is None or len(cui) < 1: 94 | return [] 95 | 96 | # lazy UMLS db checking 97 | if not UMLSLookup.did_check_dbs: 98 | UMLSLookup.did_check_dbs = True 99 | try: 100 | UMLS.check_databases() 101 | except Exception as e: 102 | logging.error(e) 103 | # should this crash and burn? 104 | 105 | # take care of negations 106 | negated = '-' == cui[0] 107 | if negated: 108 | cui = cui[1:] 109 | 110 | parts = cui.split('@', 1) 111 | lookup_cui = parts[0] 112 | 113 | # STR: Name 114 | # SAB: Abbreviated Source Name 115 | # STY: Semantic Type 116 | if preferred: 117 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN (%s)' % ", ".join(UMLSLookup.preferred_sources) 118 | else: 119 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?' 120 | 121 | # return as list 122 | arr = [] 123 | for res in self.sqlite.execute(sql, (lookup_cui,)): 124 | if negated: 125 | arr.append(("[NEGATED] %s" % res[0], res[1], res[2])) 126 | else: 127 | arr.append(res) 128 | 129 | return arr 130 | 131 | 132 | def lookup_code_meaning(self, cui, preferred=True, no_html=True): 133 | """ Return a string (an empty string if the cui is null or not found) 134 | by looking it up in our "descriptions" database. 135 | The "preferred" settings has the effect that only names from SNOMED 136 | (SNOMEDCD) and the Metathesaurus (MTH) will be reported. A lookup in 137 | our "descriptions" table is much faster than combing through the full 138 | MRCONSO table. 139 | """ 140 | names = [] 141 | for res in self.lookup_code(cui, preferred): 142 | if no_html: 143 | names.append("%s (%s) [%s]" % (res[0], res[1], res[2])) 144 | else: 145 | names.append("%s (%s: %s)" % (res[0], res[1], res[2])) 146 | 147 | comp = ", " if no_html else "
\n" 148 | return comp.join(names) if len(names) > 0 else '' 149 | 150 | 151 | 152 | class SNOMED (object): 153 | sqlite_handle = None 154 | 155 | # -------------------------------------------------------------------------- Database Setup 156 | @classmethod 157 | def import_csv_into_table(cls, snomed_file, table_name): 158 | """ Import SNOMED CSV into our SQLite database. 159 | The SNOMED CSV files can be parsed by Python's CSV parser with the 160 | "excel-tab" flavor. 161 | """ 162 | 163 | logging.debug('..> Importing SNOMED %s into snomed.db...' % table_name) 164 | 165 | # not yet imported, parse tab-separated file and import 166 | with open(snomed_file, 'rb') as csv_handle: 167 | cls.sqlite_handle.isolation_level = 'EXCLUSIVE' 168 | sql = cls.insert_query_for(table_name) 169 | reader = unicode_csv_reader(csv_handle, dialect='excel-tab') 170 | i = 0 171 | try: 172 | for row in reader: 173 | if i > 0: # first row is the header row 174 | 175 | # execute SQL (we just ignore duplicates) 176 | params = cls.insert_tuple_from_csv_row_for(table_name, row) 177 | try: 178 | cls.sqlite_handle.execute(sql, params) 179 | except Exception as e: 180 | sys.exit(u'Cannot insert %s: %s' % (params, e)) 181 | i += 1 182 | 183 | # commit to file 184 | cls.sqlite_handle.commit() 185 | cls.did_import(table_name) 186 | cls.sqlite_handle.isolation_level = None 187 | 188 | except csv.Error as e: 189 | sys.exit('CSV error on line %d: %s' % (reader.line_num, e)) 190 | 191 | logging.debug('..> %d concepts parsed' % (i-1)) 192 | 193 | 194 | @classmethod 195 | def setup_tables(cls): 196 | """ Creates the SQLite tables we need, not the tables we deserve. 197 | """ 198 | if cls.sqlite_handle is None: 199 | cls.sqlite_handle = SQLite.get('databases/snomed.db') 200 | 201 | # descriptions 202 | cls.sqlite_handle.create('descriptions', '''( 203 | concept_id INTEGER PRIMARY KEY, 204 | lang TEXT, 205 | term TEXT, 206 | isa VARCHAR, 207 | active INT 208 | )''') 209 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)") 210 | 211 | # relationships 212 | cls.sqlite_handle.create('relationships', '''( 213 | relationship_id INTEGER PRIMARY KEY, 214 | source_id INT, 215 | destination_id INT, 216 | rel_type INT, 217 | rel_text VARCHAR, 218 | active INT 219 | )''') 220 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)") 221 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)") 222 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_type_index ON relationships (rel_type)") 223 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)") 224 | 225 | 226 | @classmethod 227 | def insert_query_for(cls, table_name): 228 | """ Returns the insert query needed for the given table 229 | """ 230 | if 'descriptions' == table_name: 231 | return '''INSERT OR IGNORE INTO descriptions 232 | (concept_id, lang, term, isa, active) 233 | VALUES 234 | (?, ?, ?, ?, ?)''' 235 | if 'relationships' == table_name: 236 | return '''INSERT OR IGNORE INTO relationships 237 | (relationship_id, source_id, destination_id, rel_type, active) 238 | VALUES 239 | (?, ?, ?, ?, ?)''' 240 | return None 241 | 242 | 243 | @classmethod 244 | def insert_tuple_from_csv_row_for(cls, table_name, row): 245 | if 'descriptions' == table_name: 246 | isa = '' 247 | if len(row) > 6: 248 | if '900000000000013009' == row[6]: 249 | isa = 'synonym' 250 | elif '900000000000003001' == row[6]: 251 | isa = 
'full' 252 | return (int(row[4]), row[5], row[7], isa, int(row[2])) 253 | if 'relationships' == table_name: 254 | return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2])) 255 | return None 256 | 257 | 258 | @classmethod 259 | def did_import(cls, table_name): 260 | """ Allows us to set hooks after tables have been imported 261 | """ 262 | if 'relationships' == table_name: 263 | cls.sqlite_handle.execute(''' 264 | UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003 265 | ''') 266 | cls.sqlite_handle.execute(''' 267 | UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007 268 | ''') 269 | 270 | 271 | 272 | class SNOMEDLookup (object): 273 | """ SNOMED lookup """ 274 | 275 | sqlite_handle = None 276 | 277 | 278 | def __init__(self): 279 | self.sqlite = SQLite.get('databases/snomed.db') 280 | 281 | def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True): 282 | """ Returns HTML for all matches of the given SNOMED id. 283 | The "preferred" flag here currently has no function. 284 | """ 285 | if snomed_id is None or len(snomed_id) < 1: 286 | return '' 287 | 288 | sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?' 289 | names = [] 290 | 291 | # loop over results 292 | for res in self.sqlite.execute(sql, (snomed_id,)): 293 | if not no_html and ('synonym' == res[1] or 0 == res[2]): 294 | names.append("%s" % res[0]) 295 | else: 296 | names.append(res[0]) 297 | 298 | if no_html: 299 | return ", ".join(names) if len(names) > 0 else '' 300 | return "
\n".join(names) if len(names) > 0 else '' 301 | 302 | 303 | 304 | class RxNormLookup (object): 305 | """ RxNorm lookup """ 306 | 307 | sqlite_handle = None 308 | 309 | 310 | def __init__(self): 311 | self.sqlite = SQLite.get('databases/rxnorm.db') 312 | 313 | def lookup_code_meaning(self, rx_id, preferred=True, no_html=True): 314 | """ Return HTML for the meaning of the given code. 315 | If preferred is True (the default), only one match will be returned, 316 | looking for specific TTY and using the "best" one. """ 317 | if rx_id is None or len(rx_id) < 1: 318 | return '' 319 | 320 | # retrieve all matches 321 | sql = 'SELECT STR, TTY, RXAUI FROM RXNCONSO WHERE RXCUI = ? AND LAT = "ENG"' 322 | found = [] 323 | names = [] 324 | format_str = "%s [%s]" 325 | 326 | # loop over them 327 | for res in self.sqlite.execute(sql, (rx_id,)): 328 | found.append(res) 329 | 330 | if len(found) > 0: 331 | 332 | # preferred name only 333 | if preferred: 334 | for tty in ['BN', 'IN', 'PIN', 'SBDC', 'SCDC', 'SBD', 'SCD', 'MIN']: 335 | for res in found: 336 | if tty == res[1]: 337 | names.append(format_str % (res[2], res[0], res[1])) 338 | break 339 | else: 340 | continue 341 | break 342 | 343 | if len(names) < 1: 344 | res = found[0] 345 | names.append(format_str % (res[2], res[0], res[1])) 346 | 347 | # return a list of all names 348 | else: 349 | for res in found: 350 | names.append(format_str % (res[2], res[0], res[1])) 351 | 352 | return "
\n".join(names) if len(names) > 0 else '' 353 | 354 | 355 | 356 | # the standard Python CSV reader can't do unicode, here's the workaround 357 | def unicode_csv_reader(utf8_data, dialect=csv.excel, **kwargs): 358 | csv_reader = csv.reader(utf8_data, dialect=dialect, **kwargs) 359 | for row in csv_reader: 360 | yield [unicode(cell, 'utf-8') for cell in row] 361 | 362 | --------------------------------------------------------------------------------