├── .gitignore ├── README.md ├── __init__.py ├── ctakes-install ├── ctakes-server-setup.sh ├── ctakes-user-install.sh ├── log4j.xml └── run.sh ├── ctakes.py ├── databases ├── .gitignore ├── rxnorm.sh └── umls.sh ├── dateutil ├── LICENSE ├── __init__.py ├── easter.py ├── parser.py ├── relativedelta.py ├── rrule.py ├── tz.py └── tzwin.py ├── dbobject.py ├── files.py ├── mngobject.py ├── nlp.py ├── nltktags.py ├── server.py ├── sqlite.py ├── swagger.yml └── umls.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | 4 | # ignore settings 5 | umls.sh 6 | 7 | # ignore cTAKES install 8 | ctakes 9 | ctakes-svn 10 | ctakes-test 11 | apache-ctakes-4.0.0 12 | 13 | # ignore MetaMap install 14 | metamap 15 | metamap-test 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### What you'll be able to do ### 2 | 3 | 1. Interact with the cTAKES `Default Clinical Pipeline` through a Python RESTful API to produce annotations for: 4 | - Anatomical sites, 5 | - Signs/symptoms, 6 | - Procedures, 7 | - Diseases/disorders and 8 | - Medications. 9 | 10 | **Input:** `Plain Text File` **Output:** `XMI File` 11 | 12 | **Original wiki page:** https://cwiki.apache.org/confluence/display/CTAKES/Default+Clinical+Pipeline 13 | 14 | ### cTAKES Install Instructions ### 15 | 16 | 1. Execute `./ctakes-install/ctakes-user-install.sh`, which will: 17 | - Download a copy of cTAKES into `./ctakes-install/tmp` 18 | - Extract cTAKES and copy it into the `ctakes-install` (cTAKES_HOME) directory 19 | - Download `ctakes-resources-4.0-bin.zip` into `./ctakes-install/tmp` 20 | - Unzip `ctakes-resources-4.0-bin.zip` and copy its contents into `apache-ctakes-4.0.0/resources` 21 | - Remove the temporary `tmp` directory from `ctakes-install` 22 | - Prompt you to set your UMLS credentials in `umls.sh` 23 | 24 | Note: If you don't have a UMLS username & password, you'll need to request one at https://uts.nlm.nih.gov/license.html 25 | 26 | ### Setting up Python RESTful API Instructions ### 27 | 28 | COMING SOON! 29 | 30 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/__init__.py -------------------------------------------------------------------------------- /ctakes-install/ctakes-server-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | -------------------------------------------------------------------------------- /ctakes-install/ctakes-user-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #TODO: Add better error handling/troubleshooting.
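# A minimal sketch for the TODO above (an assumption, not part of the original script): fail fast and report the failing line. Uncomment to enable. # set -Eeuo pipefail # trap 'printf "\n\033[91m\u2573\033[0m Install failed at line $LINENO\n" >&2' ERR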
4 | 5 | ### Script Beginning ### 6 | 7 | PWD=$(pwd) 8 | ORIG=$(echo $PWD/$(dirname $0) | sed 's#/\.##') 9 | CTAKES_HOME="$ORIG/apache-ctakes-4.0.0" 10 | 11 | #FIXME: Fix output formatting 12 | progressfilt () 13 | { 14 | local flag=false c count cr=$'\r' nl=$'\n' 15 | while IFS='' read -d '' -rn 1 c 16 | 17 | do 18 | if $flag 19 | then 20 | printf '%c' "$c" 21 | else 22 | if [[ $c != $cr && $c != $nl ]] 23 | then 24 | count=0 25 | else 26 | ((count++)) 27 | if ((count > 1)) 28 | then 29 | flag=true 30 | fi 31 | fi 32 | fi 33 | done 34 | } 35 | 36 | printf "\n\033[92m\u0F36\033[0m Install directory: $CTAKES_HOME \n" 37 | 38 | ### Checking for dependencies ### 39 | 40 | printf "\n\033[92m\u0F36\033[0m Checking for dependencies...\n" 41 | 42 | # Java Check # 43 | 44 | if type -p java >/dev/null 2>&1; then 45 | _java=java 46 | elif [[ -n "$JAVA_HOME" ]] && [[ -x "$JAVA_HOME/bin/java" ]]; then 47 | _java="$JAVA_HOME/bin/java" 48 | else 49 | printf "\n \u2573 Java wasn't found. Please install Java 1.8 or greater and try again!" 50 | exit 1 51 | fi 52 | 53 | if [[ "$_java" ]]; then 54 | version=$("$_java" -version 2>&1 | awk -F '"' '/version/ {print $2}') 55 | if [[ "$version" == "1.8" || "$version" > "1.8" ]]; then 56 | printf "\n \033[92m\u2713\033[0m Java 1.8 or greater is installed!\n" 57 | else 58 | printf "\n \033[91m\u2573\033[0m Current Java version is $version; please upgrade to Java 1.8 or greater!\n" 59 | exit 1 60 | fi 61 | fi 62 | 63 | # Warn if install exists # 64 | 65 | if [ -d "$CTAKES_HOME" ]; then 66 | printf "\n \033[91m\u2573\033[0m cTAKES install already exists!\n\n" 67 | exit 1 68 | fi 69 | 70 | # Download cTAKES user install file (Linux) # 71 | if [ ! -d "$CTAKES_HOME" ]; then 72 | printf "\n\033[92m\u0F36\033[0m Downloading: apache-ctakes-4.0.0-bin.tar.gz\n\n" 73 | 74 | wget --progress=bar:force http://www-eu.apache.org/dist/ctakes/ctakes-4.0.0/apache-ctakes-4.0.0-bin.tar.gz -P "$ORIG/tmp/" 2>&1 | progressfilt 75 | tar -xvf "$ORIG/tmp/apache-ctakes-4.0.0-bin.tar.gz" -C "$ORIG" 76 | fi 77 | 78 | # Get resource files # 79 | 80 | printf "\n\033[92m\u0F36\033[0m Downloading: ctakes-resources-4.0-bin.zip\n\n" 81 | cd "$ORIG/tmp" 82 | wget --progress=bar:force http://sourceforge.net/projects/ctakesresources/files/ctakes-resources-4.0-bin.zip -P "$ORIG/tmp/" 2>&1 | progressfilt 83 | 84 | printf "\033[92m\u0F36\033[0m Unzipping and moving resource files...\n\n" 85 | unzip ctakes-resources-4.0-bin.zip 86 | cp -R "$ORIG/tmp/resources/"* "$ORIG/apache-ctakes-4.0.0/resources" 87 | rm -r "$ORIG/tmp/" 88 | 89 | # Update UMLS Credentials # 90 | if [ ! -f "$ORIG/umls.sh" ]; then 91 | read -r -p " 92 | ༶ Add UMLS credentials? [y/N] " response 93 | response=${response,,} 94 | 95 | cd "$ORIG" 96 | 97 | if [[ "$response" =~ ^(yes|y)$ ]]; 98 | then 99 | touch "$ORIG/umls.sh" 100 | printf "#!/bin/bash \n\nUMLS_USERNAME=\"SAMPLE_USER\"\nUMLS_PASSWORD=\"SAMPLE_PASSWORD\"\n\nexport UMLS_USERNAME\nexport UMLS_PASSWORD" >> "$ORIG/umls.sh" 101 | chmod +x "$ORIG/umls.sh"
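# The generated umls.sh then looks like this before the SAMPLE_* placeholders are replaced below (shown for reference only): # #!/bin/bash # UMLS_USERNAME="SAMPLE_USER" # UMLS_PASSWORD="SAMPLE_PASSWORD" # export UMLS_USERNAME # export UMLS_PASSWORD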
[y/N] " response 93 | response=${response,,} 94 | 95 | cd ../ 96 | 97 | if [[ "$response" =~ ^(yes|y)$ ]]; 98 | then 99 | touch $PWD/umls.sh 100 | printf "#!/bin/bash \n\nUMLS_USERNAME=\"SAMPLE_USER\"\nUMLS_PASSWORD=\"SAMPLE_PASSWORD\"\n\nexport UMLS_USERNAME\nexport UMLS_PASSWORD" >> $PWD/umls.sh 101 | chmod +x $PWD/umls.sh 102 | 103 | read -r -p "༶ Username: `echo $'\n> '`" username 104 | username=${username,,} 105 | 106 | set_password() { 107 | 108 | read -rs -p "༶ Password: `echo $'\n> '`" password_1 109 | password_1=${password_1} 110 | 111 | read -rs -p "`echo $'\r'`༶ Verify Password: `echo $'\n> '`" password_2 112 | password_2=${password_2} 113 | 114 | if [[ $password_1 = $password_2 ]];then 115 | 116 | sed -i -e "s/SAMPLE_USER/$username/g" $PWD/umls.sh 117 | sed -i -e "s/SAMPLE_PASSWORD/$password_1/g" $PWD/umls.sh 118 | 119 | else 120 | printf "\n༶ Password mismatch try again...\n" 121 | set_password 122 | fi 123 | } 124 | set_password 125 | printf "\n\033[92m\u0F36\033[0m UMLS credentials updated!\n" 126 | else 127 | printf "\n\033[92m\u0F36\033[0m No worries you can add them manually later!\n" 128 | fi 129 | fi 130 | printf "\n\u0FC9 DONE!\n\n" -------------------------------------------------------------------------------- /ctakes-install/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /ctakes-install/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Requires JAVA JDK 1.8+ 4 | 5 | # Check for UMLS credentials 6 | if [ ! -f $PWD/ctakes-install/umls.sh ]; then 7 | printf "\033[91mERROR:\033[0m You need to provide UMLS credentials in the file ./umls.sh" 1>&2 8 | exit 1 9 | else 10 | # Source UMLS credentials 11 | printf "\033[92m\u0F36\033[0m UMLS credentials file confirmed!\n\n" 12 | . 
14 | 15 | # Only set CTAKES_HOME if not already set 16 | [ -z "$CTAKES_HOME" ] && CTAKES_HOME="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )/apache-ctakes-4.0.0" 17 | REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." >/dev/null && pwd )" 18 | cd "$CTAKES_HOME" 19 | 20 | # Launch 21 | 22 | bin/runClinicalPipeline.sh -i "$REPO_ROOT/ctakes-test/ctakes_input" --xmiOut "$REPO_ROOT/ctakes-test/ctakes_output" --user "$UMLS_USERNAME" --pass "$UMLS_PASSWORD" 23 | -------------------------------------------------------------------------------- /ctakes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Handling cTAKES 5 | # 6 | # 2013-05-14 Created by Pascal Pfiffner 7 | # 8 | 9 | import os 10 | import logging 11 | import codecs 12 | import inspect 13 | 14 | from xml.dom.minidom import parse 15 | from subprocess import call 16 | 17 | from nlp import NLPProcessing, list_to_sentences 18 | 19 | class cTAKES(NLPProcessing): 20 | def __init__(self, settings): 21 | 22 | # print('(ctakes.py) Settings being used:', settings) 23 | super().__init__() 24 | 25 | self.name = 'ctakes' 26 | self.bin = os.path.dirname(os.path.abspath('%s/../' % inspect.getfile(inspect.currentframe()))) 27 | self.root = settings['root'] 28 | self.cleanup = settings['cleanup'] 29 | 30 | # print('(ctakes.py) Self name:', self.name) 31 | # print('(ctakes.py) Self bin:', self.bin) 32 | # print('(ctakes.py) Self root:', self.root) 33 | # print('(ctakes.py) Self cleanup:', self.cleanup, '\n') 34 | 35 | @property 36 | def _in_dir(self): 37 | return os.path.join(self.root, 'ctakes_input') 38 | 39 | @property 40 | def _out_dir(self): 41 | return os.path.join(self.root, 'ctakes_output') 42 | 43 | def _create_directories_if_needed(self): 44 | in_dir = self._in_dir 45 | out_dir = self._out_dir 46 | if not os.path.exists(in_dir): 47 | os.mkdir(in_dir) 48 | if not os.path.exists(out_dir): 49 | os.mkdir(out_dir) 50 | 51 | def _run(self): 52 | if call(['{}/cTAKES-Python-API/ctakes-install/run.sh'.format(self.bin)]) != 0: 53 | raise Exception('Error running cTAKES') 54 | 55 | def _write_input(self, text, filename): 56 | if text is None \ 57 | or len(text) < 1 \ 58 | or filename is None: 59 | return False 60 | 61 | in_dir = os.path.join( 62 | self.root if self.root is not None else '.', 'ctakes_input') 63 | if not os.path.exists(in_dir): 64 | logging.error( 65 | "The input directory for cTAKES at %s does not exist" % in_dir) 66 | return False 67 | 68 | infile = os.path.join(in_dir, filename) 69 | if os.path.exists(infile): 70 | return False 71 | 72 | # write it 73 | with codecs.open(infile, 'w', 'utf-8') as handle: 74 | handle.write(list_to_sentences(text)) 75 | 76 | return True 77 |
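# Typical round trip (a sketch; prepare() and run() come from the NLPProcessing base class in nlp.py, as exercised in the __main__ block below): # ct = cTAKES({'root': 'ctakes-test', 'cleanup': True}) # ct.prepare() # ct._write_input("Patient denies chest pain.", 'note.txt') # ct.run() # codes = ct._parse_output('note.txt') # -> {'snomed': [...], 'cui': [...], 'rxnorm': [...]}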
78 | def _parse_output(self, filename, **kwargs): 79 | """ Parse cTAKES XML output. """ 80 | 81 | if filename is None: 82 | return None 83 | 84 | # is there cTAKES output? 85 | root = self.root if self.root is not None else '.' 86 | out_dir = os.path.join(root, 'ctakes_output') 87 | if not os.path.exists(out_dir): 88 | logging.error( 89 | "The output directory for cTAKES at %s does not exist" % out_dir) 90 | return None 91 | 92 | outfile = os.path.join(out_dir, "%s.xmi" % filename) 93 | if not os.path.exists(outfile): 94 | # do not log here; fail silently 95 | return None 96 | 97 | snomeds = [] 98 | cuis = [] 99 | rxnorms = [] 100 | 101 | # parse XMI file 102 | dom = parse(outfile).documentElement 103 | 104 | # get all "textsem:EntityMention" nodes, which store negation information 105 | neg_ids = [] 106 | for node in dom.getElementsByTagName('textsem:EntityMention'): 107 | polarity = node.attributes.get('polarity') 108 | if polarity is not None and int(polarity.value) < 0: 109 | ids = node.attributes.get('ontologyConceptArr') 110 | if ids is not None and ids.value: 111 | neg_ids.extend([int(i) for i in ids.value.split()]) 112 | 113 | # pluck apart nodes that carry codified data ("refsem" namespace) 114 | code_nodes = dom.getElementsByTagNameNS( 115 | 'http:///org/apache/ctakes/typesystem/type/refsem.ecore', '*') 116 | if len(code_nodes) > 0: 117 | for node in code_nodes: 118 | # print(node.toprettyxml()) 119 | 120 | # check if this node is negated 121 | is_neg = False 122 | node_id_attr = node.attributes.get('xmi:id') 123 | if node_id_attr is not None: 124 | is_neg = int(node_id_attr.value) in neg_ids 125 | 126 | # extract SNOMED and RxNorm codes 127 | if 'codingScheme' in node.attributes.keys() \ 128 | and 'code' in node.attributes.keys(): 129 | code = node.attributes['code'].value 130 | if is_neg: 131 | code = "-%s" % code 132 | 133 | # extract SNOMED code 134 | if 'SNOMED' == node.attributes['codingScheme'].value: 135 | snomeds.append(code) 136 | 137 | # extract RXNORM code 138 | elif 'RXNORM' == node.attributes['codingScheme'].value: 139 | rxnorms.append(code) 140 | 141 | # extract UMLS CUI 142 | if 'cui' in node.attributes.keys(): 143 | code = node.attributes['cui'].value 144 | if is_neg: 145 | code = "-%s" % code 146 | cuis.append(code) 147 | 148 | # make lists unique 149 | snomeds = list(set(snomeds)) 150 | cuis = list(set(cuis)) 151 | rxnorms = list(set(rxnorms)) 152 | 153 | # clean up if instructed to do so 154 | if self.cleanup: 155 | os.remove(outfile) 156 | 157 | in_dir = os.path.join(root, 'ctakes_input') 158 | infile = os.path.join(in_dir, filename) 159 | if os.path.exists(infile): 160 | os.remove(infile) 161 | 162 | # create and return a dictionary (don't filter empty lists) 163 | ret = { 164 | 'snomed': snomeds, 165 | 'cui': cuis, 166 | 'rxnorm': rxnorms 167 | } 168 | 169 | return ret 170 | 171 | # we can execute this file to do some testing 172 | if '__main__' == __name__: 173 | 174 | ### Define the directory to run in 175 | # print('(ctakes.py) creating directory_obj') 176 | directory_obj = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ctakes-test') 177 | # print('(ctakes.py) directory_obj = ', directory_obj) 178 | 179 | ### Start the cTAKES class, with the root directory set to directory_obj & cleanup set to True 180 | # print('(ctakes.py) Starting my_ctakes') 181 | my_ctakes = cTAKES({'root': directory_obj, 'cleanup': True}) 182 | 183 | my_ctakes.prepare() 184 | 185 | # create a test input file 186 | with open(os.path.join(my_ctakes.root, 'ctakes_input/test.txt'), 'w') as handle: 187 | handle.write("History of clinically significant hypogammaglobulinemia, common variable immunodeficiency, or humoral immunodeficiency") 188 |
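# With a working cTAKES install, _parse_output('test.txt') afterwards returns a dict of the form {'snomed': [...], 'cui': [...], 'rxnorm': [...]}, with negated concepts carrying a leading '-' on their code (shape only; actual values depend on the pipeline run).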
189 | # run 190 | print("\n\033[92m\u0F36 \033[0mStarting cTAKES Java Application...\n") 191 | try: 192 | my_ctakes.run() 193 | print("\n\033[92m\u263A \033[0mDONE!\n") 194 | except Exception as e: 195 | print("\033[91mFAILED:\033[0m {}\n".format(e)) 196 | -------------------------------------------------------------------------------- /databases/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !*.sh 4 | -------------------------------------------------------------------------------- /databases/rxnorm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create an RxNorm SQLite database (and a relations triple store). 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e rxnorm.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/rrf" ]; then 13 | echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html" 14 | exit 1 15 | fi 16 | 17 | # init the database 18 | cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db 19 | 20 | # convert RRF files (strip the trailing pipe and remove quote (") characters, which give SQLite trouble) 21 | if [ ! -e "$1/rrf/RXNREL.pipe" ]; then 22 | current=$(pwd) 23 | cd "$1/rrf" 24 | echo "-> Converting RRF files for SQLite" 25 | for f in *.RRF; do 26 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 27 | done 28 | cd "$current" 29 | fi 30 | 31 | # import tables 32 | for f in "$1/rrf/"*.pipe; do 33 | table=$(basename ${f%.pipe}) 34 | echo "-> Importing $table" 35 | sqlite3 rxnorm.db ".import '$f' '$table'" 36 | done 37 | 38 | # create an NDC table 39 | echo "-> Creating NDC table" 40 | # sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';" # we do it in 2 steps to create the primary index column 41 | sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);" 42 | sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';" 43 | sqlite3 rxnorm.db "CREATE INDEX X_RXCUI ON NDC (RXCUI);" 44 | sqlite3 rxnorm.db "CREATE INDEX X_NDC ON NDC (NDC);" 45 | 46 | # some SQL gems (MySQL syntax, kept for reference) 47 | ## export NDC to CSV 48 | # SELECT RXCUI, NDC FROM NDC INTO OUTFILE 'ndc.csv' FIELDS TERMINATED BY ',' LINES TERMINATED BY "\n"; 49 | ## export RxNorm-only names with their type (TTY) to CSV 50 | # SELECT RXCUI, TTY, STR FROM RXNCONSO WHERE SAB = 'RXNORM' INTO OUTFILE 'names.csv' FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY "\n"; 51 | fi 52 | 53 | # dump to N-Triples (currently disabled by the exit below) 54 | exit 0 55 | sqlite3 rxnorm.db <<SQLITE_COMMAND 56 | … 60 | SELECT … ." FROM RXNREL WHERE RELA != ''; 61 | SQLITE_COMMAND 62 | -------------------------------------------------------------------------------- /databases/umls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # create a UMLS SQLite database. 4 | # 5 | 6 | # our SQLite database does not exist 7 | if [ ! -e umls.db ]; then 8 | if [ ! -d "$1" ]; then 9 | echo "Provide the path to the UMLS install directory as first argument when invoking this script.
Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html (should check which file is needed)" 10 | exit 1 11 | fi 12 | if [ ! -d "$1/META" ]; then 13 | echo "There is no directory named META in the install directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html" 14 | exit 1 15 | fi 16 | 17 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles) 18 | if [ ! -e "$1/META/MRDEF.pipe" ]; then 19 | current=$(pwd) 20 | cd "$1/META" 21 | echo "-> Converting RRF files for SQLite" 22 | for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do 23 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe" 24 | done 25 | cd $current 26 | fi 27 | 28 | # init the database for MRDEF 29 | # table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/ 30 | sqlite3 umls.db "CREATE TABLE MRDEF ( 31 | CUI varchar, 32 | AUI varchar, 33 | ATUI varchar, 34 | SATUI varchar, 35 | SAB varchar, 36 | DEF text, 37 | SUPPRESS varchar, 38 | CVF varchar 39 | )" 40 | 41 | # init the database for MRCONSO 42 | sqlite3 umls.db "CREATE TABLE MRCONSO ( 43 | CUI varchar, 44 | LAT varchar, 45 | TS varchar, 46 | LUI varchar, 47 | STT varchar, 48 | SUI varchar, 49 | ISPREF varchar, 50 | AUI varchar, 51 | SAUI varchar, 52 | SCUI varchar, 53 | SDUI varchar, 54 | SAB varchar, 55 | TTY varchar, 56 | CODE varchar, 57 | STR text, 58 | SRL varchar, 59 | SUPPRESS varchar, 60 | CVF varchar 61 | )" 62 | 63 | # init the database for MRSTY 64 | sqlite3 umls.db "CREATE TABLE MRSTY ( 65 | CUI varchar, 66 | TUI varchar, 67 | STN varchar, 68 | STY text, 69 | ATUI varchar, 70 | CVF varchar 71 | )" 72 | 73 | # import tables 74 | for f in "$1/META/"*.pipe; do 75 | table=$(basename ${f%.pipe}) 76 | echo "-> Importing $table" 77 | sqlite3 umls.db ".import '$f' '$table'" 78 | done 79 | 80 | # create indexes 81 | echo "-> Creating indexes" 82 | sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);" 83 | sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);" 84 | sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);" 85 | sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);" 86 | sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);" 87 | sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);" 88 | sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);" 89 | 90 | # create faster lookup table 91 | echo "-> Creating fast lookup table" 92 | sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'" 93 | sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT" 94 | sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)" 95 | sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)" 96 | else 97 | echo "=> umls.db already exists" 98 | fi 99 | 100 | -------------------------------------------------------------------------------- /dateutil/LICENSE: -------------------------------------------------------------------------------- 1 | A. HISTORY OF THE SOFTWARE 2 | ========================== 3 | 4 | Python was created in the early 1990s by Guido van Rossum at Stichting 5 | Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands 6 | as a successor of a language called ABC. 
Guido remains Python's 7 | principal author, although it includes many contributions from others. 8 | 9 | In 1995, Guido continued his work on Python at the Corporation for 10 | National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) 11 | in Reston, Virginia where he released several versions of the 12 | software. 13 | 14 | In May 2000, Guido and the Python core development team moved to 15 | BeOpen.com to form the BeOpen PythonLabs team. In October of the same 16 | year, the PythonLabs team moved to Digital Creations (now Zope 17 | Corporation, see http://www.zope.com). In 2001, the Python Software 18 | Foundation (PSF, see http://www.python.org/psf/) was formed, a 19 | non-profit organization created specifically to own Python-related 20 | Intellectual Property. Zope Corporation is a sponsoring member of 21 | the PSF. 22 | 23 | All Python releases are Open Source (see http://www.opensource.org for 24 | the Open Source Definition). Historically, most, but not all, Python 25 | releases have also been GPL-compatible; the table below summarizes 26 | the various releases. 27 | 28 | Release Derived Year Owner GPL- 29 | from compatible? (1) 30 | 31 | 0.9.0 thru 1.2 1991-1995 CWI yes 32 | 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 33 | 1.6 1.5.2 2000 CNRI no 34 | 2.0 1.6 2000 BeOpen.com no 35 | 1.6.1 1.6 2001 CNRI yes (2) 36 | 2.1 2.0+1.6.1 2001 PSF no 37 | 2.0.1 2.0+1.6.1 2001 PSF yes 38 | 2.1.1 2.1+2.0.1 2001 PSF yes 39 | 2.2 2.1.1 2001 PSF yes 40 | 2.1.2 2.1.1 2002 PSF yes 41 | 2.1.3 2.1.2 2002 PSF yes 42 | 2.2.1 2.2 2002 PSF yes 43 | 2.2.2 2.2.1 2002 PSF yes 44 | 2.2.3 2.2.2 2003 PSF yes 45 | 2.3 2.2.2 2002-2003 PSF yes 46 | 47 | Footnotes: 48 | 49 | (1) GPL-compatible doesn't mean that we're distributing Python under 50 | the GPL. All Python licenses, unlike the GPL, let you distribute 51 | a modified version without making your changes open source. The 52 | GPL-compatible licenses make it possible to combine Python with 53 | other software that is released under the GPL; the others don't. 54 | 55 | (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, 56 | because its license has a choice of law clause. According to 57 | CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 58 | is "not incompatible" with the GPL. 59 | 60 | Thanks to the many outside volunteers who have worked under Guido's 61 | direction to make these releases possible. 62 | 63 | 64 | B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON 65 | =============================================================== 66 | 67 | PSF LICENSE AGREEMENT FOR PYTHON 2.3 68 | ------------------------------------ 69 | 70 | 1. This LICENSE AGREEMENT is between the Python Software Foundation 71 | ("PSF"), and the Individual or Organization ("Licensee") accessing and 72 | otherwise using Python 2.3 software in source or binary form and its 73 | associated documentation. 74 | 75 | 2. Subject to the terms and conditions of this License Agreement, PSF 76 | hereby grants Licensee a nonexclusive, royalty-free, world-wide 77 | license to reproduce, analyze, test, perform and/or display publicly, 78 | prepare derivative works, distribute, and otherwise use Python 2.3 79 | alone or in any derivative version, provided, however, that PSF's 80 | License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 81 | 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are 82 | retained in Python 2.3 alone or in any derivative version prepared by 83 | Licensee. 84 | 85 | 3. 
In the event Licensee prepares a derivative work that is based on 86 | or incorporates Python 2.3 or any part thereof, and wants to make 87 | the derivative work available to others as provided herein, then 88 | Licensee hereby agrees to include in any such work a brief summary of 89 | the changes made to Python 2.3. 90 | 91 | 4. PSF is making Python 2.3 available to Licensee on an "AS IS" 92 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 93 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 94 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 95 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT 96 | INFRINGE ANY THIRD PARTY RIGHTS. 97 | 98 | 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 99 | 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 100 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, 101 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 102 | 103 | 6. This License Agreement will automatically terminate upon a material 104 | breach of its terms and conditions. 105 | 106 | 7. Nothing in this License Agreement shall be deemed to create any 107 | relationship of agency, partnership, or joint venture between PSF and 108 | Licensee. This License Agreement does not grant permission to use PSF 109 | trademarks or trade name in a trademark sense to endorse or promote 110 | products or services of Licensee, or any third party. 111 | 112 | 8. By copying, installing or otherwise using Python 2.3, Licensee 113 | agrees to be bound by the terms and conditions of this License 114 | Agreement. 115 | 116 | 117 | BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 118 | ------------------------------------------- 119 | 120 | BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 121 | 122 | 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 123 | office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 124 | Individual or Organization ("Licensee") accessing and otherwise using 125 | this software in source or binary form and its associated 126 | documentation ("the Software"). 127 | 128 | 2. Subject to the terms and conditions of this BeOpen Python License 129 | Agreement, BeOpen hereby grants Licensee a non-exclusive, 130 | royalty-free, world-wide license to reproduce, analyze, test, perform 131 | and/or display publicly, prepare derivative works, distribute, and 132 | otherwise use the Software alone or in any derivative version, 133 | provided, however, that the BeOpen Python License is retained in the 134 | Software, alone or in any derivative version prepared by Licensee. 135 | 136 | 3. BeOpen is making the Software available to Licensee on an "AS IS" 137 | basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 138 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 139 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 140 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT 141 | INFRINGE ANY THIRD PARTY RIGHTS. 142 | 143 | 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 144 | SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS 145 | AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY 146 | DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 147 | 148 | 5. This License Agreement will automatically terminate upon a material 149 | breach of its terms and conditions. 150 | 151 | 6. 
This License Agreement shall be governed by and interpreted in all 152 | respects by the law of the State of California, excluding conflict of 153 | law provisions. Nothing in this License Agreement shall be deemed to 154 | create any relationship of agency, partnership, or joint venture 155 | between BeOpen and Licensee. This License Agreement does not grant 156 | permission to use BeOpen trademarks or trade names in a trademark 157 | sense to endorse or promote products or services of Licensee, or any 158 | third party. As an exception, the "BeOpen Python" logos available at 159 | http://www.pythonlabs.com/logos.html may be used according to the 160 | permissions granted on that web page. 161 | 162 | 7. By copying, installing or otherwise using the software, Licensee 163 | agrees to be bound by the terms and conditions of this License 164 | Agreement. 165 | 166 | 167 | CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 168 | --------------------------------------- 169 | 170 | 1. This LICENSE AGREEMENT is between the Corporation for National 171 | Research Initiatives, having an office at 1895 Preston White Drive, 172 | Reston, VA 20191 ("CNRI"), and the Individual or Organization 173 | ("Licensee") accessing and otherwise using Python 1.6.1 software in 174 | source or binary form and its associated documentation. 175 | 176 | 2. Subject to the terms and conditions of this License Agreement, CNRI 177 | hereby grants Licensee a nonexclusive, royalty-free, world-wide 178 | license to reproduce, analyze, test, perform and/or display publicly, 179 | prepare derivative works, distribute, and otherwise use Python 1.6.1 180 | alone or in any derivative version, provided, however, that CNRI's 181 | License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 182 | 1995-2001 Corporation for National Research Initiatives; All Rights 183 | Reserved" are retained in Python 1.6.1 alone or in any derivative 184 | version prepared by Licensee. Alternately, in lieu of CNRI's License 185 | Agreement, Licensee may substitute the following text (omitting the 186 | quotes): "Python 1.6.1 is made available subject to the terms and 187 | conditions in CNRI's License Agreement. This Agreement together with 188 | Python 1.6.1 may be located on the Internet using the following 189 | unique, persistent identifier (known as a handle): 1895.22/1013. This 190 | Agreement may also be obtained from a proxy server on the Internet 191 | using the following URL: http://hdl.handle.net/1895.22/1013". 192 | 193 | 3. In the event Licensee prepares a derivative work that is based on 194 | or incorporates Python 1.6.1 or any part thereof, and wants to make 195 | the derivative work available to others as provided herein, then 196 | Licensee hereby agrees to include in any such work a brief summary of 197 | the changes made to Python 1.6.1. 198 | 199 | 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" 200 | basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 201 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 202 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 203 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT 204 | INFRINGE ANY THIRD PARTY RIGHTS. 205 | 206 | 5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 207 | 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 208 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, 209 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 210 | 211 | 6. This License Agreement will automatically terminate upon a material 212 | breach of its terms and conditions. 213 | 214 | 7. This License Agreement shall be governed by the federal 215 | intellectual property law of the United States, including without 216 | limitation the federal copyright law, and, to the extent such 217 | U.S. federal law does not apply, by the law of the Commonwealth of 218 | Virginia, excluding Virginia's conflict of law provisions. 219 | Notwithstanding the foregoing, with regard to derivative works based 220 | on Python 1.6.1 that incorporate non-separable material that was 221 | previously distributed under the GNU General Public License (GPL), the 222 | law of the Commonwealth of Virginia shall govern this License 223 | Agreement only as to issues arising under or with respect to 224 | Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this 225 | License Agreement shall be deemed to create any relationship of 226 | agency, partnership, or joint venture between CNRI and Licensee. This 227 | License Agreement does not grant permission to use CNRI trademarks or 228 | trade name in a trademark sense to endorse or promote products or 229 | services of Licensee, or any third party. 230 | 231 | 8. By clicking on the "ACCEPT" button where indicated, or by copying, 232 | installing or otherwise using Python 1.6.1, Licensee agrees to be 233 | bound by the terms and conditions of this License Agreement. 234 | 235 | ACCEPT 236 | 237 | 238 | CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 239 | -------------------------------------------------- 240 | 241 | Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 242 | The Netherlands. All rights reserved. 243 | 244 | Permission to use, copy, modify, and distribute this software and its 245 | documentation for any purpose and without fee is hereby granted, 246 | provided that the above copyright notice appear in all copies and that 247 | both that copyright notice and this permission notice appear in 248 | supporting documentation, and that the name of Stichting Mathematisch 249 | Centrum or CWI not be used in advertising or publicity pertaining to 250 | distribution of the software without specific, written prior 251 | permission. 252 | 253 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO 254 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 255 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE 256 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 257 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 258 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 259 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 260 | -------------------------------------------------------------------------------- /dateutil/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | __version__ = "1.5" 10 | -------------------------------------------------------------------------------- /dateutil/easter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | 12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] 13 | 14 | EASTER_JULIAN = 1 15 | EASTER_ORTHODOX = 2 16 | EASTER_WESTERN = 3 17 | 18 | def easter(year, method=EASTER_WESTERN): 19 | """ 20 | This method was ported from the work done by GM Arts, 21 | on top of the algorithm by Claus Tondering, which was 22 | based in part on the algorithm of Ouding (1940), as 23 | quoted in "Explanatory Supplement to the Astronomical 24 | Almanac", P. Kenneth Seidelmann, editor. 25 | 26 | This algorithm implements three different easter 27 | calculation methods: 28 | 29 | 1 - Original calculation in Julian calendar, valid in 30 | dates after 326 AD 31 | 2 - Original method, with date converted to Gregorian 32 | calendar, valid in years 1583 to 4099 33 | 3 - Revised method, in Gregorian calendar, valid in 34 | years 1583 to 4099 as well 35 | 36 | These methods are represented by the constants: 37 | 38 | EASTER_JULIAN = 1 39 | EASTER_ORTHODOX = 2 40 | EASTER_WESTERN = 3 41 | 42 | The default method is method 3. 43 | 44 | More about the algorithm may be found at: 45 | 46 | http://users.chariot.net.au/~gmarts/eastalg.htm 47 | 48 | and 49 | 50 | http://www.tondering.dk/claus/calendar.html 51 | 52 | """ 53 | 54 | if not (1 <= method <= 3): 55 | raise ValueError, "invalid method" 56 | 57 | # g - Golden year - 1 58 | # c - Century 59 | # h - (23 - Epact) mod 30 60 | # i - Number of days from March 21 to Paschal Full Moon 61 | # j - Weekday for PFM (0=Sunday, etc) 62 | # p - Number of days from March 21 to Sunday on or before PFM 63 | # (-6 to 28 methods 1 & 3, to 56 for method 2) 64 | # e - Extra days to add for method 2 (converting Julian 65 | # date to Gregorian date) 66 | 67 | y = year 68 | g = y % 19 69 | e = 0 70 | if method < 3: 71 | # Old method 72 | i = (19*g+15)%30 73 | j = (y+y//4+i)%7 74 | if method == 2: 75 | # Extra dates to convert Julian to Gregorian date 76 | e = 10 77 | if y > 1600: 78 | e = e+y//100-16-(y//100-16)//4 79 | else: 80 | # New method 81 | c = y//100 82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30 83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11)) 84 | j = (y+y//4+i+2-c+c//4)%7 85 | 86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May 87 | # (later dates apply to method 2, although 23 May never actually occurs) 88 | p = i-j+e 89 | d = 1+(p+27+(p+6)//40)%31 90 | m = 3+(p+26)//30 91 | return datetime.date(int(y),int(m),int(d)) 92 | 93 | -------------------------------------------------------------------------------- /dateutil/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/dateutil/parser.py -------------------------------------------------------------------------------- /dateutil/relativedelta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to 
the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | import calendar 12 | 13 | __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"] 14 | 15 | class weekday(object): 16 | __slots__ = ["weekday", "n"] 17 | 18 | def __init__(self, weekday, n=None): 19 | self.weekday = weekday 20 | self.n = n 21 | 22 | def __call__(self, n): 23 | if n == self.n: 24 | return self 25 | else: 26 | return self.__class__(self.weekday, n) 27 | 28 | def __eq__(self, other): 29 | try: 30 | if self.weekday != other.weekday or self.n != other.n: 31 | return False 32 | except AttributeError: 33 | return False 34 | return True 35 | 36 | def __repr__(self): 37 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday] 38 | if not self.n: 39 | return s 40 | else: 41 | return "%s(%+d)" % (s, self.n) 42 | 43 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)]) 44 | 45 | class relativedelta: 46 | """ 47 | The relativedelta type is based on the specification of the excelent 48 | work done by M.-A. Lemburg in his mx.DateTime extension. However, 49 | notice that this type does *NOT* implement the same algorithm as 50 | his work. Do *NOT* expect it to behave like mx.DateTime's counterpart. 51 | 52 | There's two different ways to build a relativedelta instance. The 53 | first one is passing it two date/datetime classes: 54 | 55 | relativedelta(datetime1, datetime2) 56 | 57 | And the other way is to use the following keyword arguments: 58 | 59 | year, month, day, hour, minute, second, microsecond: 60 | Absolute information. 61 | 62 | years, months, weeks, days, hours, minutes, seconds, microseconds: 63 | Relative information, may be negative. 64 | 65 | weekday: 66 | One of the weekday instances (MO, TU, etc). These instances may 67 | receive a parameter N, specifying the Nth weekday, which could 68 | be positive or negative (like MO(+1) or MO(-2). Not specifying 69 | it is the same as specifying +1. You can also use an integer, 70 | where 0=MO. 71 | 72 | leapdays: 73 | Will add given days to the date found, if year is a leap 74 | year, and the date found is post 28 of february. 75 | 76 | yearday, nlyearday: 77 | Set the yearday or the non-leap year day (jump leap days). 78 | These are converted to day/month/leapdays information. 79 | 80 | Here is the behavior of operations with relativedelta: 81 | 82 | 1) Calculate the absolute year, using the 'year' argument, or the 83 | original datetime year, if the argument is not present. 84 | 85 | 2) Add the relative 'years' argument to the absolute year. 86 | 87 | 3) Do steps 1 and 2 for month/months. 88 | 89 | 4) Calculate the absolute day, using the 'day' argument, or the 90 | original datetime day, if the argument is not present. Then, 91 | subtract from the day until it fits in the year and month 92 | found after their operations. 93 | 94 | 5) Add the relative 'days' argument to the absolute day. Notice 95 | that the 'weeks' argument is multiplied by 7 and added to 96 | 'days'. 97 | 98 | 6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds, 99 | microsecond/microseconds. 100 | 101 | 7) If the 'weekday' argument is present, calculate the weekday, 102 | with the given (wday, nth) tuple. wday is the index of the 103 | weekday (0-6, 0=Mon), and nth is the number of weeks to add 104 | forward or backward, depending on its signal. 
Notice that if 105 | the calculated date is already Monday, for example, using 106 | (0, 1) or (0, -1) won't change the day. 107 | """ 108 | 109 | def __init__(self, dt1=None, dt2=None, 110 | years=0, months=0, days=0, leapdays=0, weeks=0, 111 | hours=0, minutes=0, seconds=0, microseconds=0, 112 | year=None, month=None, day=None, weekday=None, 113 | yearday=None, nlyearday=None, 114 | hour=None, minute=None, second=None, microsecond=None): 115 | if dt1 and dt2: 116 | if not isinstance(dt1, datetime.date) or \ 117 | not isinstance(dt2, datetime.date): 118 | raise TypeError, "relativedelta only diffs datetime/date" 119 | if type(dt1) is not type(dt2): 120 | if not isinstance(dt1, datetime.datetime): 121 | dt1 = datetime.datetime.fromordinal(dt1.toordinal()) 122 | elif not isinstance(dt2, datetime.datetime): 123 | dt2 = datetime.datetime.fromordinal(dt2.toordinal()) 124 | self.years = 0 125 | self.months = 0 126 | self.days = 0 127 | self.leapdays = 0 128 | self.hours = 0 129 | self.minutes = 0 130 | self.seconds = 0 131 | self.microseconds = 0 132 | self.year = None 133 | self.month = None 134 | self.day = None 135 | self.weekday = None 136 | self.hour = None 137 | self.minute = None 138 | self.second = None 139 | self.microsecond = None 140 | self._has_time = 0 141 | 142 | months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month) 143 | self._set_months(months) 144 | dtm = self.__radd__(dt2) 145 | if dt1 < dt2: 146 | while dt1 > dtm: 147 | months += 1 148 | self._set_months(months) 149 | dtm = self.__radd__(dt2) 150 | else: 151 | while dt1 < dtm: 152 | months -= 1 153 | self._set_months(months) 154 | dtm = self.__radd__(dt2) 155 | delta = dt1 - dtm 156 | self.seconds = delta.seconds+delta.days*86400 157 | self.microseconds = delta.microseconds 158 | else: 159 | self.years = years 160 | self.months = months 161 | self.days = days+weeks*7 162 | self.leapdays = leapdays 163 | self.hours = hours 164 | self.minutes = minutes 165 | self.seconds = seconds 166 | self.microseconds = microseconds 167 | self.year = year 168 | self.month = month 169 | self.day = day 170 | self.hour = hour 171 | self.minute = minute 172 | self.second = second 173 | self.microsecond = microsecond 174 | 175 | if type(weekday) is int: 176 | self.weekday = weekdays[weekday] 177 | else: 178 | self.weekday = weekday 179 | 180 | yday = 0 181 | if nlyearday: 182 | yday = nlyearday 183 | elif yearday: 184 | yday = yearday 185 | if yearday > 59: 186 | self.leapdays = -1 187 | if yday: 188 | ydayidx = [31,59,90,120,151,181,212,243,273,304,334,366] 189 | for idx, ydays in enumerate(ydayidx): 190 | if yday <= ydays: 191 | self.month = idx+1 192 | if idx == 0: 193 | self.day = yday 194 | else: 195 | self.day = yday-ydayidx[idx-1] 196 | break 197 | else: 198 | raise ValueError, "invalid year day (%d)" % yday 199 | 200 | self._fix() 201 | 202 | def _fix(self): 203 | if abs(self.microseconds) > 999999: 204 | s = self.microseconds//abs(self.microseconds) 205 | div, mod = divmod(self.microseconds*s, 1000000) 206 | self.microseconds = mod*s 207 | self.seconds += div*s 208 | if abs(self.seconds) > 59: 209 | s = self.seconds//abs(self.seconds) 210 | div, mod = divmod(self.seconds*s, 60) 211 | self.seconds = mod*s 212 | self.minutes += div*s 213 | if abs(self.minutes) > 59: 214 | s = self.minutes//abs(self.minutes) 215 | div, mod = divmod(self.minutes*s, 60) 216 | self.minutes = mod*s 217 | self.hours += div*s 218 | if abs(self.hours) > 23: 219 | s = self.hours//abs(self.hours) 220 | div, mod = divmod(self.hours*s, 24) 221 | self.hours = mod*s 
222 | self.days += div*s 223 | if abs(self.months) > 11: 224 | s = self.months//abs(self.months) 225 | div, mod = divmod(self.months*s, 12) 226 | self.months = mod*s 227 | self.years += div*s 228 | if (self.hours or self.minutes or self.seconds or self.microseconds or 229 | self.hour is not None or self.minute is not None or 230 | self.second is not None or self.microsecond is not None): 231 | self._has_time = 1 232 | else: 233 | self._has_time = 0 234 | 235 | def _set_months(self, months): 236 | self.months = months 237 | if abs(self.months) > 11: 238 | s = self.months//abs(self.months) 239 | div, mod = divmod(self.months*s, 12) 240 | self.months = mod*s 241 | self.years = div*s 242 | else: 243 | self.years = 0 244 | 245 | def __radd__(self, other): 246 | if not isinstance(other, datetime.date): 247 | raise TypeError, "unsupported type for add operation" 248 | elif self._has_time and not isinstance(other, datetime.datetime): 249 | other = datetime.datetime.fromordinal(other.toordinal()) 250 | year = (self.year or other.year)+self.years 251 | month = self.month or other.month 252 | if self.months: 253 | assert 1 <= abs(self.months) <= 12 254 | month += self.months 255 | if month > 12: 256 | year += 1 257 | month -= 12 258 | elif month < 1: 259 | year -= 1 260 | month += 12 261 | day = min(calendar.monthrange(year, month)[1], 262 | self.day or other.day) 263 | repl = {"year": year, "month": month, "day": day} 264 | for attr in ["hour", "minute", "second", "microsecond"]: 265 | value = getattr(self, attr) 266 | if value is not None: 267 | repl[attr] = value 268 | days = self.days 269 | if self.leapdays and month > 2 and calendar.isleap(year): 270 | days += self.leapdays 271 | ret = (other.replace(**repl) 272 | + datetime.timedelta(days=days, 273 | hours=self.hours, 274 | minutes=self.minutes, 275 | seconds=self.seconds, 276 | microseconds=self.microseconds)) 277 | if self.weekday: 278 | weekday, nth = self.weekday.weekday, self.weekday.n or 1 279 | jumpdays = (abs(nth)-1)*7 280 | if nth > 0: 281 | jumpdays += (7-ret.weekday()+weekday)%7 282 | else: 283 | jumpdays += (ret.weekday()-weekday)%7 284 | jumpdays *= -1 285 | ret += datetime.timedelta(days=jumpdays) 286 | return ret 287 | 288 | def __rsub__(self, other): 289 | return self.__neg__().__radd__(other) 290 | 291 | def __add__(self, other): 292 | if not isinstance(other, relativedelta): 293 | raise TypeError, "unsupported type for add operation" 294 | return relativedelta(years=other.years+self.years, 295 | months=other.months+self.months, 296 | days=other.days+self.days, 297 | hours=other.hours+self.hours, 298 | minutes=other.minutes+self.minutes, 299 | seconds=other.seconds+self.seconds, 300 | microseconds=other.microseconds+self.microseconds, 301 | leapdays=other.leapdays or self.leapdays, 302 | year=other.year or self.year, 303 | month=other.month or self.month, 304 | day=other.day or self.day, 305 | weekday=other.weekday or self.weekday, 306 | hour=other.hour or self.hour, 307 | minute=other.minute or self.minute, 308 | second=other.second or self.second, 309 | microsecond=other.second or self.microsecond) 310 | 311 | def __sub__(self, other): 312 | if not isinstance(other, relativedelta): 313 | raise TypeError, "unsupported type for sub operation" 314 | return relativedelta(years=other.years-self.years, 315 | months=other.months-self.months, 316 | days=other.days-self.days, 317 | hours=other.hours-self.hours, 318 | minutes=other.minutes-self.minutes, 319 | seconds=other.seconds-self.seconds, 320 | 
microseconds=other.microseconds-self.microseconds, 321 | leapdays=other.leapdays or self.leapdays, 322 | year=other.year or self.year, 323 | month=other.month or self.month, 324 | day=other.day or self.day, 325 | weekday=other.weekday or self.weekday, 326 | hour=other.hour or self.hour, 327 | minute=other.minute or self.minute, 328 | second=other.second or self.second, 329 | microsecond=other.second or self.microsecond) 330 | 331 | def __neg__(self): 332 | return relativedelta(years=-self.years, 333 | months=-self.months, 334 | days=-self.days, 335 | hours=-self.hours, 336 | minutes=-self.minutes, 337 | seconds=-self.seconds, 338 | microseconds=-self.microseconds, 339 | leapdays=self.leapdays, 340 | year=self.year, 341 | month=self.month, 342 | day=self.day, 343 | weekday=self.weekday, 344 | hour=self.hour, 345 | minute=self.minute, 346 | second=self.second, 347 | microsecond=self.microsecond) 348 | 349 | def __nonzero__(self): 350 | return not (not self.years and 351 | not self.months and 352 | not self.days and 353 | not self.hours and 354 | not self.minutes and 355 | not self.seconds and 356 | not self.microseconds and 357 | not self.leapdays and 358 | self.year is None and 359 | self.month is None and 360 | self.day is None and 361 | self.weekday is None and 362 | self.hour is None and 363 | self.minute is None and 364 | self.second is None and 365 | self.microsecond is None) 366 | 367 | def __mul__(self, other): 368 | f = float(other) 369 | return relativedelta(years=self.years*f, 370 | months=self.months*f, 371 | days=self.days*f, 372 | hours=self.hours*f, 373 | minutes=self.minutes*f, 374 | seconds=self.seconds*f, 375 | microseconds=self.microseconds*f, 376 | leapdays=self.leapdays, 377 | year=self.year, 378 | month=self.month, 379 | day=self.day, 380 | weekday=self.weekday, 381 | hour=self.hour, 382 | minute=self.minute, 383 | second=self.second, 384 | microsecond=self.microsecond) 385 | 386 | def __eq__(self, other): 387 | if not isinstance(other, relativedelta): 388 | return False 389 | if self.weekday or other.weekday: 390 | if not self.weekday or not other.weekday: 391 | return False 392 | if self.weekday.weekday != other.weekday.weekday: 393 | return False 394 | n1, n2 = self.weekday.n, other.weekday.n 395 | if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)): 396 | return False 397 | return (self.years == other.years and 398 | self.months == other.months and 399 | self.days == other.days and 400 | self.hours == other.hours and 401 | self.minutes == other.minutes and 402 | self.seconds == other.seconds and 403 | self.leapdays == other.leapdays and 404 | self.year == other.year and 405 | self.month == other.month and 406 | self.day == other.day and 407 | self.hour == other.hour and 408 | self.minute == other.minute and 409 | self.second == other.second and 410 | self.microsecond == other.microsecond) 411 | 412 | def __ne__(self, other): 413 | return not self.__eq__(other) 414 | 415 | def __div__(self, other): 416 | return self.__mul__(1/float(other)) 417 | 418 | def __repr__(self): 419 | l = [] 420 | for attr in ["years", "months", "days", "leapdays", 421 | "hours", "minutes", "seconds", "microseconds"]: 422 | value = getattr(self, attr) 423 | if value: 424 | l.append("%s=%+d" % (attr, value)) 425 | for attr in ["year", "month", "day", "weekday", 426 | "hour", "minute", "second", "microsecond"]: 427 | value = getattr(self, attr) 428 | if value is not None: 429 | l.append("%s=%s" % (attr, `value`)) 430 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) 431 
| 432 | # vim:ts=4:sw=4:et 433 | -------------------------------------------------------------------------------- /dateutil/rrule.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import itertools 11 | import datetime 12 | import calendar 13 | import thread 14 | import sys 15 | 16 | __all__ = ["rrule", "rruleset", "rrulestr", 17 | "YEARLY", "MONTHLY", "WEEKLY", "DAILY", 18 | "HOURLY", "MINUTELY", "SECONDLY", 19 | "MO", "TU", "WE", "TH", "FR", "SA", "SU"] 20 | 21 | # Every mask is 7 days longer to handle cross-year weekly periods. 22 | M366MASK = tuple([1]*31+[2]*29+[3]*31+[4]*30+[5]*31+[6]*30+ 23 | [7]*31+[8]*31+[9]*30+[10]*31+[11]*30+[12]*31+[1]*7) 24 | M365MASK = list(M366MASK) 25 | M29, M30, M31 = range(1,30), range(1,31), range(1,32) 26 | MDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) 27 | MDAY365MASK = list(MDAY366MASK) 28 | M29, M30, M31 = range(-29,0), range(-30,0), range(-31,0) 29 | NMDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7]) 30 | NMDAY365MASK = list(NMDAY366MASK) 31 | M366RANGE = (0,31,60,91,121,152,182,213,244,274,305,335,366) 32 | M365RANGE = (0,31,59,90,120,151,181,212,243,273,304,334,365) 33 | WDAYMASK = [0,1,2,3,4,5,6]*55 34 | del M29, M30, M31, M365MASK[59], MDAY365MASK[59], NMDAY365MASK[31] 35 | MDAY365MASK = tuple(MDAY365MASK) 36 | M365MASK = tuple(M365MASK) 37 | 38 | (YEARLY, 39 | MONTHLY, 40 | WEEKLY, 41 | DAILY, 42 | HOURLY, 43 | MINUTELY, 44 | SECONDLY) = range(7) 45 | 46 | # Imported on demand. 47 | easter = None 48 | parser = None 49 | 50 | class weekday(object): 51 | __slots__ = ["weekday", "n"] 52 | 53 | def __init__(self, weekday, n=None): 54 | if n == 0: 55 | raise ValueError, "Can't create weekday with n == 0" 56 | self.weekday = weekday 57 | self.n = n 58 | 59 | def __call__(self, n): 60 | if n == self.n: 61 | return self 62 | else: 63 | return self.__class__(self.weekday, n) 64 | 65 | def __eq__(self, other): 66 | try: 67 | if self.weekday != other.weekday or self.n != other.n: 68 | return False 69 | except AttributeError: 70 | return False 71 | return True 72 | 73 | def __repr__(self): 74 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday] 75 | if not self.n: 76 | return s 77 | else: 78 | return "%s(%+d)" % (s, self.n) 79 | 80 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)]) 81 | 82 | class rrulebase: 83 | def __init__(self, cache=False): 84 | if cache: 85 | self._cache = [] 86 | self._cache_lock = thread.allocate_lock() 87 | self._cache_gen = self._iter() 88 | self._cache_complete = False 89 | else: 90 | self._cache = None 91 | self._cache_complete = False 92 | self._len = None 93 | 94 | def __iter__(self): 95 | if self._cache_complete: 96 | return iter(self._cache) 97 | elif self._cache is None: 98 | return self._iter() 99 | else: 100 | return self._iter_cached() 101 | 102 | def _iter_cached(self): 103 | i = 0 104 | gen = self._cache_gen 105 | cache = self._cache 106 | acquire = self._cache_lock.acquire 107 | release = self._cache_lock.release 108 | while gen: 109 | if i == len(cache): 110 | acquire() 111 | if self._cache_complete: 112 | break 113 | try: 114 | for j in range(10): 115 | cache.append(gen.next()) 116 | except StopIteration: 117 | self._cache_gen = gen = None 118 | 
self._cache_complete = True 119 | break 120 | release() 121 | yield cache[i] 122 | i += 1 123 | while i < self._len: 124 | yield cache[i] 125 | i += 1 126 | 127 | def __getitem__(self, item): 128 | if self._cache_complete: 129 | return self._cache[item] 130 | elif isinstance(item, slice): 131 | if item.step and item.step < 0: 132 | return list(iter(self))[item] 133 | else: 134 | return list(itertools.islice(self, 135 | item.start or 0, 136 | item.stop or sys.maxint, 137 | item.step or 1)) 138 | elif item >= 0: 139 | gen = iter(self) 140 | try: 141 | for i in range(item+1): 142 | res = gen.next() 143 | except StopIteration: 144 | raise IndexError 145 | return res 146 | else: 147 | return list(iter(self))[item] 148 | 149 | def __contains__(self, item): 150 | if self._cache_complete: 151 | return item in self._cache 152 | else: 153 | for i in self: 154 | if i == item: 155 | return True 156 | elif i > item: 157 | return False 158 | return False 159 | 160 | # __len__() introduces a large performance penality. 161 | def count(self): 162 | if self._len is None: 163 | for x in self: pass 164 | return self._len 165 | 166 | def before(self, dt, inc=False): 167 | if self._cache_complete: 168 | gen = self._cache 169 | else: 170 | gen = self 171 | last = None 172 | if inc: 173 | for i in gen: 174 | if i > dt: 175 | break 176 | last = i 177 | else: 178 | for i in gen: 179 | if i >= dt: 180 | break 181 | last = i 182 | return last 183 | 184 | def after(self, dt, inc=False): 185 | if self._cache_complete: 186 | gen = self._cache 187 | else: 188 | gen = self 189 | if inc: 190 | for i in gen: 191 | if i >= dt: 192 | return i 193 | else: 194 | for i in gen: 195 | if i > dt: 196 | return i 197 | return None 198 | 199 | def between(self, after, before, inc=False): 200 | if self._cache_complete: 201 | gen = self._cache 202 | else: 203 | gen = self 204 | started = False 205 | l = [] 206 | if inc: 207 | for i in gen: 208 | if i > before: 209 | break 210 | elif not started: 211 | if i >= after: 212 | started = True 213 | l.append(i) 214 | else: 215 | l.append(i) 216 | else: 217 | for i in gen: 218 | if i >= before: 219 | break 220 | elif not started: 221 | if i > after: 222 | started = True 223 | l.append(i) 224 | else: 225 | l.append(i) 226 | return l 227 | 228 | class rrule(rrulebase): 229 | def __init__(self, freq, dtstart=None, 230 | interval=1, wkst=None, count=None, until=None, bysetpos=None, 231 | bymonth=None, bymonthday=None, byyearday=None, byeaster=None, 232 | byweekno=None, byweekday=None, 233 | byhour=None, byminute=None, bysecond=None, 234 | cache=False): 235 | rrulebase.__init__(self, cache) 236 | global easter 237 | if not dtstart: 238 | dtstart = datetime.datetime.now().replace(microsecond=0) 239 | elif not isinstance(dtstart, datetime.datetime): 240 | dtstart = datetime.datetime.fromordinal(dtstart.toordinal()) 241 | else: 242 | dtstart = dtstart.replace(microsecond=0) 243 | self._dtstart = dtstart 244 | self._tzinfo = dtstart.tzinfo 245 | self._freq = freq 246 | self._interval = interval 247 | self._count = count 248 | if until and not isinstance(until, datetime.datetime): 249 | until = datetime.datetime.fromordinal(until.toordinal()) 250 | self._until = until 251 | if wkst is None: 252 | self._wkst = calendar.firstweekday() 253 | elif type(wkst) is int: 254 | self._wkst = wkst 255 | else: 256 | self._wkst = wkst.weekday 257 | if bysetpos is None: 258 | self._bysetpos = None 259 | elif type(bysetpos) is int: 260 | if bysetpos == 0 or not (-366 <= bysetpos <= 366): 261 | raise 
ValueError("bysetpos must be between 1 and 366, " 262 | "or between -366 and -1") 263 | self._bysetpos = (bysetpos,) 264 | else: 265 | self._bysetpos = tuple(bysetpos) 266 | for pos in self._bysetpos: 267 | if pos == 0 or not (-366 <= pos <= 366): 268 | raise ValueError("bysetpos must be between 1 and 366, " 269 | "or between -366 and -1") 270 | if not (byweekno or byyearday or bymonthday or 271 | byweekday is not None or byeaster is not None): 272 | if freq == YEARLY: 273 | if not bymonth: 274 | bymonth = dtstart.month 275 | bymonthday = dtstart.day 276 | elif freq == MONTHLY: 277 | bymonthday = dtstart.day 278 | elif freq == WEEKLY: 279 | byweekday = dtstart.weekday() 280 | # bymonth 281 | if not bymonth: 282 | self._bymonth = None 283 | elif type(bymonth) is int: 284 | self._bymonth = (bymonth,) 285 | else: 286 | self._bymonth = tuple(bymonth) 287 | # byyearday 288 | if not byyearday: 289 | self._byyearday = None 290 | elif type(byyearday) is int: 291 | self._byyearday = (byyearday,) 292 | else: 293 | self._byyearday = tuple(byyearday) 294 | # byeaster 295 | if byeaster is not None: 296 | if not easter: 297 | from dateutil import easter 298 | if type(byeaster) is int: 299 | self._byeaster = (byeaster,) 300 | else: 301 | self._byeaster = tuple(byeaster) 302 | else: 303 | self._byeaster = None 304 | # bymonthay 305 | if not bymonthday: 306 | self._bymonthday = () 307 | self._bynmonthday = () 308 | elif type(bymonthday) is int: 309 | if bymonthday < 0: 310 | self._bynmonthday = (bymonthday,) 311 | self._bymonthday = () 312 | else: 313 | self._bymonthday = (bymonthday,) 314 | self._bynmonthday = () 315 | else: 316 | self._bymonthday = tuple([x for x in bymonthday if x > 0]) 317 | self._bynmonthday = tuple([x for x in bymonthday if x < 0]) 318 | # byweekno 319 | if byweekno is None: 320 | self._byweekno = None 321 | elif type(byweekno) is int: 322 | self._byweekno = (byweekno,) 323 | else: 324 | self._byweekno = tuple(byweekno) 325 | # byweekday / bynweekday 326 | if byweekday is None: 327 | self._byweekday = None 328 | self._bynweekday = None 329 | elif type(byweekday) is int: 330 | self._byweekday = (byweekday,) 331 | self._bynweekday = None 332 | elif hasattr(byweekday, "n"): 333 | if not byweekday.n or freq > MONTHLY: 334 | self._byweekday = (byweekday.weekday,) 335 | self._bynweekday = None 336 | else: 337 | self._bynweekday = ((byweekday.weekday, byweekday.n),) 338 | self._byweekday = None 339 | else: 340 | self._byweekday = [] 341 | self._bynweekday = [] 342 | for wday in byweekday: 343 | if type(wday) is int: 344 | self._byweekday.append(wday) 345 | elif not wday.n or freq > MONTHLY: 346 | self._byweekday.append(wday.weekday) 347 | else: 348 | self._bynweekday.append((wday.weekday, wday.n)) 349 | self._byweekday = tuple(self._byweekday) 350 | self._bynweekday = tuple(self._bynweekday) 351 | if not self._byweekday: 352 | self._byweekday = None 353 | elif not self._bynweekday: 354 | self._bynweekday = None 355 | # byhour 356 | if byhour is None: 357 | if freq < HOURLY: 358 | self._byhour = (dtstart.hour,) 359 | else: 360 | self._byhour = None 361 | elif type(byhour) is int: 362 | self._byhour = (byhour,) 363 | else: 364 | self._byhour = tuple(byhour) 365 | # byminute 366 | if byminute is None: 367 | if freq < MINUTELY: 368 | self._byminute = (dtstart.minute,) 369 | else: 370 | self._byminute = None 371 | elif type(byminute) is int: 372 | self._byminute = (byminute,) 373 | else: 374 | self._byminute = tuple(byminute) 375 | # bysecond 376 | if bysecond is None: 377 | if freq < SECONDLY: 
378 | self._bysecond = (dtstart.second,) 379 | else: 380 | self._bysecond = None 381 | elif type(bysecond) is int: 382 | self._bysecond = (bysecond,) 383 | else: 384 | self._bysecond = tuple(bysecond) 385 | 386 | if self._freq >= HOURLY: 387 | self._timeset = None 388 | else: 389 | self._timeset = [] 390 | for hour in self._byhour: 391 | for minute in self._byminute: 392 | for second in self._bysecond: 393 | self._timeset.append( 394 | datetime.time(hour, minute, second, 395 | tzinfo=self._tzinfo)) 396 | self._timeset.sort() 397 | self._timeset = tuple(self._timeset) 398 | 399 | def _iter(self): 400 | year, month, day, hour, minute, second, weekday, yearday, _ = \ 401 | self._dtstart.timetuple() 402 | 403 | # Some local variables to speed things up a bit 404 | freq = self._freq 405 | interval = self._interval 406 | wkst = self._wkst 407 | until = self._until 408 | bymonth = self._bymonth 409 | byweekno = self._byweekno 410 | byyearday = self._byyearday 411 | byweekday = self._byweekday 412 | byeaster = self._byeaster 413 | bymonthday = self._bymonthday 414 | bynmonthday = self._bynmonthday 415 | bysetpos = self._bysetpos 416 | byhour = self._byhour 417 | byminute = self._byminute 418 | bysecond = self._bysecond 419 | 420 | ii = _iterinfo(self) 421 | ii.rebuild(year, month) 422 | 423 | getdayset = {YEARLY:ii.ydayset, 424 | MONTHLY:ii.mdayset, 425 | WEEKLY:ii.wdayset, 426 | DAILY:ii.ddayset, 427 | HOURLY:ii.ddayset, 428 | MINUTELY:ii.ddayset, 429 | SECONDLY:ii.ddayset}[freq] 430 | 431 | if freq < HOURLY: 432 | timeset = self._timeset 433 | else: 434 | gettimeset = {HOURLY:ii.htimeset, 435 | MINUTELY:ii.mtimeset, 436 | SECONDLY:ii.stimeset}[freq] 437 | if ((freq >= HOURLY and 438 | self._byhour and hour not in self._byhour) or 439 | (freq >= MINUTELY and 440 | self._byminute and minute not in self._byminute) or 441 | (freq >= SECONDLY and 442 | self._bysecond and second not in self._bysecond)): 443 | timeset = () 444 | else: 445 | timeset = gettimeset(hour, minute, second) 446 | 447 | total = 0 448 | count = self._count 449 | while True: 450 | # Get dayset with the right frequency 451 | dayset, start, end = getdayset(year, month, day) 452 | 453 | # Do the "hard" work ;-) 454 | filtered = False 455 | for i in dayset[start:end]: 456 | if ((bymonth and ii.mmask[i] not in bymonth) or 457 | (byweekno and not ii.wnomask[i]) or 458 | (byweekday and ii.wdaymask[i] not in byweekday) or 459 | (ii.nwdaymask and not ii.nwdaymask[i]) or 460 | (byeaster and not ii.eastermask[i]) or 461 | ((bymonthday or bynmonthday) and 462 | ii.mdaymask[i] not in bymonthday and 463 | ii.nmdaymask[i] not in bynmonthday) or 464 | (byyearday and 465 | ((i < ii.yearlen and i+1 not in byyearday 466 | and -ii.yearlen+i not in byyearday) or 467 | (i >= ii.yearlen and i+1-ii.yearlen not in byyearday 468 | and -ii.nextyearlen+i-ii.yearlen 469 | not in byyearday)))): 470 | dayset[i] = None 471 | filtered = True 472 | 473 | # Output results 474 | if bysetpos and timeset: 475 | poslist = [] 476 | for pos in bysetpos: 477 | if pos < 0: 478 | daypos, timepos = divmod(pos, len(timeset)) 479 | else: 480 | daypos, timepos = divmod(pos-1, len(timeset)) 481 | try: 482 | i = [x for x in dayset[start:end] 483 | if x is not None][daypos] 484 | time = timeset[timepos] 485 | except IndexError: 486 | pass 487 | else: 488 | date = datetime.date.fromordinal(ii.yearordinal+i) 489 | res = datetime.datetime.combine(date, time) 490 | if res not in poslist: 491 | poslist.append(res) 492 | poslist.sort() 493 | for res in poslist: 494 | if until and res > 
until: 495 | self._len = total 496 | return 497 | elif res >= self._dtstart: 498 | total += 1 499 | yield res 500 | if count: 501 | count -= 1 502 | if not count: 503 | self._len = total 504 | return 505 | else: 506 | for i in dayset[start:end]: 507 | if i is not None: 508 | date = datetime.date.fromordinal(ii.yearordinal+i) 509 | for time in timeset: 510 | res = datetime.datetime.combine(date, time) 511 | if until and res > until: 512 | self._len = total 513 | return 514 | elif res >= self._dtstart: 515 | total += 1 516 | yield res 517 | if count: 518 | count -= 1 519 | if not count: 520 | self._len = total 521 | return 522 | 523 | # Handle frequency and interval 524 | fixday = False 525 | if freq == YEARLY: 526 | year += interval 527 | if year > datetime.MAXYEAR: 528 | self._len = total 529 | return 530 | ii.rebuild(year, month) 531 | elif freq == MONTHLY: 532 | month += interval 533 | if month > 12: 534 | div, mod = divmod(month, 12) 535 | month = mod 536 | year += div 537 | if month == 0: 538 | month = 12 539 | year -= 1 540 | if year > datetime.MAXYEAR: 541 | self._len = total 542 | return 543 | ii.rebuild(year, month) 544 | elif freq == WEEKLY: 545 | if wkst > weekday: 546 | day += -(weekday+1+(6-wkst))+self._interval*7 547 | else: 548 | day += -(weekday-wkst)+self._interval*7 549 | weekday = wkst 550 | fixday = True 551 | elif freq == DAILY: 552 | day += interval 553 | fixday = True 554 | elif freq == HOURLY: 555 | if filtered: 556 | # Jump to one iteration before next day 557 | hour += ((23-hour)//interval)*interval 558 | while True: 559 | hour += interval 560 | div, mod = divmod(hour, 24) 561 | if div: 562 | hour = mod 563 | day += div 564 | fixday = True 565 | if not byhour or hour in byhour: 566 | break 567 | timeset = gettimeset(hour, minute, second) 568 | elif freq == MINUTELY: 569 | if filtered: 570 | # Jump to one iteration before next day 571 | minute += ((1439-(hour*60+minute))//interval)*interval 572 | while True: 573 | minute += interval 574 | div, mod = divmod(minute, 60) 575 | if div: 576 | minute = mod 577 | hour += div 578 | div, mod = divmod(hour, 24) 579 | if div: 580 | hour = mod 581 | day += div 582 | fixday = True 583 | filtered = False 584 | if ((not byhour or hour in byhour) and 585 | (not byminute or minute in byminute)): 586 | break 587 | timeset = gettimeset(hour, minute, second) 588 | elif freq == SECONDLY: 589 | if filtered: 590 | # Jump to one iteration before next day 591 | second += (((86399-(hour*3600+minute*60+second)) 592 | //interval)*interval) 593 | while True: 594 | second += self._interval 595 | div, mod = divmod(second, 60) 596 | if div: 597 | second = mod 598 | minute += div 599 | div, mod = divmod(minute, 60) 600 | if div: 601 | minute = mod 602 | hour += div 603 | div, mod = divmod(hour, 24) 604 | if div: 605 | hour = mod 606 | day += div 607 | fixday = True 608 | if ((not byhour or hour in byhour) and 609 | (not byminute or minute in byminute) and 610 | (not bysecond or second in bysecond)): 611 | break 612 | timeset = gettimeset(hour, minute, second) 613 | 614 | if fixday and day > 28: 615 | daysinmonth = calendar.monthrange(year, month)[1] 616 | if day > daysinmonth: 617 | while day > daysinmonth: 618 | day -= daysinmonth 619 | month += 1 620 | if month == 13: 621 | month = 1 622 | year += 1 623 | if year > datetime.MAXYEAR: 624 | self._len = total 625 | return 626 | daysinmonth = calendar.monthrange(year, month)[1] 627 | ii.rebuild(year, month) 628 | 629 | class _iterinfo(object): 630 | __slots__ = ["rrule", "lastyear", "lastmonth", 631 
| "yearlen", "nextyearlen", "yearordinal", "yearweekday", 632 | "mmask", "mrange", "mdaymask", "nmdaymask", 633 | "wdaymask", "wnomask", "nwdaymask", "eastermask"] 634 | 635 | def __init__(self, rrule): 636 | for attr in self.__slots__: 637 | setattr(self, attr, None) 638 | self.rrule = rrule 639 | 640 | def rebuild(self, year, month): 641 | # Every mask is 7 days longer to handle cross-year weekly periods. 642 | rr = self.rrule 643 | if year != self.lastyear: 644 | self.yearlen = 365+calendar.isleap(year) 645 | self.nextyearlen = 365+calendar.isleap(year+1) 646 | firstyday = datetime.date(year, 1, 1) 647 | self.yearordinal = firstyday.toordinal() 648 | self.yearweekday = firstyday.weekday() 649 | 650 | wday = datetime.date(year, 1, 1).weekday() 651 | if self.yearlen == 365: 652 | self.mmask = M365MASK 653 | self.mdaymask = MDAY365MASK 654 | self.nmdaymask = NMDAY365MASK 655 | self.wdaymask = WDAYMASK[wday:] 656 | self.mrange = M365RANGE 657 | else: 658 | self.mmask = M366MASK 659 | self.mdaymask = MDAY366MASK 660 | self.nmdaymask = NMDAY366MASK 661 | self.wdaymask = WDAYMASK[wday:] 662 | self.mrange = M366RANGE 663 | 664 | if not rr._byweekno: 665 | self.wnomask = None 666 | else: 667 | self.wnomask = [0]*(self.yearlen+7) 668 | #no1wkst = firstwkst = self.wdaymask.index(rr._wkst) 669 | no1wkst = firstwkst = (7-self.yearweekday+rr._wkst)%7 670 | if no1wkst >= 4: 671 | no1wkst = 0 672 | # Number of days in the year, plus the days we got 673 | # from last year. 674 | wyearlen = self.yearlen+(self.yearweekday-rr._wkst)%7 675 | else: 676 | # Number of days in the year, minus the days we 677 | # left in last year. 678 | wyearlen = self.yearlen-no1wkst 679 | div, mod = divmod(wyearlen, 7) 680 | numweeks = div+mod//4 681 | for n in rr._byweekno: 682 | if n < 0: 683 | n += numweeks+1 684 | if not (0 < n <= numweeks): 685 | continue 686 | if n > 1: 687 | i = no1wkst+(n-1)*7 688 | if no1wkst != firstwkst: 689 | i -= 7-firstwkst 690 | else: 691 | i = no1wkst 692 | for j in range(7): 693 | self.wnomask[i] = 1 694 | i += 1 695 | if self.wdaymask[i] == rr._wkst: 696 | break 697 | if 1 in rr._byweekno: 698 | # Check week number 1 of next year as well 699 | # TODO: Check -numweeks for next year. 700 | i = no1wkst+numweeks*7 701 | if no1wkst != firstwkst: 702 | i -= 7-firstwkst 703 | if i < self.yearlen: 704 | # If week starts in next year, we 705 | # don't care about it. 706 | for j in range(7): 707 | self.wnomask[i] = 1 708 | i += 1 709 | if self.wdaymask[i] == rr._wkst: 710 | break 711 | if no1wkst: 712 | # Check last week number of last year as 713 | # well. If no1wkst is 0, either the year 714 | # started on week start, or week number 1 715 | # got days from last year, so there are no 716 | # days from last year's last week number in 717 | # this year. 
718 | if -1 not in rr._byweekno: 719 | lyearweekday = datetime.date(year-1,1,1).weekday() 720 | lno1wkst = (7-lyearweekday+rr._wkst)%7 721 | lyearlen = 365+calendar.isleap(year-1) 722 | if lno1wkst >= 4: 723 | lno1wkst = 0 724 | lnumweeks = 52+(lyearlen+ 725 | (lyearweekday-rr._wkst)%7)%7//4 726 | else: 727 | lnumweeks = 52+(self.yearlen-no1wkst)%7//4 728 | else: 729 | lnumweeks = -1 730 | if lnumweeks in rr._byweekno: 731 | for i in range(no1wkst): 732 | self.wnomask[i] = 1 733 | 734 | if (rr._bynweekday and 735 | (month != self.lastmonth or year != self.lastyear)): 736 | ranges = [] 737 | if rr._freq == YEARLY: 738 | if rr._bymonth: 739 | for month in rr._bymonth: 740 | ranges.append(self.mrange[month-1:month+1]) 741 | else: 742 | ranges = [(0, self.yearlen)] 743 | elif rr._freq == MONTHLY: 744 | ranges = [self.mrange[month-1:month+1]] 745 | if ranges: 746 | # Weekly frequency won't get here, so we may not 747 | # care about cross-year weekly periods. 748 | self.nwdaymask = [0]*self.yearlen 749 | for first, last in ranges: 750 | last -= 1 751 | for wday, n in rr._bynweekday: 752 | if n < 0: 753 | i = last+(n+1)*7 754 | i -= (self.wdaymask[i]-wday)%7 755 | else: 756 | i = first+(n-1)*7 757 | i += (7-self.wdaymask[i]+wday)%7 758 | if first <= i <= last: 759 | self.nwdaymask[i] = 1 760 | 761 | if rr._byeaster: 762 | self.eastermask = [0]*(self.yearlen+7) 763 | eyday = easter.easter(year).toordinal()-self.yearordinal 764 | for offset in rr._byeaster: 765 | self.eastermask[eyday+offset] = 1 766 | 767 | self.lastyear = year 768 | self.lastmonth = month 769 | 770 | def ydayset(self, year, month, day): 771 | return range(self.yearlen), 0, self.yearlen 772 | 773 | def mdayset(self, year, month, day): 774 | set = [None]*self.yearlen 775 | start, end = self.mrange[month-1:month+1] 776 | for i in range(start, end): 777 | set[i] = i 778 | return set, start, end 779 | 780 | def wdayset(self, year, month, day): 781 | # We need to handle cross-year weeks here. 782 | set = [None]*(self.yearlen+7) 783 | i = datetime.date(year, month, day).toordinal()-self.yearordinal 784 | start = i 785 | for j in range(7): 786 | set[i] = i 787 | i += 1 788 | #if (not (0 <= i < self.yearlen) or 789 | # self.wdaymask[i] == self.rrule._wkst): 790 | # This will cross the year boundary, if necessary. 
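                # Editor's sketch (assumed scenario, not an original source
                # comment): for a WEEKLY rule starting Sat 2003-12-27 with
                # wkst=MO, i runs past self.yearlen-1 into the 7-day padding
                # of the masks, so the week set can finish in the next year.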
791 | if self.wdaymask[i] == self.rrule._wkst: 792 | break 793 | return set, start, i 794 | 795 | def ddayset(self, year, month, day): 796 | set = [None]*self.yearlen 797 | i = datetime.date(year, month, day).toordinal()-self.yearordinal 798 | set[i] = i 799 | return set, i, i+1 800 | 801 | def htimeset(self, hour, minute, second): 802 | set = [] 803 | rr = self.rrule 804 | for minute in rr._byminute: 805 | for second in rr._bysecond: 806 | set.append(datetime.time(hour, minute, second, 807 | tzinfo=rr._tzinfo)) 808 | set.sort() 809 | return set 810 | 811 | def mtimeset(self, hour, minute, second): 812 | set = [] 813 | rr = self.rrule 814 | for second in rr._bysecond: 815 | set.append(datetime.time(hour, minute, second, tzinfo=rr._tzinfo)) 816 | set.sort() 817 | return set 818 | 819 | def stimeset(self, hour, minute, second): 820 | return (datetime.time(hour, minute, second, 821 | tzinfo=self.rrule._tzinfo),) 822 | 823 | 824 | class rruleset(rrulebase): 825 | 826 | class _genitem: 827 | def __init__(self, genlist, gen): 828 | try: 829 | self.dt = gen() 830 | genlist.append(self) 831 | except StopIteration: 832 | pass 833 | self.genlist = genlist 834 | self.gen = gen 835 | 836 | def next(self): 837 | try: 838 | self.dt = self.gen() 839 | except StopIteration: 840 | self.genlist.remove(self) 841 | 842 | def __cmp__(self, other): 843 | return cmp(self.dt, other.dt) 844 | 845 | def __init__(self, cache=False): 846 | rrulebase.__init__(self, cache) 847 | self._rrule = [] 848 | self._rdate = [] 849 | self._exrule = [] 850 | self._exdate = [] 851 | 852 | def rrule(self, rrule): 853 | self._rrule.append(rrule) 854 | 855 | def rdate(self, rdate): 856 | self._rdate.append(rdate) 857 | 858 | def exrule(self, exrule): 859 | self._exrule.append(exrule) 860 | 861 | def exdate(self, exdate): 862 | self._exdate.append(exdate) 863 | 864 | def _iter(self): 865 | rlist = [] 866 | self._rdate.sort() 867 | self._genitem(rlist, iter(self._rdate).next) 868 | for gen in [iter(x).next for x in self._rrule]: 869 | self._genitem(rlist, gen) 870 | rlist.sort() 871 | exlist = [] 872 | self._exdate.sort() 873 | self._genitem(exlist, iter(self._exdate).next) 874 | for gen in [iter(x).next for x in self._exrule]: 875 | self._genitem(exlist, gen) 876 | exlist.sort() 877 | lastdt = None 878 | total = 0 879 | while rlist: 880 | ritem = rlist[0] 881 | if not lastdt or lastdt != ritem.dt: 882 | while exlist and exlist[0] < ritem: 883 | exlist[0].next() 884 | exlist.sort() 885 | if not exlist or ritem != exlist[0]: 886 | total += 1 887 | yield ritem.dt 888 | lastdt = ritem.dt 889 | ritem.next() 890 | rlist.sort() 891 | self._len = total 892 | 893 | class _rrulestr: 894 | 895 | _freq_map = {"YEARLY": YEARLY, 896 | "MONTHLY": MONTHLY, 897 | "WEEKLY": WEEKLY, 898 | "DAILY": DAILY, 899 | "HOURLY": HOURLY, 900 | "MINUTELY": MINUTELY, 901 | "SECONDLY": SECONDLY} 902 | 903 | _weekday_map = {"MO":0,"TU":1,"WE":2,"TH":3,"FR":4,"SA":5,"SU":6} 904 | 905 | def _handle_int(self, rrkwargs, name, value, **kwargs): 906 | rrkwargs[name.lower()] = int(value) 907 | 908 | def _handle_int_list(self, rrkwargs, name, value, **kwargs): 909 | rrkwargs[name.lower()] = [int(x) for x in value.split(',')] 910 | 911 | _handle_INTERVAL = _handle_int 912 | _handle_COUNT = _handle_int 913 | _handle_BYSETPOS = _handle_int_list 914 | _handle_BYMONTH = _handle_int_list 915 | _handle_BYMONTHDAY = _handle_int_list 916 | _handle_BYYEARDAY = _handle_int_list 917 | _handle_BYEASTER = _handle_int_list 918 | _handle_BYWEEKNO = _handle_int_list 919 | _handle_BYHOUR = 
_handle_int_list 920 | _handle_BYMINUTE = _handle_int_list 921 | _handle_BYSECOND = _handle_int_list 922 | 923 | def _handle_FREQ(self, rrkwargs, name, value, **kwargs): 924 | rrkwargs["freq"] = self._freq_map[value] 925 | 926 | def _handle_UNTIL(self, rrkwargs, name, value, **kwargs): 927 | global parser 928 | if not parser: 929 | from dateutil import parser 930 | try: 931 | rrkwargs["until"] = parser.parse(value, 932 | ignoretz=kwargs.get("ignoretz"), 933 | tzinfos=kwargs.get("tzinfos")) 934 | except ValueError: 935 | raise ValueError, "invalid until date" 936 | 937 | def _handle_WKST(self, rrkwargs, name, value, **kwargs): 938 | rrkwargs["wkst"] = self._weekday_map[value] 939 | 940 | def _handle_BYWEEKDAY(self, rrkwargs, name, value, **kwarsg): 941 | l = [] 942 | for wday in value.split(','): 943 | for i in range(len(wday)): 944 | if wday[i] not in '+-0123456789': 945 | break 946 | n = wday[:i] or None 947 | w = wday[i:] 948 | if n: n = int(n) 949 | l.append(weekdays[self._weekday_map[w]](n)) 950 | rrkwargs["byweekday"] = l 951 | 952 | _handle_BYDAY = _handle_BYWEEKDAY 953 | 954 | def _parse_rfc_rrule(self, line, 955 | dtstart=None, 956 | cache=False, 957 | ignoretz=False, 958 | tzinfos=None): 959 | if line.find(':') != -1: 960 | name, value = line.split(':') 961 | if name != "RRULE": 962 | raise ValueError, "unknown parameter name" 963 | else: 964 | value = line 965 | rrkwargs = {} 966 | for pair in value.split(';'): 967 | name, value = pair.split('=') 968 | name = name.upper() 969 | value = value.upper() 970 | try: 971 | getattr(self, "_handle_"+name)(rrkwargs, name, value, 972 | ignoretz=ignoretz, 973 | tzinfos=tzinfos) 974 | except AttributeError: 975 | raise ValueError, "unknown parameter '%s'" % name 976 | except (KeyError, ValueError): 977 | raise ValueError, "invalid '%s': %s" % (name, value) 978 | return rrule(dtstart=dtstart, cache=cache, **rrkwargs) 979 | 980 | def _parse_rfc(self, s, 981 | dtstart=None, 982 | cache=False, 983 | unfold=False, 984 | forceset=False, 985 | compatible=False, 986 | ignoretz=False, 987 | tzinfos=None): 988 | global parser 989 | if compatible: 990 | forceset = True 991 | unfold = True 992 | s = s.upper() 993 | if not s.strip(): 994 | raise ValueError, "empty string" 995 | if unfold: 996 | lines = s.splitlines() 997 | i = 0 998 | while i < len(lines): 999 | line = lines[i].rstrip() 1000 | if not line: 1001 | del lines[i] 1002 | elif i > 0 and line[0] == " ": 1003 | lines[i-1] += line[1:] 1004 | del lines[i] 1005 | else: 1006 | i += 1 1007 | else: 1008 | lines = s.split() 1009 | if (not forceset and len(lines) == 1 and 1010 | (s.find(':') == -1 or s.startswith('RRULE:'))): 1011 | return self._parse_rfc_rrule(lines[0], cache=cache, 1012 | dtstart=dtstart, ignoretz=ignoretz, 1013 | tzinfos=tzinfos) 1014 | else: 1015 | rrulevals = [] 1016 | rdatevals = [] 1017 | exrulevals = [] 1018 | exdatevals = [] 1019 | for line in lines: 1020 | if not line: 1021 | continue 1022 | if line.find(':') == -1: 1023 | name = "RRULE" 1024 | value = line 1025 | else: 1026 | name, value = line.split(':', 1) 1027 | parms = name.split(';') 1028 | if not parms: 1029 | raise ValueError, "empty property name" 1030 | name = parms[0] 1031 | parms = parms[1:] 1032 | if name == "RRULE": 1033 | for parm in parms: 1034 | raise ValueError, "unsupported RRULE parm: "+parm 1035 | rrulevals.append(value) 1036 | elif name == "RDATE": 1037 | for parm in parms: 1038 | if parm != "VALUE=DATE-TIME": 1039 | raise ValueError, "unsupported RDATE parm: "+parm 1040 | rdatevals.append(value) 1041 | 
elif name == "EXRULE": 1042 | for parm in parms: 1043 | raise ValueError, "unsupported EXRULE parm: "+parm 1044 | exrulevals.append(value) 1045 | elif name == "EXDATE": 1046 | for parm in parms: 1047 | if parm != "VALUE=DATE-TIME": 1048 | raise ValueError, "unsupported RDATE parm: "+parm 1049 | exdatevals.append(value) 1050 | elif name == "DTSTART": 1051 | for parm in parms: 1052 | raise ValueError, "unsupported DTSTART parm: "+parm 1053 | if not parser: 1054 | from dateutil import parser 1055 | dtstart = parser.parse(value, ignoretz=ignoretz, 1056 | tzinfos=tzinfos) 1057 | else: 1058 | raise ValueError, "unsupported property: "+name 1059 | if (forceset or len(rrulevals) > 1 or 1060 | rdatevals or exrulevals or exdatevals): 1061 | if not parser and (rdatevals or exdatevals): 1062 | from dateutil import parser 1063 | set = rruleset(cache=cache) 1064 | for value in rrulevals: 1065 | set.rrule(self._parse_rfc_rrule(value, dtstart=dtstart, 1066 | ignoretz=ignoretz, 1067 | tzinfos=tzinfos)) 1068 | for value in rdatevals: 1069 | for datestr in value.split(','): 1070 | set.rdate(parser.parse(datestr, 1071 | ignoretz=ignoretz, 1072 | tzinfos=tzinfos)) 1073 | for value in exrulevals: 1074 | set.exrule(self._parse_rfc_rrule(value, dtstart=dtstart, 1075 | ignoretz=ignoretz, 1076 | tzinfos=tzinfos)) 1077 | for value in exdatevals: 1078 | for datestr in value.split(','): 1079 | set.exdate(parser.parse(datestr, 1080 | ignoretz=ignoretz, 1081 | tzinfos=tzinfos)) 1082 | if compatible and dtstart: 1083 | set.rdate(dtstart) 1084 | return set 1085 | else: 1086 | return self._parse_rfc_rrule(rrulevals[0], 1087 | dtstart=dtstart, 1088 | cache=cache, 1089 | ignoretz=ignoretz, 1090 | tzinfos=tzinfos) 1091 | 1092 | def __call__(self, s, **kwargs): 1093 | return self._parse_rfc(s, **kwargs) 1094 | 1095 | rrulestr = _rrulestr() 1096 | 1097 | # vim:ts=4:sw=4:et 1098 | -------------------------------------------------------------------------------- /dateutil/tz.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | import struct 12 | import time 13 | import sys 14 | import os 15 | 16 | relativedelta = None 17 | parser = None 18 | rrule = None 19 | 20 | __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange", 21 | "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"] 22 | 23 | try: 24 | from dateutil.tzwin import tzwin, tzwinlocal 25 | except (ImportError, OSError): 26 | tzwin, tzwinlocal = None, None 27 | 28 | ZERO = datetime.timedelta(0) 29 | EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal() 30 | 31 | class tzutc(datetime.tzinfo): 32 | 33 | def utcoffset(self, dt): 34 | return ZERO 35 | 36 | def dst(self, dt): 37 | return ZERO 38 | 39 | def tzname(self, dt): 40 | return "UTC" 41 | 42 | def __eq__(self, other): 43 | return (isinstance(other, tzutc) or 44 | (isinstance(other, tzoffset) and other._offset == ZERO)) 45 | 46 | def __ne__(self, other): 47 | return not self.__eq__(other) 48 | 49 | def __repr__(self): 50 | return "%s()" % self.__class__.__name__ 51 | 52 | __reduce__ = object.__reduce__ 53 | 54 | class tzoffset(datetime.tzinfo): 55 | 56 | def __init__(self, name, offset): 57 | self._name = name 58 | self._offset = datetime.timedelta(seconds=offset) 59 | 60 | def utcoffset(self, dt): 61 | return self._offset 62 | 63 | def dst(self, dt): 64 | return ZERO 65 | 66 | def tzname(self, dt): 67 | return self._name 68 | 69 | def __eq__(self, other): 70 | return (isinstance(other, tzoffset) and 71 | self._offset == other._offset) 72 | 73 | def __ne__(self, other): 74 | return not self.__eq__(other) 75 | 76 | def __repr__(self): 77 | return "%s(%s, %s)" % (self.__class__.__name__, 78 | `self._name`, 79 | self._offset.days*86400+self._offset.seconds) 80 | 81 | __reduce__ = object.__reduce__ 82 | 83 | class tzlocal(datetime.tzinfo): 84 | 85 | _std_offset = datetime.timedelta(seconds=-time.timezone) 86 | if time.daylight: 87 | _dst_offset = datetime.timedelta(seconds=-time.altzone) 88 | else: 89 | _dst_offset = _std_offset 90 | 91 | def utcoffset(self, dt): 92 | if self._isdst(dt): 93 | return self._dst_offset 94 | else: 95 | return self._std_offset 96 | 97 | def dst(self, dt): 98 | if self._isdst(dt): 99 | return self._dst_offset-self._std_offset 100 | else: 101 | return ZERO 102 | 103 | def tzname(self, dt): 104 | return time.tzname[self._isdst(dt)] 105 | 106 | def _isdst(self, dt): 107 | # We can't use mktime here. It is unstable when deciding if 108 | # the hour near to a change is DST or not. 
109 | # 110 | # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour, 111 | # dt.minute, dt.second, dt.weekday(), 0, -1)) 112 | # return time.localtime(timestamp).tm_isdst 113 | # 114 | # The code above yields the following result: 115 | # 116 | #>>> import tz, datetime 117 | #>>> t = tz.tzlocal() 118 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 119 | #'BRDT' 120 | #>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname() 121 | #'BRST' 122 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 123 | #'BRST' 124 | #>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname() 125 | #'BRDT' 126 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() 127 | #'BRDT' 128 | # 129 | # Here is a more stable implementation: 130 | # 131 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400 132 | + dt.hour * 3600 133 | + dt.minute * 60 134 | + dt.second) 135 | return time.localtime(timestamp+time.timezone).tm_isdst 136 | 137 | def __eq__(self, other): 138 | if not isinstance(other, tzlocal): 139 | return False 140 | return (self._std_offset == other._std_offset and 141 | self._dst_offset == other._dst_offset) 142 | return True 143 | 144 | def __ne__(self, other): 145 | return not self.__eq__(other) 146 | 147 | def __repr__(self): 148 | return "%s()" % self.__class__.__name__ 149 | 150 | __reduce__ = object.__reduce__ 151 | 152 | class _ttinfo(object): 153 | __slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"] 154 | 155 | def __init__(self): 156 | for attr in self.__slots__: 157 | setattr(self, attr, None) 158 | 159 | def __repr__(self): 160 | l = [] 161 | for attr in self.__slots__: 162 | value = getattr(self, attr) 163 | if value is not None: 164 | l.append("%s=%s" % (attr, `value`)) 165 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) 166 | 167 | def __eq__(self, other): 168 | if not isinstance(other, _ttinfo): 169 | return False 170 | return (self.offset == other.offset and 171 | self.delta == other.delta and 172 | self.isdst == other.isdst and 173 | self.abbr == other.abbr and 174 | self.isstd == other.isstd and 175 | self.isgmt == other.isgmt) 176 | 177 | def __ne__(self, other): 178 | return not self.__eq__(other) 179 | 180 | def __getstate__(self): 181 | state = {} 182 | for name in self.__slots__: 183 | state[name] = getattr(self, name, None) 184 | return state 185 | 186 | def __setstate__(self, state): 187 | for name in self.__slots__: 188 | if name in state: 189 | setattr(self, name, state[name]) 190 | 191 | class tzfile(datetime.tzinfo): 192 | 193 | # http://www.twinsun.com/tz/tz-link.htm 194 | # ftp://elsie.nci.nih.gov/pub/tz*.tar.gz 195 | 196 | def __init__(self, fileobj): 197 | if isinstance(fileobj, basestring): 198 | self._filename = fileobj 199 | fileobj = open(fileobj) 200 | elif hasattr(fileobj, "name"): 201 | self._filename = fileobj.name 202 | else: 203 | self._filename = `fileobj` 204 | 205 | # From tzfile(5): 206 | # 207 | # The time zone information files used by tzset(3) 208 | # begin with the magic characters "TZif" to identify 209 | # them as time zone information files, followed by 210 | # sixteen bytes reserved for future use, followed by 211 | # six four-byte values of type long, written in a 212 | # ``standard'' byte order (the high-order byte 213 | # of the value is written first). 214 | 215 | if fileobj.read(4) != "TZif": 216 | raise ValueError, "magic not found" 217 | 218 | fileobj.read(16) 219 | 220 | ( 221 | # The number of UTC/local indicators stored in the file. 
222 | ttisgmtcnt, 223 | 224 | # The number of standard/wall indicators stored in the file. 225 | ttisstdcnt, 226 | 227 | # The number of leap seconds for which data is 228 | # stored in the file. 229 | leapcnt, 230 | 231 | # The number of "transition times" for which data 232 | # is stored in the file. 233 | timecnt, 234 | 235 | # The number of "local time types" for which data 236 | # is stored in the file (must not be zero). 237 | typecnt, 238 | 239 | # The number of characters of "time zone 240 | # abbreviation strings" stored in the file. 241 | charcnt, 242 | 243 | ) = struct.unpack(">6l", fileobj.read(24)) 244 | 245 | # The above header is followed by tzh_timecnt four-byte 246 | # values of type long, sorted in ascending order. 247 | # These values are written in ``standard'' byte order. 248 | # Each is used as a transition time (as returned by 249 | # time(2)) at which the rules for computing local time 250 | # change. 251 | 252 | if timecnt: 253 | self._trans_list = struct.unpack(">%dl" % timecnt, 254 | fileobj.read(timecnt*4)) 255 | else: 256 | self._trans_list = [] 257 | 258 | # Next come tzh_timecnt one-byte values of type unsigned 259 | # char; each one tells which of the different types of 260 | # ``local time'' types described in the file is associated 261 | # with the same-indexed transition time. These values 262 | # serve as indices into an array of ttinfo structures that 263 | # appears next in the file. 264 | 265 | if timecnt: 266 | self._trans_idx = struct.unpack(">%dB" % timecnt, 267 | fileobj.read(timecnt)) 268 | else: 269 | self._trans_idx = [] 270 | 271 | # Each ttinfo structure is written as a four-byte value 272 | # for tt_gmtoff of type long, in a standard byte 273 | # order, followed by a one-byte value for tt_isdst 274 | # and a one-byte value for tt_abbrind. In each 275 | # structure, tt_gmtoff gives the number of 276 | # seconds to be added to UTC, tt_isdst tells whether 277 | # tm_isdst should be set by localtime(3), and 278 | # tt_abbrind serves as an index into the array of 279 | # time zone abbreviation characters that follow the 280 | # ttinfo structure(s) in the file. 281 | 282 | ttinfo = [] 283 | 284 | for i in range(typecnt): 285 | ttinfo.append(struct.unpack(">lbb", fileobj.read(6))) 286 | 287 | abbr = fileobj.read(charcnt) 288 | 289 | # Then there are tzh_leapcnt pairs of four-byte 290 | # values, written in standard byte order; the 291 | # first value of each pair gives the time (as 292 | # returned by time(2)) at which a leap second 293 | # occurs; the second gives the total number of 294 | # leap seconds to be applied after the given time. 295 | # The pairs of values are sorted in ascending order 296 | # by time. 297 | 298 | # Not used, for now 299 | if leapcnt: 300 | leap = struct.unpack(">%dl" % (leapcnt*2), 301 | fileobj.read(leapcnt*8)) 302 | 303 | # Then there are tzh_ttisstdcnt standard/wall 304 | # indicators, each stored as a one-byte value; 305 | # they tell whether the transition times associated 306 | # with local time types were specified as standard 307 | # time or wall clock time, and are used when 308 | # a time zone file is used in handling POSIX-style 309 | # time zone environment variables. 
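        # Editor's note (hedged, added in editing): each indicator is one
        # signed byte, so for ttisstdcnt == 5 the format string built below
        # is ">5b", and struct.calcsize(">5b") == 5.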
310 | 311 | if ttisstdcnt: 312 | isstd = struct.unpack(">%db" % ttisstdcnt, 313 | fileobj.read(ttisstdcnt)) 314 | 315 | # Finally, there are tzh_ttisgmtcnt UTC/local 316 | # indicators, each stored as a one-byte value; 317 | # they tell whether the transition times associated 318 | # with local time types were specified as UTC or 319 | # local time, and are used when a time zone file 320 | # is used in handling POSIX-style time zone envi- 321 | # ronment variables. 322 | 323 | if ttisgmtcnt: 324 | isgmt = struct.unpack(">%db" % ttisgmtcnt, 325 | fileobj.read(ttisgmtcnt)) 326 | 327 | # ** Everything has been read ** 328 | 329 | # Build ttinfo list 330 | self._ttinfo_list = [] 331 | for i in range(typecnt): 332 | gmtoff, isdst, abbrind = ttinfo[i] 333 | # Round to full-minutes if that's not the case. Python's 334 | # datetime doesn't accept sub-minute timezones. Check 335 | # http://python.org/sf/1447945 for some information. 336 | gmtoff = (gmtoff+30)//60*60 337 | tti = _ttinfo() 338 | tti.offset = gmtoff 339 | tti.delta = datetime.timedelta(seconds=gmtoff) 340 | tti.isdst = isdst 341 | tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)] 342 | tti.isstd = (ttisstdcnt > i and isstd[i] != 0) 343 | tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0) 344 | self._ttinfo_list.append(tti) 345 | 346 | # Replace ttinfo indexes for ttinfo objects. 347 | trans_idx = [] 348 | for idx in self._trans_idx: 349 | trans_idx.append(self._ttinfo_list[idx]) 350 | self._trans_idx = tuple(trans_idx) 351 | 352 | # Set standard, dst, and before ttinfos. before will be 353 | # used when a given time is before any transitions, 354 | # and will be set to the first non-dst ttinfo, or to 355 | # the first dst, if all of them are dst. 356 | self._ttinfo_std = None 357 | self._ttinfo_dst = None 358 | self._ttinfo_before = None 359 | if self._ttinfo_list: 360 | if not self._trans_list: 361 | self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0] 362 | else: 363 | for i in range(timecnt-1,-1,-1): 364 | tti = self._trans_idx[i] 365 | if not self._ttinfo_std and not tti.isdst: 366 | self._ttinfo_std = tti 367 | elif not self._ttinfo_dst and tti.isdst: 368 | self._ttinfo_dst = tti 369 | if self._ttinfo_std and self._ttinfo_dst: 370 | break 371 | else: 372 | if self._ttinfo_dst and not self._ttinfo_std: 373 | self._ttinfo_std = self._ttinfo_dst 374 | 375 | for tti in self._ttinfo_list: 376 | if not tti.isdst: 377 | self._ttinfo_before = tti 378 | break 379 | else: 380 | self._ttinfo_before = self._ttinfo_list[0] 381 | 382 | # Now fix transition times to become relative to wall time. 383 | # 384 | # I'm not sure about this. In my tests, the tz source file 385 | # is setup to wall time, and in the binary file isstd and 386 | # isgmt are off, so it should be in wall time. OTOH, it's 387 | # always in gmt time. Let me know if you have comments 388 | # about this. 389 | laststdoffset = 0 390 | self._trans_list = list(self._trans_list) 391 | for i in range(len(self._trans_list)): 392 | tti = self._trans_idx[i] 393 | if not tti.isdst: 394 | # This is std time. 395 | self._trans_list[i] += tti.offset 396 | laststdoffset = tti.offset 397 | else: 398 | # This is dst time. Convert to std. 
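                # Editor's sketch (assumed offsets, not from the source): for
                # a zone like US/Eastern, a DST transition instant gets
                # laststdoffset == -18000 (-5h) added here, yielding the local
                # standard-time wall clock that _find_ttinfo() later compares
                # against its naive timestamp.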
399 | self._trans_list[i] += laststdoffset 400 | self._trans_list = tuple(self._trans_list) 401 | 402 | def _find_ttinfo(self, dt, laststd=0): 403 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400 404 | + dt.hour * 3600 405 | + dt.minute * 60 406 | + dt.second) 407 | idx = 0 408 | for trans in self._trans_list: 409 | if timestamp < trans: 410 | break 411 | idx += 1 412 | else: 413 | return self._ttinfo_std 414 | if idx == 0: 415 | return self._ttinfo_before 416 | if laststd: 417 | while idx > 0: 418 | tti = self._trans_idx[idx-1] 419 | if not tti.isdst: 420 | return tti 421 | idx -= 1 422 | else: 423 | return self._ttinfo_std 424 | else: 425 | return self._trans_idx[idx-1] 426 | 427 | def utcoffset(self, dt): 428 | if not self._ttinfo_std: 429 | return ZERO 430 | return self._find_ttinfo(dt).delta 431 | 432 | def dst(self, dt): 433 | if not self._ttinfo_dst: 434 | return ZERO 435 | tti = self._find_ttinfo(dt) 436 | if not tti.isdst: 437 | return ZERO 438 | 439 | # The documentation says that utcoffset()-dst() must 440 | # be constant for every dt. 441 | return tti.delta-self._find_ttinfo(dt, laststd=1).delta 442 | 443 | # An alternative for that would be: 444 | # 445 | # return self._ttinfo_dst.offset-self._ttinfo_std.offset 446 | # 447 | # However, this class stores historical changes in the 448 | # dst offset, so I belive that this wouldn't be the right 449 | # way to implement this. 450 | 451 | def tzname(self, dt): 452 | if not self._ttinfo_std: 453 | return None 454 | return self._find_ttinfo(dt).abbr 455 | 456 | def __eq__(self, other): 457 | if not isinstance(other, tzfile): 458 | return False 459 | return (self._trans_list == other._trans_list and 460 | self._trans_idx == other._trans_idx and 461 | self._ttinfo_list == other._ttinfo_list) 462 | 463 | def __ne__(self, other): 464 | return not self.__eq__(other) 465 | 466 | 467 | def __repr__(self): 468 | return "%s(%s)" % (self.__class__.__name__, `self._filename`) 469 | 470 | def __reduce__(self): 471 | if not os.path.isfile(self._filename): 472 | raise ValueError, "Unpickable %s class" % self.__class__.__name__ 473 | return (self.__class__, (self._filename,)) 474 | 475 | class tzrange(datetime.tzinfo): 476 | 477 | def __init__(self, stdabbr, stdoffset=None, 478 | dstabbr=None, dstoffset=None, 479 | start=None, end=None): 480 | global relativedelta 481 | if not relativedelta: 482 | from dateutil import relativedelta 483 | self._std_abbr = stdabbr 484 | self._dst_abbr = dstabbr 485 | if stdoffset is not None: 486 | self._std_offset = datetime.timedelta(seconds=stdoffset) 487 | else: 488 | self._std_offset = ZERO 489 | if dstoffset is not None: 490 | self._dst_offset = datetime.timedelta(seconds=dstoffset) 491 | elif dstabbr and stdoffset is not None: 492 | self._dst_offset = self._std_offset+datetime.timedelta(hours=+1) 493 | else: 494 | self._dst_offset = ZERO 495 | if dstabbr and start is None: 496 | self._start_delta = relativedelta.relativedelta( 497 | hours=+2, month=4, day=1, weekday=relativedelta.SU(+1)) 498 | else: 499 | self._start_delta = start 500 | if dstabbr and end is None: 501 | self._end_delta = relativedelta.relativedelta( 502 | hours=+1, month=10, day=31, weekday=relativedelta.SU(-1)) 503 | else: 504 | self._end_delta = end 505 | 506 | def utcoffset(self, dt): 507 | if self._isdst(dt): 508 | return self._dst_offset 509 | else: 510 | return self._std_offset 511 | 512 | def dst(self, dt): 513 | if self._isdst(dt): 514 | return self._dst_offset-self._std_offset 515 | else: 516 | return ZERO 517 | 518 | def 
tzname(self, dt): 519 | if self._isdst(dt): 520 | return self._dst_abbr 521 | else: 522 | return self._std_abbr 523 | 524 | def _isdst(self, dt): 525 | if not self._start_delta: 526 | return False 527 | year = datetime.datetime(dt.year,1,1) 528 | start = year+self._start_delta 529 | end = year+self._end_delta 530 | dt = dt.replace(tzinfo=None) 531 | if start < end: 532 | return dt >= start and dt < end 533 | else: 534 | return dt >= start or dt < end 535 | 536 | def __eq__(self, other): 537 | if not isinstance(other, tzrange): 538 | return False 539 | return (self._std_abbr == other._std_abbr and 540 | self._dst_abbr == other._dst_abbr and 541 | self._std_offset == other._std_offset and 542 | self._dst_offset == other._dst_offset and 543 | self._start_delta == other._start_delta and 544 | self._end_delta == other._end_delta) 545 | 546 | def __ne__(self, other): 547 | return not self.__eq__(other) 548 | 549 | def __repr__(self): 550 | return "%s(...)" % self.__class__.__name__ 551 | 552 | __reduce__ = object.__reduce__ 553 | 554 | class tzstr(tzrange): 555 | 556 | def __init__(self, s): 557 | global parser 558 | if not parser: 559 | from dateutil import parser 560 | self._s = s 561 | 562 | res = parser._parsetz(s) 563 | if res is None: 564 | raise ValueError, "unknown string format" 565 | 566 | # Here we break the compatibility with the TZ variable handling. 567 | # GMT-3 actually *means* the timezone -3. 568 | if res.stdabbr in ("GMT", "UTC"): 569 | res.stdoffset *= -1 570 | 571 | # We must initialize it first, since _delta() needs 572 | # _std_offset and _dst_offset set. Use False in start/end 573 | # to avoid building it two times. 574 | tzrange.__init__(self, res.stdabbr, res.stdoffset, 575 | res.dstabbr, res.dstoffset, 576 | start=False, end=False) 577 | 578 | if not res.dstabbr: 579 | self._start_delta = None 580 | self._end_delta = None 581 | else: 582 | self._start_delta = self._delta(res.start) 583 | if self._start_delta: 584 | self._end_delta = self._delta(res.end, isend=1) 585 | 586 | def _delta(self, x, isend=0): 587 | kwargs = {} 588 | if x.month is not None: 589 | kwargs["month"] = x.month 590 | if x.weekday is not None: 591 | kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week) 592 | if x.week > 0: 593 | kwargs["day"] = 1 594 | else: 595 | kwargs["day"] = 31 596 | elif x.day: 597 | kwargs["day"] = x.day 598 | elif x.yday is not None: 599 | kwargs["yearday"] = x.yday 600 | elif x.jyday is not None: 601 | kwargs["nlyearday"] = x.jyday 602 | if not kwargs: 603 | # Default is to start on first sunday of april, and end 604 | # on last sunday of october. 605 | if not isend: 606 | kwargs["month"] = 4 607 | kwargs["day"] = 1 608 | kwargs["weekday"] = relativedelta.SU(+1) 609 | else: 610 | kwargs["month"] = 10 611 | kwargs["day"] = 31 612 | kwargs["weekday"] = relativedelta.SU(-1) 613 | if x.time is not None: 614 | kwargs["seconds"] = x.time 615 | else: 616 | # Default is 2AM. 617 | kwargs["seconds"] = 7200 618 | if isend: 619 | # Convert to standard time, to follow the documented way 620 | # of working with the extra hour. See the documentation 621 | # of the tzinfo class. 
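            # Editor's worked example (assumed -5h std / -4h dst offsets,
            # added in editing): delta below is +1h, so the default 7200s
            # (2AM) end time becomes 3600s, i.e. 1AM standard time.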
622 | delta = self._dst_offset-self._std_offset 623 | kwargs["seconds"] -= delta.seconds+delta.days*86400 624 | return relativedelta.relativedelta(**kwargs) 625 | 626 | def __repr__(self): 627 | return "%s(%s)" % (self.__class__.__name__, `self._s`) 628 | 629 | class _tzicalvtzcomp: 630 | def __init__(self, tzoffsetfrom, tzoffsetto, isdst, 631 | tzname=None, rrule=None): 632 | self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom) 633 | self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto) 634 | self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom 635 | self.isdst = isdst 636 | self.tzname = tzname 637 | self.rrule = rrule 638 | 639 | class _tzicalvtz(datetime.tzinfo): 640 | def __init__(self, tzid, comps=[]): 641 | self._tzid = tzid 642 | self._comps = comps 643 | self._cachedate = [] 644 | self._cachecomp = [] 645 | 646 | def _find_comp(self, dt): 647 | if len(self._comps) == 1: 648 | return self._comps[0] 649 | dt = dt.replace(tzinfo=None) 650 | try: 651 | return self._cachecomp[self._cachedate.index(dt)] 652 | except ValueError: 653 | pass 654 | lastcomp = None 655 | lastcompdt = None 656 | for comp in self._comps: 657 | if not comp.isdst: 658 | # Handle the extra hour in DST -> STD 659 | compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True) 660 | else: 661 | compdt = comp.rrule.before(dt, inc=True) 662 | if compdt and (not lastcompdt or lastcompdt < compdt): 663 | lastcompdt = compdt 664 | lastcomp = comp 665 | if not lastcomp: 666 | # RFC says nothing about what to do when a given 667 | # time is before the first onset date. We'll look for the 668 | # first standard component, or the first component, if 669 | # none is found. 670 | for comp in self._comps: 671 | if not comp.isdst: 672 | lastcomp = comp 673 | break 674 | else: 675 | lastcomp = comp[0] 676 | self._cachedate.insert(0, dt) 677 | self._cachecomp.insert(0, lastcomp) 678 | if len(self._cachedate) > 10: 679 | self._cachedate.pop() 680 | self._cachecomp.pop() 681 | return lastcomp 682 | 683 | def utcoffset(self, dt): 684 | return self._find_comp(dt).tzoffsetto 685 | 686 | def dst(self, dt): 687 | comp = self._find_comp(dt) 688 | if comp.isdst: 689 | return comp.tzoffsetdiff 690 | else: 691 | return ZERO 692 | 693 | def tzname(self, dt): 694 | return self._find_comp(dt).tzname 695 | 696 | def __repr__(self): 697 | return "" % `self._tzid` 698 | 699 | __reduce__ = object.__reduce__ 700 | 701 | class tzical: 702 | def __init__(self, fileobj): 703 | global rrule 704 | if not rrule: 705 | from dateutil import rrule 706 | 707 | if isinstance(fileobj, basestring): 708 | self._s = fileobj 709 | fileobj = open(fileobj) 710 | elif hasattr(fileobj, "name"): 711 | self._s = fileobj.name 712 | else: 713 | self._s = `fileobj` 714 | 715 | self._vtz = {} 716 | 717 | self._parse_rfc(fileobj.read()) 718 | 719 | def keys(self): 720 | return self._vtz.keys() 721 | 722 | def get(self, tzid=None): 723 | if tzid is None: 724 | keys = self._vtz.keys() 725 | if len(keys) == 0: 726 | raise ValueError, "no timezones defined" 727 | elif len(keys) > 1: 728 | raise ValueError, "more than one timezone available" 729 | tzid = keys[0] 730 | return self._vtz.get(tzid) 731 | 732 | def _parse_offset(self, s): 733 | s = s.strip() 734 | if not s: 735 | raise ValueError, "empty offset" 736 | if s[0] in ('+', '-'): 737 | signal = (-1,+1)[s[0]=='+'] 738 | s = s[1:] 739 | else: 740 | signal = +1 741 | if len(s) == 4: 742 | return (int(s[:2])*3600+int(s[2:])*60)*signal 743 | elif len(s) == 6: 744 | return 
(int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal 745 | else: 746 | raise ValueError, "invalid offset: "+s 747 | 748 | def _parse_rfc(self, s): 749 | lines = s.splitlines() 750 | if not lines: 751 | raise ValueError, "empty string" 752 | 753 | # Unfold 754 | i = 0 755 | while i < len(lines): 756 | line = lines[i].rstrip() 757 | if not line: 758 | del lines[i] 759 | elif i > 0 and line[0] == " ": 760 | lines[i-1] += line[1:] 761 | del lines[i] 762 | else: 763 | i += 1 764 | 765 | tzid = None 766 | comps = [] 767 | invtz = False 768 | comptype = None 769 | for line in lines: 770 | if not line: 771 | continue 772 | name, value = line.split(':', 1) 773 | parms = name.split(';') 774 | if not parms: 775 | raise ValueError, "empty property name" 776 | name = parms[0].upper() 777 | parms = parms[1:] 778 | if invtz: 779 | if name == "BEGIN": 780 | if value in ("STANDARD", "DAYLIGHT"): 781 | # Process component 782 | pass 783 | else: 784 | raise ValueError, "unknown component: "+value 785 | comptype = value 786 | founddtstart = False 787 | tzoffsetfrom = None 788 | tzoffsetto = None 789 | rrulelines = [] 790 | tzname = None 791 | elif name == "END": 792 | if value == "VTIMEZONE": 793 | if comptype: 794 | raise ValueError, \ 795 | "component not closed: "+comptype 796 | if not tzid: 797 | raise ValueError, \ 798 | "mandatory TZID not found" 799 | if not comps: 800 | raise ValueError, \ 801 | "at least one component is needed" 802 | # Process vtimezone 803 | self._vtz[tzid] = _tzicalvtz(tzid, comps) 804 | invtz = False 805 | elif value == comptype: 806 | if not founddtstart: 807 | raise ValueError, \ 808 | "mandatory DTSTART not found" 809 | if tzoffsetfrom is None: 810 | raise ValueError, \ 811 | "mandatory TZOFFSETFROM not found" 812 | if tzoffsetto is None: 813 | raise ValueError, \ 814 | "mandatory TZOFFSETFROM not found" 815 | # Process component 816 | rr = None 817 | if rrulelines: 818 | rr = rrule.rrulestr("\n".join(rrulelines), 819 | compatible=True, 820 | ignoretz=True, 821 | cache=True) 822 | comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto, 823 | (comptype == "DAYLIGHT"), 824 | tzname, rr) 825 | comps.append(comp) 826 | comptype = None 827 | else: 828 | raise ValueError, \ 829 | "invalid component end: "+value 830 | elif comptype: 831 | if name == "DTSTART": 832 | rrulelines.append(line) 833 | founddtstart = True 834 | elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"): 835 | rrulelines.append(line) 836 | elif name == "TZOFFSETFROM": 837 | if parms: 838 | raise ValueError, \ 839 | "unsupported %s parm: %s "%(name, parms[0]) 840 | tzoffsetfrom = self._parse_offset(value) 841 | elif name == "TZOFFSETTO": 842 | if parms: 843 | raise ValueError, \ 844 | "unsupported TZOFFSETTO parm: "+parms[0] 845 | tzoffsetto = self._parse_offset(value) 846 | elif name == "TZNAME": 847 | if parms: 848 | raise ValueError, \ 849 | "unsupported TZNAME parm: "+parms[0] 850 | tzname = value 851 | elif name == "COMMENT": 852 | pass 853 | else: 854 | raise ValueError, "unsupported property: "+name 855 | else: 856 | if name == "TZID": 857 | if parms: 858 | raise ValueError, \ 859 | "unsupported TZID parm: "+parms[0] 860 | tzid = value 861 | elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"): 862 | pass 863 | else: 864 | raise ValueError, "unsupported property: "+name 865 | elif name == "BEGIN" and value == "VTIMEZONE": 866 | tzid = None 867 | comps = [] 868 | invtz = True 869 | 870 | def __repr__(self): 871 | return "%s(%s)" % (self.__class__.__name__, `self._s`) 872 | 873 | if sys.platform != "win32": 874 | TZFILES = 
["/etc/localtime", "localtime"] 875 | TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"] 876 | else: 877 | TZFILES = [] 878 | TZPATHS = [] 879 | 880 | def gettz(name=None): 881 | tz = None 882 | if not name: 883 | try: 884 | name = os.environ["TZ"] 885 | except KeyError: 886 | pass 887 | if name is None or name == ":": 888 | for filepath in TZFILES: 889 | if not os.path.isabs(filepath): 890 | filename = filepath 891 | for path in TZPATHS: 892 | filepath = os.path.join(path, filename) 893 | if os.path.isfile(filepath): 894 | break 895 | else: 896 | continue 897 | if os.path.isfile(filepath): 898 | try: 899 | tz = tzfile(filepath) 900 | break 901 | except (IOError, OSError, ValueError): 902 | pass 903 | else: 904 | tz = tzlocal() 905 | else: 906 | if name.startswith(":"): 907 | name = name[:-1] 908 | if os.path.isabs(name): 909 | if os.path.isfile(name): 910 | tz = tzfile(name) 911 | else: 912 | tz = None 913 | else: 914 | for path in TZPATHS: 915 | filepath = os.path.join(path, name) 916 | if not os.path.isfile(filepath): 917 | filepath = filepath.replace(' ','_') 918 | if not os.path.isfile(filepath): 919 | continue 920 | try: 921 | tz = tzfile(filepath) 922 | break 923 | except (IOError, OSError, ValueError): 924 | pass 925 | else: 926 | tz = None 927 | if tzwin: 928 | try: 929 | tz = tzwin(name) 930 | except OSError: 931 | pass 932 | if not tz: 933 | from dateutil.zoneinfo import gettz 934 | tz = gettz(name) 935 | if not tz: 936 | for c in name: 937 | # name must have at least one offset to be a tzstr 938 | if c in "0123456789": 939 | try: 940 | tz = tzstr(name) 941 | except ValueError: 942 | pass 943 | break 944 | else: 945 | if name in ("GMT", "UTC"): 946 | tz = tzutc() 947 | elif name in time.tzname: 948 | tz = tzlocal() 949 | return tz 950 | 951 | # vim:ts=4:sw=4:et 952 | -------------------------------------------------------------------------------- /dateutil/tzwin.py: -------------------------------------------------------------------------------- 1 | # This code was originally contributed by Jeffrey Harris. 
2 | import datetime 3 | import struct 4 | import _winreg 5 | 6 | __author__ = "Jeffrey Harris & Gustavo Niemeyer " 7 | 8 | __all__ = ["tzwin", "tzwinlocal"] 9 | 10 | ONEWEEK = datetime.timedelta(7) 11 | 12 | TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones" 13 | TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones" 14 | TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation" 15 | 16 | def _settzkeyname(): 17 | global TZKEYNAME 18 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 19 | try: 20 | _winreg.OpenKey(handle, TZKEYNAMENT).Close() 21 | TZKEYNAME = TZKEYNAMENT 22 | except WindowsError: 23 | TZKEYNAME = TZKEYNAME9X 24 | handle.Close() 25 | 26 | _settzkeyname() 27 | 28 | class tzwinbase(datetime.tzinfo): 29 | """tzinfo class based on win32's timezones available in the registry.""" 30 | 31 | def utcoffset(self, dt): 32 | if self._isdst(dt): 33 | return datetime.timedelta(minutes=self._dstoffset) 34 | else: 35 | return datetime.timedelta(minutes=self._stdoffset) 36 | 37 | def dst(self, dt): 38 | if self._isdst(dt): 39 | minutes = self._dstoffset - self._stdoffset 40 | return datetime.timedelta(minutes=minutes) 41 | else: 42 | return datetime.timedelta(0) 43 | 44 | def tzname(self, dt): 45 | if self._isdst(dt): 46 | return self._dstname 47 | else: 48 | return self._stdname 49 | 50 | def list(): 51 | """Return a list of all time zones known to the system.""" 52 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 53 | tzkey = _winreg.OpenKey(handle, TZKEYNAME) 54 | result = [_winreg.EnumKey(tzkey, i) 55 | for i in range(_winreg.QueryInfoKey(tzkey)[0])] 56 | tzkey.Close() 57 | handle.Close() 58 | return result 59 | list = staticmethod(list) 60 | 61 | def display(self): 62 | return self._display 63 | 64 | def _isdst(self, dt): 65 | dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek, 66 | self._dsthour, self._dstminute, 67 | self._dstweeknumber) 68 | dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek, 69 | self._stdhour, self._stdminute, 70 | self._stdweeknumber) 71 | if dston < dstoff: 72 | return dston <= dt.replace(tzinfo=None) < dstoff 73 | else: 74 | return not dstoff <= dt.replace(tzinfo=None) < dston 75 | 76 | 77 | class tzwin(tzwinbase): 78 | 79 | def __init__(self, name): 80 | self._name = name 81 | 82 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 83 | tzkey = _winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name)) 84 | keydict = valuestodict(tzkey) 85 | tzkey.Close() 86 | handle.Close() 87 | 88 | self._stdname = keydict["Std"].encode("iso-8859-1") 89 | self._dstname = keydict["Dlt"].encode("iso-8859-1") 90 | 91 | self._display = keydict["Display"] 92 | 93 | # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm 94 | tup = struct.unpack("=3l16h", keydict["TZI"]) 95 | self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1 96 | self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1 97 | 98 | (self._stdmonth, 99 | self._stddayofweek, # Sunday = 0 100 | self._stdweeknumber, # Last = 5 101 | self._stdhour, 102 | self._stdminute) = tup[4:9] 103 | 104 | (self._dstmonth, 105 | self._dstdayofweek, # Sunday = 0 106 | self._dstweeknumber, # Last = 5 107 | self._dsthour, 108 | self._dstminute) = tup[12:17] 109 | 110 | def __repr__(self): 111 | return "tzwin(%s)" % repr(self._name) 112 | 113 | def __reduce__(self): 114 | return (self.__class__, (self._name,)) 115 | 116 | 117 | class tzwinlocal(tzwinbase): 118 | 119 | def __init__(self): 
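        # Editor's note (hedged observation added in editing): the values
        # read below come from the live TimeZoneInformation registry key, so
        # the instance reflects the zone configured at construction time.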
120 | 121 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) 122 | 123 | tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME) 124 | keydict = valuestodict(tzlocalkey) 125 | tzlocalkey.Close() 126 | 127 | self._stdname = keydict["StandardName"].encode("iso-8859-1") 128 | self._dstname = keydict["DaylightName"].encode("iso-8859-1") 129 | 130 | try: 131 | tzkey = _winreg.OpenKey(handle, "%s\%s"%(TZKEYNAME, self._stdname)) 132 | _keydict = valuestodict(tzkey) 133 | self._display = _keydict["Display"] 134 | tzkey.Close() 135 | except OSError: 136 | self._display = None 137 | 138 | handle.Close() 139 | 140 | self._stdoffset = -keydict["Bias"]-keydict["StandardBias"] 141 | self._dstoffset = self._stdoffset-keydict["DaylightBias"] 142 | 143 | 144 | # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm 145 | tup = struct.unpack("=8h", keydict["StandardStart"]) 146 | 147 | (self._stdmonth, 148 | self._stddayofweek, # Sunday = 0 149 | self._stdweeknumber, # Last = 5 150 | self._stdhour, 151 | self._stdminute) = tup[1:6] 152 | 153 | tup = struct.unpack("=8h", keydict["DaylightStart"]) 154 | 155 | (self._dstmonth, 156 | self._dstdayofweek, # Sunday = 0 157 | self._dstweeknumber, # Last = 5 158 | self._dsthour, 159 | self._dstminute) = tup[1:6] 160 | 161 | def __reduce__(self): 162 | return (self.__class__, ()) 163 | 164 | def picknthweekday(year, month, dayofweek, hour, minute, whichweek): 165 | """dayofweek == 0 means Sunday, whichweek 5 means last instance""" 166 | first = datetime.datetime(year, month, 1, hour, minute) 167 | weekdayone = first.replace(day=((dayofweek-first.isoweekday())%7+1)) 168 | for n in xrange(whichweek): 169 | dt = weekdayone+(whichweek-n)*ONEWEEK 170 | if dt.month == month: 171 | return dt 172 | 173 | def valuestodict(key): 174 | """Convert a registry key's values to a dictionary.""" 175 | dict = {} 176 | size = _winreg.QueryInfoKey(key)[1] 177 | for i in range(size): 178 | data = _winreg.EnumValue(key, i) 179 | dict[data[0]] = data[1] 180 | return dict 181 | -------------------------------------------------------------------------------- /dbobject.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Representing an object that can store to SQLite 5 | # 6 | # 2013-04-16 Created by Pascal Pfiffner 7 | # 8 | 9 | import logging 10 | 11 | from sqlite import SQLite 12 | 13 | 14 | class DBObject (object): 15 | """ A superclass for objects that can dehydrate to and hydrate from SQLite. 16 | 17 | Very crude and basic for the time being, but still takes away much of the 18 | cruft for subclasses. 19 | """ 20 | 21 | sqlite_default_db = 'databases/storage.db' 22 | sqlite_handle = None 23 | sqlite_must_commit = False 24 | 25 | table_name = None 26 | table_key = None 27 | 28 | def __init__(self): 29 | self.id = None 30 | self.hydrated = False 31 | 32 | 33 | # -------------------------------------------------------------------------- Dehydration 34 | def should_insert(self): 35 | """ Return True if the receiver should be inserted (i.e. is not already 36 | in the db). """ 37 | return False 38 | 39 | def will_insert(self): 40 | """ Called before the insert query is performed, you can use this as a 41 | hook. """ 42 | pass 43 | 44 | def insert_tuple(self): 45 | """ Cheap solution for now: return the INSERT sql as first and a list 46 | of values as second object. 
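        Example override in a subclass (table and column names are
        illustrative, not part of this project):

            def insert_tuple(self):
                return ("INSERT OR IGNORE INTO docs (title) VALUES (?)",
                        [self.title])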
""" 47 | return None, None 48 | 49 | def did_insert(self): 50 | pass 51 | 52 | def insert(self): 53 | """ Runs an INSERT query for the receiver. 54 | This method will not check with "should_insert()"! """ 55 | self.will_insert() 56 | 57 | sql, params = self.insert_tuple() 58 | if sql is None or params is None: 59 | return False 60 | 61 | cls = self.__class__ 62 | cls.sqlite_assure_handle() 63 | self.id = cls.sqlite_handle.executeInsert(sql, params) 64 | cls.sqlite_must_commit = True 65 | self.did_insert() 66 | 67 | return True 68 | 69 | 70 | def should_update(self): 71 | return True 72 | 73 | def update_tuple(self): 74 | """ Cheap solution for now: return the UPDATE sql as first and a list 75 | of values as second object. """ 76 | return None, None 77 | 78 | def update(self): 79 | """ Runs the UPDATE query on the receiver. """ 80 | 81 | sql, params = self.update_tuple() 82 | if sql is None or params is None: 83 | return False 84 | 85 | cls = self.__class__ 86 | cls.sqlite_assure_handle() 87 | if cls.sqlite_handle.execute(sql, params): 88 | cls.sqlite_must_commit = True 89 | self.hydrated = True 90 | return True 91 | 92 | return False 93 | 94 | def did_store(self): 95 | """ Called after a successful call to self.store(). """ 96 | pass 97 | 98 | def store(self): 99 | """ Stores the receiver's data to SQLite. You must MANUALLY COMMIT! 100 | """ 101 | 102 | # do we need to insert first? 103 | if self.should_insert() and not self.insert(): 104 | logging.warning("Failed to INSERT %s" % self) 105 | 106 | # perform the update 107 | if self.should_update() and not self.update(): 108 | logging.warning("Failed to UPDATE %s" % self) 109 | return False 110 | 111 | self.did_store() 112 | return True 113 | 114 | 115 | # -------------------------------------------------------------------------- Hydration 116 | def load(self, force=False): 117 | """ Hydrate from database. """ 118 | pass 119 | 120 | def from_db(self, data): 121 | """ Fill from an SQLite-retrieved list. """ 122 | pass 123 | 124 | 125 | # -------------------------------------------------------------------------- SQLite Methods 126 | def sqlite_execute(self, sql, params): 127 | """ Executes the given SQL statement with the given parameters. 128 | Returns True on success, False otherwise. """ 129 | 130 | cls = self.__class__ 131 | cls.sqlite_assure_handle() 132 | if cls.sqlite_handle.execute(sql, params): 133 | cls.sqlite_must_commit = True 134 | self.hydrated = True 135 | return True 136 | 137 | return False 138 | 139 | @classmethod 140 | def sqlite_select(cls, sql, params): 141 | """ Executes the SQL statement and returns the response. You can use 142 | this method in an iterator. """ 143 | 144 | cls.sqlite_assure_handle() 145 | return cls.sqlite_handle.execute(sql, params) 146 | 147 | @classmethod 148 | def sqlite_select_one(cls, sql, params): 149 | """ Executes the SQL statement and returns the first response row. 150 | """ 151 | 152 | cls.sqlite_assure_handle() 153 | return cls.sqlite_handle.executeOne(sql, params) 154 | 155 | @classmethod 156 | def add_index(cls, table_column): 157 | """ Adds an index for the given table column if there is none. 
158 | """ 159 | if table_column is None: 160 | return 161 | 162 | cls.sqlite_assure_handle() 163 | idx_name = "%s_index" % table_column 164 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS %s ON %s (%s)" % (idx_name, cls.table_name, table_column)) 165 | 166 | 167 | # -------------------------------------------------------------------------- Class Methods 168 | @classmethod 169 | def sqlite_assure_handle(cls): 170 | if cls.sqlite_handle is None: 171 | cls.sqlite_handle = SQLite.get(cls.sqlite_default_db) 172 | 173 | @classmethod 174 | def sqlite_release_handle(cls): 175 | cls.sqlite_handle = None 176 | 177 | @classmethod 178 | def sqlite_commit_if_needed(cls): 179 | """ Commits to SQLite if the flag had been set. """ 180 | if cls.sqlite_handle is None: 181 | return 182 | 183 | if cls.sqlite_must_commit: 184 | cls.sqlite_must_commit = False 185 | cls.sqlite_handle.commit() 186 | 187 | 188 | # -------------------------------------------------------------------------- Table Setup 189 | @classmethod 190 | def table_structure(cls): 191 | """ Return the table structure here. """ 192 | return None 193 | 194 | @classmethod 195 | def setup_tables(cls, db_path=None): 196 | if db_path is not None: 197 | cls.sqlite_default_db = db_path 198 | 199 | struct = cls.table_structure() 200 | if struct is None: 201 | return False 202 | 203 | cls.sqlite_assure_handle() 204 | if cls.sqlite_handle.create(cls.table_name, struct): 205 | cls.did_setup_tables(db_path) 206 | 207 | @classmethod 208 | def did_setup_tables(cls, db_path): 209 | pass 210 | 211 | # call the table setup to be sure it was set up 212 | # SubClass.setup_tables() 213 | 214 | 215 | -------------------------------------------------------------------------------- /files.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | 4 | def get_timestamp(): 5 | return datetime.now().strftime(("%Y-%m-%d %H:%M:%S")) 6 | 7 | FILES = { 8 | "1": { 9 | "id": "1", 10 | "title": "clinical_ax.txt", 11 | "author": "Dr. John Doe", 12 | "client_name": "Foo Bar", 13 | "timestamp": get_timestamp() 14 | }, 15 | "2": { 16 | "id": "2", 17 | "title": "clinical_review.txt", 18 | "author": "Dr. Scooby Doo", 19 | "client_name": "Foo Bar", 20 | "timestamp": get_timestamp() 21 | }, 22 | "3": { 23 | "id": "3", 24 | "title": "clinical_note.txt", 25 | "author": "Dr. Donald Duck", 26 | "client_name": "Foo Bar", 27 | "timestamp": get_timestamp() 28 | } 29 | } 30 | 31 | def read(): 32 | 33 | # Create the list of people from our data 34 | return [FILES[key] for key in sorted(FILES.keys())] 35 | -------------------------------------------------------------------------------- /mngobject.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Superclass for objects stored in MongoDB 5 | # 6 | # 2013-07-10 Created by Pascal Pfiffner 7 | # 8 | 9 | import logging 10 | import collections 11 | 12 | from pymongo import MongoClient 13 | 14 | 15 | class MNGObject (object): 16 | """ Superclass for an object stored in a MongoDB collection. 
""" 17 | 18 | def __init__(self, id=None): 19 | self.id = id 20 | self.doc = None 21 | self.loaded = False 22 | 23 | 24 | # -------------------------------------------------------------------------- MongoDB 25 | database_uri = "mongodb://localhost:27017" 26 | 27 | # the MongoDB database may be 'None', in which case the default db will be 28 | # used, and if that doesn't work it will fall back to use 'default' 29 | database_name = None 30 | 31 | # the MongoDB collection that holds documents of this class 32 | collection_name = None 33 | 34 | _collection = None 35 | 36 | @classmethod 37 | def collection(cls): 38 | """ Returns a Mongo Collection object, creating it if necessary. """ 39 | if cls._collection is None: 40 | if not cls.collection_name: 41 | raise Exception("No collection has been set for %s" % cls) 42 | 43 | client = MongoClient(cls.database_uri) 44 | if cls.database_name is None: 45 | try: 46 | db = client.get_default_database() 47 | except Exception as e: 48 | logging.debug("Failed to get default database: %s" % e) 49 | db = client['default'] 50 | else: 51 | db = client[cls.database_name] 52 | cls._collection = db[cls.collection_name] 53 | 54 | return cls._collection 55 | 56 | @classmethod 57 | def test_connection(cls): 58 | """ Tests the database by inserting, retrieving and deleting a document. 59 | """ 60 | old_coll = cls.collection_name 61 | cls.collection_name = 'foo' 62 | 63 | obj = MNGObject() 64 | obj.doc = { 65 | 'title': "This is a connection test document" 66 | } 67 | 68 | ret = None 69 | 70 | # try storing 71 | try: 72 | obj.store() 73 | 74 | # try loading 75 | sec = MNGObject(obj.id) 76 | try: 77 | sec.load() 78 | 79 | # compare titles 80 | t1 = obj.doc.get('title') if obj.doc else None 81 | t2 = sec.doc.get('title') if sec.doc else None 82 | if t1 == t2: 83 | 84 | # try removing 85 | try: 86 | if not sec.remove(): 87 | raise Exception('failed to remove') 88 | except Exception as e: 89 | ret = "TEST FAILED with remove() exception: %s" % e 90 | else: 91 | ret = "TEST FAILED, insertion and retrieval do not match (%s != %s)" % (t1, t2) 92 | except Exception as e: 93 | ret = "TEST FAILED with load() exception: %s" % e 94 | except Exception as e: 95 | ret = "TEST FAILED with store() exception: %s" % e 96 | 97 | 98 | # clean up 99 | try: 100 | cls._collection.drop() 101 | cls._collection = None 102 | except: 103 | logging.error("Failed to drop collection: %s" % e) 104 | 105 | cls.connection_name = old_coll 106 | 107 | return ret 108 | 109 | 110 | # -------------------------------------------------------------------------- Document Manipulation 111 | def ensure_doc_id(self): 112 | had_doc = True 113 | if self.doc is None: 114 | had_doc = False 115 | self.doc = {} 116 | 117 | if self.id: 118 | self.doc['_id'] = self.id 119 | elif had_doc: 120 | self.id = self.doc.get('_id') 121 | if self.id is None: 122 | self.id = self.doc.get('id') 123 | self.doc['_id'] = self.id 124 | 125 | def replace_with(self, json): 126 | """ Replaces the document tree with the given JSON tree. 
127 | 128 | The document id is set from the receiver's id if it's there, otherwise 129 | it's being searched in the doc in this order: 130 | - if self.id is not None, the doc's "_id" will be set to self.id 131 | - if doc["_id"] is present, this becomes self.id 132 | - if doc["id"] is present, this becomes self.id and is set as the 133 | docs "_id" 134 | """ 135 | if not self.loaded: 136 | self.load() 137 | 138 | self.doc = json 139 | self.loaded = True 140 | 141 | # set or update our id 142 | self.ensure_doc_id() 143 | self.did_update_doc() 144 | 145 | def update_with(self, json): 146 | """ Updates the document tree by merging it with the given JSON tree. 147 | 148 | The id of the document is automatically set in this order: 149 | - if self.id is not None, the doc's "_id" will be set to self.id 150 | - if doc["_id"] is present, this becomes self.id 151 | - if doc["id"] is present, this becomes self.id and is set as the 152 | docs "_id" 153 | """ 154 | 155 | if not self.loaded: 156 | self.load() 157 | 158 | # set or update contents 159 | if self.doc is None: 160 | self.doc = json 161 | else: 162 | self.doc = deepUpdate(self.doc, json) 163 | self.loaded = True 164 | 165 | # set or update our id 166 | self.ensure_doc_id() 167 | self.did_update_doc() 168 | 169 | def did_update_doc(self): 170 | """ Called when self.doc has been changed programmatically (i.e. NOT 171 | after loading from database). 172 | 173 | You can call this manually if you directly assign self.doc and want 174 | this to trigger. The default implementation does nothing. 175 | """ 176 | pass 177 | 178 | def update_subtree(self, keypath, tree): 179 | assert False, "Not implemented" 180 | 181 | def replace_subtree(self, keypath, tree): 182 | """ replaces the existing tree at keypath with the new tree. """ 183 | 184 | if not self.loaded and self.id: 185 | self.load() 186 | 187 | self.ensure_doc_id() 188 | self.doc = replaceSubtree(self.doc, keypath, tree) 189 | self.loaded = True 190 | 191 | 192 | # -------------------------------------------------------------------------- Dehydration 193 | def store(self, subtree=None): 194 | """ Stores the receiver's data to the collection, letting Mongo decide 195 | between an insert and an update. 196 | If "subtree" is not None, an update is forced only on the given subtree 197 | which should have the format: {'keypath': value}. """ 198 | 199 | # throw up if there is no content and we're not saving a subtree 200 | if self.doc is None and subtree is None: 201 | raise Exception("This object does not have content") 202 | 203 | cls = self.__class__ 204 | 205 | # update if there's a subtree, otherwise use "save" 206 | if subtree is not None: 207 | if self.id is None: 208 | raise Exception("No id is set, cannot update subtree %s" % subtree) 209 | res = cls.collection().update({"_id": self.id}, {"$set": subtree}) 210 | if res is not None: 211 | if res.get('err'): 212 | logging.warning("Error while saving subtree: %s" % res.get('err')) 213 | 214 | # instead of loading again, would be nice to update self.doc 215 | # appropriately 216 | self.doc = None 217 | self.load() 218 | else: 219 | self.id = cls.collection().save(self.doc, manipulate=True) 220 | 221 | self.did_store() 222 | 223 | return True 224 | 225 | def did_store(self): 226 | """ Called after a successful call to "store". """ 227 | pass 228 | 229 | 230 | # -------------------------------------------------------------------------- Hydration 231 | def load(self, force=False): 232 | """ Hydrate from database, if the instance has an id. 
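        Typical use (illustrative id):

            obj = MNGObject('some-id')
            obj.load()
            print obj.doc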
233 | If the document already has an in-memory representation, data loaded 234 | from database will be superseded by the in-memory properties unless 235 | "force" is set to True, in which case all in-memory data is 236 | discarded. 237 | 238 | Arguments: 239 | force -- if True will discard any in-memory changes to self.doc 240 | """ 241 | 242 | if self.id is None: 243 | return 244 | 245 | found = self.__class__.collection().find_one({"_id": self.id}) 246 | if found is not None: 247 | if force or self.doc is None: 248 | self.doc = found 249 | else: 250 | self.doc = deepUpdate(found, self.doc) 251 | 252 | self.loaded = True 253 | 254 | 255 | # -------------------------------------------------------------------------- Multiple 256 | @classmethod 257 | def retrieve(cls, id_list=[]): 258 | """ Retrieves multiple documents by id. """ 259 | 260 | found = [] 261 | for document in cls.collection().find({"_id": {"$in": id_list}}): 262 | obj = cls() 263 | obj.update_with(document) 264 | 265 | found.append(obj) 266 | 267 | return found 268 | 269 | 270 | # -------------------------------------------------------------------------- Deletion 271 | def remove(self): 272 | """ Delete from database. """ 273 | 274 | if self.id is None: 275 | raise Exception("This object does not have an id, cannot remove") 276 | 277 | ret = self.__class__.collection().remove(spec_or_id=self.id) 278 | return ret.get('err') is None if ret else False 279 | 280 | 281 | 282 | def deepUpdate(d, u): 283 | """ Deep merges two dictionaries, overwriting "d"'s values with "u"'s where 284 | present. """ 285 | if u is None: 286 | return d 287 | 288 | # if we have "u" and "d" is not a mapping object, we overwrite it with "u" 289 | if d is None or not isinstance(d, collections.Mapping): 290 | return u 291 | 292 | # iterate over keys and values and update 293 | for k, v in u.iteritems(): 294 | if isinstance(v, collections.Mapping): 295 | old = d.get(k) 296 | d[k] = deepUpdate(old, v) if old else v 297 | else: 298 | d[k] = u[k] 299 | 300 | return d 301 | 302 | def deleteSubtree(tree, keypath): 303 | """ Deletes the content at keypath. """ 304 | if not keypath: 305 | raise Exception("You must provide a keypath") 306 | 307 | existing = tree 308 | path = keypath.split('.') 309 | while len(path) > 1: 310 | p = path.pop(0) 311 | existing = existing.get(p) 312 | 313 | # if we don't have a tree to update it's not there anyway, go home 314 | if existing is None: 315 | return tree 316 | 317 | del existing[path[0]] 318 | 319 | return tree 320 | 321 | 322 | def replaceSubtree(tree, keypath, json): 323 | """ Replaces or creates a subtree at keypath.
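    Example (illustrative):

        tree = {'a': {'b': 1}}
        replaceSubtree(tree, 'a.b', 2)         # -> {'a': {'b': 2}}
        replaceSubtree(tree, 'a.c', {'d': 3})  # creates 'c' under 'a'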
""" 324 | if not keypath: 325 | raise Exception("You must provide a keypath") 326 | if json is None: 327 | return deleteSubtree(tree, keypath) 328 | 329 | existing = tree or {} 330 | path = keypath.split('.') 331 | while len(path) > 1: 332 | p = path.pop(0) 333 | previous = existing 334 | existing = existing.get(p) 335 | if existing is None: 336 | existing = {} 337 | previous[p] = existing 338 | 339 | if existing is None: 340 | existing = {} 341 | existing[path[0]] = json 342 | 343 | return tree 344 | 345 | 346 | if '__main__' == __name__: 347 | a = {'a': 1, 'b': 1, 'c': {'ca': 1, 'cb': 1, 'cc': {'cca': 1, 'ccb': 1}}, 'e': {'ea': 1}} 348 | b = {'a': 2, 'c': {'ca': 2, 'cb': {'cba': 2, 'cbb': 2}, 'cd': {'cda': 2, 'cdb': 2, 'cdc': 2}}, 'e': 2} 349 | 350 | print "replaceSubtree()" 351 | print "before ", a 352 | print "replace 1", replaceSubtree(a, 'c.ca', 3) 353 | print "replace 2", replaceSubtree(a, 'c.cc.cca', 3) 354 | print "replace 3", replaceSubtree(a, 'c.ce.cea', 3) 355 | print 356 | print "deleteSubtree()" 357 | print "before ", a 358 | print "delete 1", deleteSubtree(a, 'c.ce.cea') 359 | print "delete 2", deleteSubtree(a, 'd.da.dda') 360 | print 361 | print "deepUpdate(a, b)" 362 | print "a: ", a 363 | print "b: ", b 364 | print "-> ", deepUpdate(a, b) 365 | 366 | -------------------------------------------------------------------------------- /nlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # cTAKES and RegEx wizardry 4 | # 5 | # 2012-12-14 Created by Pascal Pfiffner 6 | # 7 | 8 | import os 9 | import re 10 | import logging 11 | 12 | class NLPProcessing (object): 13 | """ Abstract base class for handling NLP pipelines. """ 14 | # print('\n(nlp.py) Initializing NLP w/ object:', object, '\n') 15 | 16 | def __init__(self): 17 | # print('\n(nlp.py) Setting definitions for self') 18 | 19 | self.name = 'nlp' 20 | self.bin = '.' 21 | self.root = None 22 | self.cleanup = True 23 | self.did_prepare = False 24 | 25 | # print('(nlp.py) Definitions set as:', '\n(nlp.py) Self Name:', self.name, '\n(nlp.py) Self bin:', self.bin, '\n(nlp.py) Self root:', 26 | # self.root, '\n(nlp.py) Self cleanup:', self.cleanup, '\n(nlp.py) Self did prepare:', self.did_prepare, '\n') 27 | 28 | # -------------------------------------------------------------------------- Preparations 29 | def set_relative_root(self, directory): 30 | self.root = os.path.abspath(directory if directory is not None else '.') 31 | 32 | def prepare(self): 33 | """ Performs steps necessary to setup the pipeline, such as creating 34 | input and output directories or pipes. """ 35 | # print('Preparations started w/:\n','Root =', self.root, '\n') 36 | self._prepare() 37 | self.did_prepare = True 38 | 39 | def _prepare(self): 40 | if self.root is None: 41 | raise Exception("No root directory defined for NLP process %s" % self.name) 42 | 43 | if not os.path.exists(self.root): 44 | os.mkdir(self.root) 45 | 46 | self._create_directories_if_needed() 47 | 48 | if not os.path.exists(self.root): 49 | raise Exception( 50 | "Failed to create root directory for NLP process %s" % self.name) 51 | 52 | def _create_directories_if_needed(self): 53 | """ Override to create directories needed to run the pipeline. """ 54 | pass 55 | 56 | # -------------------------------------------------------------------------- Running 57 | def run(self): 58 | """ Runs the NLP pipeline, raises an exception on error. 
""" 59 | if not self.did_prepare: 60 | self.prepare() 61 | self._run() 62 | 63 | def _run(self): 64 | """ Internal use, subclasses should override this method since it is 65 | called after necessary preparation has been performed. """ 66 | raise Exception("Cannot run an abstract NLP pipeline class instance") 67 | 68 | def write_input(self, text, filename): 69 | if not self.did_prepare: 70 | self.prepare() 71 | 72 | return self._write_input(text, filename) 73 | 74 | def _write_input(self, text, filename): 75 | return False 76 | 77 | def parse_output(self, filename, **kwargs): 78 | if not self.did_prepare: 79 | self.prepare() 80 | 81 | return self._parse_output(filename, **kwargs) 82 | 83 | def _parse_output(self, filename, **kwargs): 84 | """ return a dictionary (or None) like: 85 | { 'snomed': [1, 2, 2], 'rxnorm': [4, 5, 6] } 86 | """ 87 | return None 88 | 89 | 90 | # ------------------------------------------------------------------------------ Helper Functions 91 | def split_inclusion_exclusion(string): 92 | """ Returns a tuple of lists describing inclusion and exclusion criteria. 93 | """ 94 | 95 | if not string or len(string)< 1: 96 | raise Exception('No string given') 97 | 98 | # split on newlines 99 | rows = re.compile(r'(?:\n\s*){2,}').split(string) 100 | 101 | # loop all rows 102 | missed = [] 103 | inc = [] 104 | exc = [] 105 | at_inc = False 106 | at_exc = False 107 | 108 | for string in rows: 109 | if len(string) < 1 or 'none' == string: 110 | continue 111 | 112 | clean = re.sub(r'[\n\s]+', ' ', string).strip() 113 | 114 | # detect switching to inclusion criteria 115 | # exclusion criteria sometimes say "None if patients fulfill inclusion 116 | # criteria.", try to avoid detecting that as header! 117 | if re.search(r'^[^\w]*inclusion criteria', clean, re.IGNORECASE) is not None \ 118 | and re.search(r'exclusion', clean, re.IGNORECASE) is None: 119 | at_inc = True 120 | at_exc = False 121 | 122 | # detect switching to exclusion criteria 123 | elif re.search(r'exclusion criteria', clean, re.IGNORECASE) is not None \ 124 | and re.search(r'inclusion', clean, re.IGNORECASE) is None: 125 | at_inc = False 126 | at_exc = True 127 | 128 | # assign accordingly 129 | elif at_inc: 130 | inc.append(clean) 131 | elif at_exc: 132 | exc.append(clean) 133 | else: 134 | missed.append(clean) 135 | 136 | # if there was no inclusion/exclusion split, we assume the text describes inclusion criteria 137 | if len(inc) < 1 or len(exc) < 1: 138 | logging.debug( 139 | "No explicit separation of inclusion/exclusion criteria found, assuming the text to describe inclusion criteria") 140 | inc.extend(missed) 141 | exc = [] 142 | 143 | return (inc, exc) 144 | 145 | 146 | def list_to_sentences(string): 147 | """ Splits text at newlines and puts it back together after stripping new- 148 | lines and enumeration symbols, joined by a period. 149 | """ 150 | if string is None: 151 | return None 152 | 153 | lines = string.splitlines() 154 | 155 | curr = '' 156 | processed = [] 157 | for line in lines: 158 | stripped = line.strip() 159 | 160 | # empty line 161 | if 0 == len(stripped): 162 | if curr: 163 | processed.append(re.sub(r'\.\s*$', '', curr)) 164 | curr = '' 165 | 166 | # beginning a new fragment 167 | elif not curr or 0 == len(curr): 168 | curr = re.sub(r'^[-\d\.\(\)]+\s*', '', stripped) 169 | 170 | # new line item? true when it starts with "-", "1." 
or "1)" (with 171 | # optional dash) or if the indent level is less than before (simple 172 | # whitespace count) (NO LONGER IMPLEMENTED) 173 | elif re.match(r'^-\s+', stripped) \ 174 | or re.match(r'^\d+\.\s+', stripped) \ 175 | or re.match(r'^(-\s*)?\d+\)\s+', stripped): 176 | 177 | if curr: 178 | processed.append(re.sub(r'\.\s*$', '', curr)) 179 | curr = re.sub(r'^(-|(\d+\.)|((-\s*)?\d+\)))\s*', '', stripped) 180 | 181 | # append to previous fragment 182 | else: 183 | curr = '%s %s' % (curr, stripped) 184 | 185 | if curr: 186 | processed.append(re.sub(r'\.\s*$', '', curr)) 187 | 188 | sentences = '. '.join(processed) if len(processed) > 0 else '' 189 | if len(sentences) > 0: 190 | sentences += '.' 191 | 192 | return sentences 193 | 194 | 195 | def list_trim(string): 196 | """ Trim text phases that are part of the string because the string was 197 | pulled off of a list, e.g. a leading "-" or "1." 198 | """ 199 | 200 | string.strip() 201 | string = re.sub('\s+', ' ', string) # multi-whitespace 202 | string = re.sub('^-\s+', '', string, count=1) # leading "-" 203 | string = re.sub('^\d+\.\s+', '', string, count=1) # leading "1." 204 | string = re.sub('^(-\s*)?\d+\)\s+', '', string, count=1) # leading "1)" with optional dash 205 | 206 | return string 207 | -------------------------------------------------------------------------------- /nltktags.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Handling NLTK to generate tags 5 | # 6 | # 2013-10-25 Created by Pascal Pfiffner 7 | # 8 | 9 | import os 10 | import logging 11 | import codecs 12 | import inspect 13 | import nltk 14 | import operator 15 | 16 | from nlp import NLPProcessing, list_to_sentences 17 | 18 | 19 | class NLTKTags (NLPProcessing): 20 | """ Aggregate handling tasks specifically for NLTK. """ 21 | 22 | def __init__(self): 23 | super(NLTKTags, self).__init__() 24 | self.name = 'nltk-tags' 25 | 26 | 27 | @property 28 | def _in_dir(self): 29 | return os.path.join(self.root, 'nltk-tags-in') 30 | 31 | @property 32 | def _out_dir(self): 33 | return os.path.join(self.root, 'nltk-tags-out') 34 | 35 | def _create_directories_if_needed(self): 36 | in_dir = self._in_dir 37 | out_dir = self._out_dir 38 | if not os.path.exists(in_dir): 39 | os.mkdir(in_dir) 40 | if not os.path.exists(out_dir): 41 | os.mkdir(out_dir) 42 | 43 | def _run(self): 44 | in_dir = self._in_dir 45 | out_dir = self._out_dir 46 | if not os.path.exists(in_dir) or not os.path.exists(out_dir): 47 | return 48 | 49 | # init our simple noun-phrase chunker 50 | grammar = r""" 51 | NUM: 52 | {} # "%" is interpreted as NN... 53 | 54 | NBAR: 55 | {**+} # Nouns and Adjectives, terminated with Nouns 56 | 57 | NP: 58 | {} # An NBAR is also a NP 59 | {} # Above, connected with in/of/etc... 
60 | """ 61 | chunker = nltk.RegexpParser(grammar) 62 | 63 | filelist = os.listdir(in_dir) 64 | tag_count = {} 65 | i = 0 66 | for f in filelist: 67 | i = i + 1 68 | logging.debug(" Reading file %d of %d" % (i, len(filelist))) 69 | with codecs.open(os.path.join(in_dir, f), 'r', 'utf-8') as handle: 70 | text = handle.read() 71 | 72 | # use NLTK to chunk the text 73 | chunks = [] 74 | sentences = nltk.sent_tokenize(text) 75 | if sentences and len(sentences) > 0: 76 | for sentence in sentences: 77 | tokens = nltk.word_tokenize(sentence) 78 | tagged = nltk.pos_tag(tokens) 79 | tree = chunker.parse(tagged) 80 | 81 | # get noun phrases 82 | np = [] 83 | for st in _nltk_find_leaves(tree, 'NP'): 84 | leaves = st.leaves() 85 | if len(leaves) > 0: 86 | tag = ' '.join([noun[0] for noun in leaves]).lower() 87 | np.append(tag) 88 | 89 | # count tags 90 | if tag in tag_count: 91 | tag_count[tag] = tag_count[tag] + 1 92 | else: 93 | tag_count[tag] = 1 94 | 95 | if len(np) > 0: 96 | chunks.extend(np) 97 | 98 | # write to outfile 99 | if len(chunks) > 0: 100 | outfile = os.path.join(out_dir, f) 101 | with codecs.open(outfile, 'w', 'utf-8') as w_handle: 102 | for chunk in chunks: 103 | w_handle.write("%s\n" % unicode(chunk)) 104 | 105 | # tag count 106 | if len(tag_count) > 0: 107 | with codecs.open(os.path.join(out_dir, 'tags.txt'), 'w', 'utf-8') as handle: 108 | for tag in sorted(tag_count.iteritems(), key=operator.itemgetter(1), reverse=True): 109 | handle.write("%s: %d\n" % (tag[0], int(tag[1]))) 110 | 111 | 112 | def _write_input(self, text, filename): 113 | if text is None \ 114 | or len(text) < 1 \ 115 | or filename is None: 116 | return False 117 | 118 | in_dir = self._in_dir 119 | if not os.path.exists(in_dir): 120 | logging.error("The input directory for %s at %s does not exist" % (self.name, in_dir)) 121 | return False 122 | 123 | infile = os.path.join(in_dir, filename) 124 | if os.path.exists(infile): 125 | return False 126 | 127 | # write it 128 | with codecs.open(infile, 'w', 'utf-8') as handle: 129 | # handle.write(unicode(text)) 130 | # handle.write("\n=====\n") 131 | handle.write(unicode(list_to_sentences(text))) 132 | 133 | return True 134 | 135 | 136 | def _parse_output(self, filename, **kwargs): 137 | """ Parse NLTK output. """ 138 | 139 | if filename is None: 140 | return None 141 | 142 | # is there cTAKES output? 143 | out_dir = self._out_dir 144 | if not os.path.exists(out_dir): 145 | logging.error("The output directory for %s at %s does not exist" % (self.name, out_dir)) 146 | return None 147 | 148 | outfile = os.path.join(out_dir, filename) 149 | if not os.path.exists(outfile): 150 | # do not log here and silently fail 151 | return None 152 | 153 | tags = [] 154 | 155 | # read tags 156 | with codecs.open(outfile, 'r', 'utf-8') as handle: 157 | #line = handle.readline(keepends=False) # "keepends" not supported in readline! 
(http://bugs.python.org/issue8630) 158 | lines = handle.readlines() 159 | for line in lines: 160 | tags.append(line.strip()) 161 | 162 | # create and return a dictionary (don't filter empty lists) 163 | ret = { 164 | 'tags': tags, 165 | } 166 | 167 | # clean up 168 | if self.cleanup: 169 | os.remove(outfile) 170 | 171 | in_dir = self._in_dir 172 | infile = os.path.join(in_dir, filename) 173 | if os.path.exists(infile): 174 | os.remove(infile) 175 | 176 | return ret 177 | 178 | 179 | def _nltk_find_leaves(tree, leave_name): 180 | try: 181 | tree.node 182 | except AttributeError: 183 | return [] 184 | 185 | res = [] 186 | if leave_name == tree.node: 187 | res.append(tree) 188 | else: 189 | for child in tree: 190 | leaves = _nltk_find_leaves(child, leave_name) 191 | if len(leaves) > 0: 192 | res.extend(leaves) 193 | 194 | return res 195 | 196 | 197 | # we can execute this file to do some testing 198 | if '__main__' == __name__: 199 | testtext = "History of clincally significant hypogammaglobulinemia, common variable immunodeficiency, or humeral immunodeficiency." 200 | testfile = 'test.txt' 201 | 202 | run_dir = os.path.join(os.path.dirname(__file__), 'nltk-tags-test') 203 | my_nlp = NLTKTags({'root': run_dir, 'cleanup': True}) 204 | my_nlp.prepare() 205 | 206 | # create test input 207 | if not my_nlp.write_input(testtext, testfile): 208 | print "xx> Failed to write test input to file" 209 | 210 | # run 211 | try: 212 | my_nlp.run() 213 | except Exception as e: 214 | print "xx> Failed: %s" % e 215 | 216 | # parse output 217 | ret = my_nlp.parse_output(testfile) 218 | print ret 219 | 220 | # clean up 221 | os.rmdir(my_nlp._in_dir) 222 | os.rmdir(my_nlp._out_dir) 223 | os.rmdir(run_dir) 224 | 225 | print "--> Done" 226 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import connexion 2 | 3 | # Create the application instance 4 | app = connexion.App(__name__, specification_dir='./') 5 | 6 | # Read the swagger.yml file to configure the endpoints 7 | app.add_api('swagger.yml') 8 | 9 | # Create a URL route in our application for "/" 10 | 11 | 12 | @app.route('/') 13 | def home(): 14 | 15 | return "Works!" 16 | 17 | 18 | # If we're running in stand alone mode, run the application 19 | if __name__ == '__main__': 20 | app.run(host='0.0.0.0', port=5000, debug=True) 21 | -------------------------------------------------------------------------------- /sqlite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Simplifying SQLite access 4 | # 5 | # 2012-12-14 Created by Pascal Pfiffner 6 | # 7 | 8 | 9 | import sqlite3 10 | import threading 11 | 12 | 13 | SQLITE_INSTANCES = {} 14 | 15 | 16 | class SQLite (object): 17 | """ SQLite access 18 | """ 19 | 20 | @classmethod 21 | def get(cls, database): 22 | """ Use this to get SQLite instances for a given database. Avoids 23 | creating multiple instances for the same database. 24 | 25 | We keep instances around per thread per database, maybe there should be 26 | a way to turn this off. However, here we always release instances for 27 | threads that are no longer alive. If this is better than just always 28 | creating a new instance should be tested. 
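        Example (illustrative database path and query):

            db = SQLite.get('databases/storage.db')
            for row in db.execute('SELECT * FROM docs WHERE id = ?', (1,)):
                print row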
29 | """ 30 | 31 | global SQLITE_INSTANCES 32 | 33 | # group per thread 34 | thread_id = threading.current_thread().ident 35 | if thread_id not in SQLITE_INSTANCES: 36 | SQLITE_INSTANCES[thread_id] = {} 37 | by_thread = SQLITE_INSTANCES[thread_id] 38 | 39 | # group per database 40 | if database not in by_thread: 41 | sql = SQLite(database) 42 | by_thread[database] = sql 43 | 44 | # free up memory for terminated threads 45 | clean = {} 46 | for alive in threading.enumerate(): 47 | if alive.ident in SQLITE_INSTANCES: 48 | clean[alive.ident] = SQLITE_INSTANCES[alive.ident] 49 | SQLITE_INSTANCES = clean 50 | 51 | return by_thread[database] 52 | 53 | 54 | def __init__(self, database=None): 55 | if database is None: 56 | raise Exception('No database provided') 57 | 58 | self.database = database 59 | self.handle = None 60 | self.cursor = None 61 | 62 | 63 | def execute(self, sql, params=()): 64 | """ Executes an SQL command and returns the cursor.execute, which can 65 | be used as an iterator. 66 | Supply the params as tuple, i.e. (param,) and (param1,param2,...) 67 | """ 68 | if not sql or len(sql) < 1: 69 | raise Exception('No SQL to execute') 70 | if not self.cursor: 71 | self.connect() 72 | 73 | return self.cursor.execute(sql, params) 74 | 75 | 76 | def executeInsert(self, sql, params=()): 77 | """ Executes an SQL command (should be INSERT OR REPLACE) and returns 78 | the last row id, 0 on failure. 79 | """ 80 | if self.execute(sql, params): 81 | return self.cursor.lastrowid if self.cursor.lastrowid else 0 82 | 83 | return 0 84 | 85 | 86 | def executeUpdate(self, sql, params=()): 87 | """ Executes an SQL command (should be UPDATE) and returns the number 88 | of affected rows. 89 | """ 90 | if self.execute(sql, params): 91 | return self.cursor.rowcount 92 | 93 | return 0 94 | 95 | 96 | def executeOne(self, sql, params): 97 | """ Returns the first row returned by executing the command 98 | """ 99 | self.execute(sql, params) 100 | return self.cursor.fetchone() 101 | 102 | 103 | def create(self, table_name, table_structure): 104 | """ Executes a CREATE TABLE IF NOT EXISTS query with the given structure. 105 | Input is NOT sanitized, watch it! 106 | """ 107 | create_query = 'CREATE TABLE IF NOT EXISTS %s %s' % (table_name, table_structure) 108 | self.execute(create_query) 109 | return True 110 | 111 | 112 | def commit(self): 113 | self.handle.commit() 114 | 115 | 116 | def connect(self): 117 | if self.cursor is not None: 118 | return 119 | 120 | self.handle = sqlite3.connect(self.database) 121 | self.cursor = self.handle.cursor() 122 | 123 | 124 | def close(self): 125 | if self.cursor is None: 126 | return 127 | 128 | self.handle.close() 129 | self.cursor = None 130 | self.handle = None 131 | 132 | 133 | # singleton init whack-a-hack 134 | #SQLite = _SQLite() 135 | #del _SQLite 136 | -------------------------------------------------------------------------------- /swagger.yml: -------------------------------------------------------------------------------- 1 | swagger: "2.0" 2 | info: 3 | description: Coonects to the cTAKES Default Clinical Pipeline through a RESTful service! 
4 | version: "1.0.0" 5 | title: cTAKES RESTful API 6 | consumes: 7 | - "application/json" 8 | produces: 9 | - "application/json" 10 | 11 | basePath: "/api" 12 | 13 | paths: 14 | /defaultClinicalPipeline: 15 | get: 16 | operationId: "files.read" 17 | tags: 18 | - "Default Clinical Pipeline" 19 | summary: "Plain text file structure supported by the server application" 20 | description: "Read plain text file" 21 | responses: 22 | 200: 23 | description: "Successful read plain text file operation!" 24 | schema: 25 | type: "array" 26 | items: 27 | properties: 28 | id: 29 | type: "string" 30 | title: 31 | type: "string" 32 | author: 33 | type: "string" 34 | client_name: 35 | type: "string" 36 | timestamp: 37 | type: "string" -------------------------------------------------------------------------------- /umls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # utilities to handle UMLS 5 | # 6 | # 2013-01-01 Created by Pascal Pfiffner 7 | # 8 | 9 | 10 | import csv 11 | import sys 12 | import os.path 13 | import logging 14 | 15 | from sqlite import SQLite 16 | 17 | 18 | class UMLS (object): 19 | """ A class for importing UMLS terminologies into an SQLite database. 20 | """ 21 | 22 | @classmethod 23 | def check_databases(cls): 24 | """ Check if our databases are in place and if not, import them. 25 | Will raise on errors! 26 | 27 | UMLS: (umls.db) 28 | If missing, prompt to use the `umls.sh` script 29 | 30 | SNOMED: (snomed.db) 31 | Read SNOMED CT from tab-separated files and create an SQLite database. 32 | """ 33 | 34 | # UMLS 35 | umls_db = os.path.join('databases', 'umls.db') 36 | if not os.path.exists(umls_db): 37 | raise Exception("The UMLS database at %s does not exist. Run the import script `databases/umls.sh`." % umls_db) 38 | 39 | # SNOMED 40 | SNOMED.sqlite_handle = None 41 | try: 42 | SNOMED.setup_tables() 43 | except Exception as e: 44 | raise Exception("SNOMED setup failed: %s" % e) 45 | 46 | # RxNorm 47 | rxnorm_db = os.path.join('databases', 'rxnorm.db') 48 | if not os.path.exists(rxnorm_db): 49 | raise Exception("The RxNorm database at %s does not exist. Run the import script `databases/rxnorm.sh`." % rxnorm_db) 50 | 51 | else: 52 | rx_map = { 53 | 'descriptions': 'snomed_desc.csv', 54 | 'relationships': 'snomed_rel.csv' 55 | } 56 | 57 | # need to import? 58 | for table, filename in rx_map.iteritems(): 59 | num_query = 'SELECT COUNT(*) FROM %s' % table 60 | num_existing = SNOMED.sqlite_handle.executeOne(num_query, ())[0] 61 | if num_existing > 0: 62 | continue 63 | 64 | snomed_file = os.path.join('databases', filename) 65 | if not os.path.exists(snomed_file): 66 | raise Exception("Need to import SNOMED, but the file %s is not present. Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html" % filename) 67 | 68 | SNOMED.import_csv_into_table(snomed_file, table) 69 | 70 | 71 | 72 | class UMLSLookup (object): 73 | """ UMLS lookup """ 74 | 75 | sqlite_handle = None 76 | did_check_dbs = False 77 | preferred_sources = ['"SNOMEDCT"', '"MTH"'] 78 | 79 | def __init__(self): 80 | self.sqlite = SQLite.get('databases/umls.db') 81 | 82 | def lookup_code(self, cui, preferred=True): 83 | """ Return a list with triples that contain: 84 | - name 85 | - source 86 | - semantic type 87 | by looking it up in our "descriptions" database. 88 | The "preferred" setting has the effect that only names from SNOMED 89 | (SNOMEDCT) and the Metathesaurus (MTH) will be reported.
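        Example result shape (values are illustrative):

            [("Atrial fibrillation", "SNOMEDCT", "Disease or Syndrome")]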
A lookup in 90 | our "descriptions" table is much faster than combing through the full 91 | MRCONSO table. 92 | """ 93 | if cui is None or len(cui) < 1: 94 | return [] 95 | 96 | # lazy UMLS db checking 97 | if not UMLSLookup.did_check_dbs: 98 | UMLSLookup.did_check_dbs = True 99 | try: 100 | UMLS.check_databases() 101 | except Exception as e: 102 | logging.error(e) 103 | # should this crash and burn? 104 | 105 | # take care of negations 106 | negated = '-' == cui[0] 107 | if negated: 108 | cui = cui[1:] 109 | 110 | parts = cui.split('@', 1) 111 | lookup_cui = parts[0] 112 | 113 | # STR: Name 114 | # SAB: Abbreviated Source Name 115 | # STY: Semantic Type 116 | if preferred: 117 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN (%s)' % ", ".join(UMLSLookup.preferred_sources) 118 | else: 119 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?' 120 | 121 | # return as list 122 | arr = [] 123 | for res in self.sqlite.execute(sql, (lookup_cui,)): 124 | if negated: 125 | arr.append(("[NEGATED] %s" % res[0], res[1], res[2])) 126 | else: 127 | arr.append(res) 128 | 129 | return arr 130 | 131 | 132 | def lookup_code_meaning(self, cui, preferred=True, no_html=True): 133 | """ Return a string (an empty string if the cui is null or not found) 134 | by looking it up in our "descriptions" database. 135 | The "preferred" settings has the effect that only names from SNOMED 136 | (SNOMEDCD) and the Metathesaurus (MTH) will be reported. A lookup in 137 | our "descriptions" table is much faster than combing through the full 138 | MRCONSO table. 139 | """ 140 | names = [] 141 | for res in self.lookup_code(cui, preferred): 142 | if no_html: 143 | names.append("%s (%s) [%s]" % (res[0], res[1], res[2])) 144 | else: 145 | names.append("%s (%s: %s)" % (res[0], res[1], res[2])) 146 | 147 | comp = ", " if no_html else "
\n" 148 | return comp.join(names) if len(names) > 0 else '' 149 | 150 | 151 | 152 | class SNOMED (object): 153 | sqlite_handle = None 154 | 155 | # -------------------------------------------------------------------------- Database Setup 156 | @classmethod 157 | def import_csv_into_table(cls, snomed_file, table_name): 158 | """ Import SNOMED CSV into our SQLite database. 159 | The SNOMED CSV files can be parsed by Python's CSV parser with the 160 | "excel-tab" flavor. 161 | """ 162 | 163 | logging.debug('..> Importing SNOMED %s into snomed.db...' % table_name) 164 | 165 | # not yet imported, parse tab-separated file and import 166 | with open(snomed_file, 'rb') as csv_handle: 167 | cls.sqlite_handle.isolation_level = 'EXCLUSIVE' 168 | sql = cls.insert_query_for(table_name) 169 | reader = unicode_csv_reader(csv_handle, dialect='excel-tab') 170 | i = 0 171 | try: 172 | for row in reader: 173 | if i > 0: # first row is the header row 174 | 175 | # execute SQL (we just ignore duplicates) 176 | params = cls.insert_tuple_from_csv_row_for(table_name, row) 177 | try: 178 | cls.sqlite_handle.execute(sql, params) 179 | except Exception as e: 180 | sys.exit(u'Cannot insert %s: %s' % (params, e)) 181 | i += 1 182 | 183 | # commit to file 184 | cls.sqlite_handle.commit() 185 | cls.did_import(table_name) 186 | cls.sqlite_handle.isolation_level = None 187 | 188 | except csv.Error as e: 189 | sys.exit('CSV error on line %d: %s' % (reader.line_num, e)) 190 | 191 | logging.debug('..> %d concepts parsed' % (i-1)) 192 | 193 | 194 | @classmethod 195 | def setup_tables(cls): 196 | """ Creates the SQLite tables we need, not the tables we deserve. 197 | """ 198 | if cls.sqlite_handle is None: 199 | cls.sqlite_handle = SQLite.get('databases/snomed.db') 200 | 201 | # descriptions 202 | cls.sqlite_handle.create('descriptions', '''( 203 | concept_id INTEGER PRIMARY KEY, 204 | lang TEXT, 205 | term TEXT, 206 | isa VARCHAR, 207 | active INT 208 | )''') 209 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)") 210 | 211 | # relationships 212 | cls.sqlite_handle.create('relationships', '''( 213 | relationship_id INTEGER PRIMARY KEY, 214 | source_id INT, 215 | destination_id INT, 216 | rel_type INT, 217 | rel_text VARCHAR, 218 | active INT 219 | )''') 220 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)") 221 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)") 222 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_type_index ON relationships (rel_type)") 223 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)") 224 | 225 | 226 | @classmethod 227 | def insert_query_for(cls, table_name): 228 | """ Returns the insert query needed for the given table 229 | """ 230 | if 'descriptions' == table_name: 231 | return '''INSERT OR IGNORE INTO descriptions 232 | (concept_id, lang, term, isa, active) 233 | VALUES 234 | (?, ?, ?, ?, ?)''' 235 | if 'relationships' == table_name: 236 | return '''INSERT OR IGNORE INTO relationships 237 | (relationship_id, source_id, destination_id, rel_type, active) 238 | VALUES 239 | (?, ?, ?, ?, ?)''' 240 | return None 241 | 242 | 243 | @classmethod 244 | def insert_tuple_from_csv_row_for(cls, table_name, row): 245 | if 'descriptions' == table_name: 246 | isa = '' 247 | if len(row) > 6: 248 | if '900000000000013009' == row[6]: 249 | isa = 'synonym' 250 | elif '900000000000003001' == row[6]: 251 | isa = 
'full' 252 | return (int(row[4]), row[5], row[7], isa, int(row[2])) 253 | if 'relationships' == table_name: 254 | return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2])) 255 | return None 256 | 257 | 258 | @classmethod 259 | def did_import(cls, table_name): 260 | """ Allows us to set hooks after tables have been imported 261 | """ 262 | if 'relationships' == table_name: 263 | cls.sqlite_handle.execute(''' 264 | UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003 265 | ''') 266 | cls.sqlite_handle.execute(''' 267 | UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007 268 | ''') 269 | 270 | 271 | 272 | class SNOMEDLookup (object): 273 | """ SNOMED lookup """ 274 | 275 | sqlite_handle = None 276 | 277 | 278 | def __init__(self): 279 | self.sqlite = SQLite.get('databases/snomed.db') 280 | 281 | def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True): 282 | """ Returns HTML for all matches of the given SNOMED id. 283 | The "preferred" flag here currently has no function. 284 | """ 285 | if snomed_id is None or len(snomed_id) < 1: 286 | return '' 287 | 288 | sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?' 289 | names = [] 290 | 291 | # loop over results 292 | for res in self.sqlite.execute(sql, (snomed_id,)): 293 | if not no_html and ('synonym' == res[1] or 0 == res[2]): 294 | names.append("%s" % res[0]) 295 | else: 296 | names.append(res[0]) 297 | 298 | if no_html: 299 | return ", ".join(names) if len(names) > 0 else '' 300 | return "
\n".join(names) if len(names) > 0 else '' 301 | 302 | 303 | 304 | class RxNormLookup (object): 305 | """ RxNorm lookup """ 306 | 307 | sqlite_handle = None 308 | 309 | 310 | def __init__(self): 311 | self.sqlite = SQLite.get('databases/rxnorm.db') 312 | 313 | def lookup_code_meaning(self, rx_id, preferred=True, no_html=True): 314 | """ Return HTML for the meaning of the given code. 315 | If preferred is True (the default), only one match will be returned, 316 | looking for specific TTY and using the "best" one. """ 317 | if rx_id is None or len(rx_id) < 1: 318 | return '' 319 | 320 | # retrieve all matches 321 | sql = 'SELECT STR, TTY, RXAUI FROM RXNCONSO WHERE RXCUI = ? AND LAT = "ENG"' 322 | found = [] 323 | names = [] 324 | format_str = "%s [%s]" 325 | 326 | # loop over them 327 | for res in self.sqlite.execute(sql, (rx_id,)): 328 | found.append(res) 329 | 330 | if len(found) > 0: 331 | 332 | # preferred name only 333 | if preferred: 334 | for tty in ['BN', 'IN', 'PIN', 'SBDC', 'SCDC', 'SBD', 'SCD', 'MIN']: 335 | for res in found: 336 | if tty == res[1]: 337 | names.append(format_str % (res[2], res[0], res[1])) 338 | break 339 | else: 340 | continue 341 | break 342 | 343 | if len(names) < 1: 344 | res = found[0] 345 | names.append(format_str % (res[2], res[0], res[1])) 346 | 347 | # return a list of all names 348 | else: 349 | for res in found: 350 | names.append(format_str % (res[2], res[0], res[1])) 351 | 352 | return "
\n".join(names) if len(names) > 0 else '' 353 | 354 | 355 | 356 | # the standard Python CSV reader can't do unicode, here's the workaround 357 | def unicode_csv_reader(utf8_data, dialect=csv.excel, **kwargs): 358 | csv_reader = csv.reader(utf8_data, dialect=dialect, **kwargs) 359 | for row in csv_reader: 360 | yield [unicode(cell, 'utf-8') for cell in row] 361 | 362 | --------------------------------------------------------------------------------