├── .gitignore
├── README.md
├── __init__.py
├── ctakes-install
│   ├── ctakes-server-setup.sh
│   ├── ctakes-user-install.sh
│   ├── log4j.xml
│   └── run.sh
├── ctakes.py
├── databases
│   ├── .gitignore
│   ├── rxnorm.sh
│   └── umls.sh
├── dateutil
│   ├── LICENSE
│   ├── __init__.py
│   ├── easter.py
│   ├── parser.py
│   ├── relativedelta.py
│   ├── rrule.py
│   ├── tz.py
│   └── tzwin.py
├── dbobject.py
├── files.py
├── mngobject.py
├── nlp.py
├── nltktags.py
├── server.py
├── sqlite.py
├── swagger.yml
└── umls.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 |
4 | # ignore settings
5 | umls.sh
6 |
7 | # ignore cTAKES install
8 | ctakes
9 | ctakes-svn
10 | ctakes-test
11 | apache-ctakes-4.0.0
12 | ctakes-test
13 |
14 | # ignore MetaMap install
15 | metamap
16 | metamap-test
17 |
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### What you'll be able to do ###
2 |
3 | 1. Interact with the cTAKES `Default Clinical Pipeline` through a Python RESTful API to get:
4 |     - Annotations for:
5 |         - Anatomical sites,
6 |         - Signs/symptoms,
7 |         - Procedures,
8 |         - Diseases/disorders and
9 |         - Medications.
10 |
11 | **Input:** `Plain Text File` **Output:** `XMI File`
12 |
13 | **Original wiki page:** https://cwiki.apache.org/confluence/display/CTAKES/Default+Clinical+Pipeline
14 |
15 | ### cTAKES Install Instructions ###
16 |
17 | 1. Execute `./ctakes-install/ctakes-user-install.sh`, which will:
18 |     - Download a copy of cTAKES into `./ctakes-install/tmp`
19 |     - Extract cTAKES and copy it into the `ctakes-install` directory (`cTAKES_HOME`)
20 |     - Download `ctakes-resources-4.0-bin.zip` into `./ctakes-install/tmp`
21 |     - Unzip `ctakes-resources-4.0-bin.zip` and copy its contents into `apache-ctakes-4.0.0/resources`
22 |     - Remove/clean the `tmp` directory from `ctakes-install`
23 |     - Prompt you to set your UMLS credentials in `umls.sh`
24 |
25 | Note: If you don't have a UMLS username and password, you'll need to request one at https://uts.nlm.nih.gov/license.html
26 |
27 | ### Setting up Python RESTful API Instructions ###
28 |
29 | COMING SOON!
30 |
31 |
32 |
--------------------------------------------------------------------------------
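A minimal usage sketch, until the RESTful layer lands: the wrapper in ctakes.py can be
driven directly, mirroring the test harness at the bottom of that file. prepare() and
run() are inherited from the NLPProcessing base class in nlp.py; the paths, file name
and note text here are illustrative, and _parse_output() is technically a private method.

    import os
    from ctakes import cTAKES

    run_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ctakes-test')
    nlp = cTAKES({'root': run_dir, 'cleanup': True})
    nlp.prepare()  # creates ctakes_input/ and ctakes_output/ under run_dir

    with open(os.path.join(run_dir, 'ctakes_input/note.txt'), 'w') as handle:
        handle.write("History of clinically significant hypogammaglobulinemia.")

    nlp.run()  # shells out to ctakes-install/run.sh
    codes = nlp._parse_output('note.txt')  # {'snomed': [...], 'cui': [...], 'rxnorm': [...]}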
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/__init__.py
--------------------------------------------------------------------------------
/ctakes-install/ctakes-server-setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
--------------------------------------------------------------------------------
/ctakes-install/ctakes-user-install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #TODO: Add better error handling/troubleshooting.
4 |
5 | ### Script Beginning ###
6 |
7 | PWD=$(pwd)
8 | ORIG=$(echo $PWD/$(dirname $0) | sed 's#/\.##')
9 | cTAKES_HOME="$ORIG/apache-ctakes-4.0.0"
10 |
11 | #FIXME: Fix output formatting
12 | progressfilt ()
13 | {
14 | local flag=false c count cr=$'\r' nl=$'\n'
15 | while IFS='' read -d '' -rn 1 c
16 |
17 | do
18 | if $flag
19 | then
20 | printf '%c' "$c"
21 | else
22 | if [[ $c != $cr && $c != $nl ]]
23 | then
24 | count=0
25 | else
26 | ((count++))
27 | if ((count > 1))
28 | then
29 | flag=true
30 | fi
31 | fi
32 | fi
33 | done
34 | }
35 |
36 | printf "\n\033[92m\u0F36\033[0m Install directory: $cTAKES_HOME \n"
37 |
38 | ### Checking for dependencies ###
39 |
40 | printf "\n\033[92m\u0F36\033[0m Checking for dependencies...\n"
41 |
42 | # Java Check #
43 |
44 | if type -p java >/dev/null 2>&1; then
45 | _java=java
46 | elif [[ -n "$JAVA_HOME" ]] && [[ -x "$JAVA_HOME/bin/java" ]]; then
47 |     _java="$JAVA_HOME/bin/java"
48 | else
49 | printf "\n \u2573 Java wasn't found. Please install Java 1.8 or greater and try again!"
50 | exit 1
51 | fi
52 |
53 | if [[ "$_java" ]]; then
54 | version=$("$_java" -version 2>&1 | awk -F '"' '/version/ {print $2}')
55 | if [[ "$version" > "1.8" ]] || [[ "$version" > "10.0.0" ]]; then
56 | printf "\n \033[92m\u2713\033[0m Java 1.8 or greater is installed!\n"
57 | else
58 | printf "\n \033[91m\u2573\033[0m Current Java version is $version please upgrade to Java 1.8 or greater!\n"
59 | exit 1
60 | fi
61 | fi
62 |
63 | # Warn if install exists #
64 |
65 | if [ -d "$cTAKES_HOME" ]; then
66 | printf "\n \033[91m\u2573\033[0m cTakes install already exists!\n\n"
67 | exit 1
68 | fi
69 |
70 | # Download cTAKES user install file linux #
71 | if [ ! -d "$cTAKES_HOME" ]; then
72 | printf "\n\033[92m\u0F36\033[0m Downloading: apache-ctakes-4.0.0-bin.tar.gz\n\n"
73 |
74 | wget --progress=bar:force http://www-eu.apache.org/dist/ctakes/ctakes-4.0.0/apache-ctakes-4.0.0-bin.tar.gz -P "$ORIG/tmp/" 2>&1 | progressfilt
75 |     tar -xvf "$ORIG/tmp/apache-ctakes-4.0.0-bin.tar.gz" -C "$ORIG"
76 | fi
77 |
78 | # Get resource files #
79 |
80 | printf "\n\033[92m\u0F36\033[0m Downloading: ctakes-resources-4.0-bin.zip\n\n"
81 | cd $ORIG/tmp
82 | wget --progress=bar:force http://sourceforge.net/projects/ctakesresources/files/ctakes-resources-4.0-bin.zip -P "$ORIG/tmp/" 2>&1 | progressfilt
83 |
84 | printf "\033[92m\u0F36\033[0m Unzipping and moving resource files...\n\n"
85 | unzip ctakes-resources-4.0-bin.zip
86 | cp -R $ORIG/tmp/resources/* $ORIG/apache-ctakes-4.0.0/resources
87 | cd "$ORIG" && rm -r "$ORIG/tmp/" # leave tmp before deleting it
88 |
89 | # Update UMLS Credentials #
90 | if [ ! -f "$ORIG/umls.sh" ]; then
91 | read -r -p "
92 | ༶ Add UMLS credentials? [y/N] " response
93 | response=${response,,}
94 |
95 |
96 |
97 | if [[ "$response" =~ ^(yes|y)$ ]];
98 | then
99 |     touch "$ORIG/umls.sh"
100 |     printf "#!/bin/bash\n\nUMLS_USERNAME=\"SAMPLE_USER\"\nUMLS_PASSWORD=\"SAMPLE_PASSWORD\"\n\nexport UMLS_USERNAME\nexport UMLS_PASSWORD\n" > "$ORIG/umls.sh"
101 |     chmod +x "$ORIG/umls.sh"
102 |
103 | read -r -p "༶ Username: `echo $'\n> '`" username
104 | username=${username,,}
105 |
106 | set_password() {
107 |
108 | read -rs -p "༶ Password: `echo $'\n> '`" password_1
109 | password_1=${password_1}
110 |
111 | read -rs -p "`echo $'\r'`༶ Verify Password: `echo $'\n> '`" password_2
112 | password_2=${password_2}
113 |
114 |     if [[ "$password_1" == "$password_2" ]]; then
115 |
116 |         sed -i -e "s/SAMPLE_USER/$username/g" "$ORIG/umls.sh"
117 |         sed -i -e "s/SAMPLE_PASSWORD/$password_1/g" "$ORIG/umls.sh"
118 |
119 | else
120 | printf "\n༶ Password mismatch try again...\n"
121 | set_password
122 | fi
123 | }
124 | set_password
125 | printf "\n\033[92m\u0F36\033[0m UMLS credentials updated!\n"
126 | else
127 | printf "\n\033[92m\u0F36\033[0m No worries you can add them manually later!\n"
128 | fi
129 | fi
130 | printf "\n\u0FC9 DONE!\n\n"
--------------------------------------------------------------------------------
/ctakes-install/log4j.xml:
--------------------------------------------------------------------------------
(log4j.xml: the XML markup was stripped during extraction and cannot be reconstructed here)
--------------------------------------------------------------------------------
/ctakes-install/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Requires JAVA JDK 1.8+
4 |
5 | # Check for UMLS credentials
6 | if [ ! -f $PWD/ctakes-install/umls.sh ]; then
7 |     printf "\033[91mERROR:\033[0m You need to provide UMLS credentials in the file ./ctakes-install/umls.sh\n" 1>&2
8 | exit 1
9 | else
10 | # Source UMLS credentials
11 | printf "\033[92m\u0F36\033[0m UMLS credentials file confirmed!\n\n"
12 | . ./ctakes-install/umls.sh
13 | fi
14 |
15 | # Only set CTAKES_HOME if not already set
16 | [ -z "$CTAKES_HOME" ] && CTAKES_HOME="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )/apache-ctakes-4.0.0"
17 | ROOT_DIR="$PWD"; cd "$CTAKES_HOME" # remember the invocation directory (assumed to be the repository root, as in the umls.sh check above)
18 |
19 | # Launch
20 |
21 | bin/runClinicalPipeline.sh -i "$ROOT_DIR/ctakes-test/ctakes_input" --xmiOut "$ROOT_DIR/ctakes-test/ctakes_output" --user "$UMLS_USERNAME" --pass "$UMLS_PASSWORD"
22 |
--------------------------------------------------------------------------------
/ctakes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Handling cTAKES
5 | #
6 | # 2013-05-14 Created by Pascal Pfiffner
7 | #
8 |
9 | import os
10 | import logging
11 | import codecs
12 | import inspect
13 |
14 | from xml.dom.minidom import parse
15 | from subprocess import call
16 |
17 | from nlp import NLPProcessing, list_to_sentences
18 |
19 | class cTAKES(NLPProcessing):
20 |     def __init__(self, settings):
21 | 
22 |         # print('(ctakes.py) Settings being used:', settings)
23 |         super().__init__()
24 | 
25 |         self.name = 'ctakes'
26 |         self.bin = os.path.dirname(os.path.abspath('%s/../' % inspect.getfile(inspect.currentframe())))
27 |         self.root = settings['root']
28 |         self.cleanup = settings['cleanup']
29 |
30 | # print('(ctakes.py) Self name:', self.name)
31 | # print('(ctakes.py) Self bin:', self.bin)
32 | # print('(ctakes.py) Self root:', self.root)
33 | # print('(ctakes.py) Self cleanup:', self.cleanup, '\n')
34 |
35 | @property
36 | def _in_dir(self):
37 | return os.path.join(self.root, 'ctakes_input')
38 |
39 | @property
40 | def _out_dir(self):
41 | return os.path.join(self.root, 'ctakes_output')
42 |
43 | def _create_directories_if_needed(self):
44 | in_dir = self._in_dir
45 | out_dir = self._out_dir
46 | if not os.path.exists(in_dir):
47 | os.mkdir(in_dir)
48 | if not os.path.exists(out_dir):
49 | os.mkdir(out_dir)
50 |
51 | def _run(self):
52 |         if call(['{}/cTAKES-Python-API/ctakes-install/run.sh'.format(self.bin)]) != 0:
53 |             raise Exception('Error running cTAKES')
54 |
55 | def _write_input(self, text, filename):
56 | if text is None \
57 |                 or len(text) < 1 \
58 | or filename is None:
59 | return False
60 |
61 | in_dir = os.path.join(
62 | self.root if self.root is not None else '.', 'ctakes_input')
63 | if not os.path.exists(in_dir):
64 | logging.error(
65 | "The input directory for cTAKES at %s does not exist" % in_dir)
66 | return False
67 |
68 | infile = os.path.join(in_dir, filename)
69 | if os.path.exists(infile):
70 | return False
71 |
72 | # write it
73 | with codecs.open(infile, 'w', 'utf-8') as handle:
74 | handle.write(list_to_sentences(text))
75 |
76 | return True
77 |
78 | def _parse_output(self, filename, **kwargs):
79 | """ Parse cTAKES XML output. """
80 |
81 | if filename is None:
82 | return None
83 |
84 | # is there cTAKES output?
85 | root = self.root if self.root is not None else '.'
86 | out_dir = os.path.join(root, 'ctakes_output')
87 | if not os.path.exists(out_dir):
88 | logging.error(
89 | "The output directory for cTAKES at %s does not exist" % out_dir)
90 | return None
91 |
92 | outfile = os.path.join(out_dir, "%s.xmi" % filename)
93 | if not os.path.exists(outfile):
94 | # do not log here and silently fail
95 | return None
96 |
97 | snomeds = []
98 | cuis = []
99 | rxnorms = []
100 |
101 | # parse XMI file
102 | root = parse(outfile).documentElement
103 |
104 | # get all "textsem:EntityMention" which store negation information
105 | neg_ids = []
106 | for node in root.getElementsByTagName('textsem:EntityMention'):
107 | polarity = node.attributes.get('polarity')
108 | if polarity is not None and int(polarity.value) < 0:
109 | ids = node.attributes.get('ontologyConceptArr')
110 | if ids is not None and ids.value:
111 | neg_ids.extend([int(i) for i in ids.value.split()])
112 |
113 | # pluck apart nodes that carry codified data ("refsem" namespace)
114 | code_nodes = root.getElementsByTagNameNS(
115 | 'http:///org/apache/ctakes/typesystem/type/refsem.ecore', '*')
116 | if len(code_nodes) > 0:
117 | for node in code_nodes:
118 | #print node.toprettyxml()
119 |
120 | # check if this node is negated
121 | is_neg = False
122 | node_id_attr = node.attributes.get('xmi:id')
123 | if node_id_attr is not None:
124 | is_neg = int(node_id_attr.value) in neg_ids
125 |
126 | # extract SNOMED and RxNORM
127 | if 'codingScheme' in node.attributes.keys() \
128 | and 'code' in node.attributes.keys():
129 | code = node.attributes['code'].value
130 | if is_neg:
131 | code = "-%s" % code
132 |
133 | # extract SNOMED code
134 | if 'SNOMED' == node.attributes['codingScheme'].value:
135 | snomeds.append(code)
136 |
137 | # extract RXNORM code
138 | elif 'RXNORM' == node.attributes['codingScheme'].value:
139 | rxnorms.append(code)
140 |
141 | # extract UMLS CUI
142 | if 'cui' in node.attributes.keys():
143 | code = node.attributes['cui'].value
144 | if is_neg:
145 | code = "-%s" % code
146 | cuis.append(code)
147 |
148 | # make lists unique
149 | snomeds = list(set(snomeds))
150 | cuis = list(set(cuis))
151 | rxnorms = list(set(rxnorms))
152 |
153 | # clean up if instructed to do so
154 | if self.cleanup:
155 | os.remove(outfile)
156 |
157 | in_dir = os.path.join(root, 'ctakes_input')
158 | infile = os.path.join(in_dir, filename)
159 | if os.path.exists(infile):
160 | os.remove(infile)
161 |
162 | # create and return a dictionary (don't filter empty lists)
163 | ret = {
164 | 'snomed': snomeds,
165 | 'cui': cuis,
166 | 'rxnorm': rxnorms
167 | }
168 |
169 | return ret
170 |
171 | # we can execute this file to do some testing
172 | if '__main__' == __name__:
173 |
174 | ### Defines directory to run
175 | # print('(ctakes.py) creating directory_obj')
176 | directory_obj = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ctakes-test')
177 | # print('(ctakes.py) directory_obj = ', directory_obj)
178 |
179 | ### Starts cTAKES class defining root directory as run_dir & cleanup as True
180 | # print('(ctakes.py) Starting my_ctakes')
181 | my_ctakes = cTAKES({'root': directory_obj, 'cleanup': True})
182 |
183 | my_ctakes.prepare()
184 |
185 | # create a test input file
186 | with open(os.path.join(my_ctakes.root, 'ctakes_input/test.txt'), 'w') as handle:
187 |         handle.write("History of clinically significant hypogammaglobulinemia, common variable immunodeficiency, or humoral immunodeficiency")
188 |
189 | # run
190 | print("\n\033[92m\u0F36 \033[0mStarting cTAKES Java Application...\n")
191 | try:
192 | my_ctakes.run()
193 | print("\n\033[92m\u263A \033[0mDONE!\n")
194 | except Exception as e:
195 | print("\033[91mFAILED:\033[0m {}\n".format(e))
196 |
197 |
198 |
199 |
200 |
201 |
202 |
--------------------------------------------------------------------------------
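The negation handling in _parse_output() is the subtle part: EntityMention nodes with a
negative polarity mark every concept id listed in their ontologyConceptArr as negated, and
matching refsem codes then get a "-" prefix. A tiny self-contained illustration of the
id-collection step, using a toy XMI fragment rather than real cTAKES output:

    from xml.dom.minidom import parseString

    xmi = ('<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" '
           'xmlns:textsem="http:///org/apache/ctakes/typesystem/type/textsem.ecore">'
           '<textsem:EntityMention xmi:id="7" polarity="-1" ontologyConceptArr="12 13"/>'
           '</xmi:XMI>')

    root = parseString(xmi).documentElement
    neg_ids = []
    for node in root.getElementsByTagName('textsem:EntityMention'):
        polarity = node.attributes.get('polarity')
        if polarity is not None and int(polarity.value) < 0:
            ids = node.attributes.get('ontologyConceptArr')
            if ids is not None and ids.value:
                neg_ids.extend(int(i) for i in ids.value.split())

    print(neg_ids)  # [12, 13]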
/databases/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !*.sh
4 |
5 |
--------------------------------------------------------------------------------
/databases/rxnorm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # create an RxNORM SQLite database (and a relations triple store).
4 | #
5 |
6 | # our SQLite database does not exist
7 | if [ ! -e rxnorm.db ]; then
8 | if [ ! -d "$1" ]; then
9 | echo "Provide the path to the RxNorm directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
10 | exit 1
11 | fi
12 | if [ ! -d "$1/rrf" ]; then
13 | echo "There is no directory named rrf in the directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
14 | exit 1
15 | fi
16 |
17 | # init the database
18 | cat "$1/scripts/mysql/Table_scripts_mysql_rxn.sql" | sqlite3 rxnorm.db
19 |
20 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles)
21 | if [ ! -e "$1/rrf/RXNREL.pipe" ]; then
22 | current=$(pwd)
23 | cd "$1/rrf"
24 | echo "-> Converting RRF files for SQLite"
25 | for f in *.RRF; do
26 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe"
27 | done
28 | cd $current
29 | fi
30 |
31 | # import tables
32 | for f in "$1/rrf/"*.pipe; do
33 | table=$(basename ${f%.pipe})
34 | echo "-> Importing $table"
35 | sqlite3 rxnorm.db ".import '$f' '$table'"
36 | done
37 |
38 | # create an NDC table
39 | echo "-> Creating NDC table"
40 | # sqlite3 rxnorm.db "CREATE TABLE NDC AS SELECT RXCUI, ATV AS NDC FROM RXNSAT WHERE ATN = 'NDC';" # we do it in 2 steps to create the primary index column
41 | sqlite3 rxnorm.db "CREATE TABLE NDC (RXCUI INT, NDC VARCHAR);"
42 | sqlite3 rxnorm.db "INSERT INTO NDC SELECT RXCUI, ATV FROM RXNSAT WHERE ATN = 'NDC';"
43 | sqlite3 rxnorm.db "CREATE INDEX X_RXCUI ON NDC (RXCUI);"
44 | sqlite3 rxnorm.db "CREATE INDEX X_NDC ON NDC (NDC);"
45 |
46 | # some handy export queries (MySQL INTO OUTFILE syntax, kept for reference)
47 | ## export NDC to CSV
48 | # SELECT RXCUI, NDC FROM NDC INTO OUTFILE 'ndc.csv' FIELDS TERMINATED BY ',' LINES TERMINATED BY "\n";
49 | ## export RxNorm-only names with their type (TTY) to CSV
50 | # SELECT RXCUI, TTY, STR FROM RXNCONSO WHERE SAB = 'RXNORM' INTO OUTFILE 'names.csv' FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY "\n";
51 | fi
52 |
53 | # dump to N-Triples
54 | exit 0
55 | sqlite3 rxnorm.db <<SQLITE_COMMAND
56 | .mode list
57 | .separator ""
58 | .output rxnorm.nt
59 | SELECT "<rxnorm:" || RXCUI1 || "> <rxnorm:" || RELA || "> <rxnorm:" || RXCUI2 || "> ." FROM RXNREL WHERE RELA != '';
60 | .output stdout
61 | SQLITE_COMMAND
62 |
63 |
--------------------------------------------------------------------------------
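Once the script has run to completion, the resulting rxnorm.db is a plain SQLite file and
can be queried from Python. A small sketch using the NDC table created above (the NDC value
is a made-up example, and RXNCONSO is assumed to have imported cleanly):

    import sqlite3

    conn = sqlite3.connect('rxnorm.db')
    ndc = '00071015523'  # hypothetical NDC code
    rows = conn.execute(
        "SELECT n.RXCUI, c.STR FROM NDC n "
        "JOIN RXNCONSO c ON c.RXCUI = n.RXCUI AND c.SAB = 'RXNORM' "
        "WHERE n.NDC = ?", (ndc,)).fetchall()
    for rxcui, name in rows:
        print(rxcui, name)  # RxNorm concept(s) behind the NDC, with preferred strings
    conn.close()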
/databases/umls.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # create a UMLS SQLite database.
4 | #
5 |
6 | # our SQLite database does not exist
7 | if [ ! -e umls.db ]; then
8 | if [ ! -d "$1" ]; then
9 | echo "Provide the path to the UMLS install directory as first argument when invoking this script. Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html (should check which file is needed)"
10 | exit 1
11 | fi
12 | if [ ! -d "$1/META" ]; then
13 | echo "There is no directory named META in the install directory you provided. Download the latest version here: http://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html"
14 | exit 1
15 | fi
16 |
17 | # convert RRF files (strip last pipe and remove quote (") characters, those are giving SQLite troubles)
18 | if [ ! -e "$1/META/MRDEF.pipe" ]; then
19 | current=$(pwd)
20 | cd "$1/META"
21 | echo "-> Converting RRF files for SQLite"
22 | for f in MRCONSO.RRF MRDEF.RRF MRSTY.RRF; do
23 | sed -e 's/.$//' -e 's/"//g' "$f" > "${f%RRF}pipe"
24 | done
25 | cd $current
26 | fi
27 |
28 | # init the database for MRDEF
29 | # table structure here: http://www.ncbi.nlm.nih.gov/books/NBK9685/
30 | sqlite3 umls.db "CREATE TABLE MRDEF (
31 | CUI varchar,
32 | AUI varchar,
33 | ATUI varchar,
34 | SATUI varchar,
35 | SAB varchar,
36 | DEF text,
37 | SUPPRESS varchar,
38 | CVF varchar
39 | )"
40 |
41 | # init the database for MRCONSO
42 | sqlite3 umls.db "CREATE TABLE MRCONSO (
43 | CUI varchar,
44 | LAT varchar,
45 | TS varchar,
46 | LUI varchar,
47 | STT varchar,
48 | SUI varchar,
49 | ISPREF varchar,
50 | AUI varchar,
51 | SAUI varchar,
52 | SCUI varchar,
53 | SDUI varchar,
54 | SAB varchar,
55 | TTY varchar,
56 | CODE varchar,
57 | STR text,
58 | SRL varchar,
59 | SUPPRESS varchar,
60 | CVF varchar
61 | )"
62 |
63 | # init the database for MRSTY
64 | sqlite3 umls.db "CREATE TABLE MRSTY (
65 | CUI varchar,
66 | TUI varchar,
67 | STN varchar,
68 | STY text,
69 | ATUI varchar,
70 | CVF varchar
71 | )"
72 |
73 | # import tables
74 | for f in "$1/META/"*.pipe; do
75 | table=$(basename ${f%.pipe})
76 | echo "-> Importing $table"
77 | sqlite3 umls.db ".import '$f' '$table'"
78 | done
79 |
80 | # create indexes
81 | echo "-> Creating indexes"
82 | sqlite3 umls.db "CREATE INDEX X_CUI_MRDEF ON MRDEF (CUI);"
83 | sqlite3 umls.db "CREATE INDEX X_SAB_MRDEF ON MRDEF (SAB);"
84 | sqlite3 umls.db "CREATE INDEX X_CUI_MRCONSO ON MRCONSO (CUI);"
85 | sqlite3 umls.db "CREATE INDEX X_LAT_MRCONSO ON MRCONSO (LAT);"
86 | sqlite3 umls.db "CREATE INDEX X_TS_MRCONSO ON MRCONSO (TS);"
87 | sqlite3 umls.db "CREATE INDEX X_CUI_MRSTY ON MRSTY (CUI);"
88 | sqlite3 umls.db "CREATE INDEX X_TUI_MRSTY ON MRSTY (TUI);"
89 |
90 | # create faster lookup table
91 | echo "-> Creating fast lookup table"
92 | sqlite3 umls.db "CREATE TABLE descriptions AS SELECT CUI, LAT, SAB, TTY, STR FROM MRCONSO WHERE LAT = 'ENG' AND TS = 'P' AND ISPREF = 'Y'"
93 | sqlite3 umls.db "ALTER TABLE descriptions ADD COLUMN STY TEXT"
94 | sqlite3 umls.db "CREATE INDEX X_CUI_desc ON descriptions (CUI)"
95 | sqlite3 umls.db "UPDATE descriptions SET STY = (SELECT GROUP_CONCAT(MRSTY.TUI, '|') FROM MRSTY WHERE MRSTY.CUI = descriptions.CUI GROUP BY MRSTY.CUI)"
96 | else
97 | echo "=> umls.db already exists"
98 | fi
99 |
100 |
--------------------------------------------------------------------------------
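As with rxnorm.db, the finished umls.db can be used directly from Python. A minimal sketch
against the fast lookup table built in the last step (the CUI is just an example; STY holds
the '|'-concatenated TUIs pulled over from MRSTY):

    import sqlite3

    conn = sqlite3.connect('umls.db')
    row = conn.execute(
        "SELECT STR, STY FROM descriptions WHERE CUI = ?",
        ('C0020538',)).fetchone()  # example CUI
    if row is not None:
        name, sty = row
        print(name, '|', sty)
    conn.close()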
/dateutil/LICENSE:
--------------------------------------------------------------------------------
1 | A. HISTORY OF THE SOFTWARE
2 | ==========================
3 |
4 | Python was created in the early 1990s by Guido van Rossum at Stichting
5 | Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
6 | as a successor of a language called ABC. Guido remains Python's
7 | principal author, although it includes many contributions from others.
8 |
9 | In 1995, Guido continued his work on Python at the Corporation for
10 | National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
11 | in Reston, Virginia where he released several versions of the
12 | software.
13 |
14 | In May 2000, Guido and the Python core development team moved to
15 | BeOpen.com to form the BeOpen PythonLabs team. In October of the same
16 | year, the PythonLabs team moved to Digital Creations (now Zope
17 | Corporation, see http://www.zope.com). In 2001, the Python Software
18 | Foundation (PSF, see http://www.python.org/psf/) was formed, a
19 | non-profit organization created specifically to own Python-related
20 | Intellectual Property. Zope Corporation is a sponsoring member of
21 | the PSF.
22 |
23 | All Python releases are Open Source (see http://www.opensource.org for
24 | the Open Source Definition). Historically, most, but not all, Python
25 | releases have also been GPL-compatible; the table below summarizes
26 | the various releases.
27 |
28 | Release Derived Year Owner GPL-
29 | from compatible? (1)
30 |
31 | 0.9.0 thru 1.2 1991-1995 CWI yes
32 | 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
33 | 1.6 1.5.2 2000 CNRI no
34 | 2.0 1.6 2000 BeOpen.com no
35 | 1.6.1 1.6 2001 CNRI yes (2)
36 | 2.1 2.0+1.6.1 2001 PSF no
37 | 2.0.1 2.0+1.6.1 2001 PSF yes
38 | 2.1.1 2.1+2.0.1 2001 PSF yes
39 | 2.2 2.1.1 2001 PSF yes
40 | 2.1.2 2.1.1 2002 PSF yes
41 | 2.1.3 2.1.2 2002 PSF yes
42 | 2.2.1 2.2 2002 PSF yes
43 | 2.2.2 2.2.1 2002 PSF yes
44 | 2.2.3 2.2.2 2003 PSF yes
45 | 2.3 2.2.2 2002-2003 PSF yes
46 |
47 | Footnotes:
48 |
49 | (1) GPL-compatible doesn't mean that we're distributing Python under
50 | the GPL. All Python licenses, unlike the GPL, let you distribute
51 | a modified version without making your changes open source. The
52 | GPL-compatible licenses make it possible to combine Python with
53 | other software that is released under the GPL; the others don't.
54 |
55 | (2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
56 | because its license has a choice of law clause. According to
57 | CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
58 | is "not incompatible" with the GPL.
59 |
60 | Thanks to the many outside volunteers who have worked under Guido's
61 | direction to make these releases possible.
62 |
63 |
64 | B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
65 | ===============================================================
66 |
67 | PSF LICENSE AGREEMENT FOR PYTHON 2.3
68 | ------------------------------------
69 |
70 | 1. This LICENSE AGREEMENT is between the Python Software Foundation
71 | ("PSF"), and the Individual or Organization ("Licensee") accessing and
72 | otherwise using Python 2.3 software in source or binary form and its
73 | associated documentation.
74 |
75 | 2. Subject to the terms and conditions of this License Agreement, PSF
76 | hereby grants Licensee a nonexclusive, royalty-free, world-wide
77 | license to reproduce, analyze, test, perform and/or display publicly,
78 | prepare derivative works, distribute, and otherwise use Python 2.3
79 | alone or in any derivative version, provided, however, that PSF's
80 | License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
81 | 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
82 | retained in Python 2.3 alone or in any derivative version prepared by
83 | Licensee.
84 |
85 | 3. In the event Licensee prepares a derivative work that is based on
86 | or incorporates Python 2.3 or any part thereof, and wants to make
87 | the derivative work available to others as provided herein, then
88 | Licensee hereby agrees to include in any such work a brief summary of
89 | the changes made to Python 2.3.
90 |
91 | 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
92 | basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
93 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
94 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
95 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
96 | INFRINGE ANY THIRD PARTY RIGHTS.
97 |
98 | 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
99 | 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
100 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
101 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
102 |
103 | 6. This License Agreement will automatically terminate upon a material
104 | breach of its terms and conditions.
105 |
106 | 7. Nothing in this License Agreement shall be deemed to create any
107 | relationship of agency, partnership, or joint venture between PSF and
108 | Licensee. This License Agreement does not grant permission to use PSF
109 | trademarks or trade name in a trademark sense to endorse or promote
110 | products or services of Licensee, or any third party.
111 |
112 | 8. By copying, installing or otherwise using Python 2.3, Licensee
113 | agrees to be bound by the terms and conditions of this License
114 | Agreement.
115 |
116 |
117 | BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
118 | -------------------------------------------
119 |
120 | BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
121 |
122 | 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
123 | office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
124 | Individual or Organization ("Licensee") accessing and otherwise using
125 | this software in source or binary form and its associated
126 | documentation ("the Software").
127 |
128 | 2. Subject to the terms and conditions of this BeOpen Python License
129 | Agreement, BeOpen hereby grants Licensee a non-exclusive,
130 | royalty-free, world-wide license to reproduce, analyze, test, perform
131 | and/or display publicly, prepare derivative works, distribute, and
132 | otherwise use the Software alone or in any derivative version,
133 | provided, however, that the BeOpen Python License is retained in the
134 | Software, alone or in any derivative version prepared by Licensee.
135 |
136 | 3. BeOpen is making the Software available to Licensee on an "AS IS"
137 | basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
138 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
139 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
140 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
141 | INFRINGE ANY THIRD PARTY RIGHTS.
142 |
143 | 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
144 | SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
145 | AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
146 | DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
147 |
148 | 5. This License Agreement will automatically terminate upon a material
149 | breach of its terms and conditions.
150 |
151 | 6. This License Agreement shall be governed by and interpreted in all
152 | respects by the law of the State of California, excluding conflict of
153 | law provisions. Nothing in this License Agreement shall be deemed to
154 | create any relationship of agency, partnership, or joint venture
155 | between BeOpen and Licensee. This License Agreement does not grant
156 | permission to use BeOpen trademarks or trade names in a trademark
157 | sense to endorse or promote products or services of Licensee, or any
158 | third party. As an exception, the "BeOpen Python" logos available at
159 | http://www.pythonlabs.com/logos.html may be used according to the
160 | permissions granted on that web page.
161 |
162 | 7. By copying, installing or otherwise using the software, Licensee
163 | agrees to be bound by the terms and conditions of this License
164 | Agreement.
165 |
166 |
167 | CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
168 | ---------------------------------------
169 |
170 | 1. This LICENSE AGREEMENT is between the Corporation for National
171 | Research Initiatives, having an office at 1895 Preston White Drive,
172 | Reston, VA 20191 ("CNRI"), and the Individual or Organization
173 | ("Licensee") accessing and otherwise using Python 1.6.1 software in
174 | source or binary form and its associated documentation.
175 |
176 | 2. Subject to the terms and conditions of this License Agreement, CNRI
177 | hereby grants Licensee a nonexclusive, royalty-free, world-wide
178 | license to reproduce, analyze, test, perform and/or display publicly,
179 | prepare derivative works, distribute, and otherwise use Python 1.6.1
180 | alone or in any derivative version, provided, however, that CNRI's
181 | License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
182 | 1995-2001 Corporation for National Research Initiatives; All Rights
183 | Reserved" are retained in Python 1.6.1 alone or in any derivative
184 | version prepared by Licensee. Alternately, in lieu of CNRI's License
185 | Agreement, Licensee may substitute the following text (omitting the
186 | quotes): "Python 1.6.1 is made available subject to the terms and
187 | conditions in CNRI's License Agreement. This Agreement together with
188 | Python 1.6.1 may be located on the Internet using the following
189 | unique, persistent identifier (known as a handle): 1895.22/1013. This
190 | Agreement may also be obtained from a proxy server on the Internet
191 | using the following URL: http://hdl.handle.net/1895.22/1013".
192 |
193 | 3. In the event Licensee prepares a derivative work that is based on
194 | or incorporates Python 1.6.1 or any part thereof, and wants to make
195 | the derivative work available to others as provided herein, then
196 | Licensee hereby agrees to include in any such work a brief summary of
197 | the changes made to Python 1.6.1.
198 |
199 | 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
200 | basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
201 | IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
202 | DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
203 | FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
204 | INFRINGE ANY THIRD PARTY RIGHTS.
205 |
206 | 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
207 | 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
208 | A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
209 | OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
210 |
211 | 6. This License Agreement will automatically terminate upon a material
212 | breach of its terms and conditions.
213 |
214 | 7. This License Agreement shall be governed by the federal
215 | intellectual property law of the United States, including without
216 | limitation the federal copyright law, and, to the extent such
217 | U.S. federal law does not apply, by the law of the Commonwealth of
218 | Virginia, excluding Virginia's conflict of law provisions.
219 | Notwithstanding the foregoing, with regard to derivative works based
220 | on Python 1.6.1 that incorporate non-separable material that was
221 | previously distributed under the GNU General Public License (GPL), the
222 | law of the Commonwealth of Virginia shall govern this License
223 | Agreement only as to issues arising under or with respect to
224 | Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
225 | License Agreement shall be deemed to create any relationship of
226 | agency, partnership, or joint venture between CNRI and Licensee. This
227 | License Agreement does not grant permission to use CNRI trademarks or
228 | trade name in a trademark sense to endorse or promote products or
229 | services of Licensee, or any third party.
230 |
231 | 8. By clicking on the "ACCEPT" button where indicated, or by copying,
232 | installing or otherwise using Python 1.6.1, Licensee agrees to be
233 | bound by the terms and conditions of this License Agreement.
234 |
235 | ACCEPT
236 |
237 |
238 | CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
239 | --------------------------------------------------
240 |
241 | Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
242 | The Netherlands. All rights reserved.
243 |
244 | Permission to use, copy, modify, and distribute this software and its
245 | documentation for any purpose and without fee is hereby granted,
246 | provided that the above copyright notice appear in all copies and that
247 | both that copyright notice and this permission notice appear in
248 | supporting documentation, and that the name of Stichting Mathematisch
249 | Centrum or CWI not be used in advertising or publicity pertaining to
250 | distribution of the software without specific, written prior
251 | permission.
252 |
253 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
254 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
255 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
256 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
257 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
258 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
259 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
260 |
--------------------------------------------------------------------------------
/dateutil/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2003-2010 Gustavo Niemeyer
3 |
4 | This module offers extensions to the standard python 2.3+
5 | datetime module.
6 | """
7 | __author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
8 | __license__ = "PSF License"
9 | __version__ = "1.5"
10 |
--------------------------------------------------------------------------------
/dateutil/easter.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2003-2007 Gustavo Niemeyer
3 |
4 | This module offers extensions to the standard python 2.3+
5 | datetime module.
6 | """
7 | __author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
8 | __license__ = "PSF License"
9 |
10 | import datetime
11 |
12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
13 |
14 | EASTER_JULIAN = 1
15 | EASTER_ORTHODOX = 2
16 | EASTER_WESTERN = 3
17 |
18 | def easter(year, method=EASTER_WESTERN):
19 | """
20 | This method was ported from the work done by GM Arts,
21 | on top of the algorithm by Claus Tondering, which was
22 |     based in part on the algorithm of Oudin (1940), as
23 | quoted in "Explanatory Supplement to the Astronomical
24 | Almanac", P. Kenneth Seidelmann, editor.
25 |
26 | This algorithm implements three different easter
27 | calculation methods:
28 |
29 | 1 - Original calculation in Julian calendar, valid in
30 | dates after 326 AD
31 | 2 - Original method, with date converted to Gregorian
32 | calendar, valid in years 1583 to 4099
33 | 3 - Revised method, in Gregorian calendar, valid in
34 | years 1583 to 4099 as well
35 |
36 | These methods are represented by the constants:
37 |
38 | EASTER_JULIAN = 1
39 | EASTER_ORTHODOX = 2
40 | EASTER_WESTERN = 3
41 |
42 | The default method is method 3.
43 |
44 | More about the algorithm may be found at:
45 |
46 | http://users.chariot.net.au/~gmarts/eastalg.htm
47 |
48 | and
49 |
50 | http://www.tondering.dk/claus/calendar.html
51 |
52 | """
53 |
54 | if not (1 <= method <= 3):
55 |         raise ValueError("invalid method")
56 |
57 | # g - Golden year - 1
58 | # c - Century
59 | # h - (23 - Epact) mod 30
60 | # i - Number of days from March 21 to Paschal Full Moon
61 | # j - Weekday for PFM (0=Sunday, etc)
62 | # p - Number of days from March 21 to Sunday on or before PFM
63 | # (-6 to 28 methods 1 & 3, to 56 for method 2)
64 | # e - Extra days to add for method 2 (converting Julian
65 | # date to Gregorian date)
66 |
67 | y = year
68 | g = y % 19
69 | e = 0
70 | if method < 3:
71 | # Old method
72 | i = (19*g+15)%30
73 | j = (y+y//4+i)%7
74 | if method == 2:
75 | # Extra dates to convert Julian to Gregorian date
76 | e = 10
77 | if y > 1600:
78 | e = e+y//100-16-(y//100-16)//4
79 | else:
80 | # New method
81 | c = y//100
82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30
83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11))
84 | j = (y+y//4+i+2-c+c//4)%7
85 |
86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May
87 | # (later dates apply to method 2, although 23 May never actually occurs)
88 | p = i-j+e
89 | d = 1+(p+27+(p+6)//40)%31
90 | m = 3+(p+26)//30
91 | return datetime.date(int(y),int(m),int(d))
92 |
93 |
--------------------------------------------------------------------------------
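A quick usage sketch of easter(), assuming this vendored copy is importable as
dateutil.easter (the 2010 Western date is a known value; method 3 is the default):

    import datetime
    from dateutil.easter import easter, EASTER_JULIAN, EASTER_ORTHODOX

    print(easter(2010))                   # datetime.date(2010, 4, 4), Western/Gregorian
    print(easter(2010, EASTER_ORTHODOX))  # Julian computation mapped onto the Gregorian calendar
    print(easter(2010, EASTER_JULIAN))    # date expressed in the Julian calendar itself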
/dateutil/parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kenpachiii/cTAKES-Python-API/7e051443982537ba59dc1e70ca360b079552c46e/dateutil/parser.py
--------------------------------------------------------------------------------
/dateutil/relativedelta.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2003-2010 Gustavo Niemeyer
3 |
4 | This module offers extensions to the standard python 2.3+
5 | datetime module.
6 | """
7 | __author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
8 | __license__ = "PSF License"
9 |
10 | import datetime
11 | import calendar
12 |
13 | __all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
14 |
15 | class weekday(object):
16 | __slots__ = ["weekday", "n"]
17 |
18 | def __init__(self, weekday, n=None):
19 | self.weekday = weekday
20 | self.n = n
21 |
22 | def __call__(self, n):
23 | if n == self.n:
24 | return self
25 | else:
26 | return self.__class__(self.weekday, n)
27 |
28 | def __eq__(self, other):
29 | try:
30 | if self.weekday != other.weekday or self.n != other.n:
31 | return False
32 | except AttributeError:
33 | return False
34 | return True
35 |
36 | def __repr__(self):
37 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
38 | if not self.n:
39 | return s
40 | else:
41 | return "%s(%+d)" % (s, self.n)
42 |
43 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
44 |
45 | class relativedelta:
46 | """
47 |     The relativedelta type is based on the specification of the excellent
48 | work done by M.-A. Lemburg in his mx.DateTime extension. However,
49 | notice that this type does *NOT* implement the same algorithm as
50 | his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
51 |
52 |     There are two different ways to build a relativedelta instance. The
53 | first one is passing it two date/datetime classes:
54 |
55 | relativedelta(datetime1, datetime2)
56 |
57 | And the other way is to use the following keyword arguments:
58 |
59 | year, month, day, hour, minute, second, microsecond:
60 | Absolute information.
61 |
62 | years, months, weeks, days, hours, minutes, seconds, microseconds:
63 | Relative information, may be negative.
64 |
65 | weekday:
66 | One of the weekday instances (MO, TU, etc). These instances may
67 | receive a parameter N, specifying the Nth weekday, which could
68 |         be positive or negative (like MO(+1) or MO(-2)). Not specifying
69 | it is the same as specifying +1. You can also use an integer,
70 | where 0=MO.
71 |
72 | leapdays:
73 | Will add given days to the date found, if year is a leap
74 |         year, and the date found is after February 28.
75 |
76 | yearday, nlyearday:
77 | Set the yearday or the non-leap year day (jump leap days).
78 | These are converted to day/month/leapdays information.
79 |
80 | Here is the behavior of operations with relativedelta:
81 |
82 | 1) Calculate the absolute year, using the 'year' argument, or the
83 | original datetime year, if the argument is not present.
84 |
85 | 2) Add the relative 'years' argument to the absolute year.
86 |
87 | 3) Do steps 1 and 2 for month/months.
88 |
89 | 4) Calculate the absolute day, using the 'day' argument, or the
90 | original datetime day, if the argument is not present. Then,
91 | subtract from the day until it fits in the year and month
92 | found after their operations.
93 |
94 | 5) Add the relative 'days' argument to the absolute day. Notice
95 | that the 'weeks' argument is multiplied by 7 and added to
96 | 'days'.
97 |
98 | 6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
99 | microsecond/microseconds.
100 |
101 | 7) If the 'weekday' argument is present, calculate the weekday,
102 | with the given (wday, nth) tuple. wday is the index of the
103 | weekday (0-6, 0=Mon), and nth is the number of weeks to add
104 |        forward or backward, depending on its sign. Notice that if
105 | the calculated date is already Monday, for example, using
106 | (0, 1) or (0, -1) won't change the day.
107 | """
108 |
109 | def __init__(self, dt1=None, dt2=None,
110 | years=0, months=0, days=0, leapdays=0, weeks=0,
111 | hours=0, minutes=0, seconds=0, microseconds=0,
112 | year=None, month=None, day=None, weekday=None,
113 | yearday=None, nlyearday=None,
114 | hour=None, minute=None, second=None, microsecond=None):
115 | if dt1 and dt2:
116 | if not isinstance(dt1, datetime.date) or \
117 | not isinstance(dt2, datetime.date):
118 |                 raise TypeError("relativedelta only diffs datetime/date")
119 | if type(dt1) is not type(dt2):
120 | if not isinstance(dt1, datetime.datetime):
121 | dt1 = datetime.datetime.fromordinal(dt1.toordinal())
122 | elif not isinstance(dt2, datetime.datetime):
123 | dt2 = datetime.datetime.fromordinal(dt2.toordinal())
124 | self.years = 0
125 | self.months = 0
126 | self.days = 0
127 | self.leapdays = 0
128 | self.hours = 0
129 | self.minutes = 0
130 | self.seconds = 0
131 | self.microseconds = 0
132 | self.year = None
133 | self.month = None
134 | self.day = None
135 | self.weekday = None
136 | self.hour = None
137 | self.minute = None
138 | self.second = None
139 | self.microsecond = None
140 | self._has_time = 0
141 |
142 | months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month)
143 | self._set_months(months)
144 | dtm = self.__radd__(dt2)
145 | if dt1 < dt2:
146 | while dt1 > dtm:
147 | months += 1
148 | self._set_months(months)
149 | dtm = self.__radd__(dt2)
150 | else:
151 | while dt1 < dtm:
152 | months -= 1
153 | self._set_months(months)
154 | dtm = self.__radd__(dt2)
155 | delta = dt1 - dtm
156 | self.seconds = delta.seconds+delta.days*86400
157 | self.microseconds = delta.microseconds
158 | else:
159 | self.years = years
160 | self.months = months
161 | self.days = days+weeks*7
162 | self.leapdays = leapdays
163 | self.hours = hours
164 | self.minutes = minutes
165 | self.seconds = seconds
166 | self.microseconds = microseconds
167 | self.year = year
168 | self.month = month
169 | self.day = day
170 | self.hour = hour
171 | self.minute = minute
172 | self.second = second
173 | self.microsecond = microsecond
174 |
175 | if type(weekday) is int:
176 | self.weekday = weekdays[weekday]
177 | else:
178 | self.weekday = weekday
179 |
180 | yday = 0
181 | if nlyearday:
182 | yday = nlyearday
183 | elif yearday:
184 | yday = yearday
185 | if yearday > 59:
186 | self.leapdays = -1
187 | if yday:
188 | ydayidx = [31,59,90,120,151,181,212,243,273,304,334,366]
189 | for idx, ydays in enumerate(ydayidx):
190 | if yday <= ydays:
191 | self.month = idx+1
192 | if idx == 0:
193 | self.day = yday
194 | else:
195 | self.day = yday-ydayidx[idx-1]
196 | break
197 | else:
198 |                 raise ValueError("invalid year day (%d)" % yday)
199 |
200 | self._fix()
201 |
202 | def _fix(self):
203 | if abs(self.microseconds) > 999999:
204 | s = self.microseconds//abs(self.microseconds)
205 | div, mod = divmod(self.microseconds*s, 1000000)
206 | self.microseconds = mod*s
207 | self.seconds += div*s
208 | if abs(self.seconds) > 59:
209 | s = self.seconds//abs(self.seconds)
210 | div, mod = divmod(self.seconds*s, 60)
211 | self.seconds = mod*s
212 | self.minutes += div*s
213 | if abs(self.minutes) > 59:
214 | s = self.minutes//abs(self.minutes)
215 | div, mod = divmod(self.minutes*s, 60)
216 | self.minutes = mod*s
217 | self.hours += div*s
218 | if abs(self.hours) > 23:
219 | s = self.hours//abs(self.hours)
220 | div, mod = divmod(self.hours*s, 24)
221 | self.hours = mod*s
222 | self.days += div*s
223 | if abs(self.months) > 11:
224 | s = self.months//abs(self.months)
225 | div, mod = divmod(self.months*s, 12)
226 | self.months = mod*s
227 | self.years += div*s
228 | if (self.hours or self.minutes or self.seconds or self.microseconds or
229 | self.hour is not None or self.minute is not None or
230 | self.second is not None or self.microsecond is not None):
231 | self._has_time = 1
232 | else:
233 | self._has_time = 0
234 |
235 | def _set_months(self, months):
236 | self.months = months
237 | if abs(self.months) > 11:
238 | s = self.months//abs(self.months)
239 | div, mod = divmod(self.months*s, 12)
240 | self.months = mod*s
241 | self.years = div*s
242 | else:
243 | self.years = 0
244 |
245 | def __radd__(self, other):
246 | if not isinstance(other, datetime.date):
247 |             raise TypeError("unsupported type for add operation")
248 | elif self._has_time and not isinstance(other, datetime.datetime):
249 | other = datetime.datetime.fromordinal(other.toordinal())
250 | year = (self.year or other.year)+self.years
251 | month = self.month or other.month
252 | if self.months:
253 | assert 1 <= abs(self.months) <= 12
254 | month += self.months
255 | if month > 12:
256 | year += 1
257 | month -= 12
258 | elif month < 1:
259 | year -= 1
260 | month += 12
261 | day = min(calendar.monthrange(year, month)[1],
262 | self.day or other.day)
263 | repl = {"year": year, "month": month, "day": day}
264 | for attr in ["hour", "minute", "second", "microsecond"]:
265 | value = getattr(self, attr)
266 | if value is not None:
267 | repl[attr] = value
268 | days = self.days
269 | if self.leapdays and month > 2 and calendar.isleap(year):
270 | days += self.leapdays
271 | ret = (other.replace(**repl)
272 | + datetime.timedelta(days=days,
273 | hours=self.hours,
274 | minutes=self.minutes,
275 | seconds=self.seconds,
276 | microseconds=self.microseconds))
277 | if self.weekday:
278 | weekday, nth = self.weekday.weekday, self.weekday.n or 1
279 | jumpdays = (abs(nth)-1)*7
280 | if nth > 0:
281 | jumpdays += (7-ret.weekday()+weekday)%7
282 | else:
283 | jumpdays += (ret.weekday()-weekday)%7
284 | jumpdays *= -1
285 | ret += datetime.timedelta(days=jumpdays)
286 | return ret
287 |
288 | def __rsub__(self, other):
289 | return self.__neg__().__radd__(other)
290 |
291 | def __add__(self, other):
292 | if not isinstance(other, relativedelta):
293 |             raise TypeError("unsupported type for add operation")
294 | return relativedelta(years=other.years+self.years,
295 | months=other.months+self.months,
296 | days=other.days+self.days,
297 | hours=other.hours+self.hours,
298 | minutes=other.minutes+self.minutes,
299 | seconds=other.seconds+self.seconds,
300 | microseconds=other.microseconds+self.microseconds,
301 | leapdays=other.leapdays or self.leapdays,
302 | year=other.year or self.year,
303 | month=other.month or self.month,
304 | day=other.day or self.day,
305 | weekday=other.weekday or self.weekday,
306 | hour=other.hour or self.hour,
307 | minute=other.minute or self.minute,
308 | second=other.second or self.second,
309 |                              microsecond=other.microsecond or self.microsecond)
310 |
311 | def __sub__(self, other):
312 | if not isinstance(other, relativedelta):
313 |             raise TypeError("unsupported type for sub operation")
314 |         return relativedelta(years=self.years-other.years,
315 |                              months=self.months-other.months,
316 |                              days=self.days-other.days,
317 |                              hours=self.hours-other.hours,
318 |                              minutes=self.minutes-other.minutes,
319 |                              seconds=self.seconds-other.seconds,
320 |                              microseconds=self.microseconds-other.microseconds,
321 |                              leapdays=self.leapdays or other.leapdays,
322 |                              year=self.year or other.year,
323 |                              month=self.month or other.month,
324 |                              day=self.day or other.day,
325 |                              weekday=self.weekday or other.weekday,
326 |                              hour=self.hour or other.hour,
327 |                              minute=self.minute or other.minute,
328 |                              second=self.second or other.second,
329 |                              microsecond=self.microsecond or other.microsecond)
330 |
331 | def __neg__(self):
332 | return relativedelta(years=-self.years,
333 | months=-self.months,
334 | days=-self.days,
335 | hours=-self.hours,
336 | minutes=-self.minutes,
337 | seconds=-self.seconds,
338 | microseconds=-self.microseconds,
339 | leapdays=self.leapdays,
340 | year=self.year,
341 | month=self.month,
342 | day=self.day,
343 | weekday=self.weekday,
344 | hour=self.hour,
345 | minute=self.minute,
346 | second=self.second,
347 | microsecond=self.microsecond)
348 |
349 | def __nonzero__(self):
350 | return not (not self.years and
351 | not self.months and
352 | not self.days and
353 | not self.hours and
354 | not self.minutes and
355 | not self.seconds and
356 | not self.microseconds and
357 | not self.leapdays and
358 | self.year is None and
359 | self.month is None and
360 | self.day is None and
361 | self.weekday is None and
362 | self.hour is None and
363 | self.minute is None and
364 | self.second is None and
365 | self.microsecond is None)
366 |
367 | def __mul__(self, other):
368 | f = float(other)
369 | return relativedelta(years=self.years*f,
370 | months=self.months*f,
371 | days=self.days*f,
372 | hours=self.hours*f,
373 | minutes=self.minutes*f,
374 | seconds=self.seconds*f,
375 | microseconds=self.microseconds*f,
376 | leapdays=self.leapdays,
377 | year=self.year,
378 | month=self.month,
379 | day=self.day,
380 | weekday=self.weekday,
381 | hour=self.hour,
382 | minute=self.minute,
383 | second=self.second,
384 | microsecond=self.microsecond)
385 |
386 | def __eq__(self, other):
387 | if not isinstance(other, relativedelta):
388 | return False
389 | if self.weekday or other.weekday:
390 | if not self.weekday or not other.weekday:
391 | return False
392 | if self.weekday.weekday != other.weekday.weekday:
393 | return False
394 | n1, n2 = self.weekday.n, other.weekday.n
395 | if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
396 | return False
397 | return (self.years == other.years and
398 | self.months == other.months and
399 | self.days == other.days and
400 | self.hours == other.hours and
401 | self.minutes == other.minutes and
402 | self.seconds == other.seconds and
403 | self.leapdays == other.leapdays and
404 | self.year == other.year and
405 | self.month == other.month and
406 | self.day == other.day and
407 | self.hour == other.hour and
408 | self.minute == other.minute and
409 | self.second == other.second and
410 | self.microsecond == other.microsecond)
411 |
412 | def __ne__(self, other):
413 | return not self.__eq__(other)
414 |
415 | def __div__(self, other):
416 | return self.__mul__(1/float(other))
417 |
418 | def __repr__(self):
419 | l = []
420 | for attr in ["years", "months", "days", "leapdays",
421 | "hours", "minutes", "seconds", "microseconds"]:
422 | value = getattr(self, attr)
423 | if value:
424 | l.append("%s=%+d" % (attr, value))
425 | for attr in ["year", "month", "day", "weekday",
426 | "hour", "minute", "second", "microsecond"]:
427 | value = getattr(self, attr)
428 | if value is not None:
429 |                 l.append("%s=%s" % (attr, repr(value)))
430 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
431 |
432 | # vim:ts=4:sw=4:et
433 |
--------------------------------------------------------------------------------
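A few relativedelta sanity examples illustrating the rules in the class docstring (run with
the interpreter this vendored, Python-2-era copy targets; dates chosen for illustration):

    import datetime
    from dateutil.relativedelta import relativedelta, MO

    dt = datetime.date(2003, 9, 17)            # a Wednesday
    print(dt + relativedelta(months=+1))       # 2003-10-17: month bumped, absolute day kept
    print(dt + relativedelta(weekday=MO(+1)))  # 2003-09-22: the following Monday (rule 7)
    print(relativedelta(datetime.date(2003, 10, 24), dt))  # relativedelta(months=+1, days=+7)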
/dateutil/rrule.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2003-2010 Gustavo Niemeyer
3 |
4 | This module offers extensions to the standard python 2.3+
5 | datetime module.
6 | """
7 | __author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
8 | __license__ = "PSF License"
9 |
10 | import itertools
11 | import datetime
12 | import calendar
13 | import thread
14 | import sys
15 |
16 | __all__ = ["rrule", "rruleset", "rrulestr",
17 | "YEARLY", "MONTHLY", "WEEKLY", "DAILY",
18 | "HOURLY", "MINUTELY", "SECONDLY",
19 | "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
20 |
21 | # Every mask is 7 days longer to handle cross-year weekly periods.
22 | M366MASK = tuple([1]*31+[2]*29+[3]*31+[4]*30+[5]*31+[6]*30+
23 | [7]*31+[8]*31+[9]*30+[10]*31+[11]*30+[12]*31+[1]*7)
24 | M365MASK = list(M366MASK)
25 | M29, M30, M31 = range(1,30), range(1,31), range(1,32)
26 | MDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7])
27 | MDAY365MASK = list(MDAY366MASK)
28 | M29, M30, M31 = range(-29,0), range(-30,0), range(-31,0)
29 | NMDAY366MASK = tuple(M31+M29+M31+M30+M31+M30+M31+M31+M30+M31+M30+M31+M31[:7])
30 | NMDAY365MASK = list(NMDAY366MASK)
31 | M366RANGE = (0,31,60,91,121,152,182,213,244,274,305,335,366)
32 | M365RANGE = (0,31,59,90,120,151,181,212,243,273,304,334,365)
33 | WDAYMASK = [0,1,2,3,4,5,6]*55
34 | del M29, M30, M31, M365MASK[59], MDAY365MASK[59], NMDAY365MASK[31]
35 | MDAY365MASK = tuple(MDAY365MASK)
36 | M365MASK = tuple(M365MASK)
37 |
38 | (YEARLY,
39 | MONTHLY,
40 | WEEKLY,
41 | DAILY,
42 | HOURLY,
43 | MINUTELY,
44 | SECONDLY) = range(7)
45 |
46 | # Imported on demand.
47 | easter = None
48 | parser = None
49 |
50 | class weekday(object):
51 | __slots__ = ["weekday", "n"]
52 |
53 | def __init__(self, weekday, n=None):
54 | if n == 0:
55 |             raise ValueError("Can't create weekday with n == 0")
56 | self.weekday = weekday
57 | self.n = n
58 |
59 | def __call__(self, n):
60 | if n == self.n:
61 | return self
62 | else:
63 | return self.__class__(self.weekday, n)
64 |
65 | def __eq__(self, other):
66 | try:
67 | if self.weekday != other.weekday or self.n != other.n:
68 | return False
69 | except AttributeError:
70 | return False
71 | return True
72 |
73 | def __repr__(self):
74 | s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
75 | if not self.n:
76 | return s
77 | else:
78 | return "%s(%+d)" % (s, self.n)
79 |
80 | MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
81 |
82 | class rrulebase:
83 | def __init__(self, cache=False):
84 | if cache:
85 | self._cache = []
86 | self._cache_lock = thread.allocate_lock()
87 | self._cache_gen = self._iter()
88 | self._cache_complete = False
89 | else:
90 | self._cache = None
91 | self._cache_complete = False
92 | self._len = None
93 |
94 | def __iter__(self):
95 | if self._cache_complete:
96 | return iter(self._cache)
97 | elif self._cache is None:
98 | return self._iter()
99 | else:
100 | return self._iter_cached()
101 |
102 | def _iter_cached(self):
103 | i = 0
104 | gen = self._cache_gen
105 | cache = self._cache
106 | acquire = self._cache_lock.acquire
107 | release = self._cache_lock.release
108 | while gen:
109 | if i == len(cache):
110 | acquire()
111 | if self._cache_complete:
112 | break
113 | try:
114 | for j in range(10):
115 | cache.append(gen.next())
116 | except StopIteration:
117 | self._cache_gen = gen = None
118 | self._cache_complete = True
119 | break
120 | release()
121 | yield cache[i]
122 | i += 1
123 | while i < self._len:
124 | yield cache[i]
125 | i += 1
126 |
127 | def __getitem__(self, item):
128 | if self._cache_complete:
129 | return self._cache[item]
130 | elif isinstance(item, slice):
131 | if item.step and item.step < 0:
132 | return list(iter(self))[item]
133 | else:
134 | return list(itertools.islice(self,
135 | item.start or 0,
136 | item.stop or sys.maxint,
137 | item.step or 1))
138 | elif item >= 0:
139 | gen = iter(self)
140 | try:
141 | for i in range(item+1):
142 | res = gen.next()
143 | except StopIteration:
144 | raise IndexError
145 | return res
146 | else:
147 | return list(iter(self))[item]
148 |
149 | def __contains__(self, item):
150 | if self._cache_complete:
151 | return item in self._cache
152 | else:
153 | for i in self:
154 | if i == item:
155 | return True
156 | elif i > item:
157 | return False
158 | return False
159 |
160 |     # __len__() introduces a large performance penalty.
161 | def count(self):
162 | if self._len is None:
163 | for x in self: pass
164 | return self._len
165 |
166 | def before(self, dt, inc=False):
167 | if self._cache_complete:
168 | gen = self._cache
169 | else:
170 | gen = self
171 | last = None
172 | if inc:
173 | for i in gen:
174 | if i > dt:
175 | break
176 | last = i
177 | else:
178 | for i in gen:
179 | if i >= dt:
180 | break
181 | last = i
182 | return last
183 |
184 | def after(self, dt, inc=False):
185 | if self._cache_complete:
186 | gen = self._cache
187 | else:
188 | gen = self
189 | if inc:
190 | for i in gen:
191 | if i >= dt:
192 | return i
193 | else:
194 | for i in gen:
195 | if i > dt:
196 | return i
197 | return None
198 |
199 | def between(self, after, before, inc=False):
200 | if self._cache_complete:
201 | gen = self._cache
202 | else:
203 | gen = self
204 | started = False
205 | l = []
206 | if inc:
207 | for i in gen:
208 | if i > before:
209 | break
210 | elif not started:
211 | if i >= after:
212 | started = True
213 | l.append(i)
214 | else:
215 | l.append(i)
216 | else:
217 | for i in gen:
218 | if i >= before:
219 | break
220 | elif not started:
221 | if i > after:
222 | started = True
223 | l.append(i)
224 | else:
225 | l.append(i)
226 | return l
227 |
228 | class rrule(rrulebase):
229 | def __init__(self, freq, dtstart=None,
230 | interval=1, wkst=None, count=None, until=None, bysetpos=None,
231 | bymonth=None, bymonthday=None, byyearday=None, byeaster=None,
232 | byweekno=None, byweekday=None,
233 | byhour=None, byminute=None, bysecond=None,
234 | cache=False):
235 | rrulebase.__init__(self, cache)
236 | global easter
237 | if not dtstart:
238 | dtstart = datetime.datetime.now().replace(microsecond=0)
239 | elif not isinstance(dtstart, datetime.datetime):
240 | dtstart = datetime.datetime.fromordinal(dtstart.toordinal())
241 | else:
242 | dtstart = dtstart.replace(microsecond=0)
243 | self._dtstart = dtstart
244 | self._tzinfo = dtstart.tzinfo
245 | self._freq = freq
246 | self._interval = interval
247 | self._count = count
248 | if until and not isinstance(until, datetime.datetime):
249 | until = datetime.datetime.fromordinal(until.toordinal())
250 | self._until = until
251 | if wkst is None:
252 | self._wkst = calendar.firstweekday()
253 | elif type(wkst) is int:
254 | self._wkst = wkst
255 | else:
256 | self._wkst = wkst.weekday
257 | if bysetpos is None:
258 | self._bysetpos = None
259 | elif type(bysetpos) is int:
260 | if bysetpos == 0 or not (-366 <= bysetpos <= 366):
261 | raise ValueError("bysetpos must be between 1 and 366, "
262 | "or between -366 and -1")
263 | self._bysetpos = (bysetpos,)
264 | else:
265 | self._bysetpos = tuple(bysetpos)
266 | for pos in self._bysetpos:
267 | if pos == 0 or not (-366 <= pos <= 366):
268 | raise ValueError("bysetpos must be between 1 and 366, "
269 | "or between -366 and -1")
270 | if not (byweekno or byyearday or bymonthday or
271 | byweekday is not None or byeaster is not None):
272 | if freq == YEARLY:
273 | if not bymonth:
274 | bymonth = dtstart.month
275 | bymonthday = dtstart.day
276 | elif freq == MONTHLY:
277 | bymonthday = dtstart.day
278 | elif freq == WEEKLY:
279 | byweekday = dtstart.weekday()
280 | # bymonth
281 | if not bymonth:
282 | self._bymonth = None
283 | elif type(bymonth) is int:
284 | self._bymonth = (bymonth,)
285 | else:
286 | self._bymonth = tuple(bymonth)
287 | # byyearday
288 | if not byyearday:
289 | self._byyearday = None
290 | elif type(byyearday) is int:
291 | self._byyearday = (byyearday,)
292 | else:
293 | self._byyearday = tuple(byyearday)
294 | # byeaster
295 | if byeaster is not None:
296 | if not easter:
297 | from dateutil import easter
298 | if type(byeaster) is int:
299 | self._byeaster = (byeaster,)
300 | else:
301 | self._byeaster = tuple(byeaster)
302 | else:
303 | self._byeaster = None
304 |         # bymonthday
305 | if not bymonthday:
306 | self._bymonthday = ()
307 | self._bynmonthday = ()
308 | elif type(bymonthday) is int:
309 | if bymonthday < 0:
310 | self._bynmonthday = (bymonthday,)
311 | self._bymonthday = ()
312 | else:
313 | self._bymonthday = (bymonthday,)
314 | self._bynmonthday = ()
315 | else:
316 | self._bymonthday = tuple([x for x in bymonthday if x > 0])
317 | self._bynmonthday = tuple([x for x in bymonthday if x < 0])
318 | # byweekno
319 | if byweekno is None:
320 | self._byweekno = None
321 | elif type(byweekno) is int:
322 | self._byweekno = (byweekno,)
323 | else:
324 | self._byweekno = tuple(byweekno)
325 | # byweekday / bynweekday
326 | if byweekday is None:
327 | self._byweekday = None
328 | self._bynweekday = None
329 | elif type(byweekday) is int:
330 | self._byweekday = (byweekday,)
331 | self._bynweekday = None
332 | elif hasattr(byweekday, "n"):
333 | if not byweekday.n or freq > MONTHLY:
334 | self._byweekday = (byweekday.weekday,)
335 | self._bynweekday = None
336 | else:
337 | self._bynweekday = ((byweekday.weekday, byweekday.n),)
338 | self._byweekday = None
339 | else:
340 | self._byweekday = []
341 | self._bynweekday = []
342 | for wday in byweekday:
343 | if type(wday) is int:
344 | self._byweekday.append(wday)
345 | elif not wday.n or freq > MONTHLY:
346 | self._byweekday.append(wday.weekday)
347 | else:
348 | self._bynweekday.append((wday.weekday, wday.n))
349 | self._byweekday = tuple(self._byweekday)
350 | self._bynweekday = tuple(self._bynweekday)
351 | if not self._byweekday:
352 | self._byweekday = None
353 | elif not self._bynweekday:
354 | self._bynweekday = None
355 | # byhour
356 | if byhour is None:
357 | if freq < HOURLY:
358 | self._byhour = (dtstart.hour,)
359 | else:
360 | self._byhour = None
361 | elif type(byhour) is int:
362 | self._byhour = (byhour,)
363 | else:
364 | self._byhour = tuple(byhour)
365 | # byminute
366 | if byminute is None:
367 | if freq < MINUTELY:
368 | self._byminute = (dtstart.minute,)
369 | else:
370 | self._byminute = None
371 | elif type(byminute) is int:
372 | self._byminute = (byminute,)
373 | else:
374 | self._byminute = tuple(byminute)
375 | # bysecond
376 | if bysecond is None:
377 | if freq < SECONDLY:
378 | self._bysecond = (dtstart.second,)
379 | else:
380 | self._bysecond = None
381 | elif type(bysecond) is int:
382 | self._bysecond = (bysecond,)
383 | else:
384 | self._bysecond = tuple(bysecond)
385 |
386 | if self._freq >= HOURLY:
387 | self._timeset = None
388 | else:
389 | self._timeset = []
390 | for hour in self._byhour:
391 | for minute in self._byminute:
392 | for second in self._bysecond:
393 | self._timeset.append(
394 | datetime.time(hour, minute, second,
395 | tzinfo=self._tzinfo))
396 | self._timeset.sort()
397 | self._timeset = tuple(self._timeset)
398 |
399 | def _iter(self):
400 | year, month, day, hour, minute, second, weekday, yearday, _ = \
401 | self._dtstart.timetuple()
402 |
403 | # Some local variables to speed things up a bit
404 | freq = self._freq
405 | interval = self._interval
406 | wkst = self._wkst
407 | until = self._until
408 | bymonth = self._bymonth
409 | byweekno = self._byweekno
410 | byyearday = self._byyearday
411 | byweekday = self._byweekday
412 | byeaster = self._byeaster
413 | bymonthday = self._bymonthday
414 | bynmonthday = self._bynmonthday
415 | bysetpos = self._bysetpos
416 | byhour = self._byhour
417 | byminute = self._byminute
418 | bysecond = self._bysecond
419 |
420 | ii = _iterinfo(self)
421 | ii.rebuild(year, month)
422 |
423 | getdayset = {YEARLY:ii.ydayset,
424 | MONTHLY:ii.mdayset,
425 | WEEKLY:ii.wdayset,
426 | DAILY:ii.ddayset,
427 | HOURLY:ii.ddayset,
428 | MINUTELY:ii.ddayset,
429 | SECONDLY:ii.ddayset}[freq]
430 |
431 | if freq < HOURLY:
432 | timeset = self._timeset
433 | else:
434 | gettimeset = {HOURLY:ii.htimeset,
435 | MINUTELY:ii.mtimeset,
436 | SECONDLY:ii.stimeset}[freq]
437 | if ((freq >= HOURLY and
438 | self._byhour and hour not in self._byhour) or
439 | (freq >= MINUTELY and
440 | self._byminute and minute not in self._byminute) or
441 | (freq >= SECONDLY and
442 | self._bysecond and second not in self._bysecond)):
443 | timeset = ()
444 | else:
445 | timeset = gettimeset(hour, minute, second)
446 |
447 | total = 0
448 | count = self._count
449 | while True:
450 | # Get dayset with the right frequency
451 | dayset, start, end = getdayset(year, month, day)
452 |
453 | # Do the "hard" work ;-)
454 | filtered = False
455 | for i in dayset[start:end]:
456 | if ((bymonth and ii.mmask[i] not in bymonth) or
457 | (byweekno and not ii.wnomask[i]) or
458 | (byweekday and ii.wdaymask[i] not in byweekday) or
459 | (ii.nwdaymask and not ii.nwdaymask[i]) or
460 | (byeaster and not ii.eastermask[i]) or
461 | ((bymonthday or bynmonthday) and
462 | ii.mdaymask[i] not in bymonthday and
463 | ii.nmdaymask[i] not in bynmonthday) or
464 | (byyearday and
465 | ((i < ii.yearlen and i+1 not in byyearday
466 | and -ii.yearlen+i not in byyearday) or
467 | (i >= ii.yearlen and i+1-ii.yearlen not in byyearday
468 | and -ii.nextyearlen+i-ii.yearlen
469 | not in byyearday)))):
470 | dayset[i] = None
471 | filtered = True
472 |
473 | # Output results
474 | if bysetpos and timeset:
475 | poslist = []
476 | for pos in bysetpos:
477 | if pos < 0:
478 | daypos, timepos = divmod(pos, len(timeset))
479 | else:
480 | daypos, timepos = divmod(pos-1, len(timeset))
481 | try:
482 | i = [x for x in dayset[start:end]
483 | if x is not None][daypos]
484 | time = timeset[timepos]
485 | except IndexError:
486 | pass
487 | else:
488 | date = datetime.date.fromordinal(ii.yearordinal+i)
489 | res = datetime.datetime.combine(date, time)
490 | if res not in poslist:
491 | poslist.append(res)
492 | poslist.sort()
493 | for res in poslist:
494 | if until and res > until:
495 | self._len = total
496 | return
497 | elif res >= self._dtstart:
498 | total += 1
499 | yield res
500 | if count:
501 | count -= 1
502 | if not count:
503 | self._len = total
504 | return
505 | else:
506 | for i in dayset[start:end]:
507 | if i is not None:
508 | date = datetime.date.fromordinal(ii.yearordinal+i)
509 | for time in timeset:
510 | res = datetime.datetime.combine(date, time)
511 | if until and res > until:
512 | self._len = total
513 | return
514 | elif res >= self._dtstart:
515 | total += 1
516 | yield res
517 | if count:
518 | count -= 1
519 | if not count:
520 | self._len = total
521 | return
522 |
523 | # Handle frequency and interval
524 | fixday = False
525 | if freq == YEARLY:
526 | year += interval
527 | if year > datetime.MAXYEAR:
528 | self._len = total
529 | return
530 | ii.rebuild(year, month)
531 | elif freq == MONTHLY:
532 | month += interval
533 | if month > 12:
534 | div, mod = divmod(month, 12)
535 | month = mod
536 | year += div
537 | if month == 0:
538 | month = 12
539 | year -= 1
540 | if year > datetime.MAXYEAR:
541 | self._len = total
542 | return
543 | ii.rebuild(year, month)
544 | elif freq == WEEKLY:
545 | if wkst > weekday:
546 | day += -(weekday+1+(6-wkst))+self._interval*7
547 | else:
548 | day += -(weekday-wkst)+self._interval*7
549 | weekday = wkst
550 | fixday = True
551 | elif freq == DAILY:
552 | day += interval
553 | fixday = True
554 | elif freq == HOURLY:
555 | if filtered:
556 | # Jump to one iteration before next day
557 | hour += ((23-hour)//interval)*interval
558 | while True:
559 | hour += interval
560 | div, mod = divmod(hour, 24)
561 | if div:
562 | hour = mod
563 | day += div
564 | fixday = True
565 | if not byhour or hour in byhour:
566 | break
567 | timeset = gettimeset(hour, minute, second)
568 | elif freq == MINUTELY:
569 | if filtered:
570 | # Jump to one iteration before next day
571 | minute += ((1439-(hour*60+minute))//interval)*interval
572 | while True:
573 | minute += interval
574 | div, mod = divmod(minute, 60)
575 | if div:
576 | minute = mod
577 | hour += div
578 | div, mod = divmod(hour, 24)
579 | if div:
580 | hour = mod
581 | day += div
582 | fixday = True
583 | filtered = False
584 | if ((not byhour or hour in byhour) and
585 | (not byminute or minute in byminute)):
586 | break
587 | timeset = gettimeset(hour, minute, second)
588 | elif freq == SECONDLY:
589 | if filtered:
590 | # Jump to one iteration before next day
591 | second += (((86399-(hour*3600+minute*60+second))
592 | //interval)*interval)
593 | while True:
594 | second += self._interval
595 | div, mod = divmod(second, 60)
596 | if div:
597 | second = mod
598 | minute += div
599 | div, mod = divmod(minute, 60)
600 | if div:
601 | minute = mod
602 | hour += div
603 | div, mod = divmod(hour, 24)
604 | if div:
605 | hour = mod
606 | day += div
607 | fixday = True
608 | if ((not byhour or hour in byhour) and
609 | (not byminute or minute in byminute) and
610 | (not bysecond or second in bysecond)):
611 | break
612 | timeset = gettimeset(hour, minute, second)
613 |
614 | if fixday and day > 28:
615 | daysinmonth = calendar.monthrange(year, month)[1]
616 | if day > daysinmonth:
617 | while day > daysinmonth:
618 | day -= daysinmonth
619 | month += 1
620 | if month == 13:
621 | month = 1
622 | year += 1
623 | if year > datetime.MAXYEAR:
624 | self._len = total
625 | return
626 | daysinmonth = calendar.monthrange(year, month)[1]
627 | ii.rebuild(year, month)
628 |
629 | class _iterinfo(object):
630 | __slots__ = ["rrule", "lastyear", "lastmonth",
631 | "yearlen", "nextyearlen", "yearordinal", "yearweekday",
632 | "mmask", "mrange", "mdaymask", "nmdaymask",
633 | "wdaymask", "wnomask", "nwdaymask", "eastermask"]
634 |
635 | def __init__(self, rrule):
636 | for attr in self.__slots__:
637 | setattr(self, attr, None)
638 | self.rrule = rrule
639 |
640 | def rebuild(self, year, month):
641 | # Every mask is 7 days longer to handle cross-year weekly periods.
642 | rr = self.rrule
643 | if year != self.lastyear:
644 | self.yearlen = 365+calendar.isleap(year)
645 | self.nextyearlen = 365+calendar.isleap(year+1)
646 | firstyday = datetime.date(year, 1, 1)
647 | self.yearordinal = firstyday.toordinal()
648 | self.yearweekday = firstyday.weekday()
649 |
650 | wday = datetime.date(year, 1, 1).weekday()
651 | if self.yearlen == 365:
652 | self.mmask = M365MASK
653 | self.mdaymask = MDAY365MASK
654 | self.nmdaymask = NMDAY365MASK
655 | self.wdaymask = WDAYMASK[wday:]
656 | self.mrange = M365RANGE
657 | else:
658 | self.mmask = M366MASK
659 | self.mdaymask = MDAY366MASK
660 | self.nmdaymask = NMDAY366MASK
661 | self.wdaymask = WDAYMASK[wday:]
662 | self.mrange = M366RANGE
663 |
664 | if not rr._byweekno:
665 | self.wnomask = None
666 | else:
667 | self.wnomask = [0]*(self.yearlen+7)
668 | #no1wkst = firstwkst = self.wdaymask.index(rr._wkst)
669 | no1wkst = firstwkst = (7-self.yearweekday+rr._wkst)%7
670 | if no1wkst >= 4:
671 | no1wkst = 0
672 | # Number of days in the year, plus the days we got
673 | # from last year.
674 | wyearlen = self.yearlen+(self.yearweekday-rr._wkst)%7
675 | else:
676 | # Number of days in the year, minus the days we
677 | # left in last year.
678 | wyearlen = self.yearlen-no1wkst
679 | div, mod = divmod(wyearlen, 7)
680 | numweeks = div+mod//4
681 | for n in rr._byweekno:
682 | if n < 0:
683 | n += numweeks+1
684 | if not (0 < n <= numweeks):
685 | continue
686 | if n > 1:
687 | i = no1wkst+(n-1)*7
688 | if no1wkst != firstwkst:
689 | i -= 7-firstwkst
690 | else:
691 | i = no1wkst
692 | for j in range(7):
693 | self.wnomask[i] = 1
694 | i += 1
695 | if self.wdaymask[i] == rr._wkst:
696 | break
697 | if 1 in rr._byweekno:
698 | # Check week number 1 of next year as well
699 | # TODO: Check -numweeks for next year.
700 | i = no1wkst+numweeks*7
701 | if no1wkst != firstwkst:
702 | i -= 7-firstwkst
703 | if i < self.yearlen:
704 | # If week starts in next year, we
705 | # don't care about it.
706 | for j in range(7):
707 | self.wnomask[i] = 1
708 | i += 1
709 | if self.wdaymask[i] == rr._wkst:
710 | break
711 | if no1wkst:
712 | # Check last week number of last year as
713 | # well. If no1wkst is 0, either the year
714 | # started on week start, or week number 1
715 | # got days from last year, so there are no
716 | # days from last year's last week number in
717 | # this year.
718 | if -1 not in rr._byweekno:
719 | lyearweekday = datetime.date(year-1,1,1).weekday()
720 | lno1wkst = (7-lyearweekday+rr._wkst)%7
721 | lyearlen = 365+calendar.isleap(year-1)
722 | if lno1wkst >= 4:
723 | lno1wkst = 0
724 | lnumweeks = 52+(lyearlen+
725 | (lyearweekday-rr._wkst)%7)%7//4
726 | else:
727 | lnumweeks = 52+(self.yearlen-no1wkst)%7//4
728 | else:
729 | lnumweeks = -1
730 | if lnumweeks in rr._byweekno:
731 | for i in range(no1wkst):
732 | self.wnomask[i] = 1
733 |
734 | if (rr._bynweekday and
735 | (month != self.lastmonth or year != self.lastyear)):
736 | ranges = []
737 | if rr._freq == YEARLY:
738 | if rr._bymonth:
739 | for month in rr._bymonth:
740 | ranges.append(self.mrange[month-1:month+1])
741 | else:
742 | ranges = [(0, self.yearlen)]
743 | elif rr._freq == MONTHLY:
744 | ranges = [self.mrange[month-1:month+1]]
745 | if ranges:
746 | # Weekly frequency won't get here, so we may not
747 | # care about cross-year weekly periods.
748 | self.nwdaymask = [0]*self.yearlen
749 | for first, last in ranges:
750 | last -= 1
751 | for wday, n in rr._bynweekday:
752 | if n < 0:
753 | i = last+(n+1)*7
754 | i -= (self.wdaymask[i]-wday)%7
755 | else:
756 | i = first+(n-1)*7
757 | i += (7-self.wdaymask[i]+wday)%7
758 | if first <= i <= last:
759 | self.nwdaymask[i] = 1
760 |
761 | if rr._byeaster:
762 | self.eastermask = [0]*(self.yearlen+7)
763 | eyday = easter.easter(year).toordinal()-self.yearordinal
764 | for offset in rr._byeaster:
765 | self.eastermask[eyday+offset] = 1
766 |
767 | self.lastyear = year
768 | self.lastmonth = month
769 |
770 | def ydayset(self, year, month, day):
771 | return range(self.yearlen), 0, self.yearlen
772 |
773 | def mdayset(self, year, month, day):
774 | set = [None]*self.yearlen
775 | start, end = self.mrange[month-1:month+1]
776 | for i in range(start, end):
777 | set[i] = i
778 | return set, start, end
779 |
780 | def wdayset(self, year, month, day):
781 | # We need to handle cross-year weeks here.
782 | set = [None]*(self.yearlen+7)
783 | i = datetime.date(year, month, day).toordinal()-self.yearordinal
784 | start = i
785 | for j in range(7):
786 | set[i] = i
787 | i += 1
788 | #if (not (0 <= i < self.yearlen) or
789 | # self.wdaymask[i] == self.rrule._wkst):
790 | # This will cross the year boundary, if necessary.
791 | if self.wdaymask[i] == self.rrule._wkst:
792 | break
793 | return set, start, i
794 |
795 | def ddayset(self, year, month, day):
796 | set = [None]*self.yearlen
797 | i = datetime.date(year, month, day).toordinal()-self.yearordinal
798 | set[i] = i
799 | return set, i, i+1
800 |
801 | def htimeset(self, hour, minute, second):
802 | set = []
803 | rr = self.rrule
804 | for minute in rr._byminute:
805 | for second in rr._bysecond:
806 | set.append(datetime.time(hour, minute, second,
807 | tzinfo=rr._tzinfo))
808 | set.sort()
809 | return set
810 |
811 | def mtimeset(self, hour, minute, second):
812 | set = []
813 | rr = self.rrule
814 | for second in rr._bysecond:
815 | set.append(datetime.time(hour, minute, second, tzinfo=rr._tzinfo))
816 | set.sort()
817 | return set
818 |
819 | def stimeset(self, hour, minute, second):
820 | return (datetime.time(hour, minute, second,
821 | tzinfo=self.rrule._tzinfo),)
822 |
823 |
824 | class rruleset(rrulebase):
825 |
826 | class _genitem:
827 | def __init__(self, genlist, gen):
828 | try:
829 | self.dt = gen()
830 | genlist.append(self)
831 | except StopIteration:
832 | pass
833 | self.genlist = genlist
834 | self.gen = gen
835 |
836 | def next(self):
837 | try:
838 | self.dt = self.gen()
839 | except StopIteration:
840 | self.genlist.remove(self)
841 |
842 | def __cmp__(self, other):
843 | return cmp(self.dt, other.dt)
844 |
845 | def __init__(self, cache=False):
846 | rrulebase.__init__(self, cache)
847 | self._rrule = []
848 | self._rdate = []
849 | self._exrule = []
850 | self._exdate = []
851 |
852 | def rrule(self, rrule):
853 | self._rrule.append(rrule)
854 |
855 | def rdate(self, rdate):
856 | self._rdate.append(rdate)
857 |
858 | def exrule(self, exrule):
859 | self._exrule.append(exrule)
860 |
861 | def exdate(self, exdate):
862 | self._exdate.append(exdate)
863 |
864 | def _iter(self):
865 | rlist = []
866 | self._rdate.sort()
867 | self._genitem(rlist, iter(self._rdate).next)
868 | for gen in [iter(x).next for x in self._rrule]:
869 | self._genitem(rlist, gen)
870 | rlist.sort()
871 | exlist = []
872 | self._exdate.sort()
873 | self._genitem(exlist, iter(self._exdate).next)
874 | for gen in [iter(x).next for x in self._exrule]:
875 | self._genitem(exlist, gen)
876 | exlist.sort()
877 | lastdt = None
878 | total = 0
879 | while rlist:
880 | ritem = rlist[0]
881 | if not lastdt or lastdt != ritem.dt:
882 | while exlist and exlist[0] < ritem:
883 | exlist[0].next()
884 | exlist.sort()
885 | if not exlist or ritem != exlist[0]:
886 | total += 1
887 | yield ritem.dt
888 | lastdt = ritem.dt
889 | ritem.next()
890 | rlist.sort()
891 | self._len = total
892 |
893 | class _rrulestr:
894 |
895 | _freq_map = {"YEARLY": YEARLY,
896 | "MONTHLY": MONTHLY,
897 | "WEEKLY": WEEKLY,
898 | "DAILY": DAILY,
899 | "HOURLY": HOURLY,
900 | "MINUTELY": MINUTELY,
901 | "SECONDLY": SECONDLY}
902 |
903 | _weekday_map = {"MO":0,"TU":1,"WE":2,"TH":3,"FR":4,"SA":5,"SU":6}
904 |
905 | def _handle_int(self, rrkwargs, name, value, **kwargs):
906 | rrkwargs[name.lower()] = int(value)
907 |
908 | def _handle_int_list(self, rrkwargs, name, value, **kwargs):
909 | rrkwargs[name.lower()] = [int(x) for x in value.split(',')]
910 |
911 | _handle_INTERVAL = _handle_int
912 | _handle_COUNT = _handle_int
913 | _handle_BYSETPOS = _handle_int_list
914 | _handle_BYMONTH = _handle_int_list
915 | _handle_BYMONTHDAY = _handle_int_list
916 | _handle_BYYEARDAY = _handle_int_list
917 | _handle_BYEASTER = _handle_int_list
918 | _handle_BYWEEKNO = _handle_int_list
919 | _handle_BYHOUR = _handle_int_list
920 | _handle_BYMINUTE = _handle_int_list
921 | _handle_BYSECOND = _handle_int_list
922 |
923 | def _handle_FREQ(self, rrkwargs, name, value, **kwargs):
924 | rrkwargs["freq"] = self._freq_map[value]
925 |
926 | def _handle_UNTIL(self, rrkwargs, name, value, **kwargs):
927 | global parser
928 | if not parser:
929 | from dateutil import parser
930 | try:
931 | rrkwargs["until"] = parser.parse(value,
932 | ignoretz=kwargs.get("ignoretz"),
933 | tzinfos=kwargs.get("tzinfos"))
934 | except ValueError:
935 | raise ValueError, "invalid until date"
936 |
937 | def _handle_WKST(self, rrkwargs, name, value, **kwargs):
938 | rrkwargs["wkst"] = self._weekday_map[value]
939 |
940 |     def _handle_BYWEEKDAY(self, rrkwargs, name, value, **kwargs):
941 | l = []
942 | for wday in value.split(','):
943 | for i in range(len(wday)):
944 | if wday[i] not in '+-0123456789':
945 | break
946 | n = wday[:i] or None
947 | w = wday[i:]
948 | if n: n = int(n)
949 | l.append(weekdays[self._weekday_map[w]](n))
950 | rrkwargs["byweekday"] = l
951 |
952 | _handle_BYDAY = _handle_BYWEEKDAY
953 |
954 | def _parse_rfc_rrule(self, line,
955 | dtstart=None,
956 | cache=False,
957 | ignoretz=False,
958 | tzinfos=None):
959 | if line.find(':') != -1:
960 | name, value = line.split(':')
961 | if name != "RRULE":
962 | raise ValueError, "unknown parameter name"
963 | else:
964 | value = line
965 | rrkwargs = {}
966 | for pair in value.split(';'):
967 | name, value = pair.split('=')
968 | name = name.upper()
969 | value = value.upper()
970 | try:
971 | getattr(self, "_handle_"+name)(rrkwargs, name, value,
972 | ignoretz=ignoretz,
973 | tzinfos=tzinfos)
974 | except AttributeError:
975 | raise ValueError, "unknown parameter '%s'" % name
976 | except (KeyError, ValueError):
977 | raise ValueError, "invalid '%s': %s" % (name, value)
978 | return rrule(dtstart=dtstart, cache=cache, **rrkwargs)
979 |
980 | def _parse_rfc(self, s,
981 | dtstart=None,
982 | cache=False,
983 | unfold=False,
984 | forceset=False,
985 | compatible=False,
986 | ignoretz=False,
987 | tzinfos=None):
988 | global parser
989 | if compatible:
990 | forceset = True
991 | unfold = True
992 | s = s.upper()
993 | if not s.strip():
994 | raise ValueError, "empty string"
995 | if unfold:
996 | lines = s.splitlines()
997 | i = 0
998 | while i < len(lines):
999 | line = lines[i].rstrip()
1000 | if not line:
1001 | del lines[i]
1002 | elif i > 0 and line[0] == " ":
1003 | lines[i-1] += line[1:]
1004 | del lines[i]
1005 | else:
1006 | i += 1
1007 | else:
1008 | lines = s.split()
1009 | if (not forceset and len(lines) == 1 and
1010 | (s.find(':') == -1 or s.startswith('RRULE:'))):
1011 | return self._parse_rfc_rrule(lines[0], cache=cache,
1012 | dtstart=dtstart, ignoretz=ignoretz,
1013 | tzinfos=tzinfos)
1014 | else:
1015 | rrulevals = []
1016 | rdatevals = []
1017 | exrulevals = []
1018 | exdatevals = []
1019 | for line in lines:
1020 | if not line:
1021 | continue
1022 | if line.find(':') == -1:
1023 | name = "RRULE"
1024 | value = line
1025 | else:
1026 | name, value = line.split(':', 1)
1027 | parms = name.split(';')
1028 | if not parms:
1029 | raise ValueError, "empty property name"
1030 | name = parms[0]
1031 | parms = parms[1:]
1032 | if name == "RRULE":
1033 | for parm in parms:
1034 | raise ValueError, "unsupported RRULE parm: "+parm
1035 | rrulevals.append(value)
1036 | elif name == "RDATE":
1037 | for parm in parms:
1038 | if parm != "VALUE=DATE-TIME":
1039 | raise ValueError, "unsupported RDATE parm: "+parm
1040 | rdatevals.append(value)
1041 | elif name == "EXRULE":
1042 | for parm in parms:
1043 | raise ValueError, "unsupported EXRULE parm: "+parm
1044 | exrulevals.append(value)
1045 | elif name == "EXDATE":
1046 | for parm in parms:
1047 | if parm != "VALUE=DATE-TIME":
1048 |                             raise ValueError, "unsupported EXDATE parm: "+parm
1049 | exdatevals.append(value)
1050 | elif name == "DTSTART":
1051 | for parm in parms:
1052 | raise ValueError, "unsupported DTSTART parm: "+parm
1053 | if not parser:
1054 | from dateutil import parser
1055 | dtstart = parser.parse(value, ignoretz=ignoretz,
1056 | tzinfos=tzinfos)
1057 | else:
1058 | raise ValueError, "unsupported property: "+name
1059 | if (forceset or len(rrulevals) > 1 or
1060 | rdatevals or exrulevals or exdatevals):
1061 | if not parser and (rdatevals or exdatevals):
1062 | from dateutil import parser
1063 | set = rruleset(cache=cache)
1064 | for value in rrulevals:
1065 | set.rrule(self._parse_rfc_rrule(value, dtstart=dtstart,
1066 | ignoretz=ignoretz,
1067 | tzinfos=tzinfos))
1068 | for value in rdatevals:
1069 | for datestr in value.split(','):
1070 | set.rdate(parser.parse(datestr,
1071 | ignoretz=ignoretz,
1072 | tzinfos=tzinfos))
1073 | for value in exrulevals:
1074 | set.exrule(self._parse_rfc_rrule(value, dtstart=dtstart,
1075 | ignoretz=ignoretz,
1076 | tzinfos=tzinfos))
1077 | for value in exdatevals:
1078 | for datestr in value.split(','):
1079 | set.exdate(parser.parse(datestr,
1080 | ignoretz=ignoretz,
1081 | tzinfos=tzinfos))
1082 | if compatible and dtstart:
1083 | set.rdate(dtstart)
1084 | return set
1085 | else:
1086 | return self._parse_rfc_rrule(rrulevals[0],
1087 | dtstart=dtstart,
1088 | cache=cache,
1089 | ignoretz=ignoretz,
1090 | tzinfos=tzinfos)
1091 |
1092 | def __call__(self, s, **kwargs):
1093 | return self._parse_rfc(s, **kwargs)
1094 |
1095 | rrulestr = _rrulestr()
1096 |
1097 | # vim:ts=4:sw=4:et
1098 |
--------------------------------------------------------------------------------
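
A minimal usage sketch for the rrule module above, assuming the vendored
package is importable as `dateutil` and runs under Python 2 (the module
relies on Python 2 constructs such as `gen.next()`, backtick repr, and
`raise E, msg`); the dates are arbitrary illustration values:

    import datetime
    from dateutil.rrule import rrule, rruleset, rrulestr, MONTHLY, FR

    start = datetime.datetime(2003, 1, 1)

    # Third Friday of each month, four occurrences:
    for dt in rrule(MONTHLY, count=4, byweekday=FR(+3), dtstart=start):
        print dt

    # The same recurrence expressed as an RFC 2445 RRULE string:
    rule = rrulestr("FREQ=MONTHLY;COUNT=4;BYDAY=+3FR", dtstart=start)
    print list(rule)

    # rruleset layers exclusions on top of recurrences; excluding the
    # March occurrence (2003-03-21 is the third Friday) leaves three:
    s = rruleset()
    s.rrule(rrule(MONTHLY, count=4, byweekday=FR(+3), dtstart=start))
    s.exdate(datetime.datetime(2003, 3, 21))
    print s.count()
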
/dateutil/tz.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
3 |
4 | This module offers extensions to the standard python 2.3+
5 | datetime module.
6 | """
7 | __author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
8 | __license__ = "PSF License"
9 |
10 | import datetime
11 | import struct
12 | import time
13 | import sys
14 | import os
15 |
16 | relativedelta = None
17 | parser = None
18 | rrule = None
19 |
20 | __all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
21 | "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
22 |
23 | try:
24 | from dateutil.tzwin import tzwin, tzwinlocal
25 | except (ImportError, OSError):
26 | tzwin, tzwinlocal = None, None
27 |
28 | ZERO = datetime.timedelta(0)
29 | EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal()
30 |
31 | class tzutc(datetime.tzinfo):
32 |
33 | def utcoffset(self, dt):
34 | return ZERO
35 |
36 | def dst(self, dt):
37 | return ZERO
38 |
39 | def tzname(self, dt):
40 | return "UTC"
41 |
42 | def __eq__(self, other):
43 | return (isinstance(other, tzutc) or
44 | (isinstance(other, tzoffset) and other._offset == ZERO))
45 |
46 | def __ne__(self, other):
47 | return not self.__eq__(other)
48 |
49 | def __repr__(self):
50 | return "%s()" % self.__class__.__name__
51 |
52 | __reduce__ = object.__reduce__
53 |
54 | class tzoffset(datetime.tzinfo):
55 |
56 | def __init__(self, name, offset):
57 | self._name = name
58 | self._offset = datetime.timedelta(seconds=offset)
59 |
60 | def utcoffset(self, dt):
61 | return self._offset
62 |
63 | def dst(self, dt):
64 | return ZERO
65 |
66 | def tzname(self, dt):
67 | return self._name
68 |
69 | def __eq__(self, other):
70 | return (isinstance(other, tzoffset) and
71 | self._offset == other._offset)
72 |
73 | def __ne__(self, other):
74 | return not self.__eq__(other)
75 |
76 | def __repr__(self):
77 | return "%s(%s, %s)" % (self.__class__.__name__,
78 | `self._name`,
79 | self._offset.days*86400+self._offset.seconds)
80 |
81 | __reduce__ = object.__reduce__
82 |
83 | class tzlocal(datetime.tzinfo):
84 |
85 | _std_offset = datetime.timedelta(seconds=-time.timezone)
86 | if time.daylight:
87 | _dst_offset = datetime.timedelta(seconds=-time.altzone)
88 | else:
89 | _dst_offset = _std_offset
90 |
91 | def utcoffset(self, dt):
92 | if self._isdst(dt):
93 | return self._dst_offset
94 | else:
95 | return self._std_offset
96 |
97 | def dst(self, dt):
98 | if self._isdst(dt):
99 | return self._dst_offset-self._std_offset
100 | else:
101 | return ZERO
102 |
103 | def tzname(self, dt):
104 | return time.tzname[self._isdst(dt)]
105 |
106 | def _isdst(self, dt):
107 | # We can't use mktime here. It is unstable when deciding if
108 | # the hour near to a change is DST or not.
109 | #
110 | # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
111 | # dt.minute, dt.second, dt.weekday(), 0, -1))
112 | # return time.localtime(timestamp).tm_isdst
113 | #
114 | # The code above yields the following result:
115 | #
116 | #>>> import tz, datetime
117 | #>>> t = tz.tzlocal()
118 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
119 | #'BRDT'
120 | #>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
121 | #'BRST'
122 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
123 | #'BRST'
124 | #>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
125 | #'BRDT'
126 | #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
127 | #'BRDT'
128 | #
129 | # Here is a more stable implementation:
130 | #
131 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
132 | + dt.hour * 3600
133 | + dt.minute * 60
134 | + dt.second)
135 | return time.localtime(timestamp+time.timezone).tm_isdst
136 |
137 | def __eq__(self, other):
138 | if not isinstance(other, tzlocal):
139 | return False
140 | return (self._std_offset == other._std_offset and
141 | self._dst_offset == other._dst_offset)
142 |
143 |
144 | def __ne__(self, other):
145 | return not self.__eq__(other)
146 |
147 | def __repr__(self):
148 | return "%s()" % self.__class__.__name__
149 |
150 | __reduce__ = object.__reduce__
151 |
152 | class _ttinfo(object):
153 | __slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"]
154 |
155 | def __init__(self):
156 | for attr in self.__slots__:
157 | setattr(self, attr, None)
158 |
159 | def __repr__(self):
160 | l = []
161 | for attr in self.__slots__:
162 | value = getattr(self, attr)
163 | if value is not None:
164 | l.append("%s=%s" % (attr, `value`))
165 | return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
166 |
167 | def __eq__(self, other):
168 | if not isinstance(other, _ttinfo):
169 | return False
170 | return (self.offset == other.offset and
171 | self.delta == other.delta and
172 | self.isdst == other.isdst and
173 | self.abbr == other.abbr and
174 | self.isstd == other.isstd and
175 | self.isgmt == other.isgmt)
176 |
177 | def __ne__(self, other):
178 | return not self.__eq__(other)
179 |
180 | def __getstate__(self):
181 | state = {}
182 | for name in self.__slots__:
183 | state[name] = getattr(self, name, None)
184 | return state
185 |
186 | def __setstate__(self, state):
187 | for name in self.__slots__:
188 | if name in state:
189 | setattr(self, name, state[name])
190 |
191 | class tzfile(datetime.tzinfo):
192 |
193 | # http://www.twinsun.com/tz/tz-link.htm
194 | # ftp://elsie.nci.nih.gov/pub/tz*.tar.gz
195 |
196 | def __init__(self, fileobj):
197 | if isinstance(fileobj, basestring):
198 | self._filename = fileobj
199 | fileobj = open(fileobj)
200 | elif hasattr(fileobj, "name"):
201 | self._filename = fileobj.name
202 | else:
203 | self._filename = `fileobj`
204 |
205 | # From tzfile(5):
206 | #
207 | # The time zone information files used by tzset(3)
208 | # begin with the magic characters "TZif" to identify
209 | # them as time zone information files, followed by
210 | # sixteen bytes reserved for future use, followed by
211 | # six four-byte values of type long, written in a
212 | # ``standard'' byte order (the high-order byte
213 | # of the value is written first).
214 |
215 | if fileobj.read(4) != "TZif":
216 | raise ValueError, "magic not found"
217 |
218 | fileobj.read(16)
219 |
220 | (
221 | # The number of UTC/local indicators stored in the file.
222 | ttisgmtcnt,
223 |
224 | # The number of standard/wall indicators stored in the file.
225 | ttisstdcnt,
226 |
227 | # The number of leap seconds for which data is
228 | # stored in the file.
229 | leapcnt,
230 |
231 | # The number of "transition times" for which data
232 | # is stored in the file.
233 | timecnt,
234 |
235 | # The number of "local time types" for which data
236 | # is stored in the file (must not be zero).
237 | typecnt,
238 |
239 | # The number of characters of "time zone
240 | # abbreviation strings" stored in the file.
241 | charcnt,
242 |
243 | ) = struct.unpack(">6l", fileobj.read(24))
244 |
245 | # The above header is followed by tzh_timecnt four-byte
246 | # values of type long, sorted in ascending order.
247 | # These values are written in ``standard'' byte order.
248 | # Each is used as a transition time (as returned by
249 | # time(2)) at which the rules for computing local time
250 | # change.
251 |
252 | if timecnt:
253 | self._trans_list = struct.unpack(">%dl" % timecnt,
254 | fileobj.read(timecnt*4))
255 | else:
256 | self._trans_list = []
257 |
258 | # Next come tzh_timecnt one-byte values of type unsigned
259 | # char; each one tells which of the different types of
260 | # ``local time'' types described in the file is associated
261 | # with the same-indexed transition time. These values
262 | # serve as indices into an array of ttinfo structures that
263 | # appears next in the file.
264 |
265 | if timecnt:
266 | self._trans_idx = struct.unpack(">%dB" % timecnt,
267 | fileobj.read(timecnt))
268 | else:
269 | self._trans_idx = []
270 |
271 | # Each ttinfo structure is written as a four-byte value
272 | # for tt_gmtoff of type long, in a standard byte
273 | # order, followed by a one-byte value for tt_isdst
274 | # and a one-byte value for tt_abbrind. In each
275 | # structure, tt_gmtoff gives the number of
276 | # seconds to be added to UTC, tt_isdst tells whether
277 | # tm_isdst should be set by localtime(3), and
278 | # tt_abbrind serves as an index into the array of
279 | # time zone abbreviation characters that follow the
280 | # ttinfo structure(s) in the file.
281 |
282 | ttinfo = []
283 |
284 | for i in range(typecnt):
285 | ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))
286 |
287 | abbr = fileobj.read(charcnt)
288 |
289 | # Then there are tzh_leapcnt pairs of four-byte
290 | # values, written in standard byte order; the
291 | # first value of each pair gives the time (as
292 | # returned by time(2)) at which a leap second
293 | # occurs; the second gives the total number of
294 | # leap seconds to be applied after the given time.
295 | # The pairs of values are sorted in ascending order
296 | # by time.
297 |
298 | # Not used, for now
299 | if leapcnt:
300 | leap = struct.unpack(">%dl" % (leapcnt*2),
301 | fileobj.read(leapcnt*8))
302 |
303 | # Then there are tzh_ttisstdcnt standard/wall
304 | # indicators, each stored as a one-byte value;
305 | # they tell whether the transition times associated
306 | # with local time types were specified as standard
307 | # time or wall clock time, and are used when
308 | # a time zone file is used in handling POSIX-style
309 | # time zone environment variables.
310 |
311 | if ttisstdcnt:
312 | isstd = struct.unpack(">%db" % ttisstdcnt,
313 | fileobj.read(ttisstdcnt))
314 |
315 | # Finally, there are tzh_ttisgmtcnt UTC/local
316 | # indicators, each stored as a one-byte value;
317 | # they tell whether the transition times associated
318 | # with local time types were specified as UTC or
319 | # local time, and are used when a time zone file
320 | # is used in handling POSIX-style time zone envi-
321 | # ronment variables.
322 |
323 | if ttisgmtcnt:
324 | isgmt = struct.unpack(">%db" % ttisgmtcnt,
325 | fileobj.read(ttisgmtcnt))
326 |
327 | # ** Everything has been read **
328 |
329 | # Build ttinfo list
330 | self._ttinfo_list = []
331 | for i in range(typecnt):
332 | gmtoff, isdst, abbrind = ttinfo[i]
333 | # Round to full-minutes if that's not the case. Python's
334 | # datetime doesn't accept sub-minute timezones. Check
335 | # http://python.org/sf/1447945 for some information.
336 | gmtoff = (gmtoff+30)//60*60
337 | tti = _ttinfo()
338 | tti.offset = gmtoff
339 | tti.delta = datetime.timedelta(seconds=gmtoff)
340 | tti.isdst = isdst
341 | tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
342 | tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
343 | tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
344 | self._ttinfo_list.append(tti)
345 |
346 | # Replace ttinfo indexes for ttinfo objects.
347 | trans_idx = []
348 | for idx in self._trans_idx:
349 | trans_idx.append(self._ttinfo_list[idx])
350 | self._trans_idx = tuple(trans_idx)
351 |
352 | # Set standard, dst, and before ttinfos. before will be
353 | # used when a given time is before any transitions,
354 | # and will be set to the first non-dst ttinfo, or to
355 | # the first dst, if all of them are dst.
356 | self._ttinfo_std = None
357 | self._ttinfo_dst = None
358 | self._ttinfo_before = None
359 | if self._ttinfo_list:
360 | if not self._trans_list:
361 | self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0]
362 | else:
363 | for i in range(timecnt-1,-1,-1):
364 | tti = self._trans_idx[i]
365 | if not self._ttinfo_std and not tti.isdst:
366 | self._ttinfo_std = tti
367 | elif not self._ttinfo_dst and tti.isdst:
368 | self._ttinfo_dst = tti
369 | if self._ttinfo_std and self._ttinfo_dst:
370 | break
371 | else:
372 | if self._ttinfo_dst and not self._ttinfo_std:
373 | self._ttinfo_std = self._ttinfo_dst
374 |
375 | for tti in self._ttinfo_list:
376 | if not tti.isdst:
377 | self._ttinfo_before = tti
378 | break
379 | else:
380 | self._ttinfo_before = self._ttinfo_list[0]
381 |
382 | # Now fix transition times to become relative to wall time.
383 | #
384 | # I'm not sure about this. In my tests, the tz source file
385 |         # is set up to wall time, and in the binary file isstd and
386 | # isgmt are off, so it should be in wall time. OTOH, it's
387 | # always in gmt time. Let me know if you have comments
388 | # about this.
389 | laststdoffset = 0
390 | self._trans_list = list(self._trans_list)
391 | for i in range(len(self._trans_list)):
392 | tti = self._trans_idx[i]
393 | if not tti.isdst:
394 | # This is std time.
395 | self._trans_list[i] += tti.offset
396 | laststdoffset = tti.offset
397 | else:
398 | # This is dst time. Convert to std.
399 | self._trans_list[i] += laststdoffset
400 | self._trans_list = tuple(self._trans_list)
401 |
402 | def _find_ttinfo(self, dt, laststd=0):
403 | timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
404 | + dt.hour * 3600
405 | + dt.minute * 60
406 | + dt.second)
407 | idx = 0
408 | for trans in self._trans_list:
409 | if timestamp < trans:
410 | break
411 | idx += 1
412 | else:
413 | return self._ttinfo_std
414 | if idx == 0:
415 | return self._ttinfo_before
416 | if laststd:
417 | while idx > 0:
418 | tti = self._trans_idx[idx-1]
419 | if not tti.isdst:
420 | return tti
421 | idx -= 1
422 | else:
423 | return self._ttinfo_std
424 | else:
425 | return self._trans_idx[idx-1]
426 |
427 | def utcoffset(self, dt):
428 | if not self._ttinfo_std:
429 | return ZERO
430 | return self._find_ttinfo(dt).delta
431 |
432 | def dst(self, dt):
433 | if not self._ttinfo_dst:
434 | return ZERO
435 | tti = self._find_ttinfo(dt)
436 | if not tti.isdst:
437 | return ZERO
438 |
439 | # The documentation says that utcoffset()-dst() must
440 | # be constant for every dt.
441 | return tti.delta-self._find_ttinfo(dt, laststd=1).delta
442 |
443 | # An alternative for that would be:
444 | #
445 | # return self._ttinfo_dst.offset-self._ttinfo_std.offset
446 | #
447 | # However, this class stores historical changes in the
448 |         # dst offset, so I believe that this wouldn't be the right
449 | # way to implement this.
450 |
451 | def tzname(self, dt):
452 | if not self._ttinfo_std:
453 | return None
454 | return self._find_ttinfo(dt).abbr
455 |
456 | def __eq__(self, other):
457 | if not isinstance(other, tzfile):
458 | return False
459 | return (self._trans_list == other._trans_list and
460 | self._trans_idx == other._trans_idx and
461 | self._ttinfo_list == other._ttinfo_list)
462 |
463 | def __ne__(self, other):
464 | return not self.__eq__(other)
465 |
466 |
467 | def __repr__(self):
468 | return "%s(%s)" % (self.__class__.__name__, `self._filename`)
469 |
470 | def __reduce__(self):
471 | if not os.path.isfile(self._filename):
472 |             raise ValueError, "Unpicklable %s class" % self.__class__.__name__
473 | return (self.__class__, (self._filename,))
474 |
475 | class tzrange(datetime.tzinfo):
476 |
477 | def __init__(self, stdabbr, stdoffset=None,
478 | dstabbr=None, dstoffset=None,
479 | start=None, end=None):
480 | global relativedelta
481 | if not relativedelta:
482 | from dateutil import relativedelta
483 | self._std_abbr = stdabbr
484 | self._dst_abbr = dstabbr
485 | if stdoffset is not None:
486 | self._std_offset = datetime.timedelta(seconds=stdoffset)
487 | else:
488 | self._std_offset = ZERO
489 | if dstoffset is not None:
490 | self._dst_offset = datetime.timedelta(seconds=dstoffset)
491 | elif dstabbr and stdoffset is not None:
492 | self._dst_offset = self._std_offset+datetime.timedelta(hours=+1)
493 | else:
494 | self._dst_offset = ZERO
495 | if dstabbr and start is None:
496 | self._start_delta = relativedelta.relativedelta(
497 | hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
498 | else:
499 | self._start_delta = start
500 | if dstabbr and end is None:
501 | self._end_delta = relativedelta.relativedelta(
502 | hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
503 | else:
504 | self._end_delta = end
505 |
506 | def utcoffset(self, dt):
507 | if self._isdst(dt):
508 | return self._dst_offset
509 | else:
510 | return self._std_offset
511 |
512 | def dst(self, dt):
513 | if self._isdst(dt):
514 | return self._dst_offset-self._std_offset
515 | else:
516 | return ZERO
517 |
518 | def tzname(self, dt):
519 | if self._isdst(dt):
520 | return self._dst_abbr
521 | else:
522 | return self._std_abbr
523 |
524 | def _isdst(self, dt):
525 | if not self._start_delta:
526 | return False
527 | year = datetime.datetime(dt.year,1,1)
528 | start = year+self._start_delta
529 | end = year+self._end_delta
530 | dt = dt.replace(tzinfo=None)
531 | if start < end:
532 | return dt >= start and dt < end
533 | else:
534 | return dt >= start or dt < end
535 |
536 | def __eq__(self, other):
537 | if not isinstance(other, tzrange):
538 | return False
539 | return (self._std_abbr == other._std_abbr and
540 | self._dst_abbr == other._dst_abbr and
541 | self._std_offset == other._std_offset and
542 | self._dst_offset == other._dst_offset and
543 | self._start_delta == other._start_delta and
544 | self._end_delta == other._end_delta)
545 |
546 | def __ne__(self, other):
547 | return not self.__eq__(other)
548 |
549 | def __repr__(self):
550 | return "%s(...)" % self.__class__.__name__
551 |
552 | __reduce__ = object.__reduce__
553 |
554 | class tzstr(tzrange):
555 |
556 | def __init__(self, s):
557 | global parser
558 | if not parser:
559 | from dateutil import parser
560 | self._s = s
561 |
562 | res = parser._parsetz(s)
563 | if res is None:
564 | raise ValueError, "unknown string format"
565 |
566 | # Here we break the compatibility with the TZ variable handling.
567 | # GMT-3 actually *means* the timezone -3.
568 | if res.stdabbr in ("GMT", "UTC"):
569 | res.stdoffset *= -1
570 |
571 | # We must initialize it first, since _delta() needs
572 | # _std_offset and _dst_offset set. Use False in start/end
573 | # to avoid building it two times.
574 | tzrange.__init__(self, res.stdabbr, res.stdoffset,
575 | res.dstabbr, res.dstoffset,
576 | start=False, end=False)
577 |
578 | if not res.dstabbr:
579 | self._start_delta = None
580 | self._end_delta = None
581 | else:
582 | self._start_delta = self._delta(res.start)
583 | if self._start_delta:
584 | self._end_delta = self._delta(res.end, isend=1)
585 |
586 | def _delta(self, x, isend=0):
587 | kwargs = {}
588 | if x.month is not None:
589 | kwargs["month"] = x.month
590 | if x.weekday is not None:
591 | kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
592 | if x.week > 0:
593 | kwargs["day"] = 1
594 | else:
595 | kwargs["day"] = 31
596 | elif x.day:
597 | kwargs["day"] = x.day
598 | elif x.yday is not None:
599 | kwargs["yearday"] = x.yday
600 | elif x.jyday is not None:
601 | kwargs["nlyearday"] = x.jyday
602 | if not kwargs:
603 |             # Default is to start on the first Sunday of April, and end
604 |             # on the last Sunday of October.
605 | if not isend:
606 | kwargs["month"] = 4
607 | kwargs["day"] = 1
608 | kwargs["weekday"] = relativedelta.SU(+1)
609 | else:
610 | kwargs["month"] = 10
611 | kwargs["day"] = 31
612 | kwargs["weekday"] = relativedelta.SU(-1)
613 | if x.time is not None:
614 | kwargs["seconds"] = x.time
615 | else:
616 | # Default is 2AM.
617 | kwargs["seconds"] = 7200
618 | if isend:
619 | # Convert to standard time, to follow the documented way
620 | # of working with the extra hour. See the documentation
621 | # of the tzinfo class.
622 | delta = self._dst_offset-self._std_offset
623 | kwargs["seconds"] -= delta.seconds+delta.days*86400
624 | return relativedelta.relativedelta(**kwargs)
625 |
626 | def __repr__(self):
627 | return "%s(%s)" % (self.__class__.__name__, `self._s`)
628 |
629 | class _tzicalvtzcomp:
630 | def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
631 | tzname=None, rrule=None):
632 | self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
633 | self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
634 | self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom
635 | self.isdst = isdst
636 | self.tzname = tzname
637 | self.rrule = rrule
638 |
639 | class _tzicalvtz(datetime.tzinfo):
640 | def __init__(self, tzid, comps=[]):
641 | self._tzid = tzid
642 | self._comps = comps
643 | self._cachedate = []
644 | self._cachecomp = []
645 |
646 | def _find_comp(self, dt):
647 | if len(self._comps) == 1:
648 | return self._comps[0]
649 | dt = dt.replace(tzinfo=None)
650 | try:
651 | return self._cachecomp[self._cachedate.index(dt)]
652 | except ValueError:
653 | pass
654 | lastcomp = None
655 | lastcompdt = None
656 | for comp in self._comps:
657 | if not comp.isdst:
658 | # Handle the extra hour in DST -> STD
659 | compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True)
660 | else:
661 | compdt = comp.rrule.before(dt, inc=True)
662 | if compdt and (not lastcompdt or lastcompdt < compdt):
663 | lastcompdt = compdt
664 | lastcomp = comp
665 | if not lastcomp:
666 | # RFC says nothing about what to do when a given
667 | # time is before the first onset date. We'll look for the
668 | # first standard component, or the first component, if
669 | # none is found.
670 | for comp in self._comps:
671 | if not comp.isdst:
672 | lastcomp = comp
673 | break
674 | else:
675 |                 lastcomp = self._comps[0]
676 | self._cachedate.insert(0, dt)
677 | self._cachecomp.insert(0, lastcomp)
678 | if len(self._cachedate) > 10:
679 | self._cachedate.pop()
680 | self._cachecomp.pop()
681 | return lastcomp
682 |
683 | def utcoffset(self, dt):
684 | return self._find_comp(dt).tzoffsetto
685 |
686 | def dst(self, dt):
687 | comp = self._find_comp(dt)
688 | if comp.isdst:
689 | return comp.tzoffsetdiff
690 | else:
691 | return ZERO
692 |
693 | def tzname(self, dt):
694 | return self._find_comp(dt).tzname
695 |
696 | def __repr__(self):
697 |         return "<tzicalvtz id=%s>" % `self._tzid`
698 |
699 | __reduce__ = object.__reduce__
700 |
701 | class tzical:
702 | def __init__(self, fileobj):
703 | global rrule
704 | if not rrule:
705 | from dateutil import rrule
706 |
707 | if isinstance(fileobj, basestring):
708 | self._s = fileobj
709 | fileobj = open(fileobj)
710 | elif hasattr(fileobj, "name"):
711 | self._s = fileobj.name
712 | else:
713 | self._s = `fileobj`
714 |
715 | self._vtz = {}
716 |
717 | self._parse_rfc(fileobj.read())
718 |
719 | def keys(self):
720 | return self._vtz.keys()
721 |
722 | def get(self, tzid=None):
723 | if tzid is None:
724 | keys = self._vtz.keys()
725 | if len(keys) == 0:
726 | raise ValueError, "no timezones defined"
727 | elif len(keys) > 1:
728 | raise ValueError, "more than one timezone available"
729 | tzid = keys[0]
730 | return self._vtz.get(tzid)
731 |
732 | def _parse_offset(self, s):
733 | s = s.strip()
734 | if not s:
735 | raise ValueError, "empty offset"
736 | if s[0] in ('+', '-'):
737 | signal = (-1,+1)[s[0]=='+']
738 | s = s[1:]
739 | else:
740 | signal = +1
741 | if len(s) == 4:
742 | return (int(s[:2])*3600+int(s[2:])*60)*signal
743 | elif len(s) == 6:
744 | return (int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal
745 | else:
746 | raise ValueError, "invalid offset: "+s
747 |
748 | def _parse_rfc(self, s):
749 | lines = s.splitlines()
750 | if not lines:
751 | raise ValueError, "empty string"
752 |
753 | # Unfold
754 | i = 0
755 | while i < len(lines):
756 | line = lines[i].rstrip()
757 | if not line:
758 | del lines[i]
759 | elif i > 0 and line[0] == " ":
760 | lines[i-1] += line[1:]
761 | del lines[i]
762 | else:
763 | i += 1
764 |
765 | tzid = None
766 | comps = []
767 | invtz = False
768 | comptype = None
769 | for line in lines:
770 | if not line:
771 | continue
772 | name, value = line.split(':', 1)
773 | parms = name.split(';')
774 | if not parms:
775 | raise ValueError, "empty property name"
776 | name = parms[0].upper()
777 | parms = parms[1:]
778 | if invtz:
779 | if name == "BEGIN":
780 | if value in ("STANDARD", "DAYLIGHT"):
781 | # Process component
782 | pass
783 | else:
784 | raise ValueError, "unknown component: "+value
785 | comptype = value
786 | founddtstart = False
787 | tzoffsetfrom = None
788 | tzoffsetto = None
789 | rrulelines = []
790 | tzname = None
791 | elif name == "END":
792 | if value == "VTIMEZONE":
793 | if comptype:
794 | raise ValueError, \
795 | "component not closed: "+comptype
796 | if not tzid:
797 | raise ValueError, \
798 | "mandatory TZID not found"
799 | if not comps:
800 | raise ValueError, \
801 | "at least one component is needed"
802 | # Process vtimezone
803 | self._vtz[tzid] = _tzicalvtz(tzid, comps)
804 | invtz = False
805 | elif value == comptype:
806 | if not founddtstart:
807 | raise ValueError, \
808 | "mandatory DTSTART not found"
809 | if tzoffsetfrom is None:
810 | raise ValueError, \
811 | "mandatory TZOFFSETFROM not found"
812 | if tzoffsetto is None:
813 | raise ValueError, \
814 |                                 "mandatory TZOFFSETTO not found"
815 | # Process component
816 | rr = None
817 | if rrulelines:
818 | rr = rrule.rrulestr("\n".join(rrulelines),
819 | compatible=True,
820 | ignoretz=True,
821 | cache=True)
822 | comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
823 | (comptype == "DAYLIGHT"),
824 | tzname, rr)
825 | comps.append(comp)
826 | comptype = None
827 | else:
828 | raise ValueError, \
829 | "invalid component end: "+value
830 | elif comptype:
831 | if name == "DTSTART":
832 | rrulelines.append(line)
833 | founddtstart = True
834 | elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
835 | rrulelines.append(line)
836 | elif name == "TZOFFSETFROM":
837 | if parms:
838 | raise ValueError, \
839 | "unsupported %s parm: %s "%(name, parms[0])
840 | tzoffsetfrom = self._parse_offset(value)
841 | elif name == "TZOFFSETTO":
842 | if parms:
843 | raise ValueError, \
844 | "unsupported TZOFFSETTO parm: "+parms[0]
845 | tzoffsetto = self._parse_offset(value)
846 | elif name == "TZNAME":
847 | if parms:
848 | raise ValueError, \
849 | "unsupported TZNAME parm: "+parms[0]
850 | tzname = value
851 | elif name == "COMMENT":
852 | pass
853 | else:
854 | raise ValueError, "unsupported property: "+name
855 | else:
856 | if name == "TZID":
857 | if parms:
858 | raise ValueError, \
859 | "unsupported TZID parm: "+parms[0]
860 | tzid = value
861 | elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
862 | pass
863 | else:
864 | raise ValueError, "unsupported property: "+name
865 | elif name == "BEGIN" and value == "VTIMEZONE":
866 | tzid = None
867 | comps = []
868 | invtz = True
869 |
870 | def __repr__(self):
871 | return "%s(%s)" % (self.__class__.__name__, `self._s`)
872 |
873 | if sys.platform != "win32":
874 | TZFILES = ["/etc/localtime", "localtime"]
875 | TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
876 | else:
877 | TZFILES = []
878 | TZPATHS = []
879 |
880 | def gettz(name=None):
881 | tz = None
882 | if not name:
883 | try:
884 | name = os.environ["TZ"]
885 | except KeyError:
886 | pass
887 | if name is None or name == ":":
888 | for filepath in TZFILES:
889 | if not os.path.isabs(filepath):
890 | filename = filepath
891 | for path in TZPATHS:
892 | filepath = os.path.join(path, filename)
893 | if os.path.isfile(filepath):
894 | break
895 | else:
896 | continue
897 | if os.path.isfile(filepath):
898 | try:
899 | tz = tzfile(filepath)
900 | break
901 | except (IOError, OSError, ValueError):
902 | pass
903 | else:
904 | tz = tzlocal()
905 | else:
906 | if name.startswith(":"):
907 |             name = name[1:]
908 | if os.path.isabs(name):
909 | if os.path.isfile(name):
910 | tz = tzfile(name)
911 | else:
912 | tz = None
913 | else:
914 | for path in TZPATHS:
915 | filepath = os.path.join(path, name)
916 | if not os.path.isfile(filepath):
917 | filepath = filepath.replace(' ','_')
918 | if not os.path.isfile(filepath):
919 | continue
920 | try:
921 | tz = tzfile(filepath)
922 | break
923 | except (IOError, OSError, ValueError):
924 | pass
925 | else:
926 | tz = None
927 | if tzwin:
928 | try:
929 | tz = tzwin(name)
930 | except OSError:
931 | pass
932 | if not tz:
933 | from dateutil.zoneinfo import gettz
934 | tz = gettz(name)
935 | if not tz:
936 | for c in name:
937 | # name must have at least one offset to be a tzstr
938 | if c in "0123456789":
939 | try:
940 | tz = tzstr(name)
941 | except ValueError:
942 | pass
943 | break
944 | else:
945 | if name in ("GMT", "UTC"):
946 | tz = tzutc()
947 | elif name in time.tzname:
948 | tz = tzlocal()
949 | return tz
950 |
951 | # vim:ts=4:sw=4:et
952 |
--------------------------------------------------------------------------------
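
A short usage sketch for the tz module above, again assuming Python 2 and
that the vendored package is importable as `dateutil`; `gettz()` needs a
zoneinfo database on disk (or the bundled `dateutil.zoneinfo` fallback),
while `tzstr()` works from a POSIX-style TZ string alone:

    import datetime
    from dateutil import tz

    # gettz(None) consults the TZ env var and /etc/localtime; gettz(name)
    # searches TZPATHS, then the bundled zoneinfo, then tzstr parsing.
    eastern = tz.gettz("America/New_York")  # may fall back or be None without tzdata
    utc = tz.tzutc()

    dt = datetime.datetime(2003, 10, 26, 12, 0, tzinfo=eastern or utc)
    print dt.tzname(), dt.utcoffset()

    # A fixed rule parsed from a POSIX-style TZ string:
    est5edt = tz.tzstr("EST5EDT")
    print datetime.datetime(2003, 7, 1, 12, 0, tzinfo=est5edt).tzname()
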
/dateutil/tzwin.py:
--------------------------------------------------------------------------------
1 | # This code was originally contributed by Jeffrey Harris.
2 | import datetime
3 | import struct
4 | import _winreg
5 |
6 | __author__ = "Jeffrey Harris & Gustavo Niemeyer <gustavo@niemeyer.net>"
7 |
8 | __all__ = ["tzwin", "tzwinlocal"]
9 |
10 | ONEWEEK = datetime.timedelta(7)
11 |
12 | TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
13 | TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
14 | TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
15 |
16 | def _settzkeyname():
17 | global TZKEYNAME
18 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
19 | try:
20 | _winreg.OpenKey(handle, TZKEYNAMENT).Close()
21 | TZKEYNAME = TZKEYNAMENT
22 | except WindowsError:
23 | TZKEYNAME = TZKEYNAME9X
24 | handle.Close()
25 |
26 | _settzkeyname()
27 |
28 | class tzwinbase(datetime.tzinfo):
29 | """tzinfo class based on win32's timezones available in the registry."""
30 |
31 | def utcoffset(self, dt):
32 | if self._isdst(dt):
33 | return datetime.timedelta(minutes=self._dstoffset)
34 | else:
35 | return datetime.timedelta(minutes=self._stdoffset)
36 |
37 | def dst(self, dt):
38 | if self._isdst(dt):
39 | minutes = self._dstoffset - self._stdoffset
40 | return datetime.timedelta(minutes=minutes)
41 | else:
42 | return datetime.timedelta(0)
43 |
44 | def tzname(self, dt):
45 | if self._isdst(dt):
46 | return self._dstname
47 | else:
48 | return self._stdname
49 |
50 | def list():
51 | """Return a list of all time zones known to the system."""
52 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
53 | tzkey = _winreg.OpenKey(handle, TZKEYNAME)
54 | result = [_winreg.EnumKey(tzkey, i)
55 | for i in range(_winreg.QueryInfoKey(tzkey)[0])]
56 | tzkey.Close()
57 | handle.Close()
58 | return result
59 | list = staticmethod(list)
60 |
61 | def display(self):
62 | return self._display
63 |
64 | def _isdst(self, dt):
65 | dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
66 | self._dsthour, self._dstminute,
67 | self._dstweeknumber)
68 | dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
69 | self._stdhour, self._stdminute,
70 | self._stdweeknumber)
71 | if dston < dstoff:
72 | return dston <= dt.replace(tzinfo=None) < dstoff
73 | else:
74 | return not dstoff <= dt.replace(tzinfo=None) < dston
75 |
76 |
77 | class tzwin(tzwinbase):
78 |
79 | def __init__(self, name):
80 | self._name = name
81 |
82 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
83 | tzkey = _winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name))
84 | keydict = valuestodict(tzkey)
85 | tzkey.Close()
86 | handle.Close()
87 |
88 | self._stdname = keydict["Std"].encode("iso-8859-1")
89 | self._dstname = keydict["Dlt"].encode("iso-8859-1")
90 |
91 | self._display = keydict["Display"]
92 |
93 |         # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
94 | tup = struct.unpack("=3l16h", keydict["TZI"])
95 | self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
96 | self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1
97 |
98 | (self._stdmonth,
99 | self._stddayofweek, # Sunday = 0
100 | self._stdweeknumber, # Last = 5
101 | self._stdhour,
102 | self._stdminute) = tup[4:9]
103 |
104 | (self._dstmonth,
105 | self._dstdayofweek, # Sunday = 0
106 | self._dstweeknumber, # Last = 5
107 | self._dsthour,
108 | self._dstminute) = tup[12:17]
109 |
110 | def __repr__(self):
111 | return "tzwin(%s)" % repr(self._name)
112 |
113 | def __reduce__(self):
114 | return (self.__class__, (self._name,))
115 |
116 |
117 | class tzwinlocal(tzwinbase):
118 |
119 | def __init__(self):
120 |
121 | handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
122 |
123 | tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME)
124 | keydict = valuestodict(tzlocalkey)
125 | tzlocalkey.Close()
126 |
127 | self._stdname = keydict["StandardName"].encode("iso-8859-1")
128 | self._dstname = keydict["DaylightName"].encode("iso-8859-1")
129 |
130 | try:
131 | tzkey = _winreg.OpenKey(handle, "%s\%s"%(TZKEYNAME, self._stdname))
132 | _keydict = valuestodict(tzkey)
133 | self._display = _keydict["Display"]
134 | tzkey.Close()
135 | except OSError:
136 | self._display = None
137 |
138 | handle.Close()
139 |
140 | self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
141 | self._dstoffset = self._stdoffset-keydict["DaylightBias"]
142 |
143 |
144 |         # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
145 | tup = struct.unpack("=8h", keydict["StandardStart"])
146 |
147 | (self._stdmonth,
148 | self._stddayofweek, # Sunday = 0
149 | self._stdweeknumber, # Last = 5
150 | self._stdhour,
151 | self._stdminute) = tup[1:6]
152 |
153 | tup = struct.unpack("=8h", keydict["DaylightStart"])
154 |
155 | (self._dstmonth,
156 | self._dstdayofweek, # Sunday = 0
157 | self._dstweeknumber, # Last = 5
158 | self._dsthour,
159 | self._dstminute) = tup[1:6]
160 |
161 | def __reduce__(self):
162 | return (self.__class__, ())
163 |
164 | def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
165 | """dayofweek == 0 means Sunday, whichweek 5 means last instance"""
166 | first = datetime.datetime(year, month, 1, hour, minute)
167 | weekdayone = first.replace(day=((dayofweek-first.isoweekday())%7+1))
168 | for n in xrange(whichweek):
169 |         dt = weekdayone+(whichweek-n-1)*ONEWEEK  # -1 because whichweek is 1-based
170 | if dt.month == month:
171 | return dt
172 |
173 | def valuestodict(key):
174 | """Convert a registry key's values to a dictionary."""
175 | dict = {}
176 | size = _winreg.QueryInfoKey(key)[1]
177 | for i in range(size):
178 | data = _winreg.EnumValue(key, i)
179 | dict[data[0]] = data[1]
180 | return dict
181 |
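A worked example for picknthweekday() above (Windows-only because of the _winreg import); week numbers are 1-based and 5 means the last instance. The expected values assume the week-offset fix applied in picknthweekday:

    import datetime
    from dateutil import tzwin

    # second Sunday in March 2013 at 02:00, the US DST start rule
    assert tzwin.picknthweekday(2013, 3, 0, 2, 0, 2) == datetime.datetime(2013, 3, 10, 2, 0)
    # whichweek == 5: the last Sunday in March 2013
    assert tzwin.picknthweekday(2013, 3, 0, 2, 0, 5) == datetime.datetime(2013, 3, 31, 2, 0)
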
--------------------------------------------------------------------------------
/dbobject.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Representing an object that can store to SQLite
5 | #
6 | # 2013-04-16 Created by Pascal Pfiffner
7 | #
8 |
9 | import logging
10 |
11 | from sqlite import SQLite
12 |
13 |
14 | class DBObject (object):
15 | """ A superclass for objects that can dehydrate to and hydrate from SQLite.
16 |
17 | Very crude and basic for the time being, but still takes away much of the
18 | cruft for subclasses.
19 | """
20 |
21 | sqlite_default_db = 'databases/storage.db'
22 | sqlite_handle = None
23 | sqlite_must_commit = False
24 |
25 | table_name = None
26 | table_key = None
27 |
28 | def __init__(self):
29 | self.id = None
30 | self.hydrated = False
31 |
32 |
33 | # -------------------------------------------------------------------------- Dehydration
34 | def should_insert(self):
35 | """ Return True if the receiver should be inserted (i.e. is not already
36 | in the db). """
37 | return False
38 |
39 | def will_insert(self):
40 | """ Called before the insert query is performed, you can use this as a
41 | hook. """
42 | pass
43 |
44 | def insert_tuple(self):
45 | """ Cheap solution for now: return the INSERT sql as first and a list
46 | of values as second object. """
47 | return None, None
48 |
49 | def did_insert(self):
50 | pass
51 |
52 | def insert(self):
53 | """ Runs an INSERT query for the receiver.
54 | This method will not check with "should_insert()"! """
55 | self.will_insert()
56 |
57 | sql, params = self.insert_tuple()
58 | if sql is None or params is None:
59 | return False
60 |
61 | cls = self.__class__
62 | cls.sqlite_assure_handle()
63 | self.id = cls.sqlite_handle.executeInsert(sql, params)
64 | cls.sqlite_must_commit = True
65 | self.did_insert()
66 |
67 | return True
68 |
69 |
70 | def should_update(self):
71 | return True
72 |
73 | def update_tuple(self):
74 | """ Cheap solution for now: return the UPDATE sql as first and a list
75 | of values as second object. """
76 | return None, None
77 |
78 | def update(self):
79 | """ Runs the UPDATE query on the receiver. """
80 |
81 | sql, params = self.update_tuple()
82 | if sql is None or params is None:
83 | return False
84 |
85 | cls = self.__class__
86 | cls.sqlite_assure_handle()
87 | if cls.sqlite_handle.execute(sql, params):
88 | cls.sqlite_must_commit = True
89 | self.hydrated = True
90 | return True
91 |
92 | return False
93 |
94 | def did_store(self):
95 | """ Called after a successful call to self.store(). """
96 | pass
97 |
98 | def store(self):
99 | """ Stores the receiver's data to SQLite. You must MANUALLY COMMIT!
100 | """
101 |
102 | # do we need to insert first?
103 | if self.should_insert() and not self.insert():
104 | logging.warning("Failed to INSERT %s" % self)
105 |
106 | # perform the update
107 | if self.should_update() and not self.update():
108 | logging.warning("Failed to UPDATE %s" % self)
109 | return False
110 |
111 | self.did_store()
112 | return True
113 |
114 |
115 | # -------------------------------------------------------------------------- Hydration
116 | def load(self, force=False):
117 | """ Hydrate from database. """
118 | pass
119 |
120 | def from_db(self, data):
121 | """ Fill from an SQLite-retrieved list. """
122 | pass
123 |
124 |
125 | # -------------------------------------------------------------------------- SQLite Methods
126 | def sqlite_execute(self, sql, params):
127 | """ Executes the given SQL statement with the given parameters.
128 | Returns True on success, False otherwise. """
129 |
130 | cls = self.__class__
131 | cls.sqlite_assure_handle()
132 | if cls.sqlite_handle.execute(sql, params):
133 | cls.sqlite_must_commit = True
134 | self.hydrated = True
135 | return True
136 |
137 | return False
138 |
139 | @classmethod
140 | def sqlite_select(cls, sql, params):
141 | """ Executes the SQL statement and returns the response. You can use
142 | this method in an iterator. """
143 |
144 | cls.sqlite_assure_handle()
145 | return cls.sqlite_handle.execute(sql, params)
146 |
147 | @classmethod
148 | def sqlite_select_one(cls, sql, params):
149 | """ Executes the SQL statement and returns the first response row.
150 | """
151 |
152 | cls.sqlite_assure_handle()
153 | return cls.sqlite_handle.executeOne(sql, params)
154 |
155 | @classmethod
156 | def add_index(cls, table_column):
157 | """ Adds an index for the given table column if there is none.
158 | """
159 | if table_column is None:
160 | return
161 |
162 | cls.sqlite_assure_handle()
163 | idx_name = "%s_index" % table_column
164 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS %s ON %s (%s)" % (idx_name, cls.table_name, table_column))
165 |
166 |
167 | # -------------------------------------------------------------------------- Class Methods
168 | @classmethod
169 | def sqlite_assure_handle(cls):
170 | if cls.sqlite_handle is None:
171 | cls.sqlite_handle = SQLite.get(cls.sqlite_default_db)
172 |
173 | @classmethod
174 | def sqlite_release_handle(cls):
175 | cls.sqlite_handle = None
176 |
177 | @classmethod
178 | def sqlite_commit_if_needed(cls):
179 | """ Commits to SQLite if the flag had been set. """
180 | if cls.sqlite_handle is None:
181 | return
182 |
183 | if cls.sqlite_must_commit:
184 | cls.sqlite_must_commit = False
185 | cls.sqlite_handle.commit()
186 |
187 |
188 | # -------------------------------------------------------------------------- Table Setup
189 | @classmethod
190 | def table_structure(cls):
191 | """ Return the table structure here. """
192 | return None
193 |
194 | @classmethod
195 | def setup_tables(cls, db_path=None):
196 | if db_path is not None:
197 | cls.sqlite_default_db = db_path
198 |
199 | struct = cls.table_structure()
200 | if struct is None:
201 | return False
202 |
203 | cls.sqlite_assure_handle()
204 | if cls.sqlite_handle.create(cls.table_name, struct):
205 | cls.did_setup_tables(db_path)
206 |
207 | @classmethod
208 | def did_setup_tables(cls, db_path):
209 | pass
210 |
211 | # call the table setup to be sure it was set up
212 | # SubClass.setup_tables()
213 |
214 |
215 |
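A minimal sketch of the subclass contract described in the docstrings above; the `Note` class, its table and columns are made up for illustration:

    from dbobject import DBObject

    class Note (DBObject):
    	""" Hypothetical subclass for illustration. """
    	table_name = 'notes'
    	table_key = 'note_id'

    	def __init__(self, text=None):
    		super(Note, self).__init__()
    		self.text = text

    	def should_insert(self):
    		return self.id is None

    	def should_update(self):
    		return False                   # nothing to UPDATE in this tiny example

    	def insert_tuple(self):
    		return "INSERT INTO notes (text) VALUES (?)", [self.text]

    	@classmethod
    	def table_structure(cls):
    		return '(note_id INTEGER PRIMARY KEY AUTOINCREMENT, text TEXT)'

    # create the table, store one row, then commit manually
    Note.setup_tables()
    note = Note("lorem ipsum")
    note.store()
    Note.sqlite_commit_if_needed()
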
--------------------------------------------------------------------------------
/files.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 |
4 | def get_timestamp():
5 |     return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
6 |
7 | FILES = {
8 | "1": {
9 | "id": "1",
10 | "title": "clinical_ax.txt",
11 | "author": "Dr. John Doe",
12 | "client_name": "Foo Bar",
13 | "timestamp": get_timestamp()
14 | },
15 | "2": {
16 | "id": "2",
17 | "title": "clinical_review.txt",
18 | "author": "Dr. Scooby Doo",
19 | "client_name": "Foo Bar",
20 | "timestamp": get_timestamp()
21 | },
22 | "3": {
23 | "id": "3",
24 | "title": "clinical_note.txt",
25 | "author": "Dr. Donald Duck",
26 | "client_name": "Foo Bar",
27 | "timestamp": get_timestamp()
28 | }
29 | }
30 |
31 | def read():
32 |
33 |     # Create the list of files from our data
34 | return [FILES[key] for key in sorted(FILES.keys())]
35 |
--------------------------------------------------------------------------------
/mngobject.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Superclass for objects stored in MongoDB
5 | #
6 | # 2013-07-10 Created by Pascal Pfiffner
7 | #
8 |
9 | import logging
10 | import collections
11 |
12 | from pymongo import MongoClient
13 |
14 |
15 | class MNGObject (object):
16 | """ Superclass for an object stored in a MongoDB collection. """
17 |
18 | def __init__(self, id=None):
19 | self.id = id
20 | self.doc = None
21 | self.loaded = False
22 |
23 |
24 | # -------------------------------------------------------------------------- MongoDB
25 | database_uri = "mongodb://localhost:27017"
26 |
27 | # the MongoDB database may be 'None', in which case the default db will be
28 | # used, and if that doesn't work it will fall back to use 'default'
29 | database_name = None
30 |
31 | # the MongoDB collection that holds documents of this class
32 | collection_name = None
33 |
34 | _collection = None
35 |
36 | @classmethod
37 | def collection(cls):
38 | """ Returns a Mongo Collection object, creating it if necessary. """
39 | if cls._collection is None:
40 | if not cls.collection_name:
41 | raise Exception("No collection has been set for %s" % cls)
42 |
43 | client = MongoClient(cls.database_uri)
44 | if cls.database_name is None:
45 | try:
46 | db = client.get_default_database()
47 | except Exception as e:
48 | logging.debug("Failed to get default database: %s" % e)
49 | db = client['default']
50 | else:
51 | db = client[cls.database_name]
52 | cls._collection = db[cls.collection_name]
53 |
54 | return cls._collection
55 |
56 | @classmethod
57 | def test_connection(cls):
58 | """ Tests the database by inserting, retrieving and deleting a document.
59 | """
60 | old_coll = cls.collection_name
61 | cls.collection_name = 'foo'
62 |
63 | obj = MNGObject()
64 | obj.doc = {
65 | 'title': "This is a connection test document"
66 | }
67 |
68 | ret = None
69 |
70 | # try storing
71 | try:
72 | obj.store()
73 |
74 | # try loading
75 | sec = MNGObject(obj.id)
76 | try:
77 | sec.load()
78 |
79 | # compare titles
80 | t1 = obj.doc.get('title') if obj.doc else None
81 | t2 = sec.doc.get('title') if sec.doc else None
82 | if t1 == t2:
83 |
84 | # try removing
85 | try:
86 | if not sec.remove():
87 | raise Exception('failed to remove')
88 | except Exception as e:
89 | ret = "TEST FAILED with remove() exception: %s" % e
90 | else:
91 | ret = "TEST FAILED, insertion and retrieval do not match (%s != %s)" % (t1, t2)
92 | except Exception as e:
93 | ret = "TEST FAILED with load() exception: %s" % e
94 | except Exception as e:
95 | ret = "TEST FAILED with store() exception: %s" % e
96 |
97 |
98 | # clean up
99 | try:
100 | cls._collection.drop()
101 | cls._collection = None
102 | 		except Exception as e:
103 | 			logging.error("Failed to drop collection: %s" % e)
104 |
105 | 		cls.collection_name = old_coll
106 |
107 | return ret
108 |
109 |
110 | # -------------------------------------------------------------------------- Document Manipulation
111 | def ensure_doc_id(self):
112 | had_doc = True
113 | if self.doc is None:
114 | had_doc = False
115 | self.doc = {}
116 |
117 | if self.id:
118 | self.doc['_id'] = self.id
119 | elif had_doc:
120 | self.id = self.doc.get('_id')
121 | if self.id is None:
122 | self.id = self.doc.get('id')
123 | self.doc['_id'] = self.id
124 |
125 | def replace_with(self, json):
126 | """ Replaces the document tree with the given JSON tree.
127 |
128 | The document id is set from the receiver's id if it's there, otherwise
129 | it's being searched in the doc in this order:
130 | - if self.id is not None, the doc's "_id" will be set to self.id
131 | - if doc["_id"] is present, this becomes self.id
132 | - if doc["id"] is present, this becomes self.id and is set as the
133 | docs "_id"
134 | """
135 | if not self.loaded:
136 | self.load()
137 |
138 | self.doc = json
139 | self.loaded = True
140 |
141 | # set or update our id
142 | self.ensure_doc_id()
143 | self.did_update_doc()
144 |
145 | def update_with(self, json):
146 | """ Updates the document tree by merging it with the given JSON tree.
147 |
148 | The id of the document is automatically set in this order:
149 | - if self.id is not None, the doc's "_id" will be set to self.id
150 | - if doc["_id"] is present, this becomes self.id
151 | - if doc["id"] is present, this becomes self.id and is set as the
152 | docs "_id"
153 | """
154 |
155 | if not self.loaded:
156 | self.load()
157 |
158 | # set or update contents
159 | if self.doc is None:
160 | self.doc = json
161 | else:
162 | self.doc = deepUpdate(self.doc, json)
163 | self.loaded = True
164 |
165 | # set or update our id
166 | self.ensure_doc_id()
167 | self.did_update_doc()
168 |
169 | def did_update_doc(self):
170 | """ Called when self.doc has been changed programmatically (i.e. NOT
171 | after loading from database).
172 |
173 | You can call this manually if you directly assign self.doc and want
174 | this to trigger. The default implementation does nothing.
175 | """
176 | pass
177 |
178 | def update_subtree(self, keypath, tree):
179 | assert False, "Not implemented"
180 |
181 | def replace_subtree(self, keypath, tree):
182 | """ replaces the existing tree at keypath with the new tree. """
183 |
184 | if not self.loaded and self.id:
185 | self.load()
186 |
187 | self.ensure_doc_id()
188 | self.doc = replaceSubtree(self.doc, keypath, tree)
189 | self.loaded = True
190 |
191 |
192 | # -------------------------------------------------------------------------- Dehydration
193 | def store(self, subtree=None):
194 | """ Stores the receiver's data to the collection, letting Mongo decide
195 | between an insert and an update.
196 | If "subtree" is not None, an update is forced only on the given subtree
197 | which should have the format: {'keypath': value}. """
198 |
199 | # throw up if there is no content and we're not saving a subtree
200 | if self.doc is None and subtree is None:
201 | raise Exception("This object does not have content")
202 |
203 | cls = self.__class__
204 |
205 | # update if there's a subtree, otherwise use "save"
206 | if subtree is not None:
207 | if self.id is None:
208 | raise Exception("No id is set, cannot update subtree %s" % subtree)
209 | res = cls.collection().update({"_id": self.id}, {"$set": subtree})
210 | if res is not None:
211 | if res.get('err'):
212 | logging.warning("Error while saving subtree: %s" % res.get('err'))
213 |
214 | # instead of loading again, would be nice to update self.doc
215 | # appropriately
216 | self.doc = None
217 | self.load()
218 | else:
219 | self.id = cls.collection().save(self.doc, manipulate=True)
220 |
221 | self.did_store()
222 |
223 | return True
224 |
225 | def did_store(self):
226 | """ Called after a successful call to "store". """
227 | pass
228 |
229 |
230 | # -------------------------------------------------------------------------- Hydration
231 | def load(self, force=False):
232 | """ Hydrate from database, if the instance has an id.
233 | 		If the document already has an in-memory representation, data loaded
234 | 		from the database will be superseded by the in-memory properties unless
235 | 		"force" is set to True, in which case all in-memory data is
236 | 		discarded.
237 |
238 | Arguments:
239 | force -- if True will discard any in-memory changes to self.doc
240 | """
241 |
242 | if self.id is None:
243 | return
244 |
245 | found = self.__class__.collection().find_one({"_id": self.id})
246 | if found is not None:
247 | if force or self.doc is None:
248 | self.doc = found
249 | else:
250 | self.doc = deepUpdate(found, self.doc)
251 |
252 | self.loaded = True
253 |
254 |
255 | # -------------------------------------------------------------------------- Multiple
256 | @classmethod
257 | def retrieve(cls, id_list=[]):
258 | """ Retrieves multiple documents by id. """
259 |
260 | found = []
261 | for document in cls.collection().find({"_id": {"$in": id_list}}):
262 | obj = cls()
263 | obj.update_with(document)
264 |
265 | found.append(obj)
266 |
267 | return found
268 |
269 |
270 | # -------------------------------------------------------------------------- Deletion
271 | def remove(self):
272 | """ Delete from database. """
273 |
274 | if self.id is None:
275 | raise Exception("This object does not have an id, cannot remove")
276 |
277 | ret = self.__class__.collection().remove(spec_or_id=self.id)
278 | return ret.get('err') is None if ret else False
279 |
280 |
281 |
282 | def deepUpdate(d, u):
283 | """ Deep merges two dictionaries, overwriting "d"s values with "u"s where
284 | present. """
285 | if u is None:
286 | return d
287 |
288 | # if we have "u" and "d" is not a mapping object, we overwrite it with "u"
289 | if d is None or not isinstance(d, collections.Mapping):
290 | return u
291 |
292 | # iterate over keys and values and update
293 | for k, v in u.iteritems():
294 | if isinstance(v, collections.Mapping):
295 | old = d.get(k)
296 | d[k] = deepUpdate(old, v) if old else v
297 | else:
298 | d[k] = u[k]
299 |
300 | return d
301 |
302 | def deleteSubtree(tree, keypath):
303 | """ Deletes the content at keypath. """
304 | if not keypath:
305 | raise Exception("You must provide a keypath")
306 |
307 | existing = tree
308 | path = keypath.split('.')
309 | while len(path) > 1:
310 | p = path.pop(0)
311 | existing = existing.get(p)
312 |
313 | # if we don't have a tree to update it's not there anyway, go home
314 | if existing is None:
315 | return tree
316 |
317 | del existing[path[0]]
318 |
319 | return tree
320 |
321 |
322 | def replaceSubtree(tree, keypath, json):
323 | """ Replaces or creates a subtree at keypath. """
324 | if not keypath:
325 | raise Exception("You must provide a keypath")
326 | if json is None:
327 | return deleteSubtree(tree, keypath)
328 |
329 | existing = tree or {}
330 | path = keypath.split('.')
331 | while len(path) > 1:
332 | p = path.pop(0)
333 | previous = existing
334 | existing = existing.get(p)
335 | if existing is None:
336 | existing = {}
337 | previous[p] = existing
338 |
339 | if existing is None:
340 | existing = {}
341 | existing[path[0]] = json
342 |
343 | return tree
344 |
345 |
346 | if '__main__' == __name__:
347 | a = {'a': 1, 'b': 1, 'c': {'ca': 1, 'cb': 1, 'cc': {'cca': 1, 'ccb': 1}}, 'e': {'ea': 1}}
348 | b = {'a': 2, 'c': {'ca': 2, 'cb': {'cba': 2, 'cbb': 2}, 'cd': {'cda': 2, 'cdb': 2, 'cdc': 2}}, 'e': 2}
349 |
350 | print "replaceSubtree()"
351 | print "before ", a
352 | print "replace 1", replaceSubtree(a, 'c.ca', 3)
353 | print "replace 2", replaceSubtree(a, 'c.cc.cca', 3)
354 | print "replace 3", replaceSubtree(a, 'c.ce.cea', 3)
355 | print
356 | print "deleteSubtree()"
357 | print "before ", a
358 | print "delete 1", deleteSubtree(a, 'c.ce.cea')
359 | print "delete 2", deleteSubtree(a, 'd.da.dda')
360 | print
361 | print "deepUpdate(a, b)"
362 | print "a: ", a
363 | print "b: ", b
364 | print "-> ", deepUpdate(a, b)
365 |
366 |
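A short usage sketch for MNGObject, assuming a MongoDB reachable at the default database_uri; the `Note` class and its collection name are made up:

    from mngobject import MNGObject

    class Note (MNGObject):
    	""" Hypothetical document class for illustration. """
    	collection_name = 'notes'

    note = Note()
    note.update_with({'id': 'note-1', 'title': "A first note"})   # 'id' becomes the doc's '_id'
    note.store()

    second = Note('note-1')
    second.load()
    print second.doc.get('title')    # -> A first note
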
--------------------------------------------------------------------------------
/nlp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # cTAKES and RegEx wizardry
4 | #
5 | # 2012-12-14 Created by Pascal Pfiffner
6 | #
7 |
8 | import os
9 | import re
10 | import logging
11 |
12 | class NLPProcessing (object):
13 | """ Abstract base class for handling NLP pipelines. """
14 | # print('\n(nlp.py) Initializing NLP w/ object:', object, '\n')
15 |
16 | def __init__(self):
17 | # print('\n(nlp.py) Setting definitions for self')
18 |
19 | self.name = 'nlp'
20 | self.bin = '.'
21 | self.root = None
22 | self.cleanup = True
23 | self.did_prepare = False
24 |
25 | # print('(nlp.py) Definitions set as:', '\n(nlp.py) Self Name:', self.name, '\n(nlp.py) Self bin:', self.bin, '\n(nlp.py) Self root:',
26 | # self.root, '\n(nlp.py) Self cleanup:', self.cleanup, '\n(nlp.py) Self did prepare:', self.did_prepare, '\n')
27 |
28 | # -------------------------------------------------------------------------- Preparations
29 | def set_relative_root(self, directory):
30 | self.root = os.path.abspath(directory if directory is not None else '.')
31 |
32 | def prepare(self):
33 | """ Performs steps necessary to setup the pipeline, such as creating
34 | input and output directories or pipes. """
35 | # print('Preparations started w/:\n','Root =', self.root, '\n')
36 | self._prepare()
37 | self.did_prepare = True
38 |
39 | def _prepare(self):
40 | if self.root is None:
41 | raise Exception("No root directory defined for NLP process %s" % self.name)
42 |
43 | if not os.path.exists(self.root):
44 | os.mkdir(self.root)
45 |
46 | self._create_directories_if_needed()
47 |
48 | if not os.path.exists(self.root):
49 | raise Exception(
50 | "Failed to create root directory for NLP process %s" % self.name)
51 |
52 | def _create_directories_if_needed(self):
53 | """ Override to create directories needed to run the pipeline. """
54 | pass
55 |
56 | # -------------------------------------------------------------------------- Running
57 | def run(self):
58 | """ Runs the NLP pipeline, raises an exception on error. """
59 | if not self.did_prepare:
60 | self.prepare()
61 | self._run()
62 |
63 | def _run(self):
64 | """ Internal use, subclasses should override this method since it is
65 | called after necessary preparation has been performed. """
66 | raise Exception("Cannot run an abstract NLP pipeline class instance")
67 |
68 | def write_input(self, text, filename):
69 | if not self.did_prepare:
70 | self.prepare()
71 |
72 | return self._write_input(text, filename)
73 |
74 | def _write_input(self, text, filename):
75 | return False
76 |
77 | def parse_output(self, filename, **kwargs):
78 | if not self.did_prepare:
79 | self.prepare()
80 |
81 | return self._parse_output(filename, **kwargs)
82 |
83 | def _parse_output(self, filename, **kwargs):
84 | """ return a dictionary (or None) like:
85 | { 'snomed': [1, 2, 2], 'rxnorm': [4, 5, 6] }
86 | """
87 | return None
88 |
89 |
90 | # ------------------------------------------------------------------------------ Helper Functions
91 | def split_inclusion_exclusion(string):
92 | """ Returns a tuple of lists describing inclusion and exclusion criteria.
93 | """
94 |
95 | 	if not string or len(string) < 1:
96 | raise Exception('No string given')
97 |
98 | # split on newlines
99 | rows = re.compile(r'(?:\n\s*){2,}').split(string)
100 |
101 | # loop all rows
102 | missed = []
103 | inc = []
104 | exc = []
105 | at_inc = False
106 | at_exc = False
107 |
108 | for string in rows:
109 | if len(string) < 1 or 'none' == string:
110 | continue
111 |
112 | clean = re.sub(r'[\n\s]+', ' ', string).strip()
113 |
114 | # detect switching to inclusion criteria
115 | # exclusion criteria sometimes say "None if patients fulfill inclusion
116 | # criteria.", try to avoid detecting that as header!
117 | if re.search(r'^[^\w]*inclusion criteria', clean, re.IGNORECASE) is not None \
118 | and re.search(r'exclusion', clean, re.IGNORECASE) is None:
119 | at_inc = True
120 | at_exc = False
121 |
122 | # detect switching to exclusion criteria
123 | elif re.search(r'exclusion criteria', clean, re.IGNORECASE) is not None \
124 | and re.search(r'inclusion', clean, re.IGNORECASE) is None:
125 | at_inc = False
126 | at_exc = True
127 |
128 | # assign accordingly
129 | elif at_inc:
130 | inc.append(clean)
131 | elif at_exc:
132 | exc.append(clean)
133 | else:
134 | missed.append(clean)
135 |
136 | # if there was no inclusion/exclusion split, we assume the text describes inclusion criteria
137 | if len(inc) < 1 or len(exc) < 1:
138 | logging.debug(
139 | "No explicit separation of inclusion/exclusion criteria found, assuming the text to describe inclusion criteria")
140 | inc.extend(missed)
141 | exc = []
142 |
143 | return (inc, exc)
144 |
145 |
146 | def list_to_sentences(string):
147 | """ Splits text at newlines and puts it back together after stripping new-
148 | lines and enumeration symbols, joined by a period.
149 | """
150 | if string is None:
151 | return None
152 |
153 | lines = string.splitlines()
154 |
155 | curr = ''
156 | processed = []
157 | for line in lines:
158 | stripped = line.strip()
159 |
160 | # empty line
161 | if 0 == len(stripped):
162 | if curr:
163 | processed.append(re.sub(r'\.\s*$', '', curr))
164 | curr = ''
165 |
166 | # beginning a new fragment
167 | elif not curr or 0 == len(curr):
168 | curr = re.sub(r'^[-\d\.\(\)]+\s*', '', stripped)
169 |
170 | # new line item? true when it starts with "-", "1." or "1)" (with
171 | # optional dash) or if the indent level is less than before (simple
172 | # whitespace count) (NO LONGER IMPLEMENTED)
173 | elif re.match(r'^-\s+', stripped) \
174 | or re.match(r'^\d+\.\s+', stripped) \
175 | or re.match(r'^(-\s*)?\d+\)\s+', stripped):
176 |
177 | if curr:
178 | processed.append(re.sub(r'\.\s*$', '', curr))
179 | curr = re.sub(r'^(-|(\d+\.)|((-\s*)?\d+\)))\s*', '', stripped)
180 |
181 | # append to previous fragment
182 | else:
183 | curr = '%s %s' % (curr, stripped)
184 |
185 | if curr:
186 | processed.append(re.sub(r'\.\s*$', '', curr))
187 |
188 | sentences = '. '.join(processed) if len(processed) > 0 else ''
189 | if len(sentences) > 0:
190 | sentences += '.'
191 |
192 | return sentences
193 |
194 |
195 | def list_trim(string):
196 | 	""" Trim text phrases that are part of the string because the string was
197 | pulled off of a list, e.g. a leading "-" or "1."
198 | """
199 |
200 | 	string = string.strip()
201 | string = re.sub('\s+', ' ', string) # multi-whitespace
202 | string = re.sub('^-\s+', '', string, count=1) # leading "-"
203 | string = re.sub('^\d+\.\s+', '', string, count=1) # leading "1."
204 | string = re.sub('^(-\s*)?\d+\)\s+', '', string, count=1) # leading "1)" with optional dash
205 |
206 | return string
207 |
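A quick sketch of split_inclusion_exclusion() above on a made-up criteria block; the splitter expects paragraphs separated by blank lines:

    from nlp import split_inclusion_exclusion

    text = """Inclusion Criteria:

     - Age 18 or older

    Exclusion Criteria:

     - Pregnancy"""

    inc, exc = split_inclusion_exclusion(text)
    print inc    # -> ['- Age 18 or older']
    print exc    # -> ['- Pregnancy']
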
--------------------------------------------------------------------------------
/nltktags.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Handling NLTK to generate tags
5 | #
6 | # 2013-10-25 Created by Pascal Pfiffner
7 | #
8 |
9 | import os
10 | import logging
11 | import codecs
12 | import inspect
13 | import nltk
14 | import operator
15 |
16 | from nlp import NLPProcessing, list_to_sentences
17 |
18 |
19 | class NLTKTags (NLPProcessing):
20 | """ Aggregate handling tasks specifically for NLTK. """
21 |
22 | def __init__(self):
23 | super(NLTKTags, self).__init__()
24 | self.name = 'nltk-tags'
25 |
26 |
27 | @property
28 | def _in_dir(self):
29 | return os.path.join(self.root, 'nltk-tags-in')
30 |
31 | @property
32 | def _out_dir(self):
33 | return os.path.join(self.root, 'nltk-tags-out')
34 |
35 | def _create_directories_if_needed(self):
36 | in_dir = self._in_dir
37 | out_dir = self._out_dir
38 | if not os.path.exists(in_dir):
39 | os.mkdir(in_dir)
40 | if not os.path.exists(out_dir):
41 | os.mkdir(out_dir)
42 |
43 | def _run(self):
44 | in_dir = self._in_dir
45 | out_dir = self._out_dir
46 | if not os.path.exists(in_dir) or not os.path.exists(out_dir):
47 | return
48 |
49 | # init our simple noun-phrase chunker
50 | 		grammar = r"""
51 | 			NUM:
52 | 				{<CD><NN>}					# "%" is interpreted as NN...
53 | 
54 | 			NBAR:
55 | 				{<NUM>*<NN.*|JJ>*<NN.*>+}	# Nouns and Adjectives, terminated with Nouns
56 | 
57 | 			NP:
58 | 				{<NBAR>}					# An NBAR is also a NP
59 | 				{<NBAR><IN><NBAR>}			# Above, connected with in/of/etc...
60 | 		"""
61 | chunker = nltk.RegexpParser(grammar)
62 |
63 | filelist = os.listdir(in_dir)
64 | tag_count = {}
65 | i = 0
66 | for f in filelist:
67 | i = i + 1
68 | logging.debug(" Reading file %d of %d" % (i, len(filelist)))
69 | with codecs.open(os.path.join(in_dir, f), 'r', 'utf-8') as handle:
70 | text = handle.read()
71 |
72 | # use NLTK to chunk the text
73 | chunks = []
74 | sentences = nltk.sent_tokenize(text)
75 | if sentences and len(sentences) > 0:
76 | for sentence in sentences:
77 | tokens = nltk.word_tokenize(sentence)
78 | tagged = nltk.pos_tag(tokens)
79 | tree = chunker.parse(tagged)
80 |
81 | # get noun phrases
82 | np = []
83 | for st in _nltk_find_leaves(tree, 'NP'):
84 | leaves = st.leaves()
85 | if len(leaves) > 0:
86 | tag = ' '.join([noun[0] for noun in leaves]).lower()
87 | np.append(tag)
88 |
89 | 								# count tags (only when a tag was actually extracted)
90 | 								if tag in tag_count:
91 | 									tag_count[tag] = tag_count[tag] + 1
92 | 								else:
93 | 									tag_count[tag] = 1
94 |
95 | if len(np) > 0:
96 | chunks.extend(np)
97 |
98 | # write to outfile
99 | if len(chunks) > 0:
100 | outfile = os.path.join(out_dir, f)
101 | with codecs.open(outfile, 'w', 'utf-8') as w_handle:
102 | for chunk in chunks:
103 | w_handle.write("%s\n" % unicode(chunk))
104 |
105 | # tag count
106 | if len(tag_count) > 0:
107 | with codecs.open(os.path.join(out_dir, 'tags.txt'), 'w', 'utf-8') as handle:
108 | for tag in sorted(tag_count.iteritems(), key=operator.itemgetter(1), reverse=True):
109 | handle.write("%s: %d\n" % (tag[0], int(tag[1])))
110 |
111 |
112 | def _write_input(self, text, filename):
113 | if text is None \
114 | or len(text) < 1 \
115 | or filename is None:
116 | return False
117 |
118 | in_dir = self._in_dir
119 | if not os.path.exists(in_dir):
120 | logging.error("The input directory for %s at %s does not exist" % (self.name, in_dir))
121 | return False
122 |
123 | infile = os.path.join(in_dir, filename)
124 | if os.path.exists(infile):
125 | return False
126 |
127 | # write it
128 | with codecs.open(infile, 'w', 'utf-8') as handle:
129 | # handle.write(unicode(text))
130 | # handle.write("\n=====\n")
131 | handle.write(unicode(list_to_sentences(text)))
132 |
133 | return True
134 |
135 |
136 | def _parse_output(self, filename, **kwargs):
137 | """ Parse NLTK output. """
138 |
139 | if filename is None:
140 | return None
141 |
142 | # is there cTAKES output?
143 | out_dir = self._out_dir
144 | if not os.path.exists(out_dir):
145 | logging.error("The output directory for %s at %s does not exist" % (self.name, out_dir))
146 | return None
147 |
148 | outfile = os.path.join(out_dir, filename)
149 | if not os.path.exists(outfile):
150 | # do not log here and silently fail
151 | return None
152 |
153 | tags = []
154 |
155 | # read tags
156 | with codecs.open(outfile, 'r', 'utf-8') as handle:
157 | #line = handle.readline(keepends=False) # "keepends" not supported in readline! (http://bugs.python.org/issue8630)
158 | lines = handle.readlines()
159 | for line in lines:
160 | tags.append(line.strip())
161 |
162 | # create and return a dictionary (don't filter empty lists)
163 | ret = {
164 | 'tags': tags,
165 | }
166 |
167 | # clean up
168 | if self.cleanup:
169 | os.remove(outfile)
170 |
171 | in_dir = self._in_dir
172 | infile = os.path.join(in_dir, filename)
173 | if os.path.exists(infile):
174 | os.remove(infile)
175 |
176 | return ret
177 |
178 |
179 | def _nltk_find_leaves(tree, leave_name):
180 | try:
181 | tree.node
182 | except AttributeError:
183 | return []
184 |
185 | res = []
186 | if leave_name == tree.node:
187 | res.append(tree)
188 | else:
189 | for child in tree:
190 | leaves = _nltk_find_leaves(child, leave_name)
191 | if len(leaves) > 0:
192 | res.extend(leaves)
193 |
194 | return res
195 |
196 |
197 | # we can execute this file to do some testing
198 | if '__main__' == __name__:
199 | testtext = "History of clincally significant hypogammaglobulinemia, common variable immunodeficiency, or humeral immunodeficiency."
200 | testfile = 'test.txt'
201 |
202 | run_dir = os.path.join(os.path.dirname(__file__), 'nltk-tags-test')
203 | 	my_nlp = NLTKTags()                  # the initializer takes no arguments
204 | 	my_nlp.set_relative_root(run_dir)    # cleanup defaults to True; prepare() runs lazily on first write_input()
205 |
206 | # create test input
207 | if not my_nlp.write_input(testtext, testfile):
208 | print "xx> Failed to write test input to file"
209 |
210 | # run
211 | try:
212 | my_nlp.run()
213 | except Exception as e:
214 | print "xx> Failed: %s" % e
215 |
216 | # parse output
217 | ret = my_nlp.parse_output(testfile)
218 | print ret
219 |
220 | # clean up
221 | os.rmdir(my_nlp._in_dir)
222 | os.rmdir(my_nlp._out_dir)
223 | os.rmdir(run_dir)
224 |
225 | print "--> Done"
226 |
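For reference, a standalone sketch of the chunking step in _run() above, using a single-stage variant of the grammar (NLTK and its 'punkt' and POS-tagger models must be installed; the sentence is an example):

    import nltk

    sentence = "Signs of acute liver failure."
    tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
    chunker = nltk.RegexpParser(r"NP: {<NN.*|JJ>*<NN.*>+}")
    print chunker.parse(tagged)    # the NP subtrees are what _run() joins into tags
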
--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
1 | import connexion
2 |
3 | # Create the application instance
4 | app = connexion.App(__name__, specification_dir='./')
5 |
6 | # Read the swagger.yml file to configure the endpoints
7 | app.add_api('swagger.yml')
8 |
9 | # Create a URL route in our application for "/"
10 |
11 |
12 | @app.route('/')
13 | def home():
14 |
15 | return "Works!"
16 |
17 |
18 | # If we're running in standalone mode, run the application
19 | if __name__ == '__main__':
20 | app.run(host='0.0.0.0', port=5000, debug=True)
21 |
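With the server running (python server.py), the route defined in swagger.yml can be exercised directly; a sketch (the Swagger UI path is Connexion's default and may differ between versions):

    # list the file records served by files.read():
    #   curl http://localhost:5000/api/defaultClinicalPipline
    # Connexion also serves an interactive Swagger UI, by default at:
    #   http://localhost:5000/api/ui
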
--------------------------------------------------------------------------------
/sqlite.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Simplifying SQLite access
4 | #
5 | # 2012-12-14 Created by Pascal Pfiffner
6 | #
7 |
8 |
9 | import sqlite3
10 | import threading
11 |
12 |
13 | SQLITE_INSTANCES = {}
14 |
15 |
16 | class SQLite (object):
17 | """ SQLite access
18 | """
19 |
20 | @classmethod
21 | def get(cls, database):
22 | """ Use this to get SQLite instances for a given database. Avoids
23 | creating multiple instances for the same database.
24 |
25 | We keep instances around per thread per database, maybe there should be
26 | a way to turn this off. However, here we always release instances for
27 | threads that are no longer alive. If this is better than just always
28 | creating a new instance should be tested.
29 | """
30 |
31 | global SQLITE_INSTANCES
32 |
33 | # group per thread
34 | thread_id = threading.current_thread().ident
35 | if thread_id not in SQLITE_INSTANCES:
36 | SQLITE_INSTANCES[thread_id] = {}
37 | by_thread = SQLITE_INSTANCES[thread_id]
38 |
39 | # group per database
40 | if database not in by_thread:
41 | sql = SQLite(database)
42 | by_thread[database] = sql
43 |
44 | # free up memory for terminated threads
45 | clean = {}
46 | for alive in threading.enumerate():
47 | if alive.ident in SQLITE_INSTANCES:
48 | clean[alive.ident] = SQLITE_INSTANCES[alive.ident]
49 | SQLITE_INSTANCES = clean
50 |
51 | return by_thread[database]
52 |
53 |
54 | def __init__(self, database=None):
55 | if database is None:
56 | raise Exception('No database provided')
57 |
58 | self.database = database
59 | self.handle = None
60 | self.cursor = None
61 |
62 |
63 | def execute(self, sql, params=()):
64 | 		""" Executes an SQL command and returns the result of cursor.execute(),
65 | 		which can be used as an iterator.
66 | 		Supply the params as a tuple, i.e. (param,) or (param1, param2, ...)
67 | """
68 | if not sql or len(sql) < 1:
69 | raise Exception('No SQL to execute')
70 | if not self.cursor:
71 | self.connect()
72 |
73 | return self.cursor.execute(sql, params)
74 |
75 |
76 | def executeInsert(self, sql, params=()):
77 | """ Executes an SQL command (should be INSERT OR REPLACE) and returns
78 | the last row id, 0 on failure.
79 | """
80 | if self.execute(sql, params):
81 | return self.cursor.lastrowid if self.cursor.lastrowid else 0
82 |
83 | return 0
84 |
85 |
86 | def executeUpdate(self, sql, params=()):
87 | """ Executes an SQL command (should be UPDATE) and returns the number
88 | of affected rows.
89 | """
90 | if self.execute(sql, params):
91 | return self.cursor.rowcount
92 |
93 | return 0
94 |
95 |
96 | def executeOne(self, sql, params):
97 | """ Returns the first row returned by executing the command
98 | """
99 | self.execute(sql, params)
100 | return self.cursor.fetchone()
101 |
102 |
103 | def create(self, table_name, table_structure):
104 | """ Executes a CREATE TABLE IF NOT EXISTS query with the given structure.
105 | Input is NOT sanitized, watch it!
106 | """
107 | create_query = 'CREATE TABLE IF NOT EXISTS %s %s' % (table_name, table_structure)
108 | self.execute(create_query)
109 | return True
110 |
111 |
112 | def commit(self):
113 | self.handle.commit()
114 |
115 |
116 | def connect(self):
117 | if self.cursor is not None:
118 | return
119 |
120 | self.handle = sqlite3.connect(self.database)
121 | self.cursor = self.handle.cursor()
122 |
123 |
124 | def close(self):
125 | if self.cursor is None:
126 | return
127 |
128 | self.handle.close()
129 | self.cursor = None
130 | self.handle = None
131 |
132 |
133 | # singleton init whack-a-hack
134 | #SQLite = _SQLite()
135 | #del _SQLite
136 |
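A minimal usage sketch for the wrapper above; the database path and table are examples:

    from sqlite import SQLite

    db = SQLite.get('databases/example.db')    # per-thread cached instance
    db.create('notes', '(id INTEGER PRIMARY KEY, text TEXT)')
    row_id = db.executeInsert("INSERT INTO notes (text) VALUES (?)", ("hello",))
    for row in db.execute("SELECT id, text FROM notes WHERE id = ?", (row_id,)):
    	print row
    db.commit()
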
--------------------------------------------------------------------------------
/swagger.yml:
--------------------------------------------------------------------------------
1 | swagger: "2.0"
2 | info:
3 |   description: Connects to the cTAKES Default Clinical Pipeline through a RESTful service!
4 | version: "1.0.0"
5 | title: cTAKES RESTful API
6 | consumes:
7 | - "application/json"
8 | produces:
9 | - "application/json"
10 |
11 | basePath: "/api"
12 |
13 | paths:
14 | /defaultClinicalPipline:
15 | get:
16 | operationId: "files.read"
17 | tags:
18 | - "Default Clinical Pipeline"
19 | summary: "Plain text file structure supported by the server application"
20 | description: "Read plain text file"
21 | responses:
22 | 200:
23 | description: "Successful read plain text file operation!"
24 | schema:
25 | type: "array"
26 | items:
27 | properties:
28 | id:
29 | type: "string"
30 | title:
31 | type: "string"
32 | author:
33 | type: "string"
34 | client_name:
35 | type: "string"
36 | timestamp:
37 | type: "string"
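
The items schema above mirrors the records in files.py; a GET on /api/defaultClinicalPipline would return JSON along these lines (timestamp values vary):

    [
      {
        "id": "1",
        "title": "clinical_ax.txt",
        "author": "Dr. John Doe",
        "client_name": "Foo Bar",
        "timestamp": "2018-06-01 12:00:00"
      }
    ]
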
--------------------------------------------------------------------------------
/umls.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # utilities to handle UMLS
5 | #
6 | # 2013-01-01 Created by Pascal Pfiffner
7 | #
8 |
9 |
10 | import csv
11 | import sys
12 | import os.path
13 | import logging
14 |
15 | from sqlite import SQLite
16 |
17 |
18 | class UMLS (object):
19 | """ A class for importing UMLS terminologies into an SQLite database.
20 | """
21 |
22 | @classmethod
23 | def check_databases(cls):
24 | """ Check if our databases are in place and if not, import them.
25 | Will raise on errors!
26 |
27 | UMLS: (umls.db)
28 | If missing prompt to use the `umls.sh` script
29 |
30 | SNOMED: (snomed.db)
31 | Read SNOMED CT from tab-separated files and create an SQLite database.
32 | """
33 |
34 | # UMLS
35 | umls_db = os.path.join('databases', 'umls.db')
36 | if not os.path.exists(umls_db):
37 | 			raise Exception("The UMLS database at %s does not exist. Run the import script `databases/umls.sh`." % umls_db)
38 |
39 | # SNOMED
40 | SNOMED.sqlite_handle = None
41 | try:
42 | SNOMED.setup_tables()
43 | except Exception as e:
44 | 			raise Exception("SNOMED setup failed: %s" % e)
45 |
46 | 		# SNOMED import map; do we still need to fill the tables?
47 | 		snomed_map = {
48 | 			'descriptions': 'snomed_desc.csv',
49 | 			'relationships': 'snomed_rel.csv'
50 | 		}
51 | 		for table, filename in snomed_map.iteritems():
52 | 			num_query = 'SELECT COUNT(*) FROM %s' % table
53 | 			num_existing = SNOMED.sqlite_handle.executeOne(num_query, ())[0]
54 | 			if num_existing > 0:
55 | 				continue
56 | 
57 | 			snomed_file = os.path.join('databases', filename)
58 | 			if not os.path.exists(snomed_file):
59 | 				raise Exception("Need to import SNOMED, but the file %s is not present. Download SNOMED from http://www.nlm.nih.gov/research/umls/licensedcontent/snomedctfiles.html" % filename)
60 | 
61 | 			SNOMED.import_csv_into_table(snomed_file, table)
62 | 
63 | 		# RxNorm
64 | 		rxnorm_db = os.path.join('databases', 'rxnorm.db')
65 | 		if not os.path.exists(rxnorm_db):
66 | 			raise Exception("The RxNorm database at %s does not exist. Run the import script `databases/rxnorm.sh`." % rxnorm_db)
67 | 
68 | 
69 |
70 |
71 |
72 | class UMLSLookup (object):
73 | """ UMLS lookup """
74 |
75 | sqlite_handle = None
76 | did_check_dbs = False
77 | preferred_sources = ['"SNOMEDCT"', '"MTH"']
78 |
79 | def __init__(self):
80 | self.sqlite = SQLite.get('databases/umls.db')
81 |
82 | def lookup_code(self, cui, preferred=True):
83 | """ Return a list with triples that contain:
84 | - name
85 | - source
86 | - semantic type
87 | by looking it up in our "descriptions" database.
88 | 		The "preferred" setting has the effect that only names from SNOMED
89 | 		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
90 | our "descriptions" table is much faster than combing through the full
91 | MRCONSO table.
92 | """
93 | if cui is None or len(cui) < 1:
94 | return []
95 |
96 | # lazy UMLS db checking
97 | if not UMLSLookup.did_check_dbs:
98 | UMLSLookup.did_check_dbs = True
99 | try:
100 | UMLS.check_databases()
101 | except Exception as e:
102 | logging.error(e)
103 | # should this crash and burn?
104 |
105 | # take care of negations
106 | negated = '-' == cui[0]
107 | if negated:
108 | cui = cui[1:]
109 |
110 | parts = cui.split('@', 1)
111 | lookup_cui = parts[0]
112 |
113 | # STR: Name
114 | # SAB: Abbreviated Source Name
115 | # STY: Semantic Type
116 | if preferred:
117 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ? AND SAB IN (%s)' % ", ".join(UMLSLookup.preferred_sources)
118 | else:
119 | sql = 'SELECT STR, SAB, STY FROM descriptions WHERE CUI = ?'
120 |
121 | # return as list
122 | arr = []
123 | for res in self.sqlite.execute(sql, (lookup_cui,)):
124 | if negated:
125 | arr.append(("[NEGATED] %s" % res[0], res[1], res[2]))
126 | else:
127 | arr.append(res)
128 |
129 | return arr
130 |
131 |
132 | def lookup_code_meaning(self, cui, preferred=True, no_html=True):
133 | """ Return a string (an empty string if the cui is null or not found)
134 | by looking it up in our "descriptions" database.
135 | 		The "preferred" setting has the effect that only names from SNOMED
136 | 		(SNOMEDCT) and the Metathesaurus (MTH) will be reported. A lookup in
137 | our "descriptions" table is much faster than combing through the full
138 | MRCONSO table.
139 | """
140 | names = []
141 | for res in self.lookup_code(cui, preferred):
142 | if no_html:
143 | names.append("%s (%s) [%s]" % (res[0], res[1], res[2]))
144 | else:
145 | names.append("%s (%s: %s)" % (res[0], res[1], res[2]))
146 |
147 | 		comp = ", " if no_html else "<br/>\n"
148 | return comp.join(names) if len(names) > 0 else ''
149 |
150 |
151 |
152 | class SNOMED (object):
153 | sqlite_handle = None
154 |
155 | # -------------------------------------------------------------------------- Database Setup
156 | @classmethod
157 | def import_csv_into_table(cls, snomed_file, table_name):
158 | """ Import SNOMED CSV into our SQLite database.
159 | The SNOMED CSV files can be parsed by Python's CSV parser with the
160 | "excel-tab" flavor.
161 | """
162 |
163 | logging.debug('..> Importing SNOMED %s into snomed.db...' % table_name)
164 |
165 | # not yet imported, parse tab-separated file and import
166 | with open(snomed_file, 'rb') as csv_handle:
167 | cls.sqlite_handle.isolation_level = 'EXCLUSIVE'
168 | sql = cls.insert_query_for(table_name)
169 | reader = unicode_csv_reader(csv_handle, dialect='excel-tab')
170 | i = 0
171 | try:
172 | for row in reader:
173 | if i > 0: # first row is the header row
174 |
175 | # execute SQL (we just ignore duplicates)
176 | params = cls.insert_tuple_from_csv_row_for(table_name, row)
177 | try:
178 | cls.sqlite_handle.execute(sql, params)
179 | except Exception as e:
180 | sys.exit(u'Cannot insert %s: %s' % (params, e))
181 | i += 1
182 |
183 | # commit to file
184 | cls.sqlite_handle.commit()
185 | cls.did_import(table_name)
186 | cls.sqlite_handle.isolation_level = None
187 |
188 | except csv.Error as e:
189 | sys.exit('CSV error on line %d: %s' % (reader.line_num, e))
190 |
191 | logging.debug('..> %d concepts parsed' % (i-1))
192 |
193 |
194 | @classmethod
195 | def setup_tables(cls):
196 | """ Creates the SQLite tables we need, not the tables we deserve.
197 | """
198 | if cls.sqlite_handle is None:
199 | cls.sqlite_handle = SQLite.get('databases/snomed.db')
200 |
201 | # descriptions
202 | cls.sqlite_handle.create('descriptions', '''(
203 | concept_id INTEGER PRIMARY KEY,
204 | lang TEXT,
205 | term TEXT,
206 | isa VARCHAR,
207 | active INT
208 | )''')
209 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS isa_index ON descriptions (isa)")
210 |
211 | # relationships
212 | cls.sqlite_handle.create('relationships', '''(
213 | relationship_id INTEGER PRIMARY KEY,
214 | source_id INT,
215 | destination_id INT,
216 | rel_type INT,
217 | rel_text VARCHAR,
218 | active INT
219 | )''')
220 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS source_index ON relationships (source_id)")
221 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS destination_index ON relationships (destination_id)")
222 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_type_index ON relationships (rel_type)")
223 | cls.sqlite_handle.execute("CREATE INDEX IF NOT EXISTS rel_text_index ON relationships (rel_text)")
224 |
225 |
226 | @classmethod
227 | def insert_query_for(cls, table_name):
228 | """ Returns the insert query needed for the given table
229 | """
230 | if 'descriptions' == table_name:
231 | return '''INSERT OR IGNORE INTO descriptions
232 | (concept_id, lang, term, isa, active)
233 | VALUES
234 | (?, ?, ?, ?, ?)'''
235 | if 'relationships' == table_name:
236 | return '''INSERT OR IGNORE INTO relationships
237 | (relationship_id, source_id, destination_id, rel_type, active)
238 | VALUES
239 | (?, ?, ?, ?, ?)'''
240 | return None
241 |
242 |
243 | @classmethod
244 | def insert_tuple_from_csv_row_for(cls, table_name, row):
245 | if 'descriptions' == table_name:
246 | isa = ''
247 | if len(row) > 6:
248 | if '900000000000013009' == row[6]:
249 | isa = 'synonym'
250 | elif '900000000000003001' == row[6]:
251 | isa = 'full'
252 | return (int(row[4]), row[5], row[7], isa, int(row[2]))
253 | if 'relationships' == table_name:
254 | return (int(row[0]), int(row[4]), int(row[5]), int(row[7]), int(row[2]))
255 | return None
256 |
257 |
258 | @classmethod
259 | def did_import(cls, table_name):
260 | """ Allows us to set hooks after tables have been imported
261 | """
262 | if 'relationships' == table_name:
263 | cls.sqlite_handle.execute('''
264 | UPDATE relationships SET rel_text = 'isa' WHERE rel_type = 116680003
265 | ''')
266 | cls.sqlite_handle.execute('''
267 | UPDATE relationships SET rel_text = 'finding_site' WHERE rel_type = 363698007
268 | ''')
269 |
270 |
271 |
272 | class SNOMEDLookup (object):
273 | """ SNOMED lookup """
274 |
275 | sqlite_handle = None
276 |
277 |
278 | def __init__(self):
279 | self.sqlite = SQLite.get('databases/snomed.db')
280 |
281 | def lookup_code_meaning(self, snomed_id, preferred=True, no_html=True):
282 | """ Returns HTML for all matches of the given SNOMED id.
283 | The "preferred" flag here currently has no function.
284 | """
285 | if snomed_id is None or len(snomed_id) < 1:
286 | return ''
287 |
288 | sql = 'SELECT term, isa, active FROM descriptions WHERE concept_id = ?'
289 | names = []
290 |
291 | # loop over results
292 | for res in self.sqlite.execute(sql, (snomed_id,)):
293 | 			if not no_html and ('synonym' == res[1] or 0 == res[2]):
294 | 				names.append("<em>%s</em>" % res[0])  # hypothetical markup: emphasize synonyms and inactive terms in HTML output
295 | else:
296 | names.append(res[0])
297 |
298 | if no_html:
299 | return ", ".join(names) if len(names) > 0 else ''
300 | 		return "<br/>\n".join(names) if len(names) > 0 else ''
301 |
302 |
303 |
304 | class RxNormLookup (object):
305 | """ RxNorm lookup """
306 |
307 | sqlite_handle = None
308 |
309 |
310 | def __init__(self):
311 | self.sqlite = SQLite.get('databases/rxnorm.db')
312 |
313 | def lookup_code_meaning(self, rx_id, preferred=True, no_html=True):
314 | """ Return HTML for the meaning of the given code.
315 | If preferred is True (the default), only one match will be returned,
316 | looking for specific TTY and using the "best" one. """
317 | if rx_id is None or len(rx_id) < 1:
318 | return ''
319 |
320 | # retrieve all matches
321 | sql = 'SELECT STR, TTY, RXAUI FROM RXNCONSO WHERE RXCUI = ? AND LAT = "ENG"'
322 | found = []
323 | names = []
324 | 		format_str = "%s: %s [%s]" if no_html else '<span title="RXAUI: %s">%s [%s]</span>'  # filled with (RXAUI, STR, TTY); HTML variant is a stand-in
325 |
326 | # loop over them
327 | for res in self.sqlite.execute(sql, (rx_id,)):
328 | found.append(res)
329 |
330 | if len(found) > 0:
331 |
332 | # preferred name only
333 | if preferred:
334 | for tty in ['BN', 'IN', 'PIN', 'SBDC', 'SCDC', 'SBD', 'SCD', 'MIN']:
335 | for res in found:
336 | if tty == res[1]:
337 | names.append(format_str % (res[2], res[0], res[1]))
338 | break
339 | else:
340 | continue
341 | break
342 |
343 | if len(names) < 1:
344 | res = found[0]
345 | names.append(format_str % (res[2], res[0], res[1]))
346 |
347 | # return a list of all names
348 | else:
349 | for res in found:
350 | names.append(format_str % (res[2], res[0], res[1]))
351 |
352 | 		return "<br/>\n".join(names) if len(names) > 0 else ''
353 |
354 |
355 |
356 | # the standard Python CSV reader can't do unicode, here's the workaround
357 | def unicode_csv_reader(utf8_data, dialect=csv.excel, **kwargs):
358 | csv_reader = csv.reader(utf8_data, dialect=dialect, **kwargs)
359 | for row in csv_reader:
360 | yield [unicode(cell, 'utf-8') for cell in row]
361 |
362 |
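A usage sketch for the lookup classes above, assuming the SQLite databases under databases/ have been imported; C0027051 (myocardial infarction) and RXCUI 161 (acetaminophen) are example codes:

    from umls import UMLSLookup, RxNormLookup

    look = UMLSLookup()
    print look.lookup_code_meaning('C0027051')     # e.g. "Myocardial infarction (SNOMEDCT) [Disease or Syndrome]"
    print look.lookup_code_meaning('-C0027051')    # a leading dash marks the concept as negated

    rx = RxNormLookup()
    print rx.lookup_code_meaning('161')
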
--------------------------------------------------------------------------------