├── downcast
├── __init__.py
├── db
│ ├── __init__.py
│ ├── bcp
│ │ ├── __init__.py
│ │ ├── util.py
│ │ ├── types.py
│ │ └── cursor.py
│ ├── exceptions.py
│ ├── query.py
│ └── dwcbcp.py
├── output
│ ├── __init__.py
│ ├── mapping.py
│ ├── process.py
│ ├── patients.py
│ ├── log.py
│ ├── files.py
│ ├── enums.py
│ ├── numerics.py
│ ├── alerts.py
│ └── timemap.py
├── util.py
├── timeconv.py
├── bcpmerge.py
├── timestamp.py
├── attributes.py
├── main.py
├── server.py
├── shell.py
├── subprocess.py
└── messages.py
├── .gitattributes
├── .gitignore
├── server.conf.example
├── downcast.py
├── dwcsql.py
├── downcast-bcpdstfix.py
├── downcast-bcpmerge.py
├── dwctimeconv.py
├── test-extractor
├── test-archive
├── README
├── test-wave-message
├── test-dispatcher
├── INTERNALS
├── test-parsers
└── bcp-scripts
└── bulk-verify
/downcast/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/downcast/db/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/downcast/output/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py diff=python
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *#*
3 | *.orig
4 | *.rej
5 | *.patch
6 |
7 | *.pyc
8 | *.conf
9 |
--------------------------------------------------------------------------------
/server.conf.example:
--------------------------------------------------------------------------------
1 | # Example server.conf file - edit as needed.
2 |
3 | # Read input from a running SQL Server instance
4 | # (requires password authentication)
5 | [example-live]
6 | hostname = 192.168.123.45
7 | username = somebody
8 | password = 12341234
9 | database = Philips.PatientData
10 |
11 | # Read input from a single-day BCP data dump
12 | [example-bcp]
13 | type = bcp
14 | bcp-path = /data/dwc/2001-05-01
15 |
16 | # Read input from a multi-day BCP data dump
17 | # (each day should be stored in a separate directory, listed in order)
18 | [example-bcp-multiple]
19 | type = bcp
20 | bcp-path = /data/dwc/2001-05-01:/data/dwc/2001-05-02:/data/dwc/2001-05-03
21 |
--------------------------------------------------------------------------------
/downcast.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # downcast - tools for unpacking patient data from DWC
4 | #
5 | # Copyright (c) 2018 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 |
from downcast.main import main

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
22 |
--------------------------------------------------------------------------------
/dwcsql.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # dwcsql - simple interactive frontend for the DWC SQL database
4 | #
5 | # Copyright (c) 2018 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 |
from downcast.shell import main

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
22 |
--------------------------------------------------------------------------------
/downcast-bcpdstfix.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # downcast - tools for unpacking patient data from DWC
4 | #
5 | # Copyright (c) 2021 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 |
# NOTE(review): downcast/bcpdstfix.py does not appear in the repository
# listing -- confirm that this module actually exists.
from downcast.bcpdstfix import main

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
22 |
--------------------------------------------------------------------------------
/downcast-bcpmerge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # downcast - tools for unpacking patient data from DWC
4 | #
5 | # Copyright (c) 2021 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 |
from downcast.bcpmerge import main

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
22 |
--------------------------------------------------------------------------------
/dwctimeconv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # dwctimeconv - convert between time formats in a converted record
4 | #
5 | # Copyright (c) 2020 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 |
from downcast.timeconv import main

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
22 |
--------------------------------------------------------------------------------
/downcast/db/bcp/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | import os
20 | import re
21 | import bisect
22 | import struct
23 |
24 | from ..exceptions import *
25 | from .types import *
26 | from .connection import BCPConnection
27 |
# Module-level attributes required by the DB-API 2.0 specification (PEP 249).
apilevel = '2.0'
paramstyle = 'qmark'
threadsafety = 1

def connect():
    """Connect to a database consisting of a set of bcp-format files."""
    return BCPConnection()
35 |
--------------------------------------------------------------------------------
/downcast/output/mapping.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | from ..messages import PatientMappingMessage
20 |
class PatientMappingHandler:
    """Handler that records patient mapping messages into an archive."""

    def __init__(self, archive):
        # Archive that mapping information is associated with.
        self.archive = archive

    def send_message(self, chn, msg, source, ttl):
        """Handle a PatientMappingMessage; all other types are ignored."""
        if isinstance(msg, PatientMappingMessage):
            # Hold the message while recording the mapping, then confirm.
            source.nack_message(chn, msg, self)
            msg.origin.set_patient_id(msg.mapping_id, msg.patient_id)
            source.ack_message(chn, msg, self)

    def flush(self):
        """No-op: nothing is buffered by this handler."""
        pass
35 |
--------------------------------------------------------------------------------
/downcast/output/process.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | import os
20 | import sys
21 | import cProfile
22 | from multiprocessing import Process
23 |
24 | from ..util import setproctitle
25 |
class WorkerProcess(Process):
    """Process subclass that sets a descriptive title and can profile itself."""

    def run(self):
        name = self.name
        if name is not None:
            setproctitle('downcast:%s' % (name,))

        # If DOWNCAST_PROFILE_OUT is set, profile the target function and
        # write the stats to a per-process file named after this worker.
        profile_base = os.environ.get('DOWNCAST_PROFILE_OUT')
        if profile_base is None or name is None:
            Process.run(self)
        else:
            outfile = '%s.%s' % (profile_base, name)
            cProfile.runctx('Process.run(self)', globals(), locals(), outfile)
39 |
--------------------------------------------------------------------------------
/test-extractor:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from downcast.server import DWCDB
4 | from downcast.extractor import (Extractor, WaveSampleQueue, NumericValueQueue,
5 | EnumerationValueQueue, AlertQueue,
6 | PatientMappingQueue, PatientBasicInfoQueue,
7 | PatientDateAttributeQueue,
8 | PatientStringAttributeQueue, BedTagQueue)
9 | from downcast.timestamp import T
10 |
class TestHandler:
    """Prints a one-line summary for each message, then acknowledges it."""

    def send_message(self, channel, message, source, ttl):
        summary = '%s\t%s\t%s' % (message.timestamp, channel,
                                  type(message).__name__)
        print(summary)
        source.ack_message(channel, message, self)
16 |
DWCDB.load_config('server.conf')
db = DWCDB('demo')
ex = Extractor(db, '/tmp/downcast-extractor-test', fatal_exceptions = True)

ex.add_handler(TestHandler())

# All queues share the same start time and batch size.
st = None
queue_specs = [
    (WaveSampleQueue, 'waves'),
    (NumericValueQueue, 'numerics'),
    (EnumerationValueQueue, 'enums'),
    (AlertQueue, 'alerts'),
    (PatientMappingQueue, 'mapping'),
    (PatientBasicInfoQueue, 'patients'),
    (PatientStringAttributeQueue, 'strings'),
    (PatientDateAttributeQueue, 'dates'),
    (BedTagQueue, 'beds'),
]
for queue_class, queue_name in queue_specs:
    ex.add_queue(queue_class(queue_name, start_time = st,
                             messages_per_batch = 10))

for _ in range(100):
    ex.run()
45 |
--------------------------------------------------------------------------------
/downcast/db/exceptions.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
# Exception hierarchy following the DB-API 2.0 layout (PEP 249).

class Error(Exception):
    """Base exception type for database errors."""

class InterfaceError(Error):
    """Base exception type relating to the database interface."""

class DatabaseError(Error):
    """Base exception type relating to the database."""

class OperationalError(DatabaseError):
    """Exception caused by an error in database operation."""

class DataSyntaxError(OperationalError):
    """Exception caused by a malformed entry in the data file."""

class ProgrammingError(DatabaseError):
    """Exception caused by errors in the query syntax."""

class ParameterCountError(ProgrammingError):
    """Exception caused by supplying the wrong number of query parameters.

    The optional context argument identifies where the mismatch occurred.
    """
    def __init__(self, message, context = None):
        super().__init__(message)
        self.context = context

class DataError(DatabaseError):
    """Exception caused by an error in processed data."""

class IntegrityError(DatabaseError):
    """Exception caused by an error in database integrity."""

class InternalError(DatabaseError):
    """Exception caused by an internal database error."""

class NotSupportedError(DatabaseError):
    """Exception caused by an unsupported operation."""
64 |
--------------------------------------------------------------------------------
/test-archive:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import shutil
4 | from datetime import timedelta
5 |
6 | from downcast.server import DWCDB
7 | from downcast.extractor import (Extractor, WaveSampleQueue, NumericValueQueue,
8 | EnumerationValueQueue, AlertQueue,
9 | PatientMappingQueue, PatientBasicInfoQueue,
10 | PatientStringAttributeQueue,
11 | PatientDateAttributeQueue)
12 | from downcast.timestamp import T
13 | from downcast.output.archive import Archive
14 | from downcast.output.numerics import NumericValueHandler
15 | from downcast.output.waveforms import WaveSampleHandler
16 | from downcast.output.mapping import PatientMappingHandler
17 | from downcast.output.patients import PatientHandler
18 |
19 | DWCDB.load_config('server.conf')
20 |
def test(dest_dir, iterations):
    """Run a short extraction into dest_dir, resuming any saved state there."""
    db = DWCDB('demo')
    ex = Extractor(db, dest_dir, fatal_exceptions = True, debug = True)
    archive = Archive(dest_dir)
    # Attach one handler of each type, all writing to the same archive.
    for handler_class in (NumericValueHandler, PatientMappingHandler,
                          PatientHandler, WaveSampleHandler):
        ex.add_handler(handler_class(archive))

    pmq = PatientMappingQueue('mapping')
    pmdelay = timedelta(minutes = 30)
    ex.add_queue(pmq)

    # ex.add_queue(PatientBasicInfoQueue('patients'))
    # ex.add_queue(PatientStringAttributeQueue('strings'))
    # ex.add_queue(PatientDateAttributeQueue('dates'))

    st = T('2016-01-28 14:00:00.000 -05:00')
    ex.add_queue(NumericValueQueue('numerics', start_time = st,
                                   messages_per_batch = 100))
    ex.add_queue(WaveSampleQueue('waves', start_time = st,
                                 messages_per_batch = 100))
    for _ in range(iterations):
        ex.run()
    ex.flush()
50 |
# Several short runs in a row: later runs should resume from the state
# saved in the output directory by earlier ones.
shutil.rmtree('/tmp/downcast-extractor-test', ignore_errors = True)
for _ in range(4):
    test('/tmp/downcast-extractor-test', 5)

# One long run over the same span, into a fresh directory.
shutil.rmtree('/tmp/downcast-extractor-test2', ignore_errors = True)
test('/tmp/downcast-extractor-test2', 20)
59 |
--------------------------------------------------------------------------------
/downcast/util.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | import os
20 | import tempfile
21 |
# setproctitle is an optional third-party package; if it is not
# installed, fall back to a no-op so callers need not care.
try:
    from setproctitle import setproctitle
except ImportError:
    def setproctitle(title):
        pass
28 |
# Choose an implementation of fdatasync (flush a file descriptor's data
# to disk).  Probe os.fdatasync on a scratch file once at import time;
# if it is missing or unusable on this OS, fall back to os.fsync, which
# does strictly more work.
with tempfile.TemporaryFile() as _probe:
    try:
        os.fdatasync(_probe.fileno())
    except Exception:
        fdatasync = os.fsync
    else:
        fdatasync = os.fdatasync
38 |
# Translation table: control characters (and DEL) become spaces, and a
# handful of common non-ASCII symbols become ASCII approximations.
_substitutions = dict.fromkeys(range(32), ' ')
_substitutions[127] = ' '
_substitutions.update({
    '\N{HEAVY ASTERISK}': '*',                  # ✱
    '\N{MICRO SIGN}': 'u',                      # µ
    '\N{DEGREE SIGN}': 'deg',                   # °
    '\N{SUBSCRIPT TWO}': '2',                   # ₂
    '\N{SUPERSCRIPT TWO}': '^2',                # ²
    '\N{GREEK CAPITAL LETTER DELTA}': 'Delta',  # Δ
})
_ascii_substitutions = str.maketrans(_substitutions)

def string_to_ascii(string):
    """
    Convert various characters to approximate ASCII equivalents.

    >>> string_to_ascii('✱✱✱ VTach')
    '*** VTach'
    >>> string_to_ascii('µV')
    'uV'
    >>> string_to_ascii('°C')
    'degC'
    >>> string_to_ascii('SpO₂')
    'SpO2'
    >>> string_to_ascii('ml/m²')
    'ml/m^2'
    >>> string_to_ascii('ΔTemp')
    'DeltaTemp'
    """
    return string.translate(_ascii_substitutions)
69 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Downcast
2 | --------
3 |
4 | This repository contains tools for processing and converting data from
5 | the DWC system into WFDB and other open formats.
6 |
7 |
8 | Requirements
9 | ------------
10 |
11 | Python 3.4 or later is required. A Unix-like platform is required -
12 | Debian and CentOS have been tested; Mac OS might work as well. This
13 | package will not work on Windows.
14 |
15 | For processing data in BCP format, the ply package is required.
16 |
17 | For processing data directly from SQL Server, the pymssql package is
18 | required. (This package is now mostly abandoned and should probably
19 | be replaced with a different backend.)
20 |
21 |
22 | Quick start
23 | -----------
24 |
25 | If you have access to the demo DWC database, download and unpack these
26 | files (about 30 GB uncompressed).  You will then need to create a
27 | "server.conf" file, which should look like this:
28 |
29 | [demo]
30 | type = bcp
31 | bcp-path = /home/user/dwc-demo
32 |
33 | (where /home/user/dwc-demo is the directory containing "Alert.dat",
34 | "Alert.fmt", etc.) See server.conf.example for other examples.
35 |
36 | The demo database spans the time period from 1:00 AM EDT on October
37 | 31, 2004, to midnight EST on November 1. To parse and convert a slice
38 | of the data (say, from 10:00 to 10:05 AM), first we initialize an
39 | output directory and set the starting time:
40 |
41 | $ ./downcast.py --init --server demo \
42 | --output-dir /home/user/dwc-test-output \
43 | --start "2004-10-31 10:00:00.000 -05:00"
44 |
45 | Then run a batch conversion while specifying the end time:
46 |
47 | $ ./downcast.py --batch --server demo \
48 | --output-dir /home/user/dwc-test-output \
49 | --end "2004-10-31 10:05:00.000 -05:00"
50 |
51 | If we wanted to keep going, we could run the same --batch command
52 | again, increasing the end timestamp each time. We don't need to
53 | specify the starting timestamp for --batch, since the "current"
54 | timestamp is saved automatically.
55 |
56 | To "finalize" the output (and forcibly truncate all patient records at
57 | the specified end time), we use the --terminate option. This wouldn't
58 | be done for a real database conversion, but it's useful for a simple
59 | test:
60 |
61 | $ ./downcast.py --batch --server demo \
62 | --output-dir /home/user/dwc-test-output \
63 | --end "2004-10-31 10:05:00.000 -05:00" \
64 | --terminate
65 |
66 | This should result in a bunch of patient records in WFDB format,
67 | stored in /home/user/dwc-test-output.
68 |
--------------------------------------------------------------------------------
/test-wave-message:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from collections import OrderedDict
4 | import pymssql
5 | import sys
6 |
7 | from downcast.dispatcher import Dispatcher
8 | from downcast.server import DWCDB
9 | from downcast.parser import WaveSampleParser
10 | from downcast.messages import WaveSampleMessage
11 |
class TestHandler:
    """Groups wave sample messages by sequence number and prints, for each
    sequence number, the set of wave IDs observed at that time.

    Messages for the current sequence number are held (nacked) until a
    message with a later sequence number arrives, at which point the
    batch is flushed and replayed for acknowledgement.
    """

    def __init__(self):
        # Sequence number whose batch was most recently flushed.
        self.prev_sequence_number = None
        # Sequence number currently being accumulated.
        self.cur_sequence_number = None
        # Wave IDs seen at cur_sequence_number (used as an ordered set).
        self.cur_wave_ids = OrderedDict()

    def send_message(self, chn, msg, source, ttl):
        if isinstance(msg, WaveSampleMessage):
            # Hold the message; it is acked below only once its batch
            # is complete.
            source.nack_message(chn, msg, self)

            # Deliberate crash to exercise the dispatcher's error path.
            if msg.wave_id == 27 and msg.sequence_number == 507278718464:
                raise Exception('we crash now')

            if self.cur_sequence_number is None:
                self.cur_sequence_number = msg.sequence_number

            if msg.sequence_number == self.cur_sequence_number:
                self.cur_wave_ids[msg.wave_id] = 1

            # A later sequence number (or an expiring message) closes the
            # current batch: request replay so held messages come back
            # and can be acked against prev_sequence_number.
            if msg.sequence_number > self.cur_sequence_number or ttl == 0:
                source.nack_message(chn, msg, self, replay = True)
                sys.stdout.write('\nT=%d: ' % self.cur_sequence_number)
                self.prev_sequence_number = self.cur_sequence_number
                self.cur_sequence_number = msg.sequence_number
                self.cur_wave_ids.clear()

            if msg.sequence_number == self.prev_sequence_number:
                sys.stdout.write(' [%d]' % msg.wave_id)
                source.ack_message(chn, msg, self)
            elif msg.sequence_number < self.cur_sequence_number:
                # NOTE(review): presumably indicates a dispatcher ordering
                # bug when printed -- confirm against dispatcher semantics.
                print('*** message out of sequence (%d < %d)'
                      % (msg.sequence_number, self.cur_sequence_number))

    def flush(self):
        return
47 |
class TestDeadLetterHandler:
    """Reports wave messages that were never acknowledged by any handler."""

    def send_message(self, chn, msg, source, ttl):
        print('*** Dead letter (T=%d, W=%d)' % (msg.sequence_number, msg.wave_id))
51 |
class TestGenerator:
    """Feeds parsed messages from the demo database into a dispatcher."""

    def __init__(self):
        DWCDB.load_config('server.conf')
        self.db = DWCDB('demo')
        self.dispatcher = Dispatcher()

    def parse(self, parser):
        """Dispatch every message produced by the given parser."""
        for message in self.db.get_messages(parser):
            self.dispatcher.send_message(message.mapping_id, message, self, 100)

    # This generator does not need to track acknowledgements.
    def ack_message(self, chn, msg, recipient):
        return

    def nack_message(self, chn, msg, recipient):
        return
66 |
generator = TestGenerator()
generator.dispatcher.add_handler(TestHandler())
generator.dispatcher.add_dead_letter_handler(TestDeadLetterHandler())

parser = WaveSampleParser(limit = 500,
                          mapping_id = '85965f09-e8c2-4e79-8c1c-cb1775bd2550')
generator.parse(parser)

print('\n--- terminating ---')
generator.dispatcher.terminate()
generator.dispatcher.flush()
print('')
80 |
--------------------------------------------------------------------------------
/downcast/db/bcp/util.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2021 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | import io
20 | import os
21 | import tempfile
22 |
# Probe, once at import time, whether opening /dev/fd/N yields an
# *independent* open file description (OFD) rather than dup()-like
# sharing of the original descriptor's OFD.
#
# Method: write to a temp file so its offset is nonzero, then reopen it
# via /dev/fd.  If the reopened handle refers to the same file
# (samefile) but reports a *different* offset, the OFD must be
# independent (presumably Linux-style /dev/fd, which reopens the
# underlying file; BSD-style /dev/fd dups the descriptor, so the
# offsets would match -- TODO confirm per-OS behavior).
_dev_fd_yields_independent_ofd = False
with tempfile.TemporaryFile() as f1:
    f1.write(b'what hath god wrought')
    f1.flush()
    try:
        with open('/dev/fd/%d' % f1.fileno(), 'rb') as f2:
            if (os.path.samefile(f1.fileno(), f2.fileno())
                and (os.lseek(f1.fileno(), 0, os.SEEK_CUR)
                     != os.lseek(f2.fileno(), 0, os.SEEK_CUR))):
                _dev_fd_yields_independent_ofd = True
    except OSError:
        # No usable /dev/fd filesystem; leave the flag False.
        pass
35 |
def open_copy(fileobj, *args, **kwargs):
    """
    Open a new file object that refers to the same underlying file.

    The input must be a Python file object.  The result is an
    independent file object referring to the same file.

    If the operating system provides a /dev/fd filesystem and that
    filesystem yields independent OFDs, this works even if the original
    file has been deleted or renamed.  Otherwise, the original filename
    (fileobj.name) is reopened instead, which fails if the file has
    been deleted or renamed.
    """
    # Unwrap text and buffering layers to reach the raw FileIO object.
    if isinstance(fileobj, io.TextIOWrapper):
        fileobj = fileobj.buffer
    if isinstance(fileobj, io.BufferedReader):
        fileobj = fileobj.raw
    if not isinstance(fileobj, io.FileIO):
        raise TypeError('not a native file object')

    if _dev_fd_yields_independent_ofd:
        # Reopening via /dev/fd gives a fresh OFD for the same file.
        return open('/dev/fd/%d' % fileobj.fileno(), *args, **kwargs)

    # Fall back to reopening by name, then verify it is still the
    # same underlying file (i.e. it was not renamed or replaced).
    original_path = fileobj.name
    original_fd = fileobj.fileno()
    reopened = open(original_path, *args, **kwargs)
    try:
        if not os.path.samefile(original_fd, reopened.fileno()):
            raise FileNotFoundError(0, 'File has been renamed', original_path)
        return reopened
    except OSError:
        reopened.close()
        raise
73 |
--------------------------------------------------------------------------------
/test-dispatcher:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from weakref import WeakSet
4 | from downcast.dispatcher import Dispatcher
5 |
class TestMessage:
    """Trivial message carrying a sequence number and a text payload."""

    def __init__(self, seqnum, msgstr):
        self.seqnum = seqnum
        self.msgstr = msgstr

    def __str__(self):
        return '[%d:%s]' % (self.seqnum, self.msgstr)
13 |
14 | # Note this is NOT meant as an example of how you should write a
15 | # handler, it's just to exercise the dispatcher logic
class TestHandler:
    """Consumes test messages strictly in sequence-number order, relying on
    the dispatcher's replay mechanism to reorder out-of-order arrivals.

    (Deliberately NOT an example of how to write a handler; it only
    exercises the dispatcher logic.)
    """

    def __init__(self):
        # Next sequence number this handler is willing to accept.
        self.seqnum = 0

    def send_message(self, channel, msg, dispatcher, ttl):
        print("  (%d)" % msg.seqnum)
        if msg.seqnum < self.seqnum:
            print("  ignored out-of-date %d" % msg.seqnum)
        else:
            # An expiring message forces us to skip ahead to it.
            if ttl == 0:
                self.seqnum = msg.seqnum

            # Defer by default; the dispatcher will replay held messages.
            dispatcher.nack_message(channel, msg, self, replay = True)
            if msg.seqnum == self.seqnum:
                print("  >> RECEIVED %d: %s" % (msg.seqnum, msg.msgstr))
                self.seqnum = msg.seqnum + 1
                dispatcher.ack_message(channel, msg, self)

    def flush(self):
        return
36 |
class TestDeadLetterHandler:
    """Reports messages that expired without being acknowledged."""

    def send_message(self, channel, msg, dispatcher, ttl):
        print("  Dead letter: %s" % msg)
40 |
class TestGenerator:
    """Creates test messages and checks that none are leaked after dispatch."""

    def __init__(self):
        self.dispatcher = Dispatcher(fatal_exceptions = True)
        # Weak references to acknowledged messages: anything still alive
        # in here after dispatch indicates a reference leak.
        self.dead_messages = WeakSet()

    def gen_message(self, channel, seqnum, msgstr):
        message = TestMessage(seqnum, msgstr)
        print("created %d" % seqnum)
        self.dispatcher.send_message(channel, message, self, 10)
        message = None
        leaked = len(self.dead_messages)
        if leaked != 0:
            print("*** LEAKED: %d" % leaked)

    def ack_message(self, channel, msg, recipient):
        self.dead_messages.add(msg)
        print("deleted %d" % msg.seqnum)

    def nack_message(self, channel, msg, recipient):
        print("deferred %d" % msg.seqnum)
60 |
g = TestGenerator()
g.dispatcher.add_handler(TestHandler())
g.dispatcher.add_dead_letter_handler(TestDeadLetterHandler())

# Feed messages in a deliberately scrambled order to exercise the
# dispatcher's deferral and replay logic.
test_messages = [
    (0, "test zero"), (1, "test one"), (5, "test five"), (2, "test two"),
    (3, "test three"), (4, "test four"), (-1000, "way out of order"),
    (6, "test six"), (7, "test seven"), (15, "test fifteen"),
    (14, "test fourteen"), (13, "test thirteen"), (12, "test twelve"),
    (11, "test eleven"), (10, "test ten"), (9, "test nine"),
    (8, "test eight"), (17, "test seventeen"),
]
for seqnum, text in test_messages:
    g.gen_message('x', seqnum, text)
print("--- flushing ---")
g.dispatcher.flush()
print("--- terminating ---")
g.dispatcher.terminate()
89 |
--------------------------------------------------------------------------------
/downcast/timeconv.py:
--------------------------------------------------------------------------------
1 | #
2 | # dwctimeconv - convert between time formats in a converted record
3 | #
4 | # Copyright (c) 2020 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import argparse
20 | import json
21 | import os
22 | import re
23 | import sys
24 |
25 | from .timestamp import T
26 | from .output.archive import ArchiveRecord
27 |
def main():
    """Convert between time formats in a converted record.

    Each command-line argument is a wall-clock timestamp, a DWC
    sequence number ('S' + digits), or a WFDB counter value ('c' +
    digits).  For each argument, print the equivalent time, sequence
    number, and counter value ('-' where the conversion is unknown.)
    """
    p = argparse.ArgumentParser()
    p.add_argument('--record', metavar = 'PATH', default = '.')
    p.add_argument('timestamps', metavar = 'TIMESTAMP', nargs = '+')
    opts = p.parse_args()

    rec = ArchiveRecord(path = opts.record, servername = 'unknown',
                        record_id = os.path.basename(opts.record),
                        datestamp = 'unknown')
    seqnum0 = rec.seqnum0()

    for ts in opts.timestamps:
        # Start with all three values unknown; previously, whichever
        # values a branch didn't assign were left unbound, causing a
        # NameError below (e.g. 'S...' input when seqnum0 is None.)
        time = seqnum = counter = None

        if re.fullmatch(r'S\d+', ts):
            # sequence number
            seqnum = int(ts[1:])
            time = rec.time_map.get_time(seqnum)
            if seqnum0 is not None:
                counter = seqnum - seqnum0
        elif re.fullmatch(r'c\d+', ts):
            # counter value
            counter = int(ts[1:])
            if seqnum0 is not None:
                seqnum = seqnum0 + counter
                time = rec.time_map.get_time(seqnum)
        else:
            # wall clock timestamp
            try:
                time = T(ts)
            except ValueError:
                sys.stderr.write('%s: invalid argument: %s\n' % (sys.argv[0], ts))
                sys.stderr.write('valid timestamp formats:\n')
                sys.stderr.write(' YYYY-MM-DD HH:MM:SS.SSS +ZZ:ZZ\n')
                sys.stderr.write(' S######### (DWC sequence number)\n')
                sys.stderr.write(' c######### (WFDB counter value)\n')
                sys.exit(1)

            seqnum = rec.time_map.get_seqnum(time)
            # get_seqnum may return None; guard the subtraction.
            if seqnum is not None and seqnum0 is not None:
                counter = seqnum - seqnum0

        time_str = '-' if time is None else str(time)
        seqnum_str = '-' if seqnum is None else 'S%s' % seqnum
        counter_str = '-' if counter is None else 'c%s' % counter

        print('%-24s\t%-8s\t%-8s' % (time_str, seqnum_str, counter_str))
84 |
--------------------------------------------------------------------------------
/downcast/output/patients.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from ..messages import (PatientBasicInfoMessage,
20 | PatientDateAttributeMessage,
21 | PatientStringAttributeMessage)
22 |
class PatientHandler:
    """Output handler that records patient information messages.

    Each supported message is deferred (nacked) while its record is
    looked up, logged to the record's '_phi_patient_info' file, and
    then acknowledged.  Messages whose record is not yet known are
    left deferred.
    """

    def __init__(self, archive):
        self.archive = archive

    def send_message(self, chn, msg, source, ttl):
        if isinstance(msg, PatientBasicInfoMessage):
            source.nack_message(chn, msg, self)
            record = self.archive.get_record(msg)
            if record is None:
                return
            for (key, value) in (('BedLabel', msg.bed_label),
                                 ('Alias', msg.alias),
                                 ('Category', msg.category),
                                 ('Height', msg.height),
                                 ('HeightUnit', msg.height_unit),
                                 ('Weight', msg.weight),
                                 ('WeightUnit', msg.weight_unit),
                                 ('PressureUnit', msg.pressure_unit),
                                 ('PacedMode', msg.paced_mode),
                                 ('ResuscitationStatus',
                                  msg.resuscitation_status),
                                 ('AdmitState', msg.admit_state),
                                 ('ClinicalUnit', msg.clinical_unit),
                                 ('Gender', msg.gender)):
                self._log_info(record, msg, key, value)
            source.ack_message(chn, msg, self)

        elif isinstance(msg, PatientDateAttributeMessage):
            self._log_attribute(chn, msg, source, 'd')
        elif isinstance(msg, PatientStringAttributeMessage):
            self._log_attribute(chn, msg, source, 's')

    def _log_attribute(self, chn, msg, source, prefix):
        # Shared path for date ('d') and string ('s') attributes.
        source.nack_message(chn, msg, self)
        record = self.archive.get_record(msg)
        if record is None:
            return
        self._log_info(record, msg, '%s:%s' % (prefix, msg.name), msg.value)
        source.ack_message(chn, msg, self)

    def _log_info(self, record, msg, key, value):
        logfile = record.open_log_file('_phi_patient_info')
        logfile.append('%s,%s,%s' % (msg.timestamp, _escape(key),
                                     _escape(str(value))))

    def flush(self):
        self.archive.flush()
71 |
# Characters escaped in the patient-info log: ASCII control characters,
# DEL, and the delimiter/quote characters.
_escape_chars = [*range(32), 127, *(ord(c) for c in ',"\'\\')]
_escape_table = str.maketrans({c: '\\%03o' % c for c in _escape_chars})

def _escape(s):
    """Replace each special character in s with a \\ooo octal escape."""
    return s.translate(_escape_table)
76 |
--------------------------------------------------------------------------------
/downcast/db/bcp/types.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
import decimal
import uuid
from datetime import date, datetime, time, timezone

from ... import timestamp
24 |
class BCPType:
    """Base class for BCP database column types.

    Subclasses supply from_bytes(), which turns the raw byte string
    read from the BCP file into the appropriate Python value, and may
    also override from_param(), which maps a value passed to execute()
    into the type used for comparison (the identity function by
    default.)  These are deliberately written without a 'self'
    parameter: they are called directly on the class object.
    """

    # def from_bytes(b):
    #     return b

    def from_param(p):
        # Identity conversion: compare parameters as given.
        return p
44 |
45 | # DB-API types
46 |
class BINARY(BCPType):
    """BCP type for a binary column."""
    # Binary columns pass through as the raw byte string.
    def from_bytes(b):
        return b
51 |
class STRING(BCPType):
    """BCP type for a string column.

    A single NUL byte denotes the empty string; any other value is
    decoded as UTF-8.
    """
    def from_bytes(b):
        return '' if b == b'\0' else b.decode('UTF-8')
59 |
class NUMBER(BCPType):
    """BCP type for a real number column."""
    # Parse the decimal text exactly, avoiding float rounding.
    def from_bytes(b):
        return decimal.Decimal(b.decode())
64 |
class DATETIME(BCPType):
    """BCP type for a timestamp column."""
    # Values are parsed with the project's T class, which accepts the
    # MS SQL time string format.
    def from_bytes(b):
        return timestamp.T(b.decode())
    def from_param(p):
        return timestamp.T(p)
71 |
class ROWID(BCPType):
    """BCP type for a row-ID column."""
    # Row IDs are plain integer text.
    def from_bytes(b):
        return int(b)
76 |
77 | # Additional types
78 |
class INTEGER(BCPType):
    """BCP type for an integer column."""
    # Plain integer text.
    def from_bytes(b):
        return int(b)
83 |
class BOOLEAN(BCPType):
    """BCP type for a boolean column (stored as integer text;
    nonzero is true.)"""
    def from_bytes(b):
        return int(b) != 0
    def from_param(p):
        return bool(p)
90 |
class UUID(BCPType):
    """BCP type for a UUID column, parsed from its text form."""
    def from_bytes(b):
        text = b.decode()
        return uuid.UUID(text)
    def from_param(p):
        return uuid.UUID(p)
97 |
# DB-API conversion functions (PEP 249 module constructors)

Binary = bytes

# Bug fix: these previously aliased datetime.date / datetime.time,
# which are *methods* of the datetime class (not the date/time
# classes), so Date(year, month, day) raised TypeError.  Alias the
# real constructor classes instead.
Date = date
Time = time

def Timestamp(year, month, day, hour, minute, second):
    """Construct a timezone-aware (UTC) timestamp from components."""
    return datetime(year, month, day, hour, minute, second,
                    tzinfo = timezone.utc)

def TimestampFromTicks(ticks):
    """Construct a UTC timestamp from a POSIX time in seconds."""
    return datetime.fromtimestamp(ticks, tz = timezone.utc)

def DateFromTicks(ticks):
    """Construct a date (UTC calendar day) from a POSIX time."""
    return TimestampFromTicks(ticks).date()

def TimeFromTicks(ticks):
    """Construct a UTC time of day from a POSIX time."""
    return TimestampFromTicks(ticks).time()
116 |
--------------------------------------------------------------------------------
/downcast/bcpmerge.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2021 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import argparse
20 | import heapq
21 | import os
22 |
23 | from .db import dwcbcp
24 | from .messages import (AlertMessage, BedTagMessage, EnumerationValueMessage,
25 | NumericValueMessage, PatientBasicInfoMessage,
26 | PatientDateAttributeMessage, PatientMappingMessage,
27 | PatientStringAttributeMessage, WaveSampleMessage,
28 | bcp_format_description, bcp_format_message)
29 | from .parser import (AlertParser, BedTagParser, EnumerationValueParser,
30 | NumericValueParser, PatientBasicInfoParser,
31 | PatientDateAttributeParser, PatientMappingParser,
32 | PatientStringAttributeParser, WaveSampleParser)
33 | from .timestamp import T
34 |
def merge_files(table_abbr, input_files, output_data_file,
                output_format_file = None, start = None, end = None):
    """Merge several BCP data files for one table into a single file.

    table_abbr:         abbreviated table name (e.g. 'WaveSample')
    input_files:        iterable of (data_file, format_file) pairs
    output_data_file:   path of the merged BCP data file to write
    output_format_file: if not None, also write a BCP format file
                        describing the merged table
    start, end:         optional time bounds, passed to the parser as
                        time_ge / time_lt

    Rows are parsed into message objects, merged in timestamp order,
    and re-serialized in BCP format.
    """
    parser_message_types = {
        'Alert': (AlertParser,
                  AlertMessage),
        'BedTag': (BedTagParser,
                   BedTagMessage),
        'EnumerationValue': (EnumerationValueParser,
                             EnumerationValueMessage),
        'NumericValue': (NumericValueParser,
                         NumericValueMessage),
        'Patient': (PatientBasicInfoParser,
                    PatientBasicInfoMessage),
        'PatientDateAttribute': (PatientDateAttributeParser,
                                 PatientDateAttributeMessage),
        'PatientMapping': (PatientMappingParser,
                           PatientMappingMessage),
        'PatientStringAttribute': (PatientStringAttributeParser,
                                   PatientStringAttributeMessage),
        'WaveSample': (WaveSampleParser,
                       WaveSampleMessage),
    }
    (parser_type, message_type) = parser_message_types[table_abbr]
    table = '_Export.%s_' % table_abbr

    input_files = list(input_files)
    # Keep every connection and cursor alive in these lists: the
    # message iterators below read from them lazily, so they must not
    # be garbage-collected until the merge loop has finished.
    dbs = []
    cursors = []
    message_iters = []
    for (data_file, format_file) in input_files:
        db = dwcbcp.DWCBCPConnection([])
        db.add_data_file(table, data_file, format_file)
        dbs.append(db)
        cursor = db.cursor()
        cursors.append(cursor)
        parser = parser_type(limit = None, dialect = 'sqlite',
                             paramstyle = dwcbcp.paramstyle,
                             time_ge = start, time_lt = end)
        message_iter = parser.parse(origin = None, cursor = cursor)
        message_iters.append(message_iter)

    # Lazy k-way merge; each input iterator is assumed to already be
    # sorted by timestamp.
    with open(output_data_file, 'wb') as outf:
        for message in heapq.merge(*message_iters,
                                   key = lambda x: x.timestamp):
            outf.write(bcp_format_message(message))

    if output_format_file is not None:
        with open(output_format_file, 'w') as fmtf:
            fmtf.write(bcp_format_description(message_type))
84 |
def _parse_timestamp(arg):
    """argparse type callback: parse a timestamp argument into a T.

    Raises argparse.ArgumentTypeError on malformed input so argparse
    reports a clean usage error.  (The exception must be qualified
    with the module name - only 'import argparse' is in scope.)
    """
    try:
        return T(arg)
    except Exception:
        raise argparse.ArgumentTypeError(
            "%r is not in the format 'YYYY-MM-DD HH:MM:SS.SSS +ZZ:ZZ'" % arg)
91 |
def main():
    """Command-line entry point for merging BCP data files."""
    p = argparse.ArgumentParser()
    p.add_argument('-t', '--table', metavar = 'TABLE')
    p.add_argument('-f', '--format-file',
                   metavar = 'TABLE.fmt', required = True)
    p.add_argument('-o', '--output-file',
                   metavar = 'OUTPUT.dat', required = True)
    p.add_argument('--start', metavar = 'TIME', type = _parse_timestamp)
    p.add_argument('--end', metavar = 'TIME', type = _parse_timestamp)
    p.add_argument('input_files', metavar = 'INPUT.dat', nargs = '+')
    opts = p.parse_args()

    # Infer the table name from the format file's basename if -t was
    # not given.
    if opts.table is not None:
        table_abbr = opts.table
    else:
        table_abbr, _ = os.path.splitext(os.path.basename(opts.format_file))

    input_files = [(path, opts.format_file) for path in opts.input_files]

    # Write a format file alongside the output data file, named after
    # the output file's basename.
    out_dir = os.path.dirname(opts.output_file)
    out_abbr, _ = os.path.splitext(os.path.basename(opts.output_file))
    output_format_file = os.path.join(out_dir, out_abbr + '.fmt')

    merge_files(table_abbr, input_files, opts.output_file,
                output_format_file = output_format_file,
                start = opts.start, end = opts.end)
117 |
--------------------------------------------------------------------------------
/downcast/output/log.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import heapq
20 | import re
21 |
22 | _timestamp_pattern = re.compile(b'S?[0-9]+\n')
23 |
class ArchiveLogReader:
    """Class for reading log entries from a mostly-sorted input file.

    Each line in the input file is either a data record, a timestamp,
    or a sequence number.

    Timestamps are written as a decimal integer (interpreted as a
    string of digits, giving the UTC year, month, day, hour, minute,
    second, and microsecond).

    Sequence numbers are written as the letter 'S' followed by a
    decimal integer (interpreted as the number of milliseconds since
    the epoch.)

    All other lines in the file are treated as data records, and are
    associated with the preceding timestamp and sequence number (thus
    allowing basic compression, while keeping the file format
    extremely simple.)

    When reading the file, data records are returned in order (sorting
    first by sequence number, then by timestamp, then by order in the
    input file.)  This will be done efficiently if the input file is
    mostly sorted to begin with (and less efficiently otherwise.)

    No attempt is made to remove duplicate or invalid records - this
    must be done by the caller.

    If the file is modified after being opened, then garbage in,
    garbage out.
    """

    def __init__(self, filename, allow_missing = False):
        # Open the file
        try:
            fp = open(filename, 'rb')
        except FileNotFoundError:
            if allow_missing:
                # Missing file behaves as an empty log; missing()
                # reports True.
                self._fp = None
                self._subsequences = None
                return
            else:
                raise
        self._fp = fp
        # Heap of sorted-subsequence start positions, populated as a
        # side effect of unsorted_items() and consumed by
        # sorted_items().
        self._subsequences = None

    def close(self):
        """Close the underlying file and discard cached state."""
        if self._fp:
            self._fp.close()
        self._subsequences = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def missing(self):
        # True if the file did not exist (only with allow_missing.)
        return (self._fp is None)

    def unsorted_items(self):
        """Yield (seqnum, timestamp, line) tuples in file order.

        As a side effect, records the file offset at which each sorted
        subsequence of records begins, for later use by
        sorted_items().
        """
        fp = self._fp
        if not fp:
            return
        fp.seek(0)
        subseq = []
        prev_t = None
        # Records before any timestamp/seqnum line get (0, 0).
        sn = ts = 0
        t = (sn, ts)
        for line in fp:
            if _timestamp_pattern.fullmatch(line):
                if line[0] == 83: # ASCII 'S'
                    sn = int(line[1:])
                    t = (sn, ts)
                else:
                    ts = int(line)
                    t = (sn, ts)
            else:
                yield (sn, ts, line)
                if not subseq or t < prev_t:
                    # The sort key decreased (or this is the first
                    # data record): a new sorted subsequence starts at
                    # this line's offset.
                    fpos = fp.tell() - len(line)
                    subseq.append((sn, ts, fpos))
                prev_t = t
        heapq.heapify(subseq)
        self._subsequences = subseq

    def sorted_items(self):
        """Yield (seqnum, timestamp, line) tuples in sorted order.

        Performs a k-way merge of the sorted subsequences located by
        unsorted_items(), re-reading them from the file.
        """
        if self._subsequences is None:
            # No cached subsequence positions; scan the file once to
            # find them.
            for _ in self.unsorted_items():
                pass

        fp = self._fp
        subseq = self._subsequences
        self._subsequences = None
        while subseq:
            # Begin reading the earliest subsequence
            p = heapq.heappop(subseq)
            (sn, ts, fpos) = prev_p = p
            fp.seek(fpos)

            for line in fp:
                if _timestamp_pattern.fullmatch(line):
                    if line[0] == 83: # ASCII 'S'
                        sn = int(line[1:])
                        p = (sn, ts, fpos)
                    else:
                        ts = int(line)
                        p = (sn, ts, fpos)
                else:
                    if p < prev_p:
                        # reached end of subsequence
                        break
                    # Note that because the subsequences are disjoint,
                    # this comparison is valid even though fpos is not
                    # continuously updated.
                    elif subseq and p > subseq[0]:
                        # switch to other subsequence
                        fpos = fp.tell() - len(line)
                        p = heapq.heapreplace(subseq, (sn, ts, fpos))
                        (sn, ts, fpos) = prev_p = p
                        fp.seek(fpos)
                        # NOTE(review): next_p is never read - looks
                        # like leftover from an earlier version;
                        # verify before removing.
                        next_p = subseq[0]
                    else:
                        # continue with current subsequence
                        yield (sn, ts, line)
                        prev_p = p
149 |
--------------------------------------------------------------------------------
/INTERNALS:
--------------------------------------------------------------------------------
1 | Block diagram
2 | =============
3 |
4 | +-----------+
5 | | |
6 | | Extractor |
7 | | |
8 | +-----------+
9 | |
10 | | schedules
11 | | queries
12 | v
13 | +----------------+
14 | | | sets parameters +---------------+
15 | | | ------------------> | |
16 | | | generates SQL | MessageParser |
17 | | | <------------------ | |
18 | | ExtractorQueue | +---------------+
19 | | |
20 | | | +-------------+
21 | | | ----> | state files |
22 | | | +-------------+
23 | +----------------+
24 | sends | ^
25 | messages | | sends
26 | v | acknowledgements
27 | +------------+
28 | | |
29 | | Dispatcher |
30 | | |
31 | +------------+
32 | sends | ^
33 | messages | | sends
34 | v | acknowledgements
35 | +--------------+
36 | | | +--------------+
| OutputHandler| ----> | output files |
38 | | | +--------------+
39 | +--------------+
40 |
41 |
42 | Extractor
43 | =========
44 |
45 | The Extractor manages the overall flow of the conversion process:
46 | deciding the order that queries are issued.
47 |
48 | Messages will need to be retrieved from many different sources,
49 | represented by "queues", and then passed on to the dispatcher and
50 | output handlers. The task of the extractor is to decide which
51 | queue(s) to read.
52 |
53 | (A "message", by the way, is an object that will generally correspond
54 | to a single row in a single table of the DWC database, but this may
55 | not always be the case.)
56 |
57 |
58 | ExtractorQueue
59 | ==============
60 |
61 | An ExtractorQueue is created for each input data source (~ each
62 | database table that is to be polled.)
63 |
64 | The ExtractorQueue, with the help of a MessageParser, determines what
65 | SQL queries are to be issued, making sure that no messages are missed
66 | while avoiding (as much as possible) querying the same message more
67 | than once. This is closely related to the task of tracking which
68 | messages have been seen and acknowledged so far.
69 |
70 | In general, there will be some "earliest unacked" timestamp (before
71 | which, all messages have been acknowledged), and some "latest seen"
72 | timestamp (after which, no messages have yet been seen.) In between
73 | are the messages that have been seen but not yet acknowledged.
74 |
75 | When conversion is halted, the queue will need to save a state file
76 | containing:
77 |
78 | - the "earliest unacked" timestamp
79 |
80 | - a list of all the messages after that point that have already been
81 | acknowledged
82 |
83 | so that when conversion is resumed, it can resume from the same
84 | point, and ideally ignore all messages that have already been
85 | processed. The format of this state file needs to be determined; it
86 | must be fairly simple and robust, so that newer versions of the
87 | program can read state files created by older versions, and vice
88 | versa.
89 |
90 |
91 | MessageParser
92 | =============
93 |
94 | A MessageParser handles the actual details of the database structure:
95 | how to translate a particular request for messages into an SQL
96 | statement, and how to translate the results into the appropriate
97 | message type. If details of the database are changed from one DWC
98 | version to another, this is where they will need to be addressed.
99 |
100 |
101 | Dispatcher
102 | ==========
103 |
104 | The Dispatcher keeps track of all messages that have been received by
105 | the various queues, and passes them on to the various output
106 | handlers.
107 |
108 | The chief purpose of having an intermediate dispatcher object is to
109 | ensure that all messages, whatever their origin, will eventually
110 | expire, and will not be kept in "seen but not yet acknowledged" state
111 | indefinitely.
112 |
113 |
114 | OutputHandler
115 | =============
116 |
117 | The various OutputHandlers are responsible for interpreting the
118 | incoming messages and storing them in the appropriate output files.
119 |
120 | When a handler receives a message, it can do several things with it:
121 |
122 | - ignore it, implicitly indicating that the handler doesn't know
123 | what to do with it;
124 |
125 | - acknowledge it, indicating that its contents have now been saved
126 | to the appropriate output file, and the message may now be
127 | discarded;
128 |
129 | - or defer it, indicating that the handler is interested in this
130 | message but is unable to process it immediately.
131 |
132 | Deferring a message can occur for several reasons:
133 |
134 | - because the handler requires additional metadata that is not yet
135 | available;
136 |
137 | - because the handler wants to aggregate all simultaneous events
138 | (e.g., numerics or wave samples) in a single file, and it hasn't
139 | yet received all of the messages for this time period;
140 |
141 | - or because the messages it has received are not in chronological
142 | order, and it's waiting to see if a later message will fill in the
143 | gap.
144 |
145 | (The precise details will need to be established once we have an
146 | actual database to examine; for example, we don't currently know
147 | whether it's even possible for messages to appear out of order.)
148 |
149 | In addition to incomplete output files, output handlers must be able
150 | to save their current state to appropriate state files, so that (just
151 | as with queue state files) the program can be stopped and restarted
152 | without creating any discontinuity in the output.
153 |
--------------------------------------------------------------------------------
/downcast/timestamp.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import re
20 | from datetime import datetime, timedelta, timezone
21 |
class T(datetime):
    """Date/time class using MS SQL time string format.

    This class is a wrapper around the standard datetime class, but
    its constructor accepts either a datetime object, or a string in
    the ISO 8601 format used by MS SQL.

    Addition, subtraction, and comparison work as for normal datetime
    objects.  repr and str produce something sensible.
    """

    # Note that the following pattern recognizes several formats:
    #
    #   YYYY-MM-DD HH:MM:SS.SSS +ZZ:ZZ    (MS SQL)
    #   YYYY-MM-DD HH:MM:SS.SSSSSS+ZZ:ZZ  (datetime.__str__)
    #   YYYY-MM-DD HH:MM:SS+ZZ:ZZ   (datetime.__str__ if microseconds = 0)
    #
    # The first format is what should normally be used, but for some
    # reason the timestamps in _phi_time_map files are sometimes
    # written in the latter two formats - this is a bug somewhere in
    # downcast.output.timemap, but for now we need to support the
    # existing _phi_time_map files.

    # Raw strings are required here: '\A', '\d', '\s' in a plain
    # string literal are invalid escape sequences (DeprecationWarning,
    # and SyntaxWarning on newer Python versions.)
    _pattern = re.compile(r'\A(\d+)-(\d+)-(\d+)\s+' +
                          r'(\d+):(\d+):(\d+)(\.\d+)?\s*' +
                          r'([-+])(\d+):(\d+)\Z', re.ASCII)

    def __new__(cls, val, *args):
        # The constructor may be called in various ways:
        #  - T(str), to explicitly convert from a time string
        #  - T(datetime), to explicitly convert from a datetime
        #  - T(int, int, int, int, int, int, int, tzinfo),
        #    used by __add__ and __sub__ in Python 3.8
        #  - T(bytes, tzinfo), used by pickle.loads
        # Only the first two (single argument) forms should be used by
        # applications.

        if args:
            return datetime.__new__(cls, val, *args)

        if isinstance(val, datetime):
            tz = val.tzinfo
            if tz is None:
                raise TypeError('missing timezone')
            return datetime.__new__(
                cls,
                year = val.year,
                month = val.month,
                day = val.day,
                hour = val.hour,
                minute = val.minute,
                second = val.second,
                microsecond = val.microsecond,
                tzinfo = tz)

        m = T._pattern.match(val)
        if m is None:
            raise ValueError('malformed timestamp string %r' % (val,))

        second = int(m.group(6))
        microsecond = round(float(m.group(7) or 0) * 1000000)
        # datetime doesn't support leap seconds, and DWC probably
        # doesn't support them either, but allow for the possibility
        # here just in case.  If there is a leap second, it is
        # silently compressed into the final millisecond of the
        # preceding second; this will result in one or more
        # discontinuities in the record time map.
        if second == 60:
            second = 59
            microsecond = 999000 + microsecond // 1000

        tzs = 1 if m.group(8) == '+' else -1
        tz = timezone(timedelta(hours = tzs * int(m.group(9)),
                                minutes = tzs * int(m.group(10))))

        return datetime.__new__(
            cls,
            year = int(m.group(1)),
            month = int(m.group(2)),
            day = int(m.group(3)),
            hour = int(m.group(4)),
            minute = int(m.group(5)),
            second = second,
            microsecond = microsecond,
            tzinfo = tz)

    def __str__(self):
        # Render in MS SQL format, with millisecond precision when
        # possible (microsecond precision otherwise.)
        tzoffs = round(self.tzinfo.utcoffset(None).total_seconds() / 60)
        (tzh, tzm) = divmod(abs(tzoffs), 60)
        if self.microsecond % 1000 == 0:
            f = '%03d' % (self.microsecond // 1000)
        else:
            f = '%06d' % self.microsecond
        return ('%04d-%02d-%02d %02d:%02d:%02d.%s %s%02d:%02d'
                % (self.year, self.month, self.day,
                   self.hour, self.minute, self.second, f,
                   ('-' if tzoffs < 0 else '+'), tzh, tzm))

    def __repr__(self):
        return ('%s(%r)' % (self.__class__.__name__, T.__str__(self)))

    def strftime_local(self, fmt):
        """Format time as a string, using its original timezone."""
        return datetime.strftime(self, fmt)

    def strftime_utc(self, fmt):
        """Convert time to UTC and format as a string."""
        return datetime.strftime(self.astimezone(timezone.utc), fmt)
130 |
131 |
# Before Python 3.8, arithmetic on a datetime subclass returned a
# plain datetime rather than the subclass; if this interpreter behaves
# that way, patch T's operators to convert results back to T.
if not isinstance(T('1800-01-01 00:00:00.000 +00:00') + timedelta(0), T):
    # the following are redundant in Python 3.8
    # also, the above line is a nice sanity check in case Python
    # decides to break this stuff *again*

    def _add_and_convert(a, b):
        # T + timedelta (or timedelta + T) yields a T.
        return T(datetime.__add__(a, b))
    T.__add__ = _add_and_convert
    T.__radd__ = _add_and_convert

    def _sub_and_convert(a, b):
        # T - timedelta yields a T; T - datetime yields a timedelta,
        # which is passed through unchanged.
        d = datetime.__sub__(a, b)
        if isinstance(d, datetime):
            return T(d)
        else:
            return d
    T.__sub__ = _sub_and_convert
149 |
150 |
def delta_ms(time_a, time_b):
    """Compute the difference between two timestamps in milliseconds."""
    # Floor-divide the timedelta by one millisecond; this equals the
    # hand-expanded days/seconds/microseconds arithmetic exactly,
    # including for negative differences.
    return (time_a - time_b) // timedelta(milliseconds = 1)
156 |
# Placeholder timestamp intended to predate any real data.
very_old_timestamp = T('1800-01-01 00:00:00.000 +00:00')
# DWC epoch; sequence numbers are presumably milliseconds since this
# point (see downcast/output/log.py) - TODO confirm.
dwc_epoch = T('2000-01-01 12:00:00.000 +00:00')
159 |
--------------------------------------------------------------------------------
/downcast/output/files.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import os
20 | import errno
21 | import mmap
22 |
23 | from ..util import fdatasync
24 |
class ArchiveLogFile:
    """Append-only text log output file.

    Messages can only be appended to the end of the log file.
    Messages must be strings and are always encoded as UTF-8.

    When the file is opened, if it ends with an incomplete message
    (i.e., the program writing the file crashed or ran out of space),
    a special marker is appended to indicate that the line is invalid.
    """

    def __init__(self, filename):
        # Open for appending; create the file if it does not exist.
        self.fp = open(filename, 'a+b')
        self.fsync_before_close = False

        # Inspect the final byte of the file.  If it isn't a newline,
        # the last line was never completed; append a marker so that
        # readers know to discard that line.
        try:
            self.fp.seek(-1, os.SEEK_END)
        except OSError as e:
            # EINVAL: the file is empty, nothing to repair.
            if e.errno != errno.EINVAL:
                raise
            return
        last = self.fp.read(1)
        if last not in (b'\n', b''):
            self.fp.write(b'\030\r####\030\n')

    def append(self, msg):
        """Write a message to the end of the file.

        A line feed is appended automatically.
        """
        data = msg.encode('UTF-8') + b'\n'
        self.fp.write(data)

    def append_raw(self, msg):
        """Write a raw binary message to the end of the file."""
        self.fp.write(msg)

    def flush(self, fsync = True):
        """Ensure that previous messages are saved to disk."""
        self.fp.flush()
        if not fsync:
            return
        fdatasync(self.fp.fileno())

    def close(self, fsync = True):
        """Flush and close the file.

        Closing is idempotent, except that closing with fsync after a
        previous close without fsync raises an exception.
        """
        if self.fp.closed and (not fsync or self.fsync_before_close):
            return
        self.flush(fsync = fsync)
        self.fp.close()
        self.fsync_before_close = fsync
84 |
class ArchiveBinaryFile:
    """Random-access binary output file.

    Binary data may be written to any location in the file. This uses
    mmap internally, so the output file must support mmap.

    For efficiency, the file on disk will be resized in units of
    mmap.PAGESIZE (or more) at a time; the file will be truncated to
    its "real" size when flush or close is called.
    """

    def __init__(self, filename, window_size = None):
        # Open the file R/W and create if missing, never truncate
        self.fd = os.open(filename, os.O_RDWR|os.O_CREAT, 0o666)
        self.fsync_before_close = False

        # current_size is the size on disk (padded out to the mapping
        # window); real_size is the logical size seen by callers.
        self.current_size = os.lseek(self.fd, 0, os.SEEK_END)
        self.real_size = self.current_size

        # Round the requested window size up to a power-of-two
        # multiple of the page size (minimum two pages).
        self.window_size = mmap.PAGESIZE * 2
        if window_size is not None:
            while self.window_size < window_size:
                self.window_size *= 2

        self.map_start = self.map_end = 0
        self.map_buffer = None

    def _map_range(self, start, end):
        """Ensure that file offsets [start, end) are mapped."""
        # BUG FIX: the previous test ("end < map_start or start >=
        # map_end") only remapped when the requested range was
        # completely disjoint from the current window, so a write
        # straddling a window edge indexed outside the mapped buffer.
        # Remap whenever the range is not fully contained.
        if (self.map_buffer is None
                or start < self.map_start or end > self.map_end):
            # Align the window start to a page boundary.  (mmap
            # offsets must be multiples of ALLOCATIONGRANULARITY,
            # which equals PAGESIZE on POSIX systems.)
            start -= start % mmap.PAGESIZE
            if end < start + self.window_size:
                end = start + self.window_size
            else:
                end += mmap.PAGESIZE - (end % mmap.PAGESIZE)
            # Grow the file on disk so the mapping is valid.
            if end > self.current_size:
                os.ftruncate(self.fd, end)
                self.current_size = end
            # Release the superseded mapping before creating a new one.
            if self.map_buffer is not None:
                self.map_buffer.close()
            self.map_buffer = mmap.mmap(self.fd, end - start, offset = start)
            self.map_start = start
            self.map_end = end

    def size(self):
        """Get the size of the file."""
        return self.real_size

    def truncate(self, size):
        """Truncate or extend the file to the given size."""
        # The on-disk size is adjusted lazily, at flush/close time.
        self.real_size = size

    def write(self, pos, data, mask = None):
        """Write data to the file, extending it if necessary.

        If mask is specified, it must be the same length as data; only
        the bits set in the mask are modified.
        """
        end = pos + len(data)
        if end > self.real_size:
            self.real_size = end
        self._map_range(pos, end)
        i = pos - self.map_start
        if mask is None:
            self.map_buffer[i : i + len(data)] = data
        else:
            # Read-modify-write each byte, changing only masked bits.
            for j in range(len(data)):
                self.map_buffer[i + j] = ((self.map_buffer[i + j] & ~mask[j])
                                          | (data[j] & mask[j]))

    def flush(self, fsync = True):
        """Ensure that the file contents are saved to disk."""
        # Drop the mapping; it will be recreated by the next write.
        self.map_start = self.map_end = 0
        if self.map_buffer is not None:
            self.map_buffer.close()
            self.map_buffer = None
        # Shrink (or grow) the on-disk file to the logical size.
        if self.real_size != self.current_size:
            os.ftruncate(self.fd, self.real_size)
            self.current_size = self.real_size
        if fsync:
            fdatasync(self.fd)

    def close(self, fsync = True):
        """Flush and close the file."""

        # closing should be idempotent - but raise an exception if
        # fsync = True and file was previously closed without fsync
        if self.fd is None:
            if not fsync or self.fsync_before_close:
                return

        self.flush(fsync = fsync)
        os.close(self.fd)
        self.fd = None
        self.fsync_before_close = fsync

    def __del__(self):
        self.close(fsync = False)
180 |
--------------------------------------------------------------------------------
/downcast/output/enums.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from datetime import datetime, timezone
20 | import os
21 |
22 | from ..messages import EnumerationValueMessage
23 | from .wfdb import (Annotator, AnnotationType)
24 |
25 | _del_control = str.maketrans({x: ' ' for x in list(range(32)) + [127]})
26 |
class EnumerationValueHandler:
    """Writes EnumerationValueMessages to per-record '_phi_enums' logs."""

    def __init__(self, archive):
        self.archive = archive
        # Per-record (sequence_number, timestamp) of the last event
        # written, so unchanged values are not repeated in the log.
        self.last_event = {}

    def send_message(self, chn, msg, source, ttl):
        if not isinstance(msg, EnumerationValueMessage):
            return

        source.nack_message(chn, msg, self)

        # Load metadata for this numeric
        attr = msg.origin.get_enumeration_attr(msg.enumeration_id, (ttl <= 0))
        if attr is None:
            # Metadata not yet available - hold message in pending and
            # continue processing
            return

        # Look up the corresponding record
        record = self.archive.get_record(msg)
        if record is None:
            # Record not yet available - hold message in pending and
            # continue processing
            return

        # Dump original message to BCP file if desired
        if record.dump(msg):
            source.ack_message(chn, msg, self)
            return

        # Open or create a log file
        logfile = record.open_log_file('_phi_enums')

        # Write the sequence number and timestamp to the log file
        # (only if they differ from the previous event)
        sn = msg.sequence_number
        ts = msg.timestamp
        (old_sn, old_ts) = self.last_event.get(record, (None, None))
        if sn != old_sn:
            logfile.append('S%s' % sn)
        if ts != old_ts:
            logfile.append(ts.strftime_utc('%Y%m%d%H%M%S%f'))
        self.last_event[record] = (sn, ts)

        # Write value to the log file
        lbl = attr.label.translate(_del_control)
        val = msg.value
        if val is None:
            val = ''
        else:
            val = val.translate(_del_control)
        # BUG FIX: write the sanitized label (lbl) rather than the raw
        # attr.label, so control characters cannot corrupt the
        # tab/newline-delimited log format.
        logfile.append('%s\t%d\t%s' % (lbl, attr.value_physio_id, val))
        source.ack_message(chn, msg, self)

    def flush(self):
        self.archive.flush()
83 |
# Known DWC annotation codes, and corresponding WFDB anntyp / subtyp / aux.
# Keys are value_physio_id tokens as they appear in the '_phi_enums'
# log (ASCII byte strings).
_ann_code = {
    b'148631': (AnnotationType.NORMAL, 0, None),  # N - normal
    b'148767': (AnnotationType.PVC, 0, None),     # V - ventricular
    b'147983': (AnnotationType.SVPB, 0, None),    # S - supraventricular
    b'148063': (AnnotationType.PACE, 0, None),    # P - paced (most common?)
    b'147543': (AnnotationType.PACE, 1, None),    # P - paced
    b'147591': (AnnotationType.PACE, 2, None),    # P - paced (least common?)
    b'147631': (AnnotationType.PACESP, 0, None),  # ' - single pacer spike
    b'148751': (AnnotationType.PACESP, 1, None),  # " - bivent. pacer spike
    b'148783': (AnnotationType.LEARN, 0, None),   # L - learning
    b'147551': (AnnotationType.NOTE, 0, b'M'),    # M - missed beat
    b'195396': (AnnotationType.UNKNOWN, 0, None), # B - QRS, unspecified type
    b'148759': (AnnotationType.UNKNOWN, 1, None), # ? - QRS, unclassifiable
    b'147527': (AnnotationType.ARFCT, 0, None),   # A - artifact
    b'148743': (AnnotationType.NOTE, 0, b'_'),    # I - signals inoperable
}

# Unknown annotations are mapped to an anntyp based on the first
# letter of the label
_ann_letter = {
    b'N': AnnotationType.NORMAL,
    b'V': AnnotationType.PVC,
    b'S': AnnotationType.SVPB,
    b'P': AnnotationType.PACE,
    b"'": AnnotationType.PACESP,
    b'"': AnnotationType.PACESP,
    b'L': AnnotationType.LEARN,
    b'M': AnnotationType.NOTE,
    b'B': AnnotationType.UNKNOWN,
    b'?': AnnotationType.UNKNOWN,
    b'A': AnnotationType.ARFCT,
}
117 |
class EnumerationValueFinalizer:
    """Finalization pass over a record's '_phi_enums' log.

    On construction, every logged timestamp is fed into the record's
    time map; finalize_record() then re-reads the log in order and
    emits WFDB beat annotations to 'waves.beat'.
    """

    def __init__(self, record):
        self.record = record
        # The log may legitimately be absent (no enums were recorded).
        self.log = record.open_log_reader('_phi_enums', allow_missing = True)

        # Scan the enums log file, and add timestamps to the time map.
        for (sn, ts, line) in self.log.unsorted_items():
            # NOTE(review): str(ts) assumes the reader yields ts as a
            # str-convertible timestamp token; if ts were bytes this
            # would produce "b'...'" and fail to parse - confirm
            # against the log reader implementation.
            ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
            ts = ts.replace(tzinfo = timezone.utc)
            record.time_map.add_time(ts)

    def finalize_record(self):
        """Write beat annotations for this record."""
        sn0 = self.record.seqnum0()
        if sn0 is None:
            # if we don't have a seqnum0 then time is meaningless
            return

        annfname = os.path.join(self.record.path, 'waves.beat')
        with Annotator(annfname, afreq = 1000) as anns:
            # Reread the enums log file in order, and write beat annotations.
            for (sn, ts, line) in self.log.sorted_items():
                # Skip lines containing the 0x18 marker (written by
                # ArchiveLogFile to flag an incomplete/invalid line).
                if b'\030' in line:
                    continue
                ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
                ts = ts.replace(tzinfo = timezone.utc)
                # Prefer a seqnum derived from the time map; fall back
                # to the logged seqnum.  (5120 appears to bound the
                # search - magic constant, meaning unconfirmed.)
                sn = self.record.time_map.get_seqnum(ts, sn + 5120) or sn

                f = line.split(b'\t')
                if len(f) == 3 and f[0] == b'Annot':
                    (label, value_physio_id, value) = f
                    t = _ann_code.get(value_physio_id)
                    if t:
                        (anntyp, subtyp, aux) = t
                    else:
                        # Unknown code: classify by the first letter of
                        # the value, preserving the raw code and value
                        # in the aux string.
                        anntyp = _ann_letter.get(value[:1],
                                                 AnnotationType.UNKNOWN)
                        subtyp = 0
                        aux = b'[' + value_physio_id + b'] ' + value
                    anns.put(time = (sn - sn0), chan = 255,
                             anntyp = anntyp, subtyp = subtyp, aux = aux)
158 |
--------------------------------------------------------------------------------
/downcast/db/query.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import re
20 | from collections import namedtuple
21 | import ply.lex
22 | import ply.yacc
23 |
24 | from .exceptions import (ProgrammingError, ParameterCountError)
25 |
26 | # This implements a parser for an extremely limited subset of SQL,
27 | # just enough to handle the queries generated by downcast.
28 |
# Parsed form of a SELECT statement: output column names, source
# table, WHERE constraints, ORDER BY column (or None) and LIMIT count
# (or None).
SelectStatement = namedtuple('SelectStatement', (
    'columns', 'table', 'constraints', 'order', 'limit'))

# A single WHERE term: <column> <relation> <value>.
Constraint = namedtuple('Constraint', (
    'column', 'relation', 'value'))
34 |
class SimpleQueryParser:
    """Parser for the tiny subset of SQL generated by downcast.

    Built with ply: the lexer and grammar are derived from the names,
    declaration order, and DOCSTRINGS of the t_*/p_* methods below.
    Those docstrings are token regexps and BNF rules - executable, not
    documentation - and must not be edited casually.
    """

    # SQL keywords, matched case-insensitively (see t_identifier).
    _keywords = {
        'SELECT', 'FROM', 'WHERE', 'AND', 'ORDER', 'BY', 'LIMIT'
    }

    tokens = list(_keywords) + [
        'PARAM', 'LE', 'GE', 'identifier', 'bracketed_identifier',
        'integer', 'string_constant'
    ]

    # Single-character literal tokens.
    literals = ['=', '<', '>', ',', '*', ';']

    # Whitespace (including newlines) is skipped.
    t_ignore = ' \t\r\f\n'

    t_LE = r'<='
    t_GE = r'>='

    # A '?' placeholder consumes the next supplied query parameter;
    # the token's value becomes the parameter itself.
    def t_PARAM(self, t):
        r'\?'
        try:
            t.value = next(self._param_iter)
        except StopIteration:
            # Include the text of the constraint being parsed (if
            # known) to make the error easier to trace.
            c = None
            if self._constraint_pos is not None:
                c = self._input[self._constraint_pos:self._lexer.lexpos]
            raise ParameterCountError('not enough parameters for query', c)
        return t

    # Identifiers; those matching a keyword are reclassified.
    def t_identifier(self, t):
        r'[A-Za-z_][A-Za-z0-9_.]*'
        u = t.value.upper()
        if u in SimpleQueryParser._keywords:
            t.type = u
        return t

    # [Bracketed] identifiers; the brackets are stripped.
    def t_bracketed_identifier(self, t):
        r'\[[A-Za-z0-9_.]+\]'
        t.value = t.value[1:-1]
        return t

    def t_integer(self, t):
        r'[0-9]+'
        t.value = int(t.value)
        return t

    # Single-quoted strings; '' is an escaped quote.
    def t_string_constant(self, t):
        r"'(?:[^']+|'')*'"
        t.value = t.value[1:-1].replace("''", "'")
        return t

    def t_error(self, t):
        # Echo the statement with '<>' marking the offending position.
        text = (self._input[:t.lexpos] + '<>' + self._input[t.lexpos:])
        raise ProgrammingError('syntax error in %r' % text)

    ################################################################

    def p_statement(self, p):
        """
        statement : SELECT columns FROM table constraints order limit ';'
                  | SELECT columns FROM table constraints order limit
        """
        p[0] = SelectStatement(columns = p[2], table = p[4],
                               constraints = p[5], order = p[6],
                               limit = p[7])

    def p_columns(self, p):
        """columns : columns ',' column"""
        p[0] = p[1] + [p[3]]

    def p_columns_1(self, p):
        """columns : column"""
        p[0] = [p[1]]

    def p_columns_star(self, p):
        """columns : '*'"""
        p[0] = ['*']

    def p_column(self, p):
        """column : identifier"""
        p[0] = p[1]
        # Remember where this column name started, for error messages
        # (see p_constraint_column / t_PARAM).
        self._column_pos = p.lexpos(1)

    def p_table(self, p):
        """
        table : identifier
              | bracketed_identifier
        """
        p[0] = p[1]

    def p_constraints(self, p):
        """constraints : WHERE constraint_list"""
        p[0] = p[2]

    def p_constraints_0(self, p):
        """constraints : """
        p[0] = []

    def p_constraint_list(self, p):
        """constraint_list : constraint_list AND constraint"""
        p[0] = p[1] + [p[3]]

    def p_constraint_list_1(self, p):
        """constraint_list : constraint"""
        p[0] = [p[1]]

    def p_constraint(self, p):
        """
        constraint : constraint_column '=' constraint_value
                   | constraint_column '<' constraint_value
                   | constraint_column '>' constraint_value
                   | constraint_column LE constraint_value
                   | constraint_column GE constraint_value
        """
        p[0] = Constraint(column = p[1], relation = p[2], value = p[3])
        # The constraint is complete; clear the position marker.
        self._constraint_pos = None

    def p_constraint_column(self, p):
        """constraint_column : column"""
        p[0] = p[1]
        # Mark where the constraint currently being parsed started.
        self._constraint_pos = self._column_pos

    def p_constraint_value(self, p):
        """
        constraint_value : PARAM
                         | integer
                         | string_constant
        """
        p[0] = p[1]

    def p_order(self, p):
        """order : ORDER BY column"""
        p[0] = p[3]

    def p_order_0(self, p):
        """order : """
        p[0] = None

    def p_limit(self, p):
        """limit : LIMIT integer"""
        p[0] = p[2]

    def p_limit_0(self, p):
        """limit : """
        p[0] = None

    def p_error(self, t):
        # Report the unexpected token (or EOF), echoing the statement
        # with '<>' marking the position.
        if t:
            desc = t.type
            text = (self._input[:t.lexpos] + '<>' + self._input[t.lexpos:])
        else:
            desc = 'EOF'
            text = (self._input + '<>')
        raise ProgrammingError('syntax error (unexpected %s) in %r'
                               % (desc, text))

    def __init__(self):
        # Build lexer and parser from this instance's members; parse
        # tables are kept in memory only (no files written).
        self._lexer = ply.lex.lex(module = self)
        self._parser = ply.yacc.yacc(module = self,
                                     write_tables = False,
                                     debug = False)

    def parse(self, statement, params):
        """Parse *statement*, substituting *params* for '?' markers.

        Raises ParameterCountError when the parameter count does not
        exactly match the number of placeholders.
        """
        self._input = statement
        self._param_iter = iter(params)
        self._constraint_pos = None
        q = self._parser.parse(statement, lexer = self._lexer)
        # Every parameter must have been consumed by t_PARAM.
        try:
            next(self._param_iter)
            raise ParameterCountError('too many parameters for query')
        except StopIteration:
            return q
206 |
--------------------------------------------------------------------------------
/downcast/db/bcp/cursor.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from ..exceptions import (Error, DataError, ProgrammingError)
20 |
class BCPCursor:
    """DB-API 2.0 style cursor over BCP table files.

    Queries (already restricted to the subset understood by the
    connection's parser) are evaluated by scanning a table iterator,
    seeking on the ordered/indexed column where possible, and
    filtering the remaining rows with predicate functions.
    """

    def __init__(self, connection):
        self._conn = connection
        # Iterators are cached per table so consecutive queries over
        # the same table can resume from the current position.
        self._table_iters = {}
        # State of the current query, set by execute():
        self._query_fetch = None   # callable returning the next raw row
        self._query_skip = None    # predicates; truthy result = skip row
        self._query_cols = None    # indices of the output columns
        self.description = None
        self.rowcount = -1
        self.arraysize = 1

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        row = self.fetchone()
        if row:
            return row
        else:
            raise StopIteration()

    #### DB-API ####

    def close(self):
        # Close every cached iterator; drop all query state even if
        # one of the close calls fails.
        try:
            for it in self._table_iters.values():
                it.close()
        finally:
            self._table_iters = {}
            self._conn = None
            self._query_fetch = None
            self._query_skip = None
            self._query_cols = None

    def execute(self, statement, params = ()):
        # Parse the statement, normalizing parser failures into the
        # DB-API exception hierarchy.
        try:
            q = self._conn.parse(statement, params)
        except Error:
            raise
        except Exception as e:
            raise ProgrammingError(e)

        table = self._conn.get_table(q.table)

        if table not in self._table_iters:
            self._table_iters[table] = table.iterator()
        it = self._table_iters[table]

        # Rows can only be produced in the table's inherent order.
        if q.order is not None:
            i = table.column_number(q.order)
            if i != table.order_column():
                raise ProgrammingError('cannot sort %s by %s'
                                       % (q.table, q.order))

        # Expand '*' and translate column names to indices.
        cols = []
        for c in q.columns:
            if c == '*':
                cols += range(table.n_columns())
            else:
                cols.append(table.column_number(c))

        # Translate each constraint into a seek target and/or a row
        # predicate.  Constraints on the ordered column can terminate
        # the scan early (_halt_unless); others merely skip rows.
        # Note: only the first seekable constraint wins the seek, so
        # this loop is order-dependent.
        seek = None
        skip = []
        for c in q.constraints:
            i = table.column_number(c.column)
            t = table.column_type(i)

            try:
                v = t.from_param(c.value)
            except Exception:
                raise ProgrammingError('in %s, cannot compare %s to %r'
                                       % (table.name, c.column, c.value))

            oc = table.order_column()
            rel = c.relation
            if i == oc and rel == '<':
                # Ordered column: stop as soon as the bound is passed.
                skip += [_halt_unless(i, rel, v)]
            elif i == oc and rel == '<=':
                skip += [_halt_unless(i, rel, v)]
            elif i == oc and rel == '=' and seek is None:
                # Seek to the value, then stop once it is passed.
                seek = (i, v)
                skip += [_halt_unless(i, rel, v)]
            elif i == oc and rel == '>=' and seek is None:
                seek = (i, v)
            elif i == oc and rel == '>' and seek is None:
                # Seek lands at the first row >= v; skip the == rows.
                seek = (i, v)
                skip += [_skip_unless(i, '<>', v)]
            elif table.column_indexed(i) and rel == '=' and seek is None:
                # Indexed equality: seek to the group of equal values
                # and halt when it ends.
                seek = (i, v)
                skip += [_halt_unless(i, '=', v)]
            else:
                skip += [_skip_unless(i, rel, v)]

        self.description = []
        for i in cols:
            self.description.append((table.column_name(i),
                                     table.column_type(i),
                                     None, None, None, None, None))
        self.rowcount = 0

        # Stop once the requested number of rows has been returned.
        if q.limit is not None:
            skip += [lambda r: self.rowcount >= q.limit and _halt()]

        if seek is None:
            it.seek(None, None)
        else:
            it.seek(*seek)
        self._query_fetch = it.fetch
        self._query_skip = skip
        self._query_cols = cols

    def executemany(self, statement, params):
        for p in params:
            self.execute(statement, p)

    def fetchone(self):
        fetch = self._query_fetch
        skip = self._query_skip
        try:
            r = fetch()
            while r:
                if any(f(r) for f in skip):
                    # Row rejected by a constraint; try the next one.
                    r = fetch()
                else:
                    self.rowcount += 1
                    return [r[i] for i in self._query_cols]
            # End of table: fall through, returning None.
        except HaltQuery:
            # A halt predicate determined no further rows can match.
            self._query_fetch = lambda: None
            return
        except Error:
            self._query_fetch = lambda: None
            raise
        except Exception as e:
            # Wrap unexpected errors in the DB-API hierarchy.
            self._query_fetch = lambda: None
            raise DataError(e)

    def fetchmany(self, size = None):
        if size is None:
            size = self.arraysize
        rows = []
        while size > 0:
            size -= 1
            row = self.fetchone()
            if not row:
                break
            rows.append(row)
        return rows

    def fetchall(self):
        rows = []
        row = self.fetchone()
        while row:
            rows.append(row)
            row = self.fetchone()
        return rows

    def setinputsizes(self, sizes):
        pass

    def setoutputsize(self, size, column):
        pass

    def nextset(self):
        # Multiple result sets are not supported.
        return None
191 |
class HaltQuery(Exception):
    """Internal exception used to stop scanning a table early."""
    pass
194 |
def _halt():
    """Abort the current scan (caught by BCPCursor.fetchone)."""
    raise HaltQuery()
197 |
198 | def _skip_unless(col, rel, value):
199 | if rel == '<':
200 | return lambda row: row[col] >= value
201 | elif rel == '<=':
202 | return lambda row: row[col] > value
203 | elif rel == '>':
204 | return lambda row: row[col] <= value
205 | elif rel == '>=':
206 | return lambda row: row[col] < value
207 | elif rel == '=':
208 | return lambda row: row[col] != value
209 | elif rel == '<>':
210 | return lambda row: row[col] == value
211 | else:
212 | raise ProgrammingError('unknown relation %r' % rel)
213 |
214 | def _halt_unless(col, rel, value):
215 | if rel == '<':
216 | return lambda row: row[col] >= value and _halt()
217 | elif rel == '<=':
218 | return lambda row: row[col] > value and _halt()
219 | elif rel == '>':
220 | return lambda row: row[col] <= value and _halt()
221 | elif rel == '>=':
222 | return lambda row: row[col] < value and _halt()
223 | elif rel == '=':
224 | return lambda row: row[col] != value and _halt()
225 | elif rel == '<>':
226 | return lambda row: row[col] == value and _halt()
227 | else:
228 | raise ProgrammingError('unknown relation %r' % rel)
229 |
--------------------------------------------------------------------------------
/downcast/attributes.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2017 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from collections import namedtuple
20 |
21 | # Note that 'enumeration_id', 'numeric_id', and 'wave_id' are
22 | # deliberately omitted. Contents of these attribute structures should
23 | # be fully anonymized.
24 |
# _Export.Enumeration_
EnumerationAttr = namedtuple('EnumerationAttr', (
    # Magic number for... something. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters). Underlying type is 'bigint'.
    'base_physio_id',

    # Magic number for the enumeration. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters). Underlying type is 'bigint'.
    'physio_id',

    # Description of the enumeration, such as 'Annot' or 'RhySta'.
    'label',

    # Undocumented magic number. Underlying type is 'bigint'.
    'value_physio_id',

    # Supposedly indicates if observation is aperiodic.
    # Seems to be 0 even for 'Annot'.
    'is_aperiodic',

    # Indicates if observation is manually entered, I guess???
    'is_manual',

    # Magic number indicating whether observation is valid????
    'validity',

    # Magic number for the units of measurement. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Units-Of-Measure). Underlying type is 'bigint'.
    'unit_code',

    # Units of measurement, if that makes any sense (current enums say
    # 'Unknwn'.) (What IS an "enumeration", if not something that
    # lacks units of measurement?)
    'unit_label',

    # Color to use for displaying enumeration values, represented as
    # 0xAARRGGBB, reinterpreted as a signed 32-bit integer.
    'color'))

# All-None placeholder.  Arity is derived from the field list so it
# cannot fall out of sync if fields are added or removed.
undefined_enumeration = EnumerationAttr(*[None] * len(EnumerationAttr._fields))
68 |
# _Export.Numeric_
NumericAttr = namedtuple('NumericAttr', (
    # Magic number for... something. Underlying type is 'bigint'.
    'base_physio_id',

    # Magic number for the "category" of numeric. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters? or Calculations?) Underlying type is 'bigint'.
    'physio_id',

    # Description of the "category" of numeric (such as 'NBP'.)
    'label',

    # Indicates that the measurement is aperiodic (like NBP), rather
    # than periodic (like HR).
    'is_aperiodic',

    # Units of measurement.
    'unit_label',

    # Magic number indicating whether measurement is valid????
    'validity',

    # Lower alarm threshold (?!)
    'lower_limit',

    # Upper alarm threshold (?!)
    'upper_limit',

    # Indicates that threshold(?) alarms are disabled (?!)
    'is_alarming_off',

    # Magic number for the specific numeric. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters? or Calculations?) Underlying type is 'bigint'.
    'sub_physio_id',

    # Description of the specific numeric (such as 'NBPs'.)
    'sub_label',

    # Color to use for displaying numeric values, represented as
    # 0xAARRGGBB, reinterpreted as a signed 32-bit integer.
    'color',

    # Indicates if value is manually entered, I guess???
    'is_manual',

    # Number of values belonging to the compound value???
    'max_values',

    # Number of decimal places to be displayed (?)
    'scale'))

# All-None placeholder; arity derived from the field list (see
# undefined_enumeration).
undefined_numeric = NumericAttr(*[None] * len(NumericAttr._fields))
123 |
# _Export.Wave_
WaveAttr = namedtuple('WaveAttr', (
    # Magic number for the "category" of waveform. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters). Underlying type is 'bigint'.
    'base_physio_id',

    # Magic number for the specific waveform. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters). Underlying type is 'bigint'.
    'physio_id',

    # Description of the waveform.
    'label',

    # 0 = Primary, 1 = Secondary ???
    'channel',

    # Presumably, number of seqnum ticks per sample.
    'sample_period',

    # Indicates the waveform should be displayed with lower time
    # resolution than usual.
    'is_slow_wave',

    # Indicates that the waveform is "derived". ???
    'is_derived',

    # Color to use for displaying the waveform, represented as
    # 0xAARRGGBB, reinterpreted as a signed 32-bit integer.
    'color',

    # Low/high cutoff frequency of the input bandpass filter.
    'low_edge_frequency',
    'high_edge_frequency',

    # Range of sample values.
    'scale_lower',
    'scale_upper',

    # Two reference sample values.
    'calibration_scaled_lower',
    'calibration_scaled_upper',

    # Physical values corresponding to the two reference sample
    # values.
    'calibration_abs_lower',
    'calibration_abs_upper',

    # Magic number indicating how signal is calibrated (???)
    'calibration_type',

    # Units of measurement.
    'unit_label',

    # Magic number for the units of measurement. See
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Units-Of-Measure). Underlying type is 'bigint'.
    'unit_code',

    # Magic number indicating electrode placement (???)
    'ecg_lead_placement'))

# All-None placeholder; arity derived from the field list (see
# undefined_enumeration).
undefined_wave = WaveAttr(*[None] * len(WaveAttr._fields))
188 |
# "Parameters" table
# Metadata describing a single physio ID (signal/parameter).
PhysioIDAttr = namedtuple('PhysioIDAttr', (
    # I guess this is a standard code of some sort (comment says HL7)?
    'mdil_code',

    # The Philips internal identifier for the signal/parameter, used
    # in various structures. (These don't appear related to the
    # "StardateNom" numbering system used in DataExport and RDE.)
    'physio_id',

    # Short description of the signal/parameter.
    'label',

    # Verbose description of the signal/parameter.
    'description',

    # I guess this is another standard code of some sort? Often this
    # equals the PhysioId.
    'mdc_code',

    # I guess this is another standard code, in this case a symbolic
    # name.
    'mdc_label',

    # Defines how the physioid is used, I guess: "wave", "numeric",
    # "numeric/wave", "setting/numeric", or "string/enumeration".
    # Maybe other possibilities, who knows?
    'type',

    # ???
    'hl7_outbound',

    # ???
    'data_warehouse_connect'))
223 |
# "Units-Of-Measure" table
# Metadata describing a unit of measurement.
UnitAttr = namedtuple('UnitAttr', (
    # I guess this is a standard code of some sort (comment says HL7)?
    'mdil_code',

    # The Philips internal identifier for the unit, used in various
    # structures.
    'unit_code',

    # Abbreviation for the unit. Not typographically consistent
    # ("°F", "/mm³", "cmH2O/l/s", "1/nl", ...)
    'label',

    # I guess this is another standard code of some sort? Often this
    # equals the unit_code.
    'mdc_code',

    # I guess this is another standard code, in this case a symbolic
    # name.
    'mdc_label',

    # Verbose description, even more typographically inconsistent than
    # the label.
    'description'))
248 |
--------------------------------------------------------------------------------
/test-parsers:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from datetime import datetime
4 |
5 | from downcast.server import DWCDB
6 | from downcast.parser import (WaveSampleParser, DummyWaveSampleParser,
7 | AlertParser, NumericValueParser,
8 | EnumerationValueParser, WaveAttrParser,
9 | NumericAttrParser, EnumerationAttrParser,
10 | BedTagParser, PatientDateAttributeParser,
11 | PatientStringAttributeParser,
12 | PatientBasicInfoParser, PatientMappingParser)
13 |
# Read database credentials from server.conf and open a connection to
# the server named 'demo' defined there.
DWCDB.load_config('server.conf')
db = DWCDB('demo')
conn = db.connect()
17 |
def test(parser):
    # Run every query generated by the given parser, feed each result
    # row through the corresponding handler, and print the resulting
    # messages.  A query that yields no messages at all is an error.
    for (query, handler) in parser.queries():
        print()
        print(query)
        cursor = conn.cursor()
        cursor.execute(*query)
        nresults = 0
        for row in iter(cursor.fetchone, None):
            msg = handler(db, row)
            if msg is not None:
                nresults += 1
                print(msg)
        cursor.close()
        if nresults == 0:
            raise Exception("no results!")
35 |
def pt(s):
    # Parse a human-readable timestamp such as
    # 'Jan 28 2016 17:00:00.0 -0400' into an aware datetime.
    fmt = '%b %d %Y %H:%M:%S.%f %z'
    return datetime.strptime(s, fmt)
38 |
## note the following queries are not necessarily efficient, but we
## want to individually test each possible constraint

## note that some of these timestamps are in the "wrong" timezone; we
## want to be sure that the server DTRT.

# WaveSampleParser: exercise each supported constraint individually
# (mapping ID, each timestamp comparison, each sequence-number
# comparison), plus the dummy variant.
test(WaveSampleParser(limit = 2))
test(WaveSampleParser(limit = 1, mapping_id = '85965f09-e8c2-4e79-8c1c-cb1775bd2550'))
test(WaveSampleParser(limit = 1, time_gt = pt('Jan 28 2016 17:00:00.0 -0400')))
test(WaveSampleParser(limit = 1, time_ge = pt('Jan 28 2016 17:00:00.0 -0400')))
test(WaveSampleParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(WaveSampleParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(WaveSampleParser(limit = 1, time = pt('Jan 28 2016 14:40:29.321 -0400')))
test(WaveSampleParser(limit = 1, seqnum_ge = 507279000000))
test(WaveSampleParser(limit = 1, seqnum_gt = 507279000000))
test(WaveSampleParser(limit = 1, seqnum_le = 507279000000))
test(WaveSampleParser(limit = 1, seqnum_lt = 507279000000))
test(WaveSampleParser(limit = 1, seqnum = 507278429440))
test(DummyWaveSampleParser(limit = 2))

# AlertParser: same set of constraints as above.
test(AlertParser(limit = 2))
test(AlertParser(limit = 1, mapping_id = '7cc594d9-d8dc-4bc7-9522-59cbc8091d23'))
test(AlertParser(limit = 1, time_gt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(AlertParser(limit = 1, time_ge = pt('Jan 28 2016 16:00:00.0 -0500')))
test(AlertParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(AlertParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(AlertParser(limit = 1, time = pt('Jan 28 2016 13:30:05.755 -0500')))
test(AlertParser(limit = 1, seqnum_gt = 507279000000))
test(AlertParser(limit = 1, seqnum_ge = 507279000000))
test(AlertParser(limit = 1, seqnum_lt = 507279000000))
test(AlertParser(limit = 1, seqnum_le = 507279000000))
test(AlertParser(limit = 1, seqnum = 507277805824))

# NumericValueParser: same set of constraints as above.
test(NumericValueParser(limit = 2))
test(NumericValueParser(limit = 1, mapping_id = '655d8b35-cdb7-46aa-84d8-bed0dece0cb2'))
test(NumericValueParser(limit = 1, time_gt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(NumericValueParser(limit = 1, time_ge = pt('Jan 28 2016 16:00:00.0 -0500')))
test(NumericValueParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(NumericValueParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(NumericValueParser(limit = 1, time = pt('Jan 28 2016 16:33:27.0 -0500')))
test(NumericValueParser(limit = 1, seqnum_gt = 507279000000))
test(NumericValueParser(limit = 1, seqnum_ge = 507279000000))
test(NumericValueParser(limit = 1, seqnum_lt = 507279000000))
test(NumericValueParser(limit = 1, seqnum_le = 507279000000))
test(NumericValueParser(limit = 1, seqnum = 507278429440))

# EnumerationValueParser: same set of constraints as above.
test(EnumerationValueParser(limit = 2))
test(EnumerationValueParser(limit = 1, mapping_id = '466fcc4c-7d8c-4c59-b00c-80aba6e7605d'))
test(EnumerationValueParser(limit = 1, time_gt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(EnumerationValueParser(limit = 1, time_ge = pt('Jan 28 2016 16:00:00.0 -0500')))
test(EnumerationValueParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(EnumerationValueParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(EnumerationValueParser(limit = 1, time = pt('Jan 28 2016 13:40:29.577 -0500')))
test(EnumerationValueParser(limit = 1, seqnum_gt = 507279000000))
test(EnumerationValueParser(limit = 1, seqnum_ge = 507279000000))
test(EnumerationValueParser(limit = 1, seqnum_lt = 507279000000))
test(EnumerationValueParser(limit = 1, seqnum_le = 507279000000))
test(EnumerationValueParser(limit = 1, seqnum = 507278429440))
97 |
# Attribute parsers: look up metadata by ID.
test(WaveAttrParser(limit = 2))
test(WaveAttrParser(limit = 1, wave_id = 1)) # (ART)

test(NumericAttrParser(limit = 2))
test(NumericAttrParser(limit = 1, numeric_id = 1)) # (HR)

test(EnumerationAttrParser(limit = 2))
test(EnumerationAttrParser(limit = 1, enumeration_id = 1)) # (RhySta)

# BedTagParser: constrained by bed label and timestamp.
test(BedTagParser(limit = 2))
test(BedTagParser(limit = 1, bed_label = 'CDBed1'))
test(BedTagParser(limit = 1, time_gt = pt('Jan 28 2016 12:00:00.0 -0500')))
test(BedTagParser(limit = 1, time_ge = pt('Jan 28 2016 12:00:00.0 -0500')))
test(BedTagParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(BedTagParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(BedTagParser(limit = 1, time = pt('Jan 28 2016 13:30:23.202 -0500')))

# Patient attribute parsers: constrained by patient ID, attribute
# name, and timestamp.
test(PatientDateAttributeParser(limit = 2))
test(PatientDateAttributeParser(limit = 1, patient_id = '31c1da32-2ea1-4166-a7eb-2d9738967412'))
test(PatientDateAttributeParser(limit = 1, attr = 'DOB'))
test(PatientDateAttributeParser(limit = 1, time_gt = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientDateAttributeParser(limit = 1, time_ge = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientDateAttributeParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientDateAttributeParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientDateAttributeParser(limit = 1, time = pt('Jan 28 2016 13:30:23.202 -0500')))

test(PatientStringAttributeParser(limit = 2))
test(PatientStringAttributeParser(limit = 1, patient_id = '31c1da32-2ea1-4166-a7eb-2d9738967412'))
test(PatientStringAttributeParser(limit = 1, attr = 'FirstName'))
test(PatientStringAttributeParser(limit = 1, time_gt = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientStringAttributeParser(limit = 1, time_ge = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientStringAttributeParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientStringAttributeParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientStringAttributeParser(limit = 1, time = pt('Jan 28 2016 13:30:23.202 -0500')))

test(PatientBasicInfoParser(limit = 2))
test(PatientBasicInfoParser(limit = 1, patient_id = '31c1da32-2ea1-4166-a7eb-2d9738967412'))
test(PatientBasicInfoParser(limit = 1, time_gt = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientBasicInfoParser(limit = 1, time_ge = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientBasicInfoParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientBasicInfoParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientBasicInfoParser(limit = 1, time = pt('Jan 28 2016 13:30:23.202 -0500')))

# PatientMappingParser: additionally constrained by hostname and
# mapping state.
test(PatientMappingParser(limit = 2))
test(PatientMappingParser(limit = 1, patient_id = '31c1da32-2ea1-4166-a7eb-2d9738967412'))
test(PatientMappingParser(limit = 1, mapping_id = '466fcc4c-7d8c-4c59-b00c-80aba6e7605d'))
test(PatientMappingParser(limit = 1, time_gt = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientMappingParser(limit = 1, time_ge = pt('Jan 28 2016 12:00:00.0 -0500')))
test(PatientMappingParser(limit = 1, time_lt = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientMappingParser(limit = 1, time_le = pt('Jan 28 2016 16:00:00.0 -0500')))
test(PatientMappingParser(limit = 1, time = pt('Jan 28 2016 13:26:53.456 -0500')))
test(PatientMappingParser(limit = 1, hostname = 'RDEGEN8-1'))
test(PatientMappingParser(limit = 1, is_mapped = False))
151 |
--------------------------------------------------------------------------------
/downcast/output/numerics.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2017 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 |
19 | from datetime import datetime, timezone
20 | import heapq
21 |
22 | from ..messages import NumericValueMessage
23 | from ..util import string_to_ascii
24 |
class NumericValueHandler:
    """Message handler that writes numeric observations to per-record
    log files ('_phi_numerics' for periodic values, '_phi_aperiodics'
    for aperiodic ones)."""

    def __init__(self, archive):
        self.archive = archive
        # Last sequence number / (sequence number, timestamp) written
        # to each record's log file, so that unchanged headers are not
        # repeated on every line.
        self.last_periodic = {}
        self.last_aperiodic = {}

    def send_message(self, chn, msg, source, ttl):
        """Handle one message from the extractor.

        Messages other than NumericValueMessage are ignored.  The
        message is nacked up front and only acked once it has been
        fully recorded; if metadata or the destination record is not
        yet available, the message is left pending.
        """
        if not isinstance(msg, NumericValueMessage):
            return

        source.nack_message(chn, msg, self)

        # Load metadata for this numeric
        attr = msg.origin.get_numeric_attr(msg.numeric_id, (ttl <= 0))
        if attr is None:
            # Metadata not yet available - hold message in pending and
            # continue processing
            return

        # Look up the corresponding record
        record = self.archive.get_record(msg)
        if record is None:
            # Record not yet available - hold message in pending and
            # continue processing
            return

        # Dump original message to BCP file if desired
        if record.dump(msg):
            source.ack_message(chn, msg, self)
            return

        if attr.is_aperiodic:
            # Open or create a log file
            logfile = record.open_log_file('_phi_aperiodics')

            # Write the sequence number to the log file
            # (only if it differs from the previous event)
            sn = msg.sequence_number
            if sn != self.last_aperiodic.get(record, None):
                logfile.append('S%s' % sn)
                self.last_aperiodic[record] = sn
        else:
            # Open or create a log file
            logfile = record.open_log_file('_phi_numerics')

            # Write the sequence number and timestamp to the log file
            # (only if they differ from the previous event)
            sn = msg.sequence_number
            ts = msg.timestamp
            (old_sn, old_ts) = self.last_periodic.get(record, (None, None))
            if sn != old_sn:
                logfile.append('S%s' % sn)
            if ts != old_ts:
                logfile.append(ts.strftime_utc('%Y%m%d%H%M%S%f'))
            self.last_periodic[record] = (sn, ts)

        self._append_value(logfile, attr, msg)
        source.ack_message(chn, msg, self)

    def _append_value(self, logfile, attr, msg):
        """Append one value line (label, value and units separated by
        tabs) to the given log file.  A None value is written as an
        empty field."""
        lbl = string_to_ascii(attr.sub_label)
        ulbl = string_to_ascii(attr.unit_label)
        val = msg.value
        if val is None:
            val = ''
        logfile.append('%s\t%s\t%s' % (lbl, val, ulbl))

    def flush(self):
        """Flush all pending output to disk."""
        self.archive.flush()
103 |
104 | def _strip_csv_meta(string):
105 | return string.replace(b',', b'_').replace(b'"', b'_')
106 |
class NumericValueFinalizer:
    """Converts a finished record's numeric log files into a
    'numerics.csv' output file.

    The constructor scans both log files to discover the set of
    columns and to populate the record's time map; finalize_record()
    then merges the two logs in order and writes the CSV.
    """

    def __init__(self, record):
        self.record = record

        # Scan the log files; make a list of all non-null
        # numerics, and add timestamps to the time map
        raw_numerics = set()

        self.periodic_log = record.open_log_reader('_phi_numerics',
                                                   allow_missing = True)
        for (sn, ts, line) in self.periodic_log.unsorted_items():
            # NOTE(review): assumes str(ts) yields exactly the
            # '%Y%m%d%H%M%S%f' text written by NumericValueHandler --
            # confirm against the log reader's item types.
            ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
            ts = ts.replace(tzinfo = timezone.utc)
            record.time_map.add_time(ts)
            # Lines containing a CAN byte (\030) are skipped --
            # presumably this marks cancelled/invalid entries; confirm
            # with the log writer.
            if b'\030' not in line:
                parts = line.rstrip(b'\n').split(b'\t')
                # ignore nulls
                if len(parts) >= 3 and parts[1]:
                    raw_numerics.add((parts[0], parts[2]))

        self.aperiodic_log = record.open_log_reader('_phi_aperiodics',
                                                    allow_missing = True)
        for (sn, _, line) in self.aperiodic_log.unsorted_items():
            if b'\030' not in line:
                parts = line.rstrip(b'\n').split(b'\t')
                # ignore nulls
                if len(parts) >= 3 and parts[1]:
                    raw_numerics.add((parts[0], parts[2]))

        # Map each raw (label, units) pair to a CSV-safe normalized
        # form; empty units become b'NU'.
        self.norm_numerics = {}
        for (raw_name, raw_units) in raw_numerics:
            norm_name = _strip_csv_meta(raw_name.strip())
            norm_units = _strip_csv_meta(raw_units.strip()) or b'NU'
            self.norm_numerics[(raw_name, raw_units)] = (norm_name, norm_units)

    def finalize_record(self):
        """Merge the periodic and aperiodic logs in sorted order and
        write numerics.csv (one column per normalized numeric, one
        row per distinct observation time)."""
        sn0 = self.record.seqnum0()

        if self.norm_numerics:
            # Assign each raw column key its (1-based) position among
            # the sorted normalized columns; column 0 is the time.
            num_columns = sorted(set(self.norm_numerics.values()))
            num_index = {}
            for (raw_key, norm_key) in self.norm_numerics.items():
                num_index[raw_key] = num_columns.index(norm_key) + 1

            nf = self.record.open_log_file('numerics.csv', truncate = True)
            # 'row' initially holds the header; the first data row
            # forces it to be flushed below (time != row[0]).
            row = [b'"time"']
            for (name, units) in num_columns:
                desc = name + b' [' + units + b']'
                row.append(b'"' + desc.replace(b'"', b'""') + b'"')
            cur_ts = None
            cur_sn = None
            cur_time = None
            for (sn, ts, line) in heapq.merge(
                    self.periodic_log.sorted_items(),
                    self.aperiodic_log.sorted_items()):
                if b'\030' in line:
                    continue
                parts = line.rstrip(b'\n').split(b'\t')
                # ignore nulls
                if len(parts) < 3 or not parts[1]:
                    continue
                col_id = (parts[0], parts[2])

                # determine new time value
                # NOTE(review): cur_ts is stored below as a parsed
                # datetime while ts here is the raw log value, so this
                # comparison can only match for aperiodic entries --
                # confirm whether that is intended.
                if ts == cur_ts and sn == cur_sn:
                    time = cur_time
                else:
                    if ts == 0:
                        # for aperiodics (such as NBP), use sequence number as
                        # observation time
                        obs_sn = sn
                    else:
                        # for periodics, translate timestamp to
                        # sequence number and use that as observation
                        # time
                        ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
                        ts = ts.replace(tzinfo = timezone.utc)
                        obs_sn = self.record.time_map.get_seqnum(ts, sn + 5120)
                        if obs_sn is None:
                            obs_sn = sn

                    if sn0 is None:
                        sn0 = obs_sn
                    # Time measured in counter ticks, ick.
                    # Better would probably be to use (real) seconds
                    time = str(obs_sn - sn0).encode()
                    cur_ts = ts
                    cur_sn = sn
                    cur_time = time

                # write out a complete row if the time value has changed
                if time != row[0]:
                    nf.fp.write(b','.join(row))
                    nf.fp.write(b'\n')
                    row = [time] + [b''] * len(num_columns)
                row[num_index[col_id]] = parts[1].rstrip(b'0').rstrip(b'.')
            # write the final row
            nf.fp.write(b','.join(row))
            nf.fp.write(b'\n')
206 |
--------------------------------------------------------------------------------
/bcp-scripts/bulk-verify:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 | #
3 | # bulk-verify - check syntax of DWC-BCP data files
4 | #
5 | # Copyright (c) 2018 Laboratory for Computational Physiology
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |
use strict;
use Getopt::Long qw(:config gnu_getopt);

# Regular expression fragments used to validate column values.  Note
# that ${NAME} (rather than $NAME) must be used immediately before a
# '{n}' quantifier: under 'use strict', $NAME{8} would be parsed as an
# element of the undeclared hash %NAME and fail to compile.
my $HEX = qr/[0-9A-F]/;
my $UUID = qr/${HEX}{8}(?:-${HEX}{4}){3}-${HEX}{12}/;
my $HEX_I = qr/[0-9A-F]/i;
my $UUID_STRING = qr/${HEX_I}{8}(?:-${HEX_I}{4}){3}-${HEX_I}{12}/;
my $DATE = qr/\d{4,}-\d{2}-\d{2}/;
my $TIMESTAMP = qr/$DATE \d{2}:\d{2}:\d{2}\.\d+ [-+]\d{2}:\d{2}/;
my $INTEGER = qr/-?\d+/;
my $DECIMAL = qr/-?\d*\.\d+/;

my $SAMPLE_INDEX_LIST = qr/\d+(?: \d+)*/;
my $SAMPLE_RANGE_LIST = qr/\d+ \d+(?: \d+ \d+)*/;
34 |
# Validation pattern for each known column name.  A hash reference
# instead of a pattern means the validation depends on which table
# the column appears in.
my %PATTERNS = (
    AdmitState => qr{\A$INTEGER?\z},
    AlertId => qr{\A$UUID\z},
    Alias => qr{.?}s,
    AnnounceTime => qr{\A$TIMESTAMP\z},
    BasePhysioId => qr{\A$INTEGER\z},
    BedLabel => qr{.?}s,
    CalibrationAbsLower => qr{\A$DECIMAL?\z},
    CalibrationAbsUpper => qr{\A$DECIMAL?\z},
    CalibrationScaledLower => qr{\A$INTEGER\z},
    CalibrationScaledUpper => qr{\A$INTEGER\z},
    CalibrationType => qr{\A$INTEGER\z},
    Category => qr{\A$INTEGER?\z},
    Channel => qr{\A$INTEGER\z},
    ClinicalUnit => qr{.?}s,
    Code => qr{\A$INTEGER\z},
    Color => qr{\A$INTEGER\z},
    CompoundValueId => qr{\A$UUID\z},
    EcgLeadPlacement => qr{\A$INTEGER\z},
    EndTime => qr{\A$TIMESTAMP\z},
    EnumerationId => qr{\A$INTEGER\z},
    Gender => qr{\A$INTEGER\z},
    Height => qr{\A$DECIMAL?\z},
    HeightUnit => qr{\A$INTEGER?\z},
    HighEdgeFrequency => qr{\A$DECIMAL?\z},
    Hostname => qr{.}s,
    # 'Id' columns are formatted differently depending on the table.
    Id => {
        Enumeration => qr{\A$INTEGER\z},
        Numeric => qr{\A$INTEGER\z},
        Wave => qr{\A$INTEGER\z},
        Patient => qr{\A$UUID_STRING\z},
        PatientMapping => qr{\A$UUID\z},
    },
    InvalidSamples => qr{\A$SAMPLE_RANGE_LIST?\z},
    IsAlarmingOff => qr{\A[01]\z},
    IsAperiodic => qr{\A[01]\z},
    IsDerived => qr{\A[01]\z},
    IsManual => qr{\A[01]\z},
    IsMapped => qr{\A1\z}, # we don't want pre-mapping mappings
    IsSilenced => qr{\A[01]\z},
    IsSlowWave => qr{\A[01]\z},
    IsTrendUploaded => qr{\A[01]\z},
    Kind => qr{\A$INTEGER\z},
    Label => qr{.}s,
    LowEdgeFrequency => qr{\A$DECIMAL?\z},
    LowerLimit => qr{\A$DECIMAL?\z},
    MappingId => qr{\A$UUID\z},
    MaxValues => qr{\A$INTEGER\z},
    Name => qr{\A\S+\z},
    NumericId => qr{\A$INTEGER\z},
    OnsetTime => qr{\A$TIMESTAMP\z},
    PacedMode => qr{\A$INTEGER?\z},
    PacedPulses => qr{\A$SAMPLE_INDEX_LIST?\z},
    PatientId => qr{\A$UUID_STRING\z},
    PhysioId => qr{\A$INTEGER\z},
    PressureUnit => qr{\A$INTEGER?\z},
    ResuscitationStatus => qr{\A$INTEGER?\z},
    SamplePeriod => qr{\A$INTEGER\z},
    Scale => qr{\A$INTEGER\z},
    ScaleLower => qr{\A$INTEGER\z},
    ScaleUpper => qr{\A$INTEGER\z},
    SequenceNumber => qr{\A$INTEGER\z},
    Severity => qr{\A$INTEGER\z},
    Source => qr{\A$INTEGER\z},
    SubLabel => qr{.}s,
    SubPhysioId => qr{\A$INTEGER\z},
    SubtypeId => qr{\A$INTEGER\z},
    Tag => qr{.}s,
    TimeStamp => qr{\A$TIMESTAMP\z},
    Timestamp => qr{\A$TIMESTAMP\z},
    UnavailableSamples => qr{\A$SAMPLE_RANGE_LIST?\z},
    UnitCode => qr{\A$INTEGER\z},
    UnitLabel => qr{.}s,
    UpperLimit => qr{\A$DECIMAL?\z},
    Validity => qr{\A$INTEGER\z},
    # 'Value' columns likewise vary per table.
    Value => {
        EnumerationValue => qr{.}s,
        NumericValue => qr{\A$DECIMAL?\z},
        PatientDateAttribute => qr{\A$DATE \d{2}:\d{2}:\d{2}\z},
        PatientStringAttribute => qr{.}s,
    },
    ValuePhysioId => qr{\A$INTEGER\z},
    WaveId => qr{\A$INTEGER\z},
    WaveSamples => qr{\A(?:..)+\z}s,
    Weight => qr{\A$DECIMAL?\z},
    WeightUnit => qr{\A$INTEGER?\z},
);
122 |
# Longest sample list seen so far; checked at exit to warn when lists
# may have hit a length limit (see bottom of this script).
my $slistmax = 0;

# Check that the space-separated list of numbers in $_ is strictly
# increasing; returns 1 on success, 0 on failure.  Also records the
# list length in $slistmax as a side effect.
sub check_sample_list {
    $slistmax = length($_) if length($_) > $slistmax;
    my ($x, @n) = split / /;
    while (@n) {
        my $y = shift @n;
        return 0 if $x >= $y;
        $x = $y;
    }
    return 1;
}
135 |
# Columns whose values need a programmatic check in addition to the
# regex match above.
my %CHECKFUNC = (
    PacedPulses => \&check_sample_list,
#    UnavailableSamples => \&check_sample_list,
#    InvalidSamples => \&check_sample_list,
);

my $exit_status = 0;
my @ignored_bad_columns;

# --force-invalid COLUMN may be given repeatedly to tolerate
# validation failures in the named columns.
GetOptions('force-invalid=s' => \@ignored_bad_columns) or die;
146 |
# Verify each data file named on the command line against its format
# file.  (Extraction of this script had stripped the <FMT> and <DATA>
# readline operators; they are restored here.)
foreach my $datafile (@ARGV) {
    my ($table) = split /\./, $datafile;
    my $fmtfile = "$table.fmt";
    my @cols;

    # Parse the BCP format file to learn the column layout.
    open FMT, $fmtfile or die "can't read $fmtfile: $!";
    $/ = "\n";
    my $ver = <FMT>;
    my $ncols = <FMT>;
    while (<FMT>) {
        s/^\s+//;
        my ($hcol, $type, $plen, $clen, $term, $tcol, $name) = split /\s+/;
        die "$fmtfile: wrong column number" if $hcol ne (@cols + 1);
        die "$fmtfile: invalid data type" if $type !~ /^SYB(?:CHAR|BINARY)$/;
        die "$fmtfile: invalid prefix size" if $plen !~ /^\d+$/;
        die "$fmtfile: invalid column size" if $clen ne -1;
        die "$fmtfile: invalid column separator" if $term !~ /^".*"$/;
        die "$fmtfile: invalid source column" if $tcol !~ /^\d+$/;
        my $pat = $PATTERNS{$name};
        my $func = $CHECKFUNC{$name};
        if (ref $pat eq 'HASH') {
            # Some column names are validated differently per table.
            $pat = $pat->{$table};
        }
        die "$fmtfile: unknown column name" if !defined $pat;

        # A column is either terminated by a tab or newline (no
        # prefix), or preceded by a 4-byte little-endian length
        # prefix ('V') with no terminator.
        if ($term eq '"\t"' && $plen == 0) {
            push @cols, [ undef, "\t", $pat, $func, $name ];
        }
        elsif ($term eq '"\n"' && $plen == 0) {
            push @cols, [ undef, "\n", $pat, $func, $name ];
        }
        elsif ($term eq '""' && $plen == 4) {
            push @cols, [ 4, 'V', $pat, $func, $name ];
        }
        else {
            die "$fmtfile: unknown column specification";
        }
    }
    close FMT;
    if (@cols != $ncols) {
        die "$fmtfile: incorrect number of columns";
    }

    # Read the data file one column at a time, validating each value.
    open DATA, $datafile or die "can't read $datafile: $!";
    my $nrows = 0;
    my $invalid = 0;
    my %invalid_cols;
    while (!eof DATA) {
        $nrows++;
        foreach my $c (@cols) {
            if ($c->[0]) {
                # Length-prefixed column: read the prefix, then the
                # value.
                if ($c->[0] != read DATA, $_, $c->[0]) {
                    print STDERR "$datafile:R$nrows: unexpected EOF (in $c->[3])\n";
                    $invalid = 1;
                    last;
                }
                my $n = unpack $c->[1], $_;
                if ($n != read DATA, $_, $n) {
                    print STDERR "$datafile:R$nrows: unexpected EOF (in $c->[3])\n";
                    $invalid = 1;
                    last;
                }
            }
            else {
                # Terminator-delimited column: read up to the
                # terminator and strip it.
                $/ = $c->[1];
                $_ = <DATA>;
                if (!chomp) {
                    print STDERR "$datafile:R$nrows: unexpected EOF (in $c->[3])\n";
                    $invalid = 1;
                    last;
                }
            }
            if ($_ !~ $c->[2] or ($c->[3] and !&{$c->[3]})) {
                $invalid_cols{$c->[4]}++;
                # Report at most five bad values per column.
                if ($invalid_cols{$c->[4]} <= 5) {
                    s/([\\"])/\\$1/g;
                    s{([\000-\037])}{sprintf '\\%03o', ord $1}eg;
                    my $pos = tell DATA;
                    print STDERR "$datafile:R$nrows:\@$pos: invalid $c->[4]\n";
                    print STDERR "  value: \"$_\"\n";
                    print STDERR "  expected: $c->[2]\n";
                }
            }
        }
    }
    close DATA;

    # Bad columns count as failures unless explicitly ignored via
    # --force-invalid.
    foreach my $c (sort keys %invalid_cols) {
        if (!grep { $_ eq $c } @ignored_bad_columns) {
            $invalid = 1;
            $exit_status = 1;
        }
    }

    # Summary line: md5 + row count for good files, a dashed marker
    # for bad ones, followed by per-column failure counts.
    if ($invalid) {
        print '-' x 32, " $datafile ($nrows)";
    }
    else {
        my ($md5) = (`md5sum $datafile` =~ /^([0-9a-f]{32})/);
        print "$md5 $datafile $nrows";
    }
    foreach my $c (sort keys %invalid_cols) {
        print "\t(", $invalid_cols{$c}, " $c)";
    }
    print "\n";
}

if ($slistmax == 2048) {
    print "*** Warning: sample lists may have been truncated\n";
}

exit ($exit_status);
259 |
--------------------------------------------------------------------------------
/downcast/main.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 |
19 | import sys
20 | import os
21 | import resource
22 | from argparse import ArgumentParser, ArgumentTypeError
23 | from datetime import timedelta
24 |
25 | from .server import DWCDB
26 | from .timestamp import T
27 | from .extractor import (Extractor, WaveSampleQueue, NumericValueQueue,
28 | EnumerationValueQueue, AlertQueue,
29 | PatientMappingQueue, PatientBasicInfoQueue,
30 | PatientDateAttributeQueue,
31 | PatientStringAttributeQueue, BedTagQueue)
32 |
33 | from .output.archive import Archive
34 | from .output.numerics import NumericValueHandler
35 | from .output.waveforms import WaveSampleHandler
36 | from .output.enums import EnumerationValueHandler
37 | from .output.alerts import AlertHandler
38 | from .output.mapping import PatientMappingHandler
39 | from .output.patients import PatientHandler
40 |
def main(args = None):
    """Command-line entry point: raise the file descriptor limit,
    parse arguments, and run the conversion loop."""
    (_, hard_limit) = resource.getrlimit(resource.RLIMIT_NOFILE)
    if hard_limit < 4096 and hard_limit != resource.RLIM_INFINITY:
        sys.exit('RLIMIT_NOFILE too low (%d)' % (hard_limit,))
    resource.setrlimit(resource.RLIMIT_NOFILE, (hard_limit, hard_limit))

    _main_loop(_parse_cmdline(args))
49 |
def _parse_timestamp(arg):
    """argparse type function: parse a timestamp string into a T,
    raising ArgumentTypeError on malformed input."""
    try:
        return T(arg)
    except Exception:
        message = ("%r is not in the format "
                   "'YYYY-MM-DD HH:MM:SS.SSS +ZZ:ZZ'" % arg)
        raise ArgumentTypeError(message)
56 |
def _parse_cmdline(args):
    """Parse and validate command-line options.

    Exits with a usage/error message on any invalid combination.
    Returns the parsed namespace; opts.state_dir defaults to
    opts.output_dir when not given.
    """
    p = ArgumentParser(
        description = 'Extract and convert DWC patient data.',
        fromfile_prefix_chars = '@')

    g = p.add_argument_group('input selection')
    g.add_argument('--server', metavar = 'NAME',
                   help = 'name of DWC database server')
    g.add_argument('--password-file', metavar = 'FILE',
                   default = 'server.conf',
                   help = 'file containing login credentials')

    g = p.add_argument_group('output database location')
    g.add_argument('--output-dir', metavar = 'DIR',
                   help = 'directory to store output database')
    g.add_argument('--state-dir', metavar = 'DIR',
                   help = 'directory to store state files')

    g = p.add_argument_group('conversion modes')
    g.add_argument('--init', action = 'store_true',
                   help = 'initialize a new output database')
    g.add_argument('--batch', action = 'store_true',
                   help = 'process available data and exit')
    g.add_argument('--live', action = 'store_true',
                   help = 'collect data continuously')
    g.add_argument('--start', metavar = 'TIME', type = _parse_timestamp,
                   help = 'begin collecting data at the given time')
    g.add_argument('--end', metavar = 'TIME', type = _parse_timestamp,
                   help = 'collect data up to the given time')
    g.add_argument('--partial', action = 'store_true',
                   help = 'include partial records at start time')
    g.add_argument('--terminate', action = 'store_true',
                   help = 'handle final data after permanent shutdown')

    opts = p.parse_args(args)
    progname = sys.argv[0]

    if opts.output_dir is None:
        sys.exit(('%s: no --output-dir specified' % progname)
                 + '\n' + p.format_usage())
    if opts.server is None:
        sys.exit(('%s: no --server specified' % progname)
                 + '\n' + p.format_usage())

    # Exactly one conversion mode must be selected.
    if (opts.init + opts.batch + opts.live) != 1:
        sys.exit(('%s: must specify exactly one of --init, --batch, or --live'
                  % progname) + '\n' + p.format_usage())

    if opts.start is not None and not opts.init:
        sys.exit(('%s: --start can only be used with --init' % progname)
                 + '\n' + p.format_usage())
    if opts.end is not None and not opts.batch:
        sys.exit(('%s: --end can only be used with --batch' % progname)
                 + '\n' + p.format_usage())

    if opts.state_dir is None:
        opts.state_dir = opts.output_dir

    if opts.init:
        # --init must start from scratch: refuse to reuse either
        # directory.
        if os.path.exists(opts.state_dir):
            sys.exit("%s: directory %s already exists"
                     % (progname, opts.state_dir))
        if os.path.exists(opts.output_dir):
            # (Fixed: this message previously reported state_dir.)
            sys.exit("%s: directory %s already exists"
                     % (progname, opts.output_dir))
    else:
        if not os.path.isdir(opts.state_dir):
            sys.exit("%s: directory %s does not exist"
                     % (progname, opts.state_dir))
        if not os.path.isdir(opts.output_dir):
            # (Fixed: this message previously reported state_dir.)
            sys.exit("%s: directory %s does not exist"
                     % (progname, opts.output_dir))
    return opts
130 |
def _init_extractor(opts):
    """Create an Extractor connected to the configured DWC server,
    with the patient-mapping queue plus one queue per message type."""
    DWCDB.load_config(opts.password_file)

    database = DWCDB(opts.server)
    extractor = Extractor(database, opts.state_dir,
                          fatal_exceptions = True,
                          deterministic_output = True, debug = True)

    # The mapping queue must be added first; the remaining queues are
    # all constructed the same way.
    extractor.add_queue(PatientMappingQueue(
        'mapping', start_time = opts.start, end_time = opts.end))
    for (name, queue_type) in (('waves', WaveSampleQueue),
                               ('numerics', NumericValueQueue),
                               ('enums', EnumerationValueQueue),
                               ('alerts', AlertQueue)):
        extractor.add_queue(queue_type(
            name, start_time = opts.start, end_time = opts.end))
    return extractor
156 |
def _init_archive(opts, extractor):
    """Create the output Archive and attach the message handlers to
    the given extractor.  Returns the Archive."""
    archive = Archive(opts.output_dir, deterministic_output = True)

    # Scan the output directory to find patients for whom we have not
    # seen any data for a long time, and finalize those records.  We
    # need to do this periodically since otherwise nothing would
    # finalize records at the end of a patient stay.
    archive.finalize_before(extractor.fully_processed_timestamp())
    archive.flush()

    for handler_type in (NumericValueHandler, WaveSampleHandler,
                         EnumerationValueHandler, AlertHandler,
                         PatientMappingHandler):
        extractor.add_handler(handler_type(archive))

    # FIXME: Handling patient messages is disabled for now - it causes
    # archive to split records unnecessarily.
    #extractor.add_handler(PatientHandler(archive))

    # Create or refresh state files, and fail if they're not writable
    extractor.flush()
    return archive
181 |
def _main_loop(opts):
    """Main extraction driver.

    In --init mode, just creates the extractor and writes initial queue
    state files.  Otherwise, repeatedly re-creates the extractor and
    archive, pumping data until caught up (or forever in --live mode).
    """
    if opts.init:
        # In --init mode, simply create the extractor and write the
        # initial queue state files.
        if opts.start and not opts.partial:
            os.makedirs(opts.output_dir, exist_ok = True)
            # Record the requested start time so downstream tools know
            # that earlier data is deliberately absent.
            horizon_file = os.path.join(opts.output_dir, '%horizon')
            with open(horizon_file, 'w') as hf:
                hf.write(str(opts.start) + '\n')

        extractor = _init_extractor(opts)
        extractor.flush()
        return

    # Otherwise, feed data from the extractor into the archive until
    # we reach the desired end point.
    while True:
        # We periodically stop and re-create the extractor and
        # archive, so that records can be finalized at the end of a
        # stay.  (We can't simply invoke finalize_before on a live
        # Archive object because different patients are handled by
        # different subprocesses - each process only knows about the
        # patients that have been delegated to it.)
        extractor = _init_extractor(opts)
        _init_archive(opts, extractor)
        # Each extractor instance processes roughly three hours of data
        # before being torn down and re-created.
        next_sync = (extractor.fully_processed_timestamp()
                     + timedelta(hours = 3))
        try:
            # Save state to disk after every 500 queries.
            n = 500
            while extractor.fully_processed_timestamp() < next_sync:
                if extractor.idle() and not opts.live:
                    # Input exhausted and not in live mode: optionally
                    # terminate the dispatcher, save state, and stop.
                    if opts.terminate:
                        extractor.dispatcher.terminate()
                    extractor.flush()
                    # NOTE(review): unlike _init_archive, this Archive
                    # is created without deterministic_output - confirm
                    # whether that is intentional.
                    a = Archive(opts.output_dir)
                    a.terminate()
                    return

                extractor.run()
                n -= 1
                if n <= 0:
                    extractor.flush()
                    n = 500
        finally:
            # Always persist queue state and stop worker processes,
            # even if extraction raised.
            extractor.flush()
            extractor.dispatcher.shutdown()
229 |
--------------------------------------------------------------------------------
/downcast/server.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from configparser import ConfigParser
20 | import logging
21 | import warnings
22 | import os
23 |
24 | from .parser import (WaveAttrParser, NumericAttrParser,
25 | EnumerationAttrParser, PatientMappingParser,
26 | DBSyntaxError)
27 | from .attributes import (undefined_wave, undefined_numeric,
28 | undefined_enumeration)
29 |
class DWCDB:
    """Picklable handle to a named DWC database server.

    Connection parameters come from the configuration file loaded via
    load_config().  Heavyweight state (connections, attribute caches)
    lives in a shared per-process DWCDBServer object, so DWCDB
    instances can be cheaply pickled and sent to subprocesses.
    """

    _config = None        # ConfigParser with one section per server
    _config_path = ''     # directory containing the config file

    @staticmethod
    def load_config(filename):
        """Load server definitions from the given config file.

        Declared @staticmethod so it also binds correctly when invoked
        on an instance (previously an instance call would have passed
        self as the filename).
        """
        DWCDB._config = ConfigParser()
        DWCDB._config.read(filename)
        DWCDB._config_path = os.path.dirname(filename)

    def __init__(self, servername):
        self._server = DWCDBServer.get(servername)
        self.servername = servername
        self.dialect = self._server.dialect
        self.paramstyle = self._server.paramstyle

    def __repr__(self):
        return ('%s(%r)' % (self.__class__.__name__, self.servername))

    def __getstate__(self):
        # Pickle only the server name; __setstate__ re-attaches to the
        # shared DWCDBServer in the receiving process.
        return self.servername

    def __setstate__(self, servername):
        DWCDB.__init__(self, servername)

    def connect(self):
        """Open and return a new database connection."""
        return self._server.connect()

    def get_messages(self, parser, connection = None, cursor = None):
        """Execute a parser's query, yielding parsed message objects.

        If neither a cursor nor a connection is supplied, a temporary
        connection is opened; any temporary cursor/connection is closed
        when the generator finishes.
        """
        tmpconn = None
        tmpcur = None
        try:
            if cursor is not None:
                cur = cursor
            elif connection is not None:
                cur = tmpcur = connection.cursor()
            else:
                tmpconn = self._server.connect()
                cur = tmpcur = tmpconn.cursor()
            yield from parser.parse(self, cur)
        finally:
            if tmpcur is not None:
                tmpcur.close()
            if tmpconn is not None:
                tmpconn.close()

    def get_wave_attr(self, wave_id, sync):
        """Look up attributes for wave_id (cached); None if pending."""
        v = self._server.wave_attr.get(wave_id, None)
        if v is not None:
            return v

        p = WaveAttrParser(dialect = self.dialect,
                           paramstyle = self.paramstyle,
                           limit = 2, wave_id = wave_id)
        try:
            v = self._parse_attr(p, sync)
        except UnknownAttrError:
            # warn only once per server to avoid log spam
            if not self._server._warned_wave:
                logging.warning('unknown wave ID: %s' % wave_id)
                self._server._warned_wave = True
            v = undefined_wave
        except DBSyntaxError as e:
            warnings.warn(e.warning(), stacklevel = 2)
            v = undefined_wave
        except UnavailableAttrError:
            # result not yet available; do not cache
            return None
        self._server.wave_attr[wave_id] = v
        return v

    def get_numeric_attr(self, numeric_id, sync):
        """Look up attributes for numeric_id (cached); None if pending."""
        v = self._server.numeric_attr.get(numeric_id, None)
        if v is not None:
            return v

        p = NumericAttrParser(dialect = self.dialect,
                              paramstyle = self.paramstyle,
                              limit = 2, numeric_id = numeric_id)
        try:
            v = self._parse_attr(p, sync)
        except UnknownAttrError:
            if not self._server._warned_numeric:
                logging.warning('unknown numeric ID: %s' % numeric_id)
                self._server._warned_numeric = True
            v = undefined_numeric
        except DBSyntaxError as e:
            warnings.warn(e.warning(), stacklevel = 2)
            v = undefined_numeric
        except UnavailableAttrError:
            return None
        self._server.numeric_attr[numeric_id] = v
        return v

    def get_enumeration_attr(self, enumeration_id, sync):
        """Look up attributes for enumeration_id (cached); None if pending."""
        v = self._server.enumeration_attr.get(enumeration_id, None)
        if v is not None:
            return v

        p = EnumerationAttrParser(dialect = self.dialect,
                                  paramstyle = self.paramstyle,
                                  limit = 2, enumeration_id = enumeration_id)
        try:
            v = self._parse_attr(p, sync)
        except UnknownAttrError:
            if not self._server._warned_enum:
                logging.warning('unknown enumeration ID: %s' % enumeration_id)
                self._server._warned_enum = True
            v = undefined_enumeration
        except DBSyntaxError as e:
            warnings.warn(e.warning(), stacklevel = 2)
            v = undefined_enumeration
        except UnavailableAttrError:
            return None
        self._server.enumeration_attr[enumeration_id] = v
        return v

    def get_patient_id(self, mapping_id, sync):
        """Map a mapping ID to a patient ID (cached); None if unknown."""
        v = self._server.patient_map.get(mapping_id, None)
        if v is not None:
            return v
        # if not sync:
        #     return None

        p = PatientMappingParser(dialect = self.dialect,
                                 paramstyle = self.paramstyle,
                                 limit = 2, mapping_id = mapping_id)
        try:
            v = self._parse_attr(p, True)
        except UnknownAttrError:
            if not self._server._warned_mapping:
                logging.warning('unknown mapping ID: %s' % mapping_id)
                self._server._warned_mapping = True
            return None
        except DBSyntaxError as e:
            warnings.warn(e.warning(), stacklevel = 2)
            # NOTE(review): storing None here does not suppress future
            # lookups, since the cache check above treats None as a
            # missing entry - confirm the intent.
            self._server.patient_map[mapping_id] = None
            return None
        self.set_patient_id(mapping_id, v.patient_id)
        return v.patient_id

    def set_patient_id(self, mapping_id, patient_id):
        """Record a known mapping ID -> patient ID association."""
        self._server.patient_map[mapping_id] = patient_id

    def _parse_attr(self, parser, sync):
        """Run an attribute query and return the single result message.

        Raises UnknownAttrError if the query returned no rows; logs a
        warning (and returns the first row) if it returned several.
        """
        # ensure that attr_db connections are not shared between
        # processes
        pid = os.getpid()
        if self._server.attr_db_pid == pid:
            conn = self._server.attr_db
        else:
            self._server.attr_db = conn = self._server.connect()
            self._server.attr_db_pid = pid

        # FIXME: add asynchronous processing
        results = list(self.get_messages(parser, connection = conn))
        if len(results) > 1:
            logging.warning('multiple results found for %r' % parser)
        elif len(results) == 0:
            raise UnknownAttrError()
        return results[0]
190 |
class DWCDBServer:
    """Shared per-process state for a named server.

    Holds connection parameters plus the caches of attribute and
    patient-mapping lookups.  Use DWCDBServer.get() rather than the
    constructor, so all DWCDB handles for a server share one instance.
    """

    _named_servers = {}   # servername -> shared DWCDBServer instance

    def __init__(self, servername):
        # 'type' defaults to mssql for backward compatibility
        self.dbtype = DWCDB._config.get(servername, 'type', fallback = 'mssql')

        if self.dbtype == 'mssql':
            import pymssql
            self.hostname = DWCDB._config[servername]['hostname']
            self.username = DWCDB._config[servername]['username']
            self.password = DWCDB._config[servername]['password']
            self.database = DWCDB._config[servername]['database']
            self.dialect = 'ms'
            self.paramstyle = pymssql.paramstyle
        elif self.dbtype == 'sqlite':
            import sqlite3
            self.filename = DWCDB._config[servername]['file']
            self.dialect = 'sqlite'
            self.paramstyle = sqlite3.paramstyle
        elif self.dbtype == 'bcp':
            from .db import dwcbcp
            self.bcpdirs = []
            # bcp-path entries are resolved relative to the config file
            for d in DWCDB._config[servername]['bcp-path'].split(':'):
                self.bcpdirs.append(os.path.join(DWCDB._config_path, d))
            self.dialect = 'sqlite'
            self.paramstyle = dwcbcp.paramstyle
        else:
            # include the offending value to make misconfiguration
            # easier to diagnose
            raise ValueError('unknown database type: %r' % self.dbtype)

        # lookup caches shared by every DWCDB handle for this server
        self.wave_attr = {}
        self.numeric_attr = {}
        self.enumeration_attr = {}
        self.patient_map = {}
        self.attr_db = None       # cached connection for attribute queries
        self.attr_db_pid = None   # PID that owns attr_db
        # one-shot warning flags (see DWCDB.get_*_attr)
        self._warned_mapping = False
        self._warned_wave = False
        self._warned_numeric = False
        self._warned_enum = False

    @staticmethod
    def get(servername):
        """Return the shared server object, creating it on first use.

        Declared @staticmethod so it binds correctly however it is
        called (previously an instance call would have misbound self).
        """
        s = DWCDBServer._named_servers.get(servername, None)
        if s is None:
            s = DWCDBServer(servername)
            DWCDBServer._named_servers[servername] = s
        return s

    def connect(self):
        """Open a new DB-API connection for this server's backend."""
        if self.dbtype == 'mssql':
            import pymssql
            return pymssql.connect(self.hostname, self.username,
                                   self.password, self.database,
                                   tds_version='7.1')
        elif self.dbtype == 'sqlite':
            import sqlite3
            return sqlite3.connect(self.filename)
        elif self.dbtype == 'bcp':
            from .db import dwcbcp
            return dwcbcp.connect(self.bcpdirs)
250 |
class UnknownAttrError(Exception):
    """Raised internally when the requested object does not exist."""
254 |
class UnavailableAttrError(Exception):
    """Raised internally while the request is still pending."""
258 |
--------------------------------------------------------------------------------
/downcast/output/alerts.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | from datetime import datetime, timezone
20 | import os
21 | import re
22 |
23 | from ..messages import AlertMessage
24 | from ..timestamp import (T, delta_ms)
25 | from ..util import string_to_ascii
26 | from .wfdb import (Annotator, AnnotationType)
27 |
# Plausibility cutoff: announce/onset/end times are only logged when
# strictly after the Unix epoch (values at or before it are treated as
# unset - presumably placeholder values; confirm against DWC docs).
_sane_time = T('1970-01-01 00:00:00.000 +00:00')
29 |
class AlertHandler:
    """Writes incoming AlertMessages to per-record '_phi_alerts' logs."""

    def __init__(self, archive):
        self.archive = archive

    def send_message(self, chn, msg, source, ttl):
        """Handle one message, acking it once it is safely recorded."""
        if not isinstance(msg, AlertMessage):
            return

        source.nack_message(chn, msg, self)

        # Look up the corresponding record
        record = self.archive.get_record(msg)
        if record is None:
            # Record not yet available - hold message in pending and
            # continue processing
            return

        # Dump original message to BCP file if desired
        if record.dump(msg):
            source.ack_message(chn, msg, self)
            return

        # Open or create a log file
        logfile = record.open_log_file('_phi_alerts')

        # Format the fields that go into the log entry
        seqnum = msg.sequence_number
        stamp = msg.timestamp.strftime_utc('%Y%m%d%H%M%S%f')
        idstr = str(msg.alert_id)
        label = string_to_ascii(msg.label)
        statestr = '~' if msg.is_silenced else '='

        logfile.append('S%s' % seqnum)
        # Each known event time (announce/onset/end), if plausible, is
        # written as a timestamp line followed by an ID+marker line.
        for (when, marker) in ((msg.announce_time, '+'),
                               (msg.onset_time, '!'),
                               (msg.end_time, '-')):
            if when and when > _sane_time:
                logfile.append(when.strftime_utc('%Y%m%d%H%M%S%f'))
                logfile.append('(%s)%s' % (idstr, marker))
        logfile.append(stamp)
        logfile.append('(%s)%s%s%s' % (idstr, msg.severity, statestr, label))

        source.ack_message(chn, msg, self)

    def flush(self):
        """Flush the underlying archive."""
        self.archive.flush()
85 |
class AlertFinalizer:
    """Converts a record's '_phi_alerts' log into WFDB annotations.

    Construction scans the log once (unsorted) to collect per-alert
    onset/announce/end times and feed the time map; finalize_record()
    then re-reads the log in order and writes 'waves.alarm'.
    """

    def __init__(self, record):
        self.record = record
        self.log = record.open_log_reader('_phi_alerts', allow_missing = True)

        # alert_id -> (sequence_number, timestamp) for each event kind
        self.alert_onset = {}
        self.alert_announce = {}
        self.alert_end = {}

        # Scan the alerts log file, add timestamps to the time map,
        # and record onset/announce/end time for each alert ID.
        for (sn, ts, line) in self.log.unsorted_items():
            # log timestamps are compact UTC strings (see AlertHandler)
            ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
            ts = ts.replace(tzinfo = timezone.utc)
            record.time_map.add_time(ts)

            (alert_id, event, severity, state, label) = _parse_info(line)
            # If there are multiple recorded onset times, save the one
            # that was recorded first (smallest sequence number.)
            # Save the earliest onset timestamp that was recorded at
            # that sequence number.
            if event == b'!':
                if (sn, ts) < self.alert_onset.setdefault(alert_id, (sn, ts)):
                    self.alert_onset[alert_id] = (sn, ts)
            # If there are multiple recorded announce times, save the
            # one that was recorded first (smallest sequence number.)
            # Save the earliest announce timestamp that was recorded
            # at that sequence number.
            elif event == b'+':
                if (sn, ts) < self.alert_announce.setdefault(alert_id,
                                                             (sn, ts)):
                    self.alert_announce[alert_id] = (sn, ts)
            # If there are multiple recorded end times, save the one
            # that was recorded last (largest sequence number.)  Save
            # the latest end timestamp that was recorded at that
            # sequence number.
            elif event == b'-':
                if (sn, ts) > self.alert_end.setdefault(alert_id, (sn, ts)):
                    self.alert_end[alert_id] = (sn, ts)

    def finalize_record(self):
        """Write the 'waves.alarm' annotation file for this record."""
        sn0 = self.record.seqnum0()
        if sn0 is None:
            # if we don't have a seqnum0 then time is meaningless
            return

        # per-alert bookkeeping while walking the log in order
        alert_first = {}
        alert_pre_announce = {}
        alert_pre_end = {}
        alert_last = {}
        alert_num = {}

        # Translate announce timestamps to record-relative sample
        # numbers.  The (sn + 5120) bound presumably reflects the
        # maximum event lead relative to the message's own sequence
        # number - confirm against get_seqnum's docstring.
        announce_t = {}
        for (alert_id, (sn, ts)) in self.alert_announce.items():
            sn = self.record.time_map.get_seqnum(ts, sn + 5120)
            if sn is None:
                continue
            announce_t[alert_id] = sn - sn0

        end_t = {}
        for (alert_id, (sn, ts)) in self.alert_end.items():
            # alert end time may actually be slightly later than
            # time of the message.  why?  no idea.  how do these
            # timestamps work in regard to system clock
            # adjustments?  no idea.
            sn = self.record.time_map.get_seqnum(ts, sn + 15120)
            if sn is None:
                continue
            end_t[alert_id] = sn - sn0

        annfname = os.path.join(self.record.path, 'waves.alarm')
        with Annotator(annfname, afreq = 1000) as anns:
            # Reread the alerts log file in order.  Assign an integer
            # ID to each alert in order of appearance, and record the
            # severity, state (silenced or not) and label.
            #
            # Severity/state/label can change from one message to the
            # next.  For the onset annotation, we use the earliest
            # message.  For the announcement annotation, we use the
            # latest message that precedes the announcement time, or
            # the earliest annotation if there isn't one.  For the end
            # annotation, we use the latest message that precedes the
            # end time, or the latest message if there isn't one.  If
            # there are any state changes between announcement and
            # end, we add those as additional annotations.
            for (sn, ts, line) in self.log.sorted_items():
                # skip lines containing a CAN (0x18) byte - presumably
                # marking cancelled/corrupt entries; confirm upstream
                if b'\030' in line:
                    continue

                (alert_id, event, severity, state, label) = _parse_info(line)
                if not label:
                    continue

                ts = datetime.strptime(str(ts), '%Y%m%d%H%M%S%f')
                ts = ts.replace(tzinfo = timezone.utc)
                # fall back to the raw sequence number if unmappable
                sn = self.record.time_map.get_seqnum(ts, sn + 5120) or sn
                t = sn - sn0

                num = alert_num.setdefault(alert_id, len(alert_num) + 1)

                oldstate = alert_last.get(alert_id, None)
                newstate = (severity, state, label)
                alert_first.setdefault(alert_id, newstate)
                alert_last[alert_id] = newstate

                announce = announce_t.get(alert_id, t)
                end = end_t.get(alert_id, t)
                if t <= announce:
                    alert_pre_announce[alert_id] = newstate
                if t <= end:
                    alert_pre_end[alert_id] = newstate

                # state changed strictly between announce and end:
                # emit an intermediate (b';') annotation
                if oldstate and oldstate != newstate and announce < t < end:
                    _put_annot(anns, t, num, b';', severity, state, label)

            # onset annotations use the earliest known state
            for (alert_id, (sn, ts)) in self.alert_onset.items():
                num = alert_num.get(alert_id)
                sn = self.record.time_map.get_seqnum(ts, sn + 5120)
                if num is None or sn is None:
                    continue
                t = sn - sn0
                (severity, state, label) = alert_first[alert_id]
                _put_annot(anns, t, num, b'+', severity, state, label)

            # announce annotations use the last pre-announce state
            for (alert_id, t) in announce_t.items():
                num = alert_num.get(alert_id)
                if num is None:
                    continue
                (severity, state, label) = (alert_pre_announce.get(alert_id)
                                            or alert_first[alert_id])
                _put_annot(anns, t, num, b'<', severity, state, label)

            # end annotations use the last pre-end state
            for (alert_id, t) in end_t.items():
                num = alert_num.get(alert_id)
                if num is None:
                    continue
                (severity, state, label) = (alert_pre_end.get(alert_id)
                                            or alert_last[alert_id])
                _put_annot(anns, t, num, b'>', severity, state, label)
225 |
226 | _info_pattern = re.compile(rb'\(([\w-]+)\)(?:([-+!])|(\d+)([=~])(.*))')
227 |
228 | def _parse_info(line):
229 | m = _info_pattern.fullmatch(line.rstrip(b'\n'))
230 | if m:
231 | return m.groups()
232 | else:
233 | return (None, None, None, None, None)
234 |
def _put_annot(anns, time, alert_num, event_code, severity, state, label):
    """Emit one WFDB NOTE annotation describing an alert event."""
    severity = int(severity)
    # severity subtypes: 0 = RED -> 3, 1 = YELLOW -> 2,
    # 2 = SHORT YELLOW -> 1, anything else -> 0
    subtyp = {0: 3, 1: 2, 2: 1}.get(severity, 0)

    # event base: b'+' onset, b'<' announce, b'>' end; any other code
    # (an intermediate state change) gets the default base
    subtyp += {b'+': 90, b'<': 80, b'>': 60}.get(event_code, 70)

    # aux text: event code, alert number in braces, silenced flag, label
    aux = event_code + b'{' + str(alert_num).encode() + b'}'
    aux += b'~' if state == b'~' else b' '
    aux += label

    anns.put(time = time, anntyp = AnnotationType.NOTE,
             subtyp = subtyp, chan = 255, aux = aux)
264 |
--------------------------------------------------------------------------------
/downcast/output/timemap.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import os
20 | import csv
21 | import bisect
22 | import logging
23 | from datetime import timedelta
24 |
25 | from ..timestamp import T, delta_ms
26 | from ..util import fdatasync
27 |
class TimeMap:
    """
    Object that tracks the mapping between time and sequence number.

    In general, sequence numbers provide a reliable measurement of
    time; wall-clock timestamps do not.

    (For example, two events whose sequence numbers differ by
    1,000,000 are exactly twice as far apart as two events whose
    sequence numbers differ by 500,000. However, two events whose
    wall-clock timestamps differ by 1,000 seconds might be anywhere
    from 970 to 1,030 seconds apart.)

    This object aggregates the available information concerning the
    mapping (which is not necessarily injective in either direction)
    between sequence number and timestamp, so that given an arbitrary
    timestamp, it is possible to determine the most likely sequence
    number at which that timestamp would have been generated.
    """

    def __init__(self, record_id):
        # Each entry is a list [start, end, baset, pending]: for
        # sequence numbers in [start, end], wall-clock time equals
        # baset + seqnum milliseconds.  pending is a set of observed,
        # unanchored timestamps used by resolve_gaps(); it is never
        # written to disk.
        self.entries = []
        self.record_id = record_id

    def read(self, path, name):
        """Read a time map file."""
        fname = os.path.join(path, name)
        try:
            with open(fname, 'rt', encoding = 'UTF-8') as f:
                for row in csv.reader(f):
                    start = int(row[0])
                    end = int(row[1])
                    baset = T(row[2])
                    self.entries.append([start, end, baset, set()])
        except FileNotFoundError:
            # no existing map; start empty
            pass
        self.entries.sort()

    def write(self, path, name):
        """Write a time map file."""
        fname = os.path.join(path, name)
        # write to a temp file and rename, so a crash cannot leave a
        # truncated map behind
        tmpfname = os.path.join(path, '_' + name + '.tmp')
        with open(tmpfname, 'wt', encoding = 'UTF-8') as f:
            w = csv.writer(f)
            for e in self.entries:
                # only [start, end, baset] are persisted
                w.writerow(e[0:3])
            f.flush()
            fdatasync(f.fileno())
        os.rename(tmpfname, fname)

    def set_time(self, seqnum, time):
        """
        Add a reference timestamp to the map.

        This indicates that we know (from a reliable source, such as a
        wave sample message) the exact wall-clock time at a given
        sequence number.

        Given this information, we can infer what the wall-clock time
        must have been at other moments in time, so long as the wall
        clock is not adjusted.

        This information is treated as trustworthy and will be saved
        to the time map file when write() is called.
        """
        baset = time - timedelta(milliseconds = seqnum)

        # i = index of the first span that begins at or after seqnum
        i = bisect.bisect_right(self.entries, [seqnum])
        # p and n are zero-or-one-element slices holding the spans
        # immediately before and after that position
        p = self.entries[i-1:i]
        n = self.entries[i:i+1]

        # If this sequence number falls within an existing span,
        # verify that baset is what we expect
        if p and seqnum <= p[0][1]:
            if baset != p[0][2]:
                logging.warning('conflicting timestamps at %d in %s'
                                % (seqnum, self.record_id))
        elif n and seqnum >= n[0][0]:
            if baset != n[0][2]:
                logging.warning('conflicting timestamps at %d in %s'
                                % (seqnum, self.record_id))

        # If this sequence number falls close to the start or end of
        # an existing span that has the same baset value (close enough
        # that we assume there could not have been more than one clock
        # adjustment), then extend the existing span(s)
        elif p and p[0][2] == baset and seqnum - p[0][1] < 30000:
            p[0][1] = seqnum
            if n and n[0][2] == baset and n[0][0] - seqnum < 30000:
                # spans now touch: merge p into n and drop p.
                # NOTE(review): p's pending set is discarded here;
                # harmless if add_time() is only called after all
                # set_time() calls, as documented below - confirm for
                # the resolve_gaps() path.
                n[0][0] = p[0][0]
                del self.entries[i-1]
        elif n and n[0][2] == baset and n[0][0] - seqnum < 30000:
            n[0][0] = seqnum

        # Otherwise, define a new span
        else:
            self.entries.insert(i, [seqnum, seqnum, baset, set()])

    def add_time(self, time):
        """
        Add a non-reference timestamp to the map.

        This indicates that we have observed the given wall-clock time
        (for example, it is used as the timestamp of a numeric or
        alert message), but we do not yet know precisely when that
        timestamp occurred.

        This information is not saved in the time map file, but is
        used by resolve_gaps() to refine the time map.

        This function should be called after all reference timestamps
        have been recorded using set_time().
        """
        for e in self.entries:
            start = e[2] + timedelta(milliseconds = e[0])
            if time < start:
                # falls in the gap before this span; remember it so
                # resolve_gaps() can pin down the clock adjustment
                e[3].add(time)
                return
            end = e[2] + timedelta(milliseconds = e[1])
            if time <= end:
                # already covered by a known span; nothing to record
                return

    def get_seqnum(self, time, limit = None):
        """
        Guess the sequence number corresponding to a wall-clock time.

        limit should be the latest possible value (inclusive) for this
        sequence number.  Typically, if the message sequence number is
        N, then it should be impossible for any event to have occurred
        at time greater than (N + 5120).

        If no information is available, this will return None.
        """

        if not self.entries:
            return None

        if limit is None:
            limit = self.entries[-1][1]

        # If this timestamp falls within a known interval - there is
        # an instant at which we know the system clock would have
        # displayed that value - then choose the latest such instant
        # that is before or equal to 'limit'.
        possible_sn = []
        best_known = None
        for (start, end, base, _) in self.entries:
            sn = delta_ms(time, base)
            possible_sn.append((sn, end))
            if start <= sn <= end and sn <= limit:
                best_known = sn
        if best_known is not None:
            return best_known

        # Otherwise, take the earliest interval for which this
        # timestamp would appear to be in the past.  (So, if the
        # system clock never displayed this timestamp, then translate
        # according to the next reference timestamp *after* this
        # point.  If the system clock displayed this timestamp
        # multiple times, but all of those occurred after 'limit',
        # then choose the earliest.)
        for (sn, interval_end) in possible_sn:
            if sn <= interval_end:
                return sn

        # Otherwise, the timestamp occurs in the future; extrapolate
        # from the *last* reference timestamp.
        return possible_sn[-1][0]

    def get_time(self, seqnum):
        """
        Guess the wall-clock time corresponding to a sequence number.

        If no information is available, this will return None.
        """
        best_time = None
        best_delta = None
        # pick the span closest to seqnum (delta <= 0 means seqnum is
        # inside that span) and translate using its base time
        for (start, end, base, _) in self.entries:
            delta = max(start - seqnum, seqnum - end)
            if best_delta is None or delta < best_delta:
                best_time = base + timedelta(milliseconds = seqnum)
                best_delta = delta
        return best_time

    def resolve_gaps(self):
        """
        Refine the time map based on all available information.

        The wall clock may be adjusted at any time during the record;
        in general, there is no way to know exactly when this happens.
        When it does, two consecutive reference timestamps will
        disagree; for example, we might have

          sequence number   timestamp
          500000000000      2015-11-05 12:53:20.000 +00:00
          500000005120      2015-11-05 12:53:27.120 +00:00

        This tells us that, at some time between those two events, the
        wall clock was adjusted forward by two seconds.  If we then
        see:

          (unknown)         2015-11-05 12:53:23.800 +00:00

        we can't tell whether that occurs 3.8 seconds after event #1,
        or 3.32 seconds before event #2.  However, if we also see:

          (unknown)         2015-11-05 12:53:21.900 +00:00

        we can deduce that the two-second adjustment could not
        possibly have occurred between events #1 and #4, nor between
        events #4 and #3, and thus it must have been between events #3
        and #2; so event #4 must have occurred at sequence number
        500000001900, and event #3 at 500000003800.

        In ambiguous cases, our best guess is that the adjustment
        occurred between the most distant pair of timestamps - if we
        only have events #1-#3 above, then all we can say is that it's
        more likely to have a 3.32-second interval with no events,
        than to have a 3.8-second interval with no events, and thus
        the clock adjustment is more likely to have occurred between
        events #1 and #3.
        """
        p = None
        new_refs = []
        for n in self.entries:
            if p and n[3]:
                # wall-clock bounds of the gap between spans p and n
                gapstart = p[2] + timedelta(milliseconds = p[1])
                gapend = n[2] + timedelta(milliseconds = n[0])
                n[3].add(gapstart)
                n[3].add(gapend)
                # find the widest interval between consecutive observed
                # timestamps; assume the clock adjustment happened there
                best = (timedelta(0), gapstart)
                for d in _differences(sorted(n[3])):
                    best = max(best, d)
                tbefore = best[1]
                tafter = best[1] + best[0]
                # anchor each side of the assumed adjustment to the
                # base time of the adjacent span
                snp = delta_ms(tbefore, p[2])
                snn = delta_ms(tafter, n[2])
                new_refs.append((snp, tbefore))
                new_refs.append((snn, tafter))
            p = n
        # apply after the scan so entries isn't mutated mid-iteration
        for (seqnum, time) in new_refs:
            self.set_time(seqnum, time)
271 |
272 | def _differences(k):
273 | i = iter(k)
274 | try:
275 | prev = next(i)
276 | except StopIteration:
277 | return
278 | for cur in i:
279 | yield (cur - prev, prev)
280 | prev = cur
281 |
--------------------------------------------------------------------------------
/downcast/shell.py:
--------------------------------------------------------------------------------
1 | #
2 | # dwcsql - simple interactive frontend for the DWC SQL database
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see .
18 |
19 | import sys
20 | import readline
21 | import time
22 | import os
23 | import re
24 | import locale
25 | import ast
26 | from argparse import ArgumentParser
27 | from uuid import UUID
28 | from decimal import Decimal
29 |
30 | from .server import DWCDB
31 | from .db.exceptions import ParameterCountError
32 |
33 | ################################################################
34 |
# Table names offered for tab-completion: the External_* views plus
# the _Export.* tables of the DWC schema.
_known_tables = [
    'External_Alert',
    'External_BedTag',
    'External_Enumeration',
    'External_EnumerationValue',
    'External_Numeric',
    'External_NumericValue',
    'External_Patient',
    'External_PatientDateAttribute',
    'External_PatientStringAttribute',
    'External_Wave',
    'External_WaveSample',
    'Pdx_PartitionDetailView',
    '_Export.AlertArchive_',
    '_Export.Alert_',
    '_Export.BedTag_',
    '_Export.Configuration_',
    '_Export.DbMaintenanceLock_',
    '_Export.EnumerationValueArchive_',
    '_Export.EnumerationValue_',
    '_Export.Enumeration_',
    '_Export.NumericValueArchive_',
    '_Export.NumericValue_',
    '_Export.Numeric_',
    '_Export.PartitionSetting_',
    '_Export.PatientDateAttribute_',
    '_Export.PatientMappingArchive_',
    '_Export.PatientMapping_',
    '_Export.PatientStringAttribute_',
    '_Export.Patient_',
    '_Export.StorageLocation_',
    '_Export.WaveSampleArchive_',
    '_Export.WaveSample_',
    '_Export.Wave_'
]

# Column names offered for tab-completion (union of columns across the
# tables above).
_known_columns = [
    'AdmitState', 'AlertId', 'Alias', 'AnnounceTime', 'BasePhysioId',
    'BedLabel', 'CalibrationAbsLower', 'CalibrationAbsUpper',
    'CalibrationScaledLower', 'CalibrationScaledUpper',
    'CalibrationType', 'Category', 'Channel', 'ClinicalUnit', 'Code',
    'Color', 'CompoundValueId', 'EcgLeadPlacement', 'EndTime',
    'EnumerationId', 'Gender', 'Height', 'HeightUnit',
    'HighEdgeFrequency', 'Hostname', 'Id', 'InvalidSamples',
    'IsAlarmingOff', 'IsAperiodic', 'IsDerived', 'IsManual',
    'IsMapped', 'IsSilenced', 'IsSlowWave', 'IsTrendUploaded', 'Kind',
    'Label', 'LowEdgeFrequency', 'LowerLimit', 'MappingId',
    'MaxValues', 'Name', 'NumericId', 'OnsetTime', 'PacedMode',
    'PacedPulses', 'PatientId', 'PhysioId', 'PressureUnit',
    'ResuscitationStatus', 'SamplePeriod', 'Scale', 'ScaleLower',
    'ScaleUpper', 'SequenceNumber', 'Severity', 'Source', 'SubLabel',
    'SubPhysioId', 'SubtypeId', 'Tag', 'TimeStamp', 'Timestamp',
    'UnavailableSamples', 'UnitCode', 'UnitLabel', 'UpperLimit',
    'Validity', 'Value', 'ValuePhysioId', 'WaveId', 'WaveSamples',
    'Weight', 'WeightUnit'
]

# Identifier completions discovered at runtime.
# NOTE(review): populated elsewhere in this module; presumably maps
# identifier strings seen in results - confirm against the completer
# code below.
_known_ids = {}
93 |
94 | def _get_completions(text):
95 | for t in _known_tables:
96 | if t.startswith(text):
97 | yield t
98 | for c in _known_columns:
99 | if c.startswith(text):
100 | yield c
101 | if text.startswith("'"):
102 | prefix = text[1:3]
103 | if prefix in _known_ids:
104 | for z in _known_ids[prefix]:
105 | if z.startswith(text):
106 | yield z
107 |
108 | def _add_known_uuid(val):
109 | s = repr(str(val))
110 | prefix = s[1:3]
111 | if prefix not in _known_ids:
112 | _known_ids[prefix] = set()
113 | _known_ids[prefix].add(s)
114 |
115 | _ctext = ''
116 | _ccompl = []
117 |
118 | def _completer(text, state):
119 | global _ctext, _ccompl
120 | if text != _ctext:
121 | _ctext = text
122 | _ccompl = sorted(_get_completions(text))
123 | if state < len(_ccompl):
124 | return _ccompl[state]
125 | else:
126 | return None
127 |
128 | ################################################################
129 |
130 | _uuid_pattern = re.compile('\A[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}\Z',
131 | re.ASCII | re.IGNORECASE)
132 |
133 | if sys.stdout.isatty() and os.environ.get('TERM', 'dumb') != 'dumb':
134 | _vcolor = ['\033[0m'] + ['\033[%dm' % i for i in range(31, 37)]
135 | _hcolor = ['\033[0;1m'] + ['\033[%d;1m' % i for i in range(31, 37)]
136 | _color0 = '\033[0m'
137 | else:
138 | _vcolor = _hcolor = ['']
139 | _color0 = ''
140 |
141 | _max_align_width = 64
142 | _align_group_size = 20
143 |
144 | def _format_value(val, desc=None):
145 | if isinstance(val, bool):
146 | return repr(val)
147 | elif isinstance(val, Decimal) or isinstance(val, int):
148 | if desc == 'Color' and val < 0 and val >= -16777216:
149 | return '#%06x' % (val + 16777216)
150 | return '{:n}'.format(val)
151 | elif isinstance(val, UUID):
152 | _add_known_uuid(val)
153 | return repr(str(val))
154 | elif isinstance(val, str):
155 | if _uuid_pattern.match(val):
156 | _add_known_uuid(UUID(val))
157 | return repr(val)
158 | else:
159 | return repr(val)
160 |
161 | def _value_alignment(val):
162 | return (isinstance(val, str)
163 | or isinstance(val, bytes)
164 | or isinstance(val, UUID))
165 |
166 | def _pad(text, width, leftalign):
167 | if leftalign:
168 | return text.ljust(width)
169 | else:
170 | return text.rjust(width)
171 |
def _show_results(cur, colinfo, results, setindex, transpose=False):
    """Format and print one group of result rows.

    colinfo is a mutable list of [width, leftalign, label] entries,
    shared across successive calls for the same result set and
    updated in place, so later groups stay aligned with earlier ones.
    An empty colinfo marks the first group: column labels are taken
    from cur.description and headers are printed.  setindex selects
    the color rotation for this result set; transpose prints one line
    per column instead of one line per row.
    """
    # A fresh (empty) colinfo means this is the first group of rows.
    headers = (len(colinfo) == 0)
    headerwidth = 0
    if headers:
        for desc in cur.description:
            if transpose:
                colinfo.append([0, None, desc[0]])
                headerwidth = max(headerwidth, len(desc[0]))
            else:
                # Start each column at least as wide as its label.
                colinfo.append([len(desc[0]), None, desc[0]])
    table = []
    for row in results:
        # Rows may carry more columns than cur.description advertised.
        while len(colinfo) < len(row):
            colinfo.append([0, None, ''])
        tabrow = []
        for (i, value) in enumerate(row):
            text = _format_value(value, colinfo[i][2])
            width = len(text)
            # Exceptionally wide values do not stretch the column.
            if width < _max_align_width:
                colinfo[i][0] = max(colinfo[i][0], width)
            # Alignment is fixed by the first non-null value seen.
            if value is not None and colinfo[i][1] is None:
                colinfo[i][1] = _value_alignment(value)
            tabrow.append(text)
        table.append(tabrow)

    if transpose:
        # One output line per column: the header, then that column's
        # value from each buffered row.
        cellwidth = max(ci[0] for ci in colinfo)
        for (i, (_, leftalign, label)) in enumerate(colinfo):
            sys.stdout.write(_hcolor[(i + setindex) % len(_hcolor)])
            sys.stdout.write(_pad(label, headerwidth, True))
            sys.stdout.write(_color0)
            sys.stdout.write(_vcolor[(i + setindex) % len(_vcolor)])
            for tabrow in table:
                try:
                    text = tabrow[i]
                except IndexError:
                    # Short row: this column has no value here.
                    text = ''
                sys.stdout.write(' ')
                sys.stdout.write(_pad(text, cellwidth, leftalign))
            sys.stdout.write(_color0 + '\n')
    else:
        if headers:
            for (i, (width, leftalign, label)) in enumerate(colinfo):
                if i > 0:
                    sys.stdout.write(' ')
                sys.stdout.write(_hcolor[(i + setindex) % len(_hcolor)])
                sys.stdout.write(_pad(label, width, leftalign))
            sys.stdout.write(_color0 + '\n')
        for tabrow in table:
            for (i, text) in enumerate(tabrow):
                if i > 0:
                    sys.stdout.write(' ')
                sys.stdout.write(_vcolor[(i + setindex) % len(_vcolor)])
                (width, leftalign, _) = colinfo[i]
                sys.stdout.write(_pad(text, width, leftalign))
            sys.stdout.write(_color0 + '\n')
228 |
def _run_query(conn, query, params):
    """Execute query with params and pretty-print all result sets.

    A query beginning with '@transpose' is displayed with rows and
    columns swapped.  Timing and row-count information is printed
    after the last result set.
    """
    if query == '':
        return
    if re.match(r'@transpose\s', query):
        transpose = True
        query = query[len('@transpose'):]
    else:
        transpose = False
    with conn.cursor() as cur:
        begin = time.monotonic()
        cur.execute(query, params)

        more_results = True
        setindex = 0
        while more_results:
            # Per-result-set column metadata, updated in place by
            # _show_results as rows arrive.
            colinfo = []
            results = []
            row = cur.fetchone()
            while row is not None:
                results.append(row)
                if len(results) >= _align_group_size:
                    # Print in groups so alignment can adapt without
                    # buffering the whole result set.
                    _show_results(cur, colinfo, results, setindex, transpose)
                    results = []
                row = cur.fetchone()
            _show_results(cur, colinfo, results, setindex, transpose)
            more_results = cur.nextset()
            setindex += 1
            if more_results:
                print()

        end = time.monotonic()
        print('(%d rows; %.3f seconds)' % (cur.rowcount, end - begin))
        print()
263 |
264 | ################################################################
265 |
def main():
    """Interactive SQL shell: read queries, run them, print results.

    Reads queries from standard input (with readline editing,
    tab-completion, and optional persistent history via the
    DWCSQL_HISTFILE environment variable) and executes them against
    the server named on the command line.
    """
    # Use the user's locale so '{:n}' number formatting groups digits.
    locale.setlocale(locale.LC_ALL, '')

    p = ArgumentParser()
    p.add_argument('--server', metavar = 'NAME', default = 'demo')
    p.add_argument('--password-file', metavar = 'FILE',
                   default = 'server.conf')
    opts = p.parse_args()

    DWCDB.load_config(opts.password_file)

    db = DWCDB(opts.server)
    # Connection is opened lazily and dropped after errors.
    conn = None

    readline.set_completer_delims(' \t\n()[]=<>-+*?,')
    readline.parse_and_bind('tab: complete')
    readline.set_completer(_completer)

    histfile = os.environ.get('DWCSQL_HISTFILE', None)
    if histfile is not None:
        try:
            readline.read_history_file(histfile)
        except Exception:
            # Missing or unreadable history is not an error.
            pass
    readline.set_history_length(1000)

    try:
        while True:
            try:
                # Read one query; continuation lines are gathered
                # until the query ends with ';' (or a blank line).
                line = input(opts.server + '> ')
                query = line
                while line != '' and not query.endswith(';'):
                    line = input(' ' * len(opts.server) + '> ')
                    query += '\n' + line
                params = []
                if conn is None:
                    conn = db.connect()
                # Re-run the query, prompting for one additional
                # parameter each time the backend reports too few.
                while True:
                    try:
                        _run_query(conn, query, params)
                        break
                    except ParameterCountError as e:
                        # pprompt is always set before the input()
                        # below, since the bare except re-raises.
                        pprompt = (e.context or '?') + ' '
                        pass
                    except:
                        # Any other failure (including interrupts):
                        # drop the connection so the next query gets
                        # a fresh one, then report below.
                        conn.close()
                        conn = None
                        raise
                    line = input(pprompt)
                    params.append(ast.literal_eval(line.strip()))
            except KeyboardInterrupt:
                # Abandon the current query; reset terminal colors.
                print(_color0)
            except EOFError:
                print()
                return
            except Exception as e:
                # nasty hack to extract the human-readable message from a
                # pymssql exception... is there a proper way to do this?
                if (hasattr(e, 'args') and isinstance(e.args, tuple)
                    and len(e.args) == 2 and isinstance(e.args[1], bytes)):
                    msg = e.args[1].decode('UTF-8', errors = 'replace')
                else:
                    msg = str(e)
                print('%s%s:\n%s\n' % (_color0, type(e).__name__, msg))
    finally:
        # Persist command history even when exiting via an exception.
        if histfile is not None:
            try:
                readline.write_history_file(histfile)
            except Exception:
                pass
336 |
--------------------------------------------------------------------------------
/downcast/db/dwcbcp.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2018 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | import os
20 | import re
21 |
22 | from .bcp import *
23 |
24 | # Sorting order for each table
25 |
# Column by which the data files for each table are sorted.
_table_order_column = {
    '_Export.Alert_': 'TimeStamp',
    '_Export.BedTag_': 'Timestamp',
    '_Export.Enumeration_': 'Id',
    '_Export.EnumerationValue_': 'TimeStamp',
    '_Export.Numeric_': 'Id',
    '_Export.NumericValue_': 'TimeStamp',
    '_Export.Patient_': 'Timestamp',
    '_Export.PatientDateAttribute_': 'Timestamp',
    '_Export.PatientStringAttribute_': 'Timestamp',
    '_Export.PatientMapping_': 'Timestamp',
    '_Export.Wave_': 'Id',
    '_Export.WaveSample_': 'TimeStamp'
}

# Index keys for each table (columns for which a unique-ID index is
# built; tables not listed here get no index).

_table_id_columns = {
    '_Export.PatientMapping_': ['Id']
}

# Regular expression to identify start of a row.
#
# NOTE: these are bytes regex patterns, so the '#' lines inside the
# WaveSample_ triple-quoted literal below are verbose-mode regex
# comments (part of the pattern bytes), not Python comments.  The
# '()' empty group presumably marks the row-start position for the
# consumer of these patterns — TODO confirm against the bcp module.

_table_sync_pattern = {
    '_Export.Alert_': b'\n().',
    '_Export.BedTag_': b'\n().',
    '_Export.Enumeration_': b'\n().',
    '_Export.EnumerationValue_': b'\n().',
    '_Export.Numeric_': b'\n().',
    '_Export.NumericValue_': b'\n().',
    '_Export.Patient_': b'\n().',
    '_Export.PatientDateAttribute_': b'\n().',
    '_Export.PatientStringAttribute_': b'\n().',
    '_Export.PatientMapping_': b'\n().',
    '_Export.Wave_': b'\n().',
    '_Export.WaveSample_': b'''(?x)
        # UnavailableSamples
        [ 0-9\0]* [\t]
        # InvalidSamples
        [ 0-9\0]* [\t]
        # PacedPulses
        [ 0-9\0]* [\t]
        # MappingId
        [0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12} [\n]
        ()
        # WaveId
        \d+ [\t]
        # TimeStamp
        \d{4}-\d{2}-\d{2} [ ] \d{2}:\d{2}:\d{2}\.\d+ [ ] [-+]\d{2}:\d{2} [\t]
        # SequenceNumber
        \d+ [\t]
        '''
}
79 |
80 | # List of columns and types
81 |
# Column name -> column type for every supported table.  The type
# constants (DATETIME, INTEGER, ...) come from the .bcp package.
_table_columns = {
    '_Export.Alert_': {
        'TimeStamp': DATETIME,
        'SequenceNumber': INTEGER,
        'AlertId': UUID,
        'Source': INTEGER,
        'Code': INTEGER,
        'Label': STRING,
        'Severity': INTEGER,
        'Kind': INTEGER,
        'IsSilenced': BOOLEAN,
        'SubtypeId': INTEGER,
        'AnnounceTime': DATETIME,
        'OnsetTime': DATETIME,
        'EndTime': DATETIME,
        'MappingId': UUID,
    },
    '_Export.BedTag_': {
        'BedLabel': STRING,
        'Timestamp': DATETIME,
        'Tag': STRING,
    },
    '_Export.Enumeration_': {
        'Id': INTEGER,
        'BasePhysioId': INTEGER,
        'PhysioId': INTEGER,
        'Label': STRING,
        'ValuePhysioId': INTEGER,
        'IsAperiodic': BOOLEAN,
        'IsManual': BOOLEAN,
        'Validity': INTEGER,
        'UnitCode': INTEGER,
        'UnitLabel': STRING,
        'Color': INTEGER,
    },
    '_Export.EnumerationValue_': {
        'EnumerationId': INTEGER,
        'TimeStamp': DATETIME,
        'SequenceNumber': INTEGER,
        'CompoundValueId': UUID,
        'Value': STRING,
        'MappingId': UUID,
    },
    '_Export.Numeric_': {
        'Id': INTEGER,
        'BasePhysioId': INTEGER,
        'PhysioId': INTEGER,
        'Label': STRING,
        'IsAperiodic': BOOLEAN,
        'UnitLabel': STRING,
        'Validity': INTEGER,
        'LowerLimit': NUMBER,
        'UpperLimit': NUMBER,
        'IsAlarmingOff': BOOLEAN,
        'SubPhysioId': INTEGER,
        'SubLabel': STRING,
        'Color': INTEGER,
        'IsManual': BOOLEAN,
        'MaxValues': INTEGER,
        'Scale': INTEGER,
    },
    '_Export.NumericValue_': {
        'NumericId': INTEGER,
        'TimeStamp': DATETIME,
        'SequenceNumber': INTEGER,
        'IsTrendUploaded': BOOLEAN,
        'CompoundValueId': UUID,
        'Value': NUMBER,
        'MappingId': UUID,
    },
    '_Export.Patient_': {
        'Id': UUID,
        'Timestamp': DATETIME,
        'BedLabel': STRING,
        'Alias': STRING,
        'Category': INTEGER,
        'Height': NUMBER,
        'HeightUnit': INTEGER,
        'Weight': NUMBER,
        'WeightUnit': INTEGER,
        'PressureUnit': INTEGER,
        'PacedMode': INTEGER,
        'ResuscitationStatus': INTEGER,
        'AdmitState': INTEGER,
        'ClinicalUnit': STRING,
        'Gender': INTEGER,
    },
    '_Export.PatientDateAttribute_': {
        'PatientId': UUID,
        'Timestamp': DATETIME,
        'Name': STRING,
        'Value': STRING, # actually a date but who cares
    },
    '_Export.PatientStringAttribute_': {
        'PatientId': UUID,
        'Timestamp': DATETIME,
        'Name': STRING,
        'Value': STRING,
    },
    '_Export.PatientMapping_': {
        'Id': UUID,
        'PatientId': UUID,
        'Timestamp': DATETIME,
        'IsMapped': BOOLEAN,
        'Hostname': STRING,
    },
    '_Export.Wave_': {
        'Id': INTEGER,
        'BasePhysioId': INTEGER,
        'PhysioId': INTEGER,
        'Label': STRING,
        'Channel': INTEGER,
        'SamplePeriod': INTEGER,
        'IsSlowWave': BOOLEAN,
        'IsDerived': BOOLEAN,
        'Color': INTEGER,
        'LowEdgeFrequency': NUMBER,
        'HighEdgeFrequency': NUMBER,
        'ScaleLower': INTEGER,
        'ScaleUpper': INTEGER,
        'CalibrationScaledLower': INTEGER,
        'CalibrationScaledUpper': INTEGER,
        'CalibrationAbsLower': NUMBER,
        'CalibrationAbsUpper': NUMBER,
        'CalibrationType': INTEGER,
        'UnitLabel': STRING,
        'UnitCode': INTEGER,
        'EcgLeadPlacement': INTEGER,
    },
    '_Export.WaveSample_': {
        'WaveId': INTEGER,
        'TimeStamp': DATETIME,
        'SequenceNumber': INTEGER,
        'WaveSamples': BINARY,
        'UnavailableSamples': STRING,
        'InvalidSamples': STRING,
        'PacedPulses': STRING,
        'MappingId': UUID,
    }
}
222 |
class DWCBCPConnection(BCPConnection):
    """BCP-file-backed read-only view of a set of exported DWC tables."""

    def __init__(self, datadirs):
        BCPConnection.__init__(self)
        for d in datadirs:
            self.add_data_dir(d)

    def add_data_dir(self, dirname):
        """
        Import a directory of data files into the database.

        An example data directory might contain the following:

            Alert.20010101_20010102
            Alert.fmt
            BedTag.20010101_20010102
            BedTag.fmt
            Enumeration
            Enumeration.fmt
            EnumerationValue.20010101_20010102
            EnumerationValue.fmt
            Numeric
            Numeric.fmt
            NumericValue.20010101_20010102
            NumericValue.fmt
            Patient.20010101_20010102
            Patient.fmt
            PatientDateAttribute.20010101_20010102
            PatientDateAttribute.fmt
            PatientMapping.20010101_20010102
            PatientMapping.fmt
            PatientStringAttribute.20010101_20010102
            PatientStringAttribute.fmt
            Wave
            Wave.fmt
            WaveSample.20010101_20010102
            WaveSample.fmt

        For example, 'Alert.20010101_20010102' contains Alert data
        between those two dates, and 'Alert.fmt' is a freebcp format
        file describing the format of 'Alert.20010101_20010102'.

        The 'Enumeration', 'Numeric', and 'Wave' tables are not
        specific to the time period.  For those tables, the most
        recently imported file replaces any previous files.

        For the other tables, all data files are concatenated in the
        order that they are imported.  All of these files must be
        sorted by timestamp, and must not overlap.
        """

        # Raw strings: \A, \., and \Z are regex syntax, and without
        # the r prefix they are (invalid) string escape sequences
        # that trigger warnings on modern Python.
        meta_pat = re.compile(r'\A(?:Enumeration|Numeric|Wave)(?:\.dat)?\Z')
        data_pat = re.compile(r'\.(?:dat|[0-9]+_[0-9]+)\Z')
        for f in sorted(os.listdir(dirname)):
            path = os.path.join(dirname, f)
            base = f.split('.')[0]
            table = '_Export.%s_' % base
            fmtpath = os.path.join(dirname, base + '.fmt')
            if meta_pat.search(f):
                # Time-independent metadata table: replace prior data.
                self.add_data_file(table, path, fmtpath, True)
            elif data_pat.search(f):
                # Time-ranged data file: append to prior data.
                self.add_data_file(table, path, fmtpath, False)

    def add_data_file(self, table, data_file, format_file, replace = False):
        """
        Import a file into the database.

        table is the name of the table, such as '_Export.Alert_'.

        data_file is the name of the raw data file; format_file is the
        name of the corresponding freebcp format file.  (Note that
        only a very small subset of the possible freebcp formats are
        supported.)

        If replace is true, the new data file replaces all previously
        imported data; otherwise, it is concatenated onto the end of
        the preceding files.
        """

        tbl = self.add_table(table)
        tbl.set_sync_pattern(_table_sync_pattern[table])
        tbl.set_order(_table_order_column[table])
        for (col, dtype) in _table_columns[table].items():
            tbl.add_column(col, dtype)
        for col in _table_id_columns.get(table, []):
            tbl.add_unique_id(col)
        if replace:
            tbl.clear()
        tbl.add_data_file(data_file, format_file)
311 |
312 | #### DB-API ####
313 |
def connect(datadirs):
    """DB-API-style module entry point: open a connection backed by
    the given sequence of BCP data directories."""
    return DWCBCPConnection(datadirs)
316 |
--------------------------------------------------------------------------------
/downcast/subprocess.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2017 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | from enum import Enum
20 | from multiprocessing import Process, Pipe, current_process
21 | import atexit
22 | import traceback
23 | import logging
24 | import cProfile
25 | import os
26 | import sys
27 |
28 | from .dispatcher import Dispatcher
29 | from .util import setproctitle
30 |
class ParallelDispatcher:
    """Dispatcher that fans messages out to a pool of worker processes.

    Each incoming message is forwarded to one of N child processes,
    chosen from the message's channel: all messages on a given
    channel reach the same worker, but no other routing guarantee is
    made, so related messages must share a channel.

    Apart from distributing the workload and operating
    asynchronously, this class's API is largely compatible with the
    API of the Dispatcher class.
    """

    def __init__(self, n_children, pending_limit = 200, **kwargs):
        self.n_children = n_children
        self.pending_limit = pending_limit
        self.children = None
        self.dispatcher = Dispatcher(**kwargs)
        # Report uncaught exceptions the same way the workers do.
        sys.excepthook = _handle_fatal_exception

    def add_handler(self, handler):
        """Add a message handler.

        All handlers must be attached before the child processes are
        launched; i.e., before sending any messages.
        """
        if self.children is not None:
            raise Exception('cannot add handlers after sending messages')
        self.dispatcher.add_handler(handler)

    def add_dead_letter_handler(self, handler):
        """Add a dead-letter handler.

        All handlers must be attached before the child processes are
        launched; i.e., before sending any messages.
        """
        if self.children is not None:
            raise Exception('cannot add handlers after sending messages')
        self.dispatcher.add_dead_letter_handler(handler)

    def _start(self):
        # Lazily launch the worker pool on first use.
        if self.children is None:
            self.children = [
                ChildConnector(self.dispatcher,
                               pending_limit = self.pending_limit,
                               name = ('handler%d' % index))
                for index in range(self.n_children)]
            atexit.register(self.shutdown)

    def shutdown(self):
        """Stop all worker processes and wait for them to exit.

        Typically flush should be called first.
        """
        if self.children is None:
            return
        atexit.unregister(self.shutdown)
        for worker in self.children:
            worker.close()
        self.children = None

    def send_message(self, channel, message, source, ttl):
        """Submit a new message.

        Note that message acknowledgements, as well as exceptions,
        will be reported asynchronously.  In particular, if this
        function raises an exception, it may actually be the result
        of some earlier message.
        """
        self._start()
        worker = self.children[hash(channel) % self.n_children]
        worker.send_message(channel, message, source, ttl)

    def flush(self):
        """Flush pending output to disk.

        Any pending acknowledgements or exceptions will be processed
        before flushing.  If this function raises an exception, it
        may actually be the result of some earlier message.
        """
        self._start()
        for worker in self.children:
            worker.flush_begin()
        for worker in self.children:
            worker.flush_end()

    def terminate(self):
        """Force expiration of all pending messages."""
        self._start()
        for worker in self.children:
            worker.terminate()
124 |
class ChildConnector:
    """Object that routes messages to a child process.

    Requests are streamed to the child over a multiprocessing Pipe.
    Up to pending_limit requests may be in flight before the parent
    synchronizes (see _sync_response) to collect acknowledgements and
    any exception raised in the child.
    """

    # Parent-side pipe ends of every live connector.  Freshly forked
    # children close these inherited descriptors (see
    # ChildContext._main) so that pipe EOF is delivered correctly.
    _all_pipes = set()

    def __init__(self, handler, pending_limit = 50, name = None):
        self.pending_limit = pending_limit
        # Requests we may still send before we must synchronize.
        self.pending_count = pending_limit
        # Unacknowledged messages, keyed by message ID.
        self.messages = {}
        self.message_id = 0

        (parent_pipe, child_pipe) = Pipe()
        ChildConnector._all_pipes.add(parent_pipe)
        self.child = ChildContext(handler)
        self.process = Process(target = self.child._main,
                               args = (name, child_pipe),
                               name = name)
        self.process.start()
        self.parent_pipe = parent_pipe
        # The child inherited its own copy of child_pipe; close ours
        # so the child holds the only remaining end.
        child_pipe.close()

    def close(self):
        """Shut down the child process."""
        try:
            # Collect outstanding acks/exceptions before asking the
            # child to exit; log (rather than raise) at this point.
            if self.pending_count != self.pending_limit:
                try:
                    self._sync_response()
                except Exception:
                    logging.exception('Unhandled exception in child process')
            self.parent_pipe.send(ChildRequest.EXIT)
        finally:
            self.parent_pipe.close()
            ChildConnector._all_pipes.discard(self.parent_pipe)
            self.process.join()

    def send_message(self, channel, message, source, ttl):
        """Send a message to the child process."""
        if ttl <= 0:
            # Expiring message: deliver it and synchronize so the
            # expiration is processed before we return.
            self._async_message(channel, message, source, ttl)
            self._sync_response()
        else:
            # Nack up front; the ack arrives asynchronously later.
            source.nack_message(channel, message, self)
            self._async_message(channel, message, source, ttl)

    def flush_begin(self):
        """Instruct the child process to flush output to disk."""
        self._async_request(ChildRequest.FLUSH)

    def flush_end(self):
        """Wait for the child process to finish flushing output."""
        self._sync_response()

    def terminate(self):
        """Force expiration of all pending messages."""
        self._async_request(ChildRequest.TERMINATE)

    def _async_message(self, channel, message, source, ttl):
        # Assign a fresh ID and remember the message until it is
        # acknowledged by the child.
        self.message_id += 1
        msgid = self.message_id
        self.messages[msgid] = (channel, message, source)
        self._async_request((msgid, channel, message, ttl))

    def _async_request(self, request):
        # If the in-flight window is exhausted, synchronize first.
        if self.pending_count <= 0:
            self._sync_response()
        self.parent_pipe.send(request)
        self.pending_count -= 1

    def _sync_response(self):
        # Ask the child for its accumulated acknowledgements (and any
        # stored exception), then apply them here.
        self.parent_pipe.send(ChildRequest.SYNC_RESPONSE)
        (acks, exc, exc_msg) = self.parent_pipe.recv()
        for ackid in acks:
            m = self.messages.pop(ackid, None)
            if m is None:
                logging.warning('ack for an unknown message')
            else:
                (channel, message, source) = m
                source.ack_message(channel, message, self)
        if exc is not None:
            if isinstance(exc, BorkedPickleException):
                # The failing message is probably the one after the
                # last message the child managed to decode.
                m = self.messages.get(exc.last_seen_msgid + 1, (None, None))
                desc = ('Failed to send/receive a message;' +
                        ' pending channel=%r, message=%r') % (m[0], m[1])
                exc = TypeError(desc)
            raise exc from Exception(exc_msg)
        self.pending_count = self.pending_limit
211 |
class ChildContext:
    """Child-process side of a ChildConnector.

    Runs inside the forked worker: receives requests over the pipe,
    forwards messages to the wrapped handler, and accumulates
    acknowledgements to return on the next SYNC_RESPONSE.
    """

    def __init__(self, handler):
        self.handler = handler
        # Maps (channel, message) back to the parent's message ID.
        self.message_ids = {}
        # Message IDs acknowledged since the last SYNC_RESPONSE.
        self.acks = []
        self.pipe = None

    def _main(self, name, child_pipe):
        """Process entry point for the worker."""
        try:
            # Close all of the parent-side pipes that were created
            # previously (and inherited by the child process.)
            # Unfortunately we can't simply close all file
            # descriptors, or even all 'non-inheritable' file
            # descriptors, as that breaks pymssql.
            for p in ChildConnector._all_pipes:
                p.close()
            ChildConnector._all_pipes = set()

            if name is not None:
                setproctitle('downcast:%s' % (name,))

            self.pipe = child_pipe
            # Optionally profile this worker, writing results to
            # $DOWNCAST_PROFILE_OUT.<name>.
            pf = os.environ.get('DOWNCAST_PROFILE_OUT', None)
            if pf is not None and name is not None:
                pf = '%s.%s' % (pf, name)
                cProfile.runctx('self._main1()', globals(), locals(), pf)
            else:
                self._main1()
        except:
            _handle_fatal_exception(*sys.exc_info())
            sys.exit(1)

    def _main1(self):
        """Main request loop: dispatch requests until EOF or EXIT."""
        try:
            msgid = 0
            while True:
                try:
                    req = self.pipe.recv()
                except EOFError:
                    return
                except (OSError, MemoryError):
                    raise
                except Exception as e:
                    # We assume that all other exceptions that occur
                    # here result from an error in the process of
                    # unpickling the message (or, potentially, the
                    # channel.)  Such exceptions can occur even
                    # without raising an exception on the sender side,
                    # and the resulting error message is generally
                    # unhelpful in the extreme.  Thus, we send back an
                    # exception that indicates the *last message ID
                    # that we were able to decode*; the sender, upon
                    # receiving such an exception, can identify the
                    # message that was (probably) the cause of the
                    # exception.
                    #
                    # (We assume that there are never problems with
                    # pickling/unpickling ChildRequests, nor 'msgid'
                    # or 'ttl' values.)
                    raise BorkedPickleException(msgid) from e

                if isinstance(req, tuple):
                    (msgid, channel, message, ttl) = req
                    self.message_ids[channel, message] = msgid
                    self.handler.send_message(channel, message, self, ttl)
                elif req is ChildRequest.SYNC_RESPONSE:
                    resp = (self.acks, None, None)
                    self.acks = []
                    self.pipe.send(resp)
                elif req is ChildRequest.FLUSH:
                    self.handler.flush()
                elif req is ChildRequest.TERMINATE:
                    self.handler.terminate()
                elif req is ChildRequest.EXIT:
                    return
        except Exception as exc:
            # A handler failed: remember the exception and keep
            # draining the pipe so the parent can retrieve it at its
            # next synchronization point.
            exc_msg = traceback.format_exc()
            while True:
                try:
                    req = self.pipe.recv()
                except EOFError:
                    return
                except (OSError, MemoryError):
                    raise
                except Exception:
                    req = None
                if req is ChildRequest.SYNC_RESPONSE:
                    resp = (self.acks, exc, exc_msg)
                    self.acks = []
                    self.pipe.send(resp)
                elif req is ChildRequest.EXIT:
                    return

    def nack_message(self, channel, message, handler):
        """Defer processing of a message."""
        pass

    def ack_message(self, channel, message, handler):
        """Acknowledge a message."""
        msgid = self.message_ids.pop((channel, message), None)
        if msgid is None:
            logging.warning('ack for an unknown message')
        else:
            self.acks.append(msgid)
317 |
318 |
319 | def _handle_fatal_exception(exc_type, exc_val, exc_tb):
320 | if exc_type is not SystemExit:
321 | hdr = '-------- %s --------\n' % current_process().name
322 | msg = traceback.format_exception(exc_type, exc_val, exc_tb)
323 | m = (hdr + ''.join(msg) + '\n').encode(sys.stderr.encoding,
324 | errors = 'replace')
325 | sys.stderr.flush()
326 | os.write(sys.stderr.fileno(), m)
327 |
class ChildRequest(Enum):
    # Control requests sent from the parent (ChildConnector) to the
    # child (ChildContext) over the pipe, interleaved with ordinary
    # (msgid, channel, message, ttl) message tuples.
    SYNC_RESPONSE = 0   # reply with accumulated acks and any exception
    FLUSH = 1           # flush handler output to disk
    TERMINATE = 2       # force expiration of all pending messages
    EXIT = 3            # shut down the child process
333 |
class BorkedPickleException(Exception):
    """Raised in the child when a received message fails to unpickle.

    last_seen_msgid is the ID of the last message that was decoded
    successfully; the offending message is therefore probably the
    next one the sender submitted (see ChildConnector._sync_response).
    """
    def __init__(self, last_seen_msgid):
        # Pass the ID to Exception.__init__ so self.args is set
        # explicitly; the args tuple is what lets this exception be
        # pickled back across the process boundary.
        super().__init__(last_seen_msgid)
        self.last_seen_msgid = last_seen_msgid
337 |
--------------------------------------------------------------------------------
/downcast/messages.py:
--------------------------------------------------------------------------------
1 | #
2 | # downcast - tools for unpacking patient data from DWC
3 | #
4 | # Copyright (c) 2017 Laboratory for Computational Physiology
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 |
19 | from collections import namedtuple
20 | import struct
21 | import uuid
22 |
23 | ################################################################
24 |
# _Export.WaveSample_
# One row of waveform sample data.  Immutable namedtuple: instances
# are hashable, which the dispatcher relies on (messages are used as
# dictionary keys when tracking acknowledgements).
WaveSampleMessage = namedtuple('WaveSampleMessage', (
    # The original data source (required for looking up wave_ids.)
    'origin',

    # An opaque identifier (probably a small integer) for the waveform
    # attributes.  I am hoping that those attributes are immutable
    # (e.g. same signal with different gain/baseline will use a
    # different ID.)  Underlying type is 'bigint'.
    'wave_id',

    # A timestamp (probably from DWC or SQL.)
    'timestamp',

    # Apparently a uniform counter (i.e., runs continuously, never
    # adjusted forward or backward) of Philips milliseconds.
    'sequence_number',

    # Byte array encoding wave samples as 16-bit little-endian
    # unsigned integers.  Note that users should probably assume that
    # indices corresponding to 'unavailable_samples' or
    # 'invalid_samples' contain garbage and should be ignored.
    'wave_samples',

    # String describing the intervals within 'wave_samples' that are
    # considered "unavailable".  Should be a list of ASCII decimal
    # numbers separated by spaces; each pair of numbers indicates the
    # start and end of an "unavailable" interval.
    'unavailable_samples',

    # String describing the intervals within 'wave_samples' that are
    # considered "invalid".  Should be a list of ASCII decimal numbers
    # separated by spaces; each pair of numbers indicates the start
    # and end of an "invalid" interval.  Indices start at zero and the
    # range is inclusive (e.g. "0 9" would indicate the first ten
    # samples.)
    'invalid_samples',

    # String (list of ASCII decimal numbers separated by spaces)
    # giving the relative sample numbers at which pacemaker pulses
    # occurred.
    'paced_pulses',

    # Should correspond to 'mapping_id' in PatientMappingMessage.
    'mapping_id'))
70 |
71 | ################################################################
72 |
# _Export.Alert_
AlertMessage = namedtuple('AlertMessage', [
    # Data source that produced this message.
    'origin',

    # Timestamp (likely assigned by DWC or SQL.)
    'timestamp',

    # Sequence number; what it corresponds to is unknown.
    'sequence_number',

    # Opaque identifier (likely a GUID) for this particular alarm.
    'alert_id',

    # Magic number identifying the "source" of the alarm; see
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Parameters? or Calculations?).  Underlying SQL type is
    # 'bigint'.
    'source',

    # Magic number identifying the "code" of the alarm; see
    # System_Parameter-Alerts_Table_Ed_2_-_PIIC_iX_Rel_B.00.xlsx
    # (Alarm-Code-Ids).  Underlying SQL type is 'integer'.
    'code',

    # Alarm message text.
    'label',

    # Magic number giving the alarm "severity".
    'severity',

    # Magic number giving the alarm category.
    'kind',

    # Whether the alarm has been silenced (?)
    'is_silenced',

    # Undocumented magic number.  Underlying SQL type is 'bigint'.
    'subtype_id',

    # Time the alarm was reported? (probably monitor-derived)
    'announce_time',

    # Time the triggering condition began? (probably monitor-derived)
    # An absurd value such as 0001-01-01 means the time is unknown.
    'onset_time',

    # Time that ??? ended (probably monitor-derived)
    # An absurd value such as 0001-01-01 means the alarm has not yet
    # ended.
    'end_time',

    # Matches 'mapping_id' in PatientMappingMessage.
    'mapping_id'])
126 |
127 | ################################################################
128 |
# _Export.EnumerationValue_
EnumerationValueMessage = namedtuple('EnumerationValueMessage', [
    # Data source that produced this message (needed to resolve
    # enumeration_ids.)
    'origin',

    # Opaque identifier (likely a small integer) naming the
    # observation attributes; assumed immutable.  Underlying SQL type
    # is 'bigint'.
    'enumeration_id',

    # Timestamp (likely assigned by DWC or SQL.)
    'timestamp',

    # Sequence number at which the observation was made.
    'sequence_number',

    # Opaque identifier (likely a GUID) tying together a set of
    # simultaneous, related observations (???)
    'compound_value_id',

    # Observed value, e.g. a beat label or a rhythm description.
    'value',

    # Matches 'mapping_id' in PatientMappingMessage.
    'mapping_id'])
155 |
156 | ################################################################
157 |
# _Export.NumericValue_
NumericValueMessage = namedtuple('NumericValueMessage', [
    # Data source that produced this message (needed to resolve
    # numeric_ids.)
    'origin',

    # Opaque identifier (likely a small integer) naming the
    # measurement attributes; assumed immutable.  Underlying SQL type
    # is 'bigint'.
    'numeric_id',

    # Timestamp (likely assigned by DWC or SQL.)
    'timestamp',

    # Sequence number at which the measurement was made.
    'sequence_number',

    # Supposedly flags values derived from "historic data loaded upon
    # bed association to PIIC iX".
    'is_trend_uploaded',

    # Opaque identifier (likely a GUID) tying together a set of
    # simultaneous, related measurements.
    'compound_value_id',

    # The measured value.
    'value',

    # Matches 'mapping_id' in PatientMappingMessage.
    'mapping_id'])
188 |
189 | ################################################################
190 |
# _Export.PatientMapping_
PatientMappingMessage = namedtuple('PatientMappingMessage', [
    # Data source that produced this message.
    'origin',

    # Opaque identifier (likely a GUID) for this record (the 'Id'
    # column of _Export.PatientMapping_.)
    'mapping_id',

    # Opaque identifier (likely a GUID) for the patient.
    'patient_id',

    # Timestamp of unknown origin; presumably when the information in
    # this message was last updated.
    'timestamp',

    # ???
    'is_mapped',

    # Presumably the original host from which the DWC system received
    # the message.
    'hostname'])
213 |
# _Export.Patient_
PatientBasicInfoMessage = namedtuple('PatientBasicInfoMessage', [
    # Data source that produced this message.
    'origin',

    # Opaque identifier (likely a GUID) for the patient.
    'patient_id',

    # Timestamp of unknown origin; presumably when the information in
    # this message was last updated.
    'timestamp',

    # Presumably the name of the bed the patient is assigned to.
    'bed_label',

    # ???
    'alias',

    # Magic number for the patient's age category.
    'category',

    # The patient's height.
    'height',

    # Magic number for the units of height.
    'height_unit',

    # The patient's weight.
    'weight',

    # Magic number for the units of weight.
    'weight_unit',

    # Magic number for the units of pressure.  (Why is this here?)
    'pressure_unit',

    # Magic number for whether the patient has a pacemaker.
    'paced_mode',

    # ???
    'resuscitation_status',

    # ???
    'admit_state',

    # Presumably the name of the care unit.
    'clinical_unit',

    # Magic number for sex.
    'gender'])
264 |
# _Export.BedTag_
BedTagMessage = namedtuple('BedTagMessage', [
    # Data source that produced this message.
    'origin',

    # Name of the bed.
    'bed_label',

    # Timestamp of unknown origin; presumably when the information in
    # this message was last updated.
    'timestamp',

    # Tag.  What is this?
    'tag'])
279 |
# _Export.PatientDateAttribute_
PatientDateAttributeMessage = namedtuple('PatientDateAttributeMessage', [
    # Data source that produced this message.
    'origin',

    # Opaque identifier (likely a GUID) for the patient.
    'patient_id',

    # Timestamp of unknown origin; presumably when the information in
    # this message was last updated.
    'timestamp',

    # Attribute name, such as "DOB".
    'name',

    # Attribute value.
    'value'])
297 |
# _Export.PatientStringAttribute_
PatientStringAttributeMessage = namedtuple('PatientStringAttributeMessage', [
    # Data source that produced this message.
    'origin',

    # Opaque identifier (likely a GUID) for the patient.
    'patient_id',

    # Timestamp of unknown origin; presumably when the information in
    # this message was last updated.
    'timestamp',

    # Attribute name.
    'name',

    # Attribute value.
    'value'])
315 |
316 | ################################################################
317 |
318 | def bcp_format_message(message):
319 | """Convert a message to BCP format.
320 |
321 | The argument must be an AlertMessage, BedTagMessage,
322 | EnumerationValueMessage, NumericValueMessage,
323 | PatientBasicInfoMessage, PatientDateAttributeMessage,
324 | PatientMappingMessage, PatientStringAttributeMessage, or
325 | WaveSampleMessage.
326 |
327 | The result is a byte string which can be written to a file and
328 | later parsed by freebcp or by the downcast.db.bcp module.
329 |
330 | Note that the result is not always identical to what freebcp
331 | itself would have produced, since UUIDs are sometimes "natively"
332 | written as lowercase and sometimes uppercase.
333 | """
334 | text = []
335 | for (field, value) in zip(message._fields, message):
336 | # ignore the internal "origin" field
337 | if field == 'origin':
338 | continue
339 | # special case for WaveSamples
340 | if field == 'wave_samples':
341 | ftext = struct.pack(' id
393 | text += str(i + 1) + ' ' + name + ' ""\n'
394 | return text
395 |
--------------------------------------------------------------------------------