├── .travis.yml
├── DEPLOY.md
├── LICENSE.txt
├── MANIFEST
├── README.md
├── dist
    ├── genepattern-python-1.0.3.tar.gz
    ├── genepattern-python-1.0.4.tar.gz
    ├── genepattern-python-1.0.5.tar.gz
    ├── genepattern-python-1.0.6.tar.gz
    ├── genepattern-python-1.0.7.tar.gz
    ├── genepattern-python-1.1.0.tar.gz
    ├── genepattern-python-1.1.1.tar.gz
    ├── genepattern-python-1.2.0.tar.gz
    ├── genepattern-python-1.2.1.tar.gz
    ├── genepattern-python-1.2.2.tar.gz
    └── genepattern-python-1.2.3.tar.gz
├── gp
    ├── __init__.py
    ├── core.py
    ├── data.py
    └── modules.py
├── setup.cfg
├── setup.py
└── test
    └── test_data.py


/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.6"
 4 |   - "3.7"
 5 | install:
 6 |   - python setup.py install
 7 |   - pip install pandas
 8 | script:
 9 |   - pytest
10 | notifications:
11 |   - slack: genepattern:OAj2q4hee6Mk5v9r6SUCIacZ
12 | 


--------------------------------------------------------------------------------
/DEPLOY.md:
--------------------------------------------------------------------------------
 1 | # How to Deploy to PyPi Test
 2 | 
 3 | 1. Make sure the version number is updated in both setup.py and gp/\_\_init\_\_.py (`__version__`)
 4 | 2. cd to *genepattern-python* directory
 5 | 3. Remove any residual build artifacts from the last time genepattern-python was built. This step is not necessary the first time the package is built.
 6 | > rm dist/\*.tar.gz; rm dist/\*.whl
 7 | 4. Build the sdist and wheel artifacts.
 8 | > python -m build .
 9 | 5. Upload the files by running:
10 | > twine upload -r pypitest dist/\*.tar.gz; twine upload -r pypitest dist/\*.whl
11 | 6. If the upload fails, go to [https://testpypi.python.org/pypi](https://testpypi.python.org/pypi) and manually upload dist/genepattern-python-*.tar.gz.
12 | 7. Test the deploy by uninstalling and reinstalling the package: 
13 | > pip uninstall genepattern-python;
14 | > pip install -i https://test.pypi.org/simple/ genepattern-python
15 | > 
16 | # How to Deploy to Production PyPi
17 | 
18 | 1. First deploy to test and ensure everything is working correctly (see above).
19 | 2. cd to *genepattern-python* directory
20 | 3. Remove any residual build artifacts from the last time genepattern-python was built. This step is not necessary the first time the package is built.
21 | > rm dist/\*.tar.gz; rm dist/\*.whl
22 | 4. Build the sdist and wheel artifacts.
23 | > python -m build .
24 | 5. Upload the files by running:
25 | > twine upload dist/\*.tar.gz; twine upload dist/\*.whl
26 | 6. If the upload fails, go to [https://pypi.org](https://pypi.org) and manually upload dist/genepattern-python-*.tar.gz.
27 | 7. Test the deploy by uninstalling and reinstalling the package: 
28 | > pip uninstall genepattern-python;
29 | > pip install genepattern-python
30 | > 
31 | # How to Deploy to Conda
32 | 
33 | 1. Deploy to Production PyPi
34 | 2. Navigate to Anaconda directory
35 | > cd ~/opt/anaconda3
36 | 3. Run the following, removing the existing directory if necessary:
37 | > conda skeleton pypi genepattern-python --version XXX
38 | 4. Build the package:
39 | > conda build genepattern-python
40 | 5. Converting this package to builds for other operating systems can be done as shown below. You will need to upload each
41 | built version using a separate upload command.
42 | > conda convert --platform all ./conda-bld/osx-64/genepattern-python-XXX-py37_0.tar.bz2 -o conda-bld/
43 | 6. Upload the newly built package:
44 | > anaconda upload ./conda-bld/*/genepattern-python-XXX-py37_0.tar.bz2 -u genepattern
45 | 7. Log into the [Anaconda website](https://anaconda.org/) to make sure everything is good.


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2015-2023, Regents of the University of California & Broad Institute
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | setup.cfg
3 | setup.py
4 | gp/__init__.py
5 | gp/core.py
6 | gp/data.py
7 | gp/modules.py
8 | test/test_data.py
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Version](https://img.shields.io/pypi/v/genepattern-python.svg)](https://pypi.python.org/pypi/genepattern-python)
 2 | [![Build](https://travis-ci.org/genepattern/genepattern-python.svg?branch=master)](https://travis-ci.org/genepattern/genepattern-python)
 3 | [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://github.com/genepattern/example-notebooks/blob/master/GenePattern%20Python%20Tutorial.ipynb)
 4 | 
 5 | # GenePattern Python Library
 6 | 
 7 | This is a Python library for working with GenePattern programmatically. Behind the scenes, calls from this library execute the GenePattern REST API.
 8 | 
 9 | ## Supported Python Versions
10 | 
11 | This library requires Python 3.6+. The bundled data submodule `gp.data` also requires [pandas](http://pandas.pydata.org/), although the rest of the module does not.
12 | 
13 | **Python 2 Support:** Support for Python 2 was removed in version 1.4.0. Python 2 users should use version 1.3.1.
14 | 
15 | ## Installing
16 | 
17 | It is recommended to install this library from PIP. Simply execute the command below:
18 | 
19 | > pip install genepattern-python
20 | 
21 | ## Upgrading
22 | 
23 | To upgrade to the latest version of the library, execute the command below:
24 | 
25 | > pip install genepattern-python --upgrade
26 | 
27 | ## Tutorial
28 | 
29 | A tutorial on how to use the GenePattern Python Library is [available here](https://github.com/genepattern/example-notebooks/blob/master/GenePattern%20Python%20Tutorial.ipynb).
30 | 
31 | ## "Connection Reset by Peer" Error
32 | 
33 | Connecting to the GenePattern public server now requires TLS 1.2+. Older versions of SSL and TLS will no longer work. If you're attempting to connect and receiving a "Connection Reset by Peer" error, you will need to update the OpenSSL library associated with your Python installation.
34 | 


--------------------------------------------------------------------------------
/dist/genepattern-python-1.0.3.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.0.3.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.0.4.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.0.4.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.0.5.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.0.5.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.0.6.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.0.6.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.0.7.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.0.7.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.1.0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.1.0.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.1.1.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.1.1.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.2.0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.2.0.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.2.1.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.2.1.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.2.2.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.2.2.tar.gz


--------------------------------------------------------------------------------
/dist/genepattern-python-1.2.3.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genepattern/genepattern-python/ec6109095732c628785ee2edfcd6aa46ab3baf8b/dist/genepattern-python-1.2.3.tar.gz


--------------------------------------------------------------------------------
/gp/__init__.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | """
 4 | GenePattern Python Client
 5 | 
 6 | Compatible with Python 3.6+
 7 | """
 8 | 
 9 | __authors__ = ['Thorin Tabor', 'Chet Birger']
10 | __copyright__ = 'Copyright 2014-2024, Regents of the University of California & Broad Institute'
11 | __version__ = '24.01'
12 | __status__ = 'Production'
13 | 
14 | # Import core functionality
15 | from .core import GPException, GPFile, GPJob, GPJobSpec, GPResource, GPServer, GPTask, GPTaskParam, GPJSONEncoder
16 | 


--------------------------------------------------------------------------------
/gp/core.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import urllib
  3 | import base64
  4 | import json
  5 | import time
  6 | from contextlib import closing
  7 | import urllib.request
  8 | import urllib.parse
  9 | import urllib.error
 10 | 
 11 | 
# Tag sent in the 'tags' list of every job submitted through GPServer.run_job()
GP_JOB_TAG = 'GenePattern Python Client'
 13 | 
 14 | 
 15 | class GPServer(object):
 16 |     """
 17 |     Wrapper for data needed to make server calls.
 18 | 
 19 |     Wraps the server url, username and password, and provides helper function
 20 |     to construct the authorization header.
 21 |     """
 22 | 
 23 |     def __init__(self, url, username, password):
 24 |         self.url = url
 25 |         self.username = username
 26 |         self.password = password
 27 |         self.token = None
 28 |         self.last_job = None
 29 | 
 30 |     def __str__(self):
 31 |         return self.url + ' ' + self.username
 32 | 
 33 |     def authorization_header(self):
 34 |         """
 35 |         Returns a string containing the authorization header used to authenticate
 36 |         with GenePattern. This string is included in the header of subsequent
 37 |         requests sent to GenePattern.
 38 |         """
 39 |         return 'Basic %s' % base64.b64encode(bytes(self.username + ':' + self.password, 'ascii')).decode('ascii')
 40 |     
 41 |     def system_message(self):
 42 |         url = f"{self.url}/rest/v1/config/system-message"
 43 |         request = urllib.request.Request(url)
 44 |         response = urllib.request.urlopen(request)
 45 |         return response.read().decode('utf-8')
 46 |     
 47 |     def login(self):
 48 |         """Log in to the OAuth2 endpoint"""
 49 |         safe_username = urllib.parse.quote(self.username)
 50 |         safe_password = urllib.parse.quote(self.password)
 51 |         url = f"{self.url}/rest/v1/oauth2/token?grant_type=password&username={safe_username}&password={safe_password}&client_id=GenePatternNotebook-{safe_username}"
 52 | 
 53 |         request = urllib.request.Request(url)
 54 |         response = urllib.request.urlopen(request, b'')
 55 |         if response.getcode() != 200:
 56 |             raise urllib.error.HTTPError(url, response.getcode(), 'Invalid username or password', response.getheaders(), None)
 57 |         self.token = json.loads(response.read())['access_token']
 58 |         return self.token
 59 | 
 60 |     def upload_file(self, file_name, file_path):
 61 |         """
 62 |         Upload a file to a server
 63 | 
 64 |         Attempts to upload a local file with path filepath, to the server, where it
 65 |         will be named filename.
 66 | 
 67 |         Args:
 68 |             :param file_name: The name that the uploaded file will be called on the server.
 69 |             :param file_path: The path of the local file to upload.
 70 | 
 71 |         Returns:
 72 |             :return: A GPFile object that wraps the URI of the uploaded file, or None if the upload fails.
 73 |         """
 74 | 
 75 |         request = urllib.request.Request(self.url + '/rest/v1/data/upload/job_input?name=' + urllib.parse.quote(file_name))
 76 |         if self.authorization_header() is not None:
 77 |             request.add_header('Authorization', self.authorization_header())
 78 |         request.add_header('User-Agent', 'GenePatternRest')
 79 |         with open(file_path, 'rb') as f:
 80 |             data = f.read()
 81 | 
 82 |         try:
 83 |             response = urllib.request.urlopen(request, data)
 84 |         except IOError:
 85 |             print("authentication failed")
 86 |             return None
 87 | 
 88 |         if response.getcode() != 201:
 89 |             print("file upload failed, status code = %i" % response.getcode())
 90 |             return None
 91 | 
 92 |         return GPFile(self, response.info().get('Location'))
 93 | 
 94 |     def run_job(self, job_spec, wait_until_done=True):
 95 |         """
 96 |         Runs a job defined by jobspec, optionally non-blocking.
 97 | 
 98 |         Takes a GPJobSpec object that defines a request to run a job, and makes the
 99 |         request to the server.  By default blocks until the job is finished by
100 |         polling the server, but can also run asynchronously.
101 | 
102 |         Args:
103 |             :param job_spec: A GPJobSpec object that contains the data defining the job to be run.
104 |             :param wait_until_done: Whether to wait until the job is finished before returning.
105 |             :return:
106 | 
107 |         Returns:
108 |             a GPJob object that refers to the running job on the server.  If called
109 |             synchronously, this object will contain the info associated with the
110 |             completed job.  Otherwise, it will just wrap the URI of the running job.
111 |         """
112 | 
113 |         # names should be a list of names,
114 |         # values should be a list of **lists** of values
115 |         json_string = json.dumps({'lsid': job_spec.lsid, 'params': job_spec.params, 'tags': [GP_JOB_TAG]}, cls=GPJSONEncoder)
116 |         if sys.version_info.major == 3:  # Handle conversion to bytes for Python 3
117 |             json_string = bytes(json_string, 'utf-8')
118 |         request = urllib.request.Request(self.url + '/rest/v1/jobs')
119 |         if self.authorization_header() is not None:
120 |             request.add_header('Authorization', self.authorization_header())
121 |         request.add_header('Content-Type', 'application/json')
122 |         request.add_header('User-Agent', 'GenePatternRest')
123 |         try:
124 |             response = urllib.request.urlopen(request, json_string)
125 |         except urllib.error.HTTPError as e:
126 |             if e.code == 403:
127 |                 print("job POST failed, your account is either over the data limit or you have too many jobs running")
128 |             else:
129 |                 print(f" job POST failed, status code = {e.code}, {e.reason}")
130 |             return None
131 |         if response.getcode() != 201:
132 |             print(" job POST failed, status code = %i" % response.getcode())
133 |             return None
134 |         data = json.loads(response.read().decode('utf-8'))
135 |         job = GPJob(self, data['jobId'])
136 |         job.get_info()
137 |         self.last_job = job  # Set the last job
138 |         if wait_until_done:
139 |             job.wait_until_done()
140 |         return job
141 | 
142 |     def get_token(self):
143 |         """Return the authentication token, logging in to obtain it if necessary"""
144 |         if self.token: return self.token
145 |         else: return self.login()
146 | 
147 |     def get_job(self, job_number):
148 |         job = GPJob(self, job_number)
149 |         return job
150 | 
151 |     def get_task_list(self):
152 |         """
153 |         Queries the GenePattern server and returns a list of GPTask objects,
154 |         each representing one of the modules installed on the server. Useful
155 |         for determining which are available on the server.
156 |         """
157 |         request = urllib.request.Request(self.url + '/rest/v1/tasks/all.json')
158 |         if self.authorization_header() is not None:
159 |             request.add_header('Authorization', self.authorization_header())
160 |         request.add_header('User-Agent', 'GenePatternRest')
161 |         response = urllib.request.urlopen(request)
162 |         response_string = response.read().decode('utf-8')
163 |         category_and_tasks = json.loads(response_string)
164 |         raw_list = category_and_tasks['all_modules']
165 |         task_list = []
166 |         for task_dict in raw_list:
167 |             task = GPTask(self, task_dict['lsid'], task_dict)
168 |             task_list.append(task)
169 |         return task_list
170 | 
171 |     @staticmethod
172 |     def wait_until_complete(job_list):
173 |         """
174 |         Args: Accepts a list of GPJob objects
175 | 
176 |         This method will not return until all GPJob objects in the list have
177 |         finished running. That us, they are either complete and have resulted in
178 |         an error state.
179 | 
180 |         This method will occasionally query each job to see if it is finished.
181 |         """
182 |         complete = [False] * len(job_list)
183 |         wait = 1
184 |         while not all(complete):
185 |             time.sleep(wait)
186 |             for i, job in enumerate(job_list):
187 |                 if not complete[i]:
188 |                     complete[i] = job.is_finished()
189 |                     if not complete[i]:
190 |                         break
191 |             wait = min(wait * 2, 10)
192 | 
193 |     def get_recent_jobs(self, n_jobs=10):
194 |         """
195 |         Returns the user's N most recently submitted jobs on the GenePattern server.
196 | 
197 |         Args: If not specified, n_jobs = 10.
198 | 
199 |         Returns: An array of GPJob objects.
200 |         """
201 | 
202 |         # Query the server for the list of jobs
203 |         request = urllib.request.Request(self.url + '/rest/v1/jobs/?pageSize=' +
204 |                                          str(n_jobs) + '&userId=' + str(urllib.parse.quote(self.username)) +
205 |                                          '&orderBy=-dateSubmitted')
206 |         if self.authorization_header() is not None:
207 |             request.add_header('Authorization', self.authorization_header())
208 |         request.add_header('User-Agent', 'GenePatternRest')
209 |         response = urllib.request.urlopen(request)
210 |         response_string = response.read().decode('utf-8')
211 |         response_json = json.loads(response_string)
212 | 
213 |         # For each job in the JSON Array, build a GPJob object and add to the job list
214 |         job_list = []
215 |         for job_json in response_json['items']:
216 |             job_id = job_json['jobId']
217 |             job = GPJob(self, job_id)
218 |             job.info = job_json
219 |             job.load_info()
220 |             job_list.append(job)
221 | 
222 |         return job_list
223 | 
224 | 
class GPResource(object):
    """
    Common parent of everything this client addresses on a GenePattern server.

    Each resource is identified by a URI, which is the only state kept here.
    Subclasses layer resource-specific behaviour on top of it, for example
    downloading a file or fetching the info of a running or completed job.
    """
    # Class-level fallback; every instance receives its own value in __init__
    uri = None

    def __init__(self, uri):
        """Remember the URI that identifies this resource."""
        self.uri = uri
238 | 
239 | 
class GPFile(GPResource):
    """
    Reference to a file that lives on a GenePattern server.

    Holds the file's URI together with the server object used to authenticate,
    and offers helpers to open, read and name the file.
    """
    server_data = None

    def __init__(self, server_data, uri):
        GPResource.__init__(self, uri)
        self.server_data = server_data

    def open(self):
        """
        Requests the file's URL and hands back the file-like response object.
        Besides the usual file methods, that object provides:

            * geturl() - the URL finally retrieved (shows whether a redirect
                was followed)

            * info() - meta-information about the response, such as headers

            * getcode() - the HTTP status code of the response
        """
        request = urllib.request.Request(self.uri)
        auth_header = self.server_data.authorization_header()
        if auth_header is not None:
            request.add_header('Authorization', auth_header)
        request.add_header('User-Agent', 'GenePatternRest')
        try:
            return urllib.request.urlopen(request)
        except urllib.error.HTTPError as e:
            redirect_url = e.geturl()
            if not redirect_url:
                raise e
            # Handle S3 redirects if one is encountered: retry the final URL
            # with a bare request that carries none of the headers set above
            return urllib.request.urlopen(urllib.request.Request(redirect_url))

    def read(self):
        """
        Downloads the file and returns its contents as a string (assumes UTF-8).
        An empty file yields None rather than an empty string.
        """
        with closing(self.open()) as stream:
            raw = stream.read()
        text = raw.decode("utf-8")
        return text if text else None

    def get_url(self):
        """
        Returns the URL to the GPFile
        """
        return self.uri

    def get_name(self):
        """
        Returns the file name: the last '/'-separated segment of the URL,
        with percent-escapes decoded
        """
        last_segment = self.get_url().rsplit('/', 1)[-1]
        return urllib.parse.unquote(last_segment)

    def __str__(self):
        return self.uri
298 | 
299 | 
300 | class GPJob(GPResource):
301 |     """
302 |     A running or completed job on a Gene Pattern server.
303 | 
304 |     Contains methods to get the info of the job, and to wait on a running job by
305 |     polling the server until the job is completed.
306 |     """
307 |     json = None  # Define the backing JSON string
308 |     info = None
309 |     server_data = None
310 |     task_name = None
311 |     task_lsid = None
312 |     user_id = None
313 |     job_number = None
314 |     status = None
315 |     date_submitted = None
316 |     log_files = None
317 |     output_files = None
318 |     num_output_files = None
319 |     children = None
320 |     input_params = None
321 | 
322 |     def __init__(self, server_data, uri):
323 |         super(GPJob, self).__init__(str(uri))
324 |         self.info = None
325 |         self.server_data = server_data
326 |         self.job_number = uri
327 | 
328 |     def get_info(self):
329 |         """
330 |         Query the GenePattern server for metadata regarding this job and assign
331 |         that metadata to the properties on this GPJob object. Including:
332 |             * Task Name
333 |             * LSID
334 |             * User ID
335 |             * Job Number
336 |             * Status
337 |             * Date Submitted
338 |             * URL of Log Files
339 |             * URL of Output Files
340 |             * Number of Output Files
341 |         """
342 |         request = urllib.request.Request(self.server_data.url + "/rest/v1/jobs/" + self.uri + "?includeInputParams=true")
343 |         if self.server_data.authorization_header() is not None:
344 |             request.add_header('Authorization', self.server_data.authorization_header())
345 |         request.add_header('User-Agent', 'GenePatternRest')
346 |         response = urllib.request.urlopen(request)
347 | 
348 |         self.json = response.read().decode('utf-8')
349 |         self.info = json.loads(self.json)
350 |         self.load_info()
351 | 
352 |     def load_info(self):
353 |         """
354 |         Parses the JSON object stored at GPJob.info and assigns its metadata to
355 |         properties of this GPJob object.
356 | 
357 |         Primarily intended to be called from GPJob.get_info().
358 |         """
359 |         self.task_name = self.info['taskName']
360 |         self.task_lsid = self.info['taskLsid']
361 |         self.user_id = self.info['userId']
362 |         self.job_number = int(self.info['jobId'])
363 |         self.status = self.get_status_message()
364 |         self.date_submitted = self.info['dateSubmitted']
365 |         self.log_files = self.info['logFiles']
366 |         self.output_files = self.info['outputFiles']
367 |         self.num_output_files = self.info['numOutputFiles']
368 |         self.input_params = self.info['inputParams'] if 'inputParams' in self.info else None
369 | 
370 |         # Create children, if relevant
371 |         self.children = self.get_child_jobs()
372 | 
373 |     def get_input_params(self):
374 |         """Return the input parameters used to launch the job"""
375 | 
376 |         # Lazily load info
377 |         if self.info is None: self.get_info()
378 | 
379 |         return { list(p.keys())[0]:list(p.values())[0] for p in self.input_params }
380 | 
381 |     def get_child_jobs(self):
382 |         """
383 |         Queries the GenePattern server for child jobs of this job, creates GPJob
384 |         objects representing each of them and assigns the list of them to the
385 |         GPJob.children property. Then return this list.
386 |         """
387 |         # Lazily load info
388 |         if self.info is None:
389 |             self.get_info()
390 | 
391 |         # Lazily load children
392 |         if self.children:
393 |             return self.children
394 |         else:
395 |             if 'children' in self.info:
396 |                 child_list = []
397 |                 for child in self.info['children']['items']:
398 |                     child_job = GPJob(self.server_data, child['jobId'])
399 |                     child_job.info = child
400 |                     child_job.load_info()
401 |                     child_list.append(child_job)
402 |                 return child_list
403 |             else:               # No children? Return empty list
404 |                 return []
405 | 
406 |     def is_finished(self):
407 |         """
408 |         Queries the server to check if the job has been completed.
409 |         Returns True or False.
410 |         """
411 |         self.get_info()
412 | 
413 |         if 'status' not in self.info:
414 |             return False
415 |         if 'isFinished' not in self.info['status']:
416 |             return False
417 | 
418 |         return self.info['status']['isFinished']
419 | 
420 |     def has_error(self):
421 |         """
422 |         Queries the server to check if the job has an error.
423 |         Returns True or False.
424 |         """
425 |         self.get_info()
426 | 
427 |         if 'status' not in self.info:
428 |             return False
429 |         if 'hasError' not in self.info['status']:
430 |             return False
431 | 
432 |         return self.info['status']['hasError']
433 | 
434 |     def is_pending(self):
435 |         """
436 |         Queries the server to check if the job is pending.
437 |         Returns True or False.
438 |         """
439 |         self.get_info()
440 | 
441 |         if 'status' not in self.info:
442 |             return False
443 |         if 'isPending' not in self.info['status']:
444 |             return False
445 | 
446 |         return self.info['status']['isPending']
447 | 
448 |     def get_status_message(self):
449 |         """
450 |         Returns the status message for the job, querying the
451 |         server if necessary.
452 |         """
453 |         # Lazily load info
454 |         if self.info is None:
455 |             self.get_info()
456 | 
457 |         return self.info['status']['statusMessage']
458 | 
459 |     def get_tags(self):
460 |         """
461 |         Returns the tags for the job, querying the
462 |         server if necessary.
463 |         """
464 |         # Lazily load info
465 |         if self.info is None:
466 |             self.get_info()
467 | 
468 |         if 'tags' in self.info:
469 |             return [structure['tag']['tag'] for structure in self.info['tags']]
470 |         else:
471 |             return []
472 | 
473 |     def get_comments(self):
474 |         """
475 |         Returns the comments for the job, querying the
476 |         server if necessary.
477 |         """
478 |         # Lazily load info
479 |         if self.info is None:
480 |             self.get_info()
481 | 
482 |         if 'comments' in self.info:
483 |             return [structure['text'] for structure in self.info['comments']['comments']]
484 |         else:
485 |             return []
486 | 
487 |     def get_output_files(self):
488 |         """
489 |         Returns a list of the files output by the job, querying the server if
490 |         necessary. If the job has output no files, an empty list will be
491 |         returned.
492 |         """
493 |         # Lazily load info
494 |         if self.info is None:
495 |             self.get_info()
496 | 
497 |         if 'outputFiles' in self.info:
498 |             return [GPFile(self.server_data, f['link']['href']) for f in self.info['outputFiles']]
499 |         else:
500 |             return []
501 | 
502 |     def get_file(self, name):
503 |         """
504 |         Returns the output file with the specified name, if no output files
505 |         match, returns None.
506 |         """
507 |         files = self.get_output_files()
508 |         for f in files:
509 |             if f.get_name() == name:
510 |                 return f
511 |         return None
512 | 
513 |     def wait_until_done(self):
514 |         """
515 |         This method will not return until the job is either complete or has
516 |         reached an error state. This queries the server periodically to check
517 |         for an update in status.
518 |         """
519 |         wait = 1
520 |         while True:
521 |             time.sleep(wait)
522 |             self.get_info()
523 |             if self.info['status']['isFinished']:
524 |                 break
525 |             # implements a crude exponential back off
526 |             wait = min(wait * 2, 60)
527 | 
528 |     def get_job_status_url(self):
529 |         """
530 |         Returns the URL of the job's status page on the GenePattern server
531 |         """
532 |         return self.server_data.url + "/pages/index.jsf?jobid=" + self.uri
533 |     
534 |     def get_permissions(self):
535 |         """Get the permissions object for the GP job"""
536 |         url = f'{self.server_data.url}/rest/v1/jobs/{self.job_number}/permissions'
537 |         request = urllib.request.Request(url)
538 |         if self.server_data.authorization_header() is not None:
539 |             request.add_header('Authorization', self.server_data.authorization_header())
540 |         request.add_header('User-Agent', 'GenePatternRest')
541 | 
542 |         response = urllib.request.urlopen(request)
543 |         return json.loads(response.read())
544 |         
545 |     def set_permissions(self, permissions):
546 |         """Set the group permissions for the job"""
547 |         url = f'{self.server_data.url}/rest/v1/jobs/{self.job_number}/permissions'
548 |         data = json.dumps(permissions).encode('utf8')
549 |         request = urllib.request.Request(url, data=data, method='PUT')
550 |         if self.server_data.authorization_header() is not None:
551 |             request.add_header('Authorization', self.server_data.authorization_header())
552 |         request.add_header('User-Agent', 'GenePatternRest')
553 |         urllib.request.urlopen(request)
554 | 
555 |     def terminate(self):
556 |         """Terminate a running or pending job"""
557 |         url = f'{self.server_data.url}/rest/v1/jobs/{self.job_number}/terminate'
558 |         request = urllib.request.Request(url, method='DELETE')
559 |         if self.server_data.authorization_header() is not None:
560 |             request.add_header('Authorization', self.server_data.authorization_header())
561 |         request.add_header('User-Agent', 'GenePatternRest')
562 |         return urllib.request.urlopen(request).code == 200
563 | 
564 | 
class GPJobSpec(object):
    """
    Data needed to make a request to perform a job on a Gene Pattern server

    Holds the server data, the LSID of the task to run and the list of
    parameter entries accumulated through set_parameter().
    """

    def __init__(self, server_data, lsid):
        """
        :param server_data: server connection data, stored as-is
        :param lsid: LSID of the task this spec will launch
        """
        self.server_data = server_data
        self.lsid = lsid
        self.params = []

    def set_parameter(self, name, values, group_id=None):
        """
        Appends a parameter entry to the GPJobSpec
        :param name: name of the parameter
        :param values: list of values for the parameter; any non-list value
                       is wrapped in a single-element list
        :param group_id: optional parameter group ID
        :return:
        """
        value_list = values if isinstance(values, list) else [values]

        entry = {'name': name}
        if group_id is not None:
            # groupId is only included when a group was given
            entry['groupId'] = group_id
        entry['values'] = value_list
        self.params.append(entry)
593 | 
594 | 
class GPTask(GPResource):
    """Describes a GenePattern task (module or pipeline).

    The constructor only records the server data and any descriptive
    attributes passed in `task_dict`. The data transfer object (DTO)
    describing the task is retrieved from the GenePattern server by
    param_load(), which make_job_spec() calls on first use. The DTO supplies
    the task's name, LSID, description, documentation, version and parameter
    list. Class includes getters for these components.

    """
    json = None  # Raw JSON text of the task DTO, set by param_load()
    server_data = None
    description = None
    name = None
    documentation = None
    lsid = None
    version = None
    params = None  # List of GPTaskParam, set by param_load()
    dto = None  # Decoded task DTO, set by param_load()

    _params_loaded = False
    submit_json = None
    job_spec = None
    job = None
    job_number = None

    def __init__(self, server_data, name_or_lsid, task_dict=None):
        """
        :param server_data: server connection data used for later requests
        :param name_or_lsid: task name or LSID, passed to GPResource.__init__
        :param task_dict: optional dict of already-known task attributes; the
                          keys 'name', 'lsid', 'description', 'documentation'
                          and 'version' are copied when present
        """
        GPResource.__init__(self, name_or_lsid)
        self.server_data = server_data

        # Initialize descriptive attributes if available
        if task_dict is not None:
            for key in ('name', 'lsid', 'description', 'documentation', 'version'):
                if key in task_dict:
                    setattr(self, key, task_dict[key])

    def param_load(self):
        """
        Queries the server for the parameter information and other metadata associated with
        this task, then populates the descriptive attributes and `params`.

        Raises KeyError if the returned DTO lacks 'name', 'lsid' or 'params'.
        """
        escaped_uri = urllib.parse.quote(self.uri)
        request = urllib.request.Request(self.server_data.url + '/rest/v1/tasks/' + escaped_uri)
        auth_header = self.server_data.authorization_header()
        if auth_header is not None:
            request.add_header('Authorization', auth_header)
        request.add_header('User-Agent', 'GenePatternRest')

        # Close the response once the body has been read
        with urllib.request.urlopen(request) as response:
            self.json = response.read().decode('utf-8')
        self.dto = json.loads(self.json)

        # Optional fields fall back to an empty string
        self.description = self.dto.get('description', "")
        self.name = self.dto['name']
        self.documentation = self.dto.get('documentation', "")
        self.lsid = self.dto['lsid']
        self.version = self.dto.get('version', "")
        self.params = [GPTaskParam(self, param) for param in self.dto['params']]
        self._params_loaded = True

    def get_lsid(self):
        """
        :return: Returns the task's LSID as a string
        """
        return self.lsid

    def get_name(self):
        """
        :return: Returns the task's name as a string
        """
        return self.name

    def get_description(self):
        """
        :return: Returns the task's description as a string
        """
        return self.description

    def get_version(self):
        """
        :return: Returns the task's version as a string
        """
        return self.version

    def get_parameters(self):
        """
        :return: Returns a list of GPTaskParam objects representing the parameters for this
        task, in order. This is None until param_load() has run.
        """
        return self.params

    def make_job_spec(self):
        """
        :return: Returns a GPJobSpec used to launch a job of this task type
        """
        # If the parameters haven't been loaded yet, do so
        if not self._params_loaded and self.server_data is not None:
            self.param_load()

        return GPJobSpec(self.server_data, self.lsid)
701 | 
702 | 
class GPTaskParam(object):
    """
    Encapsulates single parameter information.

    The constructor's input parameter is the data transfer object
    associated with a single task parameter (i.e., one element of the
    'params' list in the task DTO that GPTask.param_load() iterates over)
    """
    task = None
    dto = None
    name = None
    description = None
    attributes = None

    def __init__(self, task, dto):
        """
        :param task: the owning task; its server_data is used by get_choices()
        :param dto: dict whose first key is the parameter name, mapping to a
                    dict with 'attributes' and optionally 'description' and
                    'choiceInfo'
        """
        self.task = task
        self.dto = dto
        # The first key of the DTO is taken as the parameter name
        self.name = list(dto)[0]
        details = dto[self.name]
        self.description = details['description'] if 'description' in details else ''
        self.attributes = details['attributes']

    def _choice_info(self):
        """
        Return the parameter's 'choiceInfo' entry, raising GPException when
        this is not a choice parameter.
        """
        details = self.dto[self.name]
        if 'choiceInfo' not in details:
            raise GPException('not a choice parameter')
        return details['choiceInfo']

    def get_dto(self):
        """
        Returns a raw object representing the parameter. This is mostly used to
        initialize GPTaskParam objects
        """
        return self.dto

    def get_name(self):
        """
        :return: Returns the parameter name as a string
        """
        return self.name

    def is_optional(self):
        """
        Returns whether the parameter is optional or required
        :return: Return True if optional, False if required
        """
        attrs = self.attributes
        # Any non-blank 'optional' attribute marks the parameter as optional
        flagged_optional = 'optional' in attrs and bool(attrs['optional'].strip())
        # A minimum of zero values also means nothing has to be supplied
        zero_minimum = 'minValue' in attrs and attrs['minValue'] == 0
        return bool(flagged_optional or zero_minimum)

    def get_description(self):
        """
        :return: Returns the parameter description as a string
        """
        return self.description

    def get_type(self):
        """
        Returns either 'File' or 'String'.

        The type attribute (e.g., java.io.File, java.lang.Integer, java.lang.Float),
        which might give a hint as to what string should represent,
        is not enforced and not employed consistently across all tasks, so we ignore.
        """
        attrs = self.attributes
        if 'TYPE' in attrs and 'MODE' in attrs:
            if attrs['TYPE'] == 'FILE' and attrs['MODE'] == 'IN':
                return 'File'
        return 'String'

    def is_password(self):
        """
        Indicates whether password flag associated with string parameter.

        If string parameter flagged as password, UI should not display
        parameter value on input field (e.g., mask out with asterisks).

        """
        return bool('type' in self.attributes and self.attributes['type'] == 'PASSWORD')

    def allow_multiple(self):
        """
        Return whether the parameter allows multiple values or not
        :return: Return True if the parameter allows multiple values, otherwise False
        """
        attrs = self.attributes
        # note that maxValue means "max number of values", and is an integer, not a string
        if ('maxValue' in attrs) and (attrs['maxValue'] > 1):
            return True
        return bool(('numValues' in attrs) and ('+' in attrs['numValues']))

    def get_default_value(self):
        """
        Return the default value for the parameter. If there is no default value
        (missing or blank), return None
        """
        attrs = self.attributes
        if 'default_value' in attrs and bool(attrs['default_value'].strip()):
            return attrs['default_value']
        return None

    def is_choice_param(self):
        """
        :return: Return True if this is a choice parameter, otherwise False
        """
        return 'choiceInfo' in self.dto[self.name]

    def get_choice_status(self):
        """
        Returns a (message, flag) tuple taken from the choice status, which
        indicates whether choices statically or dynamically defined, and
        whether a dynamic file selection loading error occurred.

        Throws an error if this is not a choice parameter.
        """
        status = self._choice_info()['status']
        return status['message'], status['flag']

    def get_choice_href(self):
        """
        Returns the HREF of a dynamic choice parameter.
        Throws an error if this is not a choice parameter.
        """
        return self._choice_info()['href']

    def get_choice_selected_value(self):
        """
        Returns the default selection from a choice menu, or None when no
        'selectedValue' is present.
        Throws an error if this is not a choice parameter.
        """
        choice_info = self._choice_info()
        if 'selectedValue' in choice_info:
            return choice_info['selectedValue']
        return None

    def allow_choice_custom_value(self):
        """
        Returns boolean indicating whether choice parameter supports custom value.

        If choice parameter supports custom value, user can provide parameter value
        other than those provided in choice list.
        Throws an error if this is not a choice parameter.
        """
        return self._is_string_true(self._choice_info()['choiceAllowCustom'])

    # this needs additional work - some kind of limited polling to give server time to assemble list
    def get_choices(self):
        """
        Returns a list of dictionary objects, one dictionary object per choice.

        Each object has two keys defined: 'value', 'label'.
        The 'label' entry is what should be displayed on the UI, the 'value' entry
        is what is written into GPJobSpec.

        When the choice status flag is "NOT_INITIALIZED" the choice info is
        re-fetched from the parameter's HREF and replaces the cached entry.
        Throws an error if this is not a choice parameter.
        """
        self._choice_info()  # validates that this is a choice parameter
        if self.get_choice_status()[1] == "NOT_INITIALIZED":
            print(self.get_choice_status())
            print("choice status not initialized")

            request = urllib.request.Request(self.get_choice_href())
            auth_header = self.task.server_data.authorization_header()
            if auth_header is not None:
                request.add_header('Authorization', auth_header)
            request.add_header('User-Agent', 'GenePatternRest')
            # Close the response once the body has been read
            with urllib.request.urlopen(request) as response:
                self.dto[self.name]['choiceInfo'] = json.loads(response.read().decode('utf-8'))
        return self.dto[self.name]['choiceInfo']['choices']

    def get_alt_name(self):
        """
        Returns the alternate name of a parameter, or None when it is missing
        or blank.
        Only pipeline prompt-when-run parameters
        can have alternate names and alternate descriptions
        """
        attrs = self.attributes
        if 'altName' in attrs and bool(attrs['altName'].strip()):
            return attrs['altName']
        return None

    def get_alt_description(self):
        """
        Returns the alternate description of a parameter, or None when it is
        missing or blank.
        Only pipeline prompt-when-run parameters
        can have alternate names and alternate descriptions
        """
        attrs = self.attributes
        if 'altDescription' in attrs and bool(attrs['altDescription'].strip()):
            return attrs['altDescription']
        return None

    @staticmethod
    def _is_string_true(test):
        """
        Determines whether a string value is "True" for the purposes of GenePattern's
        parameter parsing. Booleans are returned unchanged; strings are true
        when they equal 'on', 'yes' or 'true' ignoring case.
        """
        if type(test) is bool:
            return test
        return test.lower() in ('on', 'yes', 'true')
920 | 
921 | 
class GPException(Exception):
    """
    An exception raised by GenePattern and returned to the user

    The payload passed to the constructor is available as `value`, and
    str() of the exception is the repr() of that payload.
    """
    def __init__(self, value):
        # Initialize the base class so `args` is populated with the payload
        super().__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
931 | 
932 | 
class GPJSONEncoder(json.JSONEncoder):
    """
    Custom JSON encoder for encoding GenePattern classes

    GPFile objects are encoded as their URL; any other object the base
    encoder cannot handle is encoded as a single-key dict mapping
    '__ClassName__' to the object's __dict__.
    """
    def default(self, o):
        if not isinstance(o, GPFile):
            wrapper_key = '__{}__'.format(o.__class__.__name__)
            return {wrapper_key: o.__dict__}

        return o.get_url()
942 | 


--------------------------------------------------------------------------------
/gp/data.py:
--------------------------------------------------------------------------------
  1 | __authors__ = ['Thorin Tabor']
  2 | __copyright__ = 'Copyright 2014-2022, Regents of the University of California & Broad Institute'
  3 | __version__ = '0.1.2'
  4 | __status__ = 'Beta'
  5 | 
  6 | """
  7 | GenePattern Data Tools
  8 | 
  9 | Tools for loading GenePattern data files (such as GCT or ODF files) and
 10 | working with their contents in a Pandas DataFrame.
 11 | 
 12 | Compatible with Python 3.4+
 13 | """
 14 | 
 15 | import gp
 16 | import re
 17 | import io
 18 | import types
 19 | import pandas as pd
 20 | import urllib.request
 21 | 
 22 | 
 23 | def GCT(gct_obj):
 24 |     """
 25 |     Create a Dataframe with the contents of the GCT file
 26 |     """
 27 |     # Handle all the various initialization types and get an IO object
 28 |     gct_io = _obtain_io(gct_obj)
 29 | 
 30 |     # Load the GCT file into a DataFrame
 31 |     df = pd.read_csv(gct_io, sep='\t', header=2, index_col=[0, 1], skip_blank_lines=True)
 32 | 
 33 |     # Return the Dataframe
 34 |     return df
 35 | 
 36 | 
 37 | class CLS:
 38 |     def __init__(self, cls_obj):
 39 |         """
 40 |         Create a CLS object with the contents of a CLS file
 41 | 
 42 |         For more information on the CLS format see:
 43 |         http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide
 44 | 
 45 |         :cls_obj: The CLS file. Accepts a file-like object, a file path, a URL to the file
 46 |                   or a string containing the raw data.
 47 |         """
 48 | 
 49 |         hdr_line_re = re.compile(r"^(?P<samples>[0-9]+)\s+(?P<classes>[0-9]+)\s+1\s*$")
 50 |         assign_line_re = re.compile(r"^\s*(?:.+\s+)*.+\s*$", re.ASCII)
 51 | 
 52 |         # Handle all the various initialization types and get an IO object
 53 |         cls_io = _obtain_io(cls_obj)
 54 | 
 55 |         # Read the file as an array of lines
 56 |         raw_lines = cls_io.readlines()
 57 | 
 58 |         # Convert byte strings to unicode strings
 59 |         raw_lines = _bytes_to_str(raw_lines)
 60 | 
 61 |         # Validate cls file format and contents
 62 |         hdr_line_match = re.match(hdr_line_re, raw_lines[0])
 63 |         if hdr_line_match:
 64 |             (self.num_samples, self.num_classes) = (int(hdr_line_match["samples"]), int(hdr_line_match["classes"]))
 65 | 
 66 |             self.class_names = raw_lines[1].replace('#', '').split()
 67 |             if len(self.class_names) != self.num_classes:
 68 |                 raise ValueError("Mismatch in {0} between number of class names declared ({1}) and number provided ({2})".format(cls_obj, self.num_classes, len(self.class_names)))
 69 | 
 70 |         else:
 71 |             raise ValueError("Bad format in {0} for header line: {1}".format(cls_obj, raw_lines[0]))
 72 | 
 73 |         assign_line_match = re.match(assign_line_re, raw_lines[2])
 74 |         if assign_line_match:
 75 |             self.class_assignments = [i for i in raw_lines[2].split()]
 76 |             if self.num_samples != len(self.class_assignments):
 77 |                 raise ValueError("Mismatch in {0} between number of samples declared ({1}) and number of class assignments provided ({2})".format(cls_obj, self.num_samples, len(self.class_assignments)))
 78 |         else:
 79 |             raise ValueError("Bad format in {0} for class assignment line: {1}".format(cls_obj, raw_lines[2]))
 80 | 
 81 | 
 82 | def ODF(odf_obj):
 83 |     """
 84 |     Create a Dataframe with the contents of the ODF file
 85 | 
 86 |     For more information on the ODF format see:
 87 |     http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide
 88 | 
 89 |     :odf_obj: The ODF file. Accepts a file-like object, a file path, a URL to the file
 90 |               or a string containing the raw data.
 91 |     """
 92 | 
 93 |     # Handle all the various initialization types and get an IO object
 94 |     odf_io = _obtain_io(odf_obj)
 95 | 
 96 |     # Read the file as an array of lines
 97 |     raw_lines = odf_io.readlines()
 98 | 
 99 |     # Convert byte strings to unicode strings
100 |     raw_lines = _bytes_to_str(raw_lines)
101 | 
102 |     try:
103 |         # Read the header count
104 |         header_count = _extract_header_number(raw_lines)
105 | 
106 |         # Read the header dict
107 |         headers = _parse_header(raw_lines)
108 | 
109 |         # Read the model
110 |         model = _extract_model(headers)
111 | 
112 |         # Read the column names, if available
113 |         column_names = _extract_column_names(headers)
114 | 
115 |         # Assemble the data
116 |         data_lines = _join_data_lines(raw_lines, header_count)
117 | 
118 |         # Put together new IO
119 |         odf_string_io = io.StringIO(data_lines)
120 | 
121 |         # Load the ODF file into a DataFrame
122 |         df = pd.read_csv(odf_string_io, sep='\t', header=None, names=column_names, skip_blank_lines=True)
123 | 
124 |         # Apply ODF-specific properties
125 |         _apply_odf_properties(df, headers, model)
126 | 
127 |         # Return the Dataframe
128 |         return df
129 | 
130 |     # Catch any errors related to parsing the ODF file
131 |     except Exception:
132 |         raise TypeError('Error parsing ODF file')
133 | 
134 | ############################
135 | # Shared Utility Functions #
136 | ############################
137 | 
138 | 
def write_odf(df, file_path, headers=None):
    """
    Writes the provided DataFrame to a ODF file.

    Assumes that the DataFrame matches the structure of those produced
    by the ODF() function in this library

    :param df: the DataFrame to write to ODF
    :param file_path: path to which to write the ODF file
    :param headers: A dict of ODF headers, if none are provided will attempt to read them
                    from the DataFrame's `headers` attribute
    :raises AttributeError: if no headers are passed and the DataFrame has no
                            `headers` attribute
    :return:
    """
    # Only fall back to (and require) df.headers when no headers were passed;
    # explicitly supplied headers must be accepted as-is
    if headers is None:
        if not hasattr(df, 'headers'):
            raise AttributeError('ODF headers not provided')
        headers = df.headers

    with open(file_path, 'w') as file:
        file.write(_header_dict_to_str(headers))
        df.to_csv(file, sep='\t', header=False, index=False, mode='w+')
159 | 
160 | 
def write_gct(df, file_path):
    """
    Writes the provided DataFrame to a GCT file.

    The file starts with the '#1.2' version line and a line holding the row
    and column counts, followed by the DataFrame as tab-separated text
    (including its index and column header).

    Assumes that the DataFrame matches the structure of those produced
    by the GCT() function in this library

    :param df: the DataFrame to write
    :param file_path: path to which to write the GCT file
    :return:
    """
    with open(file_path, 'w') as out:
        preamble = '#1.2\n{0}\t{1}\n'.format(len(df.index), len(df.columns))
        out.write(preamble)
        df.to_csv(out, sep='\t', mode='w+')
175 | 
176 | 
177 | def _is_url(url):
178 |     """
179 |     Used to determine if a given string represents a URL
180 |     """
181 |     regex = re.compile(
182 |         r'^(?:http|ftp)s?://'  # http:// or https://
183 |         r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
184 |         r'localhost|'  # localhost...
185 |         r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
186 |         r'(?::\d+)?'  # optional port
187 |         r'(?:/?|[/?]\S+)$', re.IGNORECASE)
188 |     if regex.match(url) is not None:
189 |         return True
190 |     else:
191 |         return False
192 | 
193 | 
194 | def _obtain_io(init_obj):
195 |     io_obj = None
196 | 
197 |     # Check to see if init_obj is a GPFile object from the GenePattern Python Client
198 |     if isinstance(init_obj, gp.GPFile):
199 |         io_obj = init_obj.open()
200 | 
201 |     # Check to see if init_obj is a file-like object
202 |     # Skip if a file-like object has already been obtained
203 |     if hasattr(init_obj, 'read') and io_obj is None:
204 |         io_obj = init_obj
205 | 
206 |     # Check to see if gct_obj is a string
207 |     # Skip if a file-like object has already been obtained
208 |     if isinstance(init_obj, str) and io_obj is None:
209 | 
210 |         # Check to see if the string contains multiple lines
211 |         # If it does, it is likely raw data
212 |         if '\n' in init_obj:
213 |             # Wrap the raw data in a StringIO (file-like object)
214 |             io_obj = io.StringIO(init_obj)
215 | 
216 |         # Check to see if the string contains a URL
217 |         # Skip if a file-like object has already been obtained
218 |         if _is_url(init_obj) and io_obj is None:
219 |             io_obj = urllib.request.urlopen(init_obj)
220 | 
221 |         # Otherwise try treating the string as a file path
222 |         # If this doesn't work throw an error, we don't know what to do with this string.
223 |         # Skip if a file-like object has already been obtained
224 |         if io_obj is None:
225 |             try:
226 |                 # Point gct_obj to file (read in the code below)
227 |                 io_obj = open(init_obj, 'r')
228 |             except IOError:
229 |                 raise IOError('Input string not determined to be raw data, URL or readable file.')
230 | 
231 |     # If we still don't have a file-like object at this point, throw an error
232 |     if io_obj is None:
233 |         raise TypeError('Unknown type passed to GCT() or ODF()')
234 | 
235 |     # Return the io_obj
236 |     return io_obj
237 | 
238 | 
239 | #########################
240 | # ODF Utility Functions #
241 | #########################
242 | 
243 | 
244 | def _header_dict_to_str(headers):
245 |     # Define the list of headers to handle as special cases
246 |     special = ['HeaderLines', 'COLUMN_NAMES', 'COLUMN_TYPES', 'Model', 'DataLines']
247 | 
248 |     # Add the initial ODF version line
249 |     combined = 'ODF 1.0\n'
250 | 
251 |     # Add HeaderLines
252 |     combined += 'HeaderLines=' + str(len(headers)) + '\n'
253 | 
254 |     # Add column names, if available
255 |     if 'COLUMN_NAMES' in headers:
256 |         combined += 'COLUMN_NAMES:' + str(headers['COLUMN_NAMES']) + '\n'
257 | 
258 |     # Add column types, if available
259 |     if 'COLUMN_TYPES' in headers:
260 |         combined += 'COLUMN_TYPES:' + str(headers['COLUMN_TYPES']) + '\n'
261 | 
262 |     # Add model, if available
263 |     if 'Model' in headers:
264 |         combined += 'Model=' + str(headers['Model']) + '\n'
265 | 
266 |     # Add remaining headers
267 |     for key, value in sorted(headers.items()):
268 |         if key not in special:
269 |             combined += str(key) + '=' + str(value) + '\n'
270 | 
271 |     # Add data lines, if available
272 |     if 'DataLines' in headers:
273 |         combined += 'DataLines=' + str(headers['DataLines']) + '\n'
274 | 
275 |     # Return the combined header string
276 |     return combined
277 | 
278 | 
279 | def _apply_odf_properties(df, headers, model):
280 |     """
281 |     Attach properties to the Dataframe to carry along ODF metadata
282 | 
283 |     :param df: The dataframe to be modified
284 |     :param headers: The ODF header lines
285 |     :param model: The ODF model type
286 |     """
287 |     df.headers = headers
288 |     df.model = model
289 | 
290 | 
291 | def _bytes_to_str(lines):
292 |     """
293 |     Convert all lines from byte string to unicode string, if necessary
294 |     """
295 |     if len(lines) >= 1 and hasattr(lines[0], 'decode'):
296 |         return [line.decode('utf-8') for line in lines]
297 |     else:
298 |         return lines
299 | 
300 | 
301 | def _extract_header_value(line):
302 |     """
303 |     Extracts a key / value pair from a header line in an ODF file
304 |     """
305 | 
306 |     # Skip blank lines, returning None
307 |     if not line:
308 |         return None
309 | 
310 |     # Attempt to split by equals sign
311 |     halves = line.split('=')
312 |     if len(halves) > 1:
313 |         key = halves[0].strip()
314 |         value = halves[1].strip()
315 |         return {key: value}
316 | 
317 |     # Otherwise, attempt to split by colon
318 |     else:
319 |         halves = line.split(':')
320 |         key = halves[0].strip()
321 |         value = halves[1].strip()
322 |         return {key: value}
323 | 
324 | 
325 | def _extract_column_names(headers):
326 |     """
327 |     Return an array containing the column names, extracted from the headers
328 |     """
329 |     if 'COLUMN_NAMES' in headers:
330 |         name_string = headers['COLUMN_NAMES']
331 |         return name_string.split('\t')
332 |     else:
333 |         return None
334 | 
335 | 
336 | def _extract_model(headers):
337 |     """
338 |     Return an array containing the column names, extracted from the headers
339 |     """
340 |     return headers['Model']
341 | 
342 | 
def _extract_header_number(lines):
    """
    Read the header line count from the second line of the ODF file

    The second line is parsed as a key / value pair and its value is returned as an int.
    """
    second_line = lines[1]
    header_pair = _extract_header_value(second_line)
    count_text = list(header_pair.values())[0]
    return int(count_text)
350 | 
351 | 
def _parse_header(lines):
    """
    Build a dict of every key / value pair found in the ODF header

    The header starts at the third line and spans as many lines as the
    second line of the file declares; blank lines contribute nothing.
    """
    total = _extract_header_number(lines)
    parsed = {}
    for index in range(2, total + 2):
        entry = _extract_header_value(lines[index])
        if entry:  # Blank lines yield no pair
            parsed.update(entry)
    return parsed
366 | 
367 | 
def count_header_blanks(lines, count):
    """
    Return how many of the ``count`` header lines (starting at the third line)
    produce no key / value pair
    """
    return sum(
        1
        for index in range(2, count + 2)
        if not _extract_header_value(lines[index])
    )
378 | 
379 | 
def _join_data_lines(lines, skip):
    """
    Strip every line and join the lines after the header into one newline-separated string

    The body starts after the two leading lines, the ``skip`` header lines and
    however many blank lines were counted inside the header.
    """
    stripped = [str.strip(entry) for entry in lines]
    blank_total = count_header_blanks(stripped, skip)
    first_data_row = skip + blank_total + 2
    return '\n'.join(stripped[first_data_row:])
388 | 


--------------------------------------------------------------------------------
/gp/modules.py:
--------------------------------------------------------------------------------
  1 | """
  2 | GenePattern Module Creator
  3 | 
  4 | Tools for converting Python scripts into GenePattern server modules
  5 | 
  6 | Compatible with Python 3.4+
  7 | """
  8 | import getpass
  9 | import json
 10 | import os
 11 | import pathlib
 12 | import socket
 13 | import string
 14 | import zipfile
 15 | from enum import Enum
 16 | from datetime import datetime
 17 | 
 18 | import re
 19 | 
 20 | from gp import GPServer
 21 | 
# Module metadata; __version__ is embedded in the header comment of generated manifest files
__authors__ = ['Thorin Tabor']
__version__ = '0.2.0'
__status__ = 'Alpha'
 25 | 
 26 | 
class StringEnum(str, Enum):
    """
    Base class for enums whose members are also (and must be) ``str`` instances

    Mixing in ``str`` is what allows the enums declared in this module to be
    serialized to JSON.
    """
 32 | 
 33 | 
class Privacy(StringEnum):
    """
    Privacy setting of a module; the member value is written to the manifest ``privacy=`` line
    """
    PRIVATE = "private"
    PUBLIC = "public"
 37 | 
 38 | 
class Quality(StringEnum):
    """
    Quality level of a module; the member value is written to the manifest ``quality=`` line
    """
    DEVELOPMENT = "development"
    PREPRODUCTION = "preproduction"
    PRODUCTION = "production"
 43 | 
 44 | 
class OS(StringEnum):
    """
    Operating system declared for a module; the member value is written to the manifest ``os=`` line
    """
    ANY = "any"
    LINUX = "linux"
    MAC = "mac"
    WINDOWS = "windows"
 50 | 
 51 | 
 52 | class CPU(StringEnum):
 53 |     ANY = "any"
 54 |     ALPHA = "alpha"
 55 |     INTEL = "intel"
 56 |     POWERPC = "powerpc"
 57 |     SPARC = "sparn"
 58 | 
 59 | 
# Name of the manifest file that is written to the working directory and added to the module zip
MANIFEST_FILE_NAME = "manifest"
# Sentinel default for GPTaskSpec's lsid_authority argument: a value equal to this makes
# GPTaskSpec create a new LSIDAuthority(), which leaves None free to mean "no LSID checking"
DEFAULT_LSID_AUTHORITY = 0
 62 | 
 63 | 
 64 | class GPTaskSpec:
 65 |     """
 66 |     Specification needed to create a new GenePattern module
 67 |     """
 68 | 
 69 |     def __init__(self, name=None, description="", version_comment="", author="", institution="",
 70 |                  categories=[], privacy=Privacy.PRIVATE, quality=Quality.DEVELOPMENT,
 71 |                  file_format=[], os=OS.ANY, cpu=CPU.ANY, language="Python",
 72 |                  user=None, support_files=[], documentation="", license="",
 73 |                  lsid=None, version=1, lsid_authority=DEFAULT_LSID_AUTHORITY, command_line=None, parameters=[]):
 74 | 
 75 |         self.name = name
 76 |         self.description = description
 77 |         self.version_comment = version_comment
 78 |         self.author = author
 79 |         self.institution = institution
 80 | 
 81 |         self.categories = categories
 82 |         self.privacy = privacy
 83 |         self.quality = quality
 84 | 
 85 |         self.file_format = file_format
 86 |         self.os = os
 87 |         self.cpu = cpu
 88 |         self.language = language
 89 | 
 90 |         self.user = user
 91 |         self.support_files = support_files
 92 |         self.documentation = documentation
 93 |         self.license = license
 94 | 
 95 |         # Use None for no LSID checking
 96 |         self.lsid_authority = lsid_authority if lsid_authority != DEFAULT_LSID_AUTHORITY else LSIDAuthority()
 97 |         self.version = version
 98 |         self.lsid = lsid if lsid else self._get_lsid()
 99 |         self.command_line = command_line
100 |         self.parameters = parameters
101 | 
102 |     def validate(self):
103 |         """
104 |         Perform some basic checks to help ensure that the specification is valid.
105 |         Throws an exception if an invalid value is found.
106 |         Returns true if all checks were passed.
107 |         :return: boolean
108 |         """
109 |         # Check all values for None
110 |         for attr in self.__dict__:
111 |             if self.__dict__[attr] is None:
112 |                 raise ValueError(attr + " is not set")
113 | 
114 |         # Validate name
115 |         invalid_chars = GPTaskSpec.invalid_chars()
116 |         if any(char in invalid_chars for char in self.name):
117 |             raise ValueError("module name includes invalid characters: " + self.name)
118 | 
119 |         # Validate LSID
120 |         self._valid_lsid()
121 | 
122 |         # Validate categories
123 |         if not self.all_strings(self.categories):
124 |             raise TypeError("categories contains non-string value: " + str(self.categories))
125 | 
126 |         # Validate file formats
127 |         if not self.all_strings(self.file_format):
128 |             raise TypeError("file_format contains non-string value: " + str(self.file_format))
129 | 
130 |         # Validate support files
131 |         if not self.all_strings(self.support_files):
132 |             raise TypeError("support_files contains non-string value: " + str(self.support_files))
133 | 
134 |         # Validate parameter list
135 |         if not self._all_params(self.parameters):
136 |             raise TypeError("parameters contains non-GPParamSpec value: " + str(self.parameters))
137 | 
138 |         # Validate individual parameters
139 |         for param in self.parameters:
140 |             param.validate()
141 | 
142 |         # Return that everything validates
143 |         return True
144 | 
145 |     def create_zip(self, clean=True, increment_version=True, register=True):
146 |         """
147 |         Creates a GenePattern module zip file for upload and installation on a GenePattern server
148 |         :param clean: boolean
149 |         :return:
150 |         """
151 |         # First validate the attributes
152 |         self.validate()
153 | 
154 |         # Check to see if an existing interferes with module creation
155 |         if os.path.exists(MANIFEST_FILE_NAME):
156 |             raise OSError("existing manifest blocks manifest file creation")
157 | 
158 |         # Write the manifest
159 |         self.write_manifest()
160 | 
161 |         # Create the zip
162 |         self._zip_files()
163 | 
164 |         # Increment the version of the module
165 |         if increment_version:
166 |             self.version += 1
167 | 
168 |         # Register the module with the LSID authority
169 |         if register and self.lsid_authority:
170 |             self.lsid_authority.register(self)
171 | 
172 |         # Clean up the manifest
173 |         if clean:
174 |             os.remove(MANIFEST_FILE_NAME)
175 | 
176 |     def _get_lsid(self):
177 |         """
178 |         Assigns the module an LSID from the LSID authority
179 |         :return:
180 |         """
181 |         # If no LSID authority, skip LSID assignment
182 |         if self.lsid_authority is None:
183 |             return
184 | 
185 |         # Otherwise assign the LSID
186 |         return self.lsid_authority.lsid()
187 | 
188 |     def _zip_files(self):
189 |         """
190 |         Adds the manifest and all support files to the zip file
191 |         :return:
192 |         """
193 |         # Create the zip file
194 |         zip = zipfile.ZipFile(self.name + '.zip', 'w', zipfile.ZIP_DEFLATED)
195 | 
196 |         # Add the manifest file to the zip
197 |         zip.write(MANIFEST_FILE_NAME)
198 | 
199 |         # Add the support files to the zip
200 |         for file in self.support_files:
201 |             zip.write(file)
202 | 
203 |         # Close the zip file
204 |         zip.close()
205 | 
206 |     def write_manifest(self, module_directory=""):
207 |         """
208 |         Write a GenePattern manifest file for the module
209 |         :param module_directory: optionally write to subdirectory
210 |         :return:
211 |         """
212 |         # First validate the spec
213 |         self.validate()
214 | 
215 |         # Open the manifest file for writing
216 |         manifest_file = open(os.path.join(module_directory, "manifest"), "w")
217 | 
218 |         # Write the header
219 |         manifest_file.write("# " + self.name + "\n")
220 |         manifest_file.write("# " + str(datetime.now()) + "\n")
221 |         manifest_file.write("# Generated by Python Module Creator v" + __version__ + "\n")
222 | 
223 |         # Write initial attributes
224 |         manifest_file.write("JVMLevel=\n")
225 |         manifest_file.write("LSID=" + self.manifest_escape(self.lsid) + ':' + str(self.version) + "\n")
226 |         manifest_file.write("author=" + self._author_line() + "\n")
227 |         manifest_file.write("categories=" + ';'.join(self.categories) + "\n")
228 |         manifest_file.write("commandLine=" + self.command_line + "\n")
229 |         manifest_file.write("cpuType=" + str(self.cpu.value) + "\n")
230 |         manifest_file.write("description=" + self.description + "\n")
231 |         manifest_file.write("fileFormat=" + ';'.join(self.file_format) + "\n")
232 |         manifest_file.write("language=" + self.language + "\n")
233 |         manifest_file.write("license=" + self.license + "\n")
234 |         manifest_file.write("name=" + self.name + "\n")
235 |         manifest_file.write("os=" + str(self.os.value) + "\n")
236 | 
237 |         # Write parameter attributes
238 |         for index, param in enumerate(self.parameters):
239 |             manifest_file.write(param.manifest_repr(index+1))
240 | 
241 |         # Write footer attributes
242 |         manifest_file.write("privacy=" + str(self.privacy.value) + "\n")
243 |         manifest_file.write("publicationDate=" + self._publication_date() + "\n")
244 |         manifest_file.write("quality=" + str(self.quality.value) + "\n")
245 |         manifest_file.write("taskDoc=" + self.documentation + "\n")
246 |         manifest_file.write("taskType=" + self._task_type() + "\n")
247 |         manifest_file.write("userid=" + self.user + "\n")
248 |         manifest_file.write("version=" + self.version_comment + "\n")
249 | 
250 |         # Close the file
251 |         manifest_file.close()
252 | 
253 |     def _author_line(self):
254 |         """
255 |         Helper method to concatenate author and institution values, if necessary
256 |         :return: string
257 |         """
258 |         if self.author and self.institution:
259 |             return self.author + ";" + self.institution
260 |         elif self.author:
261 |             return self.author
262 |         else:
263 |             return self.institution
264 | 
265 |     @staticmethod
266 |     def _publication_date():
267 |         """
268 |         Helper method to return the publication date in the expected format
269 |         :return: string
270 |         """
271 |         return datetime.now().strftime("%m/%d/%Y %H\:%M")
272 | 
273 |     def _task_type(self):
274 |         """
275 |         Helper method for extracting taskType from the categories list
276 |         :return:
277 |         """
278 |         if self.categories:
279 |             return self.categories[0]
280 |         else:
281 |             return ""
282 | 
283 |     @staticmethod
284 |     def manifest_escape(string):
285 |         """
286 |         Escape colon and equals characters for inclusion in manifest file
287 |         :param string:
288 |         :return: string
289 |         """
290 |         return string.replace(':', '\:').replace('=', '\=')
291 | 
292 |     @staticmethod
293 |     def all_strings(arr):
294 |         """
295 |         Ensures that the argument is a list that either is empty or contains only strings
296 |         :param arr: list
297 |         :return:
298 |         """
299 |         if not isinstance([], list):
300 |             raise TypeError("non-list value found where list is expected")
301 |         return all(isinstance(x, str) for x in arr)
302 | 
303 |     @staticmethod
304 |     def _all_params(arr):
305 |         """
306 |         Ensures that the argument is a list that either is empty or contains only GPParamSpec's
307 |         :param arr: list
308 |         :return:
309 |         """
310 |         if not isinstance([], list):
311 |             raise TypeError("non-list value found for parameters")
312 |         return all(isinstance(x, GPParamSpec) for x in arr)
313 | 
314 |     def _valid_lsid(self):
315 |         """
316 |         Performs some basic (non-comprehensive) LSID validation
317 |         :return:
318 |         """
319 |         if not isinstance(self.lsid, str):
320 |             raise TypeError("lsid is not a string, string expected: " + str(self.lsid))
321 | 
322 |         if self.lsid.count(':') != 4:
323 |             raise ValueError("lsid contains incorrect number of colons, 4 expected: " + str(self.lsid))
324 | 
325 |         if self.lsid.split(':')[0].lower() != 'urn':
326 |             raise ValueError("lsid does not begin with urn: " + str(self.lsid))
327 | 
328 |         # If an LSID authority is specified, check with the authority
329 |         if self.lsid_authority:
330 |             if not self.lsid_authority.validate(self.lsid, check_existing=False):
331 |                 raise ValueError("lsid does not the authority: " + str(self.lsid))
332 | 
333 |     @staticmethod
334 |     def invalid_chars():
335 |         """
336 |         Returns a set of characters which are not valid in module or parameter names
337 |         :return:
338 |         """
339 |         return set(string.punctuation.replace("_", "").replace(".", "") + string.whitespace)
340 | 
341 | 
class Type(StringEnum):
    """
    Parameter type; the member value is written to the manifest ``pN_TYPE=`` line.
    FILE parameters additionally get ``pN_MODE=IN``.
    """
    FILE = "FILE"
    TEXT = "TEXT"
    INTEGER = "Integer"
    FLOATING_POINT = "Floating Point"
    DIRECTORY = "DIRECTORY"
    PASSWORD = "PASSWORD"
349 | 
350 | 
class JavaType(StringEnum):
    """
    Java type string for each parameter Type, written to the manifest ``pN_type=`` line.
    Members are looked up by the name of the corresponding Type member, so the
    member names here must mirror those of Type.
    """
    FILE = "java.io.File"
    TEXT = "java.lang.String"
    INTEGER = "java.lang.Integer"
    FLOATING_POINT = "java.lang.Float"
    DIRECTORY = "DIRECTORY"
    PASSWORD = "PASSWORD"
358 | 
359 | 
class Optional(StringEnum):
    """
    Whether a parameter is optional; the member value is written to the manifest
    ``pN_optional=`` line (empty string for required, "on" for optional).
    """
    REQUIRED = ""
    OPTIONAL = "on"
363 | 
364 | 
class GPParamSpec:
    """
    Specification needed to create a parameter for a new GenePattern module
    """
    def __init__(self, name=None, description="", optional=Optional.REQUIRED,
                 type=Type.TEXT, choices=None, value="", default_value="",
                 file_format=None, min_values=0, max_values=1,
                 flag="", prefix_when_specified=False):
        """
        Store the parameter values. No validation happens here; see validate().

        ``choices`` and ``file_format`` default to None and are replaced by a fresh
        empty dict / list, so that instances never share one mutable default object.

        :param choices: dict whose entries are written to the manifest as ``value=key`` pairs
        :param max_values: int, or None / float("inf") for an unbounded number of values
        """
        self.name = name
        self.description = description
        self.optional = optional

        self.type = type
        self.choices = choices if choices is not None else {}
        self.value = value
        self.default_value = default_value

        self.file_format = file_format if file_format is not None else []
        self.min_values = min_values
        self.max_values = max_values

        self.flag = flag
        self.prefix_when_specified = prefix_when_specified

    def validate(self):
        """
        Perform some basic checks on the parameter specification.

        :return: boolean - True if all checks were passed
        :raises ValueError: if an attribute other than max_values is None, the name has
                            invalid characters, or min_values / max_values have the wrong type
        :raises TypeError: if file_format or choices have the wrong type
        """
        # Check all values for None, only max_values is allowed to be None
        for attr, value in self.__dict__.items():
            if value is None and attr != "max_values":
                raise ValueError(attr + " is not set")

        # Validate name
        invalid_chars = GPTaskSpec.invalid_chars()
        if any(char in invalid_chars for char in self.name):
            raise ValueError("parameter name includes invalid characters: " + self.name)

        # Validate min_values
        if not isinstance(self.min_values, int):
            raise ValueError("min_values not an int in: " + self.name)

        # Validate max_values
        if not isinstance(self.max_values, int) and self.max_values is not None and self.max_values != float("inf"):
            raise ValueError("max_values not an int, None or infinity in: " + self.name)

        # Validate file formats
        if not GPTaskSpec.all_strings(self.file_format):
            raise TypeError("file_format contains non-string value in parameter: " + self.name)

        # Validate choices dict
        if not isinstance(self.choices, dict):
            raise TypeError("choices is not dict in parameter: " + self.name)

        # Return that everything validates
        return True

    def manifest_repr(self, p_num):
        """
        Builds a manifest string representation of the parameters and returns it
        :param p_num: int - number of the parameter, used in the ``pN_`` line prefix
        :return: string
        """
        # Build the parameter prefix
        prefix = "p" + str(p_num) + "_"

        is_file = self.type == Type.FILE
        has_choices = len(self.choices) > 0
        # The flag is only emitted as a prefix when prefix_when_specified is set
        flag_prefix = self.flag if self.prefix_when_specified else ""

        # Generate the manifest string
        manifest = prefix + "MODE=" + ("IN" if is_file else "") + "\n"
        manifest += prefix + "TYPE=" + str(self.type.value) + "\n"
        # Choices of FILE parameters get their own line ...
        if is_file and has_choices:
            manifest += prefix + "choices=" + self._choices() + "\n"
        manifest += prefix + "default_value=" + self.default_value + "\n"
        manifest += prefix + "description=" + GPTaskSpec.manifest_escape(self.description) + "\n"
        manifest += prefix + "fileFormat=" + ';'.join(self.file_format) + "\n"
        manifest += prefix + "flag=" + self.flag + "\n"
        manifest += prefix + "name=" + self.name + "\n"
        manifest += prefix + "numValues=" + self._num_values() + "\n"
        manifest += prefix + "optional=" + str(self.optional.value) + "\n"
        manifest += prefix + "prefix=" + flag_prefix + "\n"
        manifest += prefix + "prefix_when_specified=" + flag_prefix + "\n"
        manifest += prefix + "type=" + self._java_type() + "\n"
        # ... while choices of all other parameter types go on the value line
        manifest += prefix + "value=" + (self._choices() if not is_file and has_choices else "") + "\n"

        # Return the manifest string
        return manifest

    def _choices(self):
        """
        Generate a string of choices as key/value pairs
        :return: string - escaped, semicolon-separated ``value=key`` pairs
        """
        # Generate key/value strings
        pairs = [str(value) + "=" + str(key) for key, value in self.choices.items()]

        # Assemble into overall string and escape
        return GPTaskSpec.manifest_escape(";".join(pairs))

    def _num_values(self):
        """
        Generate a valid num_values string based off min_values and max_values
        :return: string - ``<min>+`` when unbounded, otherwise ``<min>..<max>``
        """
        # Add min_values to string
        num_values = str(self.min_values) if self.min_values else "0"

        # Handle infinite max_values or finite max_values
        if self.max_values is None or self.max_values == float("inf"):
            num_values += "+"
        else:
            num_values += ".." + str(self.max_values)

        # Return the num_values string
        return num_values

    def _java_type(self):
        """
        Translates GenePattern type string to Java type string
        :return: string
        """
        return JavaType[self.type.name].value
485 | 
486 | 
class LSIDAuthority:
    """
    Class representing a Life Science Identifier (LSID) authority used to assign an LSID to the
    GenePattern modules produced by this package.
    """
    authority = None            # Path of the LSID authority file
    base_lsid = None            # LSID prefix shared by all modules of this authority
    module_count = None         # Number of modules registered so far
    registered_modules = None   # dict mapping LSID -> module name

    def __init__(self, authority=None):
        """
        Initializes an LSID authority. Looks for an LSID authority file. If no such file is found,
        a file will be created with an LSID based off the machine's hostname or IP address.

        LSID authority standard location:
            ~/.genepattern/lsid_authority.json

        :param authority: Must be a file path (string or pathlib path) to the LSID authority file,
                          or a gpserver object (not implemented). Values of any other type are
                          ignored and leave the authority uninitialized.
        :raises NotImplementedError: if a GPServer is passed
        :raises RuntimeError: if the path is not a file, lacks read/write permission or cannot be loaded
        """

        # Handle default authority file locations
        if authority is None:
            # Check for LSID authority file in user directory
            user_dir = str(pathlib.Path.home())
            gp_dir = os.path.join(user_dir, '.genepattern')
            default_authority_file = os.path.join(gp_dir, 'lsid_authority.json')
            if not os.path.isfile(default_authority_file):
                # No authority file found, lazily create file
                self._create_authority_file(default_authority_file)
            authority = default_authority_file

        # Handle a GenePattern server as the LSID authority
        if isinstance(authority, GPServer):
            raise NotImplementedError("Support for GenePattern server as a remote LSID authority is not implemented.")

        # Accept pathlib paths in addition to plain strings
        if isinstance(authority, pathlib.PurePath):
            authority = str(authority)

        # Handle a string file path as an LSID authority
        if isinstance(authority, str):
            if not os.path.isfile(authority):
                raise RuntimeError("Provided LSID authority isn't a file")
            if not (os.access(authority, os.R_OK) and os.access(authority, os.W_OK)):
                raise RuntimeError("Missing permissions on provided LSID authority file")
            try:
                # Load the authority file
                self.authority = authority
                self._load_lsid_authority()
            except Exception as e:
                raise RuntimeError("Unable to read authority file due to: " + str(e)) from e

    @staticmethod
    def _generate_namespace():
        """
        Generate an LSID namespace based off Jupyter user or system user
        :return: string
        """
        # Get the Jupyter user, if available
        raw_namespace = os.environ.get('JPY_USER')

        # Otherwise get the current user
        if raw_namespace is None or raw_namespace == '':
            raw_namespace = getpass.getuser()

        # Remove illegal characters and return
        return re.sub(r'[^\w.-]', '-', raw_namespace)

    @staticmethod
    def _generate_domain():
        """
        Generate an LSID domain based off a setting file or the hostname
        :return: string
        """

        # Check for LSID domain setting file
        try:
            user_dir = str(pathlib.Path.home())
            jupyter_dir = os.path.join(user_dir, '.jupyter')
            domain_path = os.path.join(jupyter_dir, 'lsid_domain')
            with open(domain_path, 'r') as domain_file:
                domain = str(domain_file.read()).strip()
            if domain is not None and domain != '':
                return domain
        except Exception:
            # Deliberately best-effort: a missing or unreadable setting file simply means
            # falling back to the domain name. Unlike a bare except, this does not
            # swallow KeyboardInterrupt or SystemExit.
            pass

        # If this fails, return the fully qualified domain name
        return socket.getfqdn()

    def _generate_base_lsid(self):
        """
        Generates and returns a base LSID
        :return: string of the form ``urn:lsid:<domain>:<namespace>``
        """
        domain = self._generate_domain()
        namespace = self._generate_namespace()

        # Return the base LSID
        return "urn:lsid:" + domain + ":" + namespace

    def _create_blank_authority(self):
        """
        Returns a dictionary structure representing a blank LSID authority file
        :return: dict
        """
        return {
            'base_lsid': self._generate_base_lsid(),
            'module_count': 0,
            'registered_modules': {},
        }

    def _create_authority_file(self, file_path):
        """
        Create a new LSID authority file at the indicated location
        :param file_path: location of LSID authority file
        """
        parent_dir = os.path.dirname(os.path.realpath(file_path))

        # Create the parent directory if it does not exist; exist_ok avoids failing
        # when the directory appears between a check and the creation
        os.makedirs(parent_dir, exist_ok=True)

        # Create blank LSID authority structure
        blank = self._create_blank_authority()

        # Write blank structure to new authority file
        with open(file_path, 'w+') as authority_file:
            json.dump(blank, authority_file, sort_keys=True, indent=4, separators=(',', ': '))

    def _load_lsid_authority(self):
        """
        Load (or reload) the LSID authority file and set class attributes
        """
        # The context manager closes the file even if the JSON cannot be parsed
        with open(self.authority, 'r') as authority_file:
            authority_json = json.load(authority_file)
        self.base_lsid = authority_json['base_lsid']
        self.module_count = int(authority_json['module_count'])
        self.registered_modules = authority_json['registered_modules']

    def _next_lsid_number(self):
        """
        Return a string representing the next module number for this LSID authority
        :return: module count plus one, zero-padded to at least four digits
        """
        if self.module_count is None:
            raise Exception("Module count in LSID authority not initialized")

        return str(self.module_count+1).zfill(4)

    def _assemble_lsid(self, module_number):
        """
        Return an assembled LSID based off the provided module number and the authority's base LSID.
        Note: Never includes the module's version number.
        :param module_number:
        :return: string
        """
        if self.base_lsid is None:
            raise Exception("Base LSID in LSID authority not initialized")

        return self.base_lsid + ":" + str(module_number)

    def lsid(self):
        """
        Acquire a new LSID assigned by the LSID authority
        :return: string - assigned LSID
        """
        return self._assemble_lsid(self._next_lsid_number())

    def register(self, task_spec):
        """
        Registers a module specification with the LSID authority.
        Validates that it possesses an LSID assigned by the authority.
        Raises an exception if registration wasn't successful.
        :param task_spec:
        :return: boolean - True if registration was successful
        :raises RuntimeError: if the LSID does not belong to this authority or is already registered
        """
        if not self.validate(task_spec.lsid):
            raise RuntimeError("Module LSID id not valid: " + str(task_spec.lsid))

        # Add the module name to the map
        self.registered_modules[task_spec.lsid] = task_spec.name

        # Increment module count
        self.module_count += 1

        # Write the updated LSID authority file and reload
        with open(self.authority, 'w') as authority_file:
            json.dump({
                'base_lsid': self.base_lsid,
                'module_count': self.module_count,
                'registered_modules': self.registered_modules,
            }, authority_file, sort_keys=True, indent=4, separators=(',', ': '))
        self._load_lsid_authority()

        return True

    def validate(self, lsid, check_existing=True):
        """
        Validates an LSID with the LSID authority.
        :param lsid:
        :param check_existing: boolean - also reject LSIDs that are already registered
        :return: boolean - is the LSID valid with this authority?
        """
        # Base LSID matches. The trailing colon is part of the comparison so that a base
        # such as "...:alice" does not also accept LSIDs of "...:alice2".
        if not lsid.startswith(self.base_lsid + ":"):
            return False

        # Module number isn't already taken
        if check_existing and lsid in self.registered_modules:
            return False

        # Everything checks out, return True
        return True

    def lookup(self, lsid):
        """
        Look up the name of a module by LSID assigned by the authority.
        Returns None if the LSID is not found.
        :param lsid:
        :return: string or none
        """
        if self.registered_modules is None or lsid not in self.registered_modules:
            return None
        else:
            return self.registered_modules[lsid]
721 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | 
 4 | # Read version and other metadata from file
 5 | __version__ = '24.01'
 6 | 
 7 | with open('README.md') as f:
 8 |     long_description = f.read()
 9 | 
10 | setup(
11 |     name='genepattern-python',
12 |     packages=['gp'],
13 |     version=__version__,
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     description='Library for programmatically interacting with GenePattern from Python.',
17 |     author='Thorin Tabor',
18 |     author_email='tmtabor@cloud.ucsd.edu',
19 |     url='https://github.com/genepattern/genepattern-python',
20 |     download_url='https://github.com/genepattern/genepattern-python/archive/' + __version__ + '.tar.gz',
21 |     keywords=['genepattern', 'genomics', 'bioinformatics'],
22 |     license='BSD'
23 | )
24 | 


--------------------------------------------------------------------------------
/test/test_data.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for loading GCT and ODF files into pandas dataframes
  3 | """
  4 | import pytest
  5 | 
  6 | import gp
  7 | import gp.data
  8 | import urllib.request
  9 | 
 10 | 
 11 | @pytest.fixture(scope="session", autouse=True)
 12 | def before_tests(request):
 13 |     # Download files for local use
 14 |     urllib.request.urlretrieve("https://datasets.genepattern.org/data/all_aml/all_aml_test.gct", "all_aml_test.gct")
 15 |     urllib.request.urlretrieve("https://datasets.genepattern.org/data/all_aml/all_aml_test.preprocessed.comp.marker.odf",
 16 |                                "all_aml_test.preprocessed.comp.marker.odf")
 17 | 
 18 |     # Clean up after ourselves
 19 |     request.addfinalizer(after_tests)
 20 | 
 21 | 
 22 | def test_gct_load_gpfile():
 23 |     gpfile = gp.GPFile(gp.GPServer('http://genepattern.broadinstitute.org/gp', '', ''),
 24 |                        'https://datasets.genepattern.org/data/all_aml/all_aml_test.gct')
 25 |     gct = gp.data.GCT(gpfile)
 26 |     gct_asserts(gct)
 27 | 
 28 | 
 29 | def test_gct_load_file():
 30 |     file = open('all_aml_test.gct', 'r')
 31 |     gct = gp.data.GCT(file)
 32 |     gct_asserts(gct)
 33 | 
 34 | 
 35 | def test_gct_load_url():
 36 |     gct = gp.data.GCT('https://datasets.genepattern.org/data/all_aml/all_aml_test.gct')
 37 |     gct_asserts(gct)
 38 | 
 39 | 
 40 | def test_gct_load_path():
 41 |     gct = gp.data.GCT('all_aml_test.gct')
 42 |     gct_asserts(gct)
 43 | 
 44 | 
 45 | def test_gct_load_string():
 46 |     with open('all_aml_test.gct', 'r') as file:
 47 |         file_str = file.read()
 48 |     gct = gp.data.GCT(file_str)
 49 |     gct_asserts(gct)
 50 | 
 51 | 
 52 | def test_odf_load_gpfile():
 53 |     gpfile = gp.GPFile(gp.GPServer('http://genepattern.broadinstitute.org/gp', '', ''),
 54 |                        'https://datasets.genepattern.org/data/all_aml/all_aml_test.preprocessed.comp.marker.odf')
 55 |     odf = gp.data.ODF(gpfile)
 56 |     odf_asserts(odf)
 57 | 
 58 | 
 59 | def test_odf_load_file():
 60 |     file = open('all_aml_test.preprocessed.comp.marker.odf', 'r')
 61 |     odf = gp.data.ODF(file)
 62 |     odf_asserts(odf)
 63 | 
 64 | 
 65 | def test_odf_load_url():
 66 |     odf = gp.data.ODF('https://datasets.genepattern.org/data/all_aml/all_aml_test.preprocessed.comp.marker.odf')
 67 |     odf_asserts(odf)
 68 | 
 69 | 
 70 | def test_odf_load_path():
 71 |     odf = gp.data.ODF('all_aml_test.preprocessed.comp.marker.odf')
 72 |     odf_asserts(odf)
 73 | 
 74 | 
 75 | def test_odf_load_string():
 76 |     with open('all_aml_test.preprocessed.comp.marker.odf', 'r') as file:
 77 |         file_str = file.read()
 78 |     odf = gp.data.ODF(file_str)
 79 |     odf_asserts(odf)
 80 | 
 81 | 
 82 | def after_tests():
 83 |     pass
 84 | 
 85 | 
 86 | #####################
 87 | # Utility functions #
 88 | #####################
 89 | 
 90 | 
 91 | def gct_asserts(odf):
 92 |     assert odf.row_count()
 93 |     assert odf.col_count()
 94 | 
 95 | 
 96 | def odf_asserts(odf):
 97 |     assert odf.model is not None
 98 |     assert odf.headers is not None
 99 |     assert odf.row_count()
100 |     assert odf.col_count()
101 | 


--------------------------------------------------------------------------------