├── .gitignore ├── How to test scripts.txt ├── LICENSE.txt ├── README.md ├── appserver └── static │ └── .placeholder ├── bin ├── GithubAPI.py ├── SplunkAPI.py ├── app_home.sh ├── fetch_git_repo_data.sh ├── fetch_github_data.py ├── fork_counter.py ├── generate_multi_repo_view.sh ├── git_fetch_repos.sh ├── git_repo_messages.sh ├── git_source_code.sh ├── multi_repositories_row.txt ├── print_splunk_user_and_password.py ├── shell_variables.sh └── splunkgit_settings.py ├── default ├── app.conf ├── data │ └── ui │ │ ├── nav │ │ └── default.xml │ │ └── views │ │ ├── git_author_page.xml │ │ ├── git_file_page.xml │ │ ├── git_filetype_page.xml │ │ ├── git_repo_page.xml │ │ ├── github_issues.xml │ │ └── github_repo.xml ├── indexes.conf ├── inputs.conf ├── macros.conf ├── props.conf ├── savedsearches.conf └── splunkgit.conf ├── lib └── joblib │ ├── __init__.py │ ├── disk.py │ ├── format_stack.py │ ├── func_inspect.py │ ├── hashing.py │ ├── logger.py │ ├── memory.py │ ├── my_exceptions.py │ ├── numpy_pickle.py │ ├── parallel.py │ ├── test │ ├── __init__.py │ ├── common.py │ ├── test_disk.py │ ├── test_format_stack.py │ ├── test_func_inspect.py │ ├── test_hashing.py │ ├── test_logger.py │ ├── test_memory.py │ ├── test_my_exceptions.py │ ├── test_numpy_pickle.py │ ├── test_parallel.py │ └── tmp.py │ └── testing.py ├── local └── splunkgit.conf └── metadata └── default.meta /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX noise 2 | .DS_Store 3 | 4 | # Compiled python files 5 | *.pyc 6 | 7 | # Eclipse project files 8 | .project 9 | .pydevproject 10 | 11 | # Local stuff 12 | local/ 13 | local.meta 14 | 15 | # Git repositories folder 16 | git-repositories/ 17 | -------------------------------------------------------------------------------- /How to test scripts.txt: -------------------------------------------------------------------------------- 1 | This is how you can manually test our scripted inputs. 2 | 3 | Prerequisites: 4 | 1. Make sure 'splunk' is in your $PATH. 5 | 2. Make sure you've read and understood our README.md 6 | 7 | Test the scripts by opening up a terminal and locating the 'bin' folder of this app, which could be located at: 8 | $SPLUNK_HOME/etc/apps/splunkgit/bin 9 | 10 | To test our git repository data type: 11 | splunk cmd ./fetch_git_repo_data.sh 12 | 13 | To test the github data type: 14 | splunk cmd python fetch_github_data.py 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2011 Splunk Inc 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at
195 | 
196 | http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | Splunk > Splunkgit
2 | ==================
3 | 
4 | This application needs an awesome piece of software called [Splunk](http://www.splunk.com/).
5 | 
6 | Currently, the application is tested and the documentation written for Mac OS X 10.7.
7 | 
8 | Stuff in this repository is mentioned in a four-part blog series.
9 | 
10 | - [part 1](http://blogs.splunk.com/2011/11/9/splunkgit-part-1)
11 | - [part 2](http://blogs.splunk.com/2011/11/9/splunkgit-part-2)
12 | - [part 3](http://blogs.splunk.com/2011/11/17/splunkgit-part-3)
13 | - [part 4](http://blogs.splunk.com/2011/11/18/splunkgit-part-4)
14 | 
15 | Released v1.3.0.1 (12/19/2012)
16 | - Edited README.md!!
17 | 
18 | Released v1.3! (12/19/2012, because it's been a long time)
19 | - Fixes problem with grep -P, where -P is not supported by all grep versions.
20 | - Fixes git author page for Splunk 5.x
21 | - Some extractions of code, to hopefully make some things easier to understand.
22 | - Splunking commit messages (No dashboards with this data yet)
23 | - Can splunk any source files (None are enabled and there are no dashboards yet)
24 | 
25 | Released v1.2.1! (02/15/2012)
26 | - All repository views are now non-Flash, aka mobile supported.
27 | - Increased the period shown in multiple repositories from the last 2 weeks to the last 30 days.
28 | 
29 | Released v1.2! (01/16/2012)
30 | - You can now watch multiple repositories in semi real-time!
31 | - Less configuration
32 | - Faster updating scripts
33 | 
34 | Getting started
35 | ---------------
36 | 
37 | ### Installing Splunk
38 | 
39 | #### Generic instructions
40 | 
41 | - [Download Splunk for your platform](http://www.splunk.com/download?r=productOverview).
42 | - Unpack/Install Splunk by running the downloaded files.
43 | - Follow the instructions on screen
44 | - When done, continue to the *Installing splunkgit* section
45 | 
46 | `$SPLUNK_HOME` will from now on refer to the directory where you've extracted Splunk.
47 | 
48 | ### Installing splunkgit
49 | 
50 | - Make sure Splunk is not running
51 | - Open the Terminal app
52 | - Go to `$SPLUNK_HOME`: `cd $SPLUNK_HOME`
53 | - Go to the apps directory: `cd etc/apps`
54 | - Download the app: `git clone git://github.com/splunk/splunk-app-splunkgit.git`
55 | You can also download a released version from the [tags](./splunk-app-splunkgit/tags) page.
56 | 
57 | ### Configuration
58 | 
59 | - Make sure Splunk is not running
60 | - Open the Terminal app
61 | - Go to `$SPLUNK_HOME/etc/apps/splunk-app-splunkgit`
62 | - Edit local/splunkgit.conf with a text editor (`open -e local/splunkgit.conf`) and assign the following values (a full example is shown below):
63 | - `repo_addresses=` The addresses of the repos; use the read-only address. Ex: `git://github.com/splunk/splunk-app-splunkgit.git`. You can have one or more repositories, space-separated
64 | - `user=` Splunk user login so our scripts can search in Splunk
65 | - `password=` Splunk password for the user
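66 | 
67 | For reference, a minimal `local/splunkgit.conf` could look like the sketch below. The `[git]` and `[splunk]` stanza names match what `bin/splunkgit_settings.py` reads; the credentials shown are Splunk's defaults and should be replaced with your own:
68 | 
69 |     [git]
70 |     repo_addresses = git://github.com/splunk/splunk-app-splunkgit.git
71 | 
72 |     [splunk]
73 |     user = admin
74 |     password = changeme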
75 | 
76 | #### Configuring multiple repositories in semi real-time
77 | - Edit local/splunkgit.conf and assign `repo_addresses=` multiple repositories by separating them with a space. Ex: `repo_addresses=git://github.com/splunk/splunk-app-splunkgit.git git://github.com/splunk/splunk-sdk-java.git git://github.com/splunk/splunk-sdk-python.git`
78 | 
79 | - Copy default/inputs.conf to the local directory
80 | - Set the interval value of the fetch_git_repo_data.sh script to a low value. Ex: 20
81 | The git repositories will now be updated every 20 seconds. The views in the multiple repositories dashboard will be updated whenever there's more data.
82 | 
83 | ### Changing repository
84 | 
85 | - Make sure Splunk is not running
86 | - Run the following command to wipe all app data from Splunk:
87 | 
88 |     splunk clean eventdata -f -index splunkgit
89 | 
90 | - Change the splunkgit.conf file, as described in the *Configuration* section, to point to the new repo.
91 | 
92 | ### Starting and stopping Splunk
93 | 
94 | - Open Terminal
95 | - Go to `$SPLUNK_HOME`: `cd $SPLUNK_HOME`
96 | - Start Splunk: `bin/splunk start`
97 | - In your web browser, go to `http://localhost:8000`
98 | - If asked, enter your user name and password (the default is **admin:changeme**)
99 | - If you change the password, you also need to change the configuration file to match.
100 | - Stop Splunk: `bin/splunk stop`
101 | 
102 | Third party libraries
103 | ---------------------
104 | 
105 | - [httplib2](http://code.google.com/p/httplib2/ "httplib2")
106 | - [joblib](https://github.com/joblib/joblib "joblib")
107 | 
108 | Known issues
109 | ------------
110 | 
111 | - If you clone this repository, install the app, and start up Splunk without configuring your own splunkgit.conf (as explained in *Changing repository*), Splunk will get git repository data from this repository's own .git directory.
112 | 
113 | License
114 | -------
115 | 
116 | Copyright 2012 Splunk, Inc.
117 | 
118 | Licensed under the Apache License, Version 2.0 (the "License");
119 | you may not use this file except in compliance with the License.
120 | You may obtain a copy of the License at
121 | 
122 |     http://www.apache.org/licenses/LICENSE-2.0
123 | 
124 | Unless required by applicable law or agreed to in writing, software
125 | distributed under the License is distributed on an "AS IS" BASIS,
126 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
127 | See the License for the specific language governing permissions and
128 | limitations under the License.
129 | 
-------------------------------------------------------------------------------- /appserver/static/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splunk/splunk-app-splunkgit/11658bd1000464e09064a2d7cc6937fa1af75f2b/appserver/static/.placeholder -------------------------------------------------------------------------------- /bin/GithubAPI.py: --------------------------------------------------------------------------------
1 | # Copyright 2011 Splunk, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import httplib2 17 | import re 18 | 19 | import inspect 20 | 21 | class GithubAPI(object): 22 | ''' 23 | Author: Emre Berge Ergenekon 24 | Contains functions for making some of the Github v3 API requests. 25 | ''' 26 | _base_url = '' 27 | 28 | def __init__(self, user_name, repo_name): 29 | self._base_url = 'https://api.github.com/repos/{0}/{1}'.format(user_name, repo_name) 30 | 31 | def watchers(self): 32 | return self.make_request('watchers') 33 | 34 | def open_issues(self): 35 | return self._issues('open') 36 | 37 | def closed_issues(self): 38 | return self._issues('closed') 39 | 40 | def _issues(self, state='open'): 41 | return self.make_request('issues?state={0}'.format(state)) 42 | 43 | def issues_since(self, since): 44 | request_issues_since = 'issues?since={0}'.format(since) 45 | return self._open_issues(request_issues_since) + self._closed_issues(request_issues_since) 46 | 47 | def _open_issues(self, request_prefix): 48 | return self.make_request(request_prefix + '&state=open') 49 | 50 | def _closed_issues(self, request_prefix): 51 | return self.make_request(request_prefix + '&state=closed') 52 | 53 | def forks(self): 54 | return self.make_request('forks') 55 | 56 | def repo(self): 57 | request = self.make_request('') 58 | if request: 59 | return request[0] 60 | else: 61 | return None 62 | 63 | def make_request(self, partial_url): 64 | http = self._get_http() 65 | next_url = self._create_full_api_url(partial_url) 66 | last_url = '' 67 | all_responses = [] 68 | while True : 69 | response, content = http.request(next_url) 70 | if response.status != 200: break 71 | response_as_json = json.loads(content) 72 | if type(response_as_json) is dict: response_as_json = [response_as_json] 73 | all_responses[len(all_responses):] = response_as_json 74 | if next_url == last_url or not self._has_link_header(response): break 75 | next_url = self._link_header_value_for_reletion(response, 'next') 76 | last_url = self._link_header_value_for_reletion(response, 'last') 77 | return all_responses 78 | 79 | def _get_http(self): 80 | #disable ssl_certificate_validation because of an issue with python 2.7.2 and httplib2 0.7.x 81 | #link: http://code.google.com/p/httplib2/issues/detail?id=154 82 | if 'disable_ssl_certificate_validation' in inspect.getargspec(httplib2.Http.__init__)[0]: 83 | return httplib2.Http(disable_ssl_certificate_validation=True) 84 | else: 85 | return httplib2.Http() 86 | 87 | def _create_full_api_url(self, partial_url): 88 | if partial_url == '': 89 | return '{base_url}?per_page=100'.format(base_url=self._base_url) 90 | else: 91 | return '{base_url}/{partial_url}{parameter_separator}per_page=100'.format(base_url=self._base_url, partial_url=partial_url, parameter_separator=('&' if '?' 
in partial_url else '?')) 92 | 93 | def _link_header_value_for_reletion(self, response, rel): 94 | linkHeader = response['link'] 95 | pattern = '(?<=\<)[^>]+?(?=\>; rel="{0}")'.format(rel) 96 | value = re.search(pattern, linkHeader).group(0) 97 | return value 98 | 99 | def _has_link_header(self, response): 100 | return 'link' in response 101 | 102 | ''' The code here should be rewritten to build and return a list of the forks instead of just counting them ''' 103 | ''' 104 | import os 105 | import sys 106 | 107 | #import from 3rd party lib 108 | LIB_PATH = os.path.abspath(os.path.join(__file__, '..', '..', 'lib')) 109 | sys.path.insert(0, LIB_PATH) 110 | from joblib import Parallel, delayed 111 | 112 | 113 | def get_total_fork_count(forks, run_as_single_job=False): 114 | total_number_of_forks = 1 115 | total_number_of_forks += _count_forks_in_all_depths(forks, run_as_single_job) 116 | return total_number_of_forks 117 | 118 | def _count_forks_in_all_depths(forks, run_as_single_job=False): 119 | if run_as_single_job: 120 | return _count_forks_in_all_depths_on_single_thread(forks) 121 | else: 122 | return _count_forks_in_all_depths_on_multiple_threads(forks) 123 | 124 | def _count_forks_in_all_depths_on_single_thread(forks): 125 | total_number_of_forks = 0 126 | for fork in forks: 127 | total_number_of_forks += _get_total_fork_count_inner_loop(fork) 128 | return total_number_of_forks 129 | 130 | def _count_forks_in_all_depths_on_multiple_threads(forks): 131 | fork_counts = Parallel(n_jobs=16)(delayed(_get_total_fork_count_inner_loop)(forks[i]) for i in range(len(forks))) 132 | return sum(fork_counts) 133 | 134 | def _get_total_fork_count_inner_loop(fork): 135 | if fork['private'] == True: 136 | return 0 137 | else: 138 | return _get_total_fork_count_for_public_fork(fork) 139 | 140 | def _get_total_fork_count_for_public_fork(fork): 141 | github_user = fork['owner']['login'] 142 | github_fork = fork['name'] 143 | github_api = _get_github_api_for(github_user, github_fork) 144 | return get_total_fork_count(github_api.forks(), True) 145 | 146 | def _get_github_api_for(github_user, github_repo): 147 | return GithubAPI(github_user, github_repo) 148 | ''' -------------------------------------------------------------------------------- /bin/SplunkAPI.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | 
16 | '''
17 | Created on Oct 31, 2011
18 | 
19 | @author: Petter Eriksson
20 | '''
21 | 
22 | import splunk.auth
23 | import splunk.search as search
24 | import time
25 | 
26 | class SplunkAPI(object):
27 | 
28 |     def __init__(self, username, password):
29 |         splunk.auth.getSessionKey(username, password)
30 | 
31 |     def time_of_last_updated_issue(self, repository):
32 |         last_updated_issue_search = self._search_for_last_updated_issue(repository)
33 |         return self._get_update_time_from_search(last_updated_issue_search)
34 | 
35 |     def _search_for_last_updated_issue(self, repository):
36 |         search_string = 'search index=splunkgit sourcetype="github_data" repository=%s github_issue_update_time=* | sort -str(github_issue_update_time) | head 1' % repository
37 |         issue_search = search.dispatch(search_string)
38 |         while not issue_search.isDone:
39 |             time.sleep(0.5) # wait for the dispatched search to finish
40 |         return issue_search
41 | 
42 |     def _get_update_time_from_search(self, search):
43 |         if len(search) == 0:
44 |             return None
45 |         else:
46 |             return self._get_update_time_from_head_of_search(search)
47 | 
48 |     def _get_update_time_from_head_of_search(self, search):
49 |         return search.events[0]['github_issue_update_time']
-------------------------------------------------------------------------------- /bin/app_home.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2011 Splunk, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | script_dir=$(dirname $0)
17 | app_dir="$script_dir/.."
18 | absolute_app_dir=`cd $app_dir; pwd`
19 | echo $absolute_app_dir
-------------------------------------------------------------------------------- /bin/fetch_git_repo_data.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2011 Splunk, Inc.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Script creates splunk events with each file change for all commits, with insertions and deletions.
18 | # Here's an example with some of the contents of these events: 19 | # [10-12-23 12:34:56] commit=123adf32fa21 repository=repo path=src/clj/core.clj insertions=3 deletions=1 20 | # Author: Petter Eriksson, Emre Berge Ergenekon 21 | 22 | set -e 23 | set -u 24 | 25 | SCRIPT_HOME=$(dirname $0) 26 | source $SCRIPT_HOME/shell_variables.sh 27 | 28 | #Initializing 29 | GIT_REPO= 30 | GIT_REPO_FOLDER= 31 | GIT_REPOS_HOME= 32 | chosen_repository= 33 | 34 | main () 35 | { 36 | $SCRIPT_HOME/git_fetch_repos.sh 37 | for repository in `$SPLUNK cmd python $SCRIPT_HOME/splunkgit_settings.py` 38 | do 39 | GIT_REPO=$repository 40 | GIT_REPO_FOLDER=`echo $GIT_REPO | sed 's/.*\///'` 41 | GIT_REPOS_HOME=$APP_HOME/git-repositories 42 | chosen_repository=$GIT_REPOS_HOME/$GIT_REPO_FOLDER 43 | 44 | if [ "$GIT_REPO" = "" ]; then 45 | echo "Could not find configured git repository. Have you configured splunkgit.conf? Read README.md for more information." 1>&2 46 | else 47 | if [ -d "$chosen_repository" ]; then 48 | print_hashes_and_git_log_numstat 49 | else 50 | echo "repository does not exist!" 1>&2 51 | fi 52 | fi 53 | done 54 | } 55 | 56 | print_hashes_and_git_log_numstat () 57 | { 58 | cd $chosen_repository 59 | git fetch 1>&2 60 | 61 | # Find the last indexed commit. 62 | # If there are no indexed commits, get the first commit of the repository. 63 | SINCE_COMMIT="" 64 | 65 | HAS_INDEXED_COMMITS=`$SPLUNK search "index=splunkgit repository=$GIT_REPO sourcetype=git_file_change | head 1 | stats count" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '[0-9]+'` 66 | if [ "$HAS_INDEXED_COMMITS" = "0" ]; then 67 | FIRST_COMMIT=`git log --all --no-color --no-renames --no-merges --reverse --pretty=format:'%H' | head -n 1` 68 | SINCE_COMMIT=$FIRST_COMMIT 69 | else 70 | LATEST_INDEXED_COMMIT=`$SPLUNK search "index=splunkgit repository=$GIT_REPO sourcetype="git_file_change" | sort 1 - _time | table commit_hash" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '^\w+'` 71 | SINCE_COMMIT=$LATEST_INDEXED_COMMIT 72 | fi 73 | 74 | # Get the time of the commit we are logging since. 75 | # Note: We're getting the time, so we can specify the --since flag to git log. 76 | # Otherwise, we can get commits that were made earlier than we would have wanted. 77 | UNIX_TIME_OF_SINCE_COMMIT=`git log $SINCE_COMMIT -n 1 --pretty=format:'%ct'` 78 | 79 | #For each commit in the repository do: 80 | #if commit doesn't have edited lines, just print 'time, author_name, author_mail, commit...' 81 | #else 82 | #for each file change in commit do: 83 | #print commit info in front of every file change. 84 | git log --pretty=format:'[%ci] author_name="%an" author_mail="%ae" commit_hash="%H" parent_hash="%P" tree_hash="%T"' --numstat --all --no-color --no-renames --no-merges --since=$UNIX_TIME_OF_SINCE_COMMIT $SINCE_COMMIT.. 
|
85 | sed '/^$/d' |
86 | awk -F '\t' -v FIRST_LINE=1 -v REPO="$GIT_REPO" -v RECENT_COMMIT=0 '{
87 |     IS_COMMIT = match($0, /^\[/);
88 |     if (IS_COMMIT) {
89 |         if (RECENT_COMMIT==1) {
90 |             print COMMIT_INFO
91 |         }
92 |         RECENT_COMMIT=1;
93 |         COMMIT_INFO=$0
94 |     } else {
95 |         RECENT_COMMIT=0;
96 |         print COMMIT_INFO" insertions=\""$1"\" deletions=\""$2"\" path=\""$3"\" file_type=\"---/"$3"---\" repository=\""REPO"\""
97 |     }
98 | }' |
99 | perl -pe 's|---.*/(.+?)---|---\.\1---|' |
100 | perl -pe 's|---.*\.(.+?)---|\1|'
101 | }
102 | 
103 | main
-------------------------------------------------------------------------------- /bin/fetch_github_data.py: --------------------------------------------------------------------------------
1 | # Copyright 2011 Splunk, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | '''
16 | This script polls useful GitHub data from the GitHub API.
17 | Author: Emre Berge Ergenekon, Petter Eriksson
18 | '''
19 | 
20 | #Import from std lib
21 | from time import localtime, strftime
22 | 
23 | 
24 | #import own classes
25 | from SplunkAPI import SplunkAPI
26 | from GithubAPI import GithubAPI
27 | import splunkgit_settings
28 | 
29 | def fetch_single_github_repo_data(github_user, github_repo, github_repo_address):
30 |     github_api = GithubAPI(github_user, github_repo)
31 |     time_stamp = strftime("%Y-%m-%d %H:%M:%S %z", localtime())
32 | 
33 |     repo = github_api.repo()
34 |     if repo is not None:
35 |         print '[{0}] github_watcher_count={1}'.format(time_stamp, repo['watchers'])
36 |         print '[{0}] github_forks_count={1}'.format(time_stamp, repo['forks'])
37 | 
38 |     splunk_api = SplunkAPI(splunkgit_settings.splunk_user_name(), splunkgit_settings.splunk_password())
39 |     since = splunk_api.time_of_last_updated_issue(github_repo_address)
40 |     if since is None:
41 |         since = '1900-01-01T00:00:01Z'
42 |     all_issues = github_api.issues_since(since)
43 |     for issue in all_issues:
44 |         print u'[{0}] github_issue_number={1} github_issue_state="{2}" github_issue_comment_count={3} github_issue_reporter="{4}" github_issue_title="{5}" github_issue_close_time="{6}" github_issue_update_time="{7}" github_issue_creation_time="{8}" repository="{9}"'.format(time_stamp, issue['number'], issue['state'], issue['comments'], issue['user']['login'], issue['title'], issue['closed_at'], issue['updated_at'], issue['created_at'], github_repo_address)
45 | 
46 | if __name__ == '__main__':
47 |     for github_repository in splunkgit_settings.github_repositories():
48 |         github_user = github_repository.get_user()
49 |         github_repo = github_repository.get_repo()
50 |         github_repo_address = github_repository.get_repo_address()
51 |         fetch_single_github_repo_data(github_user, github_repo, github_repo_address)
52 | 
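53 | # For reference, the events printed above look roughly like this
54 | # (illustrative values only):
55 | #   [2012-01-16 10:00:00 +0100] github_watcher_count=42
56 | #   [2012-01-16 10:00:00 +0100] github_forks_count=7
57 | #   [2012-01-16 10:00:00 +0100] github_issue_number=1 github_issue_state="open" github_issue_comment_count=2 github_issue_reporter="someuser" github_issue_title="Some title" github_issue_close_time="None" github_issue_update_time="2012-01-15T09:00:00Z" github_issue_creation_time="2012-01-14T09:00:00Z" repository="git://github.com/splunk/splunk-app-splunkgit.git"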
-------------------------------------------------------------------------------- /bin/fork_counter.py: --------------------------------------------------------------------------------
1 | # Copyright 2011 Splunk, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | '''
16 | Created on Nov 7, 2011
17 | Counts the number of forks of a repository, given a queue of forks, with no third-party dependencies other than our own.
18 | You can get the fork count of a github_repo like so:
19 | ForkCounter.new_with_github_api(github_api).count_forks()
20 | @author: periksson
21 | '''
22 | 
23 | import threading
24 | import Queue
25 | from GithubAPI import GithubAPI
26 | 
27 | NUMBER_OF_WORKERS = 16
28 | 
29 | class ForkCounter(object):
30 | 
31 |     def __init__(self, fork_queue, fork_counter_workers, initial_count=0):
32 |         self._fork_queue = fork_queue
33 |         self._fork_counter_workers = fork_counter_workers
34 |         self._fork_count = initial_count
35 | 
36 |     def count_forks(self):
37 |         self._start_workers()
38 |         self._wait_fork_workers_to_finish()
39 |         self._gather_workers_counts()
40 |         return self._fork_count
41 | 
42 |     def _start_workers(self):
43 |         for worker in self._fork_counter_workers:
44 |             worker.start()
45 | 
46 |     def _wait_fork_workers_to_finish(self):
47 |         self._fork_queue.join()
48 | 
49 |     def _gather_workers_counts(self):
50 |         for worker in self._fork_counter_workers:
51 |             self._fork_count += worker.get_count()
52 | 
53 |     @classmethod
54 |     def new_with_github_api(cls, github_api):
55 |         forks = github_api.forks()
56 |         fork_queue = ForkCounter._new_fork_queue_with_forks(forks)
57 |         workers = ForkCounter._create_fork_counter_workers(fork_queue)
58 |         count_first_fork = 1
59 |         return ForkCounter(fork_queue, workers, count_first_fork)
60 | 
61 |     @classmethod
62 |     def _new_fork_queue_with_forks(cls, forks):
63 |         fork_queue = Queue.Queue()
64 |         for fork in forks:
65 |             fork_queue.put(fork)
66 |         return fork_queue
67 | 
68 |     @classmethod
69 |     def _create_fork_counter_workers(cls, fork_queue):
70 |         n_workers = NUMBER_OF_WORKERS
71 |         return ForkCounter._create_a_number_of_fork_counter_workers(fork_queue, n_workers)
72 | 
73 |     @classmethod
74 |     def _create_a_number_of_fork_counter_workers(cls, fork_queue, n_workers):
75 |         workers = []
76 |         for _ in range(n_workers):
77 |             workers.append(ForkCounterWorker.new_with_fork_queue(fork_queue))
78 |         return workers
79 | 
80 | class ForkCounterWorker(threading.Thread):
81 | 
82 |     def __init__(self, queue):
83 |         threading.Thread.__init__(self)
84 |         self._queue = queue
85 |         self._count = 0
86 | 
87 |     def run(self):
88 |         while not self._queue.empty():
89 |             fork = self._queue.get()
90 |             self._count += self._count_forks_in_all_depths_on_single_thread([fork])
91 |             self._queue.task_done()
92 | 
93 |     def _count_forks_in_all_depths_on_single_thread(self, forks):
94 |         total_number_of_forks = 0
95 |         for fork in forks:
96 |             total_number_of_forks += self._get_total_fork_count_inner_loop(fork)
97 |         return total_number_of_forks
98 | 
99 |     def _get_total_fork_count_inner_loop(self, fork):
100 |         if fork['private'] == True:
101 |             return 0
102 |         else:
103 |             return self._get_total_fork_count_for_public_fork(fork)
104 | 
105 |     def _get_total_fork_count_for_public_fork(self, fork):
106 |         github_user = fork['owner']['login']
107 |         github_fork = fork['name']
108 |         github_api = GithubAPI(github_user, github_fork)
109 |         return 1 + self._count_forks_in_all_depths_on_single_thread(github_api.forks())
110 | 
111 |     def get_count(self):
112 |         return self._count
113 | 
114 |     @classmethod
115 |     def new_with_fork_queue(cls, fork_queue):
116 |         new_worker = ForkCounterWorker(fork_queue)
117 |         new_worker.setDaemon(True)
118 |         return new_worker
119 | 
-------------------------------------------------------------------------------- /bin/generate_multi_repo_view.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2011 Splunk, Inc.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Script for generating a view that shows all repositories that are configured in splunkgit.conf
18 | # Author: Petter Eriksson
19 | 
20 | #Global variables
21 | SCRIPT_HOME=$(dirname $0)
22 | SPLUNK=$SPLUNK_HOME/bin/splunk
23 | APP_HOME=`$SPLUNK cmd ./$SCRIPT_HOME/app_home.sh`
24 | APP_NAME=`echo $APP_HOME | sed 's/.*\///'` # app name (basename of APP_HOME); used by reload_views_for_splunkgit below
25 | 
26 | #XML writing for a view that shows all repositories
27 | xml_dir=$APP_HOME/local/data/ui/views
28 | xml_file=$xml_dir/multi_repositories.xml
29 | 
30 | main ()
31 | {
32 |     setup_xml
33 |     for repository in `$SPLUNK cmd python $SCRIPT_HOME/splunkgit_settings.py`
34 |     do
35 |         write_xml $repository
36 |     done
37 |     end_xml
38 |     reload_views_for_splunkgit
39 | }
40 | 
41 | setup_xml () {
42 |     # Create xml file
43 |     mkdir -p $xml_dir
44 |     echo "" > $xml_file
45 |     echo "" >> $xml_file
46 |     echo " " >> $xml_file
47 | }
48 | 
49 | # Write multi_repositories_row.txt and replace ---REPOSITORY--- with $1, which should be a repository
50 | write_xml () {
51 |     repository=$1
52 |     repository_simple_name=`echo $repository | sed 's/.*\///' | sed 's,\.git,,'`
53 |     cat $APP_HOME/bin/multi_repositories_row.txt | sed "s,---REPOSITORY---,$repository," | sed "s,---REPOSITORY_SIMPLE_NAME---,$repository_simple_name," >> $xml_file
54 | }
55 | 
56 | 
57 | end_xml () {
58 |     echo "" >> $xml_file
59 | }
60 | 
61 | reload_views_for_splunkgit () {
62 |     # Splunk variables
63 |     username_password_script="$SPLUNK cmd python $SCRIPT_HOME/print_splunk_user_and_password.py"
64 |     SPLUNK_USERNAME=`$username_password_script | cut -d ':' -f 1`
65 |     SPLUNK_PASSWORD=`$username_password_script | cut -d ':' -f 2`
66 |     SPLUNKD_PORT=`$SPLUNK show splunkd-port | egrep -o '[0-9]+$'` # [0-9] instead of \d, which is not supported by all egrep versions
67 | 
68 |     # Reload views for $APP_NAME (splunkgit)
69 |     curl -s -u $SPLUNK_USERNAME:$SPLUNK_PASSWORD -k https://localhost:$SPLUNKD_PORT/servicesNS/nobody/$APP_NAME/data/ui/views/_reload > /dev/null
70 | }
71 | 
72 | # Run script
73 | main
74 | 
-------------------------------------------------------------------------------- /bin/git_fetch_repos.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2011 Splunk, Inc.
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Script pulls the repositories configured in local/splunkgit.conf 18 | # Author: Petter Eriksson 19 | 20 | SCRIPT_HOME=$(dirname $0) 21 | source $SCRIPT_HOME/shell_variables.sh 22 | 23 | #Initializing 24 | GIT_REPO= 25 | GIT_REPO_FOLDER= 26 | GIT_REPOS_HOME= 27 | chosen_repository= 28 | 29 | main () 30 | { 31 | for repository in `$SPLUNK cmd python $SCRIPT_HOME/splunkgit_settings.py` 32 | do 33 | GIT_REPO=$repository 34 | GIT_REPO_FOLDER=`echo $GIT_REPO | sed 's/.*\///'` 35 | GIT_REPOS_HOME=$APP_HOME/git-repositories 36 | chosen_repository=$GIT_REPOS_HOME/$GIT_REPO_FOLDER 37 | 38 | if [ "$GIT_REPO" = "" ]; then 39 | echo "Could not find configured git repository. Have you configured splunkgit.conf? Read README.md for more information." 1>&2 40 | else 41 | if [ ! -d "$chosen_repository" ]; then 42 | echo "repository does not exist!" 1>&2 43 | fetch_git_repository 44 | fi 45 | fi 46 | done 47 | } 48 | 49 | fetch_git_repository () 50 | { 51 | echo "fetching git repo data for repository: $repository" 1>&2 52 | error_output=err.out 53 | mkdir -p $GIT_REPOS_HOME 54 | git clone --mirror $GIT_REPO $chosen_repository 1>&2 55 | git_exit_code=$? 56 | if [[ $git_exit_code != 0 ]]; then 57 | echo "Unable to clone repository: $GIT_REPO" 1>&2 58 | fi 59 | } 60 | 61 | main 62 | -------------------------------------------------------------------------------- /bin/git_repo_messages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2011 Splunk, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Prints the commit message event for each commit, including time, commit_hash and repository. 18 | # Author: Petter Eriksson 19 | 20 | SCRIPT_HOME=$(dirname $0) 21 | source $SCRIPT_HOME/shell_variables.sh 22 | 23 | #Initializing 24 | GIT_REPO= 25 | GIT_REPO_FOLDER= 26 | GIT_REPOS_HOME= 27 | chosen_repository= 28 | 29 | main () 30 | { 31 | $SCRIPT_HOME/git_fetch_repos.sh 32 | for repository in `$SPLUNK cmd python $SCRIPT_HOME/splunkgit_settings.py` 33 | do 34 | GIT_REPO=$repository 35 | GIT_REPO_FOLDER=`echo $GIT_REPO | sed 's/.*\///'` 36 | GIT_REPOS_HOME=$APP_HOME/git-repositories 37 | chosen_repository=$GIT_REPOS_HOME/$GIT_REPO_FOLDER 38 | 39 | if [ "$GIT_REPO" = "" ]; then 40 | echo "Could not find configured git repository. Have you configured splunkgit.conf? Read README.md for more information." 
1>&2 41 | else 42 | if [ -d "$chosen_repository" ]; then 43 | print_commit_message_event 44 | else 45 | echo "repository does not exist!" 1>&2 46 | fi 47 | fi 48 | done 49 | } 50 | 51 | print_commit_message_event () 52 | { 53 | cd $chosen_repository 54 | git fetch 1>&2 55 | 56 | # Find the last indexed commit. 57 | # If there are no indexed commits, get the first commit of the repository. 58 | SINCE_COMMIT="" 59 | 60 | commit_messages_search="index=splunkgit repository=$GIT_REPO sourcetype=git_commit_messages | head 1 | stats count" 61 | 62 | HAS_INDEXED_COMMITS=`$SPLUNK search "$commit_messages_search" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '[0-9]+'` 63 | if [ "$HAS_INDEXED_COMMITS" = "0" ]; then 64 | FIRST_COMMIT=`git log --all --no-color --no-renames --no-merges --reverse --pretty=format:'%H' | head -n 1` 65 | SINCE_COMMIT=$FIRST_COMMIT 66 | else 67 | LATEST_INDEXED_COMMIT=`$SPLUNK search "index=splunkgit repository=$GIT_REPO sourcetype=git_commit_messages | sort 1 - _time | table commit_hash" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '^\w+'` 68 | SINCE_COMMIT=$LATEST_INDEXED_COMMIT 69 | fi 70 | 71 | # Get the time of the commit we are logging since. 72 | # Note: We're getting the time, so we can specify the --since flag to git log. 73 | # Otherwise, we can get commits that were made earlier than we would have wanted. 74 | UNIX_TIME_OF_SINCE_COMMIT=`git log $SINCE_COMMIT -n 1 --pretty=format:'%ct'` 75 | 76 | # Print repository, commit and commit messages for each commit, since the last indexed commit. 77 | git log --pretty=format:"repository=\"$GIT_REPO\" [%ci] commit_hash=%H message=\"%B\"" --all --no-color --no-renames --no-merges --since=$UNIX_TIME_OF_SINCE_COMMIT $SINCE_COMMIT.. | 78 | sed '/^$/d' 79 | } 80 | 81 | main 82 | -------------------------------------------------------------------------------- /bin/git_source_code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2011 Splunk, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Prints the source code for all the changed files in each commit. 18 | # Takes one argument ($1), which is the file pattern that should be printed to stdout. 19 | # For example: '\.xml$' would only print the xml files that has been changed in the repository. 20 | # This script is disabled in the default/inputs.conf by default. 21 | # Author: Petter Eriksson 22 | 23 | SCRIPT_HOME=$(dirname $0) 24 | source $SCRIPT_HOME/shell_variables.sh 25 | 26 | # file pattern goes into the first variable. 
27 | file_pattern=$1 28 | 29 | #Initializing 30 | GIT_REPO= 31 | GIT_REPO_FOLDER= 32 | GIT_REPOS_HOME= 33 | chosen_repository= 34 | 35 | main () 36 | { 37 | $SCRIPT_HOME/git_fetch_repos.sh 38 | for repository in `$SPLUNK cmd python $SCRIPT_HOME/splunkgit_settings.py` 39 | do 40 | GIT_REPO=$repository 41 | GIT_REPO_FOLDER=`echo $GIT_REPO | sed 's/.*\///'` 42 | GIT_REPOS_HOME=$APP_HOME/git-repositories 43 | chosen_repository=$GIT_REPOS_HOME/$GIT_REPO_FOLDER 44 | 45 | if [ "$GIT_REPO" = "" ]; then 46 | echo "Could not find configured git repository. Have you configured splunkgit.conf? Read README.md for more information." 1>&2 47 | else 48 | if [ -d "$chosen_repository" ]; then 49 | print_source_code 50 | else 51 | echo "repository does not exist!" 1>&2 52 | fi 53 | fi 54 | done 55 | } 56 | 57 | print_source_code () 58 | { 59 | cd $chosen_repository 60 | git fetch 1>&2 61 | 62 | # Clone the repository with files instead of just bare/mirror 63 | cd .. 64 | repo_with_files=$chosen_repository-with-files 65 | if [ ! -d "$repo_with_files" ]; then 66 | git clone $chosen_repository $repo_with_files 1>&2 67 | fi 68 | cd $repo_with_files 69 | git reset --hard master 1>&2 70 | git pull $chosen_repository master 1>&2 71 | 72 | # Find the last indexed commit. 73 | # If there are no indexed commits, get the first commit of the repository. 74 | SINCE_COMMIT="" 75 | 76 | commit_messages_search="index=splunkgit repository=$GIT_REPO sourcetype=git_source_code | head 1 | stats count" 77 | 78 | HAS_INDEXED_COMMITS=`$SPLUNK search "$commit_messages_search" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '[0-9]+'` 79 | if [ "$HAS_INDEXED_COMMITS" = "0" ]; then 80 | FIRST_COMMIT=`git log --all --no-color --no-renames --no-merges --reverse --pretty=format:'%H' | head -n 1` 81 | SINCE_COMMIT=$FIRST_COMMIT 82 | else 83 | LATEST_INDEXED_COMMIT=`$SPLUNK search "index=splunkgit repository=$GIT_REPO sourcetype=git_source_code | sort 1 - _time | table commit_hash" -auth $SPLUNK_USERNAME:$SPLUNK_PASSWORD -app $APP_NAME | egrep -o '^\w+'` 84 | SINCE_COMMIT=$LATEST_INDEXED_COMMIT 85 | fi 86 | 87 | # Get the time of the commit we are logging since. 88 | # Note: We're getting the time, so we can specify the --since flag to git log. 89 | # Otherwise, we can get commits that were made earlier than we would have wanted. 90 | UNIX_TIME_OF_SINCE_COMMIT=`git log $SINCE_COMMIT -n 1 --pretty=format:'%ct'` 91 | 92 | 93 | # For each commit, checkout the commit and print all the changed files matching the file pattern in $1. 94 | for commit in `git rev-list --all --no-color --no-renames --no-merges --reverse --since=$UNIX_TIME_OF_SINCE_COMMIT $SINCE_COMMIT..`; do 95 | # debug: echo "working commit: $commit" 1>&2 96 | git checkout $commit 1>&2 2> /dev/null 97 | for file in `git show $commit --pretty=format:"" --numstat | 98 | sed '/^$/d' | 99 | awk -F '\t' '{ print $3 }' | 100 | sed 's/ /\\ /g' | 101 | egrep "$file_pattern"`; do # Catch the configured file pattern. 
102 | echo "commit_hash=$commit repository=$GIT_REPO file=\"$file\"" 103 | # debug: echo "commit_hash=$commit repository=$GIT_REPO file=\"$file\"" 1>&2 104 | cat "$file" 105 | done 106 | done 107 | } 108 | 109 | main 110 | -------------------------------------------------------------------------------- /bin/multi_repositories_row.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | index=splunkgit source="git_repo" repository=---REPOSITORY--- | dedup commit_hash | timechart count(commit_hash) by author_name | streamstats sum(*) as * 4 | Commits all time: ---REPOSITORY_SIMPLE_NAME--- 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | index=splunkgit source="git_repo" repository=---REPOSITORY--- | dedup commit_hash | timechart count(commit_hash) by author_name | streamstats sum(*) as * 13 | Commmits last 30 days: ---REPOSITORY_SIMPLE_NAME--- 14 | rt-30d 15 | rt 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /bin/print_splunk_user_and_password.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ''' 16 | Prints the username and password configured in splunkgit.conf. 17 | Author: Emre Berge Ergenekon, Petter Eriksson 18 | ''' 19 | 20 | import splunkgit_settings 21 | 22 | SEPARATOR = ":" 23 | 24 | if __name__ == '__main__': 25 | print splunkgit_settings.splunk_user_name() + SEPARATOR + splunkgit_settings.splunk_password() 26 | -------------------------------------------------------------------------------- /bin/shell_variables.sh: -------------------------------------------------------------------------------- 1 | 2 | #Global variables 3 | SPLUNK=$SPLUNK_HOME/bin/splunk 4 | SCRIPT_HOME=$(dirname $0) 5 | APP_HOME=`$SPLUNK cmd ./$SCRIPT_HOME/app_home.sh` 6 | APP_NAME=`echo $APP_HOME | sed 's/.*\///'` 7 | 8 | # Splunk variables 9 | username_password_script="$SPLUNK cmd python $SCRIPT_HOME/print_splunk_user_and_password.py" 10 | SPLUNK_USERNAME=`$username_password_script | cut -d ':' -f 1` 11 | SPLUNK_PASSWORD=`$username_password_script | cut -d ':' -f 2` 12 | 13 | -------------------------------------------------------------------------------- /bin/splunkgit_settings.py: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import splunk.clilib.cli_common
16 | import re
17 | 
18 | '''
19 | Functions for retrieving settings from the splunkgit conf file.
20 | Author: Emre Berge Ergenekon, Petter Eriksson
21 | '''
22 | SPLUNKGIT_GIT_SETTINGS = splunk.clilib.cli_common.getConfStanza('splunkgit','git')
23 | SPLUNK_SETTINGS = splunk.clilib.cli_common.getConfStanza('splunkgit','splunk')
24 | 
25 | class GithubRepository(object):
26 | 
27 |     def __init__(self, repo_address, user, repo):
28 |         self._repo_address = repo_address
29 |         self._user = user
30 |         self._repo = repo
31 | 
32 |     def get_repo_address(self):
33 |         return self._repo_address
34 | 
35 |     def get_user(self):
36 |         return self._user
37 | 
38 |     def get_repo(self):
39 |         return self._repo
40 | 
41 |     @classmethod
42 |     def new_from_repo_address(cls, repo_address):
43 |         user = GithubRepository.get_user_from_repo_address(repo_address)
44 |         repo = GithubRepository.get_repo_from_repo_address(repo_address, user)
45 |         if user is None or repo is None:
46 |             return None
47 |         return GithubRepository(repo_address, user, repo)
48 | 
49 |     @classmethod
50 |     def get_user_from_repo_address(cls, repo_address):
51 |         user_match = re.search('(?<=github\.com.)(.*)(?=/)', repo_address) # match anything between github.com/ and the last /
52 |         if user_match is not None:
53 |             return user_match.group(0)
54 |         else:
55 |             return None
56 | 
57 |     @classmethod
58 |     def get_repo_from_repo_address(cls, repo_address, user):
59 |         repo_match = re.search("(?<=%s/)(.*)(?=\.git)" % user, repo_address) # match everything between user/ and .git
60 |         if repo_match is not None:
61 |             return repo_match.group(0)
62 |         else:
63 |             return None
64 | 
65 | def git_repo_addresses():
66 |     return SPLUNKGIT_GIT_SETTINGS['repo_addresses']
67 | 
68 | def splunk_user_name():
69 |     return SPLUNK_SETTINGS['user']
70 | 
71 | def splunk_password():
72 |     return SPLUNK_SETTINGS['password']
73 | 
74 | def github_repositories():
75 |     space_separated_repo_addresses = git_repo_addresses()
76 |     repo_addresses = space_separated_repo_addresses.split(' ')
77 |     return github_repos_from_repo_addresses(repo_addresses)
78 | 
79 | def github_repos_from_repo_addresses(repo_addresses):
80 |     github_repos = []
81 |     for repo_address in repo_addresses:
82 |         github_repo = GithubRepository.new_from_repo_address(repo_address)
83 |         if github_repo is not None:
84 |             github_repos.append(github_repo)
85 |     return github_repos
86 | 
87 | if __name__ == '__main__':
88 |     print git_repo_addresses()
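89 | 
90 | # Usage sketch (illustrative, not executed): parsing the default repo address
91 | # with the helpers above yields the GitHub user and repo name:
92 | #   repo = GithubRepository.new_from_repo_address('git://github.com/splunk/splunk-app-splunkgit.git')
93 | #   repo.get_user()  # -> 'splunk'
94 | #   repo.get_repo()  # -> 'splunk-app-splunkgit'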
-------------------------------------------------------------------------------- /default/app.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [ui] 16 | is_visible=true 17 | label=Splunkgit 18 | 19 | [launcher] 20 | author=Emre Berge Ergenekon & Petter Eriksson 21 | description=App for git and github repositories. Get an overview of your repositories. Git just got Splunked! 22 | version=1.3.0.2 23 | 24 | [package] 25 | id=splunkgit 26 | -------------------------------------------------------------------------------- /default/data/ui/nav/default.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | -------------------------------------------------------------------------------- /default/data/ui/views/git_author_page.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 |
19 | 20 |
21 | 22 | 23 | index=splunkgit author_name=* | dedup author_name | sort author_name 24 | 25 |
26 | 27 | 28 |
29 | 30 | index=splunkgit file_type=* | dedup file_type | sort file_type 31 | * 32 | 33 |
34 | 35 | 36 |
37 | 38 | 39 | * 40 |
41 | 42 | 43 | 44 | Number of commits 45 | `number_of_commits_by_author_file_filetype("$author$","$file$","$filetype$")` 46 | 47 | 48 | First commit 49 | `first_commit_by_author_file_filetype("$author$","$file$","$filetype$")` | eval Time=strftime(_time,"%Y-%m-%d") | table Time 50 | 51 | 52 | Last commit 53 | `last_commit_by_author_file_filetype("$author$","$file$","$filetype$")` | eval Time=strftime(_time,"%Y-%m-%d") | table Time 54 | 55 | 56 | 57 | 58 | 59 | Top file activity 60 | `most_impacted_files_by_author_file_filetype("$author$","$file$","$filetype$")` 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | Top committed files 70 | `top_committed_files_by_author_file_filetype("$author$","$file$","$filetype$")` 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | Repository statistics 79 | `repository_stats_by_author_file_filetype("$author$","$file$","$filetype$")` 80 |
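A note on the panels above: each one calls a macro from default/macros.conf (reproduced later in this listing) and fills its arguments with the dashboard tokens $author$, $file$ and $filetype$; roughly speaking, the token values are substituted into the macro's search string. The sketch below illustrates that substitution idea only; it is not Splunk's actual token engine:

def expand(template, tokens):
    # Substitute each $name$ placeholder with its form value, the way
    # the dashboard feeds the macro arguments.
    for name, value in tokens.items():
        template = template.replace('$%s$' % name, value)
    return template

search = 'index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"'
print(expand(search, {'author': 'Petter Eriksson', 'file': 'README.md', 'filetype': 'md'}))
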
81 |
82 |
83 | -------------------------------------------------------------------------------- /default/data/ui/views/git_file_page.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 |
19 | 20 |
21 | 22 | 23 | * 24 |
25 | 26 | 27 | 28 | Number of commits 29 | `number_of_commits_by_file("$file$")` 30 | 31 | 32 | First commit 33 | `first_commit_by_file("$file$")` | eval Time=strftime(_time,"%Y-%m-%d") | table Time 34 | 35 | 36 | Last commit 37 | `last_commit_by_file("$file$")` | eval Time=strftime(_time,"%Y-%m-%d") | table Time 38 | 39 | 40 | 41 | 42 | 43 | 44 | Impact on file(s) by author 45 | `author_impact_on_file(file="$file$")` 46 | 47 | 48 | 49 | 50 | 51 | Commits with file(s) by author 52 | `author_commits_on_file(file="$file$")` 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | Commits on file(s) over time by author 61 | `commits_over_time_on_by_file(file="$file$")` 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | Matched files 73 | `match_file(file="$file$")` 74 | 75 |
76 |
77 |
78 | -------------------------------------------------------------------------------- /default/data/ui/views/git_filetype_page.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 |
19 | 20 | 21 |
22 | 23 | 24 | index=splunkgit file_type=* | dedup file_type | sort file_type 25 | 26 |
27 | 28 | 29 | 30 | Impact on file type by author 31 | `author_impact_on_filetype(filetype="$filetype$")` 32 | 33 | 34 | 35 | 36 | 37 | Commits on file type by author 38 | `author_commits_on_filetype(filetype="$filetype$")` 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | Repository statistics 47 | `repository_stats_by_filetype(filetype="$filetype$")` 48 |
49 |
50 | 51 | 52 | 53 | Most common file types 54 | index=splunkgit sourcetype=git_file_change | chart count as Count by file_type | sort -Count | rename file_type as "File type" 55 |
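The file_type field that this chart and the filetype macros group on is produced by the repository-indexing scripts in bin/, which are not shown in full here. The natural derivation is the path's file extension; the sketch below is an assumption about that idea, not a copy of the real extraction:

import os

def file_type(path):
    # Assumed derivation: the extension without its dot, e.g.
    # 'default/app.conf' -> 'conf'. The actual extraction lives in the
    # bin/ shell scripts and may differ in edge cases.
    ext = os.path.splitext(path)[1]
    return ext[1:] if ext else ''

print(file_type('bin/splunkgit_settings.py'))  # py
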
56 |
57 |
-------------------------------------------------------------------------------- /default/data/ui/views/git_repo_page.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 19 | 20 | 21 | 22 | Number of coders 23 | `number_of_coders` 24 | 25 | 26 | 27 | 28 | active_authors_over_time 29 | Number of active authors over time 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | author_count_vs_commit_count 39 | Number of authors over commit count 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | impact_over_time 48 | Impact Over Time 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | commit_count_over_time_groupedby_author 59 | Total number of commits 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | Total impact 70 | Impact over time 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /default/data/ui/views/github_issues.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 19 | 20 | 21 | 22 | `latest_fetched_github_issue_set` | search github_issue_state="open" | stats count 23 | Number of open issues 24 | 25 | 26 | `latest_fetched_github_issue_set` | search github_issue_state="closed" | stats count 27 | Number of closed issues 28 | 29 | 30 | `latest_fetched_github_issue_set` | stats count 31 | Total number of issues 32 | 33 | 34 | 35 | 36 | Latest opened issues (Still open) 37 | Latest opened issues (Still open) 38 |
39 | 40 | Latest closed issues (Still closed) 41 | Latest closed issues (Still closed) 42 |
43 |
44 | 45 | 46 | Latest updated issues 47 | Latest updated issues 48 |
49 | 50 | Oldest open issues (Still open) 51 | Oldest open issues (Still open) 52 |
53 |
54 | 55 | 56 | Top 10 issue reporters 57 | Top issue reporters 58 |
59 | 60 | Top 10 issue reporters 61 | Top issue reporters 62 | 63 | 64 |
65 |
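The github_issue_* fields consumed by this dashboard are emitted by bin/fetch_github_data.py, which is not reproduced in this listing. Scripted inputs of this kind simply write events to stdout for Splunk to index. The sketch below is an assumption about the general shape, not the real script: it uses the public GitHub REST API issues endpoint and skips pagination and timestamps entirely.

import json
import urllib2  # Python 2, matching the rest of bin/; use urllib.request on Python 3

def print_issue_events(user, repo):
    url = 'https://api.github.com/repos/%s/%s/issues?state=all' % (user, repo)
    for issue in json.load(urllib2.urlopen(url)):
        # One key=value event per issue, mirroring the field names the
        # saved searches above expect.
        print('github_issue_number=%s github_issue_state="%s" github_issue_reporter="%s" github_issue_title="%s"' % (
            issue['number'], issue['state'],
            issue['user']['login'],
            issue['title'].replace('"', "'")))
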
-------------------------------------------------------------------------------- /default/data/ui/views/github_repo.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 19 | 20 | 21 | 22 | index="splunkgit" source="github_rest_api" github_watcher_count=* | sort - _time |table github_watcher_count 23 | Number of watchers 24 | 25 | 26 | index="splunkgit" source="github_rest_api" github_forks_count=* | sort - _time | table github_forks_count 27 | Number of forks 28 | 29 | 30 | 31 | 32 | Watcher count over time 33 | Watcher count over time 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | Forks count over time 43 | Forks count over time 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /default/indexes.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [splunkgit] 16 | coldPath = $SPLUNK_DB/splunkgit/colddb 17 | homePath = $SPLUNK_DB/splunkgit/db 18 | thawedPath = $SPLUNK_DB/splunkgit/thaweddb 19 | -------------------------------------------------------------------------------- /default/inputs.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | [script://./bin/fetch_git_repo_data.sh] 16 | interval = 900 17 | sourcetype = git_file_change 18 | source = git_repo 19 | index = splunkgit 20 | disabled = false 21 | 22 | [script://./bin/git_repo_messages.sh] 23 | interval = 900 24 | sourcetype = git_commit_messages 25 | source = jquery_hackathon 26 | index = splunkgit 27 | disabled = false 28 | 29 | [script://./bin/git_source_code.sh \.xml$] 30 | interval = 900 31 | sourcetype = git_source_code 32 | source = jquery_hackathon 33 | index = splunkgit 34 | disabled = true 35 | 36 | [script://./bin/generate_multi_repo_view.sh] 37 | interval = -1 38 | sourcetype = view_generation 39 | source = local_machine 40 | index = splunkgit 41 | disabled = false 42 | 43 | [script://./bin/fetch_github_data.py] 44 | interval = 21600 45 | sourcetype = github_data 46 | source = github_rest_api 47 | index = splunkgit 48 | disabled = false 49 | -------------------------------------------------------------------------------- /default/macros.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [latest_fetched_github_issue_set] 16 | definition = index="splunkgit" source="github_rest_api" github_issue_number=* | sort - _time | dedup github_issue_number 17 | iseval = 0 18 | 19 | [author_impact_on_file(1)] 20 | args = file 21 | definition = index=splunkgit sourcetype=git_file_change path=*"$file$"* | stats sum(insertions) as Insertions, sum(deletions) as Deletions by author_name | eval Impact=Insertions+Deletions | rename author_name as Author | fields + Author,Impact | sort 0 -Impact 22 | iseval = 0 23 | 24 | [find_author(1)] 25 | args = author 26 | definition = index=splunkgit author_name=* | dedup author_name | sort 0 author_name 27 | iseval = 0 28 | 29 | [match_file(1)] 30 | args = file 31 | definition = index=splunkgit sourcetype=git_file_change path=*"$file$"* | dedup path | rename path as "Matching file" | sort 0 "Matching file" | table "Matching file" 32 | iseval = 0 33 | 34 | [most_impacted_files_by_author(1)] 35 | args = author 36 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" [savedsearch "Find existing files"] | stats sum(insertions) as Insertions, sum(deletions) as Deletions by path | eval Impact=Insertions+Deletions | sort -Impact | rename path as File | table File,Insertions,Deletions | head 10 37 | iseval = 0 38 | 39 | [repository_stats_by_author(1)] 40 | args = author 41 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | stats sum(insertions) as Insertions, sum(deletions) as Deletions | eval Impact=Insertions+Deletions | eval ""Rows=Insertions-Deletions | table Impact, Insertions, Deletions, Rows | rename Rows as "Insertions-Deletions" 42 | iseval = 0 43 | 44 | [top_impacted_files_by_author(1)] 45 | args = author 46 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | stats 
sum(insertions) as Insertions, sum(deletions) as Deletions by path | eval Impact=Insertions+Deletions | rename path as File | table File, Impact | sort -Impact | head 10 47 | iseval = 0 48 | 49 | [author_commits_on_file(1)] 50 | args = file 51 | definition = index=splunkgit sourcetype=git_file_change path=*"$file$"*| stats count as Count by author_name | rename author_name as Author | sort 0 -Count 52 | iseval = 0 53 | 54 | [min_number_of_watchers] 55 | definition = index="splunkgit" source="github_rest_api" github_watcher_count=* | stats min(github_watcher_count) 56 | iseval = 0 57 | 58 | [author_commits_on_filetype(1)] 59 | args = filetype 60 | definition = index=splunkgit sourcetype=git_file_change file_type="$filetype$"| stats count as Count by author_name | rename author_name as Author | sort 0 -Count 61 | iseval = 0 62 | 63 | [author_impact_on_filetype(1)] 64 | args = filetype 65 | definition = index=splunkgit sourcetype=git_file_change file_type="$filetype$"| stats sum(insertions) as Insertions, sum(deletions) as Deletions by author_name | eval Impact=Insertions+Deletions | rename author_name as Author | fields + Author,Impact | sort 0 -Impact 66 | iseval = 0 67 | 68 | [coders_with_more_than_number_commits(1)] 69 | args = commits 70 | definition = index=splunkgit | dedup commit_hash | stats count(commit_hash) as Count by author_name | search Count>$commits$ | stats count 71 | iseval = 0 72 | 73 | [commits_by_author(1)] 74 | args = author 75 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | dedup commit_hash | stats count 76 | iseval = 0 77 | 78 | [first_commit_by_author(1)] 79 | args = author 80 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | dedup commit_hash | sort 0 _time by author_name | table _time | head 1 81 | iseval = 0 82 | 83 | [last_commit_by_author(1)] 84 | args = author 85 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | dedup commit_hash | sort 0 -_time by author_name | table _time | head 1 86 | iseval = 0 87 | 88 | [most_impacted_files_by_author_file_filetype(3)] 89 | args = author,file,filetype 90 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"| stats sum(insertions) as Insertions, sum(deletions) as Deletions by path | eval Impact=Insertions+Deletions | sort -Impact | rename path as File | table File,Insertions,Deletions | head 10 91 | iseval = 0 92 | 93 | [new_committers_since_days(1)] 94 | args = days 95 | definition = index=splunkgit sourcetype=git_file_change startdaysago=$days$ [search index=splunkgit | dedup author_name | table author_name] | dedup commit_hash | sort _time by author_name | dedup author_name | stats count as "New committers since $days$days" 96 | iseval = 0 97 | 98 | [number_of_coders] 99 | definition = index=splunkgit | dedup author_name | stats count as "Number of coders" | table "Number of coders" 100 | iseval = 0 101 | 102 | [number_of_commits_by_author(1)] 103 | args = author 104 | definition = index=splunkgit sourcetype="git_file_change" author_name="$author$" | dedup commit_hash | stats count 105 | iseval = 0 106 | 107 | [repository_stats_by_author_file_filetype(3)] 108 | args = author,file,filetype 109 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"| stats sum(insertions) as Insertions, sum(deletions) as Deletions | eval Impact=Insertions+Deletions | eval ""Rows=Insertions-Deletions | table 
Impact, Insertions, Deletions, Rows | rename Rows as "Insertions-Deletions" 110 | iseval = 0 111 | 112 | [repository_stats_by_filetype(1)] 113 | args = filetype 114 | definition = index=splunkgit sourcetype=git_file_change file_type="$filetype$" | stats sum(insertions) as Insertions, sum(deletions) as Deletions | eval Impact=Insertions+Deletions | eval Rows=Insertions-Deletions | table Impact, Insertions, Deletions, Rows | rename Rows as "Insertions-Deletions" 115 | iseval = 0 116 | 117 | [top_committed_files_by_author(1)] 118 | args = author 119 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" | stats count as Count by path | sort -Count | head 10 120 | iseval = 0 121 | 122 | [top_committed_files_by_author_file_filetype(3)] 123 | args = author,file,filetype 124 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"| stats count as Count by path | sort -Count | head 10 125 | iseval = 0 126 | 127 | [first_commit_by_file(1)] 128 | args = file 129 | definition = index=splunkgit sourcetype=git_file_change path=*"$file$"* | dedup commit_hash | sort 0 _time by author_name | table _time | head 1 130 | iseval = 0 131 | 132 | [last_commit_by_file(1)] 133 | args = file 134 | definition = index=splunkgit sourcetype=git_file_change path=*"$file$"* | dedup commit_hash | sort 0 -_time by author_name | table _time | head 1 135 | iseval = 0 136 | 137 | [number_of_commits_by_file(1)] 138 | args = file 139 | definition = index=splunkgit sourcetype="git_file_change" path=*"$file$"* | dedup commit_hash | stats count 140 | iseval = 0 141 | 142 | [last_commit_by_author_file_filetype(3)] 143 | args = author,file,filetype 144 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"| dedup commit_hash | sort 0 -_time by author_name | table _time | head 1 145 | iseval = 0 146 | 147 | [first_commit_by_author_file_filetype(3)] 148 | args = author,file,filetype 149 | definition = index=splunkgit sourcetype=git_file_change author_name="$author$" path=*"$file$"* file_type="$filetype$"| dedup commit_hash | sort 0 _time by author_name | table _time | head 1 150 | iseval = 0 151 | 152 | [number_of_commits_by_author_file_filetype(3)] 153 | args = author,file,filetype 154 | definition = index=splunkgit sourcetype="git_file_change" author_name="$author$" path=*"$file$"* file_type="$filetype$"| dedup commit_hash | stats count 155 | iseval = 0 156 | 157 | [commits_over_time_on_by_file(1)] 158 | args = file 159 | definition = index="splunkgit" source="git_repo" path=*"$file$"* | dedup commit_hash | sort 0 _time | timechart count by author_name 160 | iseval = 0 161 | 
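Before moving on to props.conf: nearly every repository-statistics macro above repeats the same arithmetic, so it is worth spelling out once. Impact is the sum of inserted and deleted lines, and Insertions-Deletions is the net number of lines left behind. A small self-contained check of that arithmetic (the events are made up, shaped like git_file_change records):

def repository_stats(changes):
    insertions = sum(c['insertions'] for c in changes)
    deletions = sum(c['deletions'] for c in changes)
    # Impact = Insertions + Deletions, exactly as the eval steps compute it.
    return {'Impact': insertions + deletions,
            'Insertions': insertions,
            'Deletions': deletions,
            'Insertions-Deletions': insertions - deletions}

print(repository_stats([{'insertions': 10, 'deletions': 2},
                        {'insertions': 3, 'deletions': 7}]))
# {'Impact': 22, 'Insertions': 13, 'Deletions': 9, 'Insertions-Deletions': 4} (key order may vary)
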
-------------------------------------------------------------------------------- /default/props.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [source::git*] 16 | MAX_DAYS_AGO=10000 17 | 18 | [git_commit_messages] 19 | SHOULD_LINEMERGE = false 20 | LINE_BREAKER = ([\r\n]+)repository= 21 | 22 | [git_source_code] 23 | SHOULD_LINEMERGE = True 24 | LINE_BREAKER = ([\r\n]+|\>)commit_hash= 25 | TRUNCATE = 999999 26 | DATETIME_CONFIG = CURRENT 27 | MAX_EVENTS = 99999 28 | -------------------------------------------------------------------------------- /default/savedsearches.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [Latest closed issues (Still closed)] 16 | search = `latest_fetched_github_issue_set` | search github_issue_state="closed" | sort -str(github_issue_close_time) | head 10 | table github_issue_close_time github_issue_number github_issue_title github_issue_reporter | rename github_issue_close_time as "Closing time", github_issue_number as "ID", github_issue_title as "Title", github_issue_reporter as "Reporter" 17 | 18 | [Latest opened issues (Still open)] 19 | search = `latest_fetched_github_issue_set` | search github_issue_state="open" | sort -str(github_issue_creation_time) | head 10 | table github_issue_creation_time github_issue_number github_issue_title github_issue_reporter | rename github_issue_creation_time as "Creation time", github_issue_number as "ID", github_issue_title as "Title", github_issue_reporter as "Reporter" 20 | 21 | [Latest updated issues] 22 | search = `latest_fetched_github_issue_set` | sort -str(github_issue_update_time) | head 10 | table github_issue_update_time github_issue_number github_issue_title github_issue_reporter | rename github_issue_update_time as "Update time", github_issue_number as "ID", github_issue_title as "Title", github_issue_reporter as "Reporter" 23 | 24 | [Oldest open issues (Still open)] 25 | search = `latest_fetched_github_issue_set` | search github_issue_state="open" | sort -str(github_issue_creation_time) | tail 10 | table github_issue_creation_time github_issue_number github_issue_title github_issue_reporter | rename github_issue_creation_time as "Creation time", github_issue_number as "ID", github_issue_title as "Title", github_issue_reporter as "Reporter" 26 | 27 | [Find existing files] 28 | search = index=splunkgit sourcetype=git_file_change | stats sum(insertions) as Insertions, sum(deletions) as Deletions by path | eval delta=Insertions-Deletions | search delta!=0 | eval Impact=Insertions+Deletions | table path 29 | 30 | [Top issue reporters] 31 | search = `latest_fetched_github_issue_set` | chart count by github_issue_reporter | sort - count | head 10 | rename github_issue_reporter as Reporter, count as Count 32 | 33 | [Watcher count over time] 34 | search = index="splunkgit" source="github_rest_api" github_watcher_count=* | timechart cont=t mode(github_watcher_count) as "Watcher count" 35 | 36 | [Forks count over time] 37 | search = index="splunkgit" source="github_rest_api" 
github_forks_count=* | timechart mode(github_forks_count) as "Forks count" 38 | 39 | [impact_over_time] 40 | search = index="splunkgit" source="git_repo" | timechart cont="t" eval(sum(deletions) + sum(insertions)) as Impact 41 | 42 | [commit_count_over_time_groupedby_author] 43 | search = index=splunkgit source="git_repo" | dedup commit_hash | timechart count(commit_hash) by author_name | streamstats sum(*) as * 44 | 45 | [author_count_vs_commit_count] 46 | search = index="splunkgit" source="git_repo" | dedup commit_hash | stats count by author_name | stats dc(author_name) by count | sort 0 - count | streamstats sum(dc(author_name)) | sort 0 count | rename dc(author_name) as "Authors with exactly X# of commits", sum(dc(author_name)) as "Authors with at least X# of commits", count as "Number of commits" 47 | 48 | [active_authors_over_time] 49 | search = index="splunkgit" source="git_repo" | dedup commit_hash | sort 0 _time | dedup author_name | timechart count as newValue | streamstats window=1 current="f" first(newValue) as oldValue | eval Change=round((newValue/oldValue-1)*100) | appendcols [search index="splunkgit" source="git_repo" | dedup commit_hash | sort 0 _time | timechart dc(author_name) as activeUsers] | eval oldUsers=activeUsers-newValue | table _time, newValue, oldUsers | rename newValue as "Number of new authors" oldUsers as "Number of old authors" 50 | 51 | [Total impact] 52 | search = index=splunkgit source="git_repo" | dedup commit_hash | timechart eval(sum(deletions) + sum(insertions)) as Impact by author_name | streamstats sum(*) as * 53 | 54 | -------------------------------------------------------------------------------- /default/splunkgit.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Separate repo_addresses with a space to have multiple repositories indexed 16 | [git] 17 | repo_addresses= 18 | 19 | [splunk] 20 | user= 21 | password= 22 | -------------------------------------------------------------------------------- /lib/joblib/__init__.py: -------------------------------------------------------------------------------- 1 | """ Joblib is a set of tools to provide **lightweight pipelining in 2 | Python**. In particular, joblib offers: 3 | 4 | 1. transparent disk-caching of the output values and lazy re-evaluation 5 | (memoize pattern) 6 | 7 | 2. easy simple parallel computing 8 | 9 | 3. logging and tracing of the execution 10 | 11 | Joblib is optimized to be **fast** and **robust** in particular on large 12 | data and has specific optimizations for `numpy` arrays. It is 13 | **BSD-licensed**. 
14 | 15 | 16 | ============================== ============================================ 17 | **User documentation**: http://packages.python.org/joblib 18 | 19 | **Download packages**: http://pypi.python.org/pypi/joblib#downloads 20 | 21 | **Source code**: http://github.com/joblib/joblib 22 | 23 | **Report issues**: http://github.com/joblib/joblib/issues 24 | ============================== ============================================ 25 | 26 | 27 | Vision 28 | -------- 29 | 30 | The vision is to provide tools to easily achieve better performance and 31 | reproducibility when working with long running jobs. In addition, Joblib 32 | can also be used to provide a light-weight make replacement or caching 33 | solution. 34 | 35 | * **Avoid computing the same thing twice**: code is rerun over and 36 | over, for instance when prototyping computation-heavy jobs (as in 37 | scientific development), but hand-crafted solutions to alleviate this 38 | issue are error-prone and often lead to unreproducible results 39 | 40 | * **Persist to disk transparently**: efficiently persisting 41 | arbitrary objects containing large data is hard. In addition, 42 | hand-written persistence does not easily link the file on disk to the 43 | execution context of the original Python object. As a result, it is 44 | challenging to resume an application state or computational job, e.g. 45 | after a crash. 46 | 47 | It strives to address these problems while **leaving your code and your 48 | flow control as unmodified as possible** (no framework, no new 49 | paradigms). 50 | 51 | Main features 52 | ------------------ 53 | 54 | 1) **Transparent and fast disk-caching of output values:** a memoize or 55 | make-like functionality for Python functions that works well for 56 | arbitrary Python objects, including very large numpy arrays. Separate 57 | persistence and flow-execution logic from domain logic or algorithmic 58 | code by writing the operations as a set of steps with well-defined 59 | inputs and outputs: Python functions. Joblib can save their 60 | computation to disk and rerun it only if necessary:: 61 | 62 | >>> from joblib import Memory 63 | >>> mem = Memory(cachedir='/tmp/joblib') 64 | >>> import numpy as np 65 | >>> a = np.vander(np.arange(3)) 66 | >>> square = mem.cache(np.square) 67 | >>> b = square(a) # doctest: +ELLIPSIS 68 | ________________________________________________________________________________ 69 | [Memory] Calling square... 70 | square(array([[0, 0, 1], 71 | [1, 1, 1], 72 | [4, 2, 1]])) 73 | ___________________________________________________________square - 0...s, 0.0min 74 | 75 | >>> c = square(a) 76 | >>> # The above call did not trigger an evaluation 77 | 78 | 2) **Embarrassingly parallel helper:** to make it easy to write readable 79 | parallel code and debug it quickly: 80 | 81 | >>> from joblib import Parallel, delayed 82 | >>> from math import sqrt 83 | >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10)) 84 | [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] 85 | 86 | 87 | 3) **Logging/tracing:** The different functionalities will 88 | progressively acquire better logging mechanisms to help track what 89 | has been run, and capture I/O easily. In addition, Joblib will 90 | provide a few I/O primitives, to easily define logging and 91 | display streams, and provide a way of compiling a report. 92 | We want to be able to quickly inspect what has been run. 93 | 94 | .. 
95 | >>> import shutil ; shutil.rmtree('/tmp/joblib/') 96 | 97 | """ 98 | 99 | __version__ = '0.5.5' 100 | 101 | 102 | from .memory import Memory 103 | from .logger import PrintTime, Logger 104 | from .hashing import hash 105 | from .numpy_pickle import dump, load 106 | from .parallel import Parallel, delayed, cpu_count 107 | -------------------------------------------------------------------------------- /lib/joblib/disk.py: -------------------------------------------------------------------------------- 1 | """ 2 | Disk management utilities. 3 | """ 4 | 5 | # Author: Gael Varoquaux 6 | # Copyright (c) 2010 Gael Varoquaux 7 | # License: BSD Style, 3 clauses. 8 | 9 | 10 | import sys 11 | import os 12 | import shutil 13 | import time 14 | 15 | 16 | def disk_used(path): 17 | """ Return the disk usage in a directory.""" 18 | size = 0 19 | for file in os.listdir(path) + ['.']: 20 | stat = os.stat(os.path.join(path, file)) 21 | if hasattr(stat, 'st_blocks'): 22 | size += stat.st_blocks * 512 23 | else: 24 | # on some platforms st_blocks is not available (e.g., Windows) 25 | # approximate by rounding to next multiple of 512 26 | size += (stat.st_size // 512 + 1) * 512 27 | # We need to convert to int to avoid having longs on some systems (we 28 | # don't want longs to avoid problems with SQLite) 29 | return int(size / 1024.) 30 | 31 | 32 | def memstr_to_kbytes(text): 33 | """ Convert a memory text to its value in kilobytes. 34 | """ 35 | kilo = 1024 36 | units = dict(K=1, M=kilo, G=kilo ** 2) 37 | try: 38 | size = int(units[text[-1]] * float(text[:-1])) 39 | except (KeyError, ValueError): 40 | raise ValueError( 41 | "Invalid literal for size given: %s (type %s); should be " 42 | "like '10G', '500M', '50K'." % (text, type(text)) 43 | ) 44 | return size 45 | 46 | # if an rmtree operation fails in rm_subdirs, wait for this much time (in secs), 47 | # then retry once. if it still fails, raise the exception 48 | RM_SUBDIRS_RETRY_TIME = 0.1 49 | 50 | 51 | def rm_subdirs(path, onerror=None): 52 | """Remove all subdirectories in this path. 53 | 54 | The directory indicated by `path` is left in place, and its subdirectories 55 | are erased. 56 | 57 | If onerror is set, it is called to handle the error with arguments (func, 58 | path, exc_info) where func is os.listdir, os.remove, or os.rmdir; 59 | path is the argument to that function that caused it to fail; and 60 | exc_info is a tuple returned by sys.exc_info(). If onerror is None, 61 | an exception is raised. 62 | """ 63 | 64 | # NOTE this code is adapted from the one in shutil.rmtree, and is 65 | # just as fast 66 | 67 | names = [] 68 | try: 69 | names = os.listdir(path) 70 | except os.error, err: 71 | if onerror is not None: 72 | onerror(os.listdir, path, sys.exc_info()) 73 | else: 74 | raise 75 | 76 | for name in names: 77 | fullname = os.path.join(path, name) 78 | if os.path.isdir(fullname): 79 | if onerror is not None: 80 | shutil.rmtree(fullname, False, onerror) 81 | else: 82 | # allow the rmtree to fail once, wait and re-try. 83 | # if the error is raised again, fail 84 | err_count = 0 85 | while True: 86 | try: 87 | shutil.rmtree(fullname, False, None) 88 | break 89 | except os.error, err: 90 | if err_count > 0: 91 | raise 92 | err_count += 1 93 | time.sleep(RM_SUBDIRS_RETRY_TIME) 94 | -------------------------------------------------------------------------------- /lib/joblib/format_stack.py: -------------------------------------------------------------------------------- 1 | """ 2 | Represent an exception with a lot of information. 
3 | 4 | Provides 2 useful functions: 5 | 6 | format_exc: format an exception into a complete traceback, with full 7 | debugging instruction. 8 | 9 | format_outer_frames: format the current position in the stack call. 10 | 11 | Adapted from IPython's VerboseTB. 12 | """ 13 | # Authors: Gael Varoquaux < gael dot varoquaux at normalesup dot org > 14 | # Nathaniel Gray 15 | # Fernando Perez 16 | # Copyright: 2010, Gael Varoquaux 17 | # 2001-2004, Fernando Perez 18 | # 2001 Nathaniel Gray 19 | # License: BSD 3 clause 20 | 21 | 22 | import inspect 23 | import keyword 24 | import linecache 25 | import os 26 | import pydoc 27 | import string 28 | import sys 29 | import time 30 | import tokenize 31 | import traceback 32 | import types 33 | 34 | 35 | INDENT = ' ' * 8 36 | 37 | 38 | ############################################################################### 39 | # some internal-use functions 40 | def safe_repr(value): 41 | """Hopefully pretty robust repr equivalent.""" 42 | # this is pretty horrible but should always return *something* 43 | try: 44 | return pydoc.text.repr(value) 45 | except KeyboardInterrupt: 46 | raise 47 | except: 48 | try: 49 | return repr(value) 50 | except KeyboardInterrupt: 51 | raise 52 | except: 53 | try: 54 | # all still in an except block so we catch 55 | # getattr raising 56 | name = getattr(value, '__name__', None) 57 | if name: 58 | # ick, recursion 59 | return safe_repr(name) 60 | klass = getattr(value, '__class__', None) 61 | if klass: 62 | return '%s instance' % safe_repr(klass) 63 | except KeyboardInterrupt: 64 | raise 65 | except: 66 | return 'UNRECOVERABLE REPR FAILURE' 67 | 68 | 69 | def eq_repr(value, repr=safe_repr): 70 | return '=%s' % repr(value) 71 | 72 | 73 | ############################################################################### 74 | def uniq_stable(elems): 75 | """uniq_stable(elems) -> list 76 | 77 | Return from an iterable, a list of all the unique elements in the input, 78 | but maintaining the order in which they first appear. 79 | 80 | A naive solution to this problem which just makes a dictionary with the 81 | elements as keys fails to respect the stability condition, since 82 | dictionaries are unsorted by nature. 83 | 84 | Note: All elements in the input must be hashable. 85 | """ 86 | unique = [] 87 | unique_set = set() 88 | for nn in elems: 89 | if nn not in unique_set: 90 | unique.append(nn) 91 | unique_set.add(nn) 92 | return unique 93 | 94 | 95 | ############################################################################### 96 | def fix_frame_records_filenames(records): 97 | """Try to fix the filenames in each record from inspect.getinnerframes(). 98 | 99 | Particularly, modules loaded from within zip files have useless filenames 100 | attached to their code object, and inspect.getinnerframes() just uses it. 101 | """ 102 | fixed_records = [] 103 | for frame, filename, line_no, func_name, lines, index in records: 104 | # Look inside the frame's globals dictionary for __file__, which should 105 | # be better. 106 | better_fn = frame.f_globals.get('__file__', None) 107 | if isinstance(better_fn, str): 108 | # Check the type just in case someone did something weird with 109 | # __file__. It might also be None if the error occurred during 110 | # import. 
111 | filename = better_fn 112 | fixed_records.append((frame, filename, line_no, func_name, lines, 113 | index)) 114 | return fixed_records 115 | 116 | 117 | def _fixed_getframes(etb, context=1, tb_offset=0): 118 | LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5 119 | 120 | records = fix_frame_records_filenames(inspect.getinnerframes(etb, context)) 121 | 122 | # If the error is at the console, don't build any context, since it would 123 | # otherwise produce 5 blank lines printed out (there is no file at the 124 | # console) 125 | rec_check = records[tb_offset:] 126 | try: 127 | rname = rec_check[0][1] 128 | if rname == '' or rname.endswith(''): 129 | return rec_check 130 | except IndexError: 131 | pass 132 | 133 | aux = traceback.extract_tb(etb) 134 | assert len(records) == len(aux) 135 | for i, (file, lnum, _, _) in enumerate(aux): 136 | maybeStart = lnum - 1 - context // 2 137 | start = max(maybeStart, 0) 138 | end = start + context 139 | lines = linecache.getlines(file)[start:end] 140 | # pad with empty lines if necessary 141 | if maybeStart < 0: 142 | lines = (['\n'] * -maybeStart) + lines 143 | if len(lines) < context: 144 | lines += ['\n'] * (context - len(lines)) 145 | buf = list(records[i]) 146 | buf[LNUM_POS] = lnum 147 | buf[INDEX_POS] = lnum - 1 - start 148 | buf[LINES_POS] = lines 149 | records[i] = tuple(buf) 150 | return records[tb_offset:] 151 | 152 | 153 | def _format_traceback_lines(lnum, index, lines, lvals=None): 154 | numbers_width = 7 155 | res = [] 156 | i = lnum - index 157 | 158 | for line in lines: 159 | if i == lnum: 160 | # This is the line with the error 161 | pad = numbers_width - len(str(i)) 162 | if pad >= 3: 163 | marker = '-' * (pad - 3) + '-> ' 164 | elif pad == 2: 165 | marker = '> ' 166 | elif pad == 1: 167 | marker = '>' 168 | else: 169 | marker = '' 170 | num = marker + str(i) 171 | else: 172 | num = '%*s' % (numbers_width, i) 173 | line = '%s %s' % (num, line) 174 | 175 | res.append(line) 176 | if lvals and i == lnum: 177 | res.append(lvals + '\n') 178 | i = i + 1 179 | return res 180 | 181 | 182 | def format_records(records): # , print_globals=False): 183 | # Loop over all records printing context and info 184 | frames = [] 185 | abspath = os.path.abspath 186 | for frame, file, lnum, func, lines, index in records: 187 | #print '*** record:',file,lnum,func,lines,index # dbg 188 | try: 189 | file = file and abspath(file) or '?' 190 | except OSError: 191 | # if file is '' or something not in the filesystem, 192 | # the abspath call will throw an OSError. Just ignore it and 193 | # keep the original file string. 194 | pass 195 | link = file 196 | try: 197 | args, varargs, varkw, locals = inspect.getargvalues(frame) 198 | except: 199 | # This can happen due to a bug in python2.3. We should be 200 | # able to remove this try/except when 2.4 becomes a 201 | # requirement. Bug details at http://python.org/sf/1005466 202 | print "\nJoblib's exception reporting continues...\n" 203 | 204 | if func == '?': 205 | call = '' 206 | else: 207 | # Decide whether to include variable details or not 208 | try: 209 | call = 'in %s%s' % (func, inspect.formatargvalues(args, 210 | varargs, varkw, locals, 211 | formatvalue=eq_repr)) 212 | except KeyError: 213 | # Very odd crash from inspect.formatargvalues(). The 214 | # scenario under which it appeared was a call to 215 | # view(array,scale) in NumTut.view.view(), where scale had 216 | # been defined as a scalar (it should be a tuple). Somehow 217 | # inspect messes up resolving the argument list of view() 218 | # and barfs out. 
At some point I should dig into this one 219 | # and file a bug report about it. 220 | print "\nJoblib's exception reporting continues...\n" 221 | call = 'in %s(***failed resolving arguments***)' % func 222 | 223 | # Initialize a list of names on the current line, which the 224 | # tokenizer below will populate. 225 | names = [] 226 | 227 | def tokeneater(token_type, token, start, end, line): 228 | """Stateful tokeneater which builds dotted names. 229 | 230 | The list of names it appends to (from the enclosing scope) can 231 | contain repeated composite names. This is unavoidable, since 232 | there is no way to disambguate partial dotted structures until 233 | the full list is known. The caller is responsible for pruning 234 | the final list of duplicates before using it.""" 235 | 236 | # build composite names 237 | if token == '.': 238 | try: 239 | names[-1] += '.' 240 | # store state so the next token is added for x.y.z names 241 | tokeneater.name_cont = True 242 | return 243 | except IndexError: 244 | pass 245 | if token_type == tokenize.NAME and token not in keyword.kwlist: 246 | if tokeneater.name_cont: 247 | # Dotted names 248 | names[-1] += token 249 | tokeneater.name_cont = False 250 | else: 251 | # Regular new names. We append everything, the caller 252 | # will be responsible for pruning the list later. It's 253 | # very tricky to try to prune as we go, b/c composite 254 | # names can fool us. The pruning at the end is easy 255 | # to do (or the caller can print a list with repeated 256 | # names if so desired. 257 | names.append(token) 258 | elif token_type == tokenize.NEWLINE: 259 | raise IndexError 260 | # we need to store a bit of state in the tokenizer to build 261 | # dotted names 262 | tokeneater.name_cont = False 263 | 264 | def linereader(file=file, lnum=[lnum], getline=linecache.getline): 265 | line = getline(file, lnum[0]) 266 | lnum[0] += 1 267 | return line 268 | 269 | # Build the list of names on this line of code where the exception 270 | # occurred. 271 | try: 272 | # This builds the names list in-place by capturing it from the 273 | # enclosing scope. 
274 | tokenize.tokenize(linereader, tokeneater) 275 | except IndexError: 276 | # signals exit of tokenizer 277 | pass 278 | except tokenize.TokenError, msg: 279 | print ("An unexpected error occurred while tokenizing input\n" 280 | "The following traceback may be corrupted or invalid\n" 281 | "The error message is: %s\n" % msg) 282 | 283 | # prune names list of duplicates, but keep the right order 284 | unique_names = uniq_stable(names) 285 | 286 | # Start loop over vars 287 | lvals = [] 288 | for name_full in unique_names: 289 | name_base = name_full.split('.', 1)[0] 290 | if name_base in frame.f_code.co_varnames: 291 | if name_base in locals.keys(): 292 | try: 293 | value = repr(eval(name_full, locals)) 294 | except: 295 | value = "undefined" 296 | else: 297 | value = "undefined" 298 | name = name_full 299 | lvals.append('%s = %s' % (name, value)) 300 | #elif print_globals: 301 | # if frame.f_globals.has_key(name_base): 302 | # try: 303 | # value = repr(eval(name_full,frame.f_globals)) 304 | # except: 305 | # value = "undefined" 306 | # else: 307 | # value = "undefined" 308 | # name = 'global %s' % name_full 309 | # lvals.append('%s = %s' % (name,value)) 310 | if lvals: 311 | lvals = '%s%s' % (INDENT, ('\n%s' % INDENT).join(lvals)) 312 | else: 313 | lvals = '' 314 | 315 | level = '%s\n%s %s\n' % (75 * '.', link, call) 316 | 317 | if index is None: 318 | frames.append(level) 319 | else: 320 | frames.append('%s%s' % (level, ''.join( 321 | _format_traceback_lines(lnum, index, lines, lvals)))) 322 | 323 | return frames 324 | 325 | 326 | ############################################################################### 327 | def format_exc(etype, evalue, etb, context=5, tb_offset=0): 328 | """ Return a nice text document describing the traceback. 329 | 330 | Parameters 331 | ----------- 332 | etype, evalue, etb: as returned by sys.exc_info 333 | context: number of lines of the source file to plot 334 | tb_offset: the number of stack frame not to use (0 = use all) 335 | 336 | """ 337 | # some locals 338 | try: 339 | etype = etype.__name__ 340 | except AttributeError: 341 | pass 342 | 343 | # Header with the exception type, python version, and date 344 | pyver = 'Python ' + string.split(sys.version)[0] + ': ' + sys.executable 345 | date = time.ctime(time.time()) 346 | pid = 'PID: %i' % os.getpid() 347 | 348 | head = '%s%s%s\n%s%s%s' % (etype, ' ' * (75 - len(str(etype)) - len(date)), 349 | date, pid, ' ' * (75 - len(str(pid)) - len(pyver)), 350 | pyver) 351 | 352 | # Flush cache before calling inspect. This helps alleviate some of the 353 | # problems with python 2.3's inspect.py. 354 | linecache.checkcache() 355 | # Drop topmost frames if requested 356 | try: 357 | records = _fixed_getframes(etb, context, tb_offset) 358 | except: 359 | raise 360 | print '\nUnfortunately, your original traceback can not be ' + \ 361 | 'constructed.\n' 362 | return '' 363 | 364 | # Get (safely) a string form of the exception info 365 | try: 366 | etype_str, evalue_str = map(str, (etype, evalue)) 367 | except: 368 | # User exception is improperly defined. 369 | etype, evalue = str, sys.exc_info()[:2] 370 | etype_str, evalue_str = map(str, (etype, evalue)) 371 | # ... and format it 372 | exception = ['%s: %s' % (etype_str, evalue_str)] 373 | if type(evalue) is types.InstanceType: 374 | try: 375 | names = [w for w in dir(evalue) if isinstance(w, basestring)] 376 | except: 377 | # Every now and then, an object with funny inernals blows up 378 | # when dir() is called on it. 
We do the best we can to report 379 | # the problem and continue 380 | exception.append( 381 | 'Exception reporting error (object with broken dir()):' 382 | ) 383 | etype_str, evalue_str = map(str, sys.exc_info()[:2]) 384 | exception.append('%s: %s' % (etype_str, evalue_str)) 385 | names = [] 386 | for name in names: 387 | value = safe_repr(getattr(evalue, name)) 388 | exception.append('\n%s%s = %s' % (INDENT, name, value)) 389 | 390 | frames = format_records(records) 391 | return '%s\n%s\n%s' % (head, '\n'.join(frames), ''.join(exception[0])) 392 | 393 | 394 | ############################################################################### 395 | def format_outer_frames(context=5, stack_start=None, stack_end=None, 396 | ignore_ipython=True): 397 | LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5 398 | records = inspect.getouterframes(inspect.currentframe()) 399 | output = list() 400 | 401 | for i, (frame, filename, line_no, func_name, lines, index) \ 402 | in enumerate(records): 403 | # Look inside the frame's globals dictionary for __file__, which should 404 | # be better. 405 | better_fn = frame.f_globals.get('__file__', None) 406 | if isinstance(better_fn, str): 407 | # Check the type just in case someone did something weird with 408 | # __file__. It might also be None if the error occurred during 409 | # import. 410 | filename = better_fn 411 | if filename.endswith('.pyc'): 412 | filename = filename[:-4] + '.py' 413 | if ignore_ipython: 414 | # Hack to avoid printing the interals of IPython 415 | if (os.path.basename(filename) == 'iplib.py' 416 | and func_name in ('safe_execfile', 'runcode')): 417 | break 418 | maybeStart = line_no - 1 - context // 2 419 | start = max(maybeStart, 0) 420 | end = start + context 421 | lines = linecache.getlines(filename)[start:end] 422 | # pad with empty lines if necessary 423 | if maybeStart < 0: 424 | lines = (['\n'] * -maybeStart) + lines 425 | if len(lines) < context: 426 | lines += ['\n'] * (context - len(lines)) 427 | buf = list(records[i]) 428 | buf[LNUM_POS] = line_no 429 | buf[INDEX_POS] = line_no - 1 - start 430 | buf[LINES_POS] = lines 431 | output.append(tuple(buf)) 432 | return '\n'.join(format_records(output[stack_end:stack_start:-1])) 433 | -------------------------------------------------------------------------------- /lib/joblib/func_inspect.py: -------------------------------------------------------------------------------- 1 | """ 2 | My own variation on function-specific inspect-like features. 3 | """ 4 | 5 | # Author: Gael Varoquaux 6 | # Copyright (c) 2009 Gael Varoquaux 7 | # License: BSD Style, 3 clauses. 8 | 9 | import itertools 10 | import inspect 11 | import warnings 12 | import os 13 | 14 | 15 | def get_func_code(func): 16 | """ Attempts to retrieve a reliable function code hash. 17 | 18 | The reason we don't use inspect.getsource is that it caches the 19 | source, whereas we want this to be modified on the fly when the 20 | function is modified. 21 | 22 | Returns 23 | ------- 24 | func_code: string 25 | The function code 26 | source_file: string 27 | The path to the file in which the function is defined. 28 | first_line: int 29 | The first line of the code in the source file. 30 | 31 | Notes 32 | ------ 33 | This function does a bit more magic than inspect, and is thus 34 | more robust. 35 | """ 36 | source_file = None 37 | try: 38 | # Try to retrieve the source code. 
39 | source_file = func.func_code.co_filename 40 | source_file_obj = file(source_file) 41 | first_line = func.func_code.co_firstlineno 42 | # All the lines after the function definition: 43 | source_lines = list(itertools.islice(source_file_obj, first_line - 1, 44 | None)) 45 | return ''.join(inspect.getblock(source_lines)), source_file, first_line 46 | except: 47 | # If the source code fails, we use the hash. This is fragile and 48 | # might change from one session to another. 49 | if hasattr(func, 'func_code'): 50 | return str(func.func_code.__hash__()), source_file, -1 51 | else: 52 | # Weird objects like numpy ufunc don't have func_code 53 | # This is fragile, as quite often the id of the object is 54 | # in the repr, so it might not persist accross sessions, 55 | # however it will work for ufuncs. 56 | return repr(func), source_file, -1 57 | 58 | 59 | def get_func_name(func, resolv_alias=True, win_characters=True): 60 | """ Return the function import path (as a list of module names), and 61 | a name for the function. 62 | 63 | Parameters 64 | ---------- 65 | func: callable 66 | The func to inspect 67 | resolv_alias: boolean, optional 68 | If true, possible local alias are indicated. 69 | win_characters: boolean, optional 70 | If true, substitute special characters using urllib.quote 71 | This is useful in Windows, as it cannot encode some filenames 72 | """ 73 | if hasattr(func, '__module__'): 74 | module = func.__module__ 75 | else: 76 | try: 77 | module = inspect.getmodule(func) 78 | except TypeError: 79 | if hasattr(func, '__class__'): 80 | module = func.__class__.__module__ 81 | else: 82 | module = 'unkown' 83 | if module is None: 84 | # Happens in doctests, eg 85 | module = '' 86 | if module == '__main__': 87 | try: 88 | filename = inspect.getsourcefile(func) 89 | except: 90 | filename = None 91 | if filename is not None: 92 | # mangling of full path to filename 93 | filename = filename.replace(os.sep, '-') 94 | filename = filename.replace(":", "-") 95 | if filename.endswith('.py'): 96 | filename = filename[:-3] 97 | module = module + '-' + filename 98 | module = module.split('.') 99 | if hasattr(func, 'func_name'): 100 | name = func.func_name 101 | elif hasattr(func, '__name__'): 102 | name = func.__name__ 103 | else: 104 | name = 'unknown' 105 | # Hack to detect functions not defined at the module-level 106 | if resolv_alias: 107 | # TODO: Maybe add a warning here? 108 | if hasattr(func, 'func_globals') and name in func.func_globals: 109 | if not func.func_globals[name] is func: 110 | name = '%s-alias' % name 111 | if inspect.ismethod(func): 112 | # We need to add the name of the class 113 | if hasattr(func, 'im_class'): 114 | klass = func.im_class 115 | module.append(klass.__name__) 116 | if os.name == 'nt' and win_characters: 117 | # Stupid windows can't encode certain characters in filenames 118 | import urllib 119 | for char in ('<', '>', '!', ':'): 120 | name = name.replace(char, urllib.quote(char)) 121 | return module, name 122 | 123 | 124 | def filter_args(func, ignore_lst, *args, **kwargs): 125 | """ Filters the given args and kwargs using a list of arguments to 126 | ignore, and a function specification. 127 | 128 | Parameters 129 | ---------- 130 | func: callable 131 | Function giving the argument specification 132 | ignore_lst: list of strings 133 | List of arguments to ignore (either a name of an argument 134 | in the function spec, or '*', or '**') 135 | *args: list 136 | Positional arguments passed to the function. 
137 | **kwargs: dict 138 | Keyword arguments passed to the function 139 | 140 | Returns 141 | ------- 142 | filtered_args: list 143 | List of filtered positional arguments. 144 | filtered_kwdargs: dict 145 | List of filtered Keyword arguments. 146 | """ 147 | args = list(args) 148 | if isinstance(ignore_lst, basestring): 149 | # Catch a common mistake 150 | raise ValueError('ignore_lst must be a list of parameters to ignore ' 151 | '%s (type %s) was given' % (ignore_lst, type(ignore_lst))) 152 | # Special case for functools.partial objects 153 | if (not inspect.ismethod(func) and not inspect.isfunction(func)): 154 | if ignore_lst: 155 | warnings.warn('Cannot inspect object %s, ignore list will ' 156 | 'not work.' % func, stacklevel=2) 157 | return {'*': args, '**': kwargs} 158 | arg_spec = inspect.getargspec(func) 159 | # We need to if/them to account for different versions of Python 160 | if hasattr(arg_spec, 'args'): 161 | arg_names = arg_spec.args 162 | arg_defaults = arg_spec.defaults 163 | arg_keywords = arg_spec.keywords 164 | arg_varargs = arg_spec.varargs 165 | else: 166 | arg_names, arg_varargs, arg_keywords, arg_defaults = arg_spec 167 | arg_defaults = arg_defaults or {} 168 | if inspect.ismethod(func): 169 | # First argument is 'self', it has been removed by Python 170 | # we need to add it back: 171 | args = [func.im_self, ] + args 172 | # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such 173 | # as on ndarrays. 174 | 175 | _, name = get_func_name(func, resolv_alias=False) 176 | arg_dict = dict() 177 | arg_position = -1 178 | for arg_position, arg_name in enumerate(arg_names): 179 | if arg_position < len(args): 180 | # Positional argument or keyword argument given as positional 181 | arg_dict[arg_name] = args[arg_position] 182 | else: 183 | position = arg_position - len(arg_names) 184 | if arg_name in kwargs: 185 | arg_dict[arg_name] = kwargs.pop(arg_name) 186 | else: 187 | try: 188 | arg_dict[arg_name] = arg_defaults[position] 189 | except (IndexError, KeyError): 190 | # Missing argument 191 | raise ValueError('Wrong number of arguments for %s%s:\n' 192 | ' %s(%s, %s) was called.' 193 | % (name, 194 | inspect.formatargspec(*inspect.getargspec(func)), 195 | name, 196 | repr(args)[1:-1], 197 | ', '.join('%s=%s' % (k, v) 198 | for k, v in kwargs.iteritems()) 199 | ) 200 | ) 201 | 202 | varkwargs = dict() 203 | for arg_name, arg_value in kwargs.iteritems(): 204 | if arg_name in arg_dict: 205 | arg_dict[arg_name] = arg_value 206 | elif arg_keywords is not None: 207 | varkwargs[arg_name] = arg_value 208 | else: 209 | raise TypeError("Ignore list for %s() contains an unexpected " 210 | "keyword argument '%s'" % (name, arg_name)) 211 | 212 | if arg_keywords is not None: 213 | arg_dict['**'] = varkwargs 214 | if arg_varargs is not None: 215 | varargs = args[arg_position + 1:] 216 | arg_dict['*'] = varargs 217 | 218 | # Now remove the arguments to be ignored 219 | for item in ignore_lst: 220 | if item in arg_dict: 221 | arg_dict.pop(item) 222 | else: 223 | raise ValueError("Ignore list: argument '%s' is not defined for " 224 | "function %s%s" % 225 | (item, name, 226 | inspect.formatargspec(arg_names, 227 | arg_varargs, 228 | arg_keywords, 229 | arg_defaults, 230 | ))) 231 | # XXX: Return a sorted list of pairs? 
232 | return arg_dict 233 | -------------------------------------------------------------------------------- /lib/joblib/hashing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fast cryptographic hash of Python objects, with a special case for fast 3 | hashing of numpy arrays. 4 | """ 5 | 6 | # Author: Gael Varoquaux 7 | # Copyright (c) 2009 Gael Varoquaux 8 | # License: BSD Style, 3 clauses. 9 | 10 | import pickle 11 | import hashlib 12 | import sys 13 | import cStringIO 14 | import types 15 | 16 | 17 | class Hasher(pickle.Pickler): 18 | """ A subclass of pickler, to do cryptographic hashing, rather than 19 | pickling. 20 | """ 21 | 22 | def __init__(self, hash_name='md5'): 23 | self.stream = cStringIO.StringIO() 24 | pickle.Pickler.__init__(self, self.stream, protocol=2) 25 | # Initialise the hash obj 26 | self._hash = hashlib.new(hash_name) 27 | 28 | def hash(self, obj, return_digest=True): 29 | self.dump(obj) 30 | dumps = self.stream.getvalue() 31 | self._hash.update(dumps) 32 | if return_digest: 33 | return self._hash.hexdigest() 34 | 35 | def save(self, obj): 36 | if isinstance(obj, types.MethodType): 37 | # the Pickler cannot pickle instance methods; here we decompose 38 | # them into components that make them uniquely identifiable 39 | func_name = obj.im_func.__name__ 40 | inst = obj.im_self 41 | cls = obj.im_class 42 | obj = (func_name, inst, cls) 43 | pickle.Pickler.save(self, obj) 44 | 45 | 46 | class NumpyHasher(Hasher): 47 | """ Special case the hasher for when numpy is loaded. 48 | """ 49 | 50 | def __init__(self, hash_name='md5', coerce_mmap=False): 51 | """ 52 | Parameters 53 | ---------- 54 | hash_name: string 55 | The hash algorithm to be used 56 | coerce_mmap: boolean 57 | Make no difference between np.memmap and np.ndarray 58 | objects. 59 | """ 60 | self.coerce_mmap = coerce_mmap 61 | Hasher.__init__(self, hash_name=hash_name) 62 | # delayed import of numpy, to avoid tight coupling 63 | import numpy as np 64 | self.np = np 65 | 66 | def save(self, obj): 67 | """ Subclass the save method, to hash ndarray subclass, rather 68 | than pickling them. Off course, this is a total abuse of 69 | the Pickler class. 70 | """ 71 | if isinstance(obj, self.np.ndarray): 72 | # Compute a hash of the object: 73 | try: 74 | self._hash.update(self.np.getbuffer(obj)) 75 | except TypeError: 76 | # Cater for non-single-segment arrays: this creates a 77 | # copy, and thus aleviates this issue. 78 | # XXX: There might be a more efficient way of doing this 79 | self._hash.update(self.np.getbuffer(obj.flatten())) 80 | 81 | # We store the class, to be able to distinguish between 82 | # Objects with the same binary content, but different 83 | # classes. 84 | if self.coerce_mmap and isinstance(obj, self.np.memmap): 85 | # We don't make the difference between memmap and 86 | # normal ndarrays, to be able to reload previously 87 | # computed results with memmap. 88 | klass = self.np.ndarray 89 | else: 90 | klass = obj.__class__ 91 | # We also return the dtype and the shape, to distinguish 92 | # different views on the same data with different dtypes. 93 | 94 | # The object will be pickled by the pickler hashed at the end. 95 | obj = (klass, ('HASHED', obj.dtype, obj.shape, obj.strides)) 96 | Hasher.save(self, obj) 97 | 98 | 99 | def hash(obj, hash_name='md5', coerce_mmap=False): 100 | """ Quick calculation of a hash to identify uniquely Python objects 101 | containing numpy arrays. 
102 | 
103 | 
104 |     Parameters
105 |     -----------
106 |     hash_name: 'md5' or 'sha1'
107 |         Hashing algorithm used. sha1 is supposedly safer, but md5 is
108 |         faster.
109 |     coerce_mmap: boolean
110 |         Make no difference between np.memmap and np.ndarray objects.
111 |     """
112 |     if 'numpy' in sys.modules:
113 |         hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
114 |     else:
115 |         hasher = Hasher(hash_name=hash_name)
116 |     return hasher.hash(obj)
117 | 
--------------------------------------------------------------------------------
/lib/joblib/logger.py:
--------------------------------------------------------------------------------
1 | """
2 | Helpers for logging.
3 | 
4 | This module needs much love to become useful.
5 | """
6 | 
7 | # Author: Gael Varoquaux
8 | # Copyright (c) 2008 Gael Varoquaux
9 | # License: BSD Style, 3 clauses.
10 | 
11 | 
12 | import time
13 | import sys
14 | import os
15 | import shutil
16 | import logging
17 | import pprint
18 | 
19 | 
20 | def _squeeze_time(t):
21 |     """Subtract .1s from the time under Windows: this is roughly the time
22 |     it takes to stat files, and removing it makes results comparable to
23 |     timings under Unix, for tests
24 |     """
25 |     if sys.platform.startswith('win'):
26 |         return max(0, t - .1)
27 |     else:
28 |         return t
29 | 
30 | 
31 | def format_time(t):
32 |     t = _squeeze_time(t)
33 |     return "%.1fs, %.1fmin" % (t, t / 60.)
34 | 
35 | 
36 | def short_format_time(t):
37 |     t = _squeeze_time(t)
38 |     if t > 60:
39 |         return "%4.1fmin" % (t / 60.)
40 |     else:
41 |         return " %5.1fs" % (t)
42 | 
43 | 
44 | ###############################################################################
45 | # class `Logger`
46 | ###############################################################################
47 | class Logger(object):
48 |     """ Base class for logging messages.
49 |     """
50 | 
51 |     def __init__(self, depth=3):
52 |         """
53 |             Parameters
54 |             ----------
55 |             depth: int, optional
56 |                 The depth of objects printed.
57 |         """
58 |         self.depth = depth
59 | 
60 |     def warn(self, msg):
61 |         logging.warn("[%s]: %s" % (self, msg))
62 | 
63 |     def debug(self, msg):
64 |         # XXX: This conflicts with the debug flag used in child classes
65 |         logging.debug("[%s]: %s" % (self, msg))
66 | 
67 |     def format(self, obj, indent=0):
68 |         """ Return the formatted representation of the object.
69 |         """
70 |         if 'numpy' in sys.modules:
71 |             import numpy as np
72 |             print_options = np.get_printoptions()
73 |             np.set_printoptions(precision=6, threshold=64, edgeitems=1)
74 |         else:
75 |             print_options = None
76 |         out = pprint.pformat(obj, depth=self.depth, indent=indent)
77 |         if print_options:
78 |             np.set_printoptions(**print_options)
79 |         return out
80 | 
81 | 
82 | ###############################################################################
83 | # class `PrintTime`
84 | ###############################################################################
85 | class PrintTime(object):
86 |     """ Print and log messages while keeping track of time.
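        A minimal usage sketch (illustrative only; exact figures depend on
        timing and on format_time above)::

            >>> pt = PrintTime()          # doctest: +SKIP
            >>> pt('loaded data')         # doctest: +SKIP
            loaded data: 0.0s, 0.0min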
87 |     """
88 | 
89 |     def __init__(self, logfile=None, logdir=None):
90 |         if logfile is not None and logdir is not None:
91 |             raise ValueError('Cannot specify both logfile and logdir')
92 |         # XXX: Need argument docstring
93 |         self.last_time = time.time()
94 |         self.start_time = self.last_time
95 |         if logdir is not None:
96 |             logfile = os.path.join(logdir, 'joblib.log')
97 |         self.logfile = logfile
98 |         if logfile is not None:
99 |             if not os.path.exists(os.path.dirname(logfile)):
100 |                 os.makedirs(os.path.dirname(logfile))
101 |             if os.path.exists(logfile):
102 |                 # Rotate the logs, highest index first, so no file is clobbered
103 |                 for i in range(8, 0, -1):
104 |                     if os.path.exists(logfile + '.%i' % i):
105 |                         try:
106 |                             shutil.move(logfile + '.%i' % i,
107 |                                         logfile + '.%i' % (i + 1))
108 |                         except:
109 |                             pass  # best-effort rotation: no reason to fail here
110 |                 # Use a copy rather than a move, so that a process
111 |                 # monitoring this file does not get lost.
112 |                 try:
113 |                     shutil.copy(logfile, logfile + '.1')
114 |                 except:
115 |                     pass  # best-effort copy: no reason to fail here
116 |             try:
117 |                 logfile = file(logfile, 'w')
118 |                 logfile.write('\nLogging joblib python script\n')
119 |                 logfile.write('\n---%s---\n' % time.ctime(self.last_time))
120 |             except:
121 |                 """ Multiprocessing writing to files can create race
122 |                     conditions. Rather fail silently than crash the
123 |                     calculation.
124 |                 """
125 |                 # XXX: We actually need a debug flag to disable this
126 |                 # silent failure.
127 | 
128 |     def __call__(self, msg='', total=False):
129 |         """ Print the time elapsed between the last call and the current
130 |             call, with an optional message.
131 |         """
132 |         if not total:
133 |             time_lapse = time.time() - self.last_time
134 |             full_msg = "%s: %s" % (msg, format_time(time_lapse))
135 |         else:
136 |             # FIXME: Too much logic duplicated
137 |             time_lapse = time.time() - self.start_time
138 |             full_msg = "%s: %.2fs, %.1f min" % (msg, time_lapse,
139 |                                                 time_lapse / 60)
140 |         print >> sys.stderr, full_msg
141 |         if self.logfile is not None:
142 |             try:
143 |                 print >> file(self.logfile, 'a'), full_msg
144 |             except:
145 |                 """ Multiprocessing writing to files can create race
146 |                     conditions. Rather fail silently than crash the
147 |                     calculation.
148 |                 """
149 |                 # XXX: We actually need a debug flag to disable this
150 |                 # silent failure.
151 |         self.last_time = time.time()
152 | 
--------------------------------------------------------------------------------
/lib/joblib/my_exceptions.py:
--------------------------------------------------------------------------------
1 | """
2 | Exceptions
3 | """
4 | # Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
5 | # Copyright: 2010, Gael Varoquaux
6 | # License: BSD 3 clause
7 | 
8 | import sys
9 | 
10 | 
11 | class JoblibException(Exception):
12 |     """ A simple exception with an error message that you can get to.
13 |     """
14 | 
15 |     def __init__(self, message):
16 |         self.message = message
17 | 
18 |     def __reduce__(self):
19 |         # For pickling
20 |         return self.__class__, (self.message,), {}
21 | 
22 |     def __repr__(self):
23 |         return '%s\n%s\n%s\n%s' % (
24 |                     self.__class__.__name__,
25 |                     75 * '_',
26 |                     self.message,
27 |                     75 * '_')
28 | 
29 |     __str__ = __repr__
30 | 
31 | 
32 | class TransportableException(JoblibException):
33 |     """ An exception containing all the info to wrap an original
34 |         exception and recreate it.
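        A sketch of the intended round-trip (illustrative, relying on the
        _mk_exception helper defined later in this module)::

            >>> te = TransportableException('boom', ZeroDivisionError)
            >>> _mk_exception(te.etype)[0].__name__
            'JoblibZeroDivisionError'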
35 |     """
36 | 
37 |     def __init__(self, message, etype):
38 |         self.message = message
39 |         self.etype = etype
40 | 
41 |     def __reduce__(self):
42 |         # For pickling
43 |         return self.__class__, (self.message, self.etype), {}
44 | 
45 | 
46 | _exception_mapping = dict()
47 | 
48 | 
49 | def _mk_exception(exception, name=None):
50 |     # Create an exception inheriting from both JoblibException
51 |     # and that exception
52 |     if name is None:
53 |         name = exception.__name__
54 |     this_name = 'Joblib%s' % name
55 |     if this_name in _exception_mapping:
56 |         # Avoid creating the same exception twice
57 |         this_exception = _exception_mapping[this_name]
58 |     else:
59 |         this_exception = type(this_name, (exception, JoblibException),
60 |                     dict(__repr__=JoblibException.__repr__,
61 |                          __str__=JoblibException.__str__),
62 |                     )
63 |         _exception_mapping[this_name] = this_exception
64 |     return this_exception, this_name
65 | 
66 | 
67 | def _mk_common_exceptions():
68 |     namespace = dict()
69 |     if sys.version_info[0] == 3:
70 |         import builtins as _builtin_exceptions
71 |         common_exceptions = filter(
72 |             lambda x: x.endswith('Error'),
73 |             dir(_builtin_exceptions))
74 |     else:
75 |         import exceptions as _builtin_exceptions
76 |         common_exceptions = dir(_builtin_exceptions)
77 | 
78 |     for name in common_exceptions:
79 |         obj = getattr(_builtin_exceptions, name)
80 |         if isinstance(obj, type) and issubclass(obj, BaseException):
81 |             try:
82 |                 this_obj, this_name = _mk_exception(obj, name=name)
83 |                 namespace[this_name] = this_obj
84 |             except TypeError:
85 |                 # Cannot create a consistent method resolution order:
86 |                 # a class that we can't subclass properly, probably
87 |                 # BaseException
88 |                 pass
89 |     return namespace
90 | 
91 | 
92 | # Updating module locals so that the exceptions pickle right. AFAIK this
93 | # works only at module-creation time
94 | locals().update(_mk_common_exceptions())
95 | 
--------------------------------------------------------------------------------
/lib/joblib/numpy_pickle.py:
--------------------------------------------------------------------------------
1 | """
2 | A pickler to save numpy arrays in separate .npy files.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2009 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | import pickle
10 | import traceback
11 | import sys
12 | import os
13 | 
14 | if sys.version_info[0] == 3:
15 |     from pickle import _Unpickler as Unpickler
16 | else:
17 |     from pickle import Unpickler
18 | 
19 | ###############################################################################
20 | # Utility objects for persistence.
21 | 
22 | 
23 | class NDArrayWrapper(object):
24 |     """ An object to be persisted instead of numpy arrays.
25 | 
26 |         The only thing this object does is store the filename in which
27 |         the array has been persisted.
28 |     """
29 |     def __init__(self, filename):
30 |         self.filename = filename
31 | 
32 | 
33 | ###############################################################################
34 | # Pickler classes
35 | 
36 | class NumpyPickler(pickle.Pickler):
37 |     """ A pickler subclass that extracts ndarrays and saves them in .npy
38 |         files outside of the pickle.
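        A sketch of the resulting on-disk layout (illustrative; the names
        follow the '%s_%02i.npy' pattern used in save below)::

            >>> import numpy as np                        # doctest: +SKIP
            >>> dump({'a': np.zeros(3)}, '/tmp/obj.pkl')  # doctest: +SKIP
            ['/tmp/obj.pkl', '/tmp/obj.pkl_01.npy']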
39 |     """
40 | 
41 |     def __init__(self, filename):
42 |         self._filename = filename
43 |         self._filenames = [filename, ]
44 |         self.file = open(filename, 'wb')
45 |         # Count the number of npy files that we have created:
46 |         self._npy_counter = 0
47 |         pickle.Pickler.__init__(self, self.file,
48 |                                 protocol=pickle.HIGHEST_PROTOCOL)
49 |         # delayed import of numpy, to avoid tight coupling
50 |         import numpy as np
51 |         self.np = np
52 | 
53 |     def save(self, obj):
54 |         """ Subclass the save method, to save ndarray subclasses in npy
55 |             files, rather than pickling them. Of course, this is a
56 |             total abuse of the Pickler class.
57 |         """
58 |         if isinstance(obj, self.np.ndarray):
59 |             self._npy_counter += 1
60 |             try:
61 |                 filename = '%s_%02i.npy' % (self._filename,
62 |                                             self._npy_counter)
63 |                 self._filenames.append(filename)
64 |                 self.np.save(filename, obj)
65 |                 obj = NDArrayWrapper(os.path.basename(filename))
66 |             except:
67 |                 self._npy_counter -= 1
68 |                 # XXX: We should have a logging mechanism
69 |                 print 'Failed to save %s to .npy file:\n%s' % (
70 |                         type(obj),
71 |                         traceback.format_exc())
72 |         pickle.Pickler.save(self, obj)
73 | 
74 | 
75 | class NumpyUnpickler(Unpickler):
76 |     """ A subclass of the Unpickler to unpickle our numpy pickles.
77 |     """
78 |     dispatch = Unpickler.dispatch.copy()
79 | 
80 |     def __init__(self, filename, mmap_mode=None):
81 |         self._filename = filename
82 |         self.mmap_mode = mmap_mode
83 |         self._dirname = os.path.dirname(filename)
84 |         self.file = open(filename, 'rb')
85 |         Unpickler.__init__(self, self.file)
86 |         import numpy as np
87 |         self.np = np
88 | 
89 |     def load_build(self):
90 |         """ This method is called to set the state of a newly created
91 |             object.
92 | 
93 |             We capture it to replace our place-holder objects,
94 |             NDArrayWrapper, by the array we are interested in. We
95 |             replace them directly in the unpickler's stack.
96 |         """
97 |         Unpickler.load_build(self)
98 |         if isinstance(self.stack[-1], NDArrayWrapper):
99 |             nd_array_wrapper = self.stack.pop()
100 |             if self.np.__version__ >= '1.3':
101 |                 array = self.np.load(os.path.join(self._dirname,
102 |                                                 nd_array_wrapper.filename),
103 |                                     mmap_mode=self.mmap_mode)
104 |             else:
105 |                 # Numpy does not have mmap_mode before 1.3
106 |                 array = self.np.load(os.path.join(self._dirname,
107 |                                                 nd_array_wrapper.filename))
108 |             self.stack.append(array)
109 | 
110 |     # Be careful to register our new method.
111 |     dispatch[pickle.BUILD] = load_build
112 | 
113 | 
114 | ###############################################################################
115 | # Utility functions
116 | 
117 | def dump(value, filename):
118 |     """ Persist an arbitrary Python object into a filename, with numpy arrays
119 |         saved as separate .npy files.
120 | 
121 |         See Also
122 |         --------
123 |         joblib.load : corresponding loader
124 |     """
125 |     try:
126 |         pickler = NumpyPickler(filename)
127 |         pickler.dump(value)
128 |     finally:
129 |         if 'pickler' in locals() and hasattr(pickler, 'file'):
130 |             pickler.file.flush()
131 |             pickler.file.close()
132 |     return pickler._filenames
133 | 
134 | 
135 | def load(filename, mmap_mode=None):
136 |     """ Reconstruct a Python object and the numpy arrays it contains from
137 |         a persisted file.
138 | 
139 |         This function loads the numpy array files saved separately. If
140 |         the mmap_mode argument is given, it is passed to np.load and
141 |         arrays are loaded as memmaps. As a consequence, the reconstructed
142 |         object might not match the original pickled object.
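        For instance (an illustrative sketch; '/tmp/obj.pkl' stands in for
        a file previously written by dump)::

            >>> b = load('/tmp/obj.pkl', mmap_mode='r')   # doctest: +SKIP

        With mmap_mode='r' the contained arrays come back memory-mapped
        read-only rather than copied into memory.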
143 | 
144 |         See Also
145 |         --------
146 |         joblib.dump : function to save the object
147 |     """
148 |     try:
149 |         unpickler = NumpyUnpickler(filename, mmap_mode=mmap_mode)
150 |         obj = unpickler.load()
151 |     finally:
152 |         if 'unpickler' in locals() and hasattr(unpickler, 'file'):
153 |             unpickler.file.close()
154 |     return obj
155 | 
--------------------------------------------------------------------------------
/lib/joblib/parallel.py:
--------------------------------------------------------------------------------
1 | """
2 | Helpers for embarrassingly parallel code.
3 | """
4 | # Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
5 | # Copyright: 2010, Gael Varoquaux
6 | # License: BSD 3 clause
7 | 
8 | import os
9 | import sys
10 | import functools
11 | import time
12 | import threading
13 | import itertools
14 | try:
15 |     import cPickle as pickle
16 | except:
17 |     import pickle
18 | 
19 | # Obtain possible configuration from the environment, assuming 1 (on)
20 | # by default; a value of 0 maps to None (off). This should fail
21 | # informatively if some non 0/1 value is set.
22 | multiprocessing = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
23 | if multiprocessing:
24 |     try:
25 |         import multiprocessing
26 |     except ImportError:
27 |         multiprocessing = None
28 | 
29 | from .format_stack import format_exc, format_outer_frames
30 | from .logger import Logger, short_format_time
31 | from .my_exceptions import TransportableException, _mk_exception
32 | 
33 | 
34 | ###############################################################################
35 | # CPU count that also works when multiprocessing is not installed (python2.5)
36 | def cpu_count():
37 |     """ Return the number of CPUs.
38 |     """
39 |     if multiprocessing is None:
40 |         return 1
41 |     return multiprocessing.cpu_count()
42 | 
43 | 
44 | ###############################################################################
45 | class WorkerInterrupt(Exception):
46 |     """ An exception that is not KeyboardInterrupt to allow subprocesses
47 |         to be interrupted.
48 |     """
49 |     pass
50 | 
51 | 
52 | ###############################################################################
53 | class SafeFunction(object):
54 |     """ Wraps a function so that exceptions raised while running it carry
55 |         a full traceback in their representation.
56 |         Useful for parallel computing with multiprocessing, for which
57 |         exceptions cannot be captured.
58 |     """
59 |     def __init__(self, func):
60 |         self.func = func
61 | 
62 |     def __call__(self, *args, **kwargs):
63 |         try:
64 |             return self.func(*args, **kwargs)
65 |         except KeyboardInterrupt:
66 |             # We capture the KeyboardInterrupt and reraise it as
67 |             # something different, as multiprocessing does not
68 |             # interrupt processing for a KeyboardInterrupt
69 |             raise WorkerInterrupt()
70 |         except:
71 |             e_type, e_value, e_tb = sys.exc_info()
72 |             text = format_exc(e_type, e_value, e_tb, context=10,
73 |                             tb_offset=1)
74 |             raise TransportableException(text, e_type)
75 | 
76 | 
77 | ###############################################################################
78 | def delayed(function):
79 |     """ Decorator used to capture the arguments of a function.
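        An illustrative example (CPython reprs shown)::

            >>> from math import sqrt
            >>> delayed(sqrt)(4)                          # doctest: +SKIP
            (<built-in function sqrt>, (4,), {})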
80 |     """
81 |     # Try to pickle the input function, to catch the problems early when
82 |     # using with multiprocessing
83 |     pickle.dumps(function)
84 | 
85 |     def delayed_function(*args, **kwargs):
86 |         return function, args, kwargs
87 |     try:
88 |         delayed_function = functools.wraps(function)(delayed_function)
89 |     except AttributeError:
90 |         " functools.wraps fails on some callable objects "
91 |     return delayed_function
92 | 
93 | 
94 | ###############################################################################
95 | class ImmediateApply(object):
96 |     """ A non-delayed apply function.
97 |     """
98 |     def __init__(self, func, args, kwargs):
99 |         # Don't delay the application, to avoid keeping the input
100 |         # arguments in memory
101 |         self.results = func(*args, **kwargs)
102 | 
103 |     def get(self):
104 |         return self.results
105 | 
106 | 
107 | ###############################################################################
108 | class CallBack(object):
109 |     """ Callback used by parallel: it is used for progress reporting, and
110 |         to add data to be processed
111 |     """
112 |     def __init__(self, index, parallel):
113 |         self.parallel = parallel
114 |         self.index = index
115 | 
116 |     def __call__(self, out):
117 |         if self.parallel.verbose:
118 |             self.print_progress()
119 |         if self.parallel._iterable:
120 |             self.parallel.dispatch_next()
121 | 
122 |     def print_progress(self):
123 |         # XXX: Not using the logger framework: need to
124 |         # learn to use logger better.
125 |         n_jobs = len(self.parallel._pool._pool)
126 |         if self.parallel.n_dispatched > 2 * n_jobs:
127 |             # Report less often
128 |             if not self.index % n_jobs == 0:
129 |                 return
130 |         elapsed_time = time.time() - self.parallel._start_time
131 |         remaining_time = (elapsed_time / (self.index + 1) *
132 |                     (self.parallel.n_dispatched - self.index - 1.))
133 |         if self.parallel._iterable:
134 |             # The object is still building its job list
135 |             total = "%3i+" % self.parallel.n_dispatched
136 |         else:
137 |             total = "%3i " % self.parallel.n_dispatched
138 | 
139 |         if self.parallel.verbose < 50:
140 |             writer = sys.stderr.write
141 |         else:
142 |             writer = sys.stdout.write
143 |         writer('[%s]: Done %3i out of %s |elapsed: %s remaining: %s\n'
144 |                 % (self.parallel,
145 |                    self.index + 1,
146 |                    total,
147 |                    short_format_time(elapsed_time),
148 |                    short_format_time(remaining_time),
149 |                 ))
150 | 
151 | 
152 | ###############################################################################
153 | class Parallel(Logger):
154 |     ''' Helper class for readable parallel mapping.
155 | 
156 |         Parameters
157 |         -----------
158 |         n_jobs: int
159 |             The number of jobs to use for the computation. If -1 all CPUs
160 |             are used. If 1 is given, no parallel computing code is used
161 |             at all, which is useful for debugging.
162 |         verbose: int, optional
163 |             The verbosity level. If 1 is given, the elapsed time as well
164 |             as the estimated remaining time are displayed. Above 100, the
165 |             output is sent to stdout.
166 |         pre_dispatch: {'all', integer, or expression, as in '3*n_jobs'}
167 |             The number of jobs to pre-dispatch. Default is 'all',
168 |             but it may consume a lot of memory, for instance if each job
169 |             involves a lot of data.
170 | 
171 |         Notes
172 |         -----
173 | 
174 |         This object uses the multiprocessing module to compute in
175 |         parallel the application of a function to many different
176 |         arguments.
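        Roughly speaking, and as an informal gloss only:
        Parallel(n_jobs=k)(delayed(f)(x) for x in xs) computes
        [f(x) for x in xs] using up to k worker processes.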
        The main features it brings in addition to
177 |         using the raw multiprocessing API are (see examples for details):
178 | 
179 |         * More readable code, in particular since it avoids
180 |           constructing list of arguments.
181 | 
182 |         * Easier debugging:
183 |             - informative tracebacks even when the error happens on
184 |               the client side
185 |             - using 'n_jobs=1' makes it possible to turn off parallel
186 |               computing for debugging without changing the codepath
187 |             - early capture of pickling errors
188 | 
189 |         * An optional progress meter.
190 | 
191 |         * Interruption of multiprocess jobs with 'Ctrl-C'
192 | 
193 |         Examples
194 |         --------
195 | 
196 |         A simple example:
197 | 
198 |         >>> from math import sqrt
199 |         >>> from joblib import Parallel, delayed
200 |         >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
201 |         [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
202 | 
203 |         Reshaping the output when the function has several return
204 |         values:
205 | 
206 |         >>> from math import modf
207 |         >>> from joblib import Parallel, delayed
208 |         >>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
209 |         >>> res, i = zip(*r)
210 |         >>> res
211 |         (0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5)
212 |         >>> i
213 |         (0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0)
214 | 
215 |         The progress meter::
216 | 
217 |         >>> from time import sleep
218 |         >>> from joblib import Parallel, delayed
219 |         >>> r = Parallel(n_jobs=2, verbose=1)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
220 |         [Parallel(n_jobs=2)]: Done   1 out of  10 |elapsed:   0.1s remaining:   0.9s
221 |         [Parallel(n_jobs=2)]: Done   3 out of  10 |elapsed:   0.2s remaining:   0.5s
222 |         [Parallel(n_jobs=2)]: Done   5 out of  10 |elapsed:   0.3s remaining:   0.3s
223 |         [Parallel(n_jobs=2)]: Done   7 out of  10 |elapsed:   0.4s remaining:   0.2s
224 |         [Parallel(n_jobs=2)]: Done   9 out of  10 |elapsed:   0.5s remaining:   0.1s
225 | 
226 |         Traceback example: note how the line of the error is indicated
227 |         as well as the values of the parameters passed to the function that
228 |         triggered the exception, even though the traceback happens in the
229 |         child process::
230 | 
231 |         >>> from string import atoi
232 |         >>> from joblib import Parallel, delayed
233 |         >>> Parallel(n_jobs=2)(delayed(atoi)(n) for n in ('1', '300', 30)) #doctest: +SKIP
234 |         #...
235 |         ---------------------------------------------------------------------------
236 |         Sub-process traceback:
237 |         ---------------------------------------------------------------------------
238 |         TypeError                                    Fri Jul  2 20:32:05 2010
239 |         PID: 4151                          Python 2.6.5: /usr/bin/python
240 |         ...........................................................................
241 |         /usr/lib/python2.6/string.pyc in atoi(s=30, base=10)
242 |             398         is chosen from the leading characters of s, 0 for octal, 0x or
243 |             399         0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
244 |             400         accepted.
245 |             401 
246 |             402         """
247 |         --> 403         return _int(s, base)
248 |             404 
249 |             405 
250 |             406 # Convert string to long integer
251 |             407 def atol(s, base=10):
252 | 
253 |         TypeError: int() can't convert non-string with explicit base
254 |         ___________________________________________________________________________
255 | 
256 |         Using pre_dispatch in a producer/consumer situation, where the
257 |         data is generated on the fly. Note how the producer is first
258 |         called 3 times before the parallel loop is initiated, and then
259 |         called to generate new data on the fly.
In this case the total 260 | number of iterations reported is underestimated:: 261 | 262 | >>> from math import sqrt 263 | >>> from joblib import Parallel, delayed 264 | 265 | >>> def producer(): 266 | ... for i in range(6): 267 | ... print 'Produced %s' % i 268 | ... yield i 269 | 270 | >>> out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')( 271 | ... delayed(sqrt)(i) for i in producer()) #doctest: +SKIP 272 | Produced 0 273 | Produced 1 274 | Produced 2 275 | [Parallel(n_jobs=2)]: Done 1 out of 3+ |elapsed: ...s remaining: ...s 276 | Produced 3 277 | [Parallel(n_jobs=2)]: Done ... out of 4+ |elapsed: ...s remaining: ...s 278 | ... 279 | 280 | ''' 281 | def __init__(self, n_jobs=None, verbose=0, pre_dispatch='all'): 282 | self.verbose = verbose 283 | self.n_jobs = n_jobs 284 | self.pre_dispatch = pre_dispatch 285 | self._pool = None 286 | # Not starting the pool in the __init__ is a design decision, to be 287 | # able to close it ASAP, and not burden the user with closing it. 288 | self._output = None 289 | self._jobs = list() 290 | 291 | def dispatch(self, func, args, kwargs): 292 | """ Queue the function for computing, with or without multiprocessing 293 | """ 294 | if self._pool is None: 295 | job = ImmediateApply(func, args, kwargs) 296 | if self.verbose: 297 | print '[%s]: Done job %3i | elapsed: %s' % ( 298 | self, len(self._jobs) + 1, 299 | short_format_time(time.time() - self._start_time) 300 | ) 301 | self._jobs.append(job) 302 | self.n_dispatched += 1 303 | else: 304 | self._lock.acquire() 305 | # If job.get() catches an exception, it closes the queue: 306 | try: 307 | job = self._pool.apply_async(SafeFunction(func), args, 308 | kwargs, callback=CallBack(self.n_dispatched, self)) 309 | self._jobs.append(job) 310 | self.n_dispatched += 1 311 | except AssertionError: 312 | print '[Parallel] Pool seems closed' 313 | finally: 314 | self._lock.release() 315 | 316 | def dispatch_next(self): 317 | """ Dispatch more data for parallel processing 318 | """ 319 | self._dispatch_amount += 1 320 | while self._dispatch_amount: 321 | try: 322 | # XXX: possible race condition shuffling the order of 323 | # dispatchs in the next two lines. 324 | func, args, kwargs = self._iterable.next() 325 | self.dispatch(func, args, kwargs) 326 | self._dispatch_amount -= 1 327 | except ValueError: 328 | """ Race condition in accessing a generator, we skip, 329 | the dispatch will be done later. 
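                    (The ValueError here is presumably CPython's
                    "generator already executing", raised when the
                    callback thread and the main thread hit the same
                    generator at once.)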
330 | """ 331 | except StopIteration: 332 | self._iterable = None 333 | return 334 | 335 | def retrieve(self): 336 | self._output = list() 337 | while self._jobs: 338 | # We need to be careful: the job queue can be filling up as 339 | # we empty it 340 | if hasattr(self, '_lock'): 341 | self._lock.acquire() 342 | job = self._jobs.pop(0) 343 | if hasattr(self, '_lock'): 344 | self._lock.release() 345 | try: 346 | self._output.append(job.get()) 347 | except tuple(self.exceptions), exception: 348 | if isinstance(exception, 349 | (KeyboardInterrupt, WorkerInterrupt)): 350 | # We have captured a user interruption, clean up 351 | # everything 352 | if hasattr(self, '_pool'): 353 | self._pool.close() 354 | self._pool.terminate() 355 | raise exception 356 | elif isinstance(exception, TransportableException): 357 | # Capture exception to add information on the local stack 358 | # in addition to the distant stack 359 | this_report = format_outer_frames(context=10, 360 | stack_start=1) 361 | report = """Multiprocessing exception: 362 | %s 363 | --------------------------------------------------------------------------- 364 | Sub-process traceback: 365 | --------------------------------------------------------------------------- 366 | %s""" % ( 367 | this_report, 368 | exception.message, 369 | ) 370 | # Convert this to a JoblibException 371 | exception_type = _mk_exception(exception.etype)[0] 372 | raise exception_type(report) 373 | raise exception 374 | 375 | def __call__(self, iterable): 376 | if self._jobs: 377 | raise ValueError('This Parallel instance is already running') 378 | n_jobs = self.n_jobs 379 | if n_jobs == -1 and multiprocessing is not None: 380 | n_jobs = multiprocessing.cpu_count() 381 | 382 | # The list of exceptions that we will capture 383 | self.exceptions = [TransportableException] 384 | if n_jobs is None or multiprocessing is None or n_jobs == 1: 385 | n_jobs = 1 386 | self._pool = None 387 | else: 388 | self._pool = multiprocessing.Pool(n_jobs) 389 | self._lock = threading.Lock() 390 | # We are using multiprocessing, we also want to capture 391 | # KeyboardInterrupts 392 | self.exceptions.extend([KeyboardInterrupt, WorkerInterrupt]) 393 | 394 | if self.pre_dispatch == 'all' or n_jobs == 1: 395 | self._iterable = None 396 | else: 397 | self._iterable = iterable 398 | self._dispatch_amount = 0 399 | pre_dispatch = self.pre_dispatch 400 | if hasattr(pre_dispatch, 'endswith'): 401 | pre_dispatch = eval(pre_dispatch) 402 | pre_dispatch = int(pre_dispatch) 403 | iterable = itertools.islice(iterable, pre_dispatch) 404 | 405 | self._start_time = time.time() 406 | self.n_dispatched = 0 407 | try: 408 | for function, args, kwargs in iterable: 409 | self.dispatch(function, args, kwargs) 410 | 411 | self.retrieve() 412 | finally: 413 | if n_jobs > 1: 414 | self._pool.close() 415 | self._pool.join() 416 | self._jobs = list() 417 | output = self._output 418 | self._output = None 419 | return output 420 | 421 | def __repr__(self): 422 | return '%s(n_jobs=%s)' % ( self.__class__.__name__, self.n_jobs) 423 | -------------------------------------------------------------------------------- /lib/joblib/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splunk/splunk-app-splunkgit/11658bd1000464e09064a2d7cc6937fa1af75f2b/lib/joblib/test/__init__.py -------------------------------------------------------------------------------- /lib/joblib/test/common.py: 
--------------------------------------------------------------------------------
1 | """
2 | Small utilities for testing.
3 | """
4 | import nose
5 | 
6 | # A decorator to run tests only when numpy is available
7 | try:
8 |     import numpy as np
9 | 
10 |     def with_numpy(func):
11 |         """ A decorator for tests requiring numpy: a no-op when numpy is available.
12 |         """
13 |         return func
14 | 
15 | except ImportError:
16 |     def with_numpy(func):
17 |         """ A decorator that skips tests requiring numpy when it is missing.
18 |         """
19 |         def my_func():
20 |             raise nose.SkipTest('Test requires numpy')
21 |         return my_func
22 |     np = None
--------------------------------------------------------------------------------
/lib/joblib/test/test_disk.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests for the disk utilities.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2010 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | import os
10 | import shutil
11 | import array
12 | from tempfile import mkdtemp
13 | 
14 | import nose
15 | 
16 | from ..disk import memstr_to_kbytes, disk_used
17 | 
18 | 
19 | ###############################################################################
20 | 
21 | def test_disk_used():
22 |     cachedir = mkdtemp()
23 |     try:
24 |         if os.path.exists(cachedir):
25 |             shutil.rmtree(cachedir)
26 |         os.mkdir(cachedir)
27 |         # Now write a file that is 1MB big in this directory, and check the
28 |         # size. The reason we use such a big file is that it makes us robust
29 |         # to errors due to block allocation.
30 |         a = array.array('i')
31 |         sizeof_i = a.itemsize
32 |         target_size = 1024
33 |         n = target_size * 1024 / sizeof_i
34 |         a = array.array('i', n * (1,))
35 |         a.tofile(file(os.path.join(cachedir, 'test'), 'wb'))
36 |         nose.tools.assert_true(disk_used(cachedir) >= target_size)
37 |         nose.tools.assert_true(disk_used(cachedir) < target_size + 12)
38 |     finally:
39 |         shutil.rmtree(cachedir)
40 | 
41 | 
42 | def test_memstr_to_kbytes():
43 |     for text, value in zip(('80G', '1.4M', '120M', '53K'),
44 |                            (80 * 1024 ** 2, int(1.4 * 1024), 120 * 1024, 53)):
45 |         yield nose.tools.assert_equal, memstr_to_kbytes(text), value
46 | 
47 |     nose.tools.assert_raises(ValueError, memstr_to_kbytes, 'foobar')
--------------------------------------------------------------------------------
/lib/joblib/test/test_format_stack.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests for the stack formatting utilities
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2010 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | import nose
10 | 
11 | from ..format_stack import safe_repr
12 | 
13 | 
14 | ###############################################################################
15 | 
16 | class Vicious(object):
17 |     def __repr__(self):
18 |         raise ValueError
19 | 
20 | 
21 | def test_safe_repr():
22 |     safe_repr(Vicious())
--------------------------------------------------------------------------------
/lib/joblib/test/test_func_inspect.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the func_inspect module.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2009 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
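# Usage note: these vendored joblib tests target the Python 2 / nose
# toolchain. Assuming nose is installed and lib/ is on the import path,
# a typical invocation might be:
#
#     nosetests lib/joblib/test/test_func_inspect.py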
8 | 9 | import nose 10 | import tempfile 11 | import functools 12 | 13 | from ..func_inspect import filter_args, get_func_name, get_func_code 14 | from ..memory import Memory 15 | 16 | 17 | ############################################################################### 18 | # Module-level functions, for tests 19 | def f(x, y=0): 20 | pass 21 | 22 | 23 | def f2(x): 24 | pass 25 | 26 | 27 | # Create a Memory object to test decorated functions. 28 | # We should be careful not to call the decorated functions, so that 29 | # cache directories are not created in the temp dir. 30 | mem = Memory(cachedir=tempfile.gettempdir()) 31 | 32 | 33 | @mem.cache 34 | def g(x): 35 | return x 36 | 37 | 38 | def h(x, y=0, *args, **kwargs): 39 | pass 40 | 41 | 42 | def i(x=1): 43 | pass 44 | 45 | 46 | def j(x, y, **kwargs): 47 | pass 48 | 49 | 50 | def k(*args, **kwargs): 51 | pass 52 | 53 | 54 | class Klass(object): 55 | 56 | def f(self, x): 57 | return x 58 | 59 | 60 | ############################################################################### 61 | # Tests 62 | 63 | def test_filter_args(): 64 | yield nose.tools.assert_equal, filter_args(f, [], 1), {'x': 1, 'y': 0} 65 | yield nose.tools.assert_equal, filter_args(f, ['x'], 1), {'y': 0} 66 | yield nose.tools.assert_equal, filter_args(f, ['y'], 0), {'x': 0} 67 | yield nose.tools.assert_equal, filter_args(f, ['y'], 0, y=1), {'x': 0} 68 | yield nose.tools.assert_equal, filter_args(f, ['x', 'y'], 0), {} 69 | yield nose.tools.assert_equal, filter_args(f, [], 0, y=1), {'x': 0, 'y': 1} 70 | yield nose.tools.assert_equal, filter_args(f, ['y'], x=2, y=1), {'x': 2} 71 | 72 | yield nose.tools.assert_equal, filter_args(i, [], 2), {'x': 2} 73 | yield nose.tools.assert_equal, filter_args(f2, [], x=1), {'x': 1} 74 | 75 | 76 | def test_filter_args_method(): 77 | obj = Klass() 78 | nose.tools.assert_equal(filter_args(obj.f, [], 1), 79 | {'x': 1, 'self': obj}) 80 | 81 | 82 | def test_filter_varargs(): 83 | yield nose.tools.assert_equal, filter_args(h, [], 1), \ 84 | {'x': 1, 'y': 0, '*': [], '**': {}} 85 | yield nose.tools.assert_equal, filter_args(h, [], 1, 2, 3, 4), \ 86 | {'x': 1, 'y': 2, '*': [3, 4], '**': {}} 87 | yield nose.tools.assert_equal, filter_args(h, [], 1, 25, ee=2), \ 88 | {'x': 1, 'y': 25, '*': [], '**': {'ee': 2}} 89 | yield nose.tools.assert_equal, filter_args(h, ['*'], 1, 2, 25, ee=2), \ 90 | {'x': 1, 'y': 2, '**': {'ee': 2}} 91 | 92 | 93 | def test_filter_kwargs(): 94 | nose.tools.assert_equal(filter_args(k, [], 1, 2, ee=2), 95 | {'*': [1, 2], '**': {'ee': 2}}) 96 | nose.tools.assert_equal(filter_args(k, [], 3, 4), 97 | {'*': [3, 4], '**': {}}) 98 | 99 | 100 | def test_filter_args_2(): 101 | nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2), 102 | {'x': 1, 'y': 2, '**': {'ee': 2}}) 103 | 104 | nose.tools.assert_raises(ValueError, filter_args, f, 'a', None) 105 | # Check that we capture an undefined argument 106 | nose.tools.assert_raises(ValueError, filter_args, f, ['a'], None) 107 | ff = functools.partial(f, 1) 108 | # filter_args has to special-case partial 109 | nose.tools.assert_equal(filter_args(ff, [], 1), 110 | {'*': [1], '**': {}}) 111 | nose.tools.assert_equal(filter_args(ff, ['y'], 1), 112 | {'*': [1], '**': {}}) 113 | 114 | 115 | def test_func_name(): 116 | yield nose.tools.assert_equal, 'f', get_func_name(f)[1] 117 | # Check that we are not confused by the decoration 118 | yield nose.tools.assert_equal, 'g', get_func_name(g)[1] 119 | 120 | 121 | def test_func_inspect_errors(): 122 | """ Check that func_inspect is robust and will work on 
weird objects 123 | """ 124 | nose.tools.assert_equal(get_func_name('a'.lower)[-1], 'lower') 125 | nose.tools.assert_equal(get_func_code('a'.lower)[1:], (None, -1)) 126 | ff = lambda x: x 127 | nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], 128 | '') 129 | nose.tools.assert_equal(get_func_code(ff)[1], 130 | __file__.replace('.pyc', '.py')) 131 | # Simulate a function defined in __main__ 132 | ff.__module__ = '__main__' 133 | nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1], 134 | '') 135 | nose.tools.assert_equal(get_func_code(ff)[1], 136 | __file__.replace('.pyc', '.py')) 137 | 138 | 139 | def test_bound_methods(): 140 | """ Make sure that calling the same method on two different instances 141 | of the same class does resolv to different signatures. 142 | """ 143 | a = Klass() 144 | b = Klass() 145 | nose.tools.assert_not_equal(filter_args(a.f, [], 1), 146 | filter_args(b.f, [], 1)) 147 | 148 | 149 | def test_filter_args_error_msg(): 150 | """ Make sure that filter_args returns decent error messages, for the 151 | sake of the user. 152 | """ 153 | nose.tools.assert_raises(ValueError, filter_args, f, []) 154 | -------------------------------------------------------------------------------- /lib/joblib/test/test_hashing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the hashing module. 3 | """ 4 | 5 | # Author: Gael Varoquaux 6 | # Copyright (c) 2009 Gael Varoquaux 7 | # License: BSD Style, 3 clauses. 8 | 9 | import nose 10 | import time 11 | import hashlib 12 | import tempfile 13 | import os 14 | import gc 15 | import StringIO 16 | 17 | from ..hashing import hash 18 | from ..func_inspect import filter_args 19 | from ..memory import Memory 20 | from .common import np, with_numpy 21 | 22 | from test_memory import env as test_memory_env 23 | from test_memory import setup_module as test_memory_setup_func 24 | from test_memory import teardown_module as test_memory_teardown_func 25 | 26 | 27 | ############################################################################### 28 | # Helper functions for the tests 29 | def time_func(func, *args): 30 | """ Time function func on *args. 31 | """ 32 | times = list() 33 | for _ in range(3): 34 | t1 = time.time() 35 | func(*args) 36 | times.append(time.time() - t1) 37 | return min(times) 38 | 39 | 40 | def relative_time(func1, func2, *args): 41 | """ Return the relative time between func1 and func2 applied on 42 | *args. 43 | """ 44 | time_func1 = time_func(func1, *args) 45 | time_func2 = time_func(func2, *args) 46 | relative_diff = 0.5 * (abs(time_func1 - time_func2) 47 | / (time_func1 + time_func2)) 48 | return relative_diff 49 | 50 | 51 | class Klass(object): 52 | 53 | def f(self, x): 54 | return x 55 | 56 | 57 | class KlassWithCachedMethod(object): 58 | 59 | def __init__(self): 60 | mem = Memory(cachedir=test_memory_env['dir']) 61 | self.f = mem.cache(self.f) 62 | 63 | def f(self, x): 64 | return x 65 | 66 | 67 | ############################################################################### 68 | # Tests 69 | 70 | def test_trival_hash(): 71 | """ Smoke test hash on various types. 
72 |     """
73 |     obj_list = [1, 1., 1 + 1j,
74 |                 'a',
75 |                 (1, ), [1, ], {1:1},
76 |                 None,
77 |                 ]
78 |     for obj1 in obj_list:
79 |         for obj2 in obj_list:
80 |             yield nose.tools.assert_equal, hash(obj1) == hash(obj2), \
81 |                 obj1 is obj2
82 | 
83 | 
84 | def test_hash_methods():
85 |     """ Check that hashing instance methods works """
86 |     a = StringIO.StringIO('a')
87 |     b = StringIO.StringIO('b')
88 |     nose.tools.assert_equal(hash(a.flush), hash(a.flush))
89 |     nose.tools.assert_not_equal(hash(a.flush), hash(b.flush))
90 | 
91 | 
92 | @with_numpy
93 | def test_hash_numpy():
94 |     """ Test hashing with numpy arrays.
95 |     """
96 |     arr1 = np.random.random((10, 10))
97 |     arr2 = arr1.copy()
98 |     arr3 = arr2.copy()
99 |     arr3[0] += 1
100 |     obj_list = (arr1, arr2, arr3)
101 |     for obj1 in obj_list:
102 |         for obj2 in obj_list:
103 |             yield nose.tools.assert_equal, hash(obj1) == hash(obj2), \
104 |                 np.all(obj1 == obj2)
105 | 
106 |     d1 = {1: arr1, 2: arr1}
107 |     d2 = {1: arr2, 2: arr2}
108 |     yield nose.tools.assert_equal, hash(d1), hash(d2)
109 | 
110 |     d3 = {1: arr2, 2: arr3}
111 |     yield nose.tools.assert_not_equal, hash(d1), hash(d3)
112 | 
113 |     yield nose.tools.assert_not_equal, hash(arr1), hash(arr1.T)
114 | 
115 | 
116 | @with_numpy
117 | def test_hash_memmap():
118 |     """ Check that memmap and arrays hash identically if coerce_mmap is
119 |         True.
120 |     """
121 |     filename = tempfile.mktemp()
122 |     try:
123 |         m = np.memmap(filename, shape=(10, 10), mode='w+')
124 |         a = np.asarray(m)
125 |         for coerce_mmap in (False, True):
126 |             yield (nose.tools.assert_equal,
127 |                    hash(a, coerce_mmap=coerce_mmap)
128 |                         == hash(m, coerce_mmap=coerce_mmap),
129 |                    coerce_mmap)
130 |     finally:
131 |         if 'm' in locals():
132 |             del m
133 |             # Force a garbage-collection cycle, to be certain that the
134 |             # object is deleted, and we don't run into a problem under
135 |             # Windows with a file handle still open.
136 |             gc.collect()
137 |             try:
138 |                 os.unlink(filename)
139 |             except OSError, e:
140 |                 # Under windows, some files don't get erased.
141 |                 if not os.name == 'nt':
142 |                     raise e
143 | 
144 | 
145 | @with_numpy
146 | def test_hash_numpy_performance():
147 |     """ Check the performance of hashing numpy arrays:
148 | 
149 |         In [22]: a = np.random.random(1000000)
150 | 
151 |         In [23]: %timeit hashlib.md5(a).hexdigest()
152 |         100 loops, best of 3: 20.7 ms per loop
153 | 
154 |         In [24]: %timeit hashlib.md5(pickle.dumps(a, protocol=2)).hexdigest()
155 |         1 loops, best of 3: 73.1 ms per loop
156 | 
157 |         In [25]: %timeit hashlib.md5(cPickle.dumps(a, protocol=2)).hexdigest()
158 |         10 loops, best of 3: 53.9 ms per loop
159 | 
160 |         In [26]: %timeit hash(a)
161 |         100 loops, best of 3: 20.8 ms per loop
162 |     """
163 |     a = np.random.random(1000000)
164 |     md5_hash = lambda x: hashlib.md5(np.getbuffer(x)).hexdigest()
165 | 
166 |     relative_diff = relative_time(md5_hash, hash, a)
167 |     yield nose.tools.assert_true, relative_diff < 0.1
168 | 
169 |     # Check that hashing a tuple of 3 arrays takes approximately
170 |     # 3 times as much as hashing one array
171 |     time_hashlib = 3 * time_func(md5_hash, a)
172 |     time_hash = time_func(hash, (a, a, a))
173 |     relative_diff = 0.5 * (abs(time_hash - time_hashlib)
174 |                            / (time_hash + time_hashlib))
175 | 
176 |     yield nose.tools.assert_true, relative_diff < 0.2
177 | 
178 | 
179 | def test_bound_methods_hash():
180 |     """ Make sure that calling the same method on two different instances
181 |         of the same class does resolve to the same hashes.
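        (This presumably works because Hasher.save decomposes a bound
        method into (function name, instance, class), and two freshly
        constructed Klass instances pickle to identical bytes.)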
182 |     """
183 |     a = Klass()
184 |     b = Klass()
185 |     nose.tools.assert_equal(hash(filter_args(a.f, [], 1)),
186 |                             hash(filter_args(b.f, [], 1)))
187 | 
188 | 
189 | @nose.tools.with_setup(test_memory_setup_func, test_memory_teardown_func)
190 | def test_bound_cached_methods_hash():
191 |     """ Make sure that calling the same _cached_ method on two different
192 |         instances of the same class does resolve to the same hashes.
193 |     """
194 |     a = KlassWithCachedMethod()
195 |     b = KlassWithCachedMethod()
196 |     nose.tools.assert_equal(hash(filter_args(a.f.func, [], 1)),
197 |                             hash(filter_args(b.f.func, [], 1)))
--------------------------------------------------------------------------------
/lib/joblib/test/test_logger.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the logger module.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2009 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | import shutil
10 | import os
11 | import sys
12 | import StringIO
13 | from tempfile import mkdtemp
14 | import re
15 | 
16 | import nose
17 | 
18 | from ..logger import PrintTime
19 | 
20 | 
21 | ###############################################################################
22 | # Test fixtures
23 | env = dict()
24 | 
25 | 
26 | def setup():
27 |     """ Test setup.
28 |     """
29 |     cachedir = mkdtemp()
30 |     if os.path.exists(cachedir):
31 |         shutil.rmtree(cachedir)
32 |     env['dir'] = cachedir
33 | 
34 | 
35 | def teardown():
36 |     """ Test teardown.
37 |     """
38 |     #return True
39 |     shutil.rmtree(env['dir'])
40 | 
41 | 
42 | ###############################################################################
43 | # Tests
44 | def test_print_time():
45 |     """ A simple smoke test for PrintTime.
46 |     """
47 |     try:
48 |         orig_stderr = sys.stderr
49 |         sys.stderr = StringIO.StringIO()
50 |         print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log'))
51 |         print_time('Foo')
52 |         # Create a second time, to smoke test log rotation.
53 |         print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log'))
54 |         print_time('Foo')
55 |         # And a third time
56 |         print_time = PrintTime(logfile=os.path.join(env['dir'], 'test.log'))
57 |         print_time('Foo')
58 |         printed_text = sys.stderr.getvalue()
59 |         # Use regexps to be robust to time variations
60 |         match = r"Foo: 0\..s, 0\.0min\nFoo: 0\..s, 0.0min\nFoo: " + \
61 |                 r".\..s, 0.0min\n"
62 |         if not re.match(match, printed_text):
63 |             raise AssertionError('Expected %s, got %s' %
64 |                                  (match, printed_text))
65 |     finally:
66 |         sys.stderr = orig_stderr
--------------------------------------------------------------------------------
/lib/joblib/test/test_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the memory module.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2009 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | from __future__ import with_statement
10 | 
11 | import shutil
12 | import os
13 | from tempfile import mkdtemp
14 | import pickle
15 | import warnings
16 | 
17 | import nose
18 | 
19 | from ..memory import Memory, MemorizedFunc
20 | from .common import with_numpy, np
21 | 
22 | 
23 | ###############################################################################
24 | # Module-level definitions for the tests
25 | def f(x, y=1):
26 |     """ A module-level function for testing purposes.
27 | """ 28 | return x**2 + y 29 | 30 | 31 | ############################################################################### 32 | # Test fixtures 33 | env = dict() 34 | 35 | 36 | def setup_module(): 37 | """ Test setup. 38 | """ 39 | cachedir = mkdtemp() 40 | #cachedir = 'foobar' 41 | env['dir'] = cachedir 42 | if os.path.exists(cachedir): 43 | shutil.rmtree(cachedir) 44 | # Don't make the cachedir, Memory should be able to do that on the fly 45 | print 80 * '_' 46 | print 'test_memory setup' 47 | print 80 * '_' 48 | 49 | 50 | def _rmtree_onerror(func, path, excinfo): 51 | print '!' * 79 52 | print 'os function failed:', repr(func) 53 | print 'file to be removed:', path 54 | print 'exception was:', excinfo[1] 55 | print '!' * 79 56 | 57 | 58 | def teardown_module(): 59 | """ Test teardown. 60 | """ 61 | shutil.rmtree(env['dir'], False, _rmtree_onerror) 62 | print 80 * '_' 63 | print 'test_memory teardown' 64 | print 80 * '_' 65 | 66 | 67 | ############################################################################### 68 | # Helper function for the tests 69 | def check_identity_lazy(func, accumulator): 70 | """ Given a function and an accumulator (a list that grows every 71 | time the function is called, check that the function can be 72 | decorated by memory to be a lazy identity. 73 | """ 74 | # Call each function with several arguments, and check that it is 75 | # evaluated only once per argument. 76 | memory = Memory(cachedir=env['dir'], verbose=0) 77 | memory.clear(warn=False) 78 | func = memory.cache(func) 79 | for i in range(3): 80 | for _ in range(2): 81 | yield nose.tools.assert_equal, func(i), i 82 | yield nose.tools.assert_equal, len(accumulator), i + 1 83 | 84 | 85 | ############################################################################### 86 | # Tests 87 | def test_memory_integration(): 88 | """ Simple test of memory lazy evaluation. 89 | """ 90 | accumulator = list() 91 | # Rmk: this function has the same name than a module-level function, 92 | # thus it serves as a test to see that both are identified 93 | # as different. 
94 | 95 | def f(l): 96 | accumulator.append(1) 97 | return l 98 | 99 | for test in check_identity_lazy(f, accumulator): 100 | yield test 101 | 102 | # Now test clearing 103 | memory = Memory(cachedir=env['dir'], verbose=0) 104 | # First clear the cache directory, to check that our code can 105 | # handle that 106 | # NOTE: this line would raise an exception, as the database file is still 107 | # open; we ignore the error since we want to test what happens if the 108 | # directory disappears 109 | shutil.rmtree(env['dir'], ignore_errors=True) 110 | g = memory.cache(f) 111 | g(1) 112 | g.clear(warn=False) 113 | current_accumulator = len(accumulator) 114 | out = g(1) 115 | yield nose.tools.assert_equal, len(accumulator), \ 116 | current_accumulator + 1 117 | # Also, check that Memory.eval works similarly 118 | yield nose.tools.assert_equal, memory.eval(f, 1), out 119 | yield nose.tools.assert_equal, len(accumulator), \ 120 | current_accumulator + 1 121 | 122 | 123 | def test_no_memory(): 124 | """ Test memory with cachedir=None: no memoize """ 125 | accumulator = list() 126 | 127 | def ff(l): 128 | accumulator.append(1) 129 | return l 130 | 131 | mem = Memory(cachedir=None, verbose=0) 132 | gg = mem.cache(ff) 133 | for _ in range(4): 134 | current_accumulator = len(accumulator) 135 | gg(1) 136 | yield nose.tools.assert_equal, len(accumulator), \ 137 | current_accumulator + 1 138 | 139 | 140 | def test_memory_kwarg(): 141 | " Test memory with a function with keyword arguments." 142 | accumulator = list() 143 | 144 | def g(l=None, m=1): 145 | accumulator.append(1) 146 | return l 147 | 148 | for test in check_identity_lazy(g, accumulator): 149 | yield test 150 | 151 | memory = Memory(cachedir=env['dir'], verbose=0) 152 | g = memory.cache(g) 153 | # Smoke test with an explicit keyword argument: 154 | nose.tools.assert_equal(g(l=30, m=2), 30) 155 | 156 | 157 | def test_memory_lambda(): 158 | " Test memory with a function with a lambda." 159 | accumulator = list() 160 | 161 | def helper(x): 162 | """ A helper function to define l as a lambda. 163 | """ 164 | accumulator.append(1) 165 | return x 166 | 167 | l = lambda x: helper(x) 168 | 169 | for test in check_identity_lazy(l, accumulator): 170 | yield test 171 | 172 | 173 | def test_memory_name_collision(): 174 | " Check that name collisions with functions will raise warnings" 175 | memory = Memory(cachedir=env['dir'], verbose=0) 176 | 177 | @memory.cache 178 | def name_collision(x): 179 | """ A first function called name_collision 180 | """ 181 | return x 182 | 183 | a = name_collision 184 | 185 | @memory.cache 186 | def name_collision(x): 187 | """ A second function called name_collision 188 | """ 189 | return x 190 | 191 | b = name_collision 192 | 193 | if not hasattr(warnings, 'catch_warnings'): 194 | # catch_warnings is new in Python 2.6 195 | return 196 | 197 | with warnings.catch_warnings(record=True) as w: 198 | # Cause all warnings to always be triggered. 
199 |         warnings.simplefilter("always")
200 |         a(1)
201 |         b(1)
202 | 
203 |     yield nose.tools.assert_equal, len(w), 1
204 |     yield nose.tools.assert_true, "collision" in str(w[-1].message)
205 | 
206 | 
207 | def test_memory_warning_lambda_collisions():
208 |     " Check that multiple uses of lambdas will raise collision warnings"
209 |     memory = Memory(cachedir=env['dir'], verbose=0)
210 |     a = lambda x: x
211 |     a = memory.cache(a)
212 |     b = lambda x: x + 1
213 |     b = memory.cache(b)
214 | 
215 |     if not hasattr(warnings, 'catch_warnings'):
216 |         # catch_warnings is new in Python 2.6
217 |         return
218 | 
219 |     with warnings.catch_warnings(record=True) as w:
220 |         # Cause all warnings to always be triggered.
221 |         warnings.simplefilter("always")
222 |         a(1)
223 |         b(1)
224 | 
225 |     yield nose.tools.assert_equal, len(w), 2
226 |     yield nose.tools.assert_true, "collision" in str(w[-1].message)
227 |     yield nose.tools.assert_true, "collision" in str(w[-2].message)
228 | 
229 | 
230 | def test_memory_warning_collision_detection():
231 |     """ Check that collisions impossible to detect will raise appropriate
232 |         warnings.
233 |     """
234 |     memory = Memory(cachedir=env['dir'], verbose=0)
235 |     a = eval('lambda x: x')
236 |     a = memory.cache(a)
237 |     b = eval('lambda x: x+1')
238 |     b = memory.cache(b)
239 | 
240 |     if not hasattr(warnings, 'catch_warnings'):
241 |         # catch_warnings is new in Python 2.6
242 |         return
243 | 
244 |     with warnings.catch_warnings(record=True) as w:
245 |         # Cause all warnings to always be triggered.
246 |         warnings.simplefilter("always")
247 |         a(1)
248 |         b(1)
249 | 
250 |     yield nose.tools.assert_equal, len(w), 1
251 |     yield nose.tools.assert_true, \
252 |         "cannot detect" in str(w[-1].message).lower()
253 | 
254 | 
255 | def test_memory_partial():
256 |     " Test memory with functools.partial."
257 |     accumulator = list()
258 | 
259 |     def func(x, y):
260 |         """ A helper function, partially applied below via functools.partial.
261 |         """
262 |         accumulator.append(1)
263 |         return y
264 | 
265 |     import functools
266 |     function = functools.partial(func, 1)
267 | 
268 |     for test in check_identity_lazy(function, accumulator):
269 |         yield test
270 | 
271 | 
272 | def test_memory_eval():
273 |     " Smoke test memory with a function defined in an eval."
274 |     memory = Memory(cachedir=env['dir'], verbose=0)
275 | 
276 |     m = eval('lambda x: x')
277 |     mm = memory.cache(m)
278 | 
279 |     yield nose.tools.assert_equal, 1, mm(1)
280 | 
281 | 
282 | def count_and_append(x=[]):
283 |     """ A function with a side effect in its arguments.
284 | 
285 |         Return the length of its argument and append one element.
286 |     """
287 |     len_x = len(x)
288 |     x.append(None)
289 |     return len_x
290 | 
291 | 
292 | def test_argument_change():
293 |     """ Check that if a function has a side effect on its arguments, the
294 |         hash of the changed arguments is used on the next call.
295 |     """
296 |     mem = Memory(cachedir=env['dir'], verbose=0)
297 |     func = mem.cache(count_and_append)
298 |     # call the function for the first time; it should be cached with
299 |     # argument x=[]
300 |     assert func() == 0
301 |     # the second time the argument is x=[None], which is not cached
302 |     # yet, so the function should be called a second time
303 |     assert func() == 1
304 | 
305 | 
306 | @with_numpy
307 | def test_memory_numpy():
308 |     " Test memory with a function with numpy arrays."
309 |     # Check with memmapping and without.
310 | for mmap_mode in (None, 'r'): 311 | accumulator = list() 312 | 313 | def n(l=None): 314 | accumulator.append(1) 315 | return l 316 | 317 | memory = Memory(cachedir=env['dir'], mmap_mode=mmap_mode, 318 | verbose=0) 319 | memory.clear(warn=False) 320 | cached_n = memory.cache(n) 321 | for i in range(3): 322 | a = np.random.random((10, 10)) 323 | for _ in range(3): 324 | yield nose.tools.assert_true, np.all(cached_n(a) == a) 325 | yield nose.tools.assert_equal, len(accumulator), i + 1 326 | 327 | 328 | def test_memory_exception(): 329 | """ Smoketest the exception handling of Memory. 330 | """ 331 | memory = Memory(cachedir=env['dir'], verbose=0) 332 | 333 | class MyException(Exception): 334 | pass 335 | 336 | @memory.cache 337 | def h(exc=0): 338 | if exc: 339 | raise MyException 340 | 341 | # Call once, to initialise the cache 342 | h() 343 | 344 | for _ in range(3): 345 | # Call 3 times, to be sure that the Exception is always raised 346 | yield nose.tools.assert_raises, MyException, h, 1 347 | 348 | 349 | def test_memory_ignore(): 350 | " Test the ignore feature of memory " 351 | memory = Memory(cachedir=env['dir'], verbose=0) 352 | accumulator = list() 353 | 354 | @memory.cache(ignore=['y']) 355 | def z(x, y=1): 356 | accumulator.append(1) 357 | 358 | yield nose.tools.assert_equal, z.ignore, ['y'] 359 | 360 | z(0, y=1) 361 | yield nose.tools.assert_equal, len(accumulator), 1 362 | z(0, y=1) 363 | yield nose.tools.assert_equal, len(accumulator), 1 364 | z(0, y=2) 365 | yield nose.tools.assert_equal, len(accumulator), 1 366 | 367 | 368 | def test_func_dir(): 369 | """ Test the creation of the memory cache directory for the function. 370 | """ 371 | memory = Memory(cachedir=env['dir'], verbose=0) 372 | path = __name__.split('.') 373 | path.append('f') 374 | path = os.path.join(env['dir'], 'joblib', *path) 375 | 376 | g = memory.cache(f) 377 | # Test that the function directory is created on demand 378 | yield nose.tools.assert_equal, g._get_func_dir(), path 379 | yield nose.tools.assert_true, os.path.exists(path) 380 | 381 | # Test that the code is stored. 382 | yield nose.tools.assert_false, \ 383 | g._check_previous_func_code() 384 | yield nose.tools.assert_true, \ 385 | os.path.exists(os.path.join(path, 'func_code.py')) 386 | yield nose.tools.assert_true, \ 387 | g._check_previous_func_code() 388 | 389 | # Test the robustness to failure of loading previous results. 390 | dir, _ = g.get_output_dir(1) 391 | a = g(1) 392 | yield nose.tools.assert_true, os.path.exists(dir) 393 | os.remove(os.path.join(dir, 'output.pkl')) 394 | yield nose.tools.assert_equal, a, g(1) 395 | 396 | 397 | def test_persistence(): 398 | """ Test the memorized functions can be pickled and restored. 399 | """ 400 | memory = Memory(cachedir=env['dir'], verbose=0) 401 | g = memory.cache(f) 402 | output = g(1) 403 | 404 | h = pickle.loads(pickle.dumps(g)) 405 | 406 | output_dir, _ = g.get_output_dir(1) 407 | yield nose.tools.assert_equal, output, h.load_output(output_dir) 408 | 409 | 410 | def test_format_signature(): 411 | """ Test the signature formatting. 
412 | """ 413 | func = MemorizedFunc(f, cachedir=env['dir']) 414 | path, sgn = func.format_signature(f, range(10)) 415 | yield nose.tools.assert_equal, \ 416 | sgn, \ 417 | 'f([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])' 418 | path, sgn = func.format_signature(f, range(10), y=range(10)) 419 | yield nose.tools.assert_equal, \ 420 | sgn, \ 421 | 'f([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], y=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])' 422 | 423 | 424 | @with_numpy 425 | def test_format_signature_numpy(): 426 | """ Test the format signature formatting with numpy. 427 | """ 428 | -------------------------------------------------------------------------------- /lib/joblib/test/test_my_exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test my automatically generate exceptions 3 | """ 4 | from nose.tools import assert_true 5 | 6 | from .. import my_exceptions 7 | 8 | 9 | def test_inheritance(): 10 | assert_true(isinstance(my_exceptions.JoblibNameError(), NameError)) 11 | assert_true(isinstance(my_exceptions.JoblibNameError(), 12 | my_exceptions.JoblibException)) 13 | assert_true(my_exceptions.JoblibNameError is 14 | my_exceptions._mk_exception(NameError)[0]) 15 | -------------------------------------------------------------------------------- /lib/joblib/test/test_numpy_pickle.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the numpy pickler as a replacement of the standard pickler. 3 | 4 | """ 5 | 6 | from tempfile import mkdtemp 7 | import copy 8 | import shutil 9 | import os 10 | 11 | import nose 12 | 13 | from .common import np, with_numpy 14 | 15 | # numpy_pickle is not a drop-in replacement of pickle, as it takes 16 | # filenames instead of open files as arguments. 17 | from .. import numpy_pickle 18 | 19 | ############################################################################### 20 | # Define a list of standard types. 21 | # Borrowed from dill, initial author: Micheal McKerns: 22 | # http://dev.danse.us/trac/pathos/browser/dill/dill_test2.py 23 | 24 | typelist = [] 25 | 26 | # testing types 27 | _none = None 28 | typelist.append(_none) 29 | _type = type 30 | typelist.append(_type) 31 | _bool = bool(1) 32 | typelist.append(_bool) 33 | _int = int(1) 34 | typelist.append(_int) 35 | _long = long(1) 36 | typelist.append(_long) 37 | _float = float(1) 38 | typelist.append(_float) 39 | _complex = complex(1) 40 | typelist.append(_complex) 41 | _string = str(1) 42 | typelist.append(_string) 43 | _unicode = unicode(1) 44 | typelist.append(_unicode) 45 | _tuple = () 46 | typelist.append(_tuple) 47 | _list = [] 48 | typelist.append(_list) 49 | _dict = {} 50 | typelist.append(_dict) 51 | _file = file 52 | typelist.append(_file) 53 | _buffer = buffer 54 | typelist.append(_buffer) 55 | _builtin = len 56 | typelist.append(_builtin) 57 | 58 | 59 | def _function(x): 60 | yield x 61 | 62 | 63 | class _class: 64 | def _method(self): 65 | pass 66 | 67 | 68 | class _newclass(object): 69 | def _method(self): 70 | pass 71 | 72 | 73 | typelist.append(_function) 74 | typelist.append(_class) 75 | typelist.append(_newclass) # 76 | _instance = _class() 77 | typelist.append(_instance) 78 | _object = _newclass() 79 | typelist.append(_object) # 80 | 81 | 82 | ############################################################################### 83 | # Test fixtures 84 | 85 | env = dict() 86 | 87 | 88 | def setup_module(): 89 | """ Test setup. 
90 | """ 91 | env['dir'] = mkdtemp() 92 | env['filename'] = os.path.join(env['dir'], 'test.pkl') 93 | print 80 * '_' 94 | print 'setup numpy_pickle' 95 | print 80 * '_' 96 | 97 | 98 | def teardown_module(): 99 | """ Test teardown. 100 | """ 101 | shutil.rmtree(env['dir']) 102 | #del env['dir'] 103 | #del env['filename'] 104 | print 80 * '_' 105 | print 'teardown numpy_pickle' 106 | print 80 * '_' 107 | 108 | 109 | ############################################################################### 110 | # Tests 111 | 112 | def test_standard_types(): 113 | #""" Test pickling and saving with standard types. 114 | #""" 115 | filename = env['filename'] 116 | for member in typelist: 117 | numpy_pickle.dump(member, filename) 118 | _member = numpy_pickle.load(filename) 119 | # We compare the pickled instance to the reloaded one only if it 120 | # can be compared to a copied one 121 | if member == copy.deepcopy(member): 122 | yield nose.tools.assert_equal, member, _member 123 | 124 | 125 | @with_numpy 126 | def test_numpy_persistence(): 127 | filename = env['filename'] 128 | a = np.random.random(10) 129 | for obj in (a,), (a, a), [a, a, a]: 130 | filenames = numpy_pickle.dump(obj, filename) 131 | # Check that one file was created per array 132 | yield nose.tools.assert_equal, len(filenames), len(obj) + 1 133 | # Check that these files do exist 134 | for file in filenames: 135 | yield nose.tools.assert_true, \ 136 | os.path.exists(os.path.join(env['dir'], file)) 137 | 138 | # Unpickle the object 139 | obj_ = numpy_pickle.load(filename) 140 | # Check that the items are indeed arrays 141 | for item in obj_: 142 | yield nose.tools.assert_true, isinstance(item, np.ndarray) 143 | # And finally, check that all the values are equal. 144 | yield nose.tools.assert_true, np.all(np.array(obj) == 145 | np.array(obj_)) 146 | 147 | 148 | @with_numpy 149 | def test_memmap_persistence(): 150 | a = np.random.random(10) 151 | filename = env['filename'] 152 | numpy_pickle.dump(a, filename) 153 | b = numpy_pickle.load(filename, mmap_mode='r') 154 | if np.__version__ >= '1.3': 155 | yield nose.tools.assert_true, isinstance(b, np.memmap) 156 | 157 | 158 | @with_numpy 159 | def test_masked_array_persistence(): 160 | # The special-case picker fails, because saving masked_array 161 | # not implemented, but it just delegates to the standard pickler. 162 | a = np.random.random(10) 163 | a = np.ma.masked_greater(a, 0.5) 164 | filename = env['filename'] 165 | numpy_pickle.dump(a, filename) 166 | b = numpy_pickle.load(filename, mmap_mode='r') 167 | nose.tools.assert_true, isinstance(b, np.ma.masked_array) 168 | -------------------------------------------------------------------------------- /lib/joblib/test/test_parallel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the parallel module. 3 | """ 4 | 5 | # Author: Gael Varoquaux 6 | # Copyright (c) 2010-2011 Gael Varoquaux 7 | # License: BSD Style, 3 clauses. 
--------------------------------------------------------------------------------
/lib/joblib/test/test_parallel.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the parallel module.
3 | """
4 | 
5 | # Author: Gael Varoquaux
6 | # Copyright (c) 2010-2011 Gael Varoquaux
7 | # License: BSD Style, 3 clauses.
8 | 
9 | import time
10 | try:
11 |     import cPickle as pickle
12 |     PickleError = TypeError
13 | except ImportError:
14 |     import pickle
15 |     PickleError = pickle.PicklingError
16 | 
17 | from ..parallel import Parallel, delayed, SafeFunction, WorkerInterrupt, \
18 |     multiprocessing, cpu_count
19 | from ..my_exceptions import JoblibException
20 | 
21 | import nose
22 | 
23 | 
24 | ###############################################################################
25 | 
26 | def division(x, y):
27 |     return x / y
28 | 
29 | 
30 | def square(x):
31 |     return x**2
32 | 
33 | 
34 | def exception_raiser(x):
35 |     if x == 7:
36 |         raise ValueError
37 |     return x
38 | 
39 | 
40 | def interrupt_raiser(x):
41 |     time.sleep(.05)
42 |     raise KeyboardInterrupt
43 | 
44 | 
45 | def f(x, y=0, z=0):
46 |     """ A module-level function so that it can be spawned with
47 |         multiprocessing.
48 |     """
49 |     return x**2 + y + z
50 | 
51 | 
52 | ###############################################################################
53 | def test_cpu_count():
54 |     assert cpu_count() > 0
55 | 
56 | 
57 | ###############################################################################
58 | # Test parallel
59 | def test_simple_parallel():
60 |     X = range(10)
61 |     for n_jobs in (1, 2, -1):
62 |         yield (nose.tools.assert_equal, [square(x) for x in X],
63 |                Parallel(n_jobs=n_jobs)(delayed(square)(x) for x in X))
64 | 
65 | 
66 | def test_parallel_kwargs():
67 |     """ Check the keyword argument processing of pmap.
68 |     """
69 |     lst = range(10)
70 |     for n_jobs in (1, 4):
71 |         yield (nose.tools.assert_equal,
72 |                [f(x, y=1) for x in lst],
73 |                Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst)
74 |                )
75 | 
76 | 
77 | def test_parallel_pickling():
78 |     """ Check that pmap captures the errors when it is passed an object
79 |         that cannot be pickled.
80 |     """
81 |     def g(x):
82 |         return x**2
83 |     nose.tools.assert_raises(PickleError,
84 |                              Parallel(),
85 |                              (delayed(g)(x) for x in range(10))
86 |                              )
87 | 
88 | 
89 | def test_error_capture():
90 |     """ Check that errors are captured, and that the correct exceptions
91 |         are raised.
92 |     """
93 |     if multiprocessing is not None:
94 |         # A JoblibException will be raised only if there is indeed
95 |         # multiprocessing
96 |         nose.tools.assert_raises(JoblibException,
97 |                                  Parallel(n_jobs=2),
98 |                                  [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))],
99 |                                  )
100 |         nose.tools.assert_raises(WorkerInterrupt,
101 |                                  Parallel(n_jobs=2),
102 |                                  [delayed(interrupt_raiser)(x) for x in (1, 0)],
103 |                                  )
104 |     else:
105 |         nose.tools.assert_raises(KeyboardInterrupt,
106 |                                  Parallel(n_jobs=2),
107 |                                  [delayed(interrupt_raiser)(x) for x in (1, 0)],
108 |                                  )
109 |     nose.tools.assert_raises(ZeroDivisionError,
110 |                              Parallel(n_jobs=2),
111 |                              [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))],
112 |                              )
113 |     try:
114 |         Parallel(n_jobs=1)(
115 |             delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
116 |     except Exception, e:
117 |         pass
118 |     nose.tools.assert_false(isinstance(e, JoblibException))
119 | 
120 | 
121 | class Counter(object):
122 |     def __init__(self, list1, list2):
123 |         self.list1 = list1
124 |         self.list2 = list2
125 | 
126 |     def __call__(self, i):
127 |         self.list1.append(i)
128 |         nose.tools.assert_equal(len(self.list1), len(self.list2))
129 | 
130 | 
131 | def consumer(queue, item):
132 |     queue.append('Consumed %s' % item)
133 | 
134 | 
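# A minimal sketch (not part of the original test file) of the
# Parallel/delayed pattern the tests above exercise, assuming the package is
# importable as joblib: delayed(f)(x) captures the call lazily as a
# (function, args, kwargs) triple, and Parallel evaluates the captured calls,
# here on two worker processes, returning the results in order.
from joblib import Parallel, delayed

def times_two(x):
    # Module-level, so it can be pickled and shipped to worker processes.
    return 2 * x

results = Parallel(n_jobs=2)(delayed(times_two)(i) for i in range(10))
assert results == [2 * i for i in range(10)]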
137 | """ 138 | queue = list() 139 | 140 | def producer(): 141 | for i in range(6): 142 | queue.append('Produced %i' % i) 143 | yield i 144 | 145 | Parallel(n_jobs=1)(delayed(consumer)(queue, x) for x in producer()) 146 | nose.tools.assert_equal(queue, 147 | ['Produced 0', 'Consumed 0', 148 | 'Produced 1', 'Consumed 1', 149 | 'Produced 2', 'Consumed 2', 150 | 'Produced 3', 'Consumed 3', 151 | 'Produced 4', 'Consumed 4', 152 | 'Produced 5', 'Consumed 5'] 153 | ) 154 | nose.tools.assert_equal(len(queue), 12) 155 | 156 | 157 | def test_dispatch_multiprocessing(): 158 | """ Check that using pre_dispatch Parallel does indeed dispatch items 159 | lazily. 160 | """ 161 | if multiprocessing is None: 162 | return 163 | manager = multiprocessing.Manager() 164 | queue = manager.list() 165 | 166 | def producer(): 167 | for i in range(6): 168 | queue.append('Produced %i' % i) 169 | yield i 170 | 171 | Parallel(n_jobs=2, pre_dispatch=3)(delayed(consumer)(queue, i) 172 | for i in producer()) 173 | nose.tools.assert_equal(list(queue)[:4], 174 | ['Produced 0', 'Produced 1', 'Produced 2', 175 | 'Consumed 0', ]) 176 | nose.tools.assert_equal(len(queue), 12) 177 | 178 | 179 | def test_exception_dispatch(): 180 | "Make sure that exception raised during dispatch are indeed captured" 181 | nose.tools.assert_raises( 182 | ValueError, 183 | Parallel(n_jobs=6, pre_dispatch=16, verbose=0), 184 | (delayed(exception_raiser)(i) for i in range(30)), 185 | ) 186 | 187 | 188 | ############################################################################### 189 | # Test helpers 190 | def test_joblib_exception(): 191 | # Smoke-test the custom exception 192 | e = JoblibException('foobar') 193 | # Test the repr 194 | repr(e) 195 | # Test the pickle 196 | pickle.dumps(e) 197 | 198 | 199 | def test_safe_function(): 200 | safe_division = SafeFunction(division) 201 | nose.tools.assert_raises(JoblibException, safe_division, 1, 0) 202 | -------------------------------------------------------------------------------- /lib/joblib/test/tmp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the parallel module. 3 | """ 4 | 5 | # Author: Gael Varoquaux 6 | # Copyright (c) 2010-2011 Gael Varoquaux 7 | # License: BSD Style, 3 clauses. 8 | 9 | import time 10 | try: 11 | import cPickle as pickle 12 | PickleError = TypeError 13 | except: 14 | import pickle 15 | PickleError = pickle.PicklingError 16 | 17 | from ..parallel import Parallel, delayed, SafeFunction, WorkerInterrupt,\ 18 | multiprocessing 19 | from ..my_exceptions import JoblibException 20 | 21 | import nose 22 | 23 | ################################################################################ 24 | 25 | def division(x, y): 26 | return x/y 27 | 28 | def square(x): 29 | return x**2 30 | 31 | def exception_raiser(x): 32 | if x == 7: 33 | raise ValueError 34 | return x 35 | 36 | def interrupt_raiser(x): 37 | time.sleep(.05) 38 | raise KeyboardInterrupt 39 | 40 | def f(x, y=0, z=0): 41 | """ A module-level function so that it can be spawn with 42 | multiprocessing. 43 | """ 44 | return x**2 + y + z 45 | 46 | ################################################################################ 47 | # Test parallel 48 | def test_simple_parallel(): 49 | X = range(10) 50 | for n_jobs in (1, 2, -1): 51 | yield (nose.tools.assert_equal, [square(x) for x in X], 52 | Parallel(n_jobs=-1)(delayed(square)(x) for x in X)) 53 | 54 | 55 | def test_parallel_kwargs(): 56 | """ Check the keyword argument processing of pmap. 
57 | """ 58 | lst = range(10) 59 | for n_jobs in (1, 4): 60 | yield (nose.tools.assert_equal, 61 | [f(x, y=1) for x in lst], 62 | Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst) 63 | ) 64 | 65 | 66 | def test_parallel_pickling(): 67 | """ Check that pmap captures the errors when it is passed an object 68 | that cannot be pickled. 69 | """ 70 | def g(x): 71 | return x**2 72 | nose.tools.assert_raises(PickleError, 73 | Parallel(), 74 | (delayed(g)(x) for x in range(10)) 75 | ) 76 | 77 | 78 | def test_error_capture(): 79 | """ Check that error are captured, and that correct exceptions 80 | are raised. 81 | """ 82 | if multiprocessing is not None: 83 | # A JoblibException will be raised only if there is indeed 84 | # multiprocessing 85 | nose.tools.assert_raises(JoblibException, 86 | Parallel(n_jobs=2), 87 | [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))], 88 | ) 89 | nose.tools.assert_raises(WorkerInterrupt, 90 | Parallel(n_jobs=2), 91 | [delayed(interrupt_raiser)(x) for x in (1, 0)], 92 | ) 93 | else: 94 | nose.tools.assert_raises(KeyboardInterrupt, 95 | Parallel(n_jobs=2), 96 | [delayed(interrupt_raiser)(x) for x in (1, 0)], 97 | ) 98 | nose.tools.assert_raises(ZeroDivisionError, 99 | Parallel(n_jobs=2), 100 | [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))], 101 | ) 102 | try: 103 | Parallel(n_jobs=1)( 104 | delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))) 105 | except Exception, e: 106 | pass 107 | nose.tools.assert_false(isinstance(e, JoblibException)) 108 | 109 | 110 | class Counter(object): 111 | def __init__(self, consumed, produced, pre_dispatch=0): 112 | self.consumed = consumed 113 | self.produced = produced 114 | self.pre_dispatch = pre_dispatch 115 | 116 | def __call__(self, i, args): 117 | # Cate for 2 use cases: multiprocessing queue and simple lists 118 | if hasattr(self.consumed, 'put'): 119 | self.consumed.put(i) 120 | nose.tools.assert_true(self.consumed.qsize() <= 121 | self.produced.qsize()) 122 | nose.tools.assert_true(self.consumed.qsize() >= 123 | self.produced.qsize() - self.pre_dispatch) 124 | else: 125 | self.consumed.append(i) 126 | nose.tools.assert_equal(len(self.consumed), len(self.produced)) 127 | 128 | 129 | def test_dispatch_one_job(): 130 | """ Test that with only one job, Parallel does act as a iterator. 131 | """ 132 | produced = list() 133 | consumed = list() 134 | def producer(): 135 | for i in range(6): 136 | produced.append(i) 137 | yield i 138 | consumer = Counter(consumed=consumed, produced=consumed) 139 | 140 | Parallel(n_jobs=1)(delayed(consumer)(x) for x in producer()) 141 | 142 | 143 | def test_dispatch_multiprocessing(): 144 | """ Check that using pre_dispatch Parallel does indeed dispatch items 145 | lazily. 
146 | """ 147 | if multiprocessing is None: 148 | return 149 | consumed = multiprocessing.Queue() 150 | produced = multiprocessing.Queue() 151 | def producer(): 152 | for i in range(10): 153 | produced.put(i) 154 | yield i 155 | consumer = Counter(consumed=consumed, produced=produced, 156 | pre_dispatch=3) 157 | Parallel(n_jobs=2, pre_dispatch=3)( 158 | delayed(consumer)(i, consumed) for i in producer() 159 | ) 160 | 161 | 162 | def test_exception_dispatch(): 163 | "Make sure that exception raised during dispatch are indeed captured" 164 | nose.tools.assert_raises( 165 | ValueError, 166 | Parallel(n_jobs=6, pre_dispatch=16, verbose=0), 167 | (delayed(exception_raiser)(i) for i in range(30)), 168 | ) 169 | 170 | 171 | ################################################################################ 172 | # Test helpers 173 | def test_joblib_exception(): 174 | # Smoke-test the custom exception 175 | e = JoblibException('foobar') 176 | # Test the repr 177 | repr(e) 178 | # Test the pickle 179 | pickle.dumps(e) 180 | 181 | 182 | def test_safe_function(): 183 | safe_division = SafeFunction(division) 184 | nose.tools.assert_raises(JoblibException, safe_division, 1, 0) 185 | 186 | -------------------------------------------------------------------------------- /lib/joblib/testing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper for testing. 3 | """ 4 | 5 | import sys 6 | import warnings 7 | import os.path 8 | 9 | 10 | def warnings_to_stdout(): 11 | """ Redirect all warnings to stdout. 12 | """ 13 | showwarning_orig = warnings.showwarning 14 | 15 | def showwarning(msg, cat, fname, lno, file=None, line=0): 16 | showwarning_orig(msg, cat, os.path.basename(fname), line, sys.stdout) 17 | 18 | warnings.showwarning = showwarning 19 | #warnings.simplefilter('always') 20 | -------------------------------------------------------------------------------- /local/splunkgit.conf: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Separate repo_addresses with a space to have multiple repositories indexed 16 | [git] 17 | repo_addresses=git://github.com/splunk/splunk-app-splunkgit.git 18 | 19 | [splunk] 20 | user= 21 | password= 22 | -------------------------------------------------------------------------------- /metadata/default.meta: -------------------------------------------------------------------------------- 1 | # Copyright 2011 Splunk, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
--------------------------------------------------------------------------------
/metadata/default.meta:
--------------------------------------------------------------------------------
1 | # Copyright 2011 Splunk, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Application-level permissions
16 | 
17 | []
18 | access = read : [ * ], write : [ admin, power ]
--------------------------------------------------------------------------------
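The empty stanza `[]` above sets app-wide defaults: every role may read, and only the admin and power roles may write. In Splunk's metadata system, a more specific stanza overrides the default for a single object; a hypothetical override, following the same syntax, that an administrator might place in `metadata/local.meta`:

    # Restrict writes on one view to admins only; other objects keep the default
    [views/github_repo]
    access = read : [ * ], write : [ admin ]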