' + _('Hide Search Matches') + '
') 178 | .appendTo($('#searchbox')); 179 | } 180 | }, 181 | 182 | /** 183 | * init the domain index toggle buttons 184 | */ 185 | initIndexTable : function() { 186 | var togglers = $('img.toggler').click(function() { 187 | var src = $(this).attr('src'); 188 | var idnum = $(this).attr('id').substr(7); 189 | $('tr.cg-' + idnum).toggle(); 190 | if (src.substr(-9) == 'minus.png') 191 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); 192 | else 193 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); 194 | }).css('display', ''); 195 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { 196 | togglers.click(); 197 | } 198 | }, 199 | 200 | /** 201 | * helper function to hide the search marks again 202 | */ 203 | hideSearchWords : function() { 204 | $('#searchbox .highlight-link').fadeOut(300); 205 | $('span.highlighted').removeClass('highlighted'); 206 | }, 207 | 208 | /** 209 | * make the url absolute 210 | */ 211 | makeURL : function(relativeURL) { 212 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; 213 | }, 214 | 215 | /** 216 | * get the current relative url 217 | */ 218 | getCurrentURL : function() { 219 | var path = document.location.pathname; 220 | var parts = path.split(/\//); 221 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { 222 | if (this == '..') 223 | parts.pop(); 224 | }); 225 | var url = parts.join('/'); 226 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1); 227 | } 228 | }; 229 | 230 | // quick alias for translations 231 | _ = Documentation.gettext; 232 | 233 | $(document).ready(function() { 234 | Documentation.init(); 235 | }); 236 | -------------------------------------------------------------------------------- /doc/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dssg/ushine-learning/71e2eb6746b464a509c1bb39c495591f5b0a5d5f/doc/build/html/_static/down-pressed.png 
-------------------------------------------------------------------------------- /doc/build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dssg/ushine-learning/71e2eb6746b464a509c1bb39c495591f5b0a5d5f/doc/build/html/_static/down.png -------------------------------------------------------------------------------- /doc/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dssg/ushine-learning/71e2eb6746b464a509c1bb39c495591f5b0a5d5f/doc/build/html/_static/file.png -------------------------------------------------------------------------------- /doc/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dssg/ushine-learning/71e2eb6746b464a509c1bb39c495591f5b0a5d5f/doc/build/html/_static/minus.png -------------------------------------------------------------------------------- /doc/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dssg/ushine-learning/71e2eb6746b464a509c1bb39c495591f5b0a5d5f/doc/build/html/_static/plus.png -------------------------------------------------------------------------------- /doc/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 8 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 9 | .highlight 
.c1 { color: #408090; font-style: italic } /* Comment.Single */ 10 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 11 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 12 | .highlight .ge { font-style: italic } /* Generic.Emph */ 13 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 14 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 15 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 16 | .highlight .go { color: #333333 } /* Generic.Output */ 17 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 18 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 19 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 20 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 21 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 22 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 23 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 24 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 25 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 26 | .highlight .kt { color: #902000 } /* Keyword.Type */ 27 | .highlight .m { color: #208050 } /* Literal.Number */ 28 | .highlight .s { color: #4070a0 } /* Literal.String */ 29 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 30 | .highlight .nb { color: #007020 } /* Name.Builtin */ 31 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 32 | .highlight .no { color: #60add5 } /* Name.Constant */ 33 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 34 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 35 | .highlight .ne { color: #007020 } /* Name.Exception */ 36 | .highlight .nf { color: #06287e } /* Name.Function */ 37 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 38 | 
.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 39 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 40 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 41 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 42 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 43 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 44 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 45 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 46 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 47 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 48 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 49 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 50 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 51 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 52 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 53 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 54 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 55 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 56 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 57 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 58 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 59 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 60 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 61 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 62 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /doc/build/html/_static/sidebar.js: -------------------------------------------------------------------------------- 1 | /* 2 | * sidebar.js 3 | * ~~~~~~~~~~ 4 | * 5 | * This script 
makes the Sphinx sidebar collapsible. 6 | * 7 | * .sphinxsidebar contains .sphinxsidebarwrapper. This script adds 8 | * in .sphixsidebar, after .sphinxsidebarwrapper, the #sidebarbutton 9 | * used to collapse and expand the sidebar. 10 | * 11 | * When the sidebar is collapsed the .sphinxsidebarwrapper is hidden 12 | * and the width of the sidebar and the margin-left of the document 13 | * are decreased. When the sidebar is expanded the opposite happens. 14 | * This script saves a per-browser/per-session cookie used to 15 | * remember the position of the sidebar among the pages. 16 | * Once the browser is closed the cookie is deleted and the position 17 | * reset to the default (expanded). 18 | * 19 | * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. 20 | * :license: BSD, see LICENSE for details. 21 | * 22 | */ 23 | 24 | $(function() { 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | // global elements used by the functions. 34 | // the 'sidebarbutton' element is defined as global after its 35 | // creation, in the add_sidebar_button function 36 | var bodywrapper = $('.bodywrapper'); 37 | var sidebar = $('.sphinxsidebar'); 38 | var sidebarwrapper = $('.sphinxsidebarwrapper'); 39 | 40 | // for some reason, the document has no sidebar; do not run into errors 41 | if (!sidebar.length) return; 42 | 43 | // original margin-left of the bodywrapper and width of the sidebar 44 | // with the sidebar expanded 45 | var bw_margin_expanded = bodywrapper.css('margin-left'); 46 | var ssb_width_expanded = sidebar.width(); 47 | 48 | // margin-left of the bodywrapper and width of the sidebar 49 | // with the sidebar collapsed 50 | var bw_margin_collapsed = '.8em'; 51 | var ssb_width_collapsed = '.8em'; 52 | 53 | // colors used by the current theme 54 | var dark_color = $('.related').css('background-color'); 55 | var light_color = $('.document').css('background-color'); 56 | 57 | function sidebar_is_collapsed() { 58 | return sidebarwrapper.is(':not(:visible)'); 59 | } 60 
| 61 | function toggle_sidebar() { 62 | if (sidebar_is_collapsed()) 63 | expand_sidebar(); 64 | else 65 | collapse_sidebar(); 66 | } 67 | 68 | function collapse_sidebar() { 69 | sidebarwrapper.hide(); 70 | sidebar.css('width', ssb_width_collapsed); 71 | bodywrapper.css('margin-left', bw_margin_collapsed); 72 | sidebarbutton.css({ 73 | 'margin-left': '0', 74 | 'height': bodywrapper.height() 75 | }); 76 | sidebarbutton.find('span').text('»'); 77 | sidebarbutton.attr('title', _('Expand sidebar')); 78 | document.cookie = 'sidebar=collapsed'; 79 | } 80 | 81 | function expand_sidebar() { 82 | bodywrapper.css('margin-left', bw_margin_expanded); 83 | sidebar.css('width', ssb_width_expanded); 84 | sidebarwrapper.show(); 85 | sidebarbutton.css({ 86 | 'margin-left': ssb_width_expanded-12, 87 | 'height': bodywrapper.height() 88 | }); 89 | sidebarbutton.find('span').text('«'); 90 | sidebarbutton.attr('title', _('Collapse sidebar')); 91 | document.cookie = 'sidebar=expanded'; 92 | } 93 | 94 | function add_sidebar_button() { 95 | sidebarwrapper.css({ 96 | 'float': 'left', 97 | 'margin-right': '0', 98 | 'width': ssb_width_expanded - 28 99 | }); 100 | // create the button 101 | sidebar.append( 102 | ' ' 103 | ); 104 | var sidebarbutton = $('#sidebarbutton'); 105 | light_color = sidebarbutton.css('background-color'); 106 | // find the height of the viewport to center the '<<' in the page 107 | var viewport_height; 108 | if (window.innerHeight) 109 | viewport_height = window.innerHeight; 110 | else 111 | viewport_height = $(window).height(); 112 | sidebarbutton.find('span').css({ 113 | 'display': 'block', 114 | 'margin-top': (viewport_height - sidebar.position().top - 20) / 2 115 | }); 116 | 117 | sidebarbutton.click(toggle_sidebar); 118 | sidebarbutton.attr('title', _('Collapse sidebar')); 119 | sidebarbutton.css({ 120 | 'color': '#FFFFFF', 121 | 'border-left': '1px solid ' + dark_color, 122 | 'font-size': '1.2em', 123 | 'cursor': 'pointer', 124 | 'height': bodywrapper.height(), 125 
| 'padding-top': '1px', 126 | 'margin-left': ssb_width_expanded - 12 127 | }); 128 | 129 | sidebarbutton.hover( 130 | function () { 131 | $(this).css('background-color', dark_color); 132 | }, 133 | function () { 134 | $(this).css('background-color', light_color); 135 | } 136 | ); 137 | } 138 | 139 | function set_position_from_cookie() { 140 | if (!document.cookie) 141 | return; 142 | var items = document.cookie.split(';'); 143 | for(var k=0; kGiven some text, returns a ranked list of likely natural languages 65 | the given content is in
66 |Given some text input, identify - besides location - people, 72 | organisations and other types of entities within the text
73 |Given text, finds the near duplicate messages.
79 |input: text 80 | output: list. made up of tuples of (id, message text). 81 | [todo: does this only return reports? or unannotated messages, too? 82 | should be any message for completeness, and then the front-end can decide 83 | what should be hidden from the user.]
84 |Given a message/report, suggests the possible categories 90 | that the message could fall into
91 |Suggest locations in a text string. These might be useful keywords for 97 | annotators to geolocate.
98 |input: full message’s text [string] 99 | output: list. each item is a python dictionary:
100 |101 |108 |102 |
107 |- text : the text for the specific entity [string]
103 |- indices : tuple of (start [int], end [int]) offset where entity is 104 | located in given full message
105 |- confidence : probability from 0-to-1 [float]
106 |
Suggest personally identifying information (PII) – such as 114 | credit card numbers, phone numbers, email, etc – 115 | from a text string. These are useful for annotators to investigate 116 | and strip before publicly posting information.
117 |input: text, 118 | input: options
119 |120 |125 |121 |
124 |- custom regex for local phone numbers
122 |- flags or booleans to specify the type of pii (e.g. phone_only)
123 |
|
76 |
|
86 |
|
104 |
|
114 |
|
124 |
|
130 |
|
152 |
|
166 |
|
176 |
|
190 |
|
196 |
|
212 |
|
218 |
|
232 |
|
242 |
|
252 |
|
262 |
|
268 |
Contents:
54 |alias of dssg.Machine
66 |60 | d | ||
64 | | 65 | dssg | 66 | |
69 | | 70 | dssg.junutils | 71 | |
74 | | 75 | dssg.Machine | 76 | |
79 | | 80 | dssg.webapp | 81 | |
84 | | 85 | dssg.webapp.rest_api | 86 | |
59 | Please activate JavaScript to enable the search 60 | functionality. 61 |
62 |64 | From here you can search these documents. Enter your search 65 | words into the box below and click "search". Note that the search 66 | function will automatically search for all of the words. Pages 67 | containing fewer words won't appear in the result list. 68 |
69 | 74 | 75 |Bases: object
55 |Base class for Machine Learning
56 |The categories property.
Returns a set of message IDs with similarity scores, sorted by similarity score. 71 | I recommend using >=0.875 to define ‘near-dup’. 72 | :return [(‘1’, 0.9), (‘2’, 0.8), ...], sorted by the real value
73 |74 |75 |(second element of each item) in decreasing order.
The entities property.
Takes a list of messages (each in dictionary form), and makes 92 | guesses about their category labels, languages, and so on. 93 | :param messages list of messages 94 | :return [(msgDict, {‘c1’: 4.6, ‘c2’: 4.2}), ...], list of pairs: message, dictionary of categories.
96 |97 |The numbers returned range over (-inf, inf), but they are rank scores and cannot be interpreted as anything more (e.g., as probabilities).
Returns a list of entity guesses for the message. 103 | Each entity (ideally) also includes
104 |105 |112 |106 |
111 |- text
107 |- start (offset in included string)
108 |- type (person, location, etc)
109 |- confidence
110 |
Returns list of language guesses for the message, with confidence measure (0 to 1).
118 |Returns list of potentially private/sensitive information to consider stripping. 124 | Output is a list of tuples each containing two parts:
125 |126 |131 |127 |
130 |- the private information type (PERSON, ID, PHONE, etc.)
128 |- the word(s) in the message that correspond to this unit
129 |
Note that the same words in text may appear (in whole or part) in multiple tuples. 132 | For example, a number may meet the criteria for both an ID and a phone number.
133 |The types of information:
134 |Note this includes possible locations (GPE), which may be non-private and useful for geolocation
138 |ID numbers (passport, driver’s license, etc.) [type: ID]
142 |Usernames (e.g. Twitter handles) [type: USERNAME]
144 |URLs [type: URL]
146 |E-mail addresses [type: EMAIL]
148 |Phone numbers, using the optional provided regex “phone_regex” [type: PHONE]
150 |The messages property.
Takes a list of messages. Each message is a dictionary.
164 |