          '<p class="highlight-link">' +
            '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
            _("Hide Search Matches") +
            "</a></p>"
        )
    );
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords: () => {
    document
      .querySelectorAll("#searchbox .highlight-link")
      .forEach((el) => el.remove());
    document
      .querySelectorAll("span.highlighted")
      .forEach((el) => el.classList.remove("highlighted"));
    localStorage.removeItem("sphinx_highlight_terms")
  },

  initEscapeListener: () => {
    // only install a listener if it is really needed
    if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
      // bail with special keys
      if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
      if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
        SphinxHighlight.hideSearchWords();
        event.preventDefault();
      }
    });
  },
};

_ready(SphinxHighlight.highlightSearchWords);
_ready(SphinxHighlight.initEscapeListener);
--------------------------------------------------------------------------------
/docs/_build/html/_static/language_data.js:
--------------------------------------------------------------------------------
/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];


/* Non-minified version is copied as a separate JS file, if available */

/**
 * Porter Stemmer
 */
var Stemmer = function() {

  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";        // consonant
  var v = "[aeiouy]";        // vowel
  var C = c + "[^aeiouy]*";  // consonant sequence
  var V = v + "[aeiou]*";    // vowel sequence

  var mgr0 = "^(" + C + ")?" + V + C;                    // [C]VC... is m>0
  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";  // [C]VC[V] is m=1
  var mgr1 = "^(" + C + ")?" + V + C + V + C;            // [C]VCVC... is m>1
  var s_v = "^(" + C + ")?" + v;                         // vowel in stem

  this.stemWord = function (w) {
    var stem;
    var suffix;
    var firstch;
    var origword = w;

    if (w.length < 3)
      return w;

    var re;
    var re2;
    var re3;
    var re4;

    firstch = w.substr(0,1);
    if (firstch == "y")
      w = firstch.toUpperCase() + w.substr(1);

    // Step 1a
    re = /^(.+?)(ss|i)es$/;
    re2 = /^(.+?)([^s])s$/;

    if (re.test(w))
      w = w.replace(re,"$1$2");
    else if (re2.test(w))
      w = w.replace(re2,"$1$2");

    // Step 1b
    re = /^(.+?)eed$/;
    re2 = /^(.+?)(ed|ing)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = new RegExp(mgr0);
      if (re.test(fp[1])) {
        re = /.$/;
        w = w.replace(re,"");
      }
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = new RegExp(s_v);
      if (re2.test(stem)) {
        w = stem;
        re2 = /(at|bl|iz)$/;
        re3 = new RegExp("([^aeiouylsz])\\1$");
        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
        if (re2.test(w))
          w = w + "e";
        else if (re3.test(w)) {
          re = /.$/;
          w = w.replace(re,"");
        }
        else if (re4.test(w))
          w = w + "e";
      }
    }

    // Step 1c
    re = /^(.+?)y$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(s_v);
      if (re.test(stem))
        w = stem + "i";
    }

    // Step 2
    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step2list[suffix];
    }

    // Step 3
    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step3list[suffix];
    }

    // Step 4
    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    re2 = /^(.+?)(s|t)(ion)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      if (re.test(stem))
        w = stem;
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = new RegExp(mgr1);
      if (re2.test(stem))
        w = stem;
    }

    // Step 5
    re = /^(.+?)e$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      re2 = new RegExp(meq1);
      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
        w = stem;
    }
    re = /ll$/;
    re2 = new RegExp(mgr1);
    if (re.test(w) && re2.test(w)) {
      re = /.$/;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y
    if (firstch == "y")
      w = firstch.toLowerCase() + w.substr(1);
    return w;
  }
}
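// A minimal usage sketch of the stemmer above (hypothetical, not part of the
// generated file): Sphinx's search runs every query term through stemWord()
// before looking it up in the index, so inflected forms of a word hit the
// same entry.
var stemmer = new Stemmer();
console.log(stemmer.stemWord("relational")); // "relat" (step 2, final "e" dropped in step 5)
console.log(stemmer.stemWord("ponies"));     // "poni"  (step 1a)
console.log(stemmer.stemWord("happiness"));  // "happi" (step 3)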
--------------------------------------------------------------------------------
/docs/_static/icon_with_text.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_images/icon_with_text.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_static/icon_with_text.svg:
--------------------------------------------------------------------------------
/docs/_static/icon.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_static/icon.svg:
--------------------------------------------------------------------------------
/docs/_static/icon_with_text_bellow.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_static/icon_with_text_bellow.svg:
--------------------------------------------------------------------------------
/docs/_static/icon_with_text_below_dark.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_static/icon_with_text_below_dark.svg:
--------------------------------------------------------------------------------
/docs/_build/html/_static/skeleton.css:
--------------------------------------------------------------------------------
/* Some sane resets. */
html {
  height: 100%;
}

body {
  margin: 0;
  min-height: 100%;
}

/* All the flexbox magic! */
body,
.sb-announcement,
.sb-content,
.sb-main,
.sb-container,
.sb-container__inner,
.sb-article-container,
.sb-footer-content,
.sb-header,
.sb-header-secondary,
.sb-footer {
  display: flex;
}

/* These order things vertically */
body,
.sb-main,
.sb-article-container {
  flex-direction: column;
}

/* Put elements in the center */
.sb-header,
.sb-header-secondary,
.sb-container,
.sb-content,
.sb-footer,
.sb-footer-content {
  justify-content: center;
}
/* Put elements at the ends */
.sb-article-container {
  justify-content: space-between;
}

/* These elements grow. */
.sb-main,
.sb-content,
.sb-container,
article {
  flex-grow: 1;
}

/* Because padding making this wider is not fun */
article {
  box-sizing: border-box;
}

/* The announcements element should never be wider than the page. */
.sb-announcement {
  max-width: 100%;
}

.sb-sidebar-primary,
.sb-sidebar-secondary {
  flex-shrink: 0;
  width: 17rem;
}

.sb-announcement__inner {
  justify-content: center;

  box-sizing: border-box;
  height: 3rem;

  overflow-x: auto;
  white-space: nowrap;
}

/* Sidebars, with checkbox-based toggle */
.sb-sidebar-primary,
.sb-sidebar-secondary {
  position: fixed;
  height: 100%;
  top: 0;
}

.sb-sidebar-primary {
  left: -17rem;
  transition: left 250ms ease-in-out;
}
.sb-sidebar-secondary {
  right: -17rem;
  transition: right 250ms ease-in-out;
}

.sb-sidebar-toggle {
  display: none;
}
.sb-sidebar-overlay {
  position: fixed;
  top: 0;
  width: 0;
  height: 0;

  transition: width 0ms ease 250ms, height 0ms ease 250ms, opacity 250ms ease;

  opacity: 0;
  background-color: rgba(0, 0, 0, 0.54);
}

#sb-sidebar-toggle--primary:checked
  ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--primary"],
#sb-sidebar-toggle--secondary:checked
  ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--secondary"] {
  width: 100%;
  height: 100%;
  opacity: 1;
  transition: width 0ms ease, height 0ms ease, opacity 250ms ease;
}

#sb-sidebar-toggle--primary:checked ~ .sb-container .sb-sidebar-primary {
  left: 0;
}
#sb-sidebar-toggle--secondary:checked ~ .sb-container .sb-sidebar-secondary {
  right: 0;
}

/* Full-width mode */
.drop-secondary-sidebar-for-full-width-content
  .hide-when-secondary-sidebar-shown {
  display: none !important;
}
.drop-secondary-sidebar-for-full-width-content .sb-sidebar-secondary {
  display: none !important;
}

/* Mobile views */
.sb-page-width {
  width: 100%;
}

.sb-article-container,
.sb-footer-content__inner,
.drop-secondary-sidebar-for-full-width-content .sb-article,
.drop-secondary-sidebar-for-full-width-content .match-content-width {
  width: 100vw;
}

.sb-article,
.match-content-width {
  padding: 0 1rem;
  box-sizing: border-box;
}

@media (min-width: 32rem) {
  .sb-article,
  .match-content-width {
    padding: 0 2rem;
  }
}

/* Tablet views */
@media (min-width: 42rem) {
  .sb-article-container {
    width: auto;
  }
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 42rem;
  }
  .sb-article,
  .match-content-width {
    width: 42rem;
  }
}
@media (min-width: 46rem) {
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 46rem;
  }
  .sb-article,
  .match-content-width {
    width: 46rem;
  }
}
@media (min-width: 50rem) {
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 50rem;
  }
  .sb-article,
  .match-content-width {
    width: 50rem;
  }
}

/* Tablet views */
@media (min-width: 59rem) {
  .sb-sidebar-secondary {
    position: static;
  }
  .hide-when-secondary-sidebar-shown {
    display: none !important;
  }
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 59rem;
  }
  .sb-article,
  .match-content-width {
    width: 42rem;
  }
}
@media (min-width: 63rem) {
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 63rem;
  }
  .sb-article,
  .match-content-width {
    width: 46rem;
  }
}
@media (min-width: 67rem) {
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 67rem;
  }
  .sb-article,
  .match-content-width {
    width: 50rem;
  }
}

/* Desktop views */
@media (min-width: 76rem) {
  .sb-sidebar-primary {
    position: static;
  }
  .hide-when-primary-sidebar-shown {
    display: none !important;
  }
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 59rem;
  }
  .sb-article,
  .match-content-width {
    width: 42rem;
  }
}

/* Full desktop views */
@media (min-width: 80rem) {
  .sb-article,
  .match-content-width {
    width: 46rem;
  }
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 63rem;
  }
}

@media (min-width: 84rem) {
  .sb-article,
  .match-content-width {
    width: 50rem;
  }
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 67rem;
  }
}

@media (min-width: 88rem) {
  .sb-footer-content__inner,
  .drop-secondary-sidebar-for-full-width-content .sb-article,
  .drop-secondary-sidebar-for-full-width-content .match-content-width {
    width: 67rem;
  }
  .sb-page-width {
    width: 88rem;
  }
}
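<!-- A hypothetical markup sketch (not one of the theme's files) showing the
     structure the checkbox-toggle selectors above depend on: the hidden
     checkbox must precede both the overlay and the .sb-container among its
     siblings so the `:checked ~` combinators can reach them, which is what
     lets the sidebar slide in with no JavaScript. -->
<input id="sb-sidebar-toggle--primary" class="sb-sidebar-toggle" type="checkbox" />
<label class="sb-sidebar-overlay" for="sb-sidebar-toggle--primary"></label>
<div class="sb-container">
  <aside class="sb-sidebar-primary"><!-- navigation --></aside>
  <main class="sb-main"><!-- page content --></main>
</div>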
--------------------------------------------------------------------------------
/docs/_build/html/_static/styles/furo-extensions.css.map:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/tweetopic/btm.py:
--------------------------------------------------------------------------------
"""Module containing sklearn compatible Biterm Topic Model."""

from __future__ import annotations

from typing import Optional, Union

import numpy as np
import scipy.sparse as spr
import sklearn
from numpy.typing import ArrayLike

from tweetopic._btm import (compute_biterm_set, corpus_unique_biterms,
                            fit_model, predict_docs)
from tweetopic._doc import init_doc_words
from tweetopic.exceptions import NotFittedException
from tweetopic.utils import set_numba_seed


class BTM(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator):
    """Implementation of the Biterm Topic Model with Gibbs Sampling
    solver.

    Parameters
    ----------
    n_components: int
        Number of topics in the model.
    n_iterations: int, default 100
        Number of iterations during fitting.
    alpha: float, default 6.0
        Dirichlet prior for topic distribution.
    beta: float, default 0.1
        Dirichlet prior for topic-word distribution.
    random_state: int, default None
        Random seed to use for reproducibility.

    Attributes
    ----------
    components_: array of shape (n_components, n_vocab)
        Conditional probabilities of all terms given a topic.
    topic_distribution: array of shape (n_components,)
        Prior probability of each topic.
    n_features_in_: int
        Number of total vocabulary items seen during fitting.
    """

    def __init__(
        self,
        n_components: int,
        n_iterations: int = 100,
        alpha: float = 6.0,
        beta: float = 0.1,
        random_state: Optional[int] = None,
    ):
        self.n_components = n_components
        self.n_iterations = n_iterations
        self.alpha = alpha
        self.beta = beta
        self.random_state = random_state
        # Not none for typing reasons
        self.components_ = np.array(0)
        self.topic_distribution = None
        self.n_features_in_ = 0

    @property
    def _fitted(self) -> bool:
        """Property describing whether the model is fitted."""
        return self.topic_distribution is not None

    def _check_fitted(self) -> None:
        """Raise exception if the model is not fitted."""
        if not self._fitted:
            raise NotFittedException

    def get_params(self, deep: bool = False) -> dict:
        """Get parameters for this estimator.

        Parameters
        ----------
        deep: bool, default False
            Ignored, exists for sklearn compatibility.

        Returns
        -------
        dict
            Parameter names mapped to their values.

        Note
        ----
        Exists for sklearn compatibility.
        """
        return {
            "n_components": self.n_components,
            "n_iterations": self.n_iterations,
            "alpha": self.alpha,
            "beta": self.beta,
        }

    def set_params(self, **params) -> BTM:
        """Set parameters for this estimator.

        Returns
        -------
        BTM
            Estimator instance

        Note
        ----
        Exists for sklearn compatibility.
        """
        for param, value in params.items():
            self.__setattr__(param, value)
        return self

    def fit(self, X: Union[spr.spmatrix, ArrayLike], y: None = None):
        """Fits the model using Gibbs Sampling. Detailed description of the
        algorithm in Yan et al. (2013).

        Parameters
        ----------
        X: array-like or sparse matrix of shape (n_samples, n_features)
            BOW matrix of corpus.
        y: None
            Ignored, exists for sklearn compatibility.

        Returns
        -------
        BTM
            The fitted model.

        Note
        ----
        fit() works in-place too, the fitted model is returned for convenience.
        """
        if self.random_state is not None:
            set_numba_seed(self.random_state)
        # Converting X into sparse array if it isn't one already.
        X = spr.csr_matrix(X)
        _, self.n_features_in_ = X.shape
        # Calculating the number of nonzero elements for each row
        # using the internal properties of CSR matrices.
        max_unique_words = np.max(np.diff(X.indptr))
        print("Extracting biterms.")
        doc_unique_words, doc_unique_word_counts = init_doc_words(
            X.tolil(),
            max_unique_words=max_unique_words,
        )
        biterms = corpus_unique_biterms(doc_unique_words, doc_unique_word_counts)
        biterm_set = compute_biterm_set(biterms)
        self.topic_distribution, self.components_ = fit_model(
            n_iter=self.n_iterations,
            alpha=self.alpha,
            beta=self.beta,
            n_components=self.n_components,
            n_vocab=self.n_features_in_,
            biterms=biterm_set,
        )
        return self

    # TODO: Something goes terribly wrong here, fix this

    def transform(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
        """Predicts probabilities for each document belonging to each
        topic.

        Parameters
        ----------
        X: array-like or sparse matrix of shape (n_samples, n_features)
            Document-term matrix.

        Returns
        -------
        array of shape (n_samples, n_components)
            Probabilities for each document belonging to each cluster.

        Raises
        ------
        NotFittedException
            If the model is not fitted, an exception will be raised.
        """
        self._check_fitted()
        # Converting X into sparse array if it isn't one already.
        X = spr.csr_matrix(X)
        # n_samples, _ = X.shape
        sample_max_unique_words = np.max(np.diff(X.indptr))
        doc_unique_words, doc_unique_word_counts = init_doc_words(
            X.tolil(),
            max_unique_words=sample_max_unique_words,
        )
        return predict_docs(
            topic_distribution=self.topic_distribution,  # type: ignore
            topic_word_distribution=self.components_,
            doc_unique_words=doc_unique_words,
            doc_unique_word_counts=doc_unique_word_counts,
        )

    def fit_transform(
        self,
        X: Union[spr.spmatrix, ArrayLike],
        y: None = None,
    ) -> np.ndarray:
        """Fits the model, then transforms the given data.

        Parameters
        ----------
        X: array-like or sparse matrix of shape (n_samples, n_features)
            Document-term matrix.
        y: None
            Ignored, sklearn compatibility.

        Returns
        -------
        array of shape (n_samples, n_components)
            Probabilities for each document belonging to each cluster.
214 | """ 215 | return self.fit(X).transform(X) 216 | -------------------------------------------------------------------------------- /docs/_build/html/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({"docnames": ["index", "installation", "tweetopic.btm", "tweetopic.dmm", "using_tweetopic.btm", "using_tweetopic.dmm", "using_tweetopic.model_persistence", "using_tweetopic.pipeline", "using_tweetopic.visualization"], "filenames": ["index.rst", "installation.rst", "tweetopic.btm.rst", "tweetopic.dmm.rst", "using_tweetopic.btm.rst", "using_tweetopic.dmm.rst", "using_tweetopic.model_persistence.rst", "using_tweetopic.pipeline.rst", "using_tweetopic.visualization.rst"], "titles": ["tweetopic", "Installation", "BTM", "DMM", "Biterm Topic Model", "Dirichlet Multinomial Mixture Model", "Model persistence", "Pipelines", "Visualization"], "terms": {"i": [0, 2, 3, 4, 5, 7], "python": 0, "librari": [0, 7], "blaze": 0, "fast": 0, "conveni": [0, 2, 3], "topic": [0, 2, 3, 5, 7, 8], "model": [0, 2, 3, 7, 8], "tweet": 0, "other": 0, "short": [0, 4, 5], "text": [0, 4, 5, 7, 8], "thi": [0, 2, 3, 4], "websit": 0, "contain": [0, 3, 4], "well": [0, 4, 5], "guid": [0, 2, 3], "instal": [0, 8], "dirichlet": [0, 2, 3], "multinomi": 0, "mixtur": [0, 3], "biterm": [0, 2], "pipelin": [0, 8], "visual": 0, "persist": 0, "dmm": [0, 4, 5, 6, 7, 8], "btm": [0, 4], "type": [2, 3], "github": 0, "repositori": 0, "tweetop": [1, 2, 3, 4, 5, 7], "can": [1, 4, 5, 8], "simpli": 1, "pypi": [1, 8], "packag": 1, "pip": [1, 8], "usag": [2, 3], "class": [2, 3, 4, 5], "n_compon": [2, 3, 4, 5, 7], "int": [2, 3], "n_iter": [2, 3, 4, 5, 7], "50": 3, "alpha": [2, 3, 4, 5, 7], "float": [2, 3], "0": [2, 3, 4, 5, 7], "1": [2, 3, 5, 7], "beta": [2, 3, 4, 5, 7], "implement": [2, 3], "gibb": [2, 3, 4, 5], "sampl": [2, 3, 4, 5], "solver": [2, 3], "The": [2, 3, 4, 5], "aim": 3, "achiev": 3, "full": 3, "compat": [2, 3, 8], "sklearn": [2, 3, 6, 7, 8], "paramet": [2, 3, 5], "number": [2, 3, 4, 5], "compon": [3, 5, 7, 8], "default": [2, 3], "iter": [2, 3, 4, 5], "dure": [2, 3], "fit": [2, 3, 4, 5, 7], "If": [2, 3], "you": [3, 6, 7, 8], "find": 3, "your": [3, 5, 8], "result": [3, 4, 5], "ar": [3, 4, 5], "unsatisfactori": 3, "increas": [3, 4, 5], "willing": [3, 5], "document": [2, 3, 4, 5, 6, 8], "join": [3, 5], "an": [2, 3, 4, 5, 7], "empti": 3, "cluster": [2, 3, 4, 5], "where": [3, 5], "term": [2, 3, 4, 5], "present": 3, "components_": [2, 3], "describ": [3, 4], "all": [2, 3, 5, 8], "distribut": [2, 3, 4, 5], "amount": 3, "each": [2, 3, 5], "word": [2, 3, 4, 5, 7], "ha": [3, 5], "been": [3, 5], "assign": 3, "arrai": [2, 3, 6], "shape": [2, 3], "n_vocab": [2, 3], "cluster_doc_count": 3, "how": [3, 4], "mani": 3, "n_features_in_": [2, 3], "total": [2, 3], "vocabulari": [2, 3], "item": [2, 3], "seen": [2, 3], "n_document": 3, "max_unique_word": 3, "maximum": 3, "uniqu": 3, "get_param": [2, 3], "deep": [2, 3], "bool": [2, 3], "fals": [2, 3], "dict": [2, 3], "get": [2, 3], "estim": [2, 3], "ignor": [2, 3], "exist": [2, 3], "return": [2, 3], "name": [2, 3], "map": [2, 3], "valu": [2, 3], "set_param": [2, 3], "param": [2, 3], "set": [2, 3], "instanc": [2, 3], "x": [2, 3, 4], "spmatrix": [2, 3], "arraylik": [2, 3], "y": [2, 3, 4], "none": [2, 3], "us": [2, 3, 4, 5, 6, 7, 8], "detail": [2, 3], "descript": [2, 3], "algorithm": [2, 3, 5], "yin": [3, 5], "wang": [3, 5], "2014": [3, 5], "like": [2, 3], "spars": [2, 3], "matrix": [2, 3, 4, 5], "n_sampl": [2, 3], "n_featur": [2, 3], "bow": [2, 3], 
"corpu": [2, 3, 8], "work": [2, 3, 4, 5, 8], "place": [2, 3], "too": [2, 3], "transform": [2, 3, 4, 5], "ndarrai": [2, 3], "predict": [2, 3, 4, 5], "probabl": [2, 3], "belong": [2, 3], "rais": [2, 3], "notfittedexcept": [2, 3], "except": [2, 3], "predict_proba": 3, "alia": 3, "mainli": 3, "densiti": 3, "label": [3, 4, 5], "fit_transform": [2, 3], "given": [2, 3], "data": [2, 3, 5, 7], "provid": [4, 5], "util": [4, 5], "gener": [4, 5], "probabilist": [4, 5], "instead": 4, "process": [4, 5, 7], "focu": 4, "coocurr": 4, "from": [4, 5, 6, 7, 8], "allow": 4, "them": 4, "captur": 4, "relat": 4, "better": [4, 5], "unlik": 4, "thei": 4, "also": [4, 5], "corpora": 4, "longer": 4, "graphic": [4, 5], "plate": [4, 5], "notat": [4, 5], "yan": [2, 4], "et": [2, 4], "al": [2, 4], "2013": [2, 4], "sinc": [4, 5, 6], "\u1e3fcmc": [4, 5], "method": [4, 5], "usual": [4, 5], "converg": [4, 5], "api": [4, 5], "creat": [4, 5, 7], "import": [4, 5, 6, 7, 8], "15": [4, 5, 7], "200": [4, 5, 7], "6": [2, 4], "2": [4, 5, 7], "doc_term_matrix": [4, 5], "unseen": [4, 5], "new_doc": [4, 5], "guo": 4, "j": [4, 5], "lan": 4, "cheng": 4, "A": [4, 5], "proceed": [4, 5], "22nd": 4, "intern": [4, 5], "confer": [4, 5], "world": 4, "wide": 4, "web": 4, "1445": 4, "1456": 4, "\u03c0\u03b1\u03c1\u03bf\u03c5\u03c3\u03b9\u03ac\u03c3\u03c4\u03b7\u03ba\u03b5": 4, "\u03c3\u03c4\u03bf": 4, "rio": 4, "de": 4, "janeiro": 4, "brazil": 4, "doi": 4, "10": 4, "1145": 4, "2488388": 4, "2488514": 4, "simul": 5, "1000": 5, "observ": 5, "drawn": 5, "sourc": 5, "wikipedia": 5, "assum": 5, "point": 5, "popul": 5, "unknown": 5, "thought": 5, "fuzzi": 5, "emploi": 5, "It": [5, 7], "demonstr": 5, "particularli": 5, "over": 5, "explain": 5, "movi": 5, "group": 5, "analogi": 5, "procedur": 5, "student": 5, "classroom": 5, "have": 5, "divid": 5, "themselv": 5, "up": 5, "base": 5, "prefer": 5, "express": 5, "form": 5, "list": 5, "favorit": 5, "analog": 5, "being": 5, "In": 5, "try": 5, "choos": 5, "tabl": 5, "two": [5, 7], "rule": 5, "more": [5, 6, 8], "similar": 5, "own": 5, "here": 5, "repres": 5, "lower": 5, "while": 5, "differ": 5, "approach": 5, "20th": 5, "acm": 5, "sigkdd": 5, "knowledg": 5, "discoveri": 5, "mine": 5, "pp": 5, "233": 5, "242": 5, "associ": 5, "comput": 5, "machineri": 5, "For": [6, 8], "we": [6, 7, 8], "suggest": 6, "joblib": 6, "store": 6, "numpi": 6, "much": 6, "effici": 6, "dump": 6, "load": 6, "dmm_model": 6, "mai": 6, "follow": 6, "comprehens": 6, "overview": 6, "limit": 6, "consult": [6, 8], "": [6, 7], "To": 7, "avoid": 7, "leakag": 7, "make": 7, "easier": 7, "oper": 7, "recommend": [7, 8], "scikit": 7, "learn": 7, "vector": [7, 8], "feature_extract": 7, "countvector": 7, "stop_word": 7, "english": 7, "max_df": 7, "3": 7, "min_df": 7, "add": 7, "stream": 7, "highli": 7, "advis": 7, "pre": 7, "nlp": 7, "spaci": 7, "nltk": 7, "remov": 7, "stop": 7, "function": 7, "lemmat": 7, "could": 7, "drastic": 7, "improv": 7, "qualiti": 7, "topicwizard": 8, "which": 8, "nativ": 8, "wizard": 8, "either": 8, "individu": 8, "pass": 8, "whole": 8, "topic_model": 8, "inform": 8, "100": 2, "fure": 2, "prior": 2, "condit": 2, "topic_distribut": 2}, "objects": {"tweetopic.btm": [[2, 0, 1, "", "BTM"]], "tweetopic.btm.BTM": [[2, 1, 1, "", "components_"], [2, 2, 1, "", "fit"], [2, 2, 1, "", "fit_transform"], [2, 2, 1, "", "get_params"], [2, 1, 1, "", "n_features_in_"], [2, 2, 1, "", "set_params"], [2, 1, 1, "", "topic_distribution"], [2, 2, 1, "", "transform"]], "tweetopic.dmm": [[3, 0, 1, "", "DMM"]], "tweetopic.dmm.DMM": [[3, 1, 1, "", 
"cluster_doc_count"], [3, 1, 1, "", "components_"], [3, 2, 1, "", "fit"], [3, 2, 1, "", "fit_transform"], [3, 2, 1, "", "get_params"], [3, 1, 1, "", "max_unique_words"], [3, 1, 1, "", "n_documents"], [3, 1, 1, "", "n_features_in_"], [3, 2, 1, "", "predict"], [3, 2, 1, "", "predict_proba"], [3, 2, 1, "", "set_params"], [3, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:class", "1": "py:attribute", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "attribute", "Python attribute"], "2": ["py", "method", "Python method"]}, "titleterms": {"tweetop": 0, "get": 0, "start": 0, "usag": [0, 4, 5], "api": 0, "refer": [0, 4, 5], "instal": 1, "btm": 2, "dmm": 3, "type": [], "biterm": 4, "topic": 4, "model": [4, 5, 6], "dirichlet": 5, "multinomi": 5, "mixtur": 5, "persist": 6, "pipelin": 7, "visual": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Installation": [[1, "installation"]], "DMM": [[3, "dmm"]], "Biterm Topic Model": [[4, "biterm-topic-model"]], "Usage": [[4, "usage"], [5, "usage"], [0, null]], "References": [[4, "references"], [5, "references"]], "Dirichlet Multinomial Mixture Model": [[5, "dirichlet-multinomial-mixture-model"]], "Model persistence": [[6, "model-persistence"]], "Pipelines": [[7, "pipelines"]], "Visualization": [[8, "visualization"]], "BTM": [[2, "btm"]], "tweetopic": [[0, "tweetopic"]], "Getting started": [[0, null]], "API reference": [[0, null]]}, "indexentries": {"btm (class in tweetopic.btm)": [[2, "tweetopic.btm.BTM"]], "components_ (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.components_"]], "fit() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.fit"]], "fit_transform() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.fit_transform"]], "get_params() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.get_params"]], "n_features_in_ (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.n_features_in_"]], "set_params() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.set_params"]], "topic_distribution (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.topic_distribution"]], "transform() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.transform"]], "dmm (class in tweetopic.dmm)": [[3, "tweetopic.dmm.DMM"]], "cluster_doc_count (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.cluster_doc_count"]], "components_ (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.components_"]], "fit() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.fit"]], "fit_transform() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.fit_transform"]], "get_params() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.get_params"]], "max_unique_words (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.max_unique_words"]], "n_documents (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.n_documents"]], "n_features_in_ (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.n_features_in_"]], "predict() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.predict"]], "predict_proba() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.predict_proba"]], "set_params() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.set_params"]], "transform() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.transform"]]}}) 
--------------------------------------------------------------------------------
/tweetopic/dmm.py:
--------------------------------------------------------------------------------
"""Module containing a fully sklearn compatible Dirichlet Mixture Model."""

from __future__ import annotations

from typing import Optional, Union

import numpy as np
import scipy.sparse as spr
import sklearn
from numpy.typing import ArrayLike

from tweetopic._dmm import fit_model, init_clusters, predict_doc
from tweetopic._doc import init_doc_words
from tweetopic.exceptions import NotFittedException
from tweetopic.utils import set_numba_seed


class DMM(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator):
    """Implementation of the Dirichlet Mixture Model with Gibbs Sampling
    solver. The class aims to achieve full compatibility with sklearn.

    Parameters
    ----------
    n_components: int
        Number of mixture components in the model.
    n_iterations: int, default 50
        Number of iterations during fitting.
        If you find your results are unsatisfactory, increase this number.
    alpha: float, default 0.1
        Willingness of a document joining an empty cluster.
    beta: float, default 0.1
        Willingness to join clusters, where the terms in the document
        are not present.
    random_state: int, default None
        Random seed to use for reproducibility.

    Attributes
    ----------
    components_: array of shape (n_components, n_vocab)
        Describes all components of the topic distribution.
        Contains the amount each word has been assigned to each component
        during fitting.
    cluster_doc_count: array of shape (n_components,)
        Array containing how many documents there are in each cluster.
    n_features_in_: int
        Number of total vocabulary items seen during fitting.
    n_documents: int
        Total number of documents seen during fitting.
    max_unique_words: int
        Maximum number of unique words in a document seen during fitting.
    """

    def __init__(
        self,
        n_components: int,
        n_iterations: int = 50,
        alpha: float = 0.1,
        beta: float = 0.1,
        random_state: Optional[int] = None,
    ):
        self.n_components = n_components
        self.n_iterations = n_iterations
        self.alpha = alpha
        self.beta = beta
        self.random_state = random_state
        # Not none for typing reasons
        self.components_ = np.array(0)
        self.cluster_doc_count = None
        self.cluster_word_count = None
        self.n_features_in_ = 0
        self.n_documents = 0
        self.max_unique_words = 0

    @property
    def _fitted(self) -> bool:
        """Property describing whether the model is fitted."""
        # If the number of documents seen is more than 0
        # it can be assumed that the model is fitted.
        return bool(self.n_documents)

    def _check_fitted(self) -> None:
        """Raise exception if the model is not fitted."""
        if not self._fitted:
            raise NotFittedException

    def get_params(self, deep: bool = False) -> dict:
        """Get parameters for this estimator.

        Parameters
        ----------
        deep: bool, default False
            Ignored, exists for sklearn compatibility.

        Returns
        -------
        dict
            Parameter names mapped to their values.

        Note
        ----
        Exists for sklearn compatibility.
102 | """ 103 | return { 104 | "n_components": self.n_components, 105 | "n_iterations": self.n_iterations, 106 | "alpha": self.alpha, 107 | "beta": self.beta, 108 | } 109 | 110 | def set_params(self, **params) -> DMM: 111 | """Set parameters for this estimator. 112 | 113 | Returns 114 | ------- 115 | DMM 116 | Estimator instance 117 | 118 | Note 119 | ---- 120 | Exists for sklearn compatibility. 121 | """ 122 | for param, value in params: 123 | self.__setattr__(param, value) 124 | return self 125 | 126 | def fit(self, X: Union[spr.spmatrix, ArrayLike], y: None = None): 127 | """Fits the model using Gibbs Sampling. Detailed description of the 128 | algorithm in Yin and Wang (2014). 129 | 130 | Parameters 131 | ---------- 132 | X: array-like or sparse matrix of shape (n_samples, n_features) 133 | BOW matrix of corpus. 134 | y: None 135 | Ignored, exists for sklearn compatibility. 136 | 137 | Returns 138 | ------- 139 | DMM 140 | The fitted model. 141 | 142 | Note 143 | ---- 144 | fit() works in-place too, the fitted model is returned for convenience. 145 | """ 146 | if self.random_state is not None: 147 | set_numba_seed(self.random_state) 148 | # Converting X into sparse array if it isn't one already. 149 | X = spr.csr_matrix(X) 150 | self.n_documents, self.n_features_in_ = X.shape 151 | # Calculating the number of nonzero elements for each row 152 | # using the internal properties of CSR matrices. 153 | print("Initializing components.") 154 | self.max_unique_words = np.max(np.diff(X.indptr)) 155 | doc_unique_words, doc_unique_word_counts = init_doc_words( 156 | X.tolil(), 157 | max_unique_words=self.max_unique_words, 158 | ) 159 | initial_clusters = np.random.multinomial( 160 | 1, 161 | np.ones(self.n_components) / self.n_components, 162 | size=self.n_documents, 163 | ) 164 | doc_clusters = np.argmax(initial_clusters, axis=1) 165 | self.cluster_doc_count = np.zeros(self.n_components) 166 | self.components_ = np.zeros((self.n_components, self.n_features_in_)) 167 | self.cluster_word_count = np.zeros(self.n_components) 168 | init_clusters( 169 | cluster_word_distribution=self.components_, 170 | cluster_word_count=self.cluster_word_count, 171 | cluster_doc_count=self.cluster_doc_count, 172 | doc_clusters=doc_clusters, 173 | doc_unique_words=doc_unique_words, 174 | doc_unique_word_counts=doc_unique_word_counts, 175 | max_unique_words=self.max_unique_words, 176 | ) 177 | fit_model( 178 | n_iter=self.n_iterations, 179 | alpha=self.alpha, 180 | beta=self.beta, 181 | n_clusters=self.n_components, 182 | n_vocab=self.n_features_in_, 183 | n_docs=self.n_documents, 184 | doc_unique_words=doc_unique_words, 185 | doc_unique_word_counts=doc_unique_word_counts, 186 | doc_clusters=doc_clusters, 187 | cluster_doc_count=self.cluster_doc_count, 188 | cluster_word_count=self.cluster_word_count, 189 | cluster_word_distribution=self.components_, 190 | max_unique_words=self.max_unique_words, 191 | ) 192 | return self 193 | 194 | def transform(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray: 195 | """Predicts probabilities for each document belonging to each 196 | component. 197 | 198 | Parameters 199 | ---------- 200 | X: array-like or sparse matrix of shape (n_samples, n_features) 201 | Document-term matrix. 202 | 203 | Returns 204 | ------- 205 | array of shape (n_samples, n_components) 206 | Probabilities for each document belonging to each cluster. 

        Raises
        ------
        NotFittedException
            If the model is not fitted, an exception will be raised.
        """
        self._check_fitted()
        # Converting X into sparse array if it isn't one already.
        X = spr.csr_matrix(X)
        sample_max_unique_words = np.max(np.diff(X.indptr))
        doc_unique_words, doc_unique_word_counts = init_doc_words(
            X.tolil(),
            max_unique_words=sample_max_unique_words,
        )
        doc_words_count = np.sum(doc_unique_word_counts, axis=1)
        n_docs = X.shape[0]
        predictions = []
        for i_doc in range(n_docs):
            pred = np.zeros(self.n_components)
            predict_doc(
                probabilities=pred,
                i_document=i_doc,
                doc_unique_words=doc_unique_words,
                doc_unique_word_counts=doc_unique_word_counts,
                n_words=doc_words_count[i_doc],
                alpha=self.alpha,
                beta=self.beta,
                n_clusters=self.n_components,
                n_vocab=self.n_features_in_,
                n_docs=n_docs,
                cluster_doc_count=self.cluster_doc_count,  # type: ignore
                cluster_word_count=self.cluster_word_count,  # type: ignore
                cluster_word_distribution=self.components_,  # type: ignore
                max_unique_words=sample_max_unique_words,
            )
            predictions.append(pred)
        return np.stack(predictions)

    def predict_proba(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
        """Alias of :meth:`~tweetopic.dmm.DMM.transform`.

        Mainly exists for compatibility with density estimators in
        sklearn.
        """
        return self.transform(X)

    def predict(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
        """Predicts cluster labels for a set of documents. Mainly exists for
        compatibility with density estimators in sklearn.

        Parameters
        ----------
        X: array-like or sparse matrix of shape (n_samples, n_features)
            Document-term matrix.

        Returns
        -------
        array of shape (n_samples,)
            Cluster label for each document.

        Raises
        ------
        NotFittedException
            If the model is not fitted, an exception will be raised.
        """
        return np.argmax(self.transform(X), axis=1)

    def fit_transform(
        self,
        X: Union[spr.spmatrix, ArrayLike],
        y: None = None,
    ) -> np.ndarray:
        """Fits the model, then transforms the given data.

        Parameters
        ----------
        X: array-like or sparse matrix of shape (n_samples, n_features)
            Document-term matrix.
        y: None
            Ignored, sklearn compatibility.

        Returns
        -------
        array of shape (n_samples, n_components)
            Probabilities for each document belonging to each cluster.
        """
        return self.fit(X).transform(X)
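# A minimal usage sketch for the DMM class above (hypothetical, not part of
# dmm.py), mirroring the pipeline and joblib persistence patterns from the
# project documentation. Assumes tweetopic, scikit-learn and joblib are
# installed; the corpus is made up.
from joblib import dump, load
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline

from tweetopic.dmm import DMM

corpus = [
    "movie night with friends",
    "students pick tables by their favorite movies",
    "clustering short texts with mixture models",
]
# Bundling vectorizer and model avoids data leakage and keeps the two in sync.
pipeline = Pipeline([
    ("vectorizer", CountVectorizer(stop_words="english")),
    ("dmm", DMM(n_components=2, n_iterations=50, alpha=0.1, beta=0.1)),
])
pipeline.fit(corpus)
labels = pipeline.predict(corpus)  # delegates to DMM.predict()
# Persist with joblib, which the docs suggest as it stores numpy arrays
# much more efficiently than pickle.
dump(pipeline, "dmm_pipeline.joblib")
restored = load("dmm_pipeline.joblib")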