├── .github ├── stale.yml └── workflows │ └── release.yml ├── .gitignore ├── .mailmap ├── .readthedocs.yaml ├── .testr.conf ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── bench ├── README.rst ├── bench_hca └── bench_lda.py ├── build.py ├── build_dist.sh ├── doc └── source │ ├── conf.py │ ├── contributing.rst │ ├── getting_started.rst │ ├── index.rst │ ├── installation.rst │ ├── loglikelihoods.png │ ├── release_howto.rst │ └── whats_new.rst ├── docs-requirements.txt ├── lda ├── __init__.py ├── _lda.pyx ├── datasets.py ├── gamma.c ├── gamma.h ├── lda.py ├── tests │ ├── __init__.py │ ├── reuters.ldac │ ├── reuters.titles │ ├── reuters.tokens │ ├── test_datasets.py │ ├── test_lda.py │ ├── test_lda_reuters.py │ ├── test_lda_sparse.py │ ├── test_lda_transform.py │ └── test_utils.py └── utils.py ├── meson.build ├── pyproject.toml └── tox.ini /.github/stale.yml: -------------------------------------------------------------------------------- 1 | daysUntilStale: 60 2 | daysUntilClose: 7 3 | exemptLabels: 4 | - pinned 5 | - security 6 | staleLabel: wontfix 7 | markComment: > 8 | This issue has been automatically marked as stale because it has not had 9 | recent activity. It will be closed if no further activity occurs. Thank you 10 | for your contributions. 
11 | closeComment: false 12 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: CI & CD 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | 8 | env: 9 | PYTHONDONTWRITEBYTECODE: 1 10 | 11 | jobs: 12 | build: 13 | name: Build wheels on ${{ matrix.os }} 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [ubuntu-latest, windows-latest, macos-latest] 18 | defaults: 19 | run: 20 | shell: bash 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: actions/setup-python@v4 26 | name: Install Python 27 | with: 28 | python-version: "3.12" 29 | 30 | - name: Install Poetry 31 | run: pip install "poetry~=1.2" 32 | 33 | - name: Install Python dependencies 34 | run: poetry install --only build --no-root 35 | 36 | - name: Build wheels 37 | run: poetry run cibuildwheel --output-dir dist 38 | 39 | - name: Build source distribution 40 | run: poetry build --format sdist 41 | 42 | - name: Deploy wheels 43 | run: poetry publish --username ${{ secrets.PYPI_USER }} --password ${{ secrets.PYPI_PW }} --skip-existing 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | 
.project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # Cython generated C source code 56 | lda/_lda.c 57 | 58 | # Project-specific 59 | .testrepository 60 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # Format is: 2 | # 3 | # 4 | Allen Riddell riddella+lda-project@indiana.edu abr@ariddell.org ariddell@users.noreply.github.com ariddell 5 | Timothy Hopper tdhopper@gmail.com 6 | Shaoze Luo luoshao23@gmail.com 7 | Severin Simmler severin.simmler.github@gmail.com 8 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | sphinx: 9 | configuration: doc/source/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: docs-requirements.txt 14 | -------------------------------------------------------------------------------- /.testr.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | test_command=OS_STDOUT_CAPTURE=${OS_STDOUT_CAPTURE:-1} \ 3 | OS_STDERR_CAPTURE=${OS_STDERR_CAPTURE:-1} \ 4 | OS_TEST_TIMEOUT=${OS_TEST_TIMEOUT:-60} \ 5 | ${PYTHON:-python} -m subunit.run discover -t ./ . 
$LISTOPT $IDOPTION 6 | test_id_option=--load-list $IDFILE 7 | test_list_option=--list -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Style Guidelines 3 | ================ 4 | Before contributing a patch, please read the Python "Style Commandments" written 5 | by the OpenStack developers: http://docs.openstack.org/developer/hacking/ 6 | 7 | ======================== 8 | Building in Develop Mode 9 | ======================== 10 | 11 | To build in develop mode, install `Poetry <https://python-poetry.org/>`_ and run:: 12 | 13 | git clone https://github.com/lda-project/lda.git 14 | cd lda 15 | poetry install 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License, version 2.0 2 | 3 | 1. Definitions 4 | 5 | 1.1. "Contributor" 6 | 7 | means each individual or legal entity that creates, contributes to the 8 | creation of, or owns Covered Software. 9 | 10 | 1.2. "Contributor Version" 11 | 12 | means the combination of the Contributions of others (if any) used by a 13 | Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | 17 | means Covered Software of a particular Contributor. 18 | 19 | 1.4. "Covered Software" 20 | 21 | means Source Code Form to which the initial Contributor has attached the 22 | notice in Exhibit A, the Executable Form of such Source Code Form, and 23 | Modifications of such Source Code Form, in each case including portions 24 | thereof. 25 | 26 | 1.5. "Incompatible With Secondary Licenses" 27 | means 28 | 29 | a. that the initial Contributor has attached the notice described in 30 | Exhibit B to the Covered Software; or 31 | 32 | b. 
that the Covered Software was made available under the terms of 33 | version 1.1 or earlier of the License, but not also under the terms of 34 | a Secondary License. 35 | 36 | 1.6. "Executable Form" 37 | 38 | means any form of the work other than Source Code Form. 39 | 40 | 1.7. "Larger Work" 41 | 42 | means a work that combines Covered Software with other material, in a 43 | separate file or files, that is not Covered Software. 44 | 45 | 1.8. "License" 46 | 47 | means this document. 48 | 49 | 1.9. "Licensable" 50 | 51 | means having the right to grant, to the maximum extent possible, whether 52 | at the time of the initial grant or subsequently, any and all of the 53 | rights conveyed by this License. 54 | 55 | 1.10. "Modifications" 56 | 57 | means any of the following: 58 | 59 | a. any file in Source Code Form that results from an addition to, 60 | deletion from, or modification of the contents of Covered Software; or 61 | 62 | b. any new file in Source Code Form that contains any Covered Software. 63 | 64 | 1.11. "Patent Claims" of a Contributor 65 | 66 | means any patent claim(s), including without limitation, method, 67 | process, and apparatus claims, in any patent Licensable by such 68 | Contributor that would be infringed, but for the grant of the License, 69 | by the making, using, selling, offering for sale, having made, import, 70 | or transfer of either its Contributions or its Contributor Version. 71 | 72 | 1.12. "Secondary License" 73 | 74 | means either the GNU General Public License, Version 2.0, the GNU Lesser 75 | General Public License, Version 2.1, the GNU Affero General Public 76 | License, Version 3.0, or any later versions of those licenses. 77 | 78 | 1.13. "Source Code Form" 79 | 80 | means the form of the work preferred for making modifications. 81 | 82 | 1.14. "You" (or "Your") 83 | 84 | means an individual or a legal entity exercising rights under this 85 | License. 
For legal entities, "You" includes any entity that controls, is 86 | controlled by, or is under common control with You. For purposes of this 87 | definition, "control" means (a) the power, direct or indirect, to cause 88 | the direction or management of such entity, whether by contract or 89 | otherwise, or (b) ownership of more than fifty percent (50%) of the 90 | outstanding shares or beneficial ownership of such entity. 91 | 92 | 93 | 2. License Grants and Conditions 94 | 95 | 2.1. Grants 96 | 97 | Each Contributor hereby grants You a world-wide, royalty-free, 98 | non-exclusive license: 99 | 100 | a. under intellectual property rights (other than patent or trademark) 101 | Licensable by such Contributor to use, reproduce, make available, 102 | modify, display, perform, distribute, and otherwise exploit its 103 | Contributions, either on an unmodified basis, with Modifications, or 104 | as part of a Larger Work; and 105 | 106 | b. under Patent Claims of such Contributor to make, use, sell, offer for 107 | sale, have made, import, and otherwise transfer either its 108 | Contributions or its Contributor Version. 109 | 110 | 2.2. Effective Date 111 | 112 | The licenses granted in Section 2.1 with respect to any Contribution 113 | become effective for each Contribution on the date the Contributor first 114 | distributes such Contribution. 115 | 116 | 2.3. Limitations on Grant Scope 117 | 118 | The licenses granted in this Section 2 are the only rights granted under 119 | this License. No additional rights or licenses will be implied from the 120 | distribution or licensing of Covered Software under this License. 121 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 122 | Contributor: 123 | 124 | a. for any code that a Contributor has removed from Covered Software; or 125 | 126 | b. 
for infringements caused by: (i) Your and any other third party's 127 | modifications of Covered Software, or (ii) the combination of its 128 | Contributions with other software (except as part of its Contributor 129 | Version); or 130 | 131 | c. under Patent Claims infringed by Covered Software in the absence of 132 | its Contributions. 133 | 134 | This License does not grant any rights in the trademarks, service marks, 135 | or logos of any Contributor (except as may be necessary to comply with 136 | the notice requirements in Section 3.4). 137 | 138 | 2.4. Subsequent Licenses 139 | 140 | No Contributor makes additional grants as a result of Your choice to 141 | distribute the Covered Software under a subsequent version of this 142 | License (see Section 10.2) or under the terms of a Secondary License (if 143 | permitted under the terms of Section 3.3). 144 | 145 | 2.5. Representation 146 | 147 | Each Contributor represents that the Contributor believes its 148 | Contributions are its original creation(s) or it has sufficient rights to 149 | grant the rights to its Contributions conveyed by this License. 150 | 151 | 2.6. Fair Use 152 | 153 | This License is not intended to limit any rights You have under 154 | applicable copyright doctrines of fair use, fair dealing, or other 155 | equivalents. 156 | 157 | 2.7. Conditions 158 | 159 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in 160 | Section 2.1. 161 | 162 | 163 | 3. Responsibilities 164 | 165 | 3.1. Distribution of Source Form 166 | 167 | All distribution of Covered Software in Source Code Form, including any 168 | Modifications that You create or to which You contribute, must be under 169 | the terms of this License. You must inform recipients that the Source 170 | Code Form of the Covered Software is governed by the terms of this 171 | License, and how they can obtain a copy of this License. 
You may not 172 | attempt to alter or restrict the recipients' rights in the Source Code 173 | Form. 174 | 175 | 3.2. Distribution of Executable Form 176 | 177 | If You distribute Covered Software in Executable Form then: 178 | 179 | a. such Covered Software must also be made available in Source Code Form, 180 | as described in Section 3.1, and You must inform recipients of the 181 | Executable Form how they can obtain a copy of such Source Code Form by 182 | reasonable means in a timely manner, at a charge no more than the cost 183 | of distribution to the recipient; and 184 | 185 | b. You may distribute such Executable Form under the terms of this 186 | License, or sublicense it under different terms, provided that the 187 | license for the Executable Form does not attempt to limit or alter the 188 | recipients' rights in the Source Code Form under this License. 189 | 190 | 3.3. Distribution of a Larger Work 191 | 192 | You may create and distribute a Larger Work under terms of Your choice, 193 | provided that You also comply with the requirements of this License for 194 | the Covered Software. If the Larger Work is a combination of Covered 195 | Software with a work governed by one or more Secondary Licenses, and the 196 | Covered Software is not Incompatible With Secondary Licenses, this 197 | License permits You to additionally distribute such Covered Software 198 | under the terms of such Secondary License(s), so that the recipient of 199 | the Larger Work may, at their option, further distribute the Covered 200 | Software under the terms of either this License or such Secondary 201 | License(s). 202 | 203 | 3.4. 
Notices 204 | 205 | You may not remove or alter the substance of any license notices 206 | (including copyright notices, patent notices, disclaimers of warranty, or 207 | limitations of liability) contained within the Source Code Form of the 208 | Covered Software, except that You may alter any license notices to the 209 | extent required to remedy known factual inaccuracies. 210 | 211 | 3.5. Application of Additional Terms 212 | 213 | You may choose to offer, and to charge a fee for, warranty, support, 214 | indemnity or liability obligations to one or more recipients of Covered 215 | Software. However, You may do so only on Your own behalf, and not on 216 | behalf of any Contributor. You must make it absolutely clear that any 217 | such warranty, support, indemnity, or liability obligation is offered by 218 | You alone, and You hereby agree to indemnify every Contributor for any 219 | liability incurred by such Contributor as a result of warranty, support, 220 | indemnity or liability terms You offer. You may include additional 221 | disclaimers of warranty and limitations of liability specific to any 222 | jurisdiction. 223 | 224 | 4. Inability to Comply Due to Statute or Regulation 225 | 226 | If it is impossible for You to comply with any of the terms of this License 227 | with respect to some or all of the Covered Software due to statute, 228 | judicial order, or regulation then You must: (a) comply with the terms of 229 | this License to the maximum extent possible; and (b) describe the 230 | limitations and the code they affect. Such description must be placed in a 231 | text file included with all distributions of the Covered Software under 232 | this License. Except to the extent prohibited by statute or regulation, 233 | such description must be sufficiently detailed for a recipient of ordinary 234 | skill to be able to understand it. 235 | 236 | 5. Termination 237 | 238 | 5.1. 
The rights granted under this License will terminate automatically if You 239 | fail to comply with any of its terms. However, if You become compliant, 240 | then the rights granted under this License from a particular Contributor 241 | are reinstated (a) provisionally, unless and until such Contributor 242 | explicitly and finally terminates Your grants, and (b) on an ongoing 243 | basis, if such Contributor fails to notify You of the non-compliance by 244 | some reasonable means prior to 60 days after You have come back into 245 | compliance. Moreover, Your grants from a particular Contributor are 246 | reinstated on an ongoing basis if such Contributor notifies You of the 247 | non-compliance by some reasonable means, this is the first time You have 248 | received notice of non-compliance with this License from such 249 | Contributor, and You become compliant prior to 30 days after Your receipt 250 | of the notice. 251 | 252 | 5.2. If You initiate litigation against any entity by asserting a patent 253 | infringement claim (excluding declaratory judgment actions, 254 | counter-claims, and cross-claims) alleging that a Contributor Version 255 | directly or indirectly infringes any patent, then the rights granted to 256 | You by any and all Contributors for the Covered Software under Section 257 | 2.1 of this License shall terminate. 258 | 259 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user 260 | license agreements (excluding distributors and resellers) which have been 261 | validly granted by You or Your distributors under this License prior to 262 | termination shall survive termination. 263 | 264 | 6. 
Disclaimer of Warranty 265 | 266 | Covered Software is provided under this License on an "as is" basis, 267 | without warranty of any kind, either expressed, implied, or statutory, 268 | including, without limitation, warranties that the Covered Software is free 269 | of defects, merchantable, fit for a particular purpose or non-infringing. 270 | The entire risk as to the quality and performance of the Covered Software 271 | is with You. Should any Covered Software prove defective in any respect, 272 | You (not any Contributor) assume the cost of any necessary servicing, 273 | repair, or correction. This disclaimer of warranty constitutes an essential 274 | part of this License. No use of any Covered Software is authorized under 275 | this License except under this disclaimer. 276 | 277 | 7. Limitation of Liability 278 | 279 | Under no circumstances and under no legal theory, whether tort (including 280 | negligence), contract, or otherwise, shall any Contributor, or anyone who 281 | distributes Covered Software as permitted above, be liable to You for any 282 | direct, indirect, special, incidental, or consequential damages of any 283 | character including, without limitation, damages for lost profits, loss of 284 | goodwill, work stoppage, computer failure or malfunction, or any and all 285 | other commercial damages or losses, even if such party shall have been 286 | informed of the possibility of such damages. This limitation of liability 287 | shall not apply to liability for death or personal injury resulting from 288 | such party's negligence to the extent applicable law prohibits such 289 | limitation. Some jurisdictions do not allow the exclusion or limitation of 290 | incidental or consequential damages, so this exclusion and limitation may 291 | not apply to You. 292 | 293 | 8. 
Litigation 294 | 295 | Any litigation relating to this License may be brought only in the courts 296 | of a jurisdiction where the defendant maintains its principal place of 297 | business and such litigation shall be governed by laws of that 298 | jurisdiction, without reference to its conflict-of-law provisions. Nothing 299 | in this Section shall prevent a party's ability to bring cross-claims or 300 | counter-claims. 301 | 302 | 9. Miscellaneous 303 | 304 | This License represents the complete agreement concerning the subject 305 | matter hereof. If any provision of this License is held to be 306 | unenforceable, such provision shall be reformed only to the extent 307 | necessary to make it enforceable. Any law or regulation which provides that 308 | the language of a contract shall be construed against the drafter shall not 309 | be used to construe this License against a Contributor. 310 | 311 | 312 | 10. Versions of the License 313 | 314 | 10.1. New Versions 315 | 316 | Mozilla Foundation is the license steward. Except as provided in Section 317 | 10.3, no one other than the license steward has the right to modify or 318 | publish new versions of this License. Each version will be given a 319 | distinguishing version number. 320 | 321 | 10.2. Effect of New Versions 322 | 323 | You may distribute the Covered Software under the terms of the version 324 | of the License under which You originally received the Covered Software, 325 | or under the terms of any subsequent version published by the license 326 | steward. 327 | 328 | 10.3. Modified Versions 329 | 330 | If you create software not governed by this License, and you want to 331 | create a new license for such software, you may create and use a 332 | modified version of this License if you rename the license and remove 333 | any references to the name of the license steward (except to note that 334 | such modified license differs from this License). 335 | 336 | 10.4. 
Distributing Source Code Form that is Incompatible With Secondary 337 | Licenses If You choose to distribute Source Code Form that is 338 | Incompatible With Secondary Licenses under the terms of this version of 339 | the License, the notice described in Exhibit B of this License must be 340 | attached. 341 | 342 | Exhibit A - Source Code Form License Notice 343 | 344 | This Source Code Form is subject to the 345 | terms of the Mozilla Public License, v. 346 | 2.0. If a copy of the MPL was not 347 | distributed with this file, You can 348 | obtain one at 349 | http://mozilla.org/MPL/2.0/. 350 | 351 | If it is not possible or desirable to put the notice in a particular file, 352 | then You may include the notice in a location (such as a LICENSE file in a 353 | relevant directory) where a recipient would be likely to look for such a 354 | notice. 355 | 356 | You may add additional accurate notices of copyright ownership. 357 | 358 | Exhibit B - "Incompatible With Secondary Licenses" Notice 359 | 360 | This Source Code Form is "Incompatible 361 | With Secondary Licenses", as defined by 362 | the Mozilla Public License, v. 2.0. 
363 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include ChangeLog 3 | include CONTRIBUTING.rst 4 | include LICENSE 5 | include README.rst 6 | exclude .gitignore 7 | exclude .gitreview 8 | 9 | include lda/tests/reuters.* 10 | 11 | global-exclude *.pyc 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | 3 | cython: 4 | find lda -name "*.pyx" -exec $(PYTHON) -m cython -3 {} \; 5 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | lda: Topic modeling with latent Dirichlet allocation 2 | ==================================================== 3 | 4 | |pypi| |actions| |zenodo| 5 | 6 | **NOTE: This package is in maintenance mode. Critical bugs will be fixed. No new features will be added.** 7 | 8 | 9 | ``lda`` implements latent Dirichlet allocation (LDA) using collapsed Gibbs 10 | sampling. ``lda`` is fast and is tested on Linux, OS X, and Windows. 11 | 12 | You can read more about lda in `the documentation <http://lda.readthedocs.org>`_. 13 | 14 | Installation 15 | ------------ 16 | 17 | ``pip install lda`` 18 | 19 | Getting started 20 | --------------- 21 | 22 | ``lda.LDA`` implements latent Dirichlet allocation (LDA). The interface follows 23 | conventions found in scikit-learn_. 24 | 25 | The following demonstrates how to inspect a model of a subset of the Reuters 26 | news dataset. The input below, ``X``, is a document-term matrix (sparse matrices 27 | are accepted). 28 | 29 | .. 
code-block:: python 30 | 31 | >>> import numpy as np 32 | >>> import lda 33 | >>> import lda.datasets 34 | >>> X = lda.datasets.load_reuters() 35 | >>> vocab = lda.datasets.load_reuters_vocab() 36 | >>> titles = lda.datasets.load_reuters_titles() 37 | >>> X.shape 38 | (395, 4258) 39 | >>> X.sum() 40 | 84010 41 | >>> model = lda.LDA(n_topics=20, n_iter=1500, random_state=1) 42 | >>> model.fit(X) # model.fit_transform(X) is also available 43 | >>> topic_word = model.topic_word_ # model.components_ also works 44 | >>> n_top_words = 8 45 | >>> for i, topic_dist in enumerate(topic_word): 46 | ... topic_words = np.array(vocab)[np.argsort(topic_dist)][:-(n_top_words+1):-1] 47 | ... print('Topic {}: {}'.format(i, ' '.join(topic_words))) 48 | 49 | Topic 0: british churchill sale million major letters west britain 50 | Topic 1: church government political country state people party against 51 | Topic 2: elvis king fans presley life concert young death 52 | Topic 3: yeltsin russian russia president kremlin moscow michael operation 53 | Topic 4: pope vatican paul john surgery hospital pontiff rome 54 | Topic 5: family funeral police miami versace cunanan city service 55 | Topic 6: simpson former years court president wife south church 56 | Topic 7: order mother successor election nuns church nirmala head 57 | Topic 8: charles prince diana royal king queen parker bowles 58 | Topic 9: film french france against bardot paris poster animal 59 | Topic 10: germany german war nazi letter christian book jews 60 | Topic 11: east peace prize award timor quebec belo leader 61 | Topic 12: n't life show told very love television father 62 | Topic 13: years year time last church world people say 63 | Topic 14: mother teresa heart calcutta charity nun hospital missionaries 64 | Topic 15: city salonika capital buddhist cultural vietnam byzantine show 65 | Topic 16: music tour opera singer israel people film israeli 66 | Topic 17: church catholic bernardin cardinal bishop wright death cancer 
67 | Topic 18: harriman clinton u.s ambassador paris president churchill france 68 | Topic 19: city museum art exhibition century million churches set 69 | 70 | The document-topic distributions are available in ``model.doc_topic_``. 71 | 72 | .. code-block:: python 73 | 74 | >>> doc_topic = model.doc_topic_ 75 | >>> for i in range(10): 76 | ... print("{} (top topic: {})".format(titles[i], doc_topic[i].argmax())) 77 | 0 UK: Prince Charles spearheads British royal revolution. LONDON 1996-08-20 (top topic: 8) 78 | 1 GERMANY: Historic Dresden church rising from WW2 ashes. DRESDEN, Germany 1996-08-21 (top topic: 13) 79 | 2 INDIA: Mother Teresa's condition said still unstable. CALCUTTA 1996-08-23 (top topic: 14) 80 | 3 UK: Palace warns British weekly over Charles pictures. LONDON 1996-08-25 (top topic: 8) 81 | 4 INDIA: Mother Teresa, slightly stronger, blesses nuns. CALCUTTA 1996-08-25 (top topic: 14) 82 | 5 INDIA: Mother Teresa's condition unchanged, thousands pray. CALCUTTA 1996-08-25 (top topic: 14) 83 | 6 INDIA: Mother Teresa shows signs of strength, blesses nuns. CALCUTTA 1996-08-26 (top topic: 14) 84 | 7 INDIA: Mother Teresa's condition improves, many pray. CALCUTTA, India 1996-08-25 (top topic: 14) 85 | 8 INDIA: Mother Teresa improves, nuns pray for "miracle". CALCUTTA 1996-08-26 (top topic: 14) 86 | 9 UK: Charles under fire over prospect of Queen Camilla. LONDON 1996-08-26 (top topic: 8) 87 | 88 | 89 | Requirements 90 | ------------ 91 | 92 | Python ≥3.10 and NumPy. 93 | 94 | Caveat 95 | ------ 96 | 97 | ``lda`` aims for simplicity. (It happens to be fast, as essential parts are 98 | written in C via Cython_.) If you are working with a very large corpus you may 99 | wish to use more sophisticated topic models such as those implemented in hca_ 100 | and MALLET_. hca_ is written entirely in C and MALLET_ is written in Java. 101 | Unlike ``lda``, hca_ can use more than one processor at a time. 
Both MALLET_ and 102 | hca_ implement topic models known to be more robust than standard latent 103 | Dirichlet allocation. 104 | 105 | Notes 106 | ----- 107 | 108 | Latent Dirichlet allocation is described in `Blei et al. (2003)`_ and `Pritchard 109 | et al. (2000)`_. Inference using collapsed Gibbs sampling is described in 110 | `Griffiths and Steyvers (2004)`_. 111 | 112 | Important links 113 | --------------- 114 | 115 | - Documentation: http://lda.readthedocs.org 116 | - Source code: https://github.com/lda-project/lda/ 117 | - Issue tracker: https://github.com/lda-project/lda/issues 118 | 119 | Other implementations 120 | --------------------- 121 | - scikit-learn_'s `LatentDirichletAllocation <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html>`_ (uses online variational inference) 122 | - `gensim <https://radimrehurek.com/gensim/>`_ (uses online variational inference) 123 | 124 | License 125 | ------- 126 | 127 | lda is licensed under Version 2.0 of the Mozilla Public License. 128 | 129 | .. _Python: http://www.python.org/ 130 | .. _scikit-learn: http://scikit-learn.org 131 | .. _hca: https://www.mloss.org/software/view/527/ 132 | .. _MALLET: http://mallet.cs.umass.edu/ 133 | .. _numpy: http://www.numpy.org/ 134 | .. _pbr: https://pypi.python.org/pypi/pbr 135 | .. _Cython: http://cython.org 136 | .. _Blei et al. (2003): http://jmlr.org/papers/v3/blei03a.html 137 | .. _Pritchard et al. (2000): http://www.genetics.org/content/155/2/945.full 138 | .. _Griffiths and Steyvers (2004): http://www.pnas.org/content/101/suppl_1/5228.abstract 139 | 140 | .. |pypi| image:: https://badge.fury.io/py/lda.png 141 | :target: https://pypi.python.org/pypi/lda 142 | :alt: pypi version 143 | 144 | .. |actions| image:: https://github.com/lda-project/lda/actions/workflows/release.yml/badge.svg 145 | :target: https://github.com/lda-project/lda/actions 146 | :alt: github actions build status 147 | 148 | .. 
|zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.1412135.svg 149 | :target: https://doi.org/10.5281/zenodo.1412135 150 | :alt: Zenodo citation 151 | -------------------------------------------------------------------------------- /bench/README.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Benchmarking lda 3 | ================ 4 | 5 | This directory contains scripts to compare the running time of ``lda`` against 6 | hca_. hca_ is written entirely in C. 7 | 8 | To run ``bench_hca`` you will need to have hca_ on your path. 9 | 10 | The test uses the following settings for hca_ 11 | 12 | - 100 topics 13 | - 100 iterations 14 | - Latent Dirichlet allocation (used automatically with ``-A`` and ``-B``) 15 | 16 | .. _hca: http://www.mloss.org/software/view/527/ 17 | -------------------------------------------------------------------------------- /bench/bench_hca: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | time hca -v -e -C100 -A0.1 -B0.001 -K100 -fldac ../lda/tests/reuters /tmp/hca-test 3 | -------------------------------------------------------------------------------- /bench/bench_lda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import time 5 | 6 | import lda 7 | import lda.utils 8 | 9 | test_dir = os.path.join(os.path.dirname(__file__), '..', 'lda', 'tests') 10 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 11 | dtm = lda.utils.ldac2dtm(open(reuters_ldac_fn), offset=0) 12 | t0 = time.time() 13 | n_iter = 100 14 | n_topics = 100 15 | random_seed = 1 16 | model = model = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 17 | 18 | t0 = time.time() 19 | doc_topic = model.fit_transform(dtm) 20 | print("seconds elapsed: {}".format(time.time() - t0)) 21 | 
-------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import shutil 3 | import subprocess 4 | from pathlib import Path 5 | 6 | BUILD_DIR = Path(__file__).parent.joinpath("build") 7 | LIB_DIR = Path(__file__).parent.joinpath("lda") 8 | 9 | 10 | def _is_windows(): 11 | """Return True if the current platform is Windows.""" 12 | return platform.system() == "Windows" 13 | 14 | 15 | def _meson(*args): 16 | """Invoke meson with the given arguments.""" 17 | subprocess.check_call(["meson", *list(args)]) 18 | 19 | 20 | def _cleanup(): 21 | """Remove build artifacts.""" 22 | if BUILD_DIR.exists(): 23 | shutil.rmtree(BUILD_DIR) 24 | 25 | for file in LIB_DIR.glob("*.pyd" if _is_windows() else "*.so"): 26 | file.unlink() 27 | 28 | 29 | def build(): 30 | """Build the project.""" 31 | _cleanup() 32 | 33 | _meson("setup", BUILD_DIR.as_posix()) 34 | _meson("compile", "-C", BUILD_DIR.as_posix()) 35 | _meson("install", "-C", BUILD_DIR.as_posix()) 36 | 37 | 38 | if __name__ == "__main__": 39 | build() 40 | -------------------------------------------------------------------------------- /build_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash strict mode 3 | set -euo pipefail 4 | IFS=$'\n\t' 5 | 6 | err() { 7 | echo "$@" >&2 8 | } 9 | 10 | LAST=$(git tag --sort version:refname | grep -v rc | tail -1) 11 | 12 | echo "Building distribution for: $LAST" 13 | git checkout $LAST 14 | 15 | # superficial check that whats_new.rst has been updated 16 | if ! grep -q "$LAST" doc/source/whats_new.rst ; then 17 | err "release notes not updated, exiting" 18 | exit -1 19 | fi 20 | 21 | read -p "Ok to continue (y/n)? 
" answer 22 | case ${answer:0:1} in 23 | y|Y ) 24 | echo "Building distribution" 25 | poetry build 26 | ;; 27 | * ) 28 | echo "Not building distribution" 29 | ;; 30 | esac 31 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import os 4 | import sys 5 | 6 | sys.path.insert(0, os.path.abspath('../..')) 7 | # -- General configuration ---------------------------------------------------- 8 | 9 | # Add any Sphinx extension module names here, as strings. They can be 10 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 11 | extensions = [ 12 | 'sphinx_rtd_theme', 13 | 'numpydoc', 14 | 'autoapi.extension', 15 | ] 16 | autoapi_dirs = [os.path.join("..", "..", "lda")] 17 | autoapi_ignore = ["*tests*"] 18 | 19 | # autodoc generation is a bit aggressive and a nuisance when doing heavy 20 | # text edit cycles. 21 | # execute "export SPHINX_DEBUG=1" in your terminal to disable 22 | 23 | # The suffix of source filenames. 24 | source_suffix = '.rst' 25 | 26 | # The master toctree document. 27 | master_doc = 'index' 28 | 29 | # General information about the project. 30 | project = u'lda' 31 | copyright = u'2014, lda Developers' 32 | 33 | # If true, '()' will be appended to :func: etc. cross-reference text. 34 | add_function_parentheses = True 35 | 36 | # If true, the current module name will be prepended to all description 37 | # unit titles (such as .. function::). 38 | add_module_names = True 39 | 40 | # The name of the Pygments (syntax highlighting) style to use. 41 | pygments_style = 'sphinx' 42 | 43 | # -- Options for HTML output -------------------------------------------------- 44 | 45 | # The theme to use for HTML and HTML Help pages. Major themes that come with 46 | # Sphinx are currently 'default' and 'sphinxdoc'. 
47 | import sphinx_rtd_theme 48 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 49 | html_theme = "sphinx_rtd_theme" 50 | # html_static_path = ['static'] 51 | 52 | # Output file base name for HTML help builder. 53 | htmlhelp_basename = '%sdoc' % project 54 | 55 | # Grouping the document tree into LaTeX files. List of tuples 56 | # (source start file, target name, title, author, documentclass 57 | # [howto/manual]). 58 | latex_documents = [ 59 | ('index', 60 | '%s.tex' % project, 61 | u'%s Documentation' % project, 62 | u'lda Developers', 'manual'), 63 | ] 64 | 65 | # numpydoc configuration to rein in autosummary errors 66 | numpydoc_show_class_members = False 67 | -------------------------------------------------------------------------------- /doc/source/contributing.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | .. include:: ../../CONTRIBUTING.rst -------------------------------------------------------------------------------- /doc/source/getting_started.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting started 3 | =============== 4 | 5 | The following demonstrates how to inspect a model of a subset of the Reuters 6 | news dataset. The input below, ``X``, is a document-term matrix (sparse matrices 7 | are accepted). 8 | 9 | .. code-block:: python 10 | 11 | >>> import numpy as np 12 | >>> import lda 13 | >>> X = lda.datasets.load_reuters() 14 | >>> vocab = lda.datasets.load_reuters_vocab() 15 | >>> titles = lda.datasets.load_reuters_titles() 16 | >>> X.shape 17 | (395, 4258) 18 | >>> X.sum() 19 | 84010 20 | >>> model = lda.LDA(n_topics=20, n_iter=1500, random_state=1) 21 | >>> model.fit(X) # model.fit_transform(X) is also available 22 | >>> topic_word = model.topic_word_ # model.components_ also works 23 | >>> n_top_words = 8 24 | >>> for i, topic_dist in enumerate(topic_word): 25 | ... 
topic_words = np.array(vocab)[np.argsort(topic_dist)][:-n_top_words:-1] 26 | ... print('Topic {}: {}'.format(i, ' '.join(topic_words))) 27 | Topic 0: british churchill sale million major letters west 28 | Topic 1: church government political country state people party 29 | Topic 2: elvis king fans presley life concert young 30 | Topic 3: yeltsin russian russia president kremlin moscow michael 31 | Topic 4: pope vatican paul john surgery hospital pontiff 32 | Topic 5: family funeral police miami versace cunanan city 33 | Topic 6: simpson former years court president wife south 34 | Topic 7: order mother successor election nuns church nirmala 35 | Topic 8: charles prince diana royal king queen parker 36 | Topic 9: film french france against bardot paris poster 37 | Topic 10: germany german war nazi letter christian book 38 | Topic 11: east peace prize award timor quebec belo 39 | Topic 12: n't life show told very love television 40 | Topic 13: years year time last church world people 41 | Topic 14: mother teresa heart calcutta charity nun hospital 42 | Topic 15: city salonika capital buddhist cultural vietnam byzantine 43 | Topic 16: music tour opera singer israel people film 44 | Topic 17: church catholic bernardin cardinal bishop wright death 45 | Topic 18: harriman clinton u.s ambassador paris president churchill 46 | Topic 19: city museum art exhibition century million churches 47 | 48 | The document-topic distributions are available in ``model.doc_topic_``. 49 | 50 | .. code-block:: python 51 | 52 | >>> doc_topic = model.doc_topic_ 53 | >>> for i in range(10): 54 | ... print("{} (top topic: {})".format(titles[i], doc_topic[i].argmax())) 55 | 0 UK: Prince Charles spearheads British royal revolution. LONDON 1996-08-20 (top topic: 8) 56 | 1 GERMANY: Historic Dresden church rising from WW2 ashes. DRESDEN, Germany 1996-08-21 (top topic: 13) 57 | 2 INDIA: Mother Teresa's condition said still unstable. 
CALCUTTA 1996-08-23 (top topic: 14) 58 | 3 UK: Palace warns British weekly over Charles pictures. LONDON 1996-08-25 (top topic: 8) 59 | 4 INDIA: Mother Teresa, slightly stronger, blesses nuns. CALCUTTA 1996-08-25 (top topic: 14) 60 | 5 INDIA: Mother Teresa's condition unchanged, thousands pray. CALCUTTA 1996-08-25 (top topic: 14) 61 | 6 INDIA: Mother Teresa shows signs of strength, blesses nuns. CALCUTTA 1996-08-26 (top topic: 14) 62 | 7 INDIA: Mother Teresa's condition improves, many pray. CALCUTTA, India 1996-08-25 (top topic: 14) 63 | 8 INDIA: Mother Teresa improves, nuns pray for "miracle". CALCUTTA 1996-08-26 (top topic: 14) 64 | 9 UK: Charles under fire over prospect of Queen Camilla. LONDON 1996-08-26 (top topic: 8) 65 | 66 | Document-topic distributions may be inferred for out-of-sample texts using the 67 | ``transform`` method: 68 | 69 | .. code-block:: python 70 | 71 | >>> X = lda.datasets.load_reuters() 72 | >>> titles = lda.datasets.load_reuters_titles() 73 | >>> X_train = X[10:] 74 | >>> X_test = X[:10] 75 | >>> titles_test = titles[:10] 76 | >>> model = lda.LDA(n_topics=20, n_iter=1500, random_state=1) 77 | >>> model.fit(X_train) 78 | >>> doc_topic_test = model.transform(X_test) 79 | >>> for title, topics in zip(titles_test, doc_topic_test): 80 | ... print("{} (top topic: {})".format(title, topics.argmax())) 81 | 0 UK: Prince Charles spearheads British royal revolution. LONDON 1996-08-20 (top topic: 7) 82 | 1 GERMANY: Historic Dresden church rising from WW2 ashes. DRESDEN, Germany 1996-08-21 (top topic: 11) 83 | 2 INDIA: Mother Teresa's condition said still unstable. CALCUTTA 1996-08-23 (top topic: 4) 84 | 3 UK: Palace warns British weekly over Charles pictures. LONDON 1996-08-25 (top topic: 7) 85 | 4 INDIA: Mother Teresa, slightly stronger, blesses nuns. CALCUTTA 1996-08-25 (top topic: 4) 86 | 5 INDIA: Mother Teresa's condition unchanged, thousands pray. 
CALCUTTA 1996-08-25 (top topic: 4) 87 | 6 INDIA: Mother Teresa shows signs of strength, blesses nuns. CALCUTTA 1996-08-26 (top topic: 4) 88 | 7 INDIA: Mother Teresa's condition improves, many pray. CALCUTTA, India 1996-08-25 (top topic: 4) 89 | 8 INDIA: Mother Teresa improves, nuns pray for "miracle". CALCUTTA 1996-08-26 (top topic: 4) 90 | 9 UK: Charles under fire over prospect of Queen Camilla. LONDON 1996-08-26 (top topic: 11) 91 | 92 | (Note that the topic numbers have changed due to LDA not being an `identifiable 93 | `_ model. The phenomenon is 94 | known as `label switching 95 | `_ in the 96 | literature.) 97 | 98 | Convergence may be monitored by accessing the ``loglikelihoods_`` attribute on a 99 | fitted model. The attribute is bound to a list which records the sequence of 100 | log likelihoods associated with the model at different iterations (thinned by 101 | the ``refresh`` parameter). 102 | 103 | (The following code assumes `matplotlib `_ is installed.) 104 | 105 | .. code-block:: python 106 | 107 | >>> import matplotlib.pyplot as plt 108 | >>> # skipping the first few entries makes the graph more readable 109 | >>> plt.plot(model.loglikelihoods_[5:]) 110 | 111 | .. image:: loglikelihoods.png 112 | :width: 600px 113 | 114 | Judging convergence from the plot, the model should be fit with a slightly 115 | greater number of iterations. 116 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. lda documentation master file, created by 2 | sphinx-quickstart on Tue Jul 9 22:26:36 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | lda: Topic modeling with latent Dirichlet Allocation 7 | ==================================================== 8 | 9 | .. raw:: html 10 | 11 |
 12 | 13 | **lda** implements latent Dirichlet allocation (LDA) using collapsed Gibbs 14 | sampling. lda is fast and can be installed without a compiler on Linux, macOS and Windows. 15 | 16 | .. raw:: html 17 | 18 |
19 | 20 | The interface follows conventions found in scikit-learn_. The following 21 | demonstrates how to inspect a model of a subset of the Reuters news dataset. 22 | (The input below, ``X``, is a document-term matrix.) 23 | 24 | .. code-block:: python 25 | 26 | >>> import numpy as np 27 | >>> import lda 28 | >>> X = lda.datasets.load_reuters() 29 | >>> vocab = lda.datasets.load_reuters_vocab() 30 | >>> titles = lda.datasets.load_reuters_titles() 31 | >>> X.shape 32 | (395, 4258) 33 | >>> X.sum() 34 | 84010 35 | >>> model = lda.LDA(n_topics=20, n_iter=1500, random_state=1) 36 | >>> model.fit(X) # model.fit_transform(X) is also available 37 | >>> topic_word = model.topic_word_ # model.components_ also works 38 | >>> n_top_words = 8 39 | >>> for i, topic_dist in enumerate(topic_word): 40 | ... topic_words = np.array(vocab)[np.argsort(topic_dist)][:-n_top_words:-1] 41 | ... print('Topic {}: {}'.format(i, ' '.join(topic_words))) 42 | Topic 0: british churchill sale million major letters west 43 | Topic 1: church government political country state people party 44 | Topic 2: elvis king fans presley life concert young 45 | Topic 3: yeltsin russian russia president kremlin moscow michael 46 | Topic 4: pope vatican paul john surgery hospital pontiff 47 | Topic 5: family funeral police miami versace cunanan city 48 | Topic 6: simpson former years court president wife south 49 | Topic 7: order mother successor election nuns church nirmala 50 | Topic 8: charles prince diana royal king queen parker 51 | Topic 9: film french france against bardot paris poster 52 | Topic 10: germany german war nazi letter christian book 53 | Topic 11: east peace prize award timor quebec belo 54 | Topic 12: n't life show told very love television 55 | Topic 13: years year time last church world people 56 | Topic 14: mother teresa heart calcutta charity nun hospital 57 | Topic 15: city salonika capital buddhist cultural vietnam byzantine 58 | Topic 16: music tour opera singer israel people film 
 59 | Topic 17: church catholic bernardin cardinal bishop wright death 60 | Topic 18: harriman clinton u.s ambassador paris president churchill 61 | Topic 19: city museum art exhibition century million churches 62 | 63 | **NOTE: This package is in maintenance mode. Critical bugs will be fixed. No new features will be added.** 64 | 65 | Contents: 66 | 67 | .. toctree:: 68 | :maxdepth: 2 69 | 70 | getting_started 71 | installation 72 | autoapi/index 73 | contributing 74 | whats_new 75 | 76 | Indices and tables 77 | ================== 78 | 79 | * :ref:`genindex` 80 | * :ref:`modindex` 81 | * :ref:`search` 82 | 83 | 84 | .. _scikit-learn: http://scikit-learn.org 85 | -------------------------------------------------------------------------------- /doc/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation-instructions: 2 | 3 | ============== 4 | Installing lda 5 | ============== 6 | 7 | lda requires Python and NumPy. If these 8 | requirements are satisfied, lda should install successfully on Linux, macOS and Windows with:: 9 | 10 | pip install lda 11 | 12 | 13 | Installation from source 14 | ------------------------ 15 | 16 | Installing from source requires you to have installed the Python development 17 | headers and a working C/C++ compiler. Under Debian-based operating systems, 18 | which include Ubuntu, you can install all these requirements by issuing:: 19 | 20 | sudo apt-get install build-essential python3-dev 21 | 22 | You can then compile and install lda by running ``pip install .`` from the root of the source tree. 
 23 | -------------------------------------------------------------------------------- /doc/source/loglikelihoods.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lda-project/lda/c63e8d7709a3271d383be3d80cb15cd53f9a6451/doc/source/loglikelihoods.png -------------------------------------------------------------------------------- /doc/source/release_howto.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | How to make a release of lda 3 | ============================== 4 | 5 | Fingerprint of signing key is ``94D5E5A35ED429648B1C627AD96242D5314C8249``. 6 | 7 | 1. Verify that the following chores are finished: 8 | 9 | - Tests pass. 10 | - Changes since last release are mentioned in ``doc/source/whats_new.rst``. 11 | - Signed tag for the current release exists. Run ``git tag -s -u 94D5E5A35ED429648B1C627AD96242D5314C8249 <version>``. 12 | - The version in ``pyproject.toml`` is the same as in the tag. 13 | 14 | 2. Push the tag to GitHub; a GitHub Action will automatically publish the wheels and source dist to PyPI. 15 | -------------------------------------------------------------------------------- /doc/source/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. _whats_new: 2 | 3 | .. currentmodule:: lda 4 | 5 | ============ 6 | What's New 7 | ============ 8 | 9 | v3.0.2 (29. July 2024) 10 | ====================== 11 | - Fix typo in version number of NumPy dependency. (thanks @b-trout) 12 | 13 | v3.0.1 (19. June 2024) 14 | ====================== 15 | - Drop support for Python 3.9. 16 | - Construct nzw in fortran order to reduce cache misses in hot loops. Thanks @ghuls. 17 | 18 | v3.0.0 (4. December 2023) 19 | ========================= 20 | - Drop support for Python 3.6, 3.7 and 3.8 21 | - Wheels for Python 3.9, 3.10, 3.11 and 3.12 22 | 23 | v2.0.0 (17. 
August 2020) 24 | ========================== 25 | - Drop support for Python 2.7 26 | - Wheels for Python 3.8 27 | 28 | v1.1.0 (9. September 2018) 29 | ========================== 30 | - Wheels for Python 3.7 31 | - Minimum required NumPy version is 1.13.0. 32 | - Major speed increase in data loading. Thanks @luoshao23. 33 | - Bugfix in Cython searchsorted function. Thanks @luoshao23. 34 | 35 | v1.0.5 (18. June 2017) 36 | ====================== 37 | - Wheels for Python 3.6 38 | 39 | v1.0.4 (13. July 2016) 40 | ====================== 41 | - Linux wheels (manylinux1) 42 | 43 | v1.0.3 (5. Nov 2015) 44 | ==================== 45 | - Python 3.5 wheels 46 | - Release GIL during sampling 47 | - Many minor fixes 48 | -------------------------------------------------------------------------------- /docs-requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx~=7.2.6 2 | sphinx-autoapi~=3.0 3 | numpydoc~=1.6 4 | sphinx-rtd-theme~=1.3 5 | -------------------------------------------------------------------------------- /lda/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals # noqa 2 | 3 | import logging 4 | 5 | from lda.lda import LDA # noqa 6 | import lda.datasets # noqa 7 | 8 | try: 9 | from importlib.metadata import version # py38 and higher 10 | __version__ = version("lda") 11 | except: # noqa 12 | pass 13 | 14 | logging.getLogger('lda').addHandler(logging.NullHandler()) 15 | -------------------------------------------------------------------------------- /lda/_lda.pyx: -------------------------------------------------------------------------------- 1 | #cython: language_level=3 2 | #cython: boundscheck=False 3 | #cython: wraparound=False 4 | #cython: cdivision=True 5 | 6 | from cython.operator cimport preincrement as inc, predecrement as dec 7 | from libc.stdlib cimport malloc, free 8 | 9 | 10 | cdef extern from 
"gamma.h": 11 | cdef double lda_lgamma(double x) nogil 12 | 13 | 14 | cdef double lgamma(double x) nogil: 15 | if x <= 0: 16 | with gil: 17 | raise ValueError("x must be strictly positive") 18 | return lda_lgamma(x) 19 | 20 | 21 | cdef int searchsorted(double* arr, int length, double value) nogil: 22 | """Bisection search (c.f. numpy.searchsorted) 23 | 24 | Find the index into sorted array `arr` of length `length` such that, if 25 | `value` were inserted before the index, the order of `arr` would be 26 | preserved. 27 | """ 28 | cdef int imin, imax, imid 29 | imin = 0 30 | imax = length 31 | while imin < imax: 32 | imid = imin + ((imax - imin) >> 1) 33 | if value > arr[imid]: 34 | imin = imid + 1 35 | else: 36 | imax = imid 37 | return imin 38 | 39 | 40 | def _sample_topics(int[:] WS, int[:] DS, int[:] ZS, int[::1, :] nzw, int[:, ::1] ndz, int[:] nz, 41 | double[:] alpha, double[:] eta, double[:] rands): 42 | cdef int i, k, w, d, z, z_new 43 | cdef double r, dist_cum 44 | cdef int N = WS.shape[0] 45 | cdef int n_rand = rands.shape[0] 46 | cdef int n_topics = nz.shape[0] 47 | cdef double eta_sum = 0 48 | cdef double* dist_sum = malloc(n_topics * sizeof(double)) 49 | if dist_sum is NULL: 50 | raise MemoryError("Could not allocate memory during sampling.") 51 | with nogil: 52 | for i in range(eta.shape[0]): 53 | eta_sum += eta[i] 54 | 55 | for i in range(N): 56 | w = WS[i] 57 | d = DS[i] 58 | z = ZS[i] 59 | 60 | dec(nzw[z, w]) 61 | dec(ndz[d, z]) 62 | dec(nz[z]) 63 | 64 | dist_cum = 0 65 | for k in range(n_topics): 66 | # eta is a double so cdivision yields a double 67 | dist_cum += (nzw[k, w] + eta[w]) / (nz[k] + eta_sum) * (ndz[d, k] + alpha[k]) 68 | dist_sum[k] = dist_cum 69 | 70 | r = rands[i % n_rand] * dist_cum # dist_cum == dist_sum[-1] 71 | z_new = searchsorted(dist_sum, n_topics, r) 72 | 73 | ZS[i] = z_new 74 | inc(nzw[z_new, w]) 75 | inc(ndz[d, z_new]) 76 | inc(nz[z_new]) 77 | 78 | free(dist_sum) 79 | 80 | 81 | cpdef double _loglikelihood(int[::1, :] nzw, 
int[:, ::1] ndz, int[:] nz, int[:] nd, double alpha, double eta) nogil: 82 | cdef int k, d 83 | cdef int D = ndz.shape[0] 84 | cdef int n_topics = ndz.shape[1] 85 | cdef int vocab_size = nzw.shape[1] 86 | 87 | cdef double ll = 0 88 | 89 | # calculate log p(w|z) 90 | cdef double lgamma_eta, lgamma_alpha 91 | with nogil: 92 | lgamma_eta = lgamma(eta) 93 | lgamma_alpha = lgamma(alpha) 94 | 95 | ll += n_topics * lgamma(eta * vocab_size) 96 | for k in range(n_topics): 97 | ll -= lgamma(eta * vocab_size + nz[k]) 98 | for w in range(vocab_size): 99 | # if nzw[k, w] == 0 addition and subtraction cancel out 100 | if nzw[k, w] > 0: 101 | ll += lgamma(eta + nzw[k, w]) - lgamma_eta 102 | 103 | # calculate log p(z) 104 | for d in range(D): 105 | ll += (lgamma(alpha * n_topics) - 106 | lgamma(alpha * n_topics + nd[d])) 107 | for k in range(n_topics): 108 | if ndz[d, k] > 0: 109 | ll += lgamma(alpha + ndz[d, k]) - lgamma_alpha 110 | return ll 111 | -------------------------------------------------------------------------------- /lda/datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals # noqa 2 | 3 | import os 4 | 5 | import lda.utils 6 | 7 | 8 | _test_dir = os.path.join(os.path.dirname(__file__), 'tests') 9 | 10 | 11 | def load_reuters(): 12 | reuters_ldac_fn = os.path.join(_test_dir, 'reuters.ldac') 13 | return lda.utils.ldac2dtm(open(reuters_ldac_fn), offset=0) 14 | 15 | 16 | def load_reuters_vocab(): 17 | reuters_vocab_fn = os.path.join(_test_dir, 'reuters.tokens') 18 | with open(reuters_vocab_fn) as f: 19 | vocab = tuple(f.read().split()) 20 | return vocab 21 | 22 | 23 | def load_reuters_titles(): 24 | reuters_titles_fn = os.path.join(_test_dir, 'reuters.titles') 25 | with open(reuters_titles_fn) as f: 26 | titles = tuple(line.strip() for line in f.readlines()) 27 | return titles 28 | -------------------------------------------------------------------------------- 
/lda/gamma.c: -------------------------------------------------------------------------------- 1 | /* 2 | * John D. Cook's public domain version of lgamma, from 3 | * http://www.johndcook.com/stand_alone_code.html 4 | * 5 | * Replaces the C99 standard lgamma for stone-age C compilers like the one 6 | * from Redmond. 7 | * 8 | * I removed the test cases and added the cfloat import (Vlad N. ) 9 | * 10 | * Translated to C by Lars Buitinck. Input validation removed; we handle 11 | * that in the Cython wrapper. 12 | * 13 | * sklearn_ prefix removed to avoid confusing lda readers by Allen B. Riddell. 14 | */ 15 | 16 | #include 17 | #include 18 | 19 | #include "gamma.h" 20 | 21 | /* Euler's gamma constant. */ 22 | #define GAMMA 0.577215664901532860606512090 23 | 24 | #define HALF_LOG2_PI 0.91893853320467274178032973640562 25 | 26 | 27 | static double lda_gamma(double x) 28 | { 29 | /* 30 | * Split the function domain into three intervals: 31 | * (0, 0.001), [0.001, 12), and (12, infinity). 32 | */ 33 | 34 | /* 35 | * First interval: (0, 0.001). 36 | * 37 | * For small x, 1/Gamma(x) has power series x + gamma x^2 - ... 38 | * So in this range, 1/Gamma(x) = x + gamma x^2 with error 39 | * on the order of x^3. 40 | * The relative error over this interval is less than 6e-7. 41 | */ 42 | if (x < 0.001) 43 | return 1.0 / (x * (1.0 + GAMMA * x)); 44 | 45 | /* 46 | * Second interval: [0.001, 12). 
47 | */ 48 | if (x < 12.0) { 49 | /* numerator coefficients for approximation over the interval (1,2) */ 50 | static const double p[] = { 51 | -1.71618513886549492533811E+0, 52 | 2.47656508055759199108314E+1, 53 | -3.79804256470945635097577E+2, 54 | 6.29331155312818442661052E+2, 55 | 8.66966202790413211295064E+2, 56 | -3.14512729688483675254357E+4, 57 | -3.61444134186911729807069E+4, 58 | 6.64561438202405440627855E+4 59 | }; 60 | 61 | /* denominator coefficients for approximation over the interval (1,2) */ 62 | static const double q[] = { 63 | -3.08402300119738975254353E+1, 64 | 3.15350626979604161529144E+2, 65 | -1.01515636749021914166146E+3, 66 | -3.10777167157231109440444E+3, 67 | 2.25381184209801510330112E+4, 68 | 4.75584627752788110767815E+3, 69 | -1.34659959864969306392456E+5, 70 | -1.15132259675553483497211E+5 71 | }; 72 | 73 | double den, num, result, z; 74 | 75 | /* The algorithm directly approximates gamma over (1,2) and uses 76 | * reduction identities to reduce other arguments to this interval. */ 77 | double y = x; 78 | int i, n = 0; 79 | int arg_was_less_than_one = (y < 1.0); 80 | 81 | /* Add or subtract integers as necessary to bring y into (1,2) 82 | * Will correct for this below */ 83 | if (arg_was_less_than_one) 84 | y += 1.0; 85 | else { 86 | n = (int)floor(y) - 1; 87 | y -= n; 88 | } 89 | 90 | num = 0.0; 91 | den = 1.0; 92 | 93 | z = y - 1; 94 | for (i = 0; i < 8; i++) { 95 | num = (num + p[i]) * z; 96 | den = den * z + q[i]; 97 | } 98 | result = num/den + 1.0; 99 | 100 | /* Apply correction if argument was not initially in (1,2) */ 101 | if (arg_was_less_than_one) 102 | /* Use identity gamma(z) = gamma(z+1)/z 103 | * The variable "result" now holds gamma of the original y + 1 104 | * Thus we use y-1 to get back the original y. */ 105 | result /= (y-1.0); 106 | else 107 | /* Use the identity gamma(z+n) = z*(z+1)* ... 
*(z+n-1)*gamma(z) */ 108 | for (i = 0; i < n; i++, y++) 109 | result *= y; 110 | 111 | return result; 112 | } 113 | 114 | /* 115 | * Third interval: [12, infinity). 116 | */ 117 | if (x > 171.624) 118 | /* Correct answer too large to display, force +infinity. */ 119 | return 2 * DBL_MAX; 120 | return exp(lda_lgamma(x)); 121 | } 122 | 123 | 124 | double lda_lgamma(double x) 125 | { 126 | /* 127 | * Abramowitz and Stegun 6.1.41 128 | * Asymptotic series should be good to at least 11 or 12 figures 129 | * For error analysis, see Whittiker and Watson 130 | * A Course in Modern Analysis (1927), page 252 131 | */ 132 | static const double c[8] = 133 | { 134 | 1.0/12.0, 135 | -1.0/360.0, 136 | 1.0/1260.0, 137 | -1.0/1680.0, 138 | 1.0/1188.0, 139 | -691.0/360360.0, 140 | 1.0/156.0, 141 | -3617.0/122400.0 142 | }; 143 | 144 | double z, sum; 145 | int i; 146 | 147 | if (x < 12.0) 148 | return log(fabs(lda_gamma(x))); 149 | 150 | z = 1.0 / (x * x); 151 | sum = c[7]; 152 | for (i=6; i >= 0; i--) { 153 | sum *= z; 154 | sum += c[i]; 155 | } 156 | 157 | return (x - 0.5) * log(x) - x + HALF_LOG2_PI + sum / x; 158 | } 159 | 160 | -------------------------------------------------------------------------------- /lda/gamma.h: -------------------------------------------------------------------------------- 1 | #ifndef LDA_GAMMA_H_ 2 | #define LDA_GAMMA_H_ 3 | 4 | double lda_lgamma(double); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /lda/lda.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Latent Dirichlet allocation using collapsed Gibbs sampling""" 3 | 4 | from __future__ import absolute_import, division, unicode_literals # noqa 5 | import logging 6 | import sys 7 | 8 | import numpy as np 9 | 10 | import lda._lda 11 | import lda.utils 12 | 13 | logger = logging.getLogger('lda') 14 | 15 | PY2 = sys.version_info[0] == 2 16 | if PY2: 17 | range = xrange # noqa 18 | 19 | 
class LDA:
    """Latent Dirichlet allocation using collapsed Gibbs sampling

    Parameters
    ----------
    n_topics : int
        Number of topics

    n_iter : int, default 2000
        Number of sampling iterations

    alpha : float, default 0.1
        Dirichlet parameter for distribution over topics

    eta : float, default 0.01
        Dirichlet parameter for distribution over words

    random_state : int or RandomState, optional
        The generator used for the initial topics.

    refresh : int, default 10
        The complete log likelihood is evaluated, logged and appended to
        `loglikelihoods_` every `refresh` sampling iterations.

    Attributes
    ----------
    `components_` : array, shape = [n_topics, n_features]
        Point estimate of the topic-word distributions (Phi in literature)
    `topic_word_` :
        Alias for `components_`
    `nzw_` : array, shape = [n_topics, n_features]
        Matrix of counts recording topic-word assignments in final iteration.
    `ndz_` : array, shape = [n_samples, n_topics]
        Matrix of counts recording document-topic assignments in final iteration.
    `doc_topic_` : array, shape = [n_samples, n_topics]
        Point estimate of the document-topic distributions (Theta in literature)
    `nz_` : array, shape = [n_topics]
        Array of topic assignment counts in final iteration.
    `loglikelihoods_` : list of float
        Complete log likelihoods recorded every `refresh` iterations during
        fitting.

    Examples
    --------
    >>> import numpy
    >>> X = numpy.array([[1,1], [2, 1], [3, 1], [4, 1], [5, 8], [6, 1]])
    >>> import lda
    >>> model = lda.LDA(n_topics=2, random_state=0, n_iter=100)
    >>> model.fit(X) #doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    LDA(alpha=...
    >>> model.components_
    array([[ 0.85714286,  0.14285714],
           [ 0.45      ,  0.55      ]])
    >>> model.loglikelihood() #doctest: +ELLIPSIS
    -40.395...

    References
    ----------
    Blei, David M., Andrew Y. Ng, and Michael I. Jordan. "Latent Dirichlet
    Allocation." Journal of Machine Learning Research 3 (2003): 993–1022.

    Griffiths, Thomas L., and Mark Steyvers. "Finding Scientific Topics."
    Proceedings of the National Academy of Sciences 101 (2004): 5228–5235.
    doi:10.1073/pnas.0307752101.

    Wallach, Hanna, David Mimno, and Andrew McCallum. "Rethinking LDA: Why
    Priors Matter." In Advances in Neural Information Processing Systems 22,
    edited by Y. Bengio, D. Schuurmans, J. Lafferty, C. K. I. Williams, and A.
    Culotta, 1973–1981, 2009.

    Wallach, Hanna M., Iain Murray, Ruslan Salakhutdinov, and David Mimno. 2009.
    “Evaluation Methods for Topic Models.” In Proceedings of the 26th Annual
    International Conference on Machine Learning, 1105–1112. ICML ’09. New York,
    NY, USA: ACM. https://doi.org/10.1145/1553374.1553515.

    Buntine, Wray. "Estimating Likelihoods for Topic Models." In Advances in
    Machine Learning, First Asian Conference on Machine Learning (2009): 51–64.
    doi:10.1007/978-3-642-05224-8_6.

    """

    def __init__(self, n_topics, n_iter=2000, alpha=0.1, eta=0.01, random_state=None,
                 refresh=10):
        # validate before doing any work so a bad prior fails fast
        if alpha <= 0 or eta <= 0:
            raise ValueError("alpha and eta must be greater than zero")

        self.n_topics = n_topics
        self.n_iter = n_iter
        self.alpha = alpha
        self.eta = eta
        # if random_state is None, check_random_state(None) does nothing
        # other than return the current numpy RandomState
        self.random_state = random_state
        self.refresh = refresh

        # random numbers that are reused
        rng = lda.utils.check_random_state(random_state)
        self._rands = rng.rand(1024**2 // 8)  # 1MiB of random variates

        # configure console logging if not already configured
        if len(logger.handlers) == 1 and isinstance(logger.handlers[0], logging.NullHandler):
            logging.basicConfig(level=logging.INFO)

    @staticmethod
    def _as_2d(X):
        """Promote a dense array of shape (n_features,) to (1, n_features).

        Anything that is not a ``numpy.ndarray`` (e.g. a sparse matrix) is
        returned unchanged.
        """
        if isinstance(X, np.ndarray):
            return np.atleast_2d(X)
        return X

    def fit(self, X, y=None):
        """Fit the model with X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features. Sparse matrix allowed.
        y : ignored
            Accepted but not used.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # same 1-D handling as fit_transform/transform
        self._fit(self._as_2d(X))
        return self

    def fit_transform(self, X, y=None):
        """Apply dimensionality reduction on X

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features. Sparse matrix allowed.
        y : ignored
            Accepted but not used.

        Returns
        -------
        doc_topic : array-like, shape (n_samples, n_topics)
            Point estimate of the document-topic distributions

        """
        self._fit(self._as_2d(X))
        return self.doc_topic_

    def transform(self, X, max_iter=20, tol=1e-16):
        """Transform the data X according to previously fitted model

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.
        max_iter : int, optional
            Maximum number of iterations in iterated-pseudocount estimation.
        tol: double, optional
            Tolerance value used in stopping condition.

        Returns
        -------
        doc_topic : array-like, shape (n_samples, n_topics)
            Point estimate of the document-topic distributions. Documents
            that contain no words get the uniform distribution over topics.

        Note
        ----
        To calculate an approximation of the distribution over topics for each
        new document this function uses the "iterated pseudo-counts" approach
        described in Wallach, Murray, Salakhutdinov, and Mimno (2009) and
        justified in greater detail in Buntine (2009). Specifically, we
        implement the "simpler first order version" described in section 3.3 of
        Buntine (2009).

        """
        X = self._as_2d(X)
        # Rows without any token never show up in DS, so they must be given a
        # defined value up front. The uniform distribution is what fitting
        # yields for such a row: (0 + alpha) normalised over n_topics.
        doc_topic = np.full((X.shape[0], self.n_topics), 1.0 / self.n_topics)
        WS, DS = lda.utils.matrix_to_lists(X)
        # TODO: this loop is parallelizable
        for d in np.unique(DS):
            doc_topic[d] = self._transform_single(WS[DS == d], max_iter, tol)
        return doc_topic

    def _transform_single(self, doc, max_iter, tol):
        """Transform a single document according to the previously fit model

        Parameters
        ----------
        doc : 1D numpy array of integers
            Each element represents a word in the document
        max_iter : int
            Maximum number of iterations in iterated-pseudocount estimation.
        tol: double
            Tolerance value used in stopping condition.

        Returns
        -------
        doc_topic : 1D numpy array of length n_topics
            Point estimate of the topic distributions for document. An empty
            document yields the uniform distribution.

        Note
        ----

        See Note in `transform` documentation.

        """
        if len(doc) == 0:
            # nothing to iterate on; avoids the 0/0 in the final normalisation
            return np.full(self.n_topics, 1.0 / self.n_topics)

        PZS = np.zeros((len(doc), self.n_topics))
        for iteration in range(max_iter + 1):  # +1 is for initialization
            # fancy indexing copies, so the in-place updates below do not
            # touch self.components_
            PZS_new = self.components_[:, doc].T
            PZS_new *= (PZS.sum(axis=0) - PZS + self.alpha)
            PZS_new /= PZS_new.sum(axis=1)[:, np.newaxis]  # vector to single column matrix
            delta_naive = np.abs(PZS_new - PZS).sum()
            logger.debug('transform iter {}, delta {}'.format(iteration, delta_naive))
            PZS = PZS_new
            if delta_naive < tol:
                break
        theta_doc = PZS.sum(axis=0) / PZS.sum()
        assert theta_doc.shape == (self.n_topics,)
        return theta_doc

    def _fit(self, X):
        """Fit the model to the data X

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. Sparse matrix allowed.

        Returns
        -------
        self : object
            The fitted instance.
        """
        random_state = lda.utils.check_random_state(self.random_state)
        rands = self._rands.copy()
        self._initialize(X)
        for it in range(self.n_iter):
            # FIXME: using numpy.roll with a random shift might be faster
            random_state.shuffle(rands)
            if it % self.refresh == 0:
                ll = self.loglikelihood()
                logger.info("<{}> log likelihood: {:.0f}".format(it, ll))
                # keep track of loglikelihoods for monitoring convergence
                self.loglikelihoods_.append(ll)
            self._sample_topics(rands)
        ll = self.loglikelihood()
        logger.info("<{}> log likelihood: {:.0f}".format(self.n_iter - 1, ll))
        # note: numpy /= is integer division
        self.components_ = (self.nzw_ + self.eta).astype(float)
        self.components_ /= np.sum(self.components_, axis=1)[:, np.newaxis]
        self.topic_word_ = self.components_
        self.doc_topic_ = (self.ndz_ + self.alpha).astype(float)
        self.doc_topic_ /= np.sum(self.doc_topic_, axis=1)[:, np.newaxis]

        # delete attributes no longer needed after fitting to save memory and reduce clutter
        del self.WS
        del self.DS
        del self.ZS
        return self

    def _initialize(self, X):
        """Allocate the count matrices and assign every token an initial topic.

        Token ``i`` is assigned topic ``i % n_topics``. Sets ``nzw_``,
        ``ndz_``, ``nz_``, ``WS``, ``DS``, ``ZS`` and resets
        ``loglikelihoods_`` to an empty list.
        """
        D, W = X.shape
        N = int(X.sum())
        n_topics = self.n_topics
        n_iter = self.n_iter
        logger.info("n_documents: {}".format(D))
        logger.info("vocab_size: {}".format(W))
        logger.info("n_words: {}".format(N))
        logger.info("n_topics: {}".format(n_topics))
        logger.info("n_iter: {}".format(n_iter))

        self.nzw_ = nzw_ = np.zeros((n_topics, W), dtype=np.intc, order="F")
        self.ndz_ = ndz_ = np.zeros((D, n_topics), dtype=np.intc, order="C")
        self.nz_ = nz_ = np.zeros(n_topics, dtype=np.intc)

        self.WS, self.DS = WS, DS = lda.utils.matrix_to_lists(X)
        np.testing.assert_equal(N, len(WS))
        self.ZS = ZS = (np.arange(N) % n_topics).astype(np.intc)
        # np.add.at accumulates repeated index pairs, unlike `a[idx] += 1`
        np.add.at(ndz_, (DS, ZS), 1)
        np.add.at(nzw_, (ZS, WS), 1)
        nz_ += np.bincount(ZS, minlength=n_topics).astype(np.intc)
        self.loglikelihoods_ = []

    def loglikelihood(self):
        """Calculate complete log likelihood, log p(w,z)

        Formula used is log p(w,z) = log p(w|z) + log p(z)
        """
        nzw, ndz, nz = self.nzw_, self.ndz_, self.nz_
        alpha = self.alpha
        eta = self.eta
        # per-document token totals
        nd = np.sum(ndz, axis=1).astype(np.intc)
        return lda._lda._loglikelihood(nzw, ndz, nz, nd, alpha, eta)

    def _sample_topics(self, rands):
        """Samples all topic assignments. Called once per iteration."""
        n_topics, vocab_size = self.nzw_.shape
        # expand the scalar priors to one entry per topic / per vocabulary word
        alpha = np.repeat(self.alpha, n_topics).astype(np.float64)
        eta = np.repeat(self.eta, vocab_size).astype(np.float64)
        lda._lda._sample_topics(self.WS, self.DS, self.ZS, self.nzw_, self.ndz_, self.nz_,
                                alpha, eta, rands)
CALCUTTA 1996-08-28 15 | 14 UK: Prosaic end for marriage of Charles and Diana. LONDON 1996-08-28 16 | 15 UK: No respite for British royals despite divorce. LONDON 1996-08-28 17 | 16 UK: Camilla, love of Charles' life, an unlikely queen. LONDON 1996-08-28 18 | 17 UK: Diana sets out on new life as single woman. LONDON 1996-08-28 19 | 18 USA: O.J. Simpson attacks media, hints at lawsuits. WASHINGTON 1996-08-28 20 | 19 USA: U.S. Cardinal Bernardin has one year or less to live. CHICAGO 1996-08-30 21 | 20 USA: U.S. Cardinal Bernardin says has terminal cancer. CHICAGO 1996-08-30 22 | 21 ROMANIA: German architect wins Bucharest rebuilding prize. BUCHAREST 1996-09-02 23 | 22 ARGENTINA: Argentina's "Blond Angel" finally quits Navy. BUENOS AIRES, Argentina 1996-09-02 24 | 23 UK: Disney lights up Pocahontas resting place. GRAVESEND, England 1996-09-06 25 | 24 HUNGARY: POPE LEAVES HUNGARY AFTER DEMANDING TWO-DAY VISIT. BUDAPEST 1996-09-07 26 | 25 HUNGARY: Pope says mass in Hungary, health in spotlight. GYOR, Hungary 1996-09-07 27 | 26 UK: Prince Charles' love will not wed him, paper says. LONDON 1996-09-09 28 | 27 UK: Ex-archbishop calls Princess Diana actress-schemer. LONDON 1996-09-09 29 | 28 USA: Politics discussed backstage at Emmys, no airplay. PASADENA, Calif 1996-09-08 30 | 29 UK: Diana angered by ex-archbishop's "schemer" jibe. LONDON 1996-09-11 31 | 30 UK: Queen Elizabeth to hold "royal summit" - report. LONDON 1996-09-10 32 | 31 BRAZIL: Nobel Prize winner sides with Brazil's landless. BRASILIA 1996-09-10 33 | 32 VATICAN: FEATURE - Papal health in spotlight amid talk of Parkinson's. VATICAN CITY 1996-09-13 34 | 33 UK: Cool it with Camilla, Major tells Charles - paper. LONDON 1996-09-14 35 | 34 ROMANIA: Romania "royalty" wedding draws business barons. BUCHAREST 1996-09-15 36 | 35 UK: British royal family holds meeting on future. LONDON 1996-09-15 37 | 36 UK: Scandal-battered British royals debate future role. 
LONDON 1996-09-16 38 | 37 SOUTH AFRICA: It's wonderful I've found Nelson, says new love. JOHANNESBURG 1996-09-17 39 | 38 CHILE: Chilean economist Felipe Herrera dies at 74. SANTIAGO 1996-09-18 40 | 39 GREECE: Princess Diana joins in funeral on Greek island. LIMNI, Greece 1996-09-18 41 | 40 USA: U.S. House of Representatives honors Mother Teresa. WASHINGTON 1996-09-17 42 | 41 FRANCE: Jewish-born cardinal is Pope's key man in France. PARIS 1996-09-18 43 | 42 VATICAN: Pope's health will figure large on trip to France. VATICAN CITY 1996-09-18 44 | 43 PHILIPPINES: FEATURE - Ex-actor eyes Philippine presidency. [CORRECTED 03:43 GMT] MANILA 1996-09-18 45 | 44 UK: Runaway bishop had child, Catholic church admits. LONDON 1996-09-19 46 | 45 FRANCE: Pope skirts religious controversy in France. TOURS, France 1996-09-19 47 | 46 FRANCE: Pope visits western France religious stronghold. SAINTE-ANNE D'AURAY, France 1996-09-20 48 | 47 UK: "I still love him" says runaway bishop's mistress. LONDON 1996-09-21 49 | 48 UK: Church appeals to runaway Scottish bishop. LONDON 1996-09-21 50 | 49 SWITZERLAND: Romanian ex-king's daughter marries. LAUSANNE 1996-09-21 51 | 50 SWITZERLAND: ROMANIAN EX-KING'S DAUGHTER MARRIES. LAUSANNE 1996-09-21 52 | 51 UK: Errant Catholic bishop wants to marry divorcee. LONDON 1996-09-22 53 | 52 FRANCE: Pope speaks of death with "a smile"-Cardinal. REIMS, France 1996-09-22 54 | 53 USA: Actress Dorothy Lamour dead at 81. LOS ANGELES 1996-09-23 55 | 54 USA: South Georgia resort kept Kennedy wedding secret. CUMBERLAND ISLAND, Ga 1996-09-23 56 | 55 FRANCE: FEATURE - Bardot tells all - for the love of animals. PARIS 1996-09-24 57 | 56 FRANCE: Bardot tells all - for the love of animals. [CORRECTED 09:05 GMT] PARIS 1996-09-24 58 | 57 AUSTRALIA: Quiet Buddhist farewell for mercy death Australian. DARWIN, Australia 1996-09-27 59 | 58 AUSTRALIA: Australia mercy death man joked until the end. 
DARWIN, Australia 1996-09-27 60 | 59 BELGIUM: Famed Belgian battlefield site makes its mark. WATERLOO, Belgium 1996-09-28 61 | 60 REPUBLIC OF IRELAND: Ireland's Cardinal Daly steps down at 79. DUBLIN 1996-10-01 62 | 61 CANADA: Former Quebec premier fought for Canadian unity. QUEBEC CITY 1996-10-02 63 | 62 GERMANY: Feminist Hite seeks European parliament platform. FRANKFURT 1996-10-04 64 | 63 UK: Prince Charles aide out after gaffes over Camilla. LONDON 1996-10-05 65 | 64 GERMANY: Vargas Llosa urges sanctions on rights abuses. FRANKFURT 1996-10-06 66 | 65 VATICAN: Tired Pope calls for prayers before operation. VATICAN CITY 1996-10-06 67 | 66 ITALY: Pope ready for appendix surgery. ROME 1996-10-08 68 | 67 FRANCE: FEATURE - "Who's Who" sounds requiem for French intellectuals. PARIS 1996-10-07 69 | 68 USA: Clinton wishes Pope "speedy recovery". WASHINGTON 1996-10-08 70 | 69 ITALY: "Textbook" papal appendix surgery excludes tumour. [CORRECTED 16:45 GMT] ROME 1996-10-08 71 | 70 ITALY: Pope gets clean bill of health after appendectomy. ROME 1996-10-09 72 | 71 UK: Runaway UK bishop to marry, apologises to church. KENDAL, England 1996-10-08 73 | 72 ITALY: Pope's surgery a success, doctors say no tumour. ROME 1996-10-08 74 | 73 UK: Runaway Scottish bishop to marry lover. GLASGOW, Scotland 1996-10-08 75 | 74 ITALY: Pope has appendix removed, no problems. ROME 1996-10-08 76 | 75 ITALY: Tranquil Pope has appendix surgery. ROME 1996-10-08 77 | 76 ITALY: Pope's operation starts - hospital sources. ROME 1996-10-08 78 | 77 SOUTH KOREA: "King of Pop" Michael Jackson arrives in Seoul. SEOUL 1996-10-09 79 | 78 SOUTH KOREA: Jackson takes to stage in Seoul despite protests. SEOUL 1996-10-11 80 | 79 INDONESIA: Nobel peace awards put East Timor in spotlight. JAKARTA 1996-10-11 81 | 80 NORWAY: Nobel peace award wins praise outside indonesia. OSLO 1996-10-11 82 | 81 VATICAN: Bishop's Nobel prize sweet satisfaction for Pope. 
VATICAN CITY 1996-10-11 83 | 82 NORWAY: Winners of Nobel Peace Prize since 1970. OSLO 1996-10-11 84 | 83 SOUTH AFRICA: FEATURE - Germans cling to little oasis in rural South Africa. HERMANNSBURG, South Africa 1996-10-13 85 | 84 GERMANY: German publisher of Stern magazine dies. HAMBURG, Germany 1996-10-13 86 | 85 ITALY: Joking Pope appears in public after surgery. ROME 1996-10-13 87 | 86 INDONESIA: Nobel laureate invited to gathering for Suharto. JAKARTA 1996-10-14 88 | 87 VATICAN: After appendectomy, Papal trembling in spotlight. VATICAN CITY 1996-10-14 89 | 88 USA: Material Girl Madonna becomes a mother. LOS ANGELES 1996-10-14 90 | 89 USA: Madonna as mother - the next chapter. LOS ANGELES 1996-10-14 91 | 90 REPUBLIC OF IRELAND: Irish aid agency pulls out of concert over singer. DUBLIN 1996-10-15 92 | 91 UK: British writer seeks reward on Nazi gold-raid book. LONDON 1996-10-18 93 | 92 UK: Mahatma Ghandi's letters go on sale in London. LONDON 1996-10-21 94 | 93 UK: Mahatma Gandhi's letters go on sale in London. LONDON 1996-10-21 95 | 94 USA: FEATURE-Haitian voodoo - more than dolls and zombies. MIAMI 1996-10-22 96 | 95 VATICAN: Pope to celebrate public mass on Sunday. VATICAN CITY 1996-10-22 97 | 96 VATICAN: Pope greets pilgrims, resumes full duties Sunday. VATICAN CITY 1996-10-23 98 | 97 SPAIN: Spanish bishops attack minister's lavish wedding. MADRID 1996-10-23 99 | 98 INDIA: Former Gandhi aide has second thoughts on auction. MADRAS, India 1996-10-24 100 | 99 UK: ENGLISH PASTOR DEFENDS MEMORIAL SERVICE FOR MONKEY. LONDON 1996-10-25 101 | 100 VATICAN: Pope gradually resumes activity after operation. VATICAN CITY 1996-10-27 102 | 101 RUSSIA: Russia presents archives to U.S. Holocaust museum. MOSCOW 1996-10-28 103 | 102 USA: Municipal industry mourns James Augenthaler. NEW YORK 1996-10-28 104 | 103 UK: U.S. actor berates archbishop over smacking. LONDON 1996-10-28 105 | 104 USA: Author who denied Japanese-Americans interned dies. 
LOS ANGELES 1996-10-29 106 | 105 VATICAN: Pope to publish brief memoirs. VATICAN CITY 1996-10-31 107 | 106 VATICAN: Pope marks 50 years as priest, announces memoirs. VATICAN CITY 1996-11-01 108 | 107 VATICAN: FEATURE - Pope marks 50th anniversary of priesthood. VATICAN CITY 1996-11-01 109 | 108 ROMANIA: Romanians enjoy colourful choice of candidates. BUCHAREST 1996-11-03 110 | 109 RUSSIA: Veil of secrecy thickens as Yeltsin surgery nears. MOSCOW 1996-11-04 111 | 110 RUSSIA: Yeltsin in good form as heart operation nears. MOSCOW 1996-11-04 112 | 111 RUSSIA: Yeltsin op looks imminent, security tightened. MOSCOW 1996-11-05 113 | 112 RUSSIA: FEATURE - Provincial museum fights to stay open in Russia. KOSTROMA, Russia 1996-11-04 114 | 113 ARGENTINA: FEATURE - Erstwhile angel Cavallo bedevils government. BUENOS AIRES 1996-11-04 115 | 114 RUSSIA: YELTSIN COMES THROUGH SEVEN-HOUR HEART OPERATION. MOSCOW 1996-11-05 116 | 115 RUSSIA: Russia on edge as Yeltsin surgery goes to plan. MOSCOW 1996-11-05 117 | 116 RUSSIA: Yeltsin comes through seven-hour heart operation. MOSCOW 1996-11-05 118 | 117 ITALY: Real-life Don Camillo won't toll bells for red. ROME 1996-11-06 119 | 118 VATICAN: Pope celebrates priesthood, to resume all activity. VATICAN CITY 1996-11-10 120 | 119 USA: Chicago's Cardinal Bernardin reported near death. CHICAGO 1996-11-13 121 | 120 USA: Church, lay leaders mourn Chicago cardinal. CHICAGO 1996-11-14 122 | 121 USA: Bishops recall Bernardin's humble leadership. WASHINGTON 1996-11-14 123 | 122 USA: Cardinal led Chicago archdiocese for 14 years. CHICAGO 1996-11-14 124 | 123 USA: Chicago's Cardinal Bernardin dead at age 68. CHICAGO 1996-11-14 125 | 124 VATICAN: Pope saddened over Bernardin death. VATICAN CITY 1996-11-14 126 | 125 CROATIA: Croatian doctor says Tudjman "feeling excellent". ZAGREB 1996-11-16 127 | 126 INDONESIA: Norway issues Timorese Nobel laureate a visa. JAKARTA 1996-11-17 128 | 127 INDIA: Mother Teresa becomes honorary American citizen. 
NEW DELHI 1996-11-16 129 | 128 VATICAN: Pope warms up for Castro by meeting Gorbachev. VATICAN CITY 1996-11-18 130 | 129 USA: Final services for Cardinal Bernardin of Chicago. CHICAGO 1996-11-20 131 | 130 UK: India saves Gandhi papers from jaws of commerce. LONDON 1996-11-22 132 | 131 USA: Simpson's life story defies imagination. SANTA MONICA, Calif. 1996-11-22 133 | 132 INDIA: Mother Teresa "comfortable" after heart failure. CALCUTTA 1996-11-22 134 | 133 INDIA: Mother Teresa in hospital with heart problem. CALCUTTA 1996-11-22 135 | 134 FRANCE: France pays tribute to writer Malraux on Saturday. PARIS 1996-11-22 136 | 135 USA: S. Carolina Klan museum claims to teach history. LAURENS, S.C. 1996-11-24 137 | 136 USA: Ex-hostage Waite still keeps symbolic ticket. NEW YORK 1996-11-26 138 | 137 UK: Cluedo's final mystery solved at inventor's grave. LONDON 1996-11-27 139 | 138 MOLDOVA: Moldova's new leader has Moscow past, western goals. CHISINAU 1996-12-02 140 | 139 VATICAN: Belgian cardinal Hamer dies. VATICAN CITY 1996-12-02 141 | 140 UK: Papers show Duke of Windsor was royal loose cannon. LONDON 1996-12-03 142 | 141 FRANCE: Acclaimed French historian Georges Duby dies at 77. PARIS 1996-12-03 143 | 142 CENTRAL AFRICAN REPUBLIC: State burial for African emperor Bokassa cancelled. BANGUI 1996-12-04 144 | 143 INDIA: Mother Teresa seeks foster homes for 4,000 children. CALCUTTA 1996-12-05 145 | 144 USA: Abortion foe Salvi buried, victim attends funeral. IPSWICH, Mass 1996-12-04 146 | 145 ITALY: Nobel laureate Belo says peace will be his message. ROME 1996-12-06 147 | 146 VIETNAM: Hanoi declares dissident temple a national site. HANOI 1996-12-06 148 | 147 GREECE: SALONIKA TO SHOW OFF BYZANTINE LEGACY IN 1997. SALONIKA, Greece 1996-12-08 149 | 148 GREECE: SALONIKA TO SHOW OFF BYZANTINE LEGACY IN 1997. SALONIKA, Greece 1996-12-09 150 | 149 RUSSIA: Cash-strapped Kremlin to start charging visitors. 
MOSCOW 1996-12-10 151 | 150 VATICAN: Ageing Pope to skip Christmas Day mass this year. VATICAN CITY 1996-12-10 152 | 151 NORWAY: East Timor activist shares peace prize. OSLO 1996-12-10 153 | 152 UK: FEATURE - Duke's reputation sinks 60 years after abdication. LONDON 1996-12-11 154 | 153 SOUTH AFRICA: S.Africa's Boesak charged with theft of aid funds. CAPE TOWN 1996-12-13 155 | 154 ITALY: Italy's Dossetti, political figure, dies. BOLOGNA, Italy 1996-12-15 156 | 155 GREECE: FEATURE - Salonika to show off Byzantine legacy in 1997. SALONIKA, Greece 1996-12-15 157 | 156 NETHERLANDS: Hermitage treasures on show in Amsterdam. AMSTERDAM 1996-12-16 158 | 157 RUSSIA: Russian Orthodox leader taken to hospital. MOSCOW 1996-12-18 159 | 158 FRANCE: Bardot denies racism charge in Paris court. PARIS 1996-12-19 160 | 159 FRANCE: Paris bids emotional farewell to Mastroianni. PARIS 1996-12-20 161 | 160 ITALY: Italy says "Ciao Marcello," thanks for sweet life. ROME 1996-12-20 162 | 161 PERU: Peru rebel chief is angry, violent revolutionary. LIMA 1996-12-22 163 | 162 CUBA: Santa Claus skips Cuba, at least officially. HAVANA 1996-12-23 164 | 163 ISRAEL: Israel takes shine off Christmas in Bethlehem. BETHLEHEM, West Bank 1996-12-23 165 | 164 RUSSIA: FEATURE - Moscow to celebrate 850th birthday in grand style. MOSCOW 1996-12-25 166 | 165 UK: Fred West film plan angers British MPs, relatives. LONDON 1996-12-31 167 | 166 INDIA: Israel president shrugs off illness. NEW DELHI 1996-12-31 168 | 167 UK: Britain to review law after Fred West film plan. LONDON 1997-01-01 169 | 168 UK: Britain to review law after Fred West film plan. LONDON 1997-01-01 170 | 169 USA: Mother Teresa "miracle" bun takes to the Internet. NASHVILLE, Tenn. 1997-01-03 171 | 170 SWEDEN: Sweden's honourable Prince Bertil dies, aged 84. STOCKHOLM 1997-01-05 172 | 171 SWEDEN: Sweden's Prince Bertil dies. STOCKHOLM 1997-01-05 173 | 172 RUSSIA: Kremlin slams report on return of Tsar's heir. 
MOSCOW 1997-01-06 174 | 173 RUSSIA: Yeltsin spends Russian Christmas in bed with cold. MOSCOW 1997-01-07 175 | 174 SOUTH KOREA: Under plastic awning, veteran newsman leads strikes. SEOUL 1997-01-07 176 | 175 RUSSIA: FEATURE - Russian historian seeks real Stalin behind myth. MOSCOW 1997-01-09 177 | 176 GERMANY: Hollywood stars blast Germany over Scientologists. BONN 1997-01-09 178 | 177 Germany: Kohl dismisses Hollywood stars' rebuke of Germany. BONN 1997-01-09 179 | 178 UK: Buckingham Palace to campaign for Charles as king. LONDON 1997-01-09 180 | 179 CANADA: FEATURE-Canada's Ben Heppner -- the Fourth Tenor?. OTTAWA 1997-01-10 181 | 180 GERMANY: Germany rebuffs stars' accusations over Scientology. BONN 1997-01-10 182 | 181 RUSSIA: FEATURE - Russian artists say forging evidence is kids' stuff. MOSCOW 1997-01-12 183 | 182 USA: Houston moves to rein in topless nightclubs. HOUSTON 1997-01-12 184 | 183 SWEDEN: Sweden, European royalty mourn Prince Bertil. STOCKHOLM 1997-01-13 185 | 184 GERMANY: German Jewish leader slams Scientology letter. FRANKFURT 1997-01-13 186 | 185 PHILIPPINES: FEATURE - Festivals bring buried Philippine towns back to life. ANGELES CITY, Philippines 1997-01-13 187 | 186 SWEDEN: European royalty join Sweden to mourn Prince Bertil. STOCKHOLM 1997-01-13 188 | 187 AUSTRALIA: RTRS-TIMELINES-Today in History - Jan 15. 189 | 188 DENMARK: Danish queen mourns, marks 25 years as monarch. COPENHAGEN 1997-01-14 190 | 189 SOUTH AFRICA: S.Africa's moral conscience, Tutu, battles cancer. JOHANNESBURG 1997-01-17 191 | 190 USA: Debate over King legacy dampens Atlanta festivities. ATLANTA 1997-01-19 192 | 191 USA: Church marks legacy of Martin Luther King Jr.. ATLANTA 1997-01-20 193 | 192 USA: U.S. Sen. Tsongas to be buried on Thursday. LOWELL, Mass 1997-01-19 194 | 193 USA: Friends line up to pay respects to Tsongas. LOWELL, Mass. 1997-01-21 195 | 194 USA: John Phillips dies, ex-First Boston muni official. 
NEW YORK 1997-01-22 196 | 195 FRANCE: France's Bardot cleared of racist slur. PARIS 1997-01-23 197 | 196 LIBERIA: Liberia's Taylor remarries, offers toast for peace. GBANGA, Liberia 1997-01-28 198 | 197 TAIWAN: Taiwan president soul-searches with Hollywood hunk. TAIPEI 1997-01-29 199 | 198 COLOMBIA: Gays beat Church ban, parade in Colombian carnival. BOGOTA 1997-02-02 200 | 199 FRANCE: U.S. ambassador to France has brain haemorrhage. PARIS 1997-02-04 201 | 200 GERMANY: Berlin festival quells Scientology boycott fears. BERLIN 1997-02-04 202 | 201 USA: U.S. envoy to France seriously ill. WASHINGTON 1997-02-03 203 | 202 USA: U.S. envoy to France said to be near death. WASHINGTON 1997-02-03 204 | 203 FRANCE: U.S. Paris envoy Harriman in serious condition. PARIS 1997-02-04 205 | 204 FRANCE: U.S. ambassador to France has brain haemorrhage. PARIS 1997-02-04 206 | 205 FRANCE: U.S. ambassador to France ill in hospital. PARIS 1997-02-04 207 | 206 USA: Pamela Harriman eulogized by Clinton, Democrats. WASHINGTON 1997-02-05 208 | 207 USA: Clinton, Democrats laud Pamela Harriman. WASHINGTON 1997-02-05 209 | 208 FRANCE: U.S. envoy Pamela Harriman dies in Paris. PARIS 1997-02-05 210 | 209 FRANCE: Pamela Harriman's life a mix of wealth and power. PARIS 1997-02-05 211 | 210 USA: Clinton lauds Harriman as gifted public servant. WASHINGTON 1997-02-05 212 | 211 USA: O.J. Simpson's incredible reversal of fortune. SANTA MONICA, Calif. 1997-02-04 213 | 212 FRANCE: U.S. ambassador to Paris Harriman dead at 76. PARIS 1997-02-05 214 | 213 FRANCE: Pamela Harriman, U.S. envoy to France, dead at 76. PARIS 1997-02-05 215 | 214 FRANCE: U.S. ambassador to France condition still serious. PARIS 1997-02-05 216 | 215 FRANCE: Chirac to honour late U.S. envoy Pamela Harriman. PARIS 1997-02-06 217 | 216 USA: FEATURE - Marsalis conjures soul from slavery. NEW YORK 1997-02-06 218 | 217 UK: Oasis singer Liam Gallagher set to marry next week. 
LONDON 1997-02-06 219 | 218 USA: Harriman's body returned to Washington. WASHINGTON 1997-02-08 220 | 219 SWITZERLAND: FEATURE - Troubleshooting Swiss diplomat keeps low profile. BERNE 1997-02-12 221 | 220 USA: Clinton recalls Harriman as elegant, indomitable. WASHINGTON 1997-02-13 222 | 221 USA: Funeral for baby allegedly killed by British nanny. BROOKLINE, Mass. 1997-02-12 223 | 222 VATICAN: Historian of Vatican policy towards Nazis dies. VATICAN CITY 1997-02-14 224 | 223 UK: UK watchdog halts ads over epileptic seizure risk. LONDON 1997-02-14 225 | 224 PHILIPPINES: Air crash follows bishop's funeral in Philippines. JOLO, Philippines 1997-02-14 226 | 225 HONG KONG: China, Taiwan, HK stocks rocked by Deng worries. HONG KONG 1997-02-18 227 | 226 UK: Albright looks back to war years on London visit. LONDON 1997-02-19 228 | 227 USA: Harriman son, estranged wife inherit - report. WASHINGTON 1997-02-18 229 | 228 FRANCE: French judges refuse to ban "Larry Flynt" posters. PARIS 1997-02-20 230 | 229 GERMANY: Bavarians outraged at exhibition on Hitler's army. MUNICH, Germany 1997-02-20 231 | 230 SWITZERLAND: Swiss bishops blast Larry Flynt film poster. ZURICH 1997-02-21 232 | 231 ROMANIA: Romania king, citizenship restored, to visit home. BUCHAREST 1996-02-24 233 | 232 CHINA: China Catholics pray for Deng as church silent. BEIJING 1997-02-23 234 | 233 ROMANIA: Romania flashy "Gypsy King" dies of heart attack. BUCHAREST 1997-02-24 235 | 234 USA: Rev. Benjamin Chavis joins Farrakhan group. CHICAGO 1997-02-24 236 | 235 INDIA: Mother Teresa succession vote soon, order says. CALCUTTA, India 1997-02-24 237 | 236 INDIA: Mother Teresa succession vote imminent, order says. CALCUTTA, India 1997-02-24 238 | 237 GERMANY: "English Patient" favourite for Berlin Golden Bear. BERLIN 1997-02-24 239 | 238 FRANCE: Berlin-winner Forman drops Larry Flynt poster. PARIS 1997-02-24 240 | 239 GERMANY: Bavarians protest against exhibit on Hitler's army. 
MUNICH, Germany 1997-01-24 241 | 240 GERMANY: "English Patient" tipped for Berlin festival prize. BERLIN 1997-02-24 242 | 241 ROMANIA: Romanians honour memory of Gypsy King. SIBIU, Romania 1997-02-26 243 | 242 ROMANIA: Romania to use King as envoy to boost NATO chances. BUCHAREST 1997-02-26 244 | 243 ITALY: Sicily archbishop on trial for corruption, EU fraud. PALERMO, Sicily 1997-02-26 245 | 244 VATICAN: St Peter's in Rome to get facelift for 2000. VATICAN CITY 1997-02-27 246 | 245 ROMANIA: Romania's exiled king backs reforms. BUCHAREST 1997-03-01 247 | 246 ROMANIA: Small crowds greet Romanian king, patriarch. BUCHAREST 1997-03-02 248 | 247 USA: Forbes seeks to shape U.S. agenda, could run again. NEW YORK 1997-03-04 249 | 248 INDIA: Mother Teresa successor vote soon - church official. CALCUTTA 1997-03-04 250 | 249 POLAND: Polish bishops condemn Larry Flynt film poster. WARSAW 1997-03-05 251 | 250 USA: Ex-president Reagan, wife mark 45th anniversary. LOS ANGELES 1997-03-04 252 | 251 BULGARIA: Bulgarian patriarch vows to stay on despite ruling. SOFIA 1997-03-06 253 | 252 INDIA: Nuns to elect Mother Teresa's successor any day. CALCUTTA 1997-03-06 254 | 253 UK: UK Cardinal offers cash help to women not to abort. GLASGOW, Scotland 1997-03-09 255 | 254 UK: Charles and Diana reunited for son's confirmation. LONDON 1997-03-09 256 | 255 INDIA: Mother Teresa's order grappling with succession. CALCUTTA, India 1997-03-09 257 | 256 RUSSIA: FEATURE - Russians still squabbling over tsar's bones. YEKATERINBURG, Russia 1997-03-10 258 | 257 USA: Pamela Harriman's estate to be auctioned off. NEW YORK 1997-03-11 259 | 258 USA: FEATURE-Grammy Winner Brecker owes it all to Philadelphia. PHILADELPHIA 1997-03-11 260 | 259 INDIA: Hindu Brahmin convert to succeed Mother Teresa. CALCUTTA, India 1997-03-13 261 | 260 INDIA: Shy nun emerges from Mother Teresa's shadow. CALCUTTA, India 1997-03-13 262 | 261 GERMANY: German home of 'lost Gospel' may hold other finds. 
BERLIN 1997-03-13 263 | 262 SPAIN: FEATURE - Spanish fiestas stir animal groups into fury. MANGANESES DE LA POLVOROSA, Spain 1997-03-13 264 | 263 INDIA: Mother Teresa's successor faces bumpy start. CALCUTTA, India 1997-03-16 265 | 264 SLOVAKIA: SLOVAK STUDENTS PRESS FOR MINISTER TO QUIT. BRATISLAVA 1997-03-17 266 | 265 JAMAICA: Former Jamaica prime minister Manley laid to rest. KINGSTON, Jamaica 1997-03-16 267 | 266 UK: FEATURE-Maverick Leigh strikes blow for independent film makers. LONDON 1997-03-17 268 | 267 USA: Fans swarm New York procession for slain rapper. NEW YORK 1997-03-18 269 | 268 USA: Hundreds swarm New York motorcade for slain rapper. NEW YORK 1997-03-18 270 | 269 USA: FEATURE - John Tesh - music is entertainment tonight. LOS ANGELES 1997-03-18 271 | 270 UK: Thatcher's archive to go to Churchill College. LONDON 1997-03-18 272 | 271 EGYPT: Egypt moves ahead with church restoration project. CAIRO 1997-03-18 273 | 272 PHILIPPINES: Ramos says N.Korean defector in Philppines for short time. MANILA 1997-03-19 274 | 273 UK: Archbishop of Canterbury speaks of retiring. LONDON 1997-03-20 275 | 274 ZIMBABWE: Mrs Clinton sees Zimbabwe efforts to help people. HARARE, Zimbabwe 1997-03-22 276 | 275 ITALY: Dutch-Flemish show goes to roots of modern art. VENICE, Italy 1997-03-23 277 | 276 USA: Clinton adjusts to slower-paced life. WASHINGTON 1997-03-25 278 | 277 RUSSIA: Lenin niece opposes re-burial of Communist leader. MOSCOW 1997-03-26 279 | 278 INDONESIA: Seventeen detained after riot on Indonesia's Java. JAKARTA 1997-03-27 280 | 279 VENEZUELA: FEATURE-Andean hermit acrobat wins artist fame. SAN RAFAEL DE MUCUCHIES, Venezuela 1997-04-01 281 | 280 VATICAN: FEATURE - Pope marks 19th Easter season looking tired, frail. VATICAN CITY 1997-04-01 282 | 281 PORTUGAL: FEATURE - Portugal's Cinderella city gets its reward. 283 | 282 FRANCE: Christian Dior denies rupture with actress Beart. PARIS 1997-04-03 284 | 283 SPAIN: Spanish princess to wed handball player - reports. 
MADRID 1997-04-03 285 | 284 UK: Defiant Princess Anne to return for Grand National. LIVERPOOL, England 1997-04-07 286 | 285 USA: Writer Patricia Cornwell admits lesbian affair. [CORRECTED 21:20 GMT]. NEW YORK 1997-04-08 287 | 286 USA: Joe Kennedy's ex-wife to appeal annulment - report. BOSTON 1997-04-08 288 | 287 USA: Ex-wife of US Sen. Kerry fights annulment - report. BOSTON 1997-04-10 289 | 288 RUSSIA: Top Russian official meets royal relative. MOSCOW 1997-04-12 290 | 289 USA: Farrakhan blames Catholics Church for hate crimes. [CORRECTED 21:48 GMT] WASHINGTON 1997-04-13 291 | 290 AUSTRALIA: RTRS-Australia's Colston released from hospital. BRISBANE 1996-04-17 292 | 291 UK: FEATURE - Britain hosts major Thomas Becket exhibition. CANTERBURY, England 1997-04-20 293 | 292 USA: Brooke Shields and Andre Agassi marry. MONTEREY, Calif. 1997-04-19 294 | 293 UK: China's HK General perfect choice, says tutor. LONDON 1997-04-22 295 | 294 USA: U.S. Christian Coalition leader to step down. [CORRECTED 18:19 GMT] WASHINGTON 1997-04-23 296 | 295 USA: Ralph Reed steps down from U.S. Christian Coalition. WASHINGTON 1997-04-23 297 | 296 EGYPT: U.S. pilot heads to Asia on Earhart tribute trip. LUXOR, Egypt 1997-04-24 298 | 297 USA: Alabama judge wins Kennedy "Courage" award. BOSTON 1997-04-24 299 | 298 PHILIPPINES: Philippines buries Marcos predecessor Macapagal. MANILA 1997-04-27 300 | 299 USA: 'Ellen' comes out, gays across U.S. celebrate. MIAMI 1997-05-01 301 | 300 USA: 'Ellen' comes out, gays across U.S. celebrate. MIAMI 1997-04-30 302 | 301 UK: Worldwide Advertising & Media Digest - May 2. 303 | 302 UK: Ex-wife of Yorkshire Ripper weds again in Britain. LONDON 1997-05-03 304 | 303 NEW ZEALAND: Former NZ Social Credit leader Bruce Beetham dies. 305 | 304 ITALY: Italy unveils plan to protect art treasures. ROME 1997-05-03 306 | 305 UK: Eurosceptic Redwood bids to lead UK Conservatives. LONDON 1997-05-06 307 | 306 GERMANY: Germany's Herzog urges Europeans to back unity. 
AACHEN, Germany 1997-05-08 308 | 307 USA: American civil rights leader to be buried in Harlem. NEW YORK 1997-05-08 309 | 308 USA: Kempton honored by New York writers, mayor. NEW YORK 1997-05-08 310 | 309 VATICAN: Glamorous ex-model swaps catwalk for convent. VATICAN CITY 1997-05-09 311 | 310 RUSSIA: Russian parliament votes no changes to Red Square. MOSCOW 1997-05-14 312 | 311 AUSTRIA: FEATURE - "Sound of Music" makes Austrian cash tills ring. SALZBURG, Austria 1997-05-16 313 | 312 AUSTRALIA: TIMELINES-Today in History - May 19. 314 | 313 ITALY: Pope, on birthday, says wants to live to 100. ROME 1997-05-18 315 | 314 USA: Pamela Harriman's estate goes on sale. [CORRECTED 22:43 GMT] NEW YORK 1997-05-19 316 | 315 USA: Stallone married in London, publicist confirms. LOS ANGELES 1997-05-19 317 | 316 UK: Germans ran systematic wartime plunder campaign. LONDON 1997-05-19 318 | 317 USA: Harriman auction continues to bring high prices. NEW YORK 1997-05-20 319 | 318 MEXICO: Mexico bids awkward farewell to controversial cleric. MEXICO CITY 1997-05-21 320 | 319 USA: Notebook contains early Lennon-McCartney tunes. NEW YORK 1997-05-21 321 | 320 USA: Harriman auction continues to bring high prices. NEW YORK 1997-05-20 322 | 321 ITALY: Healthy looking Mother Teresa initiates new nuns. ROME 1997-05-23 323 | 322 USA: Mother Teresa in New York to initiate new nuns. NEW YORK 1997-05-26 324 | 323 USA: Leona Helmsley makes $1 million donation. NEW YORK 1997-05-28 325 | 324 USA: Alabama judge accepts Kennedy "Courage" award. BOSTON 1997-05-29 326 | 325 RUSSIA: Would-be Russian tsar to return for family ceremony. MOSCOW 1997-05-30 327 | 326 POLAND: Pope in Poland tries to allay health concerns. WROCLAW, Poland 1996-06-02 328 | 327 UK: Britain plans glittering royal golden wedding. LONDON 1997-06-04 329 | 328 FRANCE: Vaillant gets loyalty reward in French cabinet post. PARIS 1997-06-04 330 | 329 POLAND: POPE TAKES HELICOPTER TRIP ON REST DAY. 
ZAKOPANE, Poland 1997-06-05 331 | 330 POLAND: Pope takes helicopter trip over beloved mountains. ZAKOPANE, Poland 1997-06-05 332 | 331 POLAND: Pope rests at halfway mark as Poles mull message. ZAKOPANE, Poland 1997-06-05 333 | 332 POLAND: Pope has moving reunion with with old schoolmates. ZAKOPANE, Poland 1997-06-06 334 | 333 UK: Oasis songwriter Noel Gallagher weds in Las Vegas. LONDON 1997-06-07 335 | 334 POLAND: Pope gives thanks for surviving assassination bid. ZAKOPANE, Poland 1997-06-07 336 | 335 POLAND: Pope emotionally recalls assassination attempt. ZAKOPANE, Poland 1997-06-07 337 | 336 POLAND: Pope to spend few moments at parents' grave. KRAKOW, Poland 1996-06-09 338 | 337 POLAND: Pope spends moments of silence at parents' grave. KRAKOW, Poland 1997-06-09 339 | 338 POLAND: Poles flock to big mass on last day of Papal visit. KROSNO, Poland 1997-06-10 340 | 339 ITALY: Mussolini descendants gather for son's funeral. ROME 1997-06-14 341 | 340 ISRAEL: Sinead quits Jerusalem concert after death threats. JERUSALEM 1997-06-16 342 | 341 USA: U.S. church boycotts "Gay friendly" Disney. DALLAS 1997-06-18 343 | 342 RUSSIA: Russia's Patriarch urges caution on Lenin removal. MOSCOW 1997-06-19 344 | 343 GERMANY: Fragments of rare Gutenberg bible found in Germany. BONN 1997-06-20 345 | 344 UK: UK ex-minister quits as Queen's adviser - reports. LONDON 1997-06-25 346 | 345 GERMANY: Boris Becker threatens Scientology over Internet. BONN 1997-06-27 347 | 346 CHINA: China's top Catholic church official dies at 80. BEIJING 1997-06-28 348 | 347 UK: Westminster Abbey to charge entry fee - paper. LONDON 1997-06-29 349 | 348 UK: Freemasons launch magazine to improve image. LONDON 1997-07-01 350 | 349 UK: FEATURE-Orange and Green, the colours of N.Ireland conflict. BELFAST 1997-07-02 351 | 350 COLOMBIA: Colombia recovers church art worth $1.5 million. BOGOTA 1997-07-03 352 | 351 UK: Rare show of sympathy for UK's royal mistress. 
LONDON 1997-07-07 353 | 352 USA: Hollywood bids farewell to actor Jimmy Stewart. BEVERLY HILLS, Calif. 1997-07-07 354 | 353 ITALY: Pope begins private mountain retreat in Italy Alps. INTROD, Italy 1997-07-09 355 | 354 UK: FEATURE - UK royals try to relaunch after decade "horribilis". LONDON 1997-07-10 356 | 355 USA: Black church leader denies affair. ST. PETERSBURG, Fla. 1997-07-11 357 | 356 ALBANIA: FEATURE - Albania violence threatens treasured ancient city. BUTRINT, Albania 1997-07-13 358 | 357 ITALY: Jovial Pope seems invigorated by mountain break. LES COMBES, Italy 1997-07-13 359 | 358 UK: Churchill's World War One letters up for auction. LONDON 1997-07-16 360 | 359 UK: Prince Charles holds 50th bash for lover Camilla. LONDON 1997-07-16 361 | 360 GERMANY: Russian minister criticises Bonn on art booty. BONN 1997-07-16 362 | 361 UK: Churchill letters sell for three times estimate. LONDON 1997-07-17 363 | 362 USA: Miami police probe new killing, Versace cremated. MIAMI 1997-07-17 364 | 363 USA: Versace ashes to be flown to Italy Thursday. MIAMI 1997-07-17 365 | 364 ISRAEL: FEATURE - Despite Vanunu, no nuclear debate in Israel. JERUSALEM 1997-07-17 366 | 365 USA: Miami Beach bids farewell to Versace. MIAMI BEACH, Fla. 1997-07-18 367 | 366 ITALY: Lakeside villa was hideaway for glamorous Versace. MOLTRASIO, Italy 1997-07-18 368 | 367 USA: Miami Beach bids farewell to Versace. MIAMI BEACH, Fla. 1997-07-18 369 | 368 USA: Clinton will speak at Arkansas friend's funeral. HARRISON, Ark 1997-07-18 370 | 369 USA: Versace service set; new murder seen as unrelated. MIAMI BEACH 1997-07-18 371 | 370 UK: Camilla steps out of shadows as royal lover. LONDON 1997-07-19 372 | 371 INDIA: Mother Teresa returns, pleased with foreign trip. NEW DELHI 1997-07-20 373 | 372 VIETNAM: Senior Vietnam Buddhist dies. HANOI 1997-07-21 374 | 373 RUSSIA: Yeltsin revels in good health, backs new reforms. MOSCOW 1997-07-23 375 | 374 PHILIPPINES: Aunt calls Versace killer "nice, quiet boy". 
BALIUAG, Philippines 1997-07-24 376 | 375 VIETNAM: Thousands mourn death of top Vietnam Buddhist. HO CHI MINH CITY, Vietnam 1997-07-25 377 | 376 UK: UK's Blair says Charles can wed Camilla - report. LONDON 1997-07-27 378 | 377 UK: UK's Blair dismisses Charles and Camilla reports. LONDON 1997-07-28 379 | 378 USA: Clinton delivers eulogy at Brennan's funeral. WASHINGTON 1997-07-29 380 | 379 USA: California memorial service held for Andrew Cunanan. SAN DIEGO 1997-07-30 381 | 380 CROATIA: Croatia's Tudjman sworn in for second 5-year term. ZAGREB 1997-08-05 382 | 381 USA: Former FBI Director Clarence Kelley dies at 85. KANSAS CITY, Mo. 1997-08-05 383 | 382 UK: Most Britons oppose idea of "Queen Camilla"-poll. LONDON 1997-08-06 384 | 383 USA: Clarence Kelley, former FBI director, dies at 85. KANSAS CITY, Mo 1997-08-05 385 | 384 UK: Britain's "top people" back Charles marrying - poll. LONDON 1997-08-06 386 | 385 UK: Scotland kicks off world's biggest arts festival. EDINBURGH 1997-08-10 387 | 386 UK: Fringe chases audiences in Edinburgh festival month. EDINBURGH 1996-08-12 388 | 387 USA: Even in death, Elvis lives. LOS ANGELES 1997-08-11 389 | 388 USA: EVEN IN DEATH, ELVIS LIVES. LOS ANGELES 1997-08-11 390 | 389 USA: EVEN IN DEATH, ELVIS LIVES. LOS ANGELES 1997-08-11 391 | 390 CANADA: FEATURE - French-speaking Quebec celebrates Irish heritage. QUEBEC CITY 1997-08-14 392 | 391 BULGARIA: FEATURE - Bulgarian opera stars are enduring export. SOFIA 1997-08-15 393 | 392 USA: Fans end Elvis Presley fete with concert. MEMPHIS, Tenn 1997-08-16 394 | 393 UK: Volcano buries studio where rock legends recorded. LONDON 1997-08-18 395 | 394 USA: Joseph Vostal, ex-Kidder muni banker, dead at 88. 
NEW YORK 1997-08-18 396 | -------------------------------------------------------------------------------- /lda/tests/reuters.tokens: -------------------------------------------------------------------------------- 1 | church 2 | pope 3 | years 4 | people 5 | mother 6 | last 7 | told 8 | first 9 | world 10 | year 11 | president 12 | teresa 13 | charles 14 | catholic 15 | during 16 | life 17 | u.s 18 | city 19 | public 20 | time 21 | since 22 | family 23 | king 24 | former 25 | british 26 | harriman 27 | against 28 | country 29 | vatican 30 | made 31 | three 32 | hospital 33 | minister 34 | home 35 | died 36 | tuesday 37 | government 38 | n't 39 | million 40 | prince 41 | john 42 | very 43 | war 44 | say 45 | order 46 | day 47 | political 48 | leader 49 | heart 50 | roman 51 | yeltsin 52 | later 53 | france 54 | party 55 | monday 56 | death 57 | clinton 58 | sunday 59 | elvis 60 | state 61 | diana 62 | royal 63 | wednesday 64 | churchill 65 | official 66 | service 67 | including 68 | film 69 | while 70 | week 71 | left 72 | work 73 | paris 74 | around 75 | american 76 | saying 77 | take 78 | doctors 79 | group 80 | son 81 | queen 82 | house 83 | police 84 | national 85 | under 86 | surgery 87 | next 88 | peace 89 | paul 90 | long 91 | ceremony 92 | month 93 | thursday 94 | french 95 | head 96 | marriage 97 | operation 98 | visit 99 | show 100 | germany 101 | russian 102 | days 103 | love 104 | news 105 | ago 106 | became 107 | own 108 | spokesman 109 | says 110 | television 111 | never 112 | mass 113 | friday 114 | bishop 115 | held 116 | members 117 | prime 118 | called 119 | off 120 | father 121 | century 122 | whose 123 | south 124 | health 125 | parker 126 | four 127 | bowles 128 | cardinal 129 | officials 130 | united 131 | among 132 | britain 133 | newspaper 134 | funeral 135 | camilla 136 | russia 137 | age 138 | several 139 | statement 140 | religious 141 | private 142 | media 143 | added 144 | women 145 | reporters 146 | until 147 | ambassador 148 | part 
149 | great 150 | east 151 | known 152 | children 153 | wife 154 | late 155 | few 156 | took 157 | good 158 | married 159 | woman 160 | german 161 | times 162 | 1992 163 | set 164 | trip 165 | union 166 | leaders 167 | bernardin 168 | international 169 | won 170 | come 171 | taken 172 | england 173 | place 174 | become 175 | princess 176 | rights 177 | rome 178 | saturday 179 | end 180 | both 181 | asked 182 | charity 183 | michael 184 | expected 185 | prize 186 | second 187 | born 188 | past 189 | local 190 | throne 191 | white 192 | percent 193 | christian 194 | reuters 195 | led 196 | us 197 | reports 198 | states 199 | catholics 200 | pontiff 201 | election 202 | director 203 | york 204 | found 205 | earlier 206 | outside 207 | london 208 | foreign 209 | go 210 | give 211 | used 212 | want 213 | nuns 214 | sister 215 | senior 216 | court 217 | major 218 | know 219 | music 220 | law 221 | help 222 | came 223 | right 224 | thousands 225 | kremlin 226 | simpson 227 | husband 228 | close 229 | old 230 | conference 231 | think 232 | role 233 | five 234 | put 235 | nobel 236 | live 237 | gave 238 | art 239 | town 240 | winston 241 | went 242 | campaign 243 | although 244 | support 245 | calcutta 246 | fans 247 | italian 248 | men 249 | washington 250 | moscow 251 | without 252 | couple 253 | member 254 | missionaries 255 | top 256 | every 257 | capital 258 | best 259 | might 260 | miami 261 | orthodox 262 | wrote 263 | once 264 | versace 265 | power 266 | history 267 | body 268 | plans 269 | away 270 | god 271 | third 272 | recent 273 | problems 274 | seen 275 | churches 276 | early 277 | palace 278 | mark 279 | office 280 | cunanan 281 | divorce 282 | met 283 | suffering 284 | parliament 285 | poland 286 | nation 287 | together 288 | six 289 | communist 290 | award 291 | began 292 | successor 293 | friends 294 | book 295 | priest 296 | peter 297 | again 298 | west 299 | return 300 | wanted 301 | 1994 302 | economic 303 | birthday 304 | exhibition 305 | personal 306 
| europe 307 | soviet 308 | letter 309 | ill 310 | museum 311 | condition 312 | disease 313 | india 314 | hope 315 | secret 316 | festival 317 | quoted 318 | wright 319 | bill 320 | human 321 | poor 322 | night 323 | romania 324 | 50 325 | spent 326 | june 327 | groups 328 | general 329 | himself 330 | months 331 | kept 332 | chief 333 | announced 334 | appeared 335 | future 336 | nazi 337 | despite 338 | friend 339 | far 340 | whether 341 | pamela 342 | bishops 343 | popular 344 | marry 345 | relationship 346 | denied 347 | series 348 | 1993 349 | name 350 | hand 351 | divorced 352 | archbishop 353 | same 354 | daughter 355 | often 356 | countries 357 | across 358 | southern 359 | security 360 | small 361 | culture 362 | october 363 | appendix 364 | christmas 365 | little 366 | sent 367 | cancer 368 | big 369 | hours 370 | brought 371 | interview 372 | nun 373 | 1981 374 | following 375 | stay 376 | special 377 | meeting 378 | centre 379 | young 380 | jews 381 | buried 382 | reported 383 | daily 384 | presidential 385 | community 386 | received 387 | near 388 | tour 389 | published 390 | black 391 | opinion 392 | believe 393 | child 394 | helped 395 | air 396 | anniversary 397 | magazine 398 | chicago 399 | given 400 | illness 401 | cathedral 402 | lead 403 | weeks 404 | conservative 405 | brother 406 | medical 407 | going 408 | leading 409 | star 410 | full 411 | 10 412 | opposition 413 | cultural 414 | money 415 | ended 416 | does 417 | july 418 | square 419 | yet 420 | letters 421 | kennedy 422 | plan 423 | america 424 | open 425 | army 426 | sale 427 | north 428 | today 429 | duke 430 | killed 431 | democratic 432 | timor 433 | 20 434 | european 435 | due 436 | planned 437 | mayor 438 | already 439 | english 440 | according 441 | hundreds 442 | university 443 | taking 444 | along 445 | politics 446 | nearly 447 | high 448 | remains 449 | famous 450 | evening 451 | september 452 | affair 453 | attended 454 | social 455 | actor 456 | spokeswoman 457 | italy 458 
| lives 459 | crowd 460 | include 461 | vote 462 | living 463 | polish 464 | case 465 | concert 466 | issue 467 | november 468 | recently 469 | figure 470 | others 471 | society 472 | main 473 | less 474 | 1995 475 | press 476 | 'm 477 | central 478 | wedding 479 | school 480 | team 481 | ever 482 | happy 483 | navarro-valls 484 | described 485 | organisation 486 | always 487 | radio 488 | returned 489 | civil 490 | clear 491 | whom 492 | william 493 | start 494 | stars 495 | belo 496 | wants 497 | saint 498 | showed 499 | quebec 500 | fell 501 | making 502 | use 503 | success 504 | decided 505 | removed 506 | prayers 507 | murder 508 | 15 509 | admitted 510 | los 511 | western 512 | lost 513 | angeles 514 | find 515 | blood 516 | force 517 | secretary 518 | suffered 519 | irish 520 | august 521 | sometimes 522 | attack 523 | hollywood 524 | real 525 | island 526 | indonesia 527 | traditional 528 | turned 529 | bring 530 | decision 531 | 1989 532 | cause 533 | least 534 | bardot 535 | rest 536 | front 537 | nearby 538 | named 539 | image 540 | newspapers 541 | 30 542 | memorial 543 | released 544 | african 545 | military 546 | hit 547 | authorities 548 | northern 549 | care 550 | freedom 551 | story 552 | eastern 553 | carried 554 | beach 555 | feature 556 | actress 557 | large 558 | caused 559 | moslem 560 | something 561 | monarchy 562 | almost 563 | march 564 | 1991 565 | speculation 566 | recovery 567 | hold 568 | point 569 | lenin 570 | aides 571 | strong 572 | believed 573 | rather 574 | worked 575 | final 576 | boris 577 | rule 578 | issued 579 | accused 580 | built 581 | council 582 | industry 583 | shot 584 | gmt 585 | crucitti 586 | source 587 | policy 588 | elected 589 | seven 590 | sources 591 | possible 592 | polls 593 | stage 594 | view 595 | bertil 596 | priests 597 | company 598 | loved 599 | really 600 | millions 601 | report 602 | berlin 603 | college 604 | half 605 | morning 606 | diplomat 607 | failed 608 | mission 609 | parties 610 | attend 611 
| 1996 612 | stalin 613 | run 614 | event 615 | treated 616 | nirmala 617 | forced 618 | salonika 619 | pounds 620 | picture 621 | estate 622 | started 623 | feel 624 | joined 625 | coalition 626 | dead 627 | paid 628 | gay 629 | embassy 630 | cannot 631 | 100 632 | committee 633 | though 634 | tumour 635 | play 636 | probably 637 | scientology 638 | face 639 | modern 640 | different 641 | important 642 | sense 643 | behind 644 | africa 645 | business 646 | wartime 647 | everything 648 | gemelli 649 | leave 650 | 76 651 | fall 652 | papers 653 | fever 654 | finally 655 | message 656 | chapel 657 | founded 658 | attempt 659 | pay 660 | republican 661 | keep 662 | trying 663 | parkinson 664 | tribute 665 | involved 666 | works 667 | doctor 668 | eight 669 | allowed 670 | successful 671 | singer 672 | arrived 673 | within 674 | red 675 | played 676 | refused 677 | looking 678 | bid 679 | 80 680 | nothing 681 | hall 682 | enough 683 | anything 684 | rock 685 | opera 686 | april 687 | biggest 688 | amid 689 | elections 690 | poles 691 | career 692 | need 693 | spread 694 | tests 695 | included 696 | brain 697 | abortion 698 | spiritual 699 | based 700 | working 701 | control 702 | poster 703 | turn 704 | services 705 | papal 706 | done 707 | legal 708 | especially 709 | governor 710 | territory 711 | themselves 712 | nuclear 713 | deeply 714 | however 715 | huge 716 | spoke 717 | james 718 | read 719 | got 720 | project 721 | buddhist 722 | residence 723 | efforts 724 | programme 725 | sen 726 | indian 727 | violence 728 | gold 729 | elizabeth 730 | chernomyrdin 731 | move 732 | region 733 | honour 734 | krakow 735 | issues 736 | disney 737 | events 738 | building 739 | idea 740 | chirac 741 | largest 742 | heir 743 | china 744 | signs 745 | madonna 746 | quite 747 | christ 748 | faith 749 | charges 750 | gathered 751 | let 752 | visited 753 | saw 754 | ii 755 | having 756 | tv 757 | prison 758 | swiss 759 | pray 760 | animal 761 | figures 762 | claims 763 | takes 764 | 
greek 765 | ireland 766 | cold 767 | needed 768 | prominent 769 | comes 770 | present 771 | averell 772 | holiday 773 | israel 774 | christians 775 | battle 776 | hitler 777 | association 778 | felt 779 | followed 780 | signed 781 | form 782 | israeli 783 | fact 784 | revealed 785 | marrying 786 | fashion 787 | remained 788 | powerful 789 | lover 790 | look 791 | ruling 792 | stop 793 | 18 794 | citizen 795 | hard 796 | britons 797 | fine 798 | free 799 | reed 800 | sweden 801 | ramos 802 | la 803 | served 804 | protest 805 | broke 806 | documents 807 | further 808 | guests 809 | trembling 810 | fame 811 | able 812 | showing 813 | serious 814 | 25 815 | ellen 816 | historic 817 | trial 818 | line 819 | communism 820 | remove 821 | minutes 822 | active 823 | kohl 824 | independence 825 | weekend 826 | pressure 827 | looked 828 | voice 829 | artists 830 | rare 831 | movement 832 | 1979 833 | period 834 | fear 835 | tsar 836 | vietnam 837 | summer 838 | presley 839 | speak 840 | victims 841 | road 842 | debate 843 | underwent 844 | job 845 | officer 846 | headquarters 847 | flynt 848 | homeland 849 | step 850 | department 851 | details 852 | republic 853 | supreme 854 | byzantine 855 | visitors 856 | monarch 857 | tradition 858 | change 859 | entertainment 860 | romanian 861 | build 862 | site 863 | 1978 864 | meet 865 | moment 866 | schedule 867 | religion 868 | professor 869 | mrs 870 | grand 871 | beautiful 872 | raise 873 | truth 874 | arts 875 | carlos 876 | warned 877 | area 878 | celebrate 879 | howard 880 | 've 881 | whole 882 | sun 883 | fbi 884 | land 885 | hotel 886 | influence 887 | estimated 888 | light 889 | running 890 | san 891 | ban 892 | movie 893 | helping 894 | cinema 895 | market 896 | particularly 897 | problem 898 | leadership 899 | cemetery 900 | nazis 901 | books 902 | win 903 | joseph 904 | bank 905 | homes 906 | favourite 907 | twice 908 | 16 909 | ran 910 | opened 911 | course 912 | february 913 | post 914 | australia 915 | sold 916 | car 
917 | films 918 | 1990 919 | larry 920 | billion 921 | symbol 922 | gandhi 923 | trouble 924 | blue 925 | sex 926 | invited 927 | stand 928 | revolution 929 | title 930 | holy 931 | better 932 | foundation 933 | things 934 | january 935 | judge 936 | pop 937 | posters 938 | 17 939 | founder 940 | theatre 941 | disorder 942 | o 943 | miles 944 | calls 945 | retreat 946 | hoped 947 | charge 948 | bucharest 949 | written 950 | 76-year-old 951 | scotland 952 | december 953 | port 954 | itself 955 | abroad 956 | nations 957 | fellow 958 | auction 959 | sound 960 | everyone 961 | companies 962 | doing 963 | tired 964 | aged 965 | blessing 966 | 12 967 | scheduled 968 | financial 969 | sell 970 | labour 971 | person 972 | quickly 973 | spirit 974 | coming 975 | club 976 | thought 977 | streets 978 | continue 979 | lived 980 | organised 981 | celebrated 982 | relations 983 | farrakhan 984 | tried 985 | prepared 986 | starting 987 | becoming 988 | spain 989 | talk 990 | clubs 991 | surgeon 992 | performance 993 | soon 994 | ground 995 | nursing 996 | mountain 997 | process 998 | side 999 | action 1000 | steps 1001 | solidarity 1002 | roots 1003 | pictures 1004 | concerned 1005 | strength 1006 | offered 1007 | 1973 1008 | patriarch 1009 | designer 1010 | vast 1011 | workers 1012 | deep 1013 | instead 1014 | nato 1015 | collection 1016 | swedish 1017 | bed 1018 | expressed 1019 | information 1020 | australian 1021 | destitute 1022 | wallace 1023 | woodlands 1024 | jesus 1025 | style 1026 | key 1027 | killing 1028 | shortly 1029 | true 1030 | criticism 1031 | jewish 1032 | switzerland 1033 | alexander 1034 | appears 1035 | winning 1036 | marked 1037 | runcie 1038 | pound 1039 | marks 1040 | gone 1041 | considered 1042 | struggle 1043 | agreed 1044 | oldest 1045 | declined 1046 | birth 1047 | cities 1048 | regular 1049 | wearing 1050 | dismissed 1051 | 19 1052 | bypass 1053 | launched 1054 | views 1055 | parts 1056 | neurological 1057 | visits 1058 | trade 1059 | why 1060 | 
arms 1061 | release 1062 | haemorrhage 1063 | try 1064 | audience 1065 | cavallo 1066 | remain 1067 | stood 1068 | families 1069 | sought 1070 | loss 1071 | youth 1072 | murders 1073 | joaquin 1074 | speaking 1075 | california 1076 | critics 1077 | respect 1078 | controversial 1079 | leg 1080 | forces 1081 | jackson 1082 | material 1083 | aide 1084 | stone 1085 | diplomatic 1086 | passed 1087 | 11 1088 | performed 1089 | question 1090 | justice 1091 | urged 1092 | deal 1093 | politicians 1094 | brief 1095 | 1971 1096 | treatment 1097 | accept 1098 | praised 1099 | vanunu 1100 | wonderful 1101 | giving 1102 | von 1103 | dies 1104 | kind 1105 | moral 1106 | declared 1107 | various 1108 | airport 1109 | houses 1110 | violent 1111 | prayer 1112 | sexual 1113 | patient 1114 | worldwide 1115 | fringe 1116 | photographs 1117 | presence 1118 | continued 1119 | parents 1120 | using 1121 | publicly 1122 | 1930s 1123 | playing 1124 | farewell 1125 | changes 1126 | castro 1127 | attention 1128 | current 1129 | d 1130 | americans 1131 | rich 1132 | appearance 1133 | video 1134 | speech 1135 | reforms 1136 | board 1137 | 1 1138 | hands 1139 | model 1140 | reception 1141 | restoration 1142 | difficult 1143 | fire 1144 | recorded 1145 | defence 1146 | fair 1147 | albright 1148 | perform 1149 | staff 1150 | brennan 1151 | regional 1152 | chinese 1153 | independent 1154 | topless 1155 | 1977 1156 | boston 1157 | attacks 1158 | trips 1159 | goes 1160 | dr 1161 | inside 1162 | ordered 1163 | remember 1164 | comment 1165 | fought 1166 | author 1167 | gustaf 1168 | grew 1169 | archdiocese 1170 | hwang 1171 | phillips 1172 | jones 1173 | agency 1174 | 1947 1175 | nicholas 1176 | apparently 1177 | a.m 1178 | heard 1179 | position 1180 | gets 1181 | adding 1182 | call 1183 | ahead 1184 | communists 1185 | advertising 1186 | romanians 1187 | words 1188 | movies 1189 | ad 1190 | controversy 1191 | price 1192 | maria 1193 | similar 1194 | canterbury 1195 | writer 1196 | anyone 1197 | 
missionary 1198 | village 1199 | fighting 1200 | native 1201 | grounds 1202 | fully 1203 | duties 1204 | looks 1205 | everybody 1206 | includes 1207 | brown 1208 | david 1209 | soldiers 1210 | status 1211 | mastroianni 1212 | windsor 1213 | scientologists 1214 | laureate 1215 | dinner 1216 | respirator 1217 | annual 1218 | dozens 1219 | sick 1220 | 20th 1221 | spanish 1222 | happen 1223 | mind 1224 | legacy 1225 | evidence 1226 | timorese 1227 | items 1228 | poverty 1229 | troops 1230 | lady 1231 | castle 1232 | adolf 1233 | 50th 1234 | floor 1235 | andrew 1236 | greater 1237 | record 1238 | alliance 1239 | 're 1240 | treasures 1241 | usually 1242 | previous 1243 | electoral 1244 | moslems 1245 | committed 1246 | meant 1247 | break 1248 | temple 1249 | immigrants 1250 | 1986 1251 | 13 1252 | visiting 1253 | rejected 1254 | reconciliation 1255 | natural 1256 | someone 1257 | longer 1258 | 14 1259 | shared 1260 | buckingham 1261 | returning 1262 | anglican 1263 | ruled 1264 | era 1265 | cabinet 1266 | knew 1267 | latin 1268 | villa 1269 | shows 1270 | revolutionary 1271 | date 1272 | feet 1273 | thing 1274 | tourists 1275 | pages 1276 | herself 1277 | jail 1278 | seriously 1279 | promote 1280 | 24 1281 | watch 1282 | sad 1283 | water 1284 | fight 1285 | russians 1286 | growing 1287 | restore 1288 | raised 1289 | throughout 1290 | henry 1291 | chairman 1292 | jacques 1293 | 1975 1294 | language 1295 | crimes 1296 | korea 1297 | jazz 1298 | congress 1299 | coffin 1300 | assassination 1301 | citizenship 1302 | ancient 1303 | affairs 1304 | recalled 1305 | beetham 1306 | nine 1307 | bad 1308 | fund 1309 | travel 1310 | perhaps 1311 | economy 1312 | celebrations 1313 | spotlight 1314 | favour 1315 | bear 1316 | accompanied 1317 | intellectual 1318 | abdominal 1319 | schools 1320 | wed 1321 | publicity 1322 | development 1323 | golden 1324 | sons 1325 | travelled 1326 | representatives 1327 | allow 1328 | attending 1329 | bethlehem 1330 | surprise 1331 | lamour 1332 | 
agencies 1333 | liberal 1334 | pain 1335 | names 1336 | scandal 1337 | share 1338 | scene 1339 | wall 1340 | km 1341 | devotion 1342 | basilica 1343 | wish 1344 | hungary 1345 | christianity 1346 | pneumonia 1347 | boycott 1348 | towards 1349 | telephone 1350 | democracy 1351 | miss 1352 | p.m 1353 | helicopter 1354 | deng 1355 | lord 1356 | mary 1357 | germans 1358 | stockholm 1359 | constitution 1360 | decades 1361 | glamorous 1362 | widely 1363 | crown 1364 | alleged 1365 | mansion 1366 | boy 1367 | bulgarian 1368 | memoirs 1369 | radzinsky 1370 | documentary 1371 | criticised 1372 | happened 1373 | lies 1374 | holocaust 1375 | richard 1376 | jose 1377 | delayed 1378 | concern 1379 | dictator 1380 | portugal 1381 | fate 1382 | cioaba 1383 | kong 1384 | confirmed 1385 | tell 1386 | succeed 1387 | ending 1388 | referring 1389 | quiet 1390 | hero 1391 | bourassa 1392 | fears 1393 | 40 1394 | conservatives 1395 | completed 1396 | powers 1397 | eve 1398 | protect 1399 | consider 1400 | retire 1401 | common 1402 | federal 1403 | student 1404 | louis 1405 | else 1406 | questions 1407 | previously 1408 | munich 1409 | credit 1410 | plane 1411 | situation 1412 | popularity 1413 | short 1414 | rev 1415 | talks 1416 | faces 1417 | grave 1418 | hong 1419 | tours 1420 | version 1421 | 19th 1422 | sign 1423 | dent 1424 | tabloid 1425 | post-communist 1426 | invaded 1427 | winner 1428 | cuba 1429 | race 1430 | tutu 1431 | string 1432 | protestant 1433 | lilian 1434 | glittering 1435 | address 1436 | needs 1437 | makes 1438 | senate 1439 | complications 1440 | experts 1441 | 2,000 1442 | announcement 1443 | understand 1444 | officially 1445 | river 1446 | act 1447 | alabama 1448 | research 1449 | agca 1450 | ashes 1451 | hayward 1452 | forward 1453 | shown 1454 | envoy 1455 | street 1456 | remarks 1457 | dozen 1458 | heritage 1459 | denounced 1460 | abc 1461 | thomas 1462 | reading 1463 | van 1464 | created 1465 | experience 1466 | korean 1467 | conflict 1468 | changed 1469 | 
join 1470 | expect 1471 | headed 1472 | simply 1473 | values 1474 | canada 1475 | randolph 1476 | estrada 1477 | immediately 1478 | entered 1479 | charged 1480 | turning 1481 | francisco 1482 | spencer 1483 | lack 1484 | historian 1485 | damage 1486 | finding 1487 | band 1488 | review 1489 | bob 1490 | protests 1491 | lot 1492 | opponents 1493 | faced 1494 | annulment 1495 | forbes 1496 | internet 1497 | firm 1498 | candidate 1499 | moore 1500 | le 1501 | sotheby 1502 | low 1503 | leigh 1504 | mention 1505 | wooden 1506 | resistance 1507 | tom 1508 | appeal 1509 | middle 1510 | divorcee 1511 | gift 1512 | gypsies 1513 | drive 1514 | activities 1515 | raising 1516 | emotional 1517 | personality 1518 | buy 1519 | unity 1520 | completely 1521 | renowned 1522 | orange 1523 | drew 1524 | express 1525 | cross 1526 | behaviour 1527 | candidates 1528 | lee 1529 | 1997 1530 | pilgrims 1531 | decide 1532 | bormann 1533 | tomb 1534 | waiting 1535 | property 1536 | seemed 1537 | rules 1538 | 27 1539 | younger 1540 | display 1541 | threatened 1542 | markets 1543 | example 1544 | walked 1545 | appointed 1546 | indonesian 1547 | victory 1548 | liver 1549 | wales 1550 | 18th 1551 | serve 1552 | hindu 1553 | danger 1554 | standing 1555 | seat 1556 | institution 1557 | leaving 1558 | manley 1559 | blair 1560 | ideas 1561 | stepped 1562 | voodoo 1563 | ties 1564 | baptist 1565 | projects 1566 | ministry 1567 | rose 1568 | agenda 1569 | exiled 1570 | 1980 1571 | historical 1572 | bulletin 1573 | lebed 1574 | highly 1575 | effort 1576 | 70 1577 | conducted 1578 | resort 1579 | serving 1580 | representative 1581 | nationalist 1582 | saved 1583 | csu 1584 | artistic 1585 | initial 1586 | scottish 1587 | means 1588 | mexico 1589 | particular 1590 | centuries 1591 | 200 1592 | aid 1593 | marble 1594 | ali 1595 | tsongas 1596 | supporter 1597 | 'll 1598 | chancellor 1599 | murdered 1600 | malaria 1601 | sat 1602 | highest 1603 | cerpa 1604 | greatest 1605 | cassisa 1606 | gunman 1607 | 
holding 1608 | gays 1609 | increase 1610 | metres 1611 | bearing 1612 | total 1613 | mainly 1614 | trust 1615 | easter 1616 | relatively 1617 | georgy 1618 | unusual 1619 | carey 1620 | boost 1621 | proud 1622 | homosexual 1623 | accusations 1624 | turkish 1625 | welcome 1626 | suspected 1627 | room 1628 | match 1629 | akobe 1630 | apart 1631 | daly 1632 | banned 1633 | 1970s 1634 | portuguese 1635 | facade 1636 | unprecedented 1637 | stern 1638 | handed 1639 | mausoleum 1640 | seeking 1641 | matter 1642 | paintings 1643 | socialist 1644 | whibley 1645 | 1970 1646 | japanese 1647 | dressed 1648 | george 1649 | plays 1650 | girls 1651 | comments 1652 | awards 1653 | commission 1654 | actors 1655 | above 1656 | interest 1657 | wojtyla 1658 | eldest 1659 | brazil 1660 | residents 1661 | mistress 1662 | taylor 1663 | greece 1664 | soul 1665 | 1946 1666 | feeling 1667 | 'd 1668 | system 1669 | approved 1670 | route 1671 | stewart 1672 | jakarta 1673 | latest 1674 | resignation 1675 | opposed 1676 | edward 1677 | thanked 1678 | right-wing 1679 | shooting 1680 | borer 1681 | cornwell 1682 | gutters 1683 | crisis 1684 | ex-wife 1685 | exile 1686 | emergency 1687 | likely 1688 | seems 1689 | blessed 1690 | brecker 1691 | suicide 1692 | 1960s 1693 | touch 1694 | praying 1695 | philippines 1696 | imminent 1697 | sort 1698 | allegations 1699 | debakey 1700 | improve 1701 | coast 1702 | budget 1703 | strikes 1704 | democrat 1705 | empire 1706 | 86 1707 | faithful 1708 | royals 1709 | lawyers 1710 | task 1711 | league 1712 | 5 1713 | 1945 1714 | songs 1715 | places 1716 | reputation 1717 | winners 1718 | education 1719 | afternoon 1720 | guards 1721 | learn 1722 | haiti 1723 | edinburgh 1724 | produced 1725 | ion 1726 | multimillionaire 1727 | photographers 1728 | accepted 1729 | represented 1730 | developed 1731 | deliberations 1732 | continuing 1733 | becket 1734 | wait 1735 | romanov 1736 | moved 1737 | weekly 1738 | arrival 1739 | giant 1740 | easy 1741 | pair 1742 | 
intellectuals 1743 | activist 1744 | challenge 1745 | province 1746 | compared 1747 | choice 1748 | chapter 1749 | eventually 1750 | musicians 1751 | journalists 1752 | chadwick 1753 | procession 1754 | margareta 1755 | 1987 1756 | dating 1757 | defeated 1758 | courage 1759 | sure 1760 | growth 1761 | ready 1762 | fortune 1763 | celebrities 1764 | sacred 1765 | writers 1766 | 3 1767 | responsible 1768 | funds 1769 | attacked 1770 | ask 1771 | graceland 1772 | planning 1773 | introduced 1774 | claim 1775 | alive 1776 | communities 1777 | contemporary 1778 | racial 1779 | split 1780 | lie 1781 | lesbian 1782 | football 1783 | die 1784 | writings 1785 | selling 1786 | crime 1787 | ramos-horta 1788 | bible 1789 | resume 1790 | succession 1791 | deputy 1792 | tree 1793 | eyes 1794 | related 1795 | retired 1796 | resigned 1797 | supply 1798 | 21 1799 | thank 1800 | speeches 1801 | cardiac 1802 | farnborough 1803 | wrong 1804 | repeatedly 1805 | 47 1806 | 1948 1807 | mccartney 1808 | animals 1809 | access 1810 | brothers 1811 | sprawling 1812 | cases 1813 | spend 1814 | normally 1815 | martin 1816 | f 1817 | reward 1818 | enter 1819 | focus 1820 | audiences 1821 | currently 1822 | remote 1823 | mean 1824 | allowing 1825 | humour 1826 | lahar 1827 | ailing 1828 | robert 1829 | jerusalem 1830 | sales 1831 | navy 1832 | banks 1833 | long-time 1834 | wishes 1835 | poll 1836 | mourners 1837 | dignitaries 1838 | positive 1839 | occasion 1840 | protection 1841 | administration 1842 | suburb 1843 | investment 1844 | 86-year-old 1845 | armed 1846 | level 1847 | 2 1848 | irregular 1849 | 1960 1850 | manhattan 1851 | naturalised 1852 | peaceful 1853 | slightly 1854 | jury 1855 | 1940 1856 | standards 1857 | admission 1858 | beyond 1859 | memory 1860 | prayed 1861 | monks 1862 | placed 1863 | slow 1864 | papacy 1865 | finch 1866 | constant 1867 | kerry 1868 | beloved 1869 | term 1870 | 75 1871 | recognition 1872 | polish-born 1873 | lustiger 1874 | complained 1875 | canadian 1876 | 
onassis 1877 | redwood 1878 | appealed 1879 | carrying 1880 | 300 1881 | acceptance 1882 | hate 1883 | green 1884 | nurse 1885 | interests 1886 | bought 1887 | messages 1888 | range 1889 | offensive 1890 | 1,500 1891 | mountains 1892 | follow 1893 | blamed 1894 | terms 1895 | flag 1896 | intention 1897 | promised 1898 | anne 1899 | operations 1900 | records 1901 | survived 1902 | concerts 1903 | 1949 1904 | postponed 1905 | assembly 1906 | carl 1907 | chance 1908 | 77-year-old 1909 | hunting 1910 | cruise 1911 | appear 1912 | ceausescu 1913 | eye 1914 | offer 1915 | worth 1916 | closest 1917 | memphis 1918 | sentence 1919 | herald 1920 | engagements 1921 | demanded 1922 | healthy 1923 | duty 1924 | traffic 1925 | keeping 1926 | reach 1927 | chosen 1928 | seem 1929 | asking 1930 | sweet 1931 | felix 1932 | analysts 1933 | row 1934 | possibility 1935 | agent 1936 | publisher 1937 | clinic 1938 | 1985 1939 | colon 1940 | manila 1941 | seoul 1942 | archives 1943 | alongside 1944 | runs 1945 | packed 1946 | captured 1947 | adviser 1948 | commoner 1949 | draw 1950 | bells 1951 | begin 1952 | unable 1953 | corruption 1954 | killer 1955 | stallone 1956 | getting 1957 | master 1958 | normal 1959 | sheep 1960 | dutch 1961 | myself 1962 | malraux 1963 | flew 1964 | majority 1965 | commercial 1966 | watching 1967 | flowers 1968 | converted 1969 | maybe 1970 | impact 1971 | fee 1972 | walls 1973 | session 1974 | 20,000 1975 | macphee 1976 | rushed 1977 | kevin 1978 | appointment 1979 | forman 1980 | sweeping 1981 | reporter 1982 | adopted 1983 | becker 1984 | traditions 1985 | choose 1986 | construction 1987 | simple 1988 | bbc 1989 | telegraph 1990 | 1972 1991 | ordinary 1992 | settlement 1993 | aimed 1994 | adultery 1995 | heppner 1996 | cancelled 1997 | suggested 1998 | pacemaker 1999 | 7 2000 | arrested 2001 | background 2002 | czech 2003 | remembered 2004 | fourth 2005 | 1936 2006 | priesthood 2007 | stayed 2008 | putting 2009 | tudjman 2010 | formed 2011 | ten 2012 | 
reportedly 2013 | intestinal 2014 | thoughts 2015 | extraordinary 2016 | guard 2017 | screen 2018 | beliefs 2019 | lisbon 2020 | criminal 2021 | alone 2022 | millennium 2023 | 1982 2024 | kelley 2025 | recognised 2026 | grant 2027 | constitutional 2028 | desire 2029 | offering 2030 | hindus 2031 | staying 2032 | mandela 2033 | severe 2034 | drug 2035 | inspired 2036 | = 2037 | georgetown 2038 | ex-communists 2039 | extrapyramidal 2040 | announce 2041 | 1976 2042 | municipal 2043 | delegates 2044 | mount 2045 | complaints 2046 | damaged 2047 | ability 2048 | caught 2049 | fitted 2050 | presidency 2051 | season 2052 | suggestions 2053 | regarded 2054 | vomiting 2055 | moving 2056 | chemotherapy 2057 | influential 2058 | 85 2059 | marcello 2060 | failure 2061 | briefly 2062 | klan 2063 | convent 2064 | pancreatic 2065 | luxury 2066 | sports 2067 | aims 2068 | pronounced 2069 | racism 2070 | interviews 2071 | perfect 2072 | rival 2073 | horta 2074 | taiwan 2075 | expert 2076 | clark 2077 | arkansas 2078 | proved 2079 | intelligence 2080 | unit 2081 | condemned 2082 | publishing 2083 | proceedings 2084 | becomes 2085 | so-called 2086 | lung 2087 | hosted 2088 | speedy 2089 | pulled 2090 | victim 2091 | clearly 2092 | wake 2093 | generation 2094 | greeted 2095 | tourist 2096 | chances 2097 | fresh 2098 | cost 2099 | valuable 2100 | vladimir 2101 | freemasons 2102 | publication 2103 | specialists 2104 | size 2105 | headlines 2106 | foster 2107 | kalyanam 2108 | save 2109 | madrid 2110 | garden 2111 | chavis 2112 | arrest 2113 | ordained 2114 | constantinescu 2115 | hundred 2116 | areas 2117 | attempts 2118 | cheered 2119 | operating 2120 | llosa 2121 | gallagher 2122 | 1988 2123 | flock 2124 | improved 2125 | companion 2126 | 8 2127 | closed 2128 | broken 2129 | deliver 2130 | alexiy 2131 | remarkable 2132 | dior 2133 | rebel 2134 | formerly 2135 | guilty 2136 | editor 2137 | separation 2138 | singers 2139 | single 2140 | wine 2141 | malignant 2142 | camp 2143 | vargas 
2144 | sixth 2145 | response 2146 | ignatyev 2147 | class 2148 | homecoming 2149 | field 2150 | indeed 2151 | reform 2152 | mp 2153 | girl 2154 | prices 2155 | beat 2156 | associated 2157 | praise 2158 | runaway 2159 | carry 2160 | crowds 2161 | oasis 2162 | karol 2163 | frail 2164 | round 2165 | invasion 2166 | officers 2167 | anaesthetic 2168 | burial 2169 | overseas 2170 | veteran 2171 | moments 2172 | nobody 2173 | trees 2174 | stopped 2175 | abuse 2176 | duchess 2177 | amount 2178 | sang 2179 | kansas 2180 | collapsed 2181 | 50,000 2182 | bigger 2183 | fred 2184 | occupation 2185 | harrelson 2186 | oscar 2187 | rumours 2188 | allies 2189 | reference 2190 | defend 2191 | bun 2192 | ude 2193 | points 2194 | chose 2195 | pride 2196 | colleagues 2197 | deny 2198 | elegant 2199 | charities 2200 | agents 2201 | mostly 2202 | magnate 2203 | argued 2204 | philadelphia 2205 | heirs 2206 | homeless 2207 | cardinals 2208 | dubbed 2209 | tells 2210 | wages 2211 | linked 2212 | products 2213 | demands 2214 | branded 2215 | persuade 2216 | baby 2217 | decree 2218 | houston 2219 | type 2220 | secrets 2221 | silent 2222 | reluctant 2223 | paper 2224 | beginning 2225 | hotels 2226 | devoted 2227 | opening 2228 | dying 2229 | priscilla 2230 | suite 2231 | inflammation 2232 | retirement 2233 | catholicism 2234 | relatives 2235 | studio 2236 | gianni 2237 | dancers 2238 | staged 2239 | resign 2240 | rap 2241 | reagan 2242 | male 2243 | wing 2244 | bonn 2245 | sisters 2246 | memories 2247 | dignity 2248 | effects 2249 | paying 2250 | casket 2251 | mehmet 2252 | shelter 2253 | either 2254 | threats 2255 | studies 2256 | eager 2257 | collapse 2258 | backed 2259 | 10th 2260 | celebration 2261 | prelate 2262 | prostate 2263 | unesco 2264 | gifted 2265 | 'brien 2266 | charm 2267 | kidney 2268 | organisers 2269 | send 2270 | leftist 2271 | quietly 2272 | cited 2273 | slain 2274 | wished 2275 | necessary 2276 | explain 2277 | concerns 2278 | suharto 2279 | certain 2280 | surgeons 2281 | 
stations 2282 | maxim 2283 | volunteers 2284 | cash 2285 | dramatic 2286 | emerged 2287 | calling 2288 | literary 2289 | remarry 2290 | venizelos 2291 | search 2292 | available 2293 | reese 2294 | addressed 2295 | painful 2296 | replacement 2297 | prosecutor 2298 | islam 2299 | sexually 2300 | ruth 2301 | trapp 2302 | crash 2303 | icons 2304 | gained 2305 | vanity 2306 | virgin 2307 | meetings 2308 | blame 2309 | character 2310 | shadow 2311 | colston 2312 | cardiologist 2313 | behalf 2314 | reelection 2315 | restored 2316 | 6 2317 | mercy 2318 | digby 2319 | chalet 2320 | punishing 2321 | request 2322 | lavish 2323 | touched 2324 | manager 2325 | angry 2326 | intensive 2327 | supported 2328 | population 2329 | regime 2330 | ritz 2331 | original 2332 | stores 2333 | involving 2334 | ageing 2335 | silence 2336 | firms 2337 | cremated 2338 | liberia 2339 | feelings 2340 | earned 2341 | jan 2342 | cover 2343 | certainly 2344 | convince 2345 | increasingly 2346 | writing 2347 | shop 2348 | currency 2349 | florida 2350 | discussions 2351 | hair 2352 | revelations 2353 | nominations 2354 | recording 2355 | swim 2356 | enormous 2357 | separate 2358 | nice 2359 | marsalis 2360 | romance 2361 | 1950s 2362 | acquired 2363 | prompted 2364 | grey 2365 | rural 2366 | delighted 2367 | four-day 2368 | gives 2369 | 33 2370 | journey 2371 | cast 2372 | keen 2373 | uk 2374 | traditionally 2375 | enemy 2376 | congregation 2377 | activity 2378 | grandchildren 2379 | gather 2380 | word 2381 | mr 2382 | outspoken 2383 | jack 2384 | 4,500 2385 | hectic 2386 | lower 2387 | christopher 2388 | beijing 2389 | profile 2390 | prostitute 2391 | appendectomy 2392 | managed 2393 | camps 2394 | practice 2395 | obviously 2396 | 90 2397 | ethnic 2398 | wealthy 2399 | iliescu 2400 | telling 2401 | acting 2402 | 11-day 2403 | strongly 2404 | finds 2405 | viewers 2406 | encouraged 2407 | base 2408 | happiness 2409 | snow 2410 | democrats 2411 | stories 2412 | launching 2413 | balcony 2414 | risk 2415 | 
host 2416 | engagement 2417 | der 2418 | copenhagen 2419 | authority 2420 | bringing 2421 | mail 2422 | fit 2423 | bond 2424 | miracle 2425 | moldova 2426 | java 2427 | unpaid 2428 | track 2429 | presented 2430 | continues 2431 | table 2432 | space 2433 | game 2434 | catherine 2435 | gov 2436 | tickets 2437 | hoping 2438 | receiving 2439 | infection 2440 | t-shirts 2441 | mike 2442 | lights 2443 | frequently 2444 | voted 2445 | ladies 2446 | flight 2447 | 960 2448 | weizman 2449 | balkan 2450 | moves 2451 | athos 2452 | album 2453 | biography 2454 | neither 2455 | dropped 2456 | provincial 2457 | students 2458 | technology 2459 | perez 2460 | bills 2461 | evident 2462 | bath 2463 | spoken 2464 | openly 2465 | lay 2466 | studied 2467 | archive 2468 | columbia 2469 | frequent 2470 | abbey 2471 | older 2472 | caring 2473 | hungarian 2474 | 48 2475 | francesco 2476 | jimmy 2477 | mistake 2478 | academy 2479 | motorcade 2480 | seeks 2481 | policies 2482 | volcano 2483 | responsibility 2484 | singing 2485 | picked 2486 | forget 2487 | synod 2488 | prevent 2489 | intestine 2490 | scholars 2491 | butrint 2492 | worst 2493 | daniel 2494 | 1,000 2495 | gypsy 2496 | riviera 2497 | totally 2498 | below 2499 | nature 2500 | sean 2501 | tomorrow 2502 | joking 2503 | scandals 2504 | advertisement 2505 | stroke 2506 | dedicated 2507 | helmut 2508 | network 2509 | helmsley 2510 | massachusetts 2511 | 16th 2512 | designed 2513 | variety 2514 | 1965 2515 | reaction 2516 | 23 2517 | moccia 2518 | artist 2519 | burned 2520 | ideals 2521 | turbulent 2522 | celibacy 2523 | pocahontas 2524 | reveal 2525 | diagnosed 2526 | rate 2527 | laid 2528 | thatcher 2529 | weak 2530 | discovered 2531 | rise 2532 | investigation 2533 | fatigue 2534 | sympathy 2535 | orders 2536 | smile 2537 | ambulance 2538 | calm 2539 | upon 2540 | landscape 2541 | 9 2542 | pimen 2543 | abdication 2544 | factory 2545 | sir 2546 | undergo 2547 | ministers 2548 | exclusive 2549 | judges 2550 | viktor 2551 | insisted 
2552 | tips 2553 | preserve 2554 | bury 2555 | expression 2556 | sides 2557 | determined 2558 | campaigner 2559 | dance 2560 | demanding 2561 | represent 2562 | uncertainty 2563 | vaillant 2564 | stops 2565 | wealth 2566 | homily 2567 | privacy 2568 | crypt 2569 | full-page 2570 | joanna 2571 | asia 2572 | sparked 2573 | journalist 2574 | tough 2575 | xvi 2576 | thinking 2577 | burns 2578 | prior 2579 | features 2580 | surrounding 2581 | broadcast 2582 | paint 2583 | corrects 2584 | amelia 2585 | architect 2586 | kwon 2587 | administrative 2588 | bernard 2589 | lovers 2590 | falling 2591 | pilla 2592 | contact 2593 | heat 2594 | extent 2595 | generations 2596 | balmoral 2597 | formally 2598 | inspiration 2599 | chubais 2600 | elite 2601 | alert 2602 | suspect 2603 | 1917 2604 | teacher 2605 | subject 2606 | bahamas 2607 | recover 2608 | painting 2609 | bouts 2610 | ailments 2611 | minority 2612 | enjoy 2613 | tesh 2614 | spring 2615 | sermon 2616 | hustler 2617 | unique 2618 | harris 2619 | acted 2620 | park 2621 | shields 2622 | blind 2623 | article 2624 | production 2625 | prosecutors 2626 | musician 2627 | roderick 2628 | neighbourhood 2629 | hurt 2630 | struck 2631 | thrown 2632 | w 2633 | feels 2634 | worshippers 2635 | supporters 2636 | landed 2637 | conscious 2638 | throw 2639 | turks 2640 | door 2641 | hopes 2642 | flown 2643 | represents 2644 | high-profile 2645 | respects 2646 | hymns 2647 | topic 2648 | heroes 2649 | assassinated 2650 | achievements 2651 | senator 2652 | hite 2653 | doubt 2654 | theme 2655 | granted 2656 | believes 2657 | convicted 2658 | draft 2659 | b.b 2660 | strip 2661 | warm 2662 | sites 2663 | widow 2664 | laws 2665 | nannen 2666 | facing 2667 | excluded 2668 | vs 2669 | apparent 2670 | kostroma 2671 | producer 2672 | smiled 2673 | parish 2674 | spending 2675 | finnish 2676 | marcos 2677 | food 2678 | boesak 2679 | naval 2680 | notorious 2681 | kung 2682 | intended 2683 | bars 2684 | nationwide 2685 | protestants 2686 | talking 
2687 | renewed 2688 | clovis 2689 | advantage 2690 | episode 2691 | atlanta 2692 | bitter 2693 | famed 2694 | jersey 2695 | sentenced 2696 | deadly 2697 | angel 2698 | option 2699 | measures 2700 | marking 2701 | bones 2702 | destruction 2703 | lifestyle 2704 | numbers 2705 | convention 2706 | caribbean 2707 | confident 2708 | tatra 2709 | celebrating 2710 | anthony 2711 | 132 2712 | altar 2713 | study 2714 | orleans 2715 | empty 2716 | lyons 2717 | clergy 2718 | lines 2719 | miller 2720 | 100,000 2721 | inspire 2722 | setting 2723 | petersburg 2724 | poured 2725 | governments 2726 | friendship 2727 | photo 2728 | pilgrim 2729 | brigitte 2730 | gore 2731 | temperature 2732 | charismatic 2733 | bokassa 2734 | bit 2735 | depicts 2736 | vows 2737 | covered 2738 | tape 2739 | colourful 2740 | dollar 2741 | bedroom 2742 | earhart 2743 | temporary 2744 | ticket 2745 | closer 2746 | re-elected 2747 | competition 2748 | gogh 2749 | roles 2750 | towns 2751 | mario 2752 | tipped 2753 | channel 2754 | atrocities 2755 | 1998 2756 | lire 2757 | worried 2758 | citizens 2759 | prigione 2760 | seized 2761 | stuff 2762 | saints 2763 | cdu 2764 | ss 2765 | performances 2766 | listed 2767 | surrounded 2768 | golf 2769 | honorary 2770 | hill 2771 | hailed 2772 | filled 2773 | kim 2774 | enjoyed 2775 | fast 2776 | walk 2777 | recall 2778 | bubis 2779 | croatian 2780 | leon 2781 | celebrates 2782 | combs 2783 | entering 2784 | quoting 2785 | 15-year-old 2786 | statue 2787 | nunbun 2788 | conversion 2789 | neighbouring 2790 | statements 2791 | outrage 2792 | establishment 2793 | midway 2794 | tribune 2795 | protesters 2796 | complex 2797 | taught 2798 | grandmother 2799 | neighbour 2800 | historians 2801 | breaking 2802 | rebuild 2803 | milos 2804 | quality 2805 | belgian 2806 | zyuganov 2807 | baptists 2808 | philippine 2809 | 2.3 2810 | decent 2811 | hermannsburg 2812 | zealand 2813 | philip 2814 | lasted 2815 | 'connor 2816 | established 2817 | 26 2818 | matthew 2819 | satisfaction 
2820 | membership 2821 | admiration 2822 | nelson 2823 | economist 2824 | meets 2825 | absolutely 2826 | considering 2827 | annexed 2828 | reich 2829 | truly 2830 | awarded 2831 | commander 2832 | missions 2833 | suffer 2834 | corrected 2835 | involves 2836 | cut 2837 | speaks 2838 | conversation 2839 | mystery 2840 | bids 2841 | pelvis 2842 | account 2843 | mirecki 2844 | tonight 2845 | 15-year 2846 | danish 2847 | overlooking 2848 | 17th 2849 | resting 2850 | dogs 2851 | secure 2852 | unsuccessfully 2853 | lined 2854 | focused 2855 | assault 2856 | critical 2857 | trained 2858 | lake 2859 | aware 2860 | toward 2861 | solicitor 2862 | song 2863 | pledged 2864 | manner 2865 | penn 2866 | link 2867 | minor 2868 | politician 2869 | devastated 2870 | alan 2871 | summit 2872 | loud 2873 | tens 2874 | carolina 2875 | ottoman 2876 | itc 2877 | ($ 2878 | guest 2879 | vice 2880 | juan 2881 | hear 2882 | two-day 2883 | 45 2884 | discuss 2885 | bedside 2886 | acts 2887 | conclave 2888 | reserved 2889 | kill 2890 | margrethe 2891 | aids 2892 | sofia 2893 | difficulties 2894 | suit 2895 | noted 2896 | paparazzi 2897 | economics 2898 | fort 2899 | dark 2900 | mahatma 2901 | thanks 2902 | blonde 2903 | 65-year-old 2904 | wallis 2905 | aim 2906 | gathering 2907 | couples 2908 | forgive 2909 | marriages 2910 | accord 2911 | zakatov 2912 | forms 2913 | compare 2914 | guerrillas 2915 | akchurin 2916 | materials 2917 | 42 2918 | shaking 2919 | graham 2920 | survive 2921 | closely 2922 | organiser 2923 | failing 2924 | destroyed 2925 | shares 2926 | weight 2927 | jean 2928 | elsewhere 2929 | politically 2930 | arteries 2931 | tony 2932 | except 2933 | flow 2934 | swept 2935 | immediate 2936 | criticise 2937 | julia 2938 | norwegian 2939 | platform 2940 | monument 2941 | surviving 2942 | rooms 2943 | unconscious 2944 | unknown 2945 | yes 2946 | decade 2947 | fidel 2948 | fatima 2949 | higher 2950 | secluded 2951 | stability 2952 | bulgaria 2953 | facial 2954 | teaching 2955 | 
well-known 2956 | jobs 2957 | corp 2958 | pregnancy 2959 | provide 2960 | impoverished 2961 | originally 2962 | surprised 2963 | shed 2964 | imperial 2965 | prepare 2966 | persecution 2967 | mentally 2968 | increased 2969 | onto 2970 | venture 2971 | donations 2972 | las 2973 | joint 2974 | ranks 2975 | churchmen 2976 | multiple 2977 | intelligent 2978 | aunt 2979 | royalty 2980 | wave 2981 | argentina 2982 | anti-abortion 2983 | milan 2984 | stature 2985 | relative 2986 | spot 2987 | 150 2988 | arriving 2989 | chest 2990 | buddhism 2991 | commandments 2992 | 2000 2993 | exactly 2994 | respected 2995 | vi 2996 | festivals 2997 | athens 2998 | shakur 2999 | 28 3000 | result 3001 | handwritten 3002 | abandoned 3003 | tax 3004 | acknowledged 3005 | plastic 3006 | reality 3007 | reminded 3008 | flee 3009 | executive 3010 | hot 3011 | 36 3012 | stressed 3013 | theodoridis 3014 | hillary 3015 | stamp 3016 | dialogue 3017 | launch 3018 | emperor 3019 | debut 3020 | killings 3021 | zakopane 3022 | allegedly 3023 | counter 3024 | self-styled 3025 | ordination 3026 | transition 3027 | nor 3028 | breathing 3029 | claimed 3030 | jr 3031 | highgrove 3032 | dangerous 3033 | frank 3034 | jamaica 3035 | institute 3036 | musical 3037 | scenes 3038 | bouche 3039 | fields 3040 | lebanon 3041 | regularly 3042 | loyalty 3043 | mosques 3044 | rocked 3045 | 1969 3046 | theft 3047 | gorbachev 3048 | absolute 3049 | baseball 3050 | laura 3051 | solution 3052 | typical 3053 | stock 3054 | explained 3055 | georgia 3056 | dangers 3057 | develop 3058 | aug 3059 | specifically 3060 | balabagan 3061 | prestigious 3062 | festivities 3063 | heavy 3064 | dancing 3065 | fiction 3066 | domestic 3067 | renat 3068 | donatella 3069 | bridges 3070 | sayer 3071 | bodies 3072 | buildings 3073 | slaughter 3074 | steady 3075 | disaster 3076 | complete 3077 | escorted 3078 | sees 3079 | copies 3080 | furniture 3081 | vegas 3082 | approval 3083 | gradually 3084 | inoperable 3085 | teachings 3086 | ash 3087 | 
estimate 3088 | passing 3089 | deaths 3090 | bureau 3091 | modest 3092 | absence 3093 | ghandi 3094 | scientific 3095 | virginia 3096 | ira 3097 | erupted 3098 | trendy 3099 | abdicate 3100 | illegal 3101 | astiz 3102 | prospect 3103 | knows 3104 | adhesions 3105 | loyal 3106 | feared 3107 | wilderness 3108 | minds 3109 | sensitive 3110 | blocks 3111 | precious 3112 | immigrant 3113 | acknowledge 3114 | hearing 3115 | recurrent 3116 | photograph 3117 | execution 3118 | heir-to-the-throne 3119 | open-air 3120 | rapper 3121 | remarried 3122 | 1967 3123 | teach 3124 | wild 3125 | retiring 3126 | 52 3127 | entry 3128 | lowell 3129 | ohio 3130 | 1953 3131 | 1941 3132 | delivered 3133 | mirror 3134 | discussed 3135 | hanoi 3136 | loves 3137 | entitled 3138 | freed 3139 | hip 3140 | presbyterian 3141 | belfast 3142 | outraged 3143 | norway 3144 | dardanelles 3145 | liberty 3146 | missing 3147 | convert 3148 | pull 3149 | ousted 3150 | hospitals 3151 | outpouring 3152 | document 3153 | concentration 3154 | donald 3155 | perverted 3156 | referendum 3157 | adored 3158 | peru 3159 | offers 3160 | treat 3161 | 30,000 3162 | imagine 3163 | label 3164 | shirt 3165 | turns 3166 | frankfurt 3167 | locals 3168 | cooperation 3169 | comedy 3170 | upper 3171 | delhi 3172 | highness 3173 | ruins 3174 | talent 3175 | verdi 3176 | goal 3177 | ralph 3178 | biographer 3179 | energy 3180 | losing 3181 | alzheimer 3182 | neighbours 3183 | goat 3184 | albums 3185 | 1920s 3186 | admire 3187 | minute 3188 | radical 3189 | bennett 3190 | two-week 3191 | earning 3192 | returns 3193 | ex-king 3194 | urban 3195 | profit 3196 | benjamin 3197 | silver 3198 | reasons 3199 | teachers 3200 | importance 3201 | geller 3202 | barred 3203 | purpose 3204 | authors 3205 | troubled 3206 | liked 3207 | founding 3208 | birthplace 3209 | 58 3210 | lama 3211 | auctioned 3212 | countrymen 3213 | notably 3214 | tension 3215 | hostages 3216 | shut 3217 | zong 3218 | nada 3219 | patterson 3220 | co 3221 | featured 
3222 | reims 3223 | charitable 3224 | mourning 3225 | santo 3226 | list 3227 | daughter-in-law 3228 | training 3229 | somebody 3230 | liberals 3231 | allied 3232 | stole 3233 | expects 3234 | continent 3235 | anatoly 3236 | ring 3237 | commitment 3238 | bolsheviks 3239 | sinead 3240 | gyor 3241 | realise 3242 | dissident 3243 | ignored 3244 | pen 3245 | rein 3246 | everywhere 3247 | inflamed 3248 | suddenly 3249 | colony 3250 | blend 3251 | warning 3252 | plunge 3253 | fifth 3254 | barcelona 3255 | structure 3256 | venezuela 3257 | kathleen 3258 | macapagal 3259 | commentator 3260 | parishioners 3261 | baker 3262 | columnist 3263 | entrance 3264 | dispute 3265 | manni 3266 | cleric 3267 | ballot 3268 | minghella 3269 | salinas 3270 | leyland 3271 | individual 3272 | two-hour 3273 | rebellion 3274 | dna 3275 | demonstration 3276 | stands 3277 | preached 3278 | engaged 3279 | feast 3280 | drawn 3281 | heartbeat 3282 | finance 3283 | progress 3284 | paramount 3285 | timing 3286 | connection 3287 | 86th 3288 | maintaining 3289 | sector 3290 | exposure 3291 | quarter 3292 | partners 3293 | 600 3294 | cosentino 3295 | rangitikei 3296 | county 3297 | hamer 3298 | elegance 3299 | pilgrimage 3300 | merely 3301 | toured 3302 | virtually 3303 | colour 3304 | premier 3305 | contender 3306 | block 3307 | ranging 3308 | satisfy 3309 | shoes 3310 | arthur 3311 | resumed 3312 | sacked 3313 | learned 3314 | eruption 3315 | tributes 3316 | hudec 3317 | firmly 3318 | dresden 3319 | solo 3320 | honours 3321 | slavs 3322 | publicist 3323 | textbook 3324 | egypt 3325 | lunch 3326 | slipped 3327 | well-wishers 3328 | exhibit 3329 | proper 3330 | landless 3331 | sartre 3332 | sitting 3333 | arrangements 3334 | wisner 3335 | preserved 3336 | waite 3337 | anger 3338 | poorest 3339 | patrick 3340 | hunters 3341 | sydney 3342 | shattered 3343 | somewhat 3344 | raid 3345 | calcutta-based 3346 | 77 3347 | concept 3348 | 29 3349 | hans 3350 | tears 3351 | effect 3352 | carol 3353 | 40,000 3354 | 
nbc 3355 | pastor 3356 | baroque 3357 | circle 3358 | soft-spoken 3359 | wellington 3360 | post-war 3361 | evans 3362 | pretty 3363 | unions 3364 | owen 3365 | awaiting 3366 | seek 3367 | looted 3368 | cannes 3369 | herzog 3370 | seize 3371 | mountbatten 3372 | gallery 3373 | low-key 3374 | church-owned 3375 | index 3376 | rid 3377 | massive 3378 | largely 3379 | restaurant 3380 | activists 3381 | privately 3382 | fed 3383 | dollars 3384 | emil 3385 | heads 3386 | 84 3387 | knights 3388 | thien 3389 | reason 3390 | consumer 3391 | 1918 3392 | tumultuous 3393 | burning 3394 | critic 3395 | joked 3396 | dublin 3397 | rigidity 3398 | secretly 3399 | northeast 3400 | wear 3401 | opposing 3402 | mccurry 3403 | outstretched 3404 | divine 3405 | strike 3406 | dresses 3407 | owned 3408 | directed 3409 | potential 3410 | visitor 3411 | reverend 3412 | entire 3413 | genuinely 3414 | seminary 3415 | bc 3416 | reduce 3417 | societies 3418 | sculptures 3419 | towering 3420 | girlfriend 3421 | recognises 3422 | scepticism 3423 | roads 3424 | 20-year 3425 | ease 3426 | inauguration 3427 | matters 3428 | fly 3429 | kompromat 3430 | unless 3431 | divided 3432 | store 3433 | second-largest 3434 | dream 3435 | hierarchy 3436 | albanian 3437 | 81 3438 | parades 3439 | apartheid 3440 | presided 3441 | visible 3442 | 72 3443 | addressing 3444 | sued 3445 | settlers 3446 | benefit 3447 | ritual 3448 | salzburg 3449 | motion 3450 | assisted 3451 | armenians 3452 | suspicious 3453 | sarajevo 3454 | difficulty 3455 | disappearance 3456 | sophia 3457 | familiar 3458 | distance 3459 | trigger 3460 | scale 3461 | 10-day 3462 | avoid 3463 | jean-marie 3464 | soup 3465 | organise 3466 | literature 3467 | favours 3468 | lawyer 3469 | et 3470 | 1980s 3471 | none 3472 | noting 3473 | mixed 3474 | taped 3475 | clean 3476 | wehrmacht 3477 | assured 3478 | briefing 3479 | exercising 3480 | palestinian 3481 | neuilly 3482 | effective 3483 | vittorio 3484 | bribes 3485 | operated 3486 | likes 3487 | 
flamboyant 3488 | exist 3489 | meditate 3490 | roger 3491 | governors 3492 | 68-year-old 3493 | equivalent 3494 | jokes 3495 | $1 3496 | emotionally 3497 | speculated 3498 | smaller 3499 | tender 3500 | tiny 3501 | increasing 3502 | fled 3503 | improvement 3504 | believers 3505 | parliamentary 3506 | lakeside 3507 | bikini-clad 3508 | spaniards 3509 | coffee 3510 | direct 3511 | indicated 3512 | secrecy 3513 | income 3514 | believing 3515 | businesses 3516 | chatterjee 3517 | whatever 3518 | cuts 3519 | write 3520 | villagers 3521 | albanian-born 3522 | belgium 3523 | nicole 3524 | unlikely 3525 | viii 3526 | harry 3527 | closet 3528 | prosecution 3529 | firing 3530 | joining 3531 | attract 3532 | s.k 3533 | peninsula 3534 | threatening 3535 | contract 3536 | ronald 3537 | leaves 3538 | magazines 3539 | centrist 3540 | creighton 3541 | remarrying 3542 | welcomed 3543 | egyptian 3544 | luther 3545 | bomb 3546 | dili 3547 | 450 3548 | bar 3549 | wheelchair 3550 | denying 3551 | non-violent 3552 | loins 3553 | lucinschi 3554 | impose 3555 | avoided 3556 | underground 3557 | whoever 3558 | repeated 3559 | pleased 3560 | josef 3561 | passionate 3562 | cafe 3563 | woody 3564 | knelt 3565 | lobby 3566 | disappeared 3567 | 22 3568 | opportunity 3569 | complicated 3570 | parade 3571 | acclaimed 3572 | ivory 3573 | schemer 3574 | teenage 3575 | worship 3576 | driving 3577 | barbara 3578 | greet 3579 | effectively 3580 | dalai 3581 | centred 3582 | pianist 3583 | connecticut 3584 | upset 3585 | transfer 3586 | intimate 3587 | sergei 3588 | morgan 3589 | elton 3590 | celebrity 3591 | slave 3592 | captain 3593 | dynasty 3594 | appearing 3595 | bride 3596 | religions 3597 | oliver 3598 | conditions 3599 | expressions 3600 | aitken 3601 | legendary 3602 | intense 3603 | equally 3604 | battling 3605 | convalescence 3606 | flurry 3607 | tapes 3608 | sleep 3609 | hanging 3610 | pending 3611 | kkk 3612 | honest 3613 | bolshevik 3614 | foremost 3615 | mondrian 3616 | esquivel 3617 | 
1984 3618 | nervous 3619 | elder 3620 | impossible 3621 | cautious 3622 | note 3623 | potent 3624 | fun 3625 | worries 3626 | shouted 3627 | hao 3628 | coptic 3629 | naked 3630 | advised 3631 | shanghai 3632 | lawsuits 3633 | walter 3634 | listening 3635 | confidence 3636 | bosnia 3637 | refrain 3638 | sounds 3639 | literally 3640 | watched 3641 | ocean 3642 | interested 3643 | shaping 3644 | explaining 3645 | organisations 3646 | manuscript 3647 | ships 3648 | experiences 3649 | drama 3650 | sicilian 3651 | ensure 3652 | fraud 3653 | votes 3654 | sentiment 3655 | lesbianism 3656 | causing 3657 | carefully 3658 | suggesting 3659 | beatles 3660 | japanese-americans 3661 | hits 3662 | intolerance 3663 | backs 3664 | unstable 3665 | walters 3666 | salvi 3667 | changing 3668 | intend 3669 | hype 3670 | mentioned 3671 | rising 3672 | garage 3673 | horse 3674 | colonial 3675 | recovering 3676 | v 3677 | commissioner 3678 | doors 3679 | suggestion 3680 | passport 3681 | 44 3682 | ages 3683 | text 3684 | articles 3685 | programmes 3686 | civic 3687 | promoting 3688 | pratt 3689 | patron 3690 | hoffman 3691 | flying 3692 | demonstrations 3693 | hired 3694 | monarchist 3695 | herrera 3696 | eternal 3697 | worshipped 3698 | blow 3699 | halls 3700 | walks 3701 | frustration 3702 | thich 3703 | contains 3704 | uncle 3705 | unsolved 3706 | 4,000 3707 | banners 3708 | pure 3709 | paris-based 3710 | viewed 3711 | freedoms 3712 | stated 3713 | cell 3714 | mourn 3715 | directly 3716 | alarm 3717 | numerous 3718 | female 3719 | treating 3720 | okay 3721 | toast 3722 | rarely 3723 | improving 3724 | progressive 3725 | direction 3726 | mothers 3727 | occupied 3728 | augenthaler 3729 | gutenberg 3730 | pinatubo 3731 | treasure 3732 | enduring 3733 | sincerity 3734 | forgiveness 3735 | warn 3736 | shaped 3737 | patients 3738 | stick 3739 | feminist 3740 | 2,500 3741 | ironically 3742 | participating 3743 | partner 3744 | capture 3745 | appearances 3746 | delay 3747 | secular 3748 | 1961 
3749 | chanted 3750 | division 3751 | boys 3752 | belonged 3753 | pledge 3754 | aside 3755 | bowed 3756 | assistant 3757 | otherwise 3758 | interior 3759 | tissue 3760 | pose 3761 | atmosphere 3762 | mysterious 3763 | incident 3764 | unfortunately 3765 | goldie 3766 | break-up 3767 | patrons 3768 | push 3769 | pays 3770 | halt 3771 | ex-husband 3772 | walking 3773 | provided 3774 | spirits 3775 | educational 3776 | heavily 3777 | revered 3778 | seventh 3779 | guidelines 3780 | mikhail 3781 | developing 3782 | 4 3783 | confessed 3784 | bonds 3785 | recurrence 3786 | assassin 3787 | seagal 3788 | les 3789 | formal 3790 | tissues 3791 | train 3792 | baptism 3793 | distributed 3794 | estranged 3795 | arden 3796 | gennady 3797 | fail 3798 | approach 3799 | sadness 3800 | internal 3801 | repair 3802 | desperation 3803 | detained 3804 | charming 3805 | pieces 3806 | foreigners 3807 | clarence 3808 | laughed 3809 | patronage 3810 | priority 3811 | combat 3812 | export 3813 | threw 3814 | portrait 3815 | eulogy 3816 | excellent 3817 | papa 3818 | shrugged 3819 | partisan 3820 | tea 3821 | fallen 3822 | bout 3823 | drove 3824 | blocked 3825 | trends 3826 | federation 3827 | hatred 3828 | 350 3829 | 1920 3830 | covering 3831 | slavery 3832 | shape 3833 | jozef 3834 | panel 3835 | disclose 3836 | location 3837 | analyst 3838 | remaining 3839 | successfully 3840 | awkward 3841 | globe 3842 | bloc 3843 | spree 3844 | photographed 3845 | weather 3846 | shock 3847 | entourage 3848 | nemtsov 3849 | vision 3850 | photographer 3851 | wadowice 3852 | resident 3853 | lesbians 3854 | degree 3855 | stunning 3856 | brings 3857 | bulk 3858 | hadeln 3859 | co-chairman 3860 | trials 3861 | ornate 3862 | cnn 3863 | replaced 3864 | luzhkov 3865 | wind 3866 | salute 3867 | partly 3868 | confronted 3869 | drop-out 3870 | locked 3871 | machel 3872 | outcome 3873 | oil 3874 | regards 3875 | mutiny 3876 | scrawled 3877 | target 3878 | eulogised 3879 | rests 3880 | dearest 3881 | turkey 3882 | 
proposed 3883 | geneva 3884 | nuncio 3885 | average 3886 | physical 3887 | arab 3888 | dole 3889 | basically 3890 | clock 3891 | environment 3892 | arranged 3893 | absent 3894 | bullet 3895 | studios 3896 | cable 3897 | bruce 3898 | ponomaryov 3899 | gentleman 3900 | alain 3901 | long-term 3902 | pantheon 3903 | telephoned 3904 | reminder 3905 | please 3906 | symbolic 3907 | demonstrating 3908 | unhappy 3909 | individuals 3910 | angered 3911 | relax 3912 | 74 3913 | tupelo 3914 | window 3915 | refuge 3916 | newsroom 3917 | stripes 3918 | denies 3919 | diminutive 3920 | muhammad 3921 | bidding 3922 | administering 3923 | urine 3924 | cheerful 3925 | oslo 3926 | cracking 3927 | cluedo 3928 | leads 3929 | honeymoon 3930 | s 3931 | subsidiaries 3932 | visibly 3933 | selfishness 3934 | blond 3935 | incision 3936 | nancy 3937 | racecourse 3938 | frenzy 3939 | oporto 3940 | ex-communist 3941 | equipment 3942 | gentle 3943 | interfax 3944 | admired 3945 | wars 3946 | pushed 3947 | notion 3948 | blocking 3949 | blaze 3950 | netherlands 3951 | integration 3952 | stripped 3953 | portman 3954 | slums 3955 | survey 3956 | fraction 3957 | 1944 3958 | o.j 3959 | huertas 3960 | grief 3961 | telegram 3962 | hearts 3963 | announcing 3964 | feted 3965 | improves 3966 | fascinated 3967 | tall 3968 | kempton 3969 | blacks 3970 | chair 3971 | talked 3972 | unveiled 3973 | priestly 3974 | outdoor 3975 | spared 3976 | generous 3977 | renew 3978 | kalyviotis 3979 | worry 3980 | possibly 3981 | awaited 3982 | extaordinary 3983 | jovial 3984 | possessions 3985 | marched 3986 | betrayed 3987 | finland 3988 | starring 3989 | withdraw 3990 | 68 3991 | performers 3992 | persuaded 3993 | desmond 3994 | wept 3995 | liberation 3996 | b.i.g 3997 | receive 3998 | british-born 3999 | succeeded 4000 | answer 4001 | sells 4002 | abortions 4003 | solitary 4004 | turin 4005 | anti-semitism 4006 | spanish-style 4007 | barring 4008 | defending 4009 | crosby 4010 | knowledge 4011 | permanent 4012 | reign 
4013 | plunged 4014 | 18-year 4015 | 1968 4016 | featuring 4017 | smith 4018 | dear 4019 | gloucester 4020 | norman 4021 | aspects 4022 | management 4023 | isolated 4024 | agricultural 4025 | loving 4026 | passion 4027 | confirmation 4028 | sting 4029 | fiery 4030 | displayed 4031 | violently 4032 | atlantic 4033 | andre 4034 | disastrous 4035 | crowded 4036 | fiestas 4037 | witnesses 4038 | backdrop 4039 | appropriate 4040 | busy 4041 | buying 4042 | northwest 4043 | trading 4044 | philosophy 4045 | fur 4046 | tycoon 4047 | speculate 4048 | respond 4049 | strategic 4050 | encourage 4051 | tower 4052 | nobility 4053 | praising 4054 | il 4055 | allows 4056 | website 4057 | nomination 4058 | sharp 4059 | taste 4060 | replied 4061 | dividing 4062 | opposite 4063 | rosemary 4064 | haven 4065 | shy 4066 | results 4067 | thinks 4068 | puccini 4069 | nigeria 4070 | battery 4071 | dimona 4072 | cottage 4073 | drink 4074 | determine 4075 | sexy 4076 | create 4077 | videos 4078 | persecuted 4079 | london-based 4080 | offices 4081 | sit 4082 | breakdown 4083 | regret 4084 | judgment 4085 | christoff 4086 | novel 4087 | relationships 4088 | responded 4089 | trio 4090 | marxist 4091 | achievement 4092 | bright 4093 | chechen 4094 | ho 4095 | daughters 4096 | patriotic 4097 | languages 4098 | ballet 4099 | promotion 4100 | yastrzhembsky 4101 | apartment 4102 | lourdes 4103 | executed 4104 | brutality 4105 | naumov 4106 | descendants 4107 | costs 4108 | afraid 4109 | displaying 4110 | specialist 4111 | uprising 4112 | nightclubs 4113 | chronic 4114 | hat 4115 | races 4116 | concrete 4117 | vadim 4118 | servant 4119 | unpopular 4120 | 1,000-year-old 4121 | rivalry 4122 | maverick 4123 | gesture 4124 | separated 4125 | unemployment 4126 | value 4127 | dennis 4128 | attracted 4129 | 1937 4130 | comeback 4131 | remarriage 4132 | puts 4133 | investigations 4134 | samuel 4135 | smoking 4136 | mussolini 4137 | madras 4138 | briefs 4139 | flags 4140 | french-speaking 4141 | hawn 4142 | 
attraction 4143 | plot 4144 | conscience 4145 | attorneys 4146 | obvious 4147 | infant 4148 | fainted 4149 | dossetti 4150 | pro-catholic 4151 | madeleine 4152 | rituals 4153 | discussion 4154 | tune 4155 | waved 4156 | maseri 4157 | diplomats 4158 | rope 4159 | hideaway 4160 | function 4161 | destitutes 4162 | llakana 4163 | recipients 4164 | hamburg 4165 | meanwhile 4166 | beauty 4167 | attilio 4168 | domination 4169 | architectural 4170 | delays 4171 | noel 4172 | pool 4173 | alpine 4174 | dictionary 4175 | custody 4176 | museums 4177 | aristocrat 4178 | abuses 4179 | tenors 4180 | recovered 4181 | proof 4182 | forbidden 4183 | toll 4184 | stance 4185 | clerk 4186 | relief 4187 | nicolae 4188 | 1913 4189 | romantic 4190 | riding 4191 | falls 4192 | indians 4193 | column 4194 | notebook 4195 | fondly 4196 | prostitution 4197 | computer 4198 | easier 4199 | sharply 4200 | cane 4201 | nowhere 4202 | tent 4203 | 25,000 4204 | tennis 4205 | alfonso 4206 | crying 4207 | professional 4208 | christendom 4209 | roof 4210 | 35 4211 | beart 4212 | neighbourhoods 4213 | credited 4214 | profits 4215 | preparing 4216 | decline 4217 | guardian 4218 | lacking 4219 | frailty 4220 | object 4221 | mrta 4222 | unionists 4223 | nomadic 4224 | avid 4225 | dustin 4226 | albert 4227 | relaxed 4228 | heal 4229 | switched 4230 | speed 4231 | reunion 4232 | laughter 4233 | pioneered 4234 | annually 4235 | examinations 4236 | preparation 4237 | surely 4238 | engineer 4239 | sending 4240 | financed 4241 | challenges 4242 | real-life 4243 | missed 4244 | finest 4245 | affect 4246 | ninth 4247 | knife 4248 | hung 4249 | novelist 4250 | smoke 4251 | sentimental 4252 | tale 4253 | lawrence 4254 | cristina 4255 | 50-year-old 4256 | gruelling 4257 | rapidly 4258 | jailed 4259 | -------------------------------------------------------------------------------- /lda/tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from 
__future__ import absolute_import, unicode_literals # noqa 3 | 4 | import oslotest.base 5 | 6 | import lda.datasets 7 | 8 | 9 | class TestDatasets(oslotest.base.BaseTestCase): 10 | 11 | def test_datasets(self): 12 | X = lda.datasets.load_reuters() 13 | self.assertEqual(X.shape, (395, 4258)) 14 | titles = lda.datasets.load_reuters_titles() 15 | self.assertEqual(len(titles), X.shape[0]) 16 | vocab = lda.datasets.load_reuters_vocab() 17 | self.assertEqual(len(vocab), X.shape[1]) 18 | -------------------------------------------------------------------------------- /lda/tests/test_lda.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, unicode_literals # noqa 3 | 4 | import numpy as np 5 | import oslotest.base 6 | 7 | import lda 8 | 9 | 10 | class TestLDA(oslotest.base.BaseTestCase): 11 | 12 | def test_lda_constructor(self): 13 | n_topics = 10 14 | model1 = lda.LDA(n_topics) 15 | self.assertIsNotNone(model1) 16 | model2 = lda.LDA(n_topics=n_topics) 17 | self.assertIsNotNone(model2) 18 | 19 | def test_lda_params(self): 20 | n_topics = 10 21 | model1 = lda.LDA(n_topics, alpha=0.3) 22 | self.assertIsNotNone(model1) 23 | model2 = lda.LDA(n_topics=n_topics, alpha=0.3, eta=0.4) 24 | self.assertIsNotNone(model2) 25 | self.assertRaises(ValueError, lda.LDA, n_topics, alpha=-3) 26 | self.assertRaises(ValueError, lda.LDA, n_topics, eta=-3) 27 | self.assertRaises(ValueError, lda.LDA, n_topics, alpha=-3, eta=-3) 28 | 29 | def test_lda_getting_started(self): 30 | X = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [5, 8], [6, 1]]) 31 | model = lda.LDA(n_topics=2, n_iter=100, random_state=1) 32 | doc_topic = model.fit_transform(X) 33 | self.assertIsNotNone(doc_topic) 34 | self.assertIsNotNone(model.doc_topic_) 35 | self.assertIsNotNone(model.components_) 36 | 37 | def test_lda_loglikelihoods(self): 38 | X = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [5, 8], [6, 1]]) 39 | model = 
lda.LDA(n_topics=2, n_iter=100, random_state=1) 40 | model.fit(X) 41 | self.assertGreater(len(model.loglikelihoods_), 1) 42 | -------------------------------------------------------------------------------- /lda/tests/test_lda_reuters.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, unicode_literals # noqa 3 | import os 4 | 5 | import numpy as np 6 | import oslotest.base 7 | 8 | import lda 9 | import lda.utils 10 | 11 | 12 | class TestLDANewsReuters(oslotest.base.BaseTestCase): 13 | 14 | @classmethod 15 | def setUpClass(cls): 16 | test_dir = os.path.dirname(__file__) 17 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 18 | cls.dtm = dtm = lda.utils.ldac2dtm(open(reuters_ldac_fn), offset=0) 19 | cls.n_iter = n_iter = 1 20 | cls.n_topics = n_topics = 10 21 | cls.random_seed = random_seed = 1 22 | cls.model = model = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 23 | cls.doc_topic = model.fit_transform(dtm) 24 | 25 | def test_lda_news(self): 26 | dtm = self.dtm 27 | doc_topic = self.doc_topic 28 | self.assertEqual(len(doc_topic), len(dtm)) 29 | 30 | def test_lda_attributes(self): 31 | dtm = self.dtm 32 | doc_topic = self.doc_topic 33 | model = self.model 34 | 35 | # check dims 36 | N = dtm.sum() 37 | D, V = dtm.shape 38 | _, K = doc_topic.shape 39 | self.assertEqual(model.doc_topic_.shape, doc_topic.shape) 40 | np.testing.assert_array_equal(model.doc_topic_, doc_topic) 41 | self.assertEqual(model.doc_topic_.shape, (D, K)) 42 | self.assertEqual(model.ndz_.shape, (D, K)) 43 | self.assertEqual(model.topic_word_.shape, (K, V)) 44 | self.assertEqual(model.nzw_.shape, (K, V)) 45 | 46 | # check contents 47 | self.assertAlmostEqual(model.nzw_.sum(), N) 48 | self.assertAlmostEqual(model.ndz_.sum(), N) 49 | self.assertAlmostEqual(model.nz_.sum(), N) 50 | self.assertAlmostEqual(model.doc_topic_.sum(), D) 51 | 
self.assertAlmostEqual(model.topic_word_.sum(), K) 52 | np.testing.assert_array_equal(model.ndz_.sum(axis=0), model.nz_) 53 | 54 | # check distributions sum to one 55 | np.testing.assert_array_almost_equal(model.doc_topic_.sum(axis=1), np.ones(D)) 56 | np.testing.assert_array_almost_equal(model.topic_word_.sum(axis=1), np.ones(K)) 57 | 58 | def test_lda_random_seed(self): 59 | dtm = self.dtm 60 | doc_topic = self.doc_topic 61 | n_iter = self.n_iter 62 | n_topics = self.n_topics 63 | random_seed = self.random_seed 64 | random_state = self.model.random_state 65 | 66 | # refit model with same random seed and verify results identical 67 | model_new = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 68 | rands_init = model_new._rands.copy() 69 | doc_topic_new = model_new.fit_transform(dtm) 70 | rands_fit = model_new._rands.copy() 71 | random_state_new = model_new.random_state 72 | np.testing.assert_array_equal(doc_topic_new, doc_topic) 73 | np.testing.assert_array_equal(random_state_new, random_state) 74 | 75 | # verify random variates are not changed 76 | np.testing.assert_array_equal(rands_init, rands_fit) 77 | 78 | def test_lda_monotone(self): 79 | dtm = self.dtm 80 | model = self.model 81 | n_topics = self.n_topics 82 | random_seed = self.random_seed 83 | 84 | # fit model with additional iterations, verify improvement in log likelihood 85 | n_iter = self.n_iter * 2 86 | model_new = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 87 | model_new.fit(dtm) 88 | self.assertGreater(model_new.loglikelihood(), model.loglikelihood()) 89 | 90 | def test_lda_zero_iter(self): 91 | dtm = self.dtm 92 | model = self.model 93 | doc_topic = self.doc_topic 94 | n_topics = self.n_topics 95 | random_seed = self.random_seed 96 | 97 | # fit a new model with 0 iterations 98 | n_iter = 0 99 | model_new = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 100 | doc_topic_new = model_new.fit_transform(dtm) 101 | 
self.assertIsNotNone(model_new) 102 | self.assertIsNotNone(doc_topic_new) 103 | self.assertLess(model_new.loglikelihood(), model.loglikelihood()) 104 | self.assertFalse((doc_topic_new == doc_topic).all()) 105 | -------------------------------------------------------------------------------- /lda/tests/test_lda_sparse.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, unicode_literals # noqa 3 | import os 4 | 5 | 6 | import numpy as np 7 | import oslotest.base 8 | import scipy.sparse 9 | 10 | import lda 11 | import lda.utils 12 | 13 | 14 | class TestLDASparse(oslotest.base.BaseTestCase): 15 | 16 | @classmethod 17 | def setUpClass(cls): 18 | test_dir = os.path.dirname(__file__) 19 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 20 | cls.dtm = scipy.sparse.csr_matrix(lda.utils.ldac2dtm(open(reuters_ldac_fn), offset=0)).astype(np.int64) 21 | cls.n_iter = n_iter = 1 22 | cls.n_topics = n_topics = 10 23 | cls.random_seed = random_seed = 1 24 | cls.model = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 25 | 26 | def test_lda_sparse(self): 27 | dtm = self.dtm 28 | model = self.model 29 | doc_topic = model.fit_transform(dtm) 30 | self.assertEqual(len(doc_topic), dtm.shape[0]) 31 | N = dtm.sum() 32 | D, V = dtm.shape 33 | _, K = doc_topic.shape 34 | self.assertEqual(model.doc_topic_.shape, doc_topic.shape) 35 | np.testing.assert_array_equal(model.doc_topic_, doc_topic) 36 | self.assertEqual(model.doc_topic_.shape, (D, K)) 37 | self.assertEqual(model.ndz_.shape, (D, K)) 38 | self.assertEqual(model.topic_word_.shape, (K, V)) 39 | self.assertEqual(model.nzw_.shape, (K, V)) 40 | 41 | # check contents 42 | self.assertAlmostEqual(model.nzw_.sum(), N) 43 | self.assertAlmostEqual(model.ndz_.sum(), N) 44 | self.assertAlmostEqual(model.nz_.sum(), N) 45 | self.assertAlmostEqual(model.doc_topic_.sum(), D) 46 | self.assertAlmostEqual(model.topic_word_.sum(), K) 
47 | np.testing.assert_array_equal(model.ndz_.sum(axis=0), model.nz_) 48 | 49 | # check distributions sum to one 50 | np.testing.assert_array_almost_equal(model.doc_topic_.sum(axis=1), np.ones(D)) 51 | np.testing.assert_array_almost_equal(model.topic_word_.sum(axis=1), np.ones(K)) 52 | 53 | def test_lda_sparse_error_float(self): 54 | dtm = self.dtm.astype(float) 55 | model = self.model 56 | self.assertRaises(ValueError, model.transform, dtm) 57 | -------------------------------------------------------------------------------- /lda/tests/test_lda_transform.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import, unicode_literals # noqa 3 | import os 4 | 5 | import numpy as np 6 | import oslotest.base 7 | import scipy.sparse 8 | import scipy.stats 9 | 10 | import lda 11 | import lda.utils 12 | 13 | 14 | class TestLDATransform(oslotest.base.BaseTestCase): 15 | 16 | @classmethod 17 | def setUpClass(cls): 18 | test_dir = os.path.dirname(__file__) 19 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 20 | cls.dtm = dtm = lda.utils.ldac2dtm(open(reuters_ldac_fn), offset=0) 21 | cls.dtm_sparse = scipy.sparse.csr_matrix(dtm) 22 | cls.n_iter = n_iter = 400 23 | cls.n_topics = n_topics = 15 24 | cls.random_seed = random_seed = 1 25 | cls.model = model = lda.LDA(n_topics=n_topics, n_iter=n_iter, random_state=random_seed) 26 | cls.doc_topic = model.fit_transform(dtm) 27 | 28 | def test_lda_transform_null(self): 29 | """Evaluate transform by checking predicted doc_topic distribution 30 | 31 | In this case, our null hypothesis is that we are doing no better than 32 | picking at random from a fitted model and calculating the KL divergence. 
33 | """ 34 | random_seed = self.random_seed 35 | model = self.model 36 | dtm = self.dtm 37 | doc_topic = self.doc_topic 38 | 39 | n_docs = 10 40 | assert n_docs < len(dtm) / 2 41 | dtm_test = dtm[:n_docs] 42 | doc_topic_test_true = doc_topic[:n_docs] 43 | doc_topic_test = model.transform(dtm_test) 44 | 45 | S = 2000 46 | kl_div_dist = np.empty(S) 47 | np.random.seed(random_seed) 48 | for s in range(S): 49 | # scipy.stats.entropy(p, q) calculates Kullback-Leibler divergence 50 | kl_div_dist[s] = scipy.stats.entropy(doc_topic_test_true[np.random.choice(len(doc_topic_test_true))], 51 | doc_topic[np.random.choice(len(doc_topic))]) 52 | quantiles = scipy.stats.mstats.mquantiles(kl_div_dist, prob=np.linspace(0, 1, 500, endpoint=False)) 53 | 54 | for p, q in zip(doc_topic_test_true, doc_topic_test): 55 | kl_div = scipy.stats.entropy(p, q) 56 | quantile = np.searchsorted(quantiles, kl_div) / len(quantiles) 57 | self.assertLess(quantile, 0.05) 58 | 59 | def test_lda_transform_basic(self): 60 | """Basic checks on transform""" 61 | model = self.model 62 | dtm = self.dtm 63 | 64 | n_docs = 3 65 | n_topics = len(model.components_) 66 | dtm_test = dtm[0:n_docs] 67 | doc_topic_test = model.transform(dtm_test) 68 | self.assertEqual(doc_topic_test.shape, (n_docs, n_topics)) 69 | np.testing.assert_array_almost_equal(doc_topic_test.sum(axis=1), 1) 70 | 71 | # one document 72 | dtm_test = dtm[0] 73 | doc_topic_test = model.transform(dtm_test) 74 | self.assertEqual(doc_topic_test.shape, (1, n_topics)) 75 | np.testing.assert_array_almost_equal(doc_topic_test.sum(axis=1), 1) 76 | 77 | def test_lda_transform_basic_sparse(self): 78 | """Basic checks on transform""" 79 | model = self.model 80 | dtm = self.dtm_sparse 81 | 82 | n_docs = 3 83 | n_topics = len(model.components_) 84 | dtm_test = dtm[0:n_docs] 85 | doc_topic_test = model.transform(dtm_test) 86 | self.assertEqual(doc_topic_test.shape, (n_docs, n_topics)) 87 | np.testing.assert_array_almost_equal(doc_topic_test.sum(axis=1), 1) 88 
| 89 | # one document 90 | dtm_test = dtm[0] 91 | doc_topic_test = model.transform(dtm_test) 92 | self.assertEqual(doc_topic_test.shape, (1, n_topics)) 93 | np.testing.assert_array_almost_equal(doc_topic_test.sum(axis=1), 1) 94 | -------------------------------------------------------------------------------- /lda/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, unicode_literals # noqa 2 | 3 | import io 4 | import os 5 | 6 | import numpy as np 7 | import oslotest.base 8 | import scipy.sparse 9 | 10 | import lda.utils as utils 11 | 12 | 13 | class TestUtils(oslotest.base.BaseTestCase): 14 | 15 | np.random.seed(99) 16 | 17 | D = 100 18 | W = 50 19 | N_WORDS_PER_DOC = 500 20 | N = W * N_WORDS_PER_DOC 21 | dtm = np.zeros((D, W), dtype=int) 22 | for d in range(D): 23 | dtm[d] = np.random.multinomial(N_WORDS_PER_DOC, np.ones(W) / W) 24 | dtm_sparse = scipy.sparse.csr_matrix(dtm) 25 | N_BY_W = np.sum(dtm, axis=0) 26 | N_BY_D = np.sum(dtm, axis=1) 27 | 28 | def test_setup(self): 29 | dtm, D, N_WORDS_PER_DOC = self.dtm, self.D, self.N_WORDS_PER_DOC 30 | self.assertEqual(np.sum(dtm), D * N_WORDS_PER_DOC) 31 | 32 | def test_matrix_to_lists(self): 33 | dtm, D, N_WORDS_PER_DOC = self.dtm, self.D, self.N_WORDS_PER_DOC 34 | N_BY_D, N_BY_W = self.N_BY_D, self.N_BY_W 35 | WS, DS = utils.matrix_to_lists(dtm) 36 | self.assertEqual(len(WS), D * N_WORDS_PER_DOC) 37 | self.assertEqual(len(WS), len(DS)) 38 | self.assertEqual(dtm.shape, (max(DS) + 1, max(WS) + 1)) 39 | self.assertTrue(all(DS == sorted(DS))) 40 | self.assertTrue(np.all(np.bincount(DS) == N_BY_D)) 41 | self.assertTrue(np.all(np.bincount(WS) == N_BY_W)) 42 | 43 | def test_matrix_row_to_lists(self): 44 | dtm = self.dtm 45 | N = sum(dtm[0]) 46 | 47 | WS, DS = utils.matrix_to_lists(dtm) 48 | WS_row, DS_row = utils.matrix_to_lists(np.atleast_2d(dtm[0])) 49 | 50 | np.testing.assert_array_equal(WS_row, WS[:N]) 51 | 
np.testing.assert_array_equal(DS_row, DS[:N]) 52 | 53 | def test_matrix_rows_to_lists(self): 54 | dtm = self.dtm 55 | rows = dtm[0:2] 56 | N = rows.ravel().sum() 57 | 58 | WS, DS = utils.matrix_to_lists(dtm) 59 | WS_rows, DS_rows = utils.matrix_to_lists(rows) 60 | 61 | np.testing.assert_array_equal(WS_rows, WS[:N]) 62 | np.testing.assert_array_equal(DS_rows, DS[:N]) 63 | 64 | def test_matrix_row_to_lists_sparse(self): 65 | dtm = self.dtm_sparse 66 | N = dtm[0].sum() 67 | 68 | WS, DS = utils.matrix_to_lists(dtm) 69 | WS_row, DS_row = utils.matrix_to_lists(dtm[0]) 70 | 71 | np.testing.assert_array_equal(WS_row, WS[:N]) 72 | np.testing.assert_array_equal(DS_row, DS[:N]) 73 | 74 | def test_matrix_rows_to_lists_sparse(self): 75 | dtm = self.dtm_sparse 76 | rows = dtm[0:2] 77 | N = rows.sum() 78 | 79 | WS, DS = utils.matrix_to_lists(dtm) 80 | WS_rows, DS_rows = utils.matrix_to_lists(rows) 81 | 82 | np.testing.assert_array_equal(WS_rows, WS[:N]) 83 | np.testing.assert_array_equal(DS_rows, DS[:N]) 84 | 85 | def test_lists_to_matrix(self): 86 | dtm = self.dtm 87 | WS, DS = utils.matrix_to_lists(dtm) 88 | dtm_new = utils.lists_to_matrix(WS, DS) 89 | self.assertTrue(np.all(dtm == dtm_new)) 90 | 91 | def test_ldac2dtm_offset(self): 92 | test_dir = os.path.dirname(__file__) 93 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 94 | self.assertRaises(ValueError, utils.ldac2dtm, open(reuters_ldac_fn), offset=1) 95 | 96 | def test_ldac2dtm(self): 97 | test_dir = os.path.dirname(__file__) 98 | reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac') 99 | dtm = utils.ldac2dtm(open(reuters_ldac_fn)) 100 | self.assertEqual(dtm.shape, (395, 4258)) 101 | self.assertEqual(dtm.sum(), 84010) 102 | 103 | def test_ldac_conversion(self): 104 | dtm = self.dtm 105 | N, V = dtm.shape 106 | doclines = list(utils.dtm2ldac(self.dtm)) 107 | nd_unique = np.sum(dtm > 0, axis=1) 108 | for n, docline in zip(nd_unique, doclines): 109 | self.assertEqual(n, int(docline.split(' ')[0])) 110 | 
self.assertEqual(len(doclines), N) 111 | f = io.StringIO('\n'.join(doclines)) 112 | dtm_new = utils.ldac2dtm(f) 113 | self.assertTrue(np.all(dtm == dtm_new)) 114 | 115 | def test_lists_to_matrix_sparse(self): 116 | dtm = self.dtm_sparse 117 | WS, DS = utils.matrix_to_lists(dtm) 118 | dtm_new = utils.lists_to_matrix(WS, DS) 119 | self.assertTrue(np.all(dtm == dtm_new)) 120 | 121 | def test_ldac_conversion_sparse(self): 122 | dtm = self.dtm 123 | dtm_sparse = self.dtm_sparse 124 | N, V = dtm.shape 125 | doclines = list(utils.dtm2ldac(dtm_sparse)) 126 | nd_unique = np.sum(dtm > 0, axis=1) 127 | for n, docline in zip(nd_unique, doclines): 128 | self.assertEqual(n, int(docline.split(' ')[0])) 129 | self.assertEqual(len(doclines), N) 130 | f = io.StringIO('\n'.join(doclines)) 131 | dtm_new = utils.ldac2dtm(f) 132 | self.assertTrue(np.all(dtm == dtm_new)) 133 | -------------------------------------------------------------------------------- /lda/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals # noqa 2 | 3 | import logging 4 | import numbers 5 | import sys 6 | 7 | import numpy as np 8 | 9 | PY2 = sys.version_info[0] == 2 10 | if PY2: 11 | import itertools 12 | zip = itertools.izip 13 | 14 | 15 | logger = logging.getLogger('lda') 16 | 17 | 18 | def check_random_state(seed): 19 | if seed is None: 20 | # i.e., use existing RandomState 21 | return np.random.mtrand._rand 22 | if isinstance(seed, (numbers.Integral, np.integer)): 23 | return np.random.RandomState(seed) 24 | if isinstance(seed, np.random.RandomState): 25 | return seed 26 | raise ValueError("{} cannot be used as a random seed.".format(seed)) 27 | 28 | 29 | def matrix_to_lists(doc_word): 30 | """Convert a (sparse) matrix of counts into arrays of word and doc indices 31 | 32 | Parameters 33 | ---------- 34 | doc_word : array or sparse matrix (D, V) 35 | document-term matrix of counts 36 | 37 | Returns 38 | ------- 
39 | (WS, DS) : tuple of two arrays 40 | WS[k] contains the kth word in the corpus 41 | DS[k] contains the document index for the kth word 42 | 43 | """ 44 | if np.count_nonzero(doc_word.sum(axis=1)) != doc_word.shape[0]: 45 | logger.warning("all zero row in document-term matrix found") 46 | if np.count_nonzero(doc_word.sum(axis=0)) != doc_word.shape[1]: 47 | logger.warning("all zero column in document-term matrix found") 48 | sparse = True 49 | try: 50 | # if doc_word is a scipy sparse matrix 51 | doc_word = doc_word.copy().tolil() 52 | except AttributeError: 53 | sparse = False 54 | 55 | if sparse and not np.issubdtype(doc_word.dtype, np.integer): 56 | raise ValueError("expected sparse matrix with integer values, found float values") 57 | 58 | ii, jj = np.nonzero(doc_word) 59 | if sparse: 60 | ss = tuple(doc_word[i, j] for i, j in zip(ii, jj)) 61 | else: 62 | ss = doc_word[ii, jj] 63 | 64 | DS = np.repeat(ii, ss).astype(np.intc) 65 | WS = np.repeat(jj, ss).astype(np.intc) 66 | return WS, DS 67 | 68 | 69 | def lists_to_matrix(WS, DS): 70 | """Convert array of word (or topic) and document indices to doc-term array 71 | 72 | Parameters 73 | ----------- 74 | (WS, DS) : tuple of two arrays 75 | WS[k] contains the kth word in the corpus 76 | DS[k] contains the document index for the kth word 77 | 78 | Returns 79 | ------- 80 | doc_word : array (D, V) 81 | document-term array of counts 82 | 83 | """ 84 | D = max(DS) + 1 85 | V = max(WS) + 1 86 | doc_word = np.zeros((D, V), dtype=np.intc) 87 | indices, counts = np.unique(list(zip(DS, WS)), axis=0, return_counts=True) 88 | doc_word[indices[:, 0], indices[:, 1]] += counts 89 | return doc_word 90 | 91 | 92 | def dtm2ldac(dtm, offset=0): 93 | """Convert a document-term matrix into an LDA-C formatted file 94 | 95 | Parameters 96 | ---------- 97 | dtm : array of shape N,V 98 | 99 | Returns 100 | ------- 101 | doclines : iterable of LDA-C lines suitable for writing to file 102 | 103 | Notes 104 | ----- 105 | If a format similar 
to SVMLight is desired, `offset` of 1 may be used. 106 | """ 107 | try: 108 | dtm = dtm.tocsr() 109 | except AttributeError: 110 | pass 111 | assert np.issubdtype(dtm.dtype, np.integer) 112 | n_rows = dtm.shape[0] 113 | for i, row in enumerate(dtm): 114 | try: 115 | row = row.toarray().squeeze() 116 | except AttributeError: 117 | pass 118 | unique_terms = np.count_nonzero(row) 119 | if unique_terms == 0: 120 | raise ValueError("dtm row {} has all zero entries.".format(i)) 121 | term_cnt_pairs = [(i + offset, cnt) for i, cnt in enumerate(row) if cnt > 0] 122 | docline = str(unique_terms) + ' ' 123 | docline += ' '.join(["{}:{}".format(i, cnt) for i, cnt in term_cnt_pairs]) 124 | if (i + 1) % 1000 == 0: 125 | logger.info("dtm2ldac: on row {} of {}".format(i + 1, n_rows)) 126 | yield docline 127 | 128 | 129 | def ldac2dtm(stream, offset=0): 130 | """Convert an LDA-C formatted file to a document-term array 131 | 132 | Parameters 133 | ---------- 134 | stream: file object 135 | File yielding unicode strings in LDA-C format. 136 | 137 | Returns 138 | ------- 139 | dtm : array of shape N,V 140 | 141 | Notes 142 | ----- 143 | If a format similar to SVMLight is the source, an `offset` of 1 may be used. 144 | """ 145 | doclines = stream 146 | 147 | # We need to figure out the dimensions of the dtm. 
148 | N = 0 149 | V = -1 150 | data = [] 151 | for l in doclines: # noqa 152 | l = l.strip() # noqa 153 | # skip empty lines 154 | if not l: 155 | continue 156 | unique_terms = int(l.split(' ')[0]) 157 | term_cnt_pairs = [s.split(':') for s in l.split(' ')[1:]] 158 | for v, _ in term_cnt_pairs: 159 | # check that format is indeed LDA-C with the appropriate offset 160 | if int(v) == 0 and offset == 1: 161 | raise ValueError("Indexes in LDA-C are offset 1") 162 | term_cnt_pairs = tuple((int(v) - offset, int(cnt)) for v, cnt in term_cnt_pairs) 163 | np.testing.assert_equal(unique_terms, len(term_cnt_pairs)) 164 | V = max(V, *[v for v, cnt in term_cnt_pairs]) 165 | data.append(term_cnt_pairs) 166 | N += 1 167 | V = V + 1 168 | dtm = np.zeros((N, V), dtype=np.intc) 169 | for i, doc in enumerate(data): 170 | for v, cnt in doc: 171 | np.testing.assert_equal(dtm[i, v], 0) 172 | dtm[i, v] = cnt 173 | return dtm 174 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('lda', 'cython', 'c') 2 | 3 | py = import('python').find_installation(pure: false) 4 | 5 | py.extension_module( 6 | '_lda', 7 | files('lda/_lda.pyx', 'lda/gamma.c'), 8 | dependencies : py.dependency(), 9 | include_directories : include_directories('lda'), 10 | install : true, 11 | install_dir : meson.source_root() / 'lda', 12 | ) 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "lda" 3 | version = "3.0.2" 4 | description = "Topic modeling with latent Dirichlet allocation" 5 | authors = ["lda developers "] 6 | license = "MPL 2.0" 7 | readme = "README.rst" 8 | classifiers = [ 9 | "Development Status :: 4 - Beta", 10 | "Intended Audience :: Information Technology", 11 | "Intended Audience :: Developers", 12 | "Intended 
Audience :: Science/Research", 13 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 14 | "Programming Language :: C", 15 | "Programming Language :: Cython", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Operating System :: MacOS", 22 | "Operating System :: Microsoft :: Windows", 23 | "Operating System :: POSIX", 24 | "Operating System :: Unix", 25 | ] 26 | include = [ 27 | { path = "meson.build", format = "sdist" }, 28 | { path = "lda/*.so", format = "wheel" }, 29 | { path = "lda/*.pyd", format = "wheel"}, 30 | ] 31 | 32 | [tool.poetry.build] 33 | script = "build.py" 34 | 35 | [tool.poetry.dependencies] 36 | python = ">=3.10" 37 | numpy = ">=1.13.0" 38 | 39 | [tool.poetry.group.dev.dependencies] 40 | cython = "^3.0.8" 41 | scipy = "^1.11.3" 42 | sphinx = "^7.2.6" 43 | sphinx-autoapi = "^3.0.0" 44 | hacking = "^6.0.1" 45 | coverage = "^7.3.2" 46 | discover = "^0.4.0" 47 | python-subunit = "^1.4.3" 48 | oslosphinx = "^4.18.0" 49 | oslotest = "^4.5.0" 50 | testrepository = "^0.0.20" 51 | testscenarios = "^0.5.0" 52 | testtools = "^2.6.0" 53 | numpydoc = "^1.6.0" 54 | sphinx-rtd-theme = "^1.3.0" 55 | flake8 = "^5.0.1" 56 | 57 | [tool.poetry.group.build.dependencies] 58 | meson-python = "^0.14.0" 59 | cython = "^3.0.8" 60 | ninja = "^1.11" 61 | cibuildwheel = "^2.16" 62 | 63 | [tool.cibuildwheel] 64 | build = "cp310-* cp311-* cp312-*" 65 | skip = ["pp*", "*i686", "*win32"] 66 | test-requires = ["oslotest", "scipy", "setuptools"] 67 | test-command = "python -m unittest discover -s {project}/lda/tests" 68 | 69 | [build-system] 70 | requires = ["poetry-core", "cython", "meson-python", "ninja"] 71 | build-backend = "poetry.core.masonry.api" 72 | -------------------------------------------------------------------------------- /tox.ini: 
-------------------------------------------------------------------------------- 1 | [tox] 2 | minversion = 1.6 3 | envlist = py35,py34,py33,py27,pep8 4 | skipsdist = True 5 | 6 | [testenv] 7 | usedevelop = True 8 | install_command = pip install -U {opts} {packages} 9 | setenv = 10 | VIRTUAL_ENV={envdir} 11 | deps = -r{toxinidir}/requirements.txt 12 | -r{toxinidir}/test-requirements.txt 13 | whitelist_externals = make 14 | commands = make cython 15 | python setup.py testr --slowest --testr-args='{posargs}' 16 | 17 | [testenv:pep8] 18 | commands = flake8 19 | 20 | [testenv:venv] 21 | commands = {posargs} 22 | 23 | [testenv:cover] 24 | commands = python setup.py testr --coverage --testr-args='{posargs}' 25 | 26 | [testenv:docs] 27 | commands = python setup.py build_sphinx 28 | 29 | [flake8] 30 | # H803 skipped on purpose per list discussion. 31 | # E123, E125 skipped as they are invalid PEP-8. 32 | 33 | show-source = True 34 | max-line-length = 119 35 | select = C,E,F,W,B 36 | ignore = E501,E203,W503 37 | exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build 38 | --------------------------------------------------------------------------------