├── metaknowledge ├── WOS │ ├── tagProcessing │ │ ├── __init__.py │ │ ├── helpFuncs.py │ │ └── funcDicts.py │ ├── __init__.py │ └── wosHandlers.py ├── proquest │ ├── tagProcessing │ │ ├── __init__.py │ │ ├── specialFunctions.py │ │ └── tagFunctions.py │ └── __init__.py ├── scopus │ ├── tagProcessing │ │ ├── __init__.py │ │ ├── specialFunctions.py │ │ └── tagFunctions.py │ ├── __init__.py │ └── scopusHandlers.py ├── medline │ ├── tagProcessing │ │ ├── __init__.py │ │ ├── specialFunctions.py │ │ └── tagNames.py │ ├── __init__.py │ └── medlineHandlers.py ├── tests │ ├── __init__.py │ ├── nsfTestFiles │ │ ├── source.md │ │ ├── 6800031.xml │ │ ├── 6800030.xml │ │ ├── 6800097.xml │ │ ├── 6800025.xml │ │ ├── noID.xml │ │ ├── 6800104.xml │ │ ├── 6800077.xml │ │ ├── 69W3551.xml │ │ ├── 69W3546.xml │ │ ├── 69W3548.xml │ │ ├── 69W3550.xml │ │ ├── 69W3547.xml │ │ ├── 69W3549.xml │ │ ├── badXMLfile.xml │ │ ├── 6800039.xml │ │ ├── 1500217.xml │ │ ├── 1500219.xml │ │ ├── 1500201.xml │ │ ├── twoAwardFile.xml │ │ ├── 1500216.xml │ │ ├── 1500186.xml │ │ └── 1500194.xml │ ├── isitopythonHelper.py │ ├── SimplePaper.isi │ ├── test_constants.py │ ├── test_proquest.py │ ├── test_wos.py │ ├── test_grants.py │ ├── OnePaperNoCites.isi │ ├── test_medline.py │ ├── test_scopus.py │ ├── test_citation.py │ ├── test_grantCollection.py │ ├── test_diffusion.py │ ├── TwoPaper.isi │ ├── test_graphhelpers.py │ └── OnePaper2.isi ├── genders │ ├── __init__.py │ └── nameGender.py ├── bin │ ├── __init__.py │ └── metaknowledgeMdToNb.py ├── contour │ └── __init__.py ├── constants.py ├── grants │ ├── scopusGrant.py │ ├── medlineGrant.py │ ├── cihrGrant.py │ ├── __init__.py │ └── nsercGrant.py ├── journalAbbreviations │ └── __init__.py ├── fileHandlers.py ├── mkExceptions.py ├── RCglimpse.py └── __init__.py ├── setup.cfg ├── vagrant ├── win_run.cmd ├── linux_run.sh ├── updates.sh ├── mac_run ├── Vagrantfile └── bootstrap ├── docs ├── requirements.txt ├── documentation │ ├── classes │ │ ├── CIHRGrant.rst │ │ ├── Citation.rst │ │ ├── Record.rst │ │ ├── NSFGrant.rst │ │ ├── ExtendedRecord.rst │ │ ├── NSERCGrant.rst │ │ ├── WOSRecord.rst │ │ ├── FallbackGrant.rst │ │ ├── MedlineGrant.rst │ │ ├── Collection.rst │ │ ├── ScopusRecord.rst │ │ ├── CollectionWithIDs.rst │ │ ├── MedlineRecord.rst │ │ ├── GrantCollection.rst │ │ ├── ProQuestRecord.rst │ │ ├── RecordCollection.rst │ │ ├── Grant.rst │ │ └── index.rst │ ├── exceptions │ │ └── index.rst │ ├── modules │ │ ├── index.rst │ │ ├── contour.rst │ │ ├── journalAbbreviations.rst │ │ ├── scopus.rst │ │ ├── medline.rst │ │ ├── proquest.rst │ │ ├── WOS.rst │ │ └── grants.rst │ ├── index.rst │ ├── example.rst │ ├── overview.rst │ └── functions_methods │ │ └── index.rst ├── Makefile ├── make.bat ├── examples │ ├── index.rst │ ├── Getting-Started.ipynb │ └── Reading-Files.ipynb ├── mkdsupport.py ├── CLI.rst └── index.rst ├── README.md ├── .github └── workflows │ └── python-publish.yml ├── notebooks ├── Lesson-2-Reading-Files │ ├── Reading-Files.md │ └── Reading-Files.ipynb └── Lesson-1-Getting-Started │ ├── Getting-Started.md │ └── Getting-Started.ipynb ├── .gitignore ├── inheritance-structure.dot └── setup.py /metaknowledge/WOS/tagProcessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /metaknowledge/proquest/tagProcessing/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /metaknowledge/scopus/tagProcessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /metaknowledge/medline/tagProcessing/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /vagrant/win_run.cmd: -------------------------------------------------------------------------------- 1 | @Echo OFF 2 | vagrant up 3 | start "" http://localhost:1159 4 | pause 5 | -------------------------------------------------------------------------------- /vagrant/linux_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | cd "$(dirname "$0")" 4 | 5 | vagrant up 6 | vagrant ssh 7 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | nbsphinx 2 | pypandoc 3 | cloud_sptheme 4 | sphinx==1.8.2 5 | sphinx_rtd_theme 6 | ipython -------------------------------------------------------------------------------- /vagrant/updates.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Updating" 4 | echo "No updates found" 5 | 6 | exit 0 7 | -------------------------------------------------------------------------------- /metaknowledge/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | -------------------------------------------------------------------------------- /vagrant/mac_run: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | cd "$(dirname "$0")" 4 | 5 | vagrant up 6 | sleep 1 7 | open http://localhost:1159 8 | tput bel 9 | vagrant ssh 10 | -------------------------------------------------------------------------------- /docs/documentation/classes/CIHRGrant.rst: -------------------------------------------------------------------------------- 1 | CIHRGrant(Grant) 2 | ================ 3 | 4 | 5 | 6 | .. automodule:: metaknowledge.grants.cihrGrant 7 | :members: 8 | :undoc-members: 9 | -------------------------------------------------------------------------------- /docs/documentation/classes/Citation.rst: -------------------------------------------------------------------------------- 1 | Citation(Hashable) 2 | ================== 3 | 4 | 5 | .. automodule:: metaknowledge.citation 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /metaknowledge/genders/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | from .nameGender import nameStringGender, recordGenders, downloadData, getMapping 3 | -------------------------------------------------------------------------------- /docs/documentation/classes/Record.rst: -------------------------------------------------------------------------------- 1 | Record(Mapping, Hashable) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.Record 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/source.md: -------------------------------------------------------------------------------- 1 | Theses files are originally from [https://www.nsf.gov/awardsearch/download.jsp](https://www.nsf.gov/awardsearch/download.jsp) and have been modified to aid testing. 2 | -------------------------------------------------------------------------------- /docs/documentation/classes/NSFGrant.rst: -------------------------------------------------------------------------------- 1 | NSFGrant(Grant) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.grants.NSFGrant 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/exceptions/index.rst: -------------------------------------------------------------------------------- 1 | Exceptions 2 | ========== 3 | 4 | The exceptions defined by *metaknowledge* are: 5 | 6 | .. automodule:: metaknowledge.mkExceptions 7 | :members: 8 | :undoc-members: 9 | -------------------------------------------------------------------------------- /docs/documentation/classes/ExtendedRecord.rst: -------------------------------------------------------------------------------- 1 | ExtendedRecord(Record) 2 | ====================== 3 | 4 | 5 | .. autoclass:: metaknowledge.ExtendedRecord 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/NSERCGrant.rst: -------------------------------------------------------------------------------- 1 | NSERCGrant(Grant) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.grants.NSERCGrant 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/WOSRecord.rst: -------------------------------------------------------------------------------- 1 | WOSRecord(ExtendedRecord) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.WOS.WOSRecord 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/FallbackGrant.rst: -------------------------------------------------------------------------------- 1 | FallbackGrant(Grant) 2 | ====================== 3 | 4 | 5 | .. autoclass:: metaknowledge.grants.FallbackGrant 6 | :members: 7 | :special-members: 8 | :private-members: 9 | -------------------------------------------------------------------------------- /docs/documentation/classes/MedlineGrant.rst: -------------------------------------------------------------------------------- 1 | MedlineGrant(Grant) 2 | ================================== 3 | 4 | 5 | .. 
autoclass:: metaknowledge.MedlineGrant 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/modules/index.rst: -------------------------------------------------------------------------------- 1 | Modules 2 | ******* 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | contour 8 | grants 9 | journalAbbreviations 10 | medline 11 | proquest 12 | scopus 13 | WOS -------------------------------------------------------------------------------- /docs/documentation/classes/Collection.rst: -------------------------------------------------------------------------------- 1 | Collection(MutableSet, Hashable) 2 | ================================ 3 | 4 | 5 | .. autoclass:: metaknowledge.Collection 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/ScopusRecord.rst: -------------------------------------------------------------------------------- 1 | ScopusRecord(ExtendedRecord) 2 | ============================ 3 | 4 | 5 | .. autoclass:: metaknowledge.scopus.ScopusRecord 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/CollectionWithIDs.rst: -------------------------------------------------------------------------------- 1 | CollectionWithIDs(Collection) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.CollectionWithIDs 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/MedlineRecord.rst: -------------------------------------------------------------------------------- 1 | MedlineRecord(ExtendedRecord) 2 | ============================= 3 | 4 | 5 | .. autoclass:: metaknowledge.medline.MedlineRecord 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/modules/contour.rst: -------------------------------------------------------------------------------- 1 | contour 2 | ======= 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.contour 7 | 8 | Functions 9 | --------- 10 | .. automodule:: metaknowledge.contour.plotting 11 | :members: -------------------------------------------------------------------------------- /docs/documentation/classes/GrantCollection.rst: -------------------------------------------------------------------------------- 1 | GrantCollection(CollectionWithIDs) 2 | ================================== 3 | 4 | 5 | .. autoclass:: metaknowledge.GrantCollection 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/ProQuestRecord.rst: -------------------------------------------------------------------------------- 1 | ProQuestRecord(ExtendedRecord) 2 | ============================== 3 | 4 | 5 | .. autoclass:: metaknowledge.proquest.ProQuestRecord 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /metaknowledge/bin/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | from .metaknowledgeCLI import mkCLI 3 | from .metaknowledgeMdToNb import mkMdToNb 4 | from .metaknowledgeDocsGen import mkDocs 5 | -------------------------------------------------------------------------------- /docs/documentation/classes/RecordCollection.rst: -------------------------------------------------------------------------------- 1 | RecordCollection(CollectionWithIDs) 2 | =================================== 3 | 4 | 5 | .. autoclass:: metaknowledge.RecordCollection 6 | :members: 7 | :special-members: 8 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/classes/Grant.rst: -------------------------------------------------------------------------------- 1 | Grant(Record, MutableMapping) 2 | ============================= 3 | 4 | .. automodule:: metaknowledge.grants 5 | 6 | .. autoclass:: metaknowledge.grants.Grant 7 | :members: 8 | :special-members: 9 | :private-members: -------------------------------------------------------------------------------- /docs/documentation/index.rst: -------------------------------------------------------------------------------- 1 | Documentation 2 | ************* 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | 9 | example 10 | overview 11 | modules/index 12 | classes/index 13 | functions_methods/index 14 | exceptions/index -------------------------------------------------------------------------------- /docs/documentation/modules/journalAbbreviations.rst: -------------------------------------------------------------------------------- 1 | journalAbbreviations 2 | ========================================== 3 | 4 | 5 | Overview 6 | --------------- 7 | .. automodule:: metaknowledge.journalAbbreviations 8 | 9 | Functions 10 | --------- 11 | .. automodule:: metaknowledge.journalAbbreviations.backend 12 | :members: 13 | -------------------------------------------------------------------------------- /metaknowledge/tests/isitopythonHelper.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | import metaknowledge 3 | 4 | if __name__ == "__main__": 5 | rlst = metaknowledge.wosParser("testFile.isi") 6 | s = '[' 7 | for R in rlst: 8 | s +=(str(R.__getstate__()) + ',\n') 9 | s += ']' 10 | print(s) 11 | -------------------------------------------------------------------------------- /docs/documentation/classes/index.rst: -------------------------------------------------------------------------------- 1 | Classes 2 | ******* 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | CIHRGrant 8 | Citation 9 | Collection 10 | CollectionWithIDs 11 | ExtendedRecord 12 | FallbackGrant 13 | Grant 14 | GrantCollection 15 | MedlineGrant 16 | MedlineRecord 17 | NSERCGrant 18 | NSFGrant 19 | ProQuestRecord 20 | Record 21 | RecordCollection 22 | ScopusRecord 23 | WOSRecord -------------------------------------------------------------------------------- /metaknowledge/tests/SimplePaper.isi: -------------------------------------------------------------------------------- 1 | PT J 2 | AU John, D 3 | AF John, Doe 4 | TI Example Paper 5 | SO TOPICS IN COGNITIVE SCIENCE 6 | LA English 7 | DT Article 8 | DE Example; testing 9 | ID REAL; TIME 10 | AB This is a test. 11 | C1 UW, Ontario, Canada. 12 | RP John, D (reprint author), UW, Ontario, Canada. 
13 | CR John D., 1999, TOPICS IN COGNITIVE SCIENCE 14 | J9 EXAMPLE 15 | JI examaple 16 | PD APR 17 | PY 2015 18 | UT WOS:123317623000007 19 | ER 20 | -------------------------------------------------------------------------------- /metaknowledge/contour/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | """Two functions based on _matplotlib_ for generating nicer looking graphs 3 | 4 | This is the only module that depends on anything besides _networkx_; it depends on [numpy](http://www.numpy.org/), [scipy](https://www.scipy.org/) and [matplotlib](http://matplotlib.org/). 5 | """ 6 | 7 | from .plotting import graphDensityContourPlot, quickVisual 8 | -------------------------------------------------------------------------------- /metaknowledge/medline/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are the functions used to process medline (pubmed) files at the backend. They are meant for internal use by metaknowledge. 3 | """ 4 | from .recordMedline import MedlineRecord, medlineRecordParser 5 | from .medlineHandlers import isMedlineFile, medlineParser 6 | from .tagProcessing.tagNames import tagNameDict, authorBasedTags, tagNameConverterDict 7 | from .tagProcessing.specialFunctions import medlineSpecialTagToFunc 8 | from .tagProcessing.tagFunctions import * 9 | -------------------------------------------------------------------------------- /metaknowledge/WOS/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | """These are the functions used to process Web of Science (WOS) files at the backend. They are meant for internal use by metaknowledge.""" 3 | 4 | from .tagProcessing.tagFunctions import * 5 | from .tagProcessing.funcDicts import tagToFullDict, fullToTagDict, tagNameConverterDict, tagsAndNameSet, knownTagsList 6 | 7 | from .recordWOS import WOSRecord, recordParser 8 | from .wosHandlers import isWOSFile, wosParser 9 | -------------------------------------------------------------------------------- /metaknowledge/proquest/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | """These are the functions used to process ProQuest files at the backend. They are meant for internal use by metaknowledge. 3 | """ 4 | from .recordProQuest import ProQuestRecord, proQuestRecordParser 5 | from .proQuestHandlers import isProQuestFile, proQuestParser 6 | from .tagProcessing.specialFunctions import proQuestSpecialTagToFunc 7 | from .tagProcessing.tagFunctions import proQuestTagToFunc 8 | -------------------------------------------------------------------------------- /metaknowledge/scopus/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | """These are the functions used to process scopus csv files at the backend. They are meant for internal use by metaknowledge. 
3 | """ 4 | from .recordScopus import ScopusRecord, scopusRecordParser, scopusHeader 5 | from .scopusHandlers import isScopusFile, scopusParser 6 | 7 | from .tagProcessing.tagFunctions import scopusTagToFunction 8 | from .tagProcessing.specialFunctions import scopusSpecialTagToFunc 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /metaknowledge/proquest/tagProcessing/specialFunctions.py: -------------------------------------------------------------------------------- 1 | proQuestSpecialTagToFunc = { 2 | #'DOI' : lambda R : None, 3 | 'authorsShort' : lambda R : R['Author'], 4 | #'grants' : lambda R : [], 5 | #'address' : '', 6 | 'selfCitation' : lambda R: R.createCitation(), 7 | 'beginningPage' : lambda R : 0, 8 | 'keywords' : lambda R : R['Identifier / keyword'], 9 | 'abstract' : lambda R : R['Abstract'], 10 | #'citations' : '', 11 | 'authorsFull' : lambda R : 'Author', 12 | #'volume' : '', 13 | 'year' : lambda R : R['Degree date'], 14 | #'j9' : '', 15 | #'journal' : '', 16 | 'title' : lambda R : R['Title'], 17 | 'id' : lambda R : R.id, 18 | } 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # metaknowledge 4 | 5 | `metaknowledge` is a Python3 package that simplifies bibliometric research using data from various sources. It reads a directory of plain text files containing meta-data on publications and citations, and writes to a variety of data structures that are suitable for quantitative, network, and text analyses. It handles large datasets (e.g. several million records) efficiently. You can find the [documentation](https://metaknowledge.readthedocs.io/). 6 | 7 | ## Installing 8 | 9 | To install run `python3 setup.py install` 10 | -------------------------------------------------------------------------------- /vagrant/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 4 | 5 | Vagrant.configure(2) do |config| 6 | config.vm.provider "virtualbox" do |vb, override| 7 | config.vm.box = "ubuntu/trusty64" 8 | override.vm.box_url = "https://atlas.hashicorp.com/ubuntu/trusty64" 9 | vb.name = "Networks_Labs" 10 | vb.memory = "2048" 11 | override.vm.network :forwarded_port, host: 1159, guest: 8888, auto_correct: true 12 | end 13 | config.ssh.insert_key = false 14 | config.vm.synced_folder ".", "/vagrant", disabled: true 15 | config.vm.synced_folder '..', "/vagrant" 16 | config.vm.synced_folder '../notebooks', "/notebooks" 17 | config.vm.provision :shell, path: "bootstrap" 18 | 19 | end 20 | -------------------------------------------------------------------------------- /docs/documentation/modules/scopus.rst: -------------------------------------------------------------------------------- 1 | scopus 2 | ====== 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.scopus 7 | 8 | Functions 9 | --------- 10 | 11 | .. automodule:: metaknowledge.scopus.scopusHandlers 12 | :members: 13 | 14 | Special Functions 15 | ----------------- 16 | 17 | .. automodule:: metaknowledge.scopus.tagProcessing.specialFunctions 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | :inherited-members: 22 | 23 | Tag Functions 24 | ------------- 25 | 26 | .. automodule:: metaknowledge.scopus.tagProcessing.tagFunctions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | :inherited-members: 31 | 32 | Backend 33 | ------- 34 | 35 | .. automodule:: metaknowledge.scopus.recordScopus 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | :inherited-members: 40 | -------------------------------------------------------------------------------- /metaknowledge/scopus/tagProcessing/specialFunctions.py: -------------------------------------------------------------------------------- 1 | scopusSpecialTagToFunc = { 2 | 'year' : lambda R : R['Year'], 3 | 'volume' : lambda R : R['Volume'], 4 | 'beginningPage' : lambda R : R['Page start'], 5 | #'DOI' : lambda R : R['DOI'], Causese recursion errors if not commented out 6 | #'address' : lambda R : R[''], 7 | 'j9' : lambda R : R['Abbreviated Source Title'], 8 | 'citations' : lambda R : R['References'], 9 | #'grants' : lambda R : R['References'], 10 | 'selfCitation' : lambda R : R.createCitation(), 11 | 'authorsShort' : lambda R : R['Authors'], 12 | 'authorsFull' : lambda R : R['Authors'], 13 | 'title' : lambda R : R['Title'], 14 | 'journal' : lambda R : R['Source title'], 15 | 'keywords' : lambda R : R['Index Keywords'], 16 | 'abstract' : lambda R : R['Abstract'], 17 | 'id' : lambda R : R['EID'], 18 | } 19 | -------------------------------------------------------------------------------- /docs/documentation/modules/medline.rst: -------------------------------------------------------------------------------- 1 | medline 2 | ======= 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.medline 7 | 8 | Functions 9 | --------- 10 | 11 | .. automodule:: metaknowledge.medline.medlineHandlers 12 | :members: 13 | 14 | Special Functions 15 | ----------------- 16 | 17 | .. automodule:: metaknowledge.medline.tagProcessing.specialFunctions 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | :inherited-members: 22 | 23 | Tag Functions 24 | ------------- 25 | 26 | .. automodule:: metaknowledge.medline.tagProcessing.tagFunctions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | :inherited-members: 31 | 32 | Backend 33 | ------- 34 | 35 | .. 
automodule:: metaknowledge.medline.recordMedline 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | :inherited-members: 40 | 41 | -------------------------------------------------------------------------------- /docs/documentation/modules/proquest.rst: -------------------------------------------------------------------------------- 1 | proquest 2 | ======== 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.proquest 7 | 8 | Functions 9 | --------- 10 | 11 | .. automodule:: metaknowledge.proquest.proQuestHandlers 12 | :members: 13 | 14 | Special Functions 15 | ----------------- 16 | 17 | .. automodule:: metaknowledge.proquest.tagProcessing.specialFunctions 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | :inherited-members: 22 | 23 | Tag Functions 24 | ------------- 25 | 26 | .. automodule:: metaknowledge.proquest.tagProcessing.tagFunctions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | :inherited-members: 31 | 32 | Backend 33 | ------- 34 | 35 | .. automodule:: metaknowledge.proquest.recordProQuest 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | :inherited-members: 40 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/documentation/example.rst: -------------------------------------------------------------------------------- 1 | ############## 2 | Basic Example 3 | ############## 4 | 5 | *metaknoweldge* is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data. 6 | 7 | To load the data from files and make a network: :: 8 | >>> import metaknowledge as mk 9 | >>> RC = mk.RecordCollection("records/") 10 | >>> print(RC) 11 | Collection of 33 records 12 | >>> G = RC.coCiteNetwork(nodeType = 'journal') 13 | Done making a co-citation network of files-from-records 1.1s 14 | >>> print(len(G.nodes())) 15 | 223 16 | >>> mk.writeGraph(G, "Cocitation-Network-of-Journals") 17 | 18 | There is also a simple command line program called ``metaknowledge`` that comes with the package. It allows for creating networks without any need to know Python. More information about it can be found `here <../CLI.html>`_. 19 | 20 | .. 
toctree:: 21 | :maxdepth: 2 22 | :caption: Example: 23 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /metaknowledge/constants.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | import os 3 | import sys 4 | 5 | __version__ = '3.4.1' 6 | 7 | commonRecordFields = [ 8 | 'year', 9 | 'volume', 10 | 'beginningPage', 11 | 'DOI', 12 | 'address', 13 | 'j9', 14 | 'citations', 15 | 'grants', 16 | 'selfCitation', 17 | 'authorsShort', 18 | 'authorsFull', 19 | 'title', 20 | 'journal', 21 | 'keywords', 22 | 'abstract', 23 | 'id', 24 | ] 25 | 26 | 27 | def isInteractive(): 28 | """ 29 | A basic check of if the program is running in interactive mode 30 | """ 31 | if sys.stdout.isatty() and os.name != 'nt': 32 | #Hopefully everything but ms supports '\r' 33 | try: 34 | import threading 35 | except ImportError: 36 | return False 37 | else: 38 | return True 39 | else: 40 | return False 41 | 42 | VERBOSE_MODE = isInteractive() 43 | 44 | FAST_CITES = False 45 | -------------------------------------------------------------------------------- /docs/documentation/modules/WOS.rst: -------------------------------------------------------------------------------- 1 | WOS 2 | === 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.WOS 7 | 8 | Functions 9 | --------- 10 | 11 | .. automodule:: metaknowledge.WOS.wosHandlers 12 | :members: 13 | 14 | Help Functions 15 | -------------- 16 | 17 | .. automodule:: metaknowledge.WOS.tagProcessing.helpFuncs 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | :inherited-members: 22 | 23 | Tag Functions 24 | ------------- 25 | 26 | .. automodule:: metaknowledge.WOS.tagProcessing.tagFunctions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | :inherited-members: 31 | 32 | Dict Functions 33 | -------------- 34 | 35 | .. automodule:: metaknowledge.WOS.tagProcessing.funcDicts 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | :inherited-members: 40 | 41 | Backend 42 | ------- 43 | 44 | .. 
automodule:: metaknowledge.WOS.recordWOS 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | :inherited-members: -------------------------------------------------------------------------------- /docs/documentation/modules/grants.rst: -------------------------------------------------------------------------------- 1 | grants 2 | ====== 3 | 4 | Overview 5 | -------- 6 | .. automodule:: metaknowledge.grants 7 | 8 | baseGrant 9 | --------- 10 | 11 | .. automodule:: metaknowledge.grants.baseGrant 12 | :members: 13 | :undoc-members: 14 | :noindex: 15 | 16 | cihrGrant 17 | --------- 18 | 19 | .. automodule:: metaknowledge.grants.cihrGrant 20 | :members: 21 | :undoc-members: 22 | :noindex: 23 | 24 | medlineGrant 25 | ------------ 26 | 27 | .. automodule:: metaknowledge.grants.medlineGrant 28 | :members: 29 | :undoc-members: 30 | :noindex: 31 | 32 | nsercGrant 33 | ---------- 34 | 35 | .. automodule:: metaknowledge.grants.nsercGrant 36 | :members: 37 | :undoc-members: 38 | :noindex: 39 | 40 | nsfGrant 41 | -------- 42 | 43 | .. automodule:: metaknowledge.grants.nsfGrant 44 | :members: 45 | :undoc-members: 46 | :noindex: 47 | 48 | scopusGrant 49 | ----------- 50 | 51 | .. automodule:: metaknowledge.grants.scopusGrant 52 | :members: 53 | :undoc-members: 54 | :noindex: 55 | 56 | -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | **Note:** for a more recent example of using *metaknowledge*, please visit `the NetLab blog `_. 5 | 6 | *metaknowledge* is a python library for creating and analyzing scientific metadata. It uses records obtained from Web of Science (WOS), Scopus and other sources. It is intended to be usable by those who do not know much python. This page will be a short overview of its capabilities, to allow you to use it for your own work. 7 | 8 | This document was made from a `jupyter `_ notebook, if you know how to use them, you can download the notebook |notebook|_ and the sample file is `here `_ if you wish to have an interactive version of this page. Now let's begin. 9 | 10 | .. _notebook: networkslab.org/metaknowledge/examples/metaknowledgeExamples.ipynb 11 | .. |notebook| replace:: here 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | 16 | Getting-Started.ipynb 17 | Reading-Files.ipynb 18 | Objects.ipynb 19 | Making-Networks.ipynb -------------------------------------------------------------------------------- /metaknowledge/tests/test_constants.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | import unittest 3 | import unittest.mock 4 | import builtins 5 | import importlib 6 | import sys 7 | import metaknowledge.constants 8 | 9 | class TestConstants(unittest.TestCase): 10 | def test_VerboseMode(self): 11 | self.assertFalse(metaknowledge.constants.isInteractive()) 12 | sys.stdout.isatty = lambda : True 13 | self.assertTrue(metaknowledge.constants.isInteractive()) 14 | class ImportMock(unittest.mock.Mock): 15 | def __call__(self, *args, **kwargs): 16 | if args[0] == 'threading': 17 | raise ImportError 18 | else: 19 | return importlib.__import__(*args, **kwargs) 20 | with unittest.mock.patch('builtins.__import__', new_callable = ImportMock):#, NoThreadingImport): 21 | #builtins.__import__ = 22 | self.assertFalse(metaknowledge.constants.isInteractive()) #This will fail for setup.py test 23 | #Failure for setup.py is what is supposed to happen as that would be an interactive enviroment 24 | -------------------------------------------------------------------------------- /metaknowledge/grants/scopusGrant.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from .baseGrant import Grant 4 | from ..mkExceptions import BadGrant 5 | 6 | class ScopusGrant(Grant): 7 | def __init__(self, grantString): 8 | 9 | grantDict = collections.OrderedDict() 10 | 11 | bad = False 12 | error = None 13 | 14 | split = grantString.split(', ') 15 | try: 16 | grantDict['agency'] = split.pop() 17 | except IndexError: 18 | bad = True 19 | grantDict['agency'] = grantString 20 | error = BadGrant("The grant string '{}' does not contain enough comma-spaces (', ') to be a scopus grant.".format(grantString)) 21 | else: 22 | try: 23 | grantDict['agencyCode'] = split.pop() 24 | except IndexError: 25 | pass 26 | else: 27 | try: 28 | grantDict['code'] = split.pop() 29 | except IndexError: 30 | pass 31 | idValue = "{}-{}-{}".format(grantDict.get('code', ''), grantDict.get('agencyCode', ''), grantDict.get('agency', '')) 32 | Grant.__init__(self, grantString, grantDict, idValue, bad, error) 33 | -------------------------------------------------------------------------------- /notebooks/Lesson-2-Reading-Files/Reading-Files.md: -------------------------------------------------------------------------------- 1 | # Reading Files 2 | 3 | 4 | First we need to import _metaknowledge_ like we saw in lesson 1. 5 | 6 | []import metaknowledge as mk 7 | 8 | we only need _metaknowledge_ for now so no need to import everything 9 | 10 | 11 | The files from the Web of Science (WOS) can be loaded into a [`RecordCollections`](http://networkslab.org/metaknowledge/docs/RecordCollection#RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string. 12 | 13 | []RC = mk.RecordCollection("savedrecs.txt") 14 | []repr(RC) 15 | 16 | You can also read a whole directory, in this case it is reading the current working directory 17 | 18 | []RC = mk.RecordCollection(".") 19 | []repr(RC) 20 | 21 | _metaknowledge_ can detect if a file is a valid WOS file or not and will read the entire directory and load only those that have the right header. You can also tell it to only read a certain type of file, by using the extension argument. 22 | 23 | []RC = mk.RecordCollection(".", extension = "txt") 24 | []repr(RC) 25 | 26 | Now you have a `RecordCollection` composed of all the WOS records in the selected file(s). 
27 | 28 | []print("RC is a " + str(RC)) 29 | 30 | You might have noticed I used two different ways to display the `RecordCollection`. `repr(RC)` will give you where _metaknowledge_ thinks the collection came from, while `str(RC)` will give you a nice string containing the number of `Records`. 31 | -------------------------------------------------------------------------------- /metaknowledge/journalAbbreviations/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | """This module handles the abbreviations, known as J29 abbreviations, that are given by the J9 tag in WOS Records and that WOS employs for journal titles in citations. 3 | 4 | The citations provided by WOS use abbreviated journal titles instead of the full names. The full list of abbreviations can be found in a series of pages divided by letter, starting at [images.webofknowledge.com/WOK46/help/WOS/A_abrvjt.html](http://images.webofknowledge.com/WOK46/help/WOS/A_abrvjt.html). The function [updatej9DB()](#metaknowledge.journalAbbreviations.backend.updatej9DB) is used to scrape and parse the pages; it must be run without error before the other features of _metaknowledge_ can be used. If the database is requested by `getj9dict()`, which is what [Citations](../classes/Citation.html#metaknowledge.citation.Citation) use, and the database is not found or is corrupted, then [updatej9DB()](#metaknowledge.journalAbbreviations.backend.updatej9DB) will be run to download the database; if this fails an `mkException` will be raised. The download and parsing usually take less than a second on a good internet connection. 5 | 6 | The other functions of the module are for manually adding and removing abbreviations from the database. It is recommended that this be done with the command-line tool `metaknowledge` instead of with a script. 7 | """ 8 | 9 | from .backend import getj9dict, abrevDBname, manualDBname, addToDB 10 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_proquest.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | import unittest 3 | import metaknowledge 4 | 5 | import os 6 | 7 | class TestProQuest(unittest.TestCase): 8 | 9 | def setUp(self): 10 | metaknowledge.VERBOSE_MODE = False 11 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/ProQuest_TestFile.testtxt") 12 | self.R = self.RC.peek() 13 | 14 | def test_isCollection(self): 15 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection) 16 | 17 | def test_isProQuest(self): 18 | self.assertIsInstance(self.R, metaknowledge.ProQuestRecord) 19 | 20 | def test_specials(self): 21 | for R in self.RC: 22 | for s in metaknowledge.proquest.proQuestSpecialTagToFunc.keys(): 23 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation)) 24 | 25 | def test_allFields(self): 26 | for R in self.RC: 27 | for k,v in R.items(): 28 | self.assertIsInstance(k, str) 29 | self.assertIsInstance(v, (str, list, int)) 30 | 31 | def test_graphs(self): 32 | self.assertEqual(metaknowledge.graphStats(self.RC.networkMultiMode(self.RC.tags()), sentenceString = True), "The graph has 1928 nodes, 50833 edges, 0 isolates, 114 self loops, a density of 0.0273952 and a transitivity of 0.0815136") 33 | 34 | def test_write(self): 35 | #No writer currently implemented 36 | pass 37 | -------------------------------------------------------------------------------- /notebooks/Lesson-1-Getting-Started/Getting-Started.md: -------------------------------------------------------------------------------- 1 | # About Jupyter Notebooks 2 | 3 | 4 | This document was made from a [jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells, each cell is either code, output, or markdown (text). For example this cell is markdown, which means it is plain text with a couple small formatting things, like the link in the first sentence. You can change the cell type using the dropdown menu at the top of the page. 5 | 6 | []#This cell is python 7 | []#The cell below it is output 8 | []print("This is an output cell") 9 | 10 | The code cells contain python code that you can edit and run your self. Try changing the one above. 11 | 12 | 13 | # Importing 14 | 15 | 16 | First you need to import the _metaknowledge_ package 17 | 18 | []import metaknowledge as mk 19 | 20 | 21 | And you will often need the [_networkx_](https://networkx.github.io/documentation/networkx-1.9.1/) package 22 | 23 | []import networkx as nx 24 | 25 | And [_matplotlib_](http://matplotlib.org/) to display the graphs and to make them look nice when displayed 26 | 27 | []import matplotlib.pyplot as plt 28 | []%matplotlib inline 29 | 30 | _metaknowledge_ also has a _matplotlib_ based graph [visualizer](http://networkslab.org/metaknowledge/docs/visual#visual) that will be used sometimes 31 | 32 | []import metaknowledge.visual as mkv 33 | 34 | These lines of code will be at the top of all the other lessons as they are what let us use _metaknowledge_. 35 | -------------------------------------------------------------------------------- /metaknowledge/WOS/tagProcessing/helpFuncs.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | monthDict = {'SPR': 3, 'SUM': 6, 'FAL': 9, 'WIN': 12, 'JAN' : 1, 'FEB' : 2, 'MAR' : 3, 'APR' : 4, 'MAY' : 5, 'JUN' : 6 , 'JUL' : 7, 'AUG' : 8, 'SEP' : 9, 'OCT' : 10, 'NOV' : 11, 'DEC' : 12} 3 | 4 | def getMonth(s): 5 | """ 6 | Known formats: 7 | Month ("%b") 8 | Month Day ("%b %d") 9 | Month-Month ("%b-%b") --- this gets coerced to the first %b, dropping the month range 10 | Season ("%s") --- this gets coerced to use the first month of the given season 11 | Month Day Year ("%b %d %Y") 12 | Month Year ("%b %Y") 13 | Year Month Day ("%Y %m %d") 14 | """ 15 | monthOrSeason = s.split('-')[0].upper() 16 | if monthOrSeason in monthDict: 17 | return monthDict[monthOrSeason] 18 | else: 19 | monthOrSeason = s.split('-')[-1].upper() 20 | if monthOrSeason.isdigit(): 21 | return monthOrSeason 22 | else: 23 | return monthDict[monthOrSeason] 24 | 25 | raise ValueError("Month format not recognized: " + s) 26 | 27 | def makeBiDirectional(d): 28 | """ 29 | Helper for generating tagNameConverter 30 | Makes dict that maps from key to value and back 31 | """ 32 | dTmp = d.copy() 33 | for k in d: 34 | dTmp[d[k]] = k 35 | return dTmp 36 | 37 | def reverseDict(d): 38 | """ 39 | Helper for generating fullToTag 40 | Makes dict of value to key 41 | """ 42 | retD = {} 43 | for k in d: 44 | retD[d[k]] = k 45 | return retD 46 | -------------------------------------------------------------------------------- /docs/mkdsupport.py: -------------------------------------------------------------------------------- 1 | """A Sphinx extension supporting sphinx.ext.autodoc for modules containing docstrings in Markdown 2 | """ 3 | 4 | import pypandoc 5 | import platform 6 | 7 | # Since pypandoc.convert_text will always return strings ending with \r\n, the separator should also be set to it 8 | 9 | if platform.system() == 'Windows': 10 | SEP = u'\r\n' 11 | else: 12 | SEP = u'\n' 13 | #SEP = u'\r\n' 14 | 15 | 16 | def setup(app): 17 | """Add extension's default value and set new function to ```autodoc-process-docstring``` event""" 18 | 19 | # The 'rebuild' parameter should be set to 'html' rather than 'env' since this extension needs a full rebuild of the HTML 20 | # document 21 | app.add_config_value('mkdsupport_use_parser', 'markdown_github', 'html') 22 | app.connect('autodoc-process-docstring', pandoc_process) 23 | 24 | 25 | 26 | def pandoc_process(app, what, name, obj, options, lines): 27 | """Convert docstrings in Markdown into reStructuredText using pandoc 28 | """ 29 | 30 | if not lines: 31 | return None 32 | 33 | input_format = app.config.mkdsupport_use_parser 34 | output_format = 'rst' 35 | 36 | # Since the default encoding for sphinx.ext.autodoc is unicode and pypandoc.convert_text, which will always return a 37 | # unicode string, expects a unicode or utf-8 encoded string, there is no need to deal with encoding 38 | text = SEP.join(lines) 39 | text = pypandoc.convert_text(text, output_format, format=input_format) 40 | 41 | # The 'lines' parameter in Sphinx is a list of strings and must be modified in place 42 | del lines[:] 43 | lines.extend(text.split(SEP)) 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #isi Files and logins 2 | *.txt 3 | !requirements.txt 4 | *.sh 5 | *.graphml 6 | *.csv 7 | *_cites/ #<----------------------------------------------This might cause issues 8 | *.log 9 | cover/ 10 | profiling/ 11 | special.py 12 | .ipynb_checkpoints/ 13 | 
.vagrant/ 14 | .ipython/ 15 | security/ 16 | !vagrant/*.sh 17 | vagrant/logs/ 18 | profile_wosserver/ 19 | !profile_wosserver/ipython_* 20 | !Docs/*.sh 21 | metaknowledgeDocs.md 22 | j9Raws/ 23 | /manualj9Abbreviations* 24 | metaknowledge/journalAbbreviations/j9Abbreviations.bak 25 | metaknowledge/journalAbbreviations/j9Abbreviations.dir 26 | metaknowledge/journalAbbreviations/j9Abbreviations.dat 27 | !savedrecs.txt 28 | *.bib 29 | TODO.md 30 | testing_nb/ 31 | 32 | *.DS_Store 33 | 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | 38 | # C extensions 39 | *.so 40 | 41 | # Distribution / packaging 42 | .Python 43 | env/ 44 | build/ 45 | develop-eggs/ 46 | dist/ 47 | downloads/ 48 | eggs/ 49 | .eggs/ 50 | lib/ 51 | lib64/ 52 | parts/ 53 | sdist/ 54 | var/ 55 | *.egg-info/ 56 | .installed.cfg 57 | *.egg 58 | 59 | # PyInstaller 60 | # Usually these files are written by a python script from a template 61 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *,cover 78 | 79 | # Translations 80 | *.mo 81 | *.pot 82 | 83 | # Django stuff: 84 | *.log 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | docs/_static/ 89 | docs/_templates 90 | 91 | # PyBuilder 92 | target/ 93 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800031.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Down Deep Bore Hole Carbon Dating Project at Byrd Station, Antarctica 6 | 08/15/1968 7 | 08/31/1969 8 | 12600 9 | 10 | Standard Grant 11 | 12 | 13 | 07000000 14 | 15 | Directorate For Engineering 16 | 17 | 18 | Directorate For Engineering 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 06/10/1968 26 | 06/10/1968 27 | 28 | 6800031 29 | 30 | Hans 31 | Oeschger 32 | 33 | 08/15/1968 34 | 35 | Principal Investigator 36 | 37 | 38 | University of Bern 39 | Bern 40 | 3007 41 | 0316314847 42 | Hochschulstrasse 4 43 | Switzerland 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800030.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Participation in Usarp Expeditions 6 | 07/01/1968 7 | 12/31/1977 8 | 49203 9 | 10 | Interagency Agreement 11 | 12 | 13 | 07000000 14 | 15 | Directorate For Engineering 16 | 17 | 18 | Directorate For Engineering 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 05/23/1968 26 | 01/25/1974 27 | 28 | 6800030 29 | 30 | H. 
31 | Fehlmann 32 | 33 | 07/01/1968 34 | 35 | Principal Investigator 36 | 37 | 38 | Smithsonian Institution 39 | Arlington 40 | 222023709 41 | 2026337110 42 | Office of Sponsored Projects 43 | United States 44 | Virginia 45 | VA 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800097.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Genera of Chrysomelid Beetles of the World 6 | 06/01/1969 7 | 05/31/1971 8 | 30900 9 | 10 | Standard Grant 11 | 12 | 13 | 08000000 14 | 15 | Direct For Biological Sciences 16 | 17 | 18 | Direct For Biological Sciences 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 05/26/1969 26 | 05/26/1969 27 | 28 | 6800097 29 | 30 | J.Linsley 31 | Gressitt 32 | 33 | 06/01/1969 34 | 35 | Principal Investigator 36 | 37 | 38 | Bernice P Bishop Museum 39 | Honolulu 40 | 968172704 41 | 8088478204 42 | 1525 Bernice Street 43 | United States 44 | Hawaii 45 | HI 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800025.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chemical and Mineralogical Investigations of Puerto Rican Spilites 6 | 10/15/1968 7 | 06/30/1972 8 | 20300 9 | 10 | Standard Grant 11 | 12 | 13 | 06030003 14 | 15 | Directorate For Geosciences 16 | 17 | 18 | Division Of Earth Sciences 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 10/17/1968 26 | 10/17/1968 27 | 28 | 6800025 29 | 30 | 31 | DATA NOT AVAILABLE 32 | 33 | 10/15/1968 34 | 35 | Principal Investigator 36 | 37 | 38 | University of Pittsburgh 39 | Pittsburgh 40 | 152132303 41 | 4126247400 42 | University Club 43 | United States 44 | Pennsylvania 45 | PA 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/grants/medlineGrant.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from .baseGrant import Grant 4 | from ..mkExceptions import BadGrant 5 | 6 | class MedlineGrant(Grant): 7 | def __init__(self, grantString): 8 | 9 | grantDict = collections.OrderedDict() 10 | 11 | bad = False 12 | error = None 13 | 14 | split = grantString.split('/') 15 | try: 16 | grantDict['country'] = split.pop() 17 | grantDict['agency'] = split.pop() 18 | except IndexError: 19 | bad = True 20 | grantDict['country'] = grantString 21 | error = BadGrant("The grant string '{}' does not contain enough slashes (/) to be a medline grant.".format(grantString)) 22 | else: 23 | if len(split) == 1: 24 | code = split.pop() 25 | if len(code) == 2: 26 | grantDict['code'] = code 27 | else: 28 | grantDict['number'] = code 29 | elif len(split) == 2: 30 | code = split.pop() 31 | if len(code) == 2: 32 | grantDict['code'] = code 33 | grantDict['number'] = split.pop() 34 | else: 35 | grantDict['number'] = "{}/{}".format(split.pop(), code) 36 | else: 37 | grantDict['number'] = '/'.join(split) 38 | if 'number' in grantDict: 39 | idValue = "{}/{}-{}".format(grantDict.get('number', ''), grantDict.get('code', ''), grantDict.get('country', '')) 40 | else: 41 | idValue = "{}-{}-{}".format(grantDict.get('code', ''), grantDict.get('agency', ''), grantDict.get('country', '')) 42 | Grant.__init__(self, grantString, grantDict, idValue, bad, error) 43 | -------------------------------------------------------------------------------- 
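A minimal usage sketch of the MedlineGrant parser defined above (not part of the repository source): the grant string is a made-up example of a MEDLINE GR value, and the mapping-style access assumes the Grant base class (baseGrant.py, not shown here) exposes the parsed fields as keys.

from metaknowledge.grants.medlineGrant import MedlineGrant

# The string is split on '/': the last piece becomes 'country' and the piece
# before it 'agency'; with two pieces remaining, the two-character one is
# stored as 'code' and the other as 'number'.
grant = MedlineGrant("R01 CA123456/CA/NCI NIH HHS/United States")  # hypothetical grant string
print(grant['country'])  # expected: 'United States'
print(grant['agency'])   # expected: 'NCI NIH HHS'
print(grant['code'])     # expected: 'CA'
print(grant['number'])   # expected: 'R01 CA123456'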
/metaknowledge/tests/nsfTestFiles/noID.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Task Order For Partial Support of the Committee on Geography 6 | 12/08/1967 7 | 06/30/1971 8 | 30000 9 | 10 | BOA/Task Order 11 | 12 | 13 | 06030003 14 | 15 | Directorate For Geosciences 16 | 17 | 18 | Division Of Earth Sciences 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 12/08/1967 26 | 05/14/1969 27 | 28 | 29 | Jeremy 30 | Taylor 31 | jeremygtaylor@compuserve.com 32 | 12/08/1967 33 | 34 | Principal Investigator 35 | 36 | 37 | National Research Council 38 | WASHINGTON 39 | 204180006 40 | 41 | 2101 CONSTITUTION AVE NW 42 | United States 43 | District of Columbia 44 | DC 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800104.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Task Order For Partial Support of the Committee on Geography 6 | 12/08/1967 7 | 06/30/1971 8 | 30000 9 | 10 | BOA/Task Order 11 | 12 | 13 | 06030003 14 | 15 | Directorate For Geosciences 16 | 17 | 18 | Division Of Earth Sciences 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 12/08/1967 26 | 05/14/1969 27 | 28 | 6800104 29 | 30 | Jeremy 31 | Taylor 32 | jeremygtaylor@compuserve.com 33 | 12/08/1967 34 | 35 | Principal Investigator 36 | 37 | 38 | National Research Council 39 | WASHINGTON 40 | 204180006 41 | 42 | 2101 CONSTITUTION AVE NW 43 | United States 44 | District of Columbia 45 | DC 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/proquest/tagProcessing/tagFunctions.py: -------------------------------------------------------------------------------- 1 | singleLineEntries = { 2 | #The inverse cannot be done as new tags may be added that occupy mutiple line 3 | 'Author', 4 | 'Title', 5 | 'Advisor', 6 | 'Name', 7 | 'Committee member', 8 | 'Copyright', 9 | 'Country of publication', 10 | 'Database', 11 | 'Degree', 12 | 'Degree date', 13 | 'Department', 14 | 'Dissertation/thesis number', 15 | 'Document type', 16 | 'Language', 17 | 'Number of pages', 18 | 'Place of publication', 19 | 'ProQuest document ID', 20 | 'Publication year', 21 | 'School code', 22 | 'Source', 23 | 'Source type', 24 | 'University location', 25 | 'University/institution', 26 | 'ISBN', 27 | 'Publication subject', 28 | } 29 | 30 | def proQuestSubject(value): 31 | return value[0].split('; ') 32 | 33 | def proQuestIdentifier_Keyword(value): 34 | return value[0].split(', ') 35 | 36 | def proQuestClassification(value): 37 | return [tuple(s.split(': ')) for s in value[0].split('; ')] 38 | 39 | customTags = { 40 | 'Classification' : proQuestClassification, 41 | 'Identifier / keyword' : proQuestIdentifier_Keyword, 42 | 'Subject' : proQuestSubject 43 | } 44 | 45 | def proQuestTagToFunc(tag): 46 | """Takes a tag string, _tag_, and returns the processing function for its data. If their is not a predefined function returns the identity function (`lambda x : x`). 
47 | 48 | # Parameters 49 | 50 | _tag_ : `str` 51 | 52 | > The requested tag 53 | 54 | # Returns 55 | 56 | `function` 57 | 58 | > A function to process the tag's data 59 | """ 60 | if tag in singleLineEntries: 61 | return lambda x : x[0] 62 | elif tag in customTags: 63 | return customTags[tag] 64 | else: 65 | return lambda x : x 66 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800077.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Cytogenetics of Coccid Chromosome Systems and Related Nutritional and Biochemical Studies 6 | 10/01/1968 7 | 11/30/1974 8 | 99600 9 | 10 | Standard Grant 11 | 12 | 13 | 08070100 14 | 15 | Direct For Biological Sciences 16 | 17 | 18 | Div Of Molecular and Cellular Bioscience 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 10/04/1968 26 | 02/03/1975 27 | 28 | 6800077 29 | 30 | Spencer 31 | Brown 32 | 33 | 10/01/1968 34 | 35 | Principal Investigator 36 | 37 | 38 | University of California-Berkeley 39 | BERKELEY 40 | 947045940 41 | 5106428109 42 | Sponsored Projects Office 43 | United States 44 | California 45 | CA 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_wos.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | import unittest 3 | import metaknowledge 4 | 5 | class TestWOS(unittest.TestCase): 6 | def setUp(self): 7 | self.R = metaknowledge.WOSRecord(simplePaperString) 8 | self.Rbad = metaknowledge.WOSRecord(simplePaperString[:-3]) 9 | 10 | def test_creation(self): 11 | R = metaknowledge.WOSRecord(self.R._fieldDict) 12 | self.assertEqual(R, self.R) 13 | with open("metaknowledge/tests/testFile.isi") as f: 14 | f.readline() 15 | f.readline() 16 | R = metaknowledge.WOSRecord(f) 17 | self.assertEqual(R.id, 'WOS:A1979GV55600001') 18 | with self.assertRaises(TypeError): 19 | R = metaknowledge.WOSRecord(123456789) 20 | 21 | def test_badwrite(self): 22 | with self.assertRaises(metaknowledge.BadWOSRecord): 23 | self.Rbad.writeRecord('not a file object.txt') 24 | 25 | def test_dupDetection(self): 26 | s = simplePaperString[:-3] + "DE Example; testing\nPD APR\nER\n" 27 | R = metaknowledge.WOSRecord(s) 28 | self.assertTrue(R.bad) 29 | 30 | def test_WOSNum(self): 31 | self.assertEqual(self.R.UT, 'WOS:123317623000007') 32 | self.assertEqual(self.R.wosString, 'WOS:123317623000007') 33 | 34 | 35 | simplePaperString = """PT J 36 | AU John, D 37 | AF John, Doe 38 | TI Example Paper 39 | SO TOPICS IN COGNITIVE SCIENCE 40 | LA English 41 | DT Article 42 | DE Example; testing 43 | ID REAL; TIME 44 | AB This is a test. 45 | C1 UW, Ontario, Canada. 46 | RP John, D (reprint author), UW, Ontario, Canada. 47 | CR John D. 
1999, TOPICS IN COGNITIVE SCIENCE 48 | J9 EXAMPLE 49 | JI examaple 50 | PD APR 51 | PY 2015 52 | VL 1 53 | BP 1 54 | EP 2 55 | DI 10.1111 56 | UT WOS:123317623000007 57 | ER 58 | """ 59 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3551.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Mathematics for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 57778 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | Stanford University 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 2 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3551 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3546.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Mathematics for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 33462 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | San Jose State University Foundation 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 1 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3546 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3548.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Engineering Concepts (ECCP) for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 36489 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | Harvey Mudd College 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 1 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3548 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3550.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Field Biology and Ecology for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 56375 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | Sonoma State University 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 2 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3550 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3547.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Mathematics for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 66126 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | California State University-Fresno Foundation 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 2 51 | -2660400 52 | 
1081828800 53 | 54 | -31518000 55 | 69W3547 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/69W3549.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Biological Science for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 61335 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | Humboldt State University Foundation 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 2 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3549 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/badXMLfile.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | Summer Institute in Mathematics for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 66126 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | California State University-Fresno Foundation 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 2 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3547 56 | 57 | 58 | -------------------------------------------------------------------------------- /metaknowledge/fileHandlers.py: -------------------------------------------------------------------------------- 1 | try: 2 | import collections.abc 3 | except ImportError: 4 | import collections 5 | collections.abc = collections 6 | 7 | from .mkExceptions import UnknownFile 8 | 9 | from .grants.cihrGrant import parserCIHRfile, isCIHRfile 10 | from .grants.nsercGrant import parserNSERCfile, isNSERCfile 11 | from .grants.nsfGrant import parserNSFfile, isNSFfile 12 | from .grants.baseGrant import parserFallbackGrantFile, isFallbackGrantFile 13 | 14 | from .WOS.wosHandlers import isWOSFile, wosParser 15 | from .medline.medlineHandlers import isMedlineFile, medlineParser 16 | from .proquest.proQuestHandlers import isProQuestFile, proQuestParser 17 | from .scopus.scopusHandlers import isScopusFile, scopusParser 18 | 19 | ProccessorTuple = collections.namedtuple("ProccessorTuple", ("type", "processor", "detector")) 20 | 21 | def unrecognizedFileHandler(fileName): 22 | raise UnknownFile("'{}' is not recognized my metaknowledge.".format(fileName)) 23 | 24 | grantProcessors = [ 25 | ProccessorTuple("NSFGrant", parserNSFfile, isNSFfile), 26 | ProccessorTuple("CIHRGrant", parserCIHRfile, isCIHRfile), 27 | ProccessorTuple("NSERCGrant", parserNSERCfile, isNSERCfile), 28 | ProccessorTuple("FallbackGrant", parserFallbackGrantFile, isFallbackGrantFile), 29 | #Raises exception if reached, to indicate the end of the list 30 | #This simplifes things at the other end 31 | ProccessorTuple("Invalid File", None, unrecognizedFileHandler), 32 | ] 33 | 34 | recordHandlers = [ 35 | ProccessorTuple("WOSRecord", wosParser, isWOSFile), 36 | ProccessorTuple("MedlineRecord", medlineParser, isMedlineFile), 37 | ProccessorTuple("ProQuestRecord", proQuestParser, isProQuestFile), 38 | ProccessorTuple("ScopusRecord", scopusParser, isScopusFile), 39 | #Raises exception if reached, to indicate the end of the list 40 | #This simplifes things at the other end 41 | ProccessorTuple("Invalid File", None, unrecognizedFileHandler), 42 | 
] 43 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_grants.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | import unittest 3 | import metaknowledge 4 | 5 | class TestGrants(unittest.TestCase): 6 | def setUp(self): 7 | self.Grant1 = metaknowledge.MedlineGrant("U10 HD04267/HG/NICHD NHI HHS/Unit State") 8 | self.Grant2 = metaknowledge.MedlineGrant("HG/NICHD NHI HHS/Unit State") 9 | 10 | def test_isGrant(self): 11 | self.assertIsInstance(self.Grant1, metaknowledge.Grant) 12 | self.assertIsInstance(self.Grant2, metaknowledge.Grant) 13 | 14 | def test_init(self): 15 | Gshort = metaknowledge.MedlineGrant("U10 HD04267/NICHD NHI HHS/Unit State") 16 | Gmid = metaknowledge.MedlineGrant("U10 /HD04267HG/NICHD NHI HHS/Unit State") 17 | Glong = metaknowledge.MedlineGrant("U/10 /HD042/67HG/NICHD NHI HHS/Unit State") 18 | self.assertNotEqual(Gshort, Glong) 19 | self.assertNotEqual(Gmid, Glong) 20 | 21 | def test_bad(self): 22 | G = metaknowledge.MedlineGrant("NICHD NHI HHSUnit State") 23 | self.assertTrue(G.bad) 24 | 25 | def test_eq(self): 26 | self.assertNotEqual(1, self.Grant2) 27 | self.assertNotEqual(self.Grant1, self.Grant2) 28 | 29 | def test_hash(self): 30 | self.assertIsInstance(hash(self.Grant1), int) 31 | 32 | def test_orgin(self): 33 | self.assertEqual("U10 HD04267/HG/NICHD NHI HHS/Unit State", self.Grant1.original) 34 | 35 | def test_rerp(self): 36 | self.assertEqual(repr(self.Grant1), "") 37 | 38 | def test_NSF(self): 39 | GC = metaknowledge.GrantCollection("metaknowledge/tests/nsfTestFiles") 40 | G = GC.networkMultiMode(GC.tags()) 41 | self.assertEqual(metaknowledge.graphStats(G, sentenceString = True), "The graph has 244 nodes, 2077 edges, 0 isolates, 19 self loops, a density of 0.0703974 and a transitivity of 0.497237") 42 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/6800039.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Tides and Tidal Friction in the Bight of Abaco, Bahamas 6 | 11/01/1968 7 | 10/31/1970 8 | 62700 9 | 10 | Standard Grant 11 | 12 | 13 | 06040303 14 | 15 | Directorate For Geosciences 16 | 17 | 18 | Division Of Ocean Sciences 19 | 20 | 21 | 22 | name not available 23 | 24 | 25 | 11/08/1968 26 | 11/08/1968 27 | 28 | 6800039 29 | 30 | 31 | DATA NOT AVAILABLE 32 | 33 | 11/01/1968 34 | 35 | Principal Investigator 36 | 37 | 38 | 39 | DATA NOT AVAILABLE 40 | 41 | 11/01/1968 42 | 43 | Co-Principal Investigator 44 | 45 | 46 | Nova Southeastern University 47 | FORT LAUDERDALE 48 | 333147796 49 | 9542625366 50 | 3301 COLLEGE AVE 51 | United States 52 | Florida 53 | FL 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /docs/CLI.rst: -------------------------------------------------------------------------------- 1 | ###################### 2 | Command Line Tool 3 | ###################### 4 | 5 | metaknowledge comes with a command-line application named :code:`metaknowledge`. This provides a simple interface to the python package and allows the generation of most of the networks along with ways to manage the records themselves. 6 | 7 | Overview 8 | ^^^^^^^^ 9 | To start the tool run: :: 10 | 11 | $ metaknowledge 12 | 13 | You will be asked for the location of the file or files to use. 
These can be given as paths to the files or paths to directories containing the files. Note: if a directory is used, all files with the proper header will be read. 14 | 15 | You will then be asked what to do with the records: :: 16 | 17 | A collection of 537 WOS records has been created 18 | What do you wish to do with it: 19 | 1) Make a graph 20 | 2) Write the collection as a single WOS style file 21 | 3) Write the collection as a single WOS style file and make a graph 22 | 4) Write the collection as a single csv file 23 | 5) Write the collection as a single csv file and make a graph 24 | 6) Write all the citations to a single file 25 | 7) Go over non-journal citations 26 | i) open python console 27 | q) quit 28 | What is your selection: 29 | 30 | Select the option you want by typing the corresponding number or character and pressing Enter. The menus after this step are controlled this way as well. 31 | 32 | The second-to-last option :code:`i)` will start an interactive Python session with all the objects you have created thus far accessible; their names will be given when it starts. 33 | 34 | The last option :code:`q)` will cause the program to exit. You can also quit at any time by pressing :code:`ctrl-c`. 35 | 36 | Questions? 37 | ^^^^^^^^^^ 38 | If you find bugs, or have questions, please write to: 39 | 40 | | Reid McIlroy-Young `reid@reidmcy.com `_ 41 | | John McLevey `john.mclevey@uwaterloo.ca `_ 42 | 43 | License 44 | ^^^^^^^ 45 | *metaknowledge* is free and open source software, distributed under the GPL License. 46 | 47 | .. toctree:: 48 | :maxdepth: 2 49 | :caption: CLI: 50 | -------------------------------------------------------------------------------- /docs/documentation/overview.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | Overview 3 | ######## 4 | 5 | This package can read the files downloaded from the Thomson Reuters’ `Web of Science `_ (*WOS*), Elsevier’s `Scopus `_, `ProQuest `_ and Medline files from `PubMed `_. These files contain entries on the metadata of scientific records, such as authors, title, and citations. *metaknowledge* can also read grants from various organizations, including *NSF* and *NSERC*, which are handled similarly to records. 6 | 7 | The `metaknowledge.RecordCollection <./classes/RecordCollection.html#recordcollection-collectionwithids>`_ class can take a path to one or more of these files and load and parse them. This object is the main way to work with multiple records. For each individual record it creates an instance of the `metaknowledge.Record <./classes/Record.html#record-mapping-hashable>`_ class that contains the results of parsing the record. 8 | 9 | The files read by *metaknowledge* are databases containing a series of tags (implicitly or explicitly), e.g. ``'TI'`` is the title tag for WOS. Each tag has one or more values and *metaknowledge* can read them and extract useful information. As the tags differ between providers, a small set of common values can be accessed by special tags; these tags are listed in ``commonRecordFields``. These special tags can act on the whole ``Record`` and as such may contain information provided by any number of other tags. 10 | 11 | Citations are handled by a special `Citation <./classes/Citation.html#module-metaknowledge.citation>`_ class. This class can parse the citations given by *WOS* and the journals cited by *Scopus*, and allows for better comparisons when they are used in graphs.
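For example, the basic workflow described above looks roughly like this (a minimal sketch: the file name ``savedrecs.txt`` is a hypothetical *WOS* export, and the special tags ``'title'``, ``'year'`` and ``'citations'`` are assumed to be among the provider-independent fields in ``commonRecordFields``): ::

    import metaknowledge

    RC = metaknowledge.RecordCollection("savedrecs.txt")  # parse a downloaded WOS file
    R = RC.peek()                                         # one parsed Record from the collection
    print(R['TI'])                                        # raw provider tag (the WOS title)
    print(R.get('title'), R.get('year'))                  # provider-independent special tags
    for cite in R.get('citations', []):                   # Citation objects parsed from the record
        print(cite.ID())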
12 | 13 | Note for those reading the docstrings metaknowledge’s docs are written in markdown and are processed to produce the documentation found at `metaknowledge.readthedocs.io `_, but you should have no problem reading them from the help function. 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: Overview: 19 | -------------------------------------------------------------------------------- /metaknowledge/tests/OnePaperNoCites.isi: -------------------------------------------------------------------------------- 1 | FN Thomson Reuters Web of Science™ 2 | VR 1.0 3 | PT J 4 | AU Marghetis, T 5 | Nunez, R 6 | AF Marghetis, Tyler 7 | Nunez, Rafael 8 | TI The Motion Behind the Symbols: A Vital Role for Dynamism in the 9 | Conceptualization of Limits and Continuity in Expert Mathematics 10 | SO TOPICS IN COGNITIVE SCIENCE 11 | LA English 12 | DT Article 13 | DE Mathematical practice; Metaphor; Fictive motion; Gesture; Cauchy; 14 | Calculus; Conceptualization 15 | ID REPRESENTATIONS; GESTURE; REAL; TIME 16 | AB The canonical history of mathematics suggests that the late 19th-century arithmetization of calculus marked a shift away from spatial-dynamic intuitions, grounding concepts in static, rigorous definitions. Instead, we argue that mathematicians, both historically and currently, rely on dynamic conceptualizations of mathematical concepts like continuity, limits, and functions. In this article, we present two studies of the role of dynamic conceptual systems in expert proof. The first is an analysis of co-speech gesture produced by mathematics graduate students while proving a theorem, which reveals a reliance on dynamic conceptual resources. The second is a cognitive-historical case study of an incident in 19th-century mathematics that suggests a functional role for such dynamism in the reasoning of the renowned mathematician Augustin Cauchy. Taken together, these two studies indicate that essential concepts in calculus that have been defined entirely in abstract, static terms are nevertheless conceptualized dynamically, in both contemporary and historical practice. 17 | C1 [Marghetis, Tyler; Nunez, Rafael] Univ Calif San Diego, Dept Cognit Sci, La Jolla, CA 92093 USA. 18 | RP Marghetis, T (reprint author), Univ Calif San Diego, Dept Cognit Sci, La Jolla, CA 92093 USA. 19 | EM tmarghet@cogsci.ucsd.edu 20 | NR 45 21 | TC 4 22 | Z9 4 23 | PU WILEY-BLACKWELL 24 | PI HOBOKEN 25 | PA 111 RIVER ST, HOBOKEN 07030-5774, NJ USA 26 | SN 1756-8757 27 | J9 TOP COGN SCI 28 | JI Top. Cogn. Sci. 
29 | PD APR 30 | PY 2013 31 | VL 5 32 | IS 2 33 | BP 299 34 | EP 316 35 | DI 10.1111/tops.12013 36 | PG 18 37 | WC Psychology, Experimental 38 | SC Psychology 39 | GA 126NA 40 | UT WOS:000317623000007 41 | PM 23460466 42 | ER 43 | 44 | EF 45 | -------------------------------------------------------------------------------- /metaknowledge/scopus/tagProcessing/tagFunctions.py: -------------------------------------------------------------------------------- 1 | from ...grants.scopusGrant import ScopusGrant 2 | from ...citation import Citation 3 | 4 | def commaSpaceSeperated(val): 5 | return val.split(', ') 6 | 7 | def semicolonSpaceSeperated(val): 8 | return val.split('; ') 9 | 10 | def semicolonSeperated(val): 11 | return val.split(';') 12 | 13 | def stringValue(val): 14 | return val 15 | 16 | def integralValue(val): 17 | return int(val) 18 | 19 | def grantValue(val): 20 | return [ScopusGrant(s) for s in val.split('; ')] 21 | 22 | def citeValue(val): 23 | return [Citation(s, scopusMode = True) for s in val.split('; ')] 24 | 25 | scopusTagToFunction = { 26 | 'Authors' : commaSpaceSeperated, 27 | 'Title' : stringValue, 28 | 'Year' : integralValue, 29 | 'Source title' : stringValue, 30 | 'Volume' : stringValue, 31 | 'Issue' : stringValue, 32 | 'Art. No.' : stringValue, 33 | 'Page start' : stringValue, 34 | 'Page end' : stringValue, 35 | 'Page count' : integralValue, 36 | 'Cited by' : integralValue, 37 | 'DOI' : stringValue, 38 | 'Link' : stringValue, 39 | 'Affiliations' : stringValue, 40 | 'Authors with affiliations' : semicolonSpaceSeperated, 41 | 'Abstract' : stringValue, 42 | 'Author Keywords' : semicolonSpaceSeperated, 43 | 'Index Keywords' : semicolonSpaceSeperated, 44 | 'Molecular Sequence Numbers' : stringValue, 45 | 'Chemicals/CAS' : stringValue, 46 | 'Tradenames' : semicolonSpaceSeperated, 47 | 'Manufacturers' : semicolonSpaceSeperated, 48 | 'Funding Details' : grantValue, 49 | 'References' : citeValue, 50 | 'Correspondence Address' : semicolonSpaceSeperated, 51 | 'Editors' : stringValue, 52 | 'Sponsors' : semicolonSeperated, 53 | 'Publisher' : stringValue, 54 | 'Conference name' : stringValue, 55 | 'Conference date' : stringValue, 56 | 'Conference location' : stringValue, 57 | 'Conference code' : integralValue, 58 | 'ISSN' : stringValue, 59 | 'ISBN' : semicolonSpaceSeperated, 60 | 'CODEN' : stringValue, 61 | 'PubMed ID' : stringValue, 62 | 'Language of Original Document' : semicolonSpaceSeperated, 63 | 'Abbreviated Source Title' : stringValue, 64 | 'Document Type' : stringValue, 65 | 'Source' : stringValue, 66 | 'EID' : stringValue, 67 | 68 | } 69 | -------------------------------------------------------------------------------- /inheritance-structure.dot: -------------------------------------------------------------------------------- 1 | /* 2 | Class diagrams for metaknowledge 3 | */ 4 | 5 | digraph Records { 6 | charset="utf-8"; 7 | rankdir=BT; 8 | node [shape=record, fontname="Source Code Pro"]; 9 | edge [arrowhead=empty, arrowsize=1]; 10 | /* 11 | Object [label="{Object|builtin}"]; 12 | */ 13 | Mapping [label="{Mapping|collections.abc}"] 14 | Hashable [label="{Hashable|collections.abc}"] 15 | ABCMeta [label="{ABCMeta|abc}", style=dashed]; 16 | MutableMapping [label="{MutableMapping|collections.abc}"] 17 | 18 | Record [label="{Record|metaknowledge}"]; 19 | ExtendedRecord [label="{ExtendedRecord|metaknowledge}"]; 20 | Grant [label="{Grant|metaknowledge}"]; 21 | 22 | DefaultGrant [label="{DefaultGrant|metaknowledge}"]; 23 | CIHRGrant [label="{CIHRGrant|metaknowledge}"]; 24 | 
MedlineGrant [label="{MedlineGrant|metaknowledge}"]; 25 | NSERCGrant [label="{NSERCGrant|metaknowledge}"]; 26 | ScopusGrant [label="{ScopusGrant|metaknowledge}"]; 27 | 28 | WOSRecord [label="{WOSRecord|metaknowledge.WOS}"]; 29 | ProQuestRecord [label="{ProQuestRecord|metaknowledge.proquest}"]; 30 | MedlineRecord [label="{MedlineRecord|metaknowledge.medline}"]; 31 | ScopusRecord [label="{ScopusRecord|metaknowledge.scopus}"]; 32 | 33 | MutableSet [label="{MutableSet|collections.abc}"]; 34 | Collection [label="{Collection|metaknowledge}"]; 35 | CollectionWithIDs [label="{CollectionWithIDs|metaknowledge}"]; 36 | RecordCollection [label="{RecordCollection|metaknowledge}"]; 37 | GrantCollection [label="{GrantCollection|metaknowledge}"]; 38 | 39 | /* 40 | Mapping -> Object; 41 | Hashable -> Object; 42 | */ 43 | 44 | Record -> Hashable; 45 | Record -> Mapping; 46 | 47 | Collection -> MutableSet; 48 | Collection -> Hashable; 49 | 50 | Grant -> Record; 51 | Grant -> MutableMapping; 52 | 53 | DefaultGrant -> Grant; 54 | CIHRGrant -> Grant; 55 | MedlineGrant -> Grant; 56 | NSERCGrant -> Grant; 57 | ScopusGrant -> Grant 58 | 59 | ExtendedRecord -> ABCMeta [style=dashed]; 60 | ExtendedRecord -> Record; 61 | WOSRecord -> ExtendedRecord; 62 | ProQuestRecord -> ExtendedRecord; 63 | MedlineRecord -> ExtendedRecord; 64 | ScopusRecord -> ExtendedRecord; 65 | 66 | CollectionWithIDs -> Collection; 67 | RecordCollection -> CollectionWithIDs; 68 | GrantCollection -> CollectionWithIDs; 69 | 70 | } 71 | -------------------------------------------------------------------------------- /metaknowledge/medline/tagProcessing/specialFunctions.py: -------------------------------------------------------------------------------- 1 | from ...WOS.tagProcessing.helpFuncs import getMonth 2 | 3 | import re 4 | 5 | def year(R): 6 | try: 7 | return int(R['DP'].split(' ')[0]) 8 | except ValueError: 9 | yVal = re.search(r'-?\d{1,4}', R['DP'].split(' ')[0]) 10 | if yVal is None: 11 | return 0 12 | else: 13 | return(int(yVal.group(0))) 14 | 15 | def month(R): 16 | try: 17 | m = R['DP'].split(' ')[1] 18 | except IndexError: 19 | raise KeyError("Unable to extract a month") 20 | else: 21 | return getMonth(m) 22 | 23 | def volume(R): 24 | """Returns the first number/word of the volume field, hopefully trimming something like: `'49 Suppl 20'` to `49`""" 25 | return R['VI'].split(' ')[0] 26 | 27 | def beginningPage(R): 28 | """As pages may not be given as numbers this is the most accurate this function can be""" 29 | p = R['PG'] 30 | if p.startswith('suppl '): 31 | p = p[6:] 32 | return p.split(' ')[0].split('-')[0].replace(';', '') 33 | 34 | 35 | def DOI(R): 36 | ids = R['AID'] 37 | for a in ids: 38 | if a.endswith(' [doi]'): 39 | return a[:-6] 40 | raise KeyError("No DOI number found") 41 | 42 | def address(R): 43 | """Gets the first address of the first author""" 44 | return R['AD'][R['AU'][0]][0] 45 | 46 | medlineSpecialTagToFunc = { 47 | 'year' : year, 48 | 'month' : month, 49 | 'volume' : volume, 50 | 'beginningPage' : beginningPage, 51 | 'DOI' : DOI, 52 | 'address' : address, 53 | 54 | 'j9' : lambda R : R['TA'], #remaps to the closests field TA, but J9 != TA 55 | 56 | #'citations' : lambda R: None, #Medline does not have citations 57 | 58 | 'grants' : lambda R: R['GR'],#This is the basis for the 'grants' special function 59 | 60 | 'selfCitation' : lambda R: R.createCitation(), #just remaps to the correct function 61 | 'authorsShort' : lambda R: R['AU'], #just remaps to the correct name 62 | 'authorsFull' : lambda R : R['FAU'], #just 
remaps to the correct name 63 | 'title' : lambda R : R['TI'], #just remaps to the correct name 64 | 'journal' : lambda R : R['JT'], #just remaps to the correct name 65 | 'keywords' : lambda R : R['OT'], #just remaps to the correct name 66 | 'abstract' : lambda R : R['AB'], #just remaps to the correct name 67 | 'id' : lambda R : R.id, #just remaps to the correct name 68 | } 69 | -------------------------------------------------------------------------------- /metaknowledge/genders/nameGender.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | import zipfile 3 | import io 4 | import csv 5 | import os.path 6 | import urllib.request 7 | 8 | from ..mkExceptions import GenderException 9 | 10 | dataURL = 'https://github.com/UWNETLAB/globalnamedata/archive/0.3.zip' 11 | americanNamesPath = 'globalnamedata-0.3/assets/usprocessed.csv' 12 | ukNamesPath = 'globalnamedata-0.3/assets/ukprocessed.csv' 13 | 14 | targetFilePath = os.path.join(os.path.normpath(os.path.dirname(__file__)), 'namesData.csv') 15 | 16 | csvFields = [ 17 | 'Name', 18 | 'years.appearing', 19 | 'count.male', 20 | 'count.female', 21 | 'prob.gender', 22 | 'obs.male', 23 | 'est.male', 24 | 'upper', 25 | 'lower' 26 | ] 27 | 28 | #global to reduce need to reload dict 29 | mappingDict = None 30 | 31 | def downloadData(useUK = False): 32 | zipFile = io.BytesIO(urllib.request.urlopen(dataURL).read()) 33 | if useUK: 34 | namesFile = zipfile.ZipFile(zipFile).open(ukNamesPath) 35 | else: 36 | namesFile = zipfile.ZipFile(zipFile).open(americanNamesPath) 37 | try: 38 | with open(targetFilePath, 'wb') as f: 39 | f.write(namesFile.read()) 40 | except PermissionError: 41 | raise PermissionError("Cannot write to {}, try rerunning with higher privileges".format(targetFilePath)) 42 | 43 | def getMapping(useUK = False): 44 | if not os.path.isfile(targetFilePath): 45 | downloadData(useUK) 46 | retDict = {} 47 | with open(targetFilePath) as f: 48 | reader = csv.DictReader(f, fieldnames = csvFields) 49 | next(reader) 50 | for line in reader: 51 | retDict[line['Name'].title()] = line['prob.gender'] 52 | return retDict 53 | 54 | def nameStringGender(s, noExcept = False): 55 | """Expects `last, first`""" 56 | global mappingDict 57 | try: 58 | first = s.split(', ')[1].split(' ')[0].title() 59 | except IndexError: 60 | if noExcept: 61 | return 'Unknown' 62 | else: 63 | raise GenderException("The given String: '{}' does not have a last name, first name pair with a ', ' separation.".format(s)) 64 | if mappingDict is None: 65 | mappingDict = getMapping() 66 | return mappingDict.get(first, 'Unknown') 67 | 68 | def recordGenders(R): 69 | return {auth : nameStringGender(auth, noExcept = True) for auth in R.get('authorsFull', [])} 70 | -------------------------------------------------------------------------------- /metaknowledge/grants/cihrGrant.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import csv 3 | 4 | from .baseGrant import Grant, csvAndLinesReader 5 | from ..mkExceptions import BadGrant 6 | 7 | class CIHRGrant(Grant): 8 | def __init__(self, original, grantdDict, sFile, sLine): 9 | bad = False 10 | error = None 11 | if grantdDict.get('PI Names', '') == '': 12 | bad = True 13 | error = BadGrant("Missing 'PI Names'") 14 | 15 | #Source file - line number - 20 character long numeric hash 16 | idValue = "{}-l:{}-{:0=20}".format(os.path.basename(sFile), sLine, hash(original)) 17 | 18
| Grant.__init__(self, original, grantdDict, idValue, bad, error, sFile = sFile, sLine = sLine) 19 | 20 | def isCIHRfile(fileName, useFileName = True): 21 | if useFileName and not os.path.basename(fileName).startswith('cihr_'): 22 | return False 23 | try: 24 | with open(fileName, 'r', encoding = 'latin-1') as openfile: 25 | if not openfile.readline().startswith('Search Criteria'): 26 | return False 27 | elif not openfile.readline().endswith(',,,,,,,,,\n'): 28 | return False 29 | elif not openfile.readline().endswith(',,,,,,,,,\n'): 30 | return False 31 | reader = csv.DictReader(openfile, fieldnames = None, dialect = 'excel') 32 | for row in reader: 33 | if 'PI Names' not in row: 34 | return False 35 | except (StopIteration, UnicodeDecodeError): 36 | return False 37 | else: 38 | return True 39 | 40 | def parserCIHRfile(fileName): 41 | grantSet = set() 42 | error = None 43 | try: 44 | with open(fileName, 'r', encoding = 'latin-1') as openfile: 45 | f = enumerate(openfile, start = 1) 46 | next(f) 47 | next(f) 48 | next(f) 49 | reader = csvAndLinesReader(f, fieldnames = None, dialect = 'excel') 50 | for lineNum, lineString, lineDict in reader: 51 | grantSet.add(CIHRGrant(lineString, lineDict, sFile = fileName, sLine = lineNum)) 52 | except Exception: 53 | if error is None: 54 | error = BadGrant("The file '{}' is having decoding issues. It may have been modifed since it was downloaded or not be a CIHR grant file.".format(fileName)) 55 | except KeyboardInterrupt as e: 56 | error = e 57 | finally: 58 | if isinstance(error, KeyboardInterrupt): 59 | raise error 60 | return grantSet, error 61 | -------------------------------------------------------------------------------- /metaknowledge/mkExceptions.py: -------------------------------------------------------------------------------- 1 | class mkException(Exception): 2 | pass 3 | 4 | class CollectionTypeError(mkException, TypeError): 5 | pass 6 | 7 | class RCTypeError(mkException, TypeError): 8 | pass 9 | 10 | class TagError(mkException): 11 | pass 12 | 13 | class RCValueError(mkException): 14 | pass 15 | 16 | class BadInputFile(mkException): 17 | pass 18 | 19 | class BadRecord(mkException): 20 | pass 21 | 22 | class BadPubmedRecord(mkException): 23 | pass 24 | 25 | class BadPubmedFile(mkException): 26 | pass 27 | 28 | class BadScopusRecord(mkException): 29 | pass 30 | 31 | class BadScopusFile(mkException): 32 | pass 33 | 34 | class BadProQuestRecord(mkException): 35 | pass 36 | 37 | class BadProQuestFile(mkException): 38 | pass 39 | 40 | class RecordsNotCompatible(mkException): 41 | pass 42 | 43 | class JournalDataBaseError(mkException): 44 | pass 45 | 46 | class GenderException(mkException): 47 | pass 48 | 49 | class cacheError(mkException): 50 | """Exception raised when loading a cached RecordCollection fails, should only be seen inside metaknowledge and always be caught.""" 51 | pass 52 | 53 | class BadWOSRecord(BadRecord): 54 | """Exception thrown by the [record parser](../modules/WOS.html#metaknowledge.WOS.recordWOS.recordParser) to indicate a mis-formated record. This occurs when some component of the record does not parse. The messages will be any of: 55 | 56 | * _Missing field on line (line Number):(line)_, which indicates a line was to short, there should have been a tag followed by information 57 | 58 | * _End of file reached before ER_, which indicates the file ended before the 'ER' indicator appeared, 'ER' indicates the end of a record. This is often due to a copy and paste error. 
59 | 60 | * _Duplicate tags in record_, which indicates the record had 2 or more lines with the same tag. 61 | 62 | * _Missing WOS number_, which indicates the record did not have a 'UT' tag. 63 | 64 | Records with a BadWOSRecord error are likely incomplete or the combination of two or more single records. 65 | """ 66 | pass 67 | 68 | class BadWOSFile(Warning): 69 | """Exception thrown by wosParser for mis-formated files 70 | """ 71 | pass 72 | 73 | class BadCitation(Warning): 74 | """ 75 | Exception thrown by Citation 76 | """ 77 | pass 78 | 79 | class BadGrant(mkException): 80 | pass 81 | 82 | 83 | class GrantCollectionException(mkException): 84 | pass 85 | 86 | class UnknownFile(mkException): 87 | pass 88 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | import os.path 3 | import re 4 | from setuptools import setup, find_packages 5 | 6 | with open('metaknowledge/constants.py') as f: 7 | versionString = re.search(r"__version__ = '(.+)'", f.read()).group(1) 8 | 9 | long_descriptionLOC = "README.md" 10 | if os.path.isfile(long_descriptionLOC): 11 | long_description = open(long_descriptionLOC).read() 12 | else: 13 | long_description = '' 14 | 15 | if __name__ == '__main__': 16 | 17 | setup(name='metaknowledge', 18 | version = versionString, 19 | description = "A library for handling Web of science files", 20 | long_description = long_description, 21 | long_description_content_type = 'text/markdown', 22 | author="Reid McIlroy-Young, John McLevey", 23 | author_email = "rmcilroy@uwaterloo.ca, john.mclevey@uwaterloo.ca", 24 | license = 'GPL', 25 | url="https://github.com/networks-lab/metaknowledge", 26 | download_url = "https://github.com/networks-lab/metaknowledge/archive/{}.tar.gz".format(versionString), 27 | keywords= 'WOS', 28 | classifiers = [ 29 | 'Development Status :: 5 - Production/Stable', 30 | 'Environment :: Console', 31 | 'Environment :: MacOS X', 32 | 'Intended Audience :: Science/Research', 33 | 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', 34 | 'Operating System :: MacOS :: MacOS X', 35 | 'Operating System :: POSIX', 36 | 'Operating System :: Microsoft :: Windows', 37 | 'Programming Language :: Python :: 3 :: Only', 38 | 'Topic :: Education', 39 | 'Topic :: Scientific/Engineering :: Information Analysis', 40 | 'Topic :: Sociology', 41 | 'Topic :: Text Processing', 42 | ], 43 | install_requires= ['networkx'], 44 | extras_require={'contour' : ['matplotlib', 'scipy', 'numpy']}, 45 | packages = find_packages(), 46 | entry_points={'console_scripts': [ 47 | 'metaknowledge = metaknowledge.bin:mkCLI', 48 | 'metaknowledge-mdToNb = metaknowledge.bin:mkMdToNb', 49 | 'metaknowledge-DocsGen = metaknowledge.bin:mkDocs', 50 | ]}, 51 | test_suite='metaknowledge.tests', 52 | ) 53 | print("metaknowledge installed\nIf you intend to use the gender name data or journal abbreviations facilities it is\nadvisable to download and setup the required files now.\nRunning following line in your interpreter will do it:\nimport metaknowledge;metaknowledge.downloadExtras()") 54 | -------------------------------------------------------------------------------- /vagrant/bootstrap: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 3 | apt-get update 4 | apt-get install -y git python3 libfreetype6-dev libpng12-dev python3-setuptools python3-dev pkg-config python3-numpy python3-scipy r-base libzmq3-dev 5 | sudo easy_install3 pip 6 | echo "alias python='python3'" >> ~/.bashrc 7 | echo "alias pip='pip3'" >> ~/.bashrc 8 | echo "alias ipython='ipython3'" >> ~/.bashrc 9 | echo "alias easy_install='easy_install3'" >> ~/.bashrc 10 | source ~/.bashrc 11 | 12 | echo "alias python='python3'" >> /home/vagrant/.bashrc 13 | echo "alias pip='pip3'" >> /home/vagrant/.bashrc 14 | echo "alias ipython='ipython3'" >> /home/vagrant/.bashrc 15 | echo "alias easy_install='easy_install3'" >> /home/vagrant/.bashrc 16 | 17 | pip3 install networkx ipython matplotlib pandas seaborn igraph jupyter metaknowledge #Add to pip here 18 | echo "Getting WOS J29 database" 19 | python3 -c "import metaknowledge.journalAbbreviations; metaknowledge.journalAbbreviations.updatej9DB()" 20 | 21 | echo "Setting up git repo" 22 | cd /vagrant 23 | 24 | git init 25 | git config user.name "Student" 26 | git config user.email "Student@uwaterloo.ca" #Not a real address 27 | git add . 28 | git commit -m "Setting up local student repo" 29 | git remote add origin https://github.com/networks-lab/metaknowledge.git 30 | git fetch --all 31 | git reset --hard origin/master 32 | git pull origin master 33 | git commit -m "Syncing student with github" 34 | 35 | mkdir -p /vagrant/vagrant/logs/ 36 | 37 | echo "#!/bin/bash" > /etc/rc.local 38 | echo "while [ ! -d /vagrant/vagrant ] ; do sleep 1 ; done" >> /etc/rc.local 39 | echo "/usr/local/bin/jupyter-notebook --no-browser --ip='*' --port=8888 --notebook-dir=/notebooks 2>&1 | tee -a /vagrant/vagrant/logs/ipythonNoteBook.log /home/vagrant/ipythonNoteBook.log &" >> /etc/rc.local 40 | echo "echo 'Starting Notebook server'" >> /etc/rc.local 41 | echo "echo 'Pulling metaknowledge'" >> /etc/rc.local 42 | echo "git -C /vagrant pull -q origin master" >> /etc/rc.local 43 | echo "/vagrant/vagrant/updates.sh" >> /etc/rc.local 44 | echo "echo 'Updating python packages'" >> /etc/rc.local 45 | echo "pip3 install --upgrade networkx ipython matplotlib pandas seaborn igraph jupyter metaknowledge" 46 | echo "exit 0" >> /etc/rc.local 47 | sudo chown root /etc/rc.local 48 | sudo chmod 755 /etc/rc.local 49 | 50 | echo "#!/bin/bash -e" > /home/vagrant/ipythonStartup.sh 51 | echo "/etc/rc.local" >> /home/vagrant/ipythonStartup.sh 52 | chmod +x /home/vagrant/ipythonStartup.sh 53 | 54 | /etc/rc.local 55 | 56 | echo "provisioning done" 57 | echo "Notebook Server running at http://localhost:1159" 58 | exit 0 59 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_medline.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | import unittest 3 | import os.path 4 | import os 5 | 6 | import metaknowledge 7 | import metaknowledge.medline 8 | 9 | 10 | class TestMedline(unittest.TestCase): 11 | def setUp(self): 12 | metaknowledge.VERBOSE_MODE = False 13 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/medline_test.medline") 14 | self.R = self.RC.peek() 15 | 16 | def test_creation(self): 17 | Rstart = self.RC.peek() 18 | R = metaknowledge.MedlineRecord(Rstart._fieldDict) 19 | self.assertEqual(R, Rstart) 20 | with open("metaknowledge/tests/medline_test.medline") as f: 21 | f.readline() 22 | R = metaknowledge.MedlineRecord(f) 23 | self.assertEqual(R.id, 'PMID:26524502') 24 | s = f.read() 25 | R = metaknowledge.MedlineRecord(s) 26 | self.assertEqual(R.id, 'PMID:25802386') 27 | with self.assertRaises(TypeError): 28 | R = metaknowledge.MedlineRecord(12345678) 29 | R = metaknowledge.MedlineRecord("PMID- 25802386\njhgjhghjbgjhgjghhjgjh\nhdghjdfgjdfsgjh\n") 30 | self.assertTrue(R.bad) 31 | with self.assertRaises(metaknowledge.BadPubmedRecord): 32 | R.writeRecord('not a file') 33 | 34 | def test_isCollection(self): 35 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection) 36 | 37 | def test_ismedline(self): 38 | self.assertIsInstance(self.R, metaknowledge.MedlineRecord) 39 | 40 | def test_bibWrite(self): 41 | fileName = "tempFile.bib.tmp" 42 | self.RC.writeBib(fileName) 43 | self.assertEqual(os.path.getsize(fileName), 606182) 44 | self.RC.writeBib(fileName, wosMode = True, reducedOutput = True) 45 | self.assertEqual(os.path.getsize(fileName), 456151) 46 | os.remove("tempFile.bib.tmp") 47 | 48 | def test_specials(self): 49 | for R in self.RC: 50 | for s in metaknowledge.medline.medlineSpecialTagToFunc.keys(): 51 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation)) 52 | 53 | def test_allFields(self): 54 | for R in self.RC: 55 | for k,v in R.items(): 56 | self.assertIsInstance(k, str) 57 | self.assertIsInstance(v, (str, list, dict)) 58 | 59 | def test_write(self): 60 | fileName = 'tempFile.medline.tmp' 61 | self.RC.writeFile(fileName) 62 | self.assertEqual(os.path.getsize(fileName), os.path.getsize("metaknowledge/tests/medline_test.medline") + 526) #Not quite identical 63 | os.remove(fileName) 64 | -------------------------------------------------------------------------------- /metaknowledge/medline/tagProcessing/tagNames.py: -------------------------------------------------------------------------------- 1 | from ...WOS.tagProcessing.helpFuncs import makeBiDirectional 2 | 3 | 4 | authorBasedTags = [ 5 | 'AD', 6 | 'AUID', 7 | ] 8 | 9 | tagNameDict = { 10 | "Abstract" : "AB", 11 | "CopyrightInformation" : "CI", 12 | "Affiliation" : "AD", 13 | "InvestigatorAffiliation" : "IRAD", 14 | "ArticleIdentifier" : "AID", 15 | "Author" : "AU", 16 | "AuthorIdentifier" : "AUID", 17 | "FullAuthor" : "FAU", 18 | "BookTitle" : "BTI", 19 | "CollectionTitle" : "CTI", 20 | "CorporateAuthor" : "CN", 21 | "CreateDate" : "CRDT", 22 | "DateCompleted" : "DCOM", 23 | "DateCreated" : "DA", 24 | "DateLastRevised" : "LR", 25 | "DateElectronicPublication" : "DEP", 26 | "DatePublication" : "DP", 27 | "Edition" : "EN", 28 | "Editor" : "ED", 29 | "Editor" : "FED", 30 | "EntrezDate" : "EDAT", 31 | "GeneSymbol" : "GS", 32 | "GeneralNote" : "GN", 33 | "GrantNumber" : "GR", 34 | "Investigator" : "IR", 35 | "InvestigatorFull" : "FIR", 36 | "ISBN" : "ISBN", 37 | "ISSN" : "IS", 38 | "Issue" : "IP", 39 | "JournalTitleAbbreviation" : "TA", 40 | "JournalTitle" : 
"JT", 41 | "Language" : "LA", 42 | "LocationIdentifier" : "LID", 43 | "ManuscriptIdentifier" : "MID", 44 | "MeSHDate" : "MHDA", 45 | "MeSHTerms" : "MH", 46 | "NLMID" : "JID", 47 | "NumberReferences" : "RF", 48 | "OtherAbstract" : "OAB", 49 | "OtherAbstract" : "OABL", 50 | "OtherCopyright" : "OCI", 51 | "OtherID" : "OID", 52 | "OtherTerm" : "OT", 53 | "OtherTermOwner" : "OTO", 54 | "Owner" : "OWN", 55 | "Pagination" : "PG", 56 | "PersonalNameSubject" : "PS", 57 | "FullPersonalNameSubject" : "FPS", 58 | "PlacePublication" : "PL", 59 | "PublicationHistoryStatus" : "PHST", 60 | "PublicationStatus" : "PST", 61 | "PublicationType" : "PT", 62 | "PublishingModel" : "PUBM", 63 | "PubMedCentralIdentifier" : "PMC", 64 | "PubMedCentralRelease" : "PMCR", 65 | "PubMedUniqueIdentifier" : "PMID", 66 | "RegistryNumber" : "RN", 67 | "SubstanceName" : "NM", 68 | "SecondarySourceID" : "SI", 69 | "Source" : "SO", 70 | "SpaceFlightMission" : "SFM", 71 | "Status" : "STAT", 72 | "Subset" : "SB", 73 | "Title" : "TI", 74 | "TransliteratedTitle" : "TT", 75 | "Volume" : "VI", 76 | "VolumeTitle" : "VTI", 77 | "CommentIn" : "CIN", 78 | "ErratumIn" : "EIN", 79 | "ErratumFor" : "EFR", 80 | "CorrectedRepublishedIn" : "CRI", 81 | "CorrectedRepublishedFrom" : "CRF", 82 | "DatasetIn" : "DDIN", 83 | "DatasetUseReportedIn" : "DRIN", 84 | "PartialRetractionIn" : "PRIN", 85 | "PartialRetractionOf" : "PROF", 86 | "RepublishedIn" : "RPI", 87 | "RepublishedFrom" : "RPF", 88 | "RetractionIn" : "RIN", 89 | "RetractionOf" : "ROF", 90 | "UpdateIn" : "UIN", 91 | "UpdateOf" : "UOF", 92 | "SummaryForPatients" : "SPIN", 93 | "OriginalReportIn" : "ORI", 94 | } 95 | 96 | tagNameConverterDict = makeBiDirectional(tagNameDict) 97 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_scopus.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | import unittest 3 | import metaknowledge 4 | 5 | import os 6 | 7 | class TestScopus(unittest.TestCase): 8 | 9 | def setUp(self): 10 | metaknowledge.VERBOSE_MODE = False 11 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/scopus_testing.csv.scopus") 12 | self.R = self.RC.peek() 13 | 14 | def test_creation(self): 15 | Rstart = self.RC.peek() 16 | R = metaknowledge.ScopusRecord(Rstart._fieldDict) 17 | self.assertEqual(R, Rstart) 18 | with open("metaknowledge/tests/scopus_testing.csv.scopus") as f: 19 | f.read(1) 20 | header = f.readline()[:-1].split(',') 21 | R = metaknowledge.ScopusRecord(f.readline(), header = header) 22 | self.assertEqual(R.id, 'EID:2-s2.0-84963944162') 23 | R = metaknowledge.ScopusRecord(f.readline(), header = header) 24 | self.assertEqual(R.id, 'EID:2-s2.0-84943362392') 25 | with self.assertRaises(TypeError): 26 | R = metaknowledge.ScopusRecord(12345678) 27 | R = metaknowledge.ScopusRecord(",2132,4,3fdgf,fgdgdfdg,dgfdg,,,,,,,,,,,,,,,,,,,2e5r6t789765432\n") 28 | self.assertTrue(R.bad) 29 | with self.assertRaises(metaknowledge.BadScopusRecord): 30 | R.writeRecord('not a file') 31 | 32 | def test_isCollection(self): 33 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection) 34 | 35 | def test_isScopus(self): 36 | self.assertIsInstance(self.R, metaknowledge.ScopusRecord) 37 | 38 | def test_specials(self): 39 | for R in self.RC: 40 | for s in metaknowledge.scopus.scopusSpecialTagToFunc.keys(): 41 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation)) 42 | 43 | def test_allFields(self): 44 | for R in self.RC: 45 | for k,v in R.items(): 46 | self.assertIsInstance(k, str) 47 | self.assertIsInstance(v, (str, list, int)) 48 | 49 | def test_graphs(self): 50 | self.assertEqual(metaknowledge.graphStats(self.RC.networkCoAuthor(), sentenceString = True), "The graph has 1798 nodes, 89236 edges, 36 isolates, 15 self loops, a density of 0.0552422 and a transitivity of 0.994673") 51 | self.assertEqual(metaknowledge.graphStats(self.RC.networkCitation(), sentenceString = True), "The graph has 10026 nodes, 10362 edges, 0 isolates, 0 self loops, a density of 0.000103094 and a transitivity of 0") 52 | 53 | def test_write(self): 54 | fileName = 'tempFile.scopus.tmp' 55 | self.RC.writeFile(fileName) 56 | self.assertEqual(os.path.getsize(fileName), os.path.getsize("metaknowledge/tests/scopus_testing.csv.scopus") + 11511) #Not quite identical due to double quotes 57 | os.remove(fileName) 58 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_citation.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | import unittest 3 | import metaknowledge 4 | 5 | class TestCitation(unittest.TestCase): 6 | def setUp(self): 7 | self.Cite = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1, DOI 0.1063/1.1695064") 8 | 9 | def test_citation_author(self): 10 | self.assertEqual(self.Cite.author, "John D") 11 | 12 | def test_citation_year(self): 13 | self.assertEqual(self.Cite.year, 2015) 14 | 15 | def test_citation_journal(self): 16 | self.assertEqual(self.Cite.journal, "TOPICS IN COGNITIVE SCIENCE") 17 | 18 | def test_citation_v(self): 19 | self.assertEqual(self.Cite.V, "V1") 20 | 21 | def test_citation_p(self): 22 | self.assertEqual(self.Cite.P, "P1") 23 | 24 | def test_citation_DOI(self): 25 | self.assertEqual(self.Cite.DOI, "0.1063/1.1695064") 26 | 27 | def test_citation_id(self): 28 | self.assertEqual(self.Cite.ID(), "John D, 2015, TOPICS IN COGNITIVE SCIENCE") 29 | 30 | def test_citation_str(self): 31 | self.assertEqual(str(self.Cite), "John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1, DOI 0.1063/1.1695064") 32 | 33 | def test_citation_extra(self): 34 | self.assertEqual(self.Cite.Extra(), "V1, P1, 0.1063/1.1695064") 35 | 36 | def test_citation_badDetection(self): 37 | self.assertTrue(metaknowledge.Citation("").bad) 38 | 39 | def test_citation_equality(self): 40 | c1 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, P1, DOI 0.1063/1.1695064") 41 | c2 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1") 42 | c3 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P2") 43 | self.assertTrue(c1 == self.Cite) 44 | self.assertTrue(c2 == self.Cite) 45 | self.assertFalse(c1 != c2) 46 | self.assertFalse(c3 != c1) 47 | 48 | def test_citation_hash(self): 49 | self.assertTrue(bool(hash(self.Cite))) 50 | self.assertTrue(bool(hash(metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1")))) 51 | self.assertTrue(bool(hash(metaknowledge.Citation("John D., 2015")))) 52 | 53 | def test_citation_badLength(self): 54 | c = metaknowledge.Citation("ab, c") 55 | self.assertTrue(c.bad) 56 | self.assertEqual(str(c.error), "Not a complete set of author, year and journal") 57 | self.assertEqual(c.Extra(),'') 58 | self.assertEqual(c.author,'Ab') 59 | self.assertEqual(c.ID(),'Ab, C') 60 | 61 | def test_citation_badNumbers(self): 62 | c = metaknowledge.Citation("1, 2, 3, 4") 63 | self.assertTrue(c.bad) 64 | self.assertEqual(c.ID(), '1, 2') 65 | self.assertEqual(str(c.error), "The citation did not fully match the expected pattern") 66 | -------------------------------------------------------------------------------- /metaknowledge/WOS/tagProcessing/funcDicts.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | from .tagFunctions import tagToFunc 3 | from .helpFuncs import reverseDict, makeBiDirectional 4 | 5 | tagToFullDict = {k : v.__name__ for k, v in tagToFunc.items()} 6 | 7 | fullToTagDict = reverseDict(tagToFullDict) #Reverses tagToFull 8 | 9 | fullToTagDictUpper = {k.upper() : v for k,v in fullToTagDict.items()} 10 | 11 | 12 | tagNameConverterDict = makeBiDirectional(tagToFullDict) #tagToFull made reversible 13 | 14 | tagsAndNameSet = set(tagNameConverterDict.keys()) #set of WOS tags and their names 15 | 16 | tagsAndNameSetUpper = set(([c.upper() for c in tagsAndNameSet])) 17 | 18 | knownTagsList = list(tagToFullDict.keys()) #list of all the known tags 19 | 20 | def tagToFull(tag): 21 | """A wrapper for `tagToFullDict`, it maps 2 character tags to their full names. 22 | 23 | # Parameters 24 | 25 | _tag_: `str` 26 | 27 | > A two character string giving the tag 28 | 29 | # Returns 30 | 31 | `str` 32 | 33 | > The full name of _tag_ 34 | """ 35 | try: 36 | return tagToFullDict[tag] 37 | except KeyError: 38 | raise("Tag not in list of known tags") 39 | 40 | 41 | def normalizeToTag(val): 42 | """Converts tags or full names to 2 character tags, case insensitive 43 | 44 | # Parameters 45 | 46 | _val_: `str` 47 | 48 | > A two character string giving the tag or its full name 49 | 50 | # Returns 51 | 52 | `str` 53 | 54 | > The short name of _val_ 55 | """ 56 | try: 57 | val = val.upper() 58 | except AttributeError: 59 | raise KeyError("{} is not a tag or name string".format(val)) 60 | if val not in tagsAndNameSetUpper: 61 | raise KeyError("{} is not a tag or name string".format(val)) 62 | else: 63 | try: 64 | return fullToTagDictUpper[val] 65 | except KeyError: 66 | return val 67 | 68 | def normalizeToName(val): 69 | """Converts tags or full names to full names, case sensitive 70 | 71 | # Parameters 72 | 73 | _val_: `str` 74 | 75 | > A two character string giving the tag or its full name 76 | 77 | # Returns 78 | 79 | `str` 80 | 81 | > The full name of _val_ 82 | """ 83 | if val not in tagsAndNameSet: 84 | raise KeyError("{} is not a tag or name string".format(val)) 85 | else: 86 | try: 87 | return tagToFullDict[val] 88 | except KeyError: 89 | return val 90 | 91 | def isTagOrName(val): 92 | """Checks if _val_ is a tag or full name of tag if so returns `True` 93 | 94 | # Parameters 95 | 96 | _val_: `str` 97 | 98 | > A string possible forming a tag or name 99 | 100 | # Returns 101 | 102 | `bool` 103 | 104 | > `True` if _val_ is a tag or name, otherwise `False` 105 | """ 106 | return val in tagsAndNameSet 107 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_grantCollection.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2016 2 | import unittest 3 | import shutil 4 | import os 5 | 6 | import metaknowledge 7 | 8 | class TestGrantCollection(unittest.TestCase): 9 | 10 | @classmethod 11 | def setUpClass(cls): 12 | metaknowledge.VERBOSE_MODE = False 13 | cls.GCmain = metaknowledge.GrantCollection("metaknowledge/tests/", cached = True) 14 | 15 | def setUp(self): 16 | self.GC = self.GCmain.copy() 17 | 18 | def test_empty(self): 19 | GCempty = metaknowledge.GrantCollection() 20 | self.assertEqual(len(GCempty), 0) 21 | self.assertEqual(GCempty.name, "Empty") 22 | 23 | def test_creationErrors(self): 24 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException): 25 | GCbad = metaknowledge.GrantCollection("README.md", extension = '.csv') 26 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile): 27 | GCbad = metaknowledge.GrantCollection("README.md") 28 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile): 29 | GCbad = metaknowledge.GrantCollection("README.md", extension = '.md') 30 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile): 31 | GCbad = metaknowledge.GrantCollection(".", extension = '.md') 32 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException): 33 | GCbad = metaknowledge.GrantCollection("README") 34 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException): 35 | GCbad = metaknowledge.GrantCollection(1) 36 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException): 37 | GCbad = metaknowledge.GrantCollection({1}) 38 | 39 | def test_creation(self): 40 | self.assertIsInstance(self.GC, metaknowledge.GrantCollection) 41 | self.assertIsInstance(self.GC, metaknowledge.Collection) 42 | self.assertAlmostEqual(len(self.GC), 2022, delta = 4) 43 | self.assertIsInstance(self.GC.peek(), metaknowledge.Record) 44 | self.assertEqual(metaknowledge.GrantCollection(self.GC), self.GC) 45 | 46 | def test_Caching(self): 47 | self.assertTrue(os.path.isfile("metaknowledge/tests/tests.[].mkGrantDirCache")) 48 | os.remove("metaknowledge/tests/tests.[].mkGrantDirCache") 49 | 50 | def test_fallback(self): 51 | fname = "DefaultGrantTestFile.csv" 52 | shutil.copyfile("metaknowledge/tests/NSERC_TEST_PARTNER.testcsv", fname) 53 | GC = metaknowledge.GrantCollection(fname, extension = '.csv') 54 | self.assertEqual(GC._collectedTypes, {"FallbackGrant"}) 55 | os.remove(fname) 56 | 57 | def test_CoInstitution(self): 58 | G = self.GC.networkCoInvestigatorInstitution() 59 | self.assertEqual(metaknowledge.graphStats(G), 'Nodes: 641\nEdges: 2034\nIsolates: 79\nSelf loops: 0\nDensity: 0.00991615\nTransitivity: 0.273548') 60 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_diffusion.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | import unittest 3 | import metaknowledge 4 | 5 | 6 | class TestDiffusion(unittest.TestCase): 7 | def setUp(self): 8 | metaknowledge.VERBOSE_MODE = False 9 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi") 10 | 11 | def test_diffusionGraph(self): 12 | G = metaknowledge.diffusionGraph(self.RC, self.RC) 13 | Gcr_ut = metaknowledge.diffusionGraph(self.RC, self.RC, sourceType = "CR", targetType = "UT") 14 | self.assertEqual(metaknowledge.graphStats(G, sentenceString = True), 'The graph has 42 nodes, 1569 edges, 0 isolates, 35 self loops, a density of 0.91115 and a transitivity of 0.894934') 15 | self.assertEqual(metaknowledge.graphStats(Gcr_ut, sentenceString = True), 'The graph has 528 nodes, 3591 edges, 246 isolates, 0 self loops, a density of 0.0129054 and a transitivity of 0') 16 | 17 | def test_multiGraph(self): 18 | G = metaknowledge.diffusionGraph(self.RC, self.RC, labelEdgesBy = 'PY') 19 | metaknowledge.dropEdges(G, dropSelfLoops = True) 20 | #multigraphs have issues their edge counts are somewhat unpredictable 21 | self.assertEqual(metaknowledge.graphStats(G, stats = ('nodes', 'isolates', 'loops'), sentenceString = True), 'The graph has 42 nodes, 0 isolates and 0 self loops') 22 | 23 | def test_diffusionCounts(self): 24 | d = metaknowledge.diffusionCount(self.RC, self.RC) 25 | dc = metaknowledge.diffusionCount(self.RC, self.RC, compareCounts = True) 26 | dWC = metaknowledge.diffusionCount(self.RC, self.RC, sourceType = "WC") 27 | self.assertIsInstance(d.keys().__iter__().__next__(), metaknowledge.Record) 28 | self.assertTrue(-1 < d.values().__iter__().__next__() < 10) 29 | self.assertIsInstance(list(dWC.keys())[0], str) 30 | self.assertTrue(-1 < dWC.values().__iter__().__next__() < 24) 31 | for t in dc.values(): 32 | self.assertEqual(t[0], t[1]) 33 | 34 | def test_diffusionPandas(self): 35 | d = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True) 36 | dwc = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True, sourceType = "WC", compareCounts = True) 37 | dyear = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True, extraValue = 'year') 38 | self.assertTrue("TI" in d.keys()) 39 | self.assertEqual(len(d), 44) 40 | self.assertTrue(len(d["UT"]), len(self.RC)) 41 | self.assertTrue("WC" in dwc) 42 | self.assertEqual(3, len(dwc)) 43 | self.assertEqual(len(dwc["TargetCount"]), 9) 44 | self.assertEqual(dwc["TargetCount"], dwc["SourceCount"]) 45 | self.assertEqual(len(dyear), len(d) + 1) 46 | self.assertNotEqual(dyear["TargetCount"], dwc["SourceCount"]) 47 | self.assertEqual(len([c for c in dyear["TargetCount"] if c > 1]), 9) 48 | self.assertTrue(1979 in dyear['year']) 49 | -------------------------------------------------------------------------------- /docs/documentation/functions_methods/index.rst: -------------------------------------------------------------------------------- 1 | Functions 2 | ========= 3 | 4 | 5 | .. automodule:: metaknowledge.citation 6 | :members: 7 | :private-members: 8 | :special-members: 9 | :show-inheritance: 10 | :noindex: 11 | :exclude-members: Citation 12 | 13 | .. automodule:: metaknowledge.constants 14 | :members: 15 | :private-members: 16 | :special-members: 17 | 18 | .. automodule:: metaknowledge.diffusion 19 | :members: 20 | :private-members: 21 | :special-members: 22 | 23 | .. automodule:: metaknowledge.fileHandlers 24 | :members: 25 | :private-members: 26 | :special-members: 27 | :exclude-members: ProccessorTuple 28 | 29 | .. 
automodule:: metaknowledge.grantCollection 30 | :members: 31 | :private-members: 32 | :special-members: 33 | :exclude-members: GrantCollection 34 | 35 | .. automodule:: metaknowledge.graphHelpers 36 | :members: 37 | :private-members: 38 | :special-members: 39 | 40 | .. automodule:: metaknowledge.mkCollection 41 | :members: 42 | :private-members: 43 | :special-members: 44 | :exclude-members: Collection, CollectionWithIDs 45 | 46 | .. automodule:: metaknowledge.mkRecord 47 | :members: 48 | :private-members: 49 | :special-members: 50 | :exclude-members: ExtendedRecord, Record 51 | 52 | .. automodule:: metaknowledge.progressBar 53 | :members: 54 | :private-members: 55 | :special-members: 56 | 57 | .. automodule:: metaknowledge.RCglimpse 58 | :members: 59 | :private-members: 60 | :special-members: 61 | 62 | .. automodule:: metaknowledge.recordCollection 63 | :members: 64 | :private-members: 65 | :special-members: 66 | :exclude-members: RecordCollection 67 | 68 | .. automodule:: metaknowledge.genders 69 | :members: 70 | :private-members: 71 | :special-members: 72 | 73 | .. automodule:: metaknowledge.genders.nameGender 74 | :members: 75 | :private-members: 76 | :special-members: 77 | 78 | .. automodule:: metaknowledge.grants.baseGrant 79 | :members: 80 | :private-members: 81 | :special-members: 82 | :exclude-members: FallbackGrant, Grant 83 | 84 | .. automodule:: metaknowledge.grants.cihrGrant 85 | :members: 86 | :private-members: 87 | :special-members: 88 | :exclude-members: CIHRGrant 89 | 90 | .. automodule:: metaknowledge.grants.medlineGrant 91 | :members: 92 | :private-members: 93 | :special-members: 94 | :exclude-members: MedlineGrant 95 | 96 | .. automodule:: metaknowledge.grants.nsercGrant 97 | :members: 98 | :private-members: 99 | :special-members: 100 | :exclude-members: NSERCGrant 101 | 102 | .. automodule:: metaknowledge.grants.nsfGrant 103 | :members: 104 | :private-members: 105 | :special-members: 106 | :exclude-members: NSFGrant 107 | 108 | .. automodule:: metaknowledge.grants.scopusGrant 109 | :members: 110 | :private-members: 111 | :special-members: 112 | :exclude-members: ScopusGrant 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | metaknowledge 2 | ========================================= 3 | *A Python3 package for doing computational research on knowledge* 4 | 5 | *metaknowledge* is a Python3_ package for doing computational research in bibliometrics, scientometrics, and network analysis. It can also be easily used to simplify the process of doing systematic reviews in any disciplinary context. 6 | 7 | *metaknowledge* reads a directory of plain text files containing meta-data on publications and citations, and writes to a variety of data structures that are suitable for longitudinal research, computational text analysis (e.g. topic models and burst analysis), Reference Publication Year Spectroscopy (RPYS), and network analysis (including multi-modal, multi-level, and dynamic). It handles large datasets (e.g. several million records) efficiently. 8 | 9 | metaknowledge currently handles data from the Web of Science, PubMed, Scopus, Proquest Dissertations & Theses, and administrative data from the National Science Foundation and the Canadian tri-council granting agencies: SSHRC, CIHR, and NSERC. 
10 | 11 | Datasets created with metaknowledge can be analyzed using NetworkX_ and the `standard libraries `_ for data analysis in Python. It is also easy to write data to :code:`csv` or :code:`graphml` files for analysis and visualization in `R `_, `Stata `_, `Visone `_, `Gephi `_, or any other tools for data analysis. 12 | 13 | *metaknowledge* also has a simple command line tool for extracting quantitative datasets and network files from Web of Science files. This makes the library more accessible to researchers who do not know Python, and makes it easier to quickly explore new datasets. 14 | 15 | Contact 16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 17 | | **Reid McIlroy-Young**, `reid@reidmcy.com `_ 18 | | *University of Chicago, Chicago, IL, USA* 19 | 20 | | **John McLevey**, `john.mclevey@uwaterloo.ca `_ 21 | | *University of Waterloo, Waterloo, ON, Canada* 22 | 23 | | **Jillian Anderson**, `jillianderson8@gmail.com `_ 24 | | *University of Waterloo, Waterloo, ON, Canada* 25 | 26 | Citation 27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 28 | If you are using metaknowledge for research that will be published or publicly distributed, please acknowledge us with the following citation: 29 | 30 | *Reid McIlroy-Young, John McLevey, and Jillian Anderson. 2015. metaknowledge: open source software for social networks, bibliometrics, and sociology of knowledge research. URL: http://www.networkslab.org/metaknowledge.* 31 | 32 | `Download .bib file: `_ 33 | 34 | License 35 | ^^^^^^^ 36 | *metaknowledge* is free and open source software, distributed under the GPL License. 37 | 38 | 39 | .. toctree:: 40 | :maxdepth: 1 41 | 42 | install 43 | documentation/index 44 | examples/index 45 | CLI 46 | 47 | 48 | Indices and tables 49 | ^^^^^^^^^^^^^^^^^^ 50 | 51 | * :ref:`genindex` 52 | * :ref:`modindex` 53 | * :ref:`search` 54 | 55 | .. _Python3: https://www.python.org 56 | .. _NetworkX: https://networkx.github.io -------------------------------------------------------------------------------- /metaknowledge/grants/__init__.py: -------------------------------------------------------------------------------- 1 | from .nsercGrant import NSERCGrant, isNSERCfile, parserNSERCfile 2 | from .medlineGrant import MedlineGrant 3 | from .baseGrant import Grant, FallbackGrant, isFallbackGrantFile, parserFallbackGrantFile 4 | from .cihrGrant import CIHRGrant, isCIHRfile, parserCIHRfile 5 | from .nsfGrant import NSFGrant, isNSFfile, parserNSFfile 6 | 7 | 8 | """#Creating new grants 9 | 10 | mk is intended to be expanded as different researchers will require processing of files not in it currently. To add a new grant you need to write 2 simple functions and class. To see a basic example look at the `baseGrant.py` file as it contains the fallback grant processors and you should be able to reuse much of that code. 11 | 12 | The way GrantCollections are created is when they are given a file or directory of files they check each file with the `detector` functions in the `grantProcessors` found in `fileHandlers.py` and if one returns `True` they use the `processor` function and added the `type` string to their `collectedTypes` set. The `processor` must return a tuple the first element being a set of all the Grants the second `None` or an `Exception` object. `processor` should not raise an exception, if there is an issue the GrantCollection should be given even a partial set of grants, GrantCollections have an errors attribute that contains all errors they encountered during the parsing. 
13 | 14 | 15 | The first function is to determine if a given file path points to a collection of grants of the needed type. Determining if a file is of the needed type is usually done by reading the first few lines and checking that they match a known header template. For example, CIHR files start with the string `"Search Criteria,"` so the function `isCIHRfile()` checks that the first lines start that way. 16 | 17 | One thing to watch out for is the encoding: most grants are CSVs encoded with ISO-8859, which is what many Windows programs, most notably Excel, expect. Python will use that encoding (called `'latin-1'`) on Microsoft systems, but on Mac OS and Linux it will often use `'utf-8'`, so you should always give the encoding explicitly as mk is intended for all 3 operating systems. 18 | 19 | The next function is the parser; this is the function that is called on the file to create the Grants. It is given a file path that has been confirmed to be a correctly formatted grant file by the detector. 20 | 21 | The function must return a tuple, the first entry being a set of all the Grants and the second an `Exception` if an error occurred or `None` if not. If an error occurs the function should attempt to return as many grants as possible, including the one that had the error (with its error state correctly indicated). The GrantCollection will record the error and allow the user/script to decide what actions to take. Note, often not doing anything is appropriate, as errors have been found to most often occur at the end of the file, so no data is actually lost. 22 | 23 | The `Grants` in the set returned by the processor should be instances of a new class that inherits from `Grant`, even if no new attributes are defined. 24 | 25 | Once the `detector` and `processor` functions have been created and tested, they can be added to the list of grant processors found in `fileHandlers.py`, called `grantProcessors`. Each entry is checked in order and the last entry is what tells the `GrantCollection` to stop and treat the file as not matching, so do not place anything after it; it would never be reached. (A minimal sketch of such a detector and processor pair is given below.) 26 | """ 27 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500217.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Radical Chemistry on Cloud and Aerosol Surfaces 6 | 09/01/2014 7 | 08/31/2016 8 | 269915 9 | 10 | Continuing grant 11 | 12 | 13 | 03090000 14 | 15 | Direct For Mathematical & Physical Scien 16 | 17 | 18 | Division Of Chemistry 19 | 20 | 21 | 22 | Tyrone D. Mitchell 23 | 24 | The Environmental Chemical Sciences Program in the Chemistry Division at the National Science Foundation supports the research of Professors Joseph S. Francisco and Sabre Kais both from Purdue University who will examine how free radicals contribute to numerous significant chemical processes in the atmosphere. Aerosols and cloud droplets play an important role in both the removal and the conversion of gases in the atmosphere. The interactions between gas-phase species on liquid surfaces are central to understanding chemistry at these interfaces.
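As referenced in the grants guide above, here is a minimal sketch of a detector/processor pair for a hypothetical CSV-based funder. The "Example" names, the header string, and the 'GrantID' column are illustrative assumptions, not anything shipped with metaknowledge; the `Grant` constructor arguments follow the call made by `NSERCGrant` later in this listing.

import csv

from metaknowledge.grants.baseGrant import Grant
from metaknowledge.mkExceptions import BadGrant

class ExampleGrant(Grant):
    # No new attributes are needed; inheriting from Grant is enough.
    pass

def isExampleFile(fileName):
    # Detector: cheaply check the header, reading with the Windows-friendly
    # 'latin-1' encoding discussed above.
    try:
        with open(fileName, 'r', encoding='latin-1') as openfile:
            return openfile.readline().startswith('Example Funder,')
    except (OSError, UnicodeDecodeError):
        return False

def parserExampleFile(fileName):
    # Processor: must return (set of Grants, error or None) and never raise.
    grants = set()
    error = None
    try:
        with open(fileName, 'r', encoding='latin-1') as openfile:
            for lineNum, row in enumerate(csv.DictReader(openfile), start=2):
                idValue = "EXAMPLE:{}".format(row.get('GrantID', lineNum))
                grants.add(ExampleGrant(str(row), row, idValue, False, None,
                                        sFile=fileName, sLine=lineNum))
    except Exception as e:
        error = BadGrant("'{}' became unparsable: {}".format(fileName, e))
    return grants, error

Once the pair works, it would be registered in the `grantProcessors` list in `fileHandlers.py`, ahead of the fallback entry, as the docstring above notes.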
The overall goal of this award is to provide a theoretical framework, based on first principles including classical and ab initio molecular dynamics (MD) simulations, density functional theory combined with finite element methods for Car-Parrinello simulations, and finite size scaling for universal behavior of free energies and other thermodynamic quantities to understand how atmospheric free radicals accommodate and react at the gas-liquid interface.<br/><br/>Results of this project will help improve our understanding of the contribution to radical accommodation and uptake leading to more effective pollution control strategies as well as the improvement in air quality for pollutants whose chemistry is highly coupled to atmospheric free radicals. This multidisciplinary project will bring both undergraduate and graduate students from the departments of Chemistry and Earth and Atmospheric Sciences into the research environment. Moreover, this project will promote and support broader efforts to recruit minority and underrepresented graduate students to the chemical physics program at Purdue University. 25 | 01/23/2015 26 | 01/23/2015 27 | 28 | 1500217 29 | 30 | Joseph 31 | Francisco 32 | francisco3@unl.edu 33 | 01/23/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | University of Nebraska-Lincoln 39 | Lincoln 40 | 685031435 41 | 4024723171 42 | 2200 Vine St, 151 Whittier 43 | United States 44 | Nebraska 45 | NE 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500219.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Extremal graph theory, graph limits, and algebraic invariants 6 | 06/01/2015 7 | 05/31/2018 8 | 151604 9 | 10 | Standard Grant 11 | 12 | 13 | 03040000 14 | 15 | Direct For Mathematical & Physical Scien 16 | 17 | 18 | Division Of Mathematical Sciences 19 | 20 | 21 | 22 | Tomek Bartoszynski 23 | 24 | In this project, the PI aims to study very large networks using algebraic and analytic tools. Large networks like the Internet, molecular lattices and social networks (such as Facebook) naturally arise in many different areas of real life. The PI aims to look at these from a new perspective: we consider them as approximations of an infinite object. For molecular lattices this is a very natural approach, but via a recently developed theory of sparse graph convergence we can tackle a much broader class of problems, creating new links between mathematics, statistical physics and computer science.<br/><br/>The PI will investigate two essentially different, but still related topics. The first one is the study of extremal values of algebraic invariants of graphs with a special emphasis on those problems where the conjectured extremal graphs are not finite. Despite the lack of finite extremal solutions, using the recently emerging language of Benjamini--Schramm convergence, one can find and analyze the extremal solutions. This then leads to new asymptotic results on finite graphs. The second topic is the study of certain special infinite graphs and lattices via graph limit theory and analytic and algebraic combinatorics. The general theme is to consider a graph invariant of algebraic nature and analyze its limiting behaviour using analytic tools. Often the invariants come from graph polynomials like the matching, chromatic and independence polynomials and have various ties to statistical mechanics. 
25 | 04/24/2015 26 | 04/24/2015 27 | 28 | 1500219 29 | 30 | Peter 31 | Csikvari 32 | csikvari@mit.edu 33 | 04/24/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | Massachusetts Institute of Technology 39 | Cambridge 40 | 021394301 41 | 6172531000 42 | 77 MASSACHUSETTS AVE 43 | United States 44 | Massachusetts 45 | MA 46 | 47 | 48 | 7970 49 | Combinatorics 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /metaknowledge/tests/TwoPaper.isi: -------------------------------------------------------------------------------- 1 | FN Thomson Reuters Web of Science™ 2 | VR 1.0 3 | PT J 4 | AU Kim, S 5 | AF Kim, S 6 | TI Supervenience and causation: A probabilistic approach 7 | SO SYNTHESE 8 | LA English 9 | DT Article 10 | AB It is often argued that if a mental property supervenes on a physical property, then (1) the mental property M "inherits'' its causal efficacy from the physical property P and (2) the causal efficacy of M reduces to that of P. However, once we understand the supervenience thesis and the concept of causation probabilistically, it turns out that we can infer the causal efficacy of M from that of P and vice versa if and only if a certain condition, which I call the "line-up'' thesis, holds. I argue that the supervenience thesis entails neither this condition nor its denial. I also argue that even when the line-up thesis holds true, reductionism about the causal efficacy of the mental property doesn't follow. 11 | C1 Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA. 12 | RP Kim, S (reprint author), Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA. 13 | CR Eells E, 1991, PROBABILISTIC CAUSAL 14 | ENC B, 1983, J PHILOS, V80, P279, DOI 10.2307/2026499 15 | Fodor J.A., 1997, PHILOS PERSPECTIVES, P149 16 | Hausman Daniel, 1998, CAUSAL ASYMMETRIES 17 | KIM J, 1997, PHILOS PERSPECTIVES, V11, P185 18 | KIM JW, 1992, PHILOS PHENOMEN RES, V52, P1, DOI 10.2307/2107741 19 | Kim J., 1989, PHILOS PERSPECTIVES, V3, P77, DOI 10.2307/2214264 20 | Kim J., 1993, SUPERVENIENCE MIND, P358, DOI 10.1017/CBO9780511625220.019 21 | Kim J., 1989, P ADDRESSES AM PHILO, V63, P31, DOI DOI 10.2307/3130081 22 | Kim J., 1993, MENTAL CAUSATION, P189 23 | Kim Jaegwon, 1996, PHILOS MIND 24 | Shoemaker S., 1980, TIME CAUSE, P109 25 | SOBER E, 1999, PHILOS STUDIES 26 | NR 13 27 | TC 1 28 | Z9 1 29 | PU KLUWER ACADEMIC PUBL 30 | PI DORDRECHT 31 | PA SPUIBOULEVARD 50, PO BOX 17, 3300 AA DORDRECHT, NETHERLANDS 32 | SN 0039-7857 33 | J9 SYNTHESE 34 | JI Synthese 35 | PD MAR 36 | PY 2000 37 | VL 122 38 | IS 3 39 | BP 245 40 | EP 259 41 | DI 10.1023/A:1005282128866 42 | PG 15 43 | WC History & Philosophy Of Science; Philosophy 44 | SC History & Philosophy of Science; Philosophy 45 | GA 312WP 46 | UT WOS:000086967200001 47 | ER 48 | 49 | PT J 50 | AU Kim, S 51 | AF Kim, S 52 | TI Physical process theories and token-probabilistic causation 53 | SO ERKENNTNIS 54 | LA English 55 | DT Article 56 | ID CAUSALITY 57 | C1 Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA. 58 | RP Kim, S (reprint author), Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA. 
59 | CR DOWE P, 1995, PHILOS SCI, V62, P321, DOI 10.1086/289859 60 | Dowe P., 1992, ERKENNTNIS, V37, P179 61 | DOWE P, 1992, PHILOS SCI, V59, P195, DOI 10.1086/289662 62 | Eells E, 1991, PROBABILISTIC CAUSAL 63 | Hausman Daniel, 1998, CAUSAL ASYMMETRIES 64 | HITCHCOCK CR, 1995, PHILOS SCI, V62, P304, DOI 10.1086/289858 65 | KITCHER P, 1989, MINN STUD PHILOS SCI, V13, P410 66 | MACKIE J, 1947, CEMENT UNVIERSE 67 | SALMON WC, 1990, TOPOI-INT REV PHILOS, V9, P95, DOI 10.1007/BF00135890 68 | Salmon W. C., 1984, SCI EXPLANATION CAUS 69 | SALMON W. C., 1998, CAUSALITY EXPLANATIO 70 | SALMON WC, 1994, PHILOS SCI, V61, P297, DOI 10.1086/289801 71 | Salmon WC, 1997, PHILOS SCI, V64, P461, DOI 10.1086/392561 72 | SOBER E, 1987, BRIT J PHILOS SCI, V38, P243, DOI 10.1093/bjps/38.2.243 73 | NR 14 74 | TC 0 75 | Z9 0 76 | PU KLUWER ACADEMIC PUBL 77 | PI DORDRECHT 78 | PA SPUIBOULEVARD 50, PO BOX 17, 3300 AA DORDRECHT, NETHERLANDS 79 | SN 0165-0106 80 | J9 ERKENNTNIS 81 | JI Erkenntnis 82 | PY 2001 83 | VL 54 84 | IS 2 85 | BP 235 86 | EP 245 87 | DI 10.1023/A:1005677609556 88 | PG 11 89 | WC Philosophy 90 | SC Philosophy 91 | GA 420JH 92 | UT WOS:000168001500006 93 | ER 94 | 95 | EF 96 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500201.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Redox, Electronic, and Rectifying Response of Five- and Six-coordinate Metallosurfactants in Solution, as Films, and on Electrodes 6 | 05/01/2015 7 | 04/30/2018 8 | 449000 9 | 10 | Standard Grant 11 | 12 | 13 | 03090000 14 | 15 | Direct For Mathematical & Physical Scien 16 | 17 | 18 | Division Of Chemistry 19 | 20 | 21 | 22 | James Lisy 23 | 24 | In this project funded by the Macromolecular, Supramolecular and Nanochemistry Program of the Division of Chemistry, Professor Cláudio N. Verani and his research group at Wayne State University in Detroit are studying metal-based molecules able to act as diodes for electric current rectification. Rectification, or directional current flow from an electrode A to an electrode B (but not from B back to A) is fundamental to the conversion of alternating into direct current, and is absolutely necessary for electronic data computation. This interdisciplinary proposal seeks to enhance our fundamental understanding on the use of metallosurfactants for molecular diodes. Broader impacts include scientific outreach to fourth and fifth graders and effort to promote Latino-student inclusion in STEM research.<br/><br/>Verani and collaborators are studying the redox, electronic, and rectifying behavior of metallosurfactants, both in solution and as Langmuir-Blodgett monolayer films deposited onto gold electrodes. 
Therefore, this interdisciplinary program focuses on the use of amphiphilic coordination complexes towards current-rectifying assemblies as measured by the asymmetry of current/potential (I/V) curves aiming to understand (i) the predominant rectification mechanisms in metallosurfactants; (ii) the possibility of electron-transfer mediation in metal-based singly occupied molecular orbitals (SOMOs); (iii) the viability of electron transfer mediation by metals between ligand-centered lowest unoccupied & highest occupied molecular orbitals (LUMOS & HOMOs); (iv) the role of metallosurfactant orientation in the mechanism of rectification; (v) the influence of the metallosurfactant geometry in observed symmetric conduction, unimolecular or asymmetric rectification, or insulation. This research is multi-faceted, incluidng efforts to make strides in synthetic methodologies, and in electrochemical, spectroscopic, computational, isothermal compression, and microscopy methods. 25 | 04/23/2015 26 | 04/23/2015 27 | 28 | 1500201 29 | 30 | Claudio 31 | Verani 32 | cnverani@chem.wayne.edu 33 | 04/23/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | Wayne State University 39 | Detroit 40 | 482023622 41 | 3135772424 42 | 5057 Woodward 43 | United States 44 | Michigan 45 | MI 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /metaknowledge/scopus/scopusHandlers.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | 3 | import csv 4 | 5 | from .recordScopus import ScopusRecord, scopusHeader 6 | 7 | from ..mkExceptions import BadScopusFile 8 | 9 | def isScopusFile(infile, checkedLines = 2, maxHeaderDiff = 3): 10 | """Determines if _infile_ is the path to a Scopus csv file. A file is considerd to be a Scopus file if it has the correct encoding (`utf-8` with BOM (Byte Order Mark)) and within the first _checkedLines_ a line contains the complete header, the list of all header entries in order is found in [`scopus.scopusHeader`](#metaknowledge.scopus). 11 | 12 | **Note** this is for csv files _not_ plain text files from scopus, plain text files are not complete. 13 | 14 | # Parameters 15 | 16 | _infile_ : `str` 17 | 18 | > The path to the targets file 19 | 20 | _checkedLines_ : `optional [int]` 21 | 22 | > default 2, the number of lines to check for the header 23 | 24 | _maxHeaderDiff_ : `optional [int]` 25 | 26 | > default 3, maximum number of different entries in the potetial file from the current known header `metaknowledge.scopus.scopusHeader`, if exceeded an `False` will be returned 27 | 28 | # Returns 29 | 30 | `bool` 31 | 32 | > `True` if the file is a Scopus csv file 33 | """ 34 | try: 35 | with open(infile, 'r', encoding='utf-8') as openfile: 36 | if openfile.read(1) != "\ufeff": 37 | return False 38 | for i in range(checkedLines): 39 | if len(set(openfile.readline()[:-1].split(',')) ^ set(scopusHeader)) < maxHeaderDiff: 40 | return True 41 | except (StopIteration, UnicodeDecodeError): 42 | return False 43 | else: 44 | return False 45 | 46 | def scopusParser(scopusFile): 47 | """Parses a scopus file, _scopusFile_, to extract the individual lines as [ScopusRecords](../classes/ScopusRecord.html#metaknowledge.scopus.ScopusRecord). 48 | 49 | A Scopus file is a csv (Comma-separated values) with a complete header, see [`scopus.scopusHeader`](#metaknowledge.scopus) for the entries, and each line after it containing a record's entry. 
The string valued entries are quoted with double quotes which means double quotes inside them can cause issues, see [scopusRecordParser()](#metaknowledge.scopus.recordScopus.scopusRecordParser) for more information. 50 | 51 | # Parameters 52 | 53 | _scopusFile_ : `str` 54 | 55 | > A path to a valid scopus file, use [isScopusFile()](#metaknowledge.scopus.scopusHandlers.isScopusFile) to verify 56 | 57 | # Returns 58 | 59 | `set[ScopusRecord]` 60 | 61 | > Records for each of the entries 62 | """ 63 | #assumes the file is Scopus 64 | recSet = set() 65 | error = None 66 | lineNum = 0 67 | try: 68 | with open(scopusFile, 'r', encoding = 'utf-8') as openfile: 69 | #Get rid of the BOM 70 | openfile.read(1) 71 | header = openfile.readline()[:-1].split(',') 72 | if len(set(header) ^ set(scopusHeader)) == 0: 73 | header = None 74 | lineNum = 0 75 | try: 76 | for line, row in enumerate(openfile, start = 2): 77 | lineNum = line 78 | recSet.add(ScopusRecord(row, header = header, sFile = scopusFile, sLine = line)) 79 | except BadScopusFile as e: 80 | if error is None: 81 | error = BadScopusFile("The file '{}' becomes unparsable after line: {}, due to the error: {} ".format(scopusFile, lineNum, e)) 82 | except (csv.Error, UnicodeDecodeError): 83 | if error is None: 84 | error = BadScopusFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(scopusFile, lineNum)) 85 | return recSet, error 86 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/twoAwardFile.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Summer Institute in Mathematics for Secondary School Teachers 6 | 01/01/1969 7 | 12/01/1969 8 | 33462 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 04/13/2004 34 | 35 | 1 36 | 37 | 38 | San Jose State University Foundation 39 | 40 | CA 41 | 42 | 43 | 44 | 45 | 46 | 47 | California 48 | 1 49 | 50 | 1 51 | -2660400 52 | 1081828800 53 | 54 | -31518000 55 | 69W3546aaaaaaaaa 56 | 57 | 58 | Summer Institute in Mathematics for Secondary School Teachers 59 | 01/01/1969 60 | 12/01/1969 61 | 33462 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 04/13/2004 87 | 88 | 1 89 | 90 | 91 | San Jose State University Foundation 92 | 93 | CA 94 | 95 | 96 | 97 | 98 | 99 | 100 | California 101 | 1 102 | 103 | 1 104 | -2660400 105 | 1081828800 106 | 107 | -31518000 108 | 69W3sdfghj546 109 | 110 | 111 | -------------------------------------------------------------------------------- /metaknowledge/RCglimpse.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import collections 3 | import datetime 4 | 5 | from .mkExceptions import mkException 6 | 7 | glimpseTags = collections.OrderedDict([ 8 | ('Top Authors','authorsFull'), 9 | ('Top Journals','journal'), 10 | ('Top Cited','citations'), 11 | ]) 12 | 13 | descriptionString1 = 'Columns are ranked by num. 
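As a usage note for the Scopus handlers above: the detector and parser are meant to be chained, with the caller deciding what to do about a partial parse. The path below is a placeholder, not a file that ships with the package.

# Sketch: low-level use of the Scopus handlers defined above.
from metaknowledge.scopus.scopusHandlers import isScopusFile, scopusParser

path = "scopus_export.csv"  # placeholder: a csv exported from Scopus
if isScopusFile(path):
    records, error = scopusParser(path)
    print("Parsed {} Scopus records".format(len(records)))
    if error is not None:
        # The parser returns whatever it could read plus the error; it does not raise.
        print("File was only partially parsable:", error)
else:
    print("Not a Scopus csv export")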
of occurrences' 14 | descriptionString2 = 'and are independent of one another' 15 | 16 | descriptionStringFull = descriptionString1 + ' ' + descriptionString2 17 | 18 | def _glimpse(RC, *tags, compact = False): 19 | tColumns, tRows = tuple(shutil.get_terminal_size()) 20 | if len(tags) < 1: 21 | targetTags = glimpseTags 22 | else: 23 | targetTags = {t: t for t in tags} 24 | #If it can't fit just go with the usual settings 25 | if tColumns < 55: 26 | tColumns = 80 27 | if tRows < 6: 28 | tRows = 24 29 | glimpseVals = collections.OrderedDict() 30 | if len(descriptionStringFull) > tColumns: 31 | maxRows = tRows - 7 32 | else: 33 | maxRows = tRows - 6 34 | for name, tag in targetTags.items(): 35 | glimpseVals[name] = RC.rankedSeries(tag, giveCounts = False, giveRanks = True, pandasMode = False) 36 | return makeHeader(RC, tColumns, targetTags, compact) + makeTable(glimpseVals, maxRows, tColumns, compact) 37 | 38 | def makeHeader(RC, width, glimpseVals, compact): 39 | now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 40 | firstLine = "{} glimpse made at: {}".format(type(RC).__name__, now) 41 | secondLine = "{} Records from {}".format(len(RC), RC.name[:30]) 42 | if compact: 43 | if len(descriptionStringFull) > width - 2: 44 | thirdLine = '|{1:+<{0}}|\n|{2:+<{0}}|\n'.format(width - 2, descriptionString1, descriptionString2) 45 | else: 46 | thirdLine = '|{1:+<{0}}|\n'.format(width - 2, descriptionStringFull) 47 | return '+{2:+<{0}}\n|{3:+<{1}}|\n{4}'.format(width - 1, width - 2, firstLine, secondLine, thirdLine) 48 | else: 49 | return '{}\n{}\n'.format(firstLine, secondLine) 50 | 51 | def makeTable(values, height, width, compact): 52 | retLines = [] 53 | if compact: 54 | lines = [[] for i in range(height + 1)] 55 | firstRowString = "|{}" + "+{}" * (len(values) - 1) + '|' 56 | rowString = "|{}" * len(values) + '|' 57 | cWidth = (width // len(values)) - 1 58 | cRemainder = width % len(values) - 1 59 | for title, rows in values.items(): 60 | if cRemainder > 0: 61 | heading = "{1:-^{0}}".format(cWidth + 1, title) 62 | cRemainder -= 1 63 | elif cRemainder < 0: 64 | heading = "{1:-^{0}}".format(cWidth - 1, title) 65 | cRemainder += 1 66 | else: 67 | heading = "{1:-^{0}}".format(cWidth, title) 68 | hWidth = len(heading) 69 | lines[0].append(heading) 70 | if len(rows) < height: 71 | for i in range(height - len(rows)): 72 | rows.append(('NA', -1)) 73 | for index, entry in enumerate((prepEntry(hWidth, *s) for s in rows[:height]), start = 1): 74 | lines[index].append(entry) 75 | retLines.append(firstRowString.format(*tuple(lines[0]))) 76 | for line in lines[1:]: 77 | retLines.append(rowString.format(*tuple(line))) 78 | else: 79 | for title, rows in values.items(): 80 | retLines.append('') 81 | retLines.append(title) 82 | retLines += ['{} {}'.format(c, str(s)[:width - len(str(c)) - 1]) for s, c in rows[:height // 2]] 83 | return '\n'.join(retLines) 84 | 85 | def prepEntry(maxLength, valString, rank): 86 | valString = str(valString) 87 | if len(valString) <= maxLength - 2: 88 | valString = valString.rjust(maxLength - 2, ' ') 89 | else: 90 | valString = "{}.".format(valString[:maxLength - 3]) 91 | return "{:<2.0f}{}".format(rank, valString) 92 | -------------------------------------------------------------------------------- /metaknowledge/medline/medlineHandlers.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from ..mkExceptions import BadPubmedFile 4 | 5 | from .recordMedline import MedlineRecord 6 | 7 | def isMedlineFile(infile, 
checkedLines = 2): 8 | """Determines if _infile_ is the path to a Medline file. A file is considerd to be a Medline file if it has the correct encoding (`latin-1`) and within the first _checkedLines_ a line starts with `"PMID- "`. 9 | 10 | # Parameters 11 | 12 | _infile_ : `str` 13 | 14 | > The path to the targets file 15 | 16 | _checkedLines_ : `optional [int]` 17 | 18 | > default 2, the number of lines to check for the header 19 | 20 | # Returns 21 | 22 | `bool` 23 | 24 | > `True` if the file is a Medline file 25 | """ 26 | try: 27 | with open(infile, 'r', encoding='latin-1') as openfile: 28 | f = enumerate(openfile, start = 0) 29 | for i in range(checkedLines): 30 | if f.__next__()[1].startswith("PMID- "): 31 | #Only indicator I could find 32 | return True 33 | except (StopIteration, UnicodeDecodeError): 34 | return False 35 | else: 36 | return False 37 | 38 | def medlineParser(pubFile): 39 | """Parses a medline file, _pubFile_, to extract the individual entries as [MedlineRecords](#metaknowledge.medline.recordMedline.MedlineRecord). 40 | 41 | A medline file is a series of entries, each entry is a series of tags. A tag is a 2 to 4 character string each tag is padded with spaces on the left to make it 4 characters which is followed by a dash and a space (`'- '`). Everything after the tag and on all lines after it not starting with a tag is considered associated with the tag. Each entry's first tag is `PMID`, so a first line looks something like `PMID- 26524502`. Entries end with a single blank line. 42 | 43 | # Parameters 44 | 45 | _pubFile_ : `str` 46 | 47 | > A path to a valid medline file, use [isMedlineFile](#metaknowledge.medline.medlineHandlers.isMedlineFile) to verify 48 | 49 | # Returns 50 | 51 | `set[MedlineRecord]` 52 | 53 | > Records for each of the entries 54 | """ 55 | #assumes the file is MEDLINE 56 | recSet = set() 57 | error = None 58 | lineNum = 0 59 | try: 60 | with open(pubFile, 'r', encoding = 'latin-1') as openfile: 61 | f = enumerate(openfile, start = 1) 62 | lineNum, line = next(f) 63 | try: 64 | while True: 65 | if line.startswith("PMID- "): 66 | try: 67 | r = MedlineRecord(itertools.chain([(lineNum, line)], f), sFile = pubFile, sLine = lineNum) 68 | recSet.add(r) 69 | except BadPubmedFile as e: 70 | badLine = lineNum 71 | try: 72 | lineNum, line = next(f) 73 | while not line.startswith("PMID- "): 74 | lineNum, line = next(f) 75 | except (StopIteration, UnicodeDecodeError) as e: 76 | if error is None: 77 | error = BadPubmedFile("The file '{}' becomes unparsable after line: {}, due to the error: {} ".format(pubFile, badLine, e)) 78 | raise e 79 | elif line != '\n': 80 | if error is None: 81 | error = BadPubmedFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(pubFile, lineNum)) 82 | lineNum, line = next(f) 83 | except StopIteration: 84 | #End of the file has been reached 85 | pass 86 | except UnicodeDecodeError: 87 | if error is None: 88 | error = BadPubmedFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(pubFile, lineNum)) 89 | return recSet, error 90 | -------------------------------------------------------------------------------- /docs/examples/Getting-Started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# About Jupyter Notebooks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This document was made from a 
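To make the tag layout that isMedlineFile() and medlineParser() above rely on concrete, here is a short sketch; the 'TI' tag, its value, and the file path are illustrative placeholders, while the 'PMID- ' first line is taken from the docstrings above.

# A MEDLINE entry is a run of '<tag padded to 4 chars>- value' lines, e.g.:
#
#   PMID- 26524502
#   TI  - An example title that may
#         continue on indented lines
#
# with a blank line separating entries.
from metaknowledge.medline.medlineHandlers import isMedlineFile, medlineParser

path = "pubmed_result.txt"  # placeholder for a MEDLINE-format export
if isMedlineFile(path):
    records, error = medlineParser(path)
    print("Parsed {} MEDLINE records".format(len(records)))
    if error is not None:
        print("Some entries could not be parsed:", error)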
[jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells, each cell is either code, output, or markdown (text). For example this cell is markdown, which means it is plain text with a couple small formatting things, like the link in the first sentence. You can change the cell type using the dropdown menu at the top of the page.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "This is an output cell\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "#This cell is python\n", 34 | "#The cell below it is output\n", 35 | "print(\"This is an output cell\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "The code cells contain python code that you can edit and run your self. Try changing the one above." 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# Importing" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "First you need to import the _metaknowledge_ package\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import metaknowledge as mk" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "And you will often need the [networkx](https://networkx.github.io/documentation/networkx-1.9.1/) package\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import networkx as nx" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "And [matplotlib](http://matplotlib.org/) to display the graphs and to make them look nice when displayed\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "import matplotlib.pyplot as plt\n", 104 | "%matplotlib inline" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "_metaknowledge_ also has a _matplotlib_ based graph [visualizer](../documentation/modules/contour.html#metaknowledge.contour.plotting.quickVisual) that will be used sometimes\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 5, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "import metaknowledge.visual as mkv" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "These lines of code will be at the top of all the other lessons as they are what let us use _metaknowledge_." 
130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.4.0" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 0 154 | } 155 | -------------------------------------------------------------------------------- /metaknowledge/WOS/wosHandlers.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from .recordWOS import WOSRecord 4 | from ..mkExceptions import cacheError, BadWOSFile, BadWOSRecord 5 | 6 | def isWOSFile(infile, checkedLines = 3): 7 | """Determines if _infile_ is the path to a WOS file. A file is considered to be a WOS file if it has the correct encoding (`utf-8` with a BOM) and within the first _checkedLines_ a line starts with `"VR 1.0"`. 8 | 9 | # Parameters 10 | 11 | _infile_ : `str` 12 | 13 | > The path to the target file 14 | 15 | _checkedLines_ : `optional [int]` 16 | 17 | > default 3, the number of lines to check for the header 18 | 19 | # Returns 20 | 21 | `bool` 22 | 23 | > `True` if the file is a WOS file 24 | """ 25 | try: 26 | with open(infile, 'r', encoding='utf-8-sig') as openfile: 27 | f = enumerate(openfile, start = 0) 28 | for i in range(checkedLines): 29 | if "VR 1.0" in f.__next__()[1]: 30 | return True 31 | except (StopIteration, UnicodeDecodeError): 32 | return False 33 | else: 34 | return False 35 | 36 | def wosParser(isifile): 37 | """This is a function that is used to create [RecordCollections](../classes/RecordCollection.html#metaknowledge.RecordCollection) from files. 38 | 39 | **wosParser**() reads the file given by the path isifile, checks that the header is correct, then reads until it reaches EF. All WOS records it encounters are parsed with [recordParser()](#metaknowledge.WOS.recordWOS.recordParser) and converted into [Records](../classes/Record.html#metaknowledge.Record). A list of these `Records` is returned. 40 | 41 | `BadWOSFile` is raised if an issue is found with the file.
42 | 43 | # Parameters 44 | 45 | _isifile_ : `str` 46 | 47 | > The path to the target file 48 | 49 | # Returns 50 | 51 | `List[Record]` 52 | 53 | > All the `Records` found in _isifile_ 54 | """ 55 | plst = set() 56 | error = None 57 | try: 58 | with open(isifile, 'r', encoding='utf-8-sig') as openfile: 59 | f = enumerate(openfile, start = 0) 60 | while "VR 1.0" not in f.__next__()[1]: 61 | pass 62 | notEnd = True 63 | while notEnd: 64 | line = f.__next__() 65 | if line[1] == '': 66 | error = BadWOSFile("'{}' does not have an 'EF', lines 1 to {} were checked".format(isifile, line[0] + 1)) 67 | elif line[1].isspace(): 68 | continue 69 | elif 'EF' in line[1][:2]: 70 | notEnd = False 71 | continue 72 | else: 73 | try: 74 | plst.add(WOSRecord(itertools.chain([line], f), sFile = isifile, sLine = line[0])) 75 | except BadWOSFile as e: 76 | try: 77 | s = f.__next__()[1] 78 | while s[:2] != 'ER': 79 | s = f.__next__()[1] 80 | except: 81 | error = BadWOSFile("The file {} was not terminated corrrectly caused the following error:\n{}".format(isifile, str(e))) 82 | try: 83 | f.__next__() 84 | except StopIteration: 85 | pass 86 | else: 87 | error = BadWOSFile("EF not at end of " + isifile) 88 | except UnicodeDecodeError: 89 | try: 90 | error = BadWOSFile("'{}' has a unicode issue on line: {}.".format(isifile, f.__next__()[0])) 91 | except: 92 | #Fallback needed incase f.__next__() causes issues 93 | error = BadWOSFile("'{}' has a unicode issue. Probably when being opened or possibly on the first line".format(isifile)) 94 | except StopIteration: 95 | error = BadWOSFile("The file '{}' ends before EF was found".format(isifile)) 96 | except KeyboardInterrupt as e: 97 | error = e 98 | finally: 99 | if isinstance(error, KeyboardInterrupt): 100 | raise error 101 | return plst, error 102 | -------------------------------------------------------------------------------- /notebooks/Lesson-2-Reading-Files/Reading-Files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading Files" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First we need to import _metaknowledge_ like we saw in lesson 1.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false, 22 | "jupyter": { 23 | "outputs_hidden": false 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import metaknowledge as mk" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "we only need _metaknowledge_ for now so no need to import everything" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "The files from the Web of Science (WOS) can be loaded into a [`RecordCollections`](http://networkslab.org/metaknowledge/docs/RecordCollection#RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string.\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false, 50 | "jupyter": { 51 | "outputs_hidden": false 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "RC = mk.RecordCollection(\"savedrecs.txt\")\n", 57 | "repr(RC)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "You can also read a whole directory, in this case it is reading the current working directory\n" 65 | ] 66 | 
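The WOS handlers above are the same machinery RecordCollection uses when it is pointed at .isi exports (as in the tests and notebooks elsewhere in this listing); a short sketch of both routes, with the path being a placeholder:

import metaknowledge
from metaknowledge.WOS.wosHandlers import isWOSFile, wosParser

path = "savedrecs.isi"  # placeholder for a Web of Science export

# High-level route, as used in the tests and notebooks in this repository:
RC = metaknowledge.RecordCollection(path)

# Low-level route, mirroring what the collection does per file:
if isWOSFile(path):
    records, error = wosParser(path)
    print("Parsed {} WOS records".format(len(records)))
    if error is not None:
        print("File had issues:", error)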
}, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false, 72 | "jupyter": { 73 | "outputs_hidden": false 74 | } 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "RC = mk.RecordCollection(\".\")\n", 79 | "repr(RC)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "_metaknowledge_ can detect if a file is a valid WOS file or not and will read the entire directory and load only those that have the right header. You can also tell it to only read a certain type of file, by using the extension argument.\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false, 94 | "jupyter": { 95 | "outputs_hidden": false 96 | } 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "RC = mk.RecordCollection(\".\", extension = \"txt\")\n", 101 | "repr(RC)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "Now you have a `RecordCollection` composed of all the WOS records in the selected file(s).\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": false, 116 | "jupyter": { 117 | "outputs_hidden": false 118 | } 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "print(\"RC is a \" + str(RC))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "You might have noticed I used two different ways to display the `RecordCollection`. `repr(RC)` will give you where _metaknowledge_ thinks the collection came from. While `str(RC)` will give you a nice string containing the number of `Records`." 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.7.4" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 4 154 | } 155 | -------------------------------------------------------------------------------- /metaknowledge/grants/nsercGrant.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os.path 3 | 4 | from .baseGrant import Grant, csvAndLinesReader 5 | from ..mkExceptions import BadGrant 6 | 7 | class NSERCGrant(Grant): 8 | def __init__(self, original, grantdDict, sFile, sLine): 9 | bad = False 10 | error = None 11 | if grantdDict.get('Cle', '') == '': 12 | bad = True 13 | error = BadGrant("Missing 'CLE'") 14 | idValue = "NSERC:{}".format(hash(original)) 15 | else: 16 | idValue = "NSERC:{}".format(grantdDict.get('Cle', '')) 17 | Grant.__init__(self, original, grantdDict, idValue, bad, error, sFile = sFile, sLine = sLine) 18 | 19 | def update(self, other): 20 | for field, value in other._fieldDict.items(): 21 | if value == '': 22 | continue 23 | elif self._fieldDict.get(field, '') == '': 24 | self._fieldDict[field] = value 25 | else: 26 | self._fieldDict[field] += "; {}".format(value) 27 | 28 | def getInvestigators(self, tags = None, seperator = ";", _getTag = False): 29 | """Returns a list of the names of investigators. The optional arguments are ignored. 
30 | 31 | # Returns 32 | 33 | `list [str]` 34 | 35 | > A list of all the found investigator's names 36 | """ 37 | if tags is None: 38 | tags = [] 39 | elif isinstance(tags, str): 40 | tags = [tags] 41 | for k in self.keys(): 42 | if 'name-' in k.lower() and k not in tags: 43 | tags.append(k) 44 | return super().getInvestigators(tags = tags, seperator = seperator, _getTag = _getTag) 45 | 46 | def getInstitutions(self, tags = None, seperator = ";", _getTag = False): 47 | """Returns a list with the names of the institution. The optional arguments are ignored 48 | 49 | # Returns 50 | 51 | `list [str]` 52 | 53 | > A list with 1 entry the name of the institution 54 | """ 55 | if tags is None: 56 | tags = [] 57 | elif isinstance(tags, str): 58 | tags = [tags] 59 | for k in self.keys(): 60 | if 'institution' in k.lower() and k not in tags: 61 | tags.append(k) 62 | return super().getInvestigators(tags = tags, seperator = seperator, _getTag = _getTag) 63 | 64 | def isNSERCfile(fileName, useFileName = True): 65 | if useFileName and not os.path.basename(fileName).startswith('NSERC_'): 66 | return False 67 | try: 68 | with open(fileName, 'r', encoding = 'latin-1') as openfile: 69 | reader = csv.DictReader(openfile, fieldnames=None, dialect='excel') 70 | length = 0 71 | for row in reader: 72 | length += 1 73 | if set(row.keys()) != set(reader.fieldnames): 74 | return False 75 | if length < 1: 76 | return False 77 | except (StopIteration, UnicodeDecodeError, KeyError): 78 | return False 79 | else: 80 | return True 81 | 82 | def parserNSERCfile(fileName): 83 | grantSet = set() 84 | error = None 85 | try: 86 | with open(fileName, 'r', encoding = 'latin-1') as openfile: 87 | f = enumerate(openfile, start = 1) 88 | reader = csvAndLinesReader(f, fieldnames = None, dialect = 'excel') 89 | for lineNum, lineString, lineDict in reader: 90 | G = NSERCGrant(lineString, lineDict, sFile = fileName, sLine = lineNum) 91 | if G in grantSet: 92 | for Gin in grantSet: 93 | if Gin == G: 94 | Gin.update(G) 95 | break 96 | else: 97 | grantSet.add(G) 98 | except Exception: 99 | if error is None: 100 | error = BadGrant("The file '{}' is having decoding issues. It may have been modifed since it was downloaded or not be a NSERC grant file.".format(fileName)) 101 | except KeyboardInterrupt as e: 102 | error = e 103 | finally: 104 | if isinstance(error, KeyboardInterrupt): 105 | raise error 106 | return grantSet, error 107 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500216.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Collaborative Research: Algebra and Algorithms, Structure and Complexity Theory 6 | 09/15/2015 7 | 08/31/2018 8 | 67889 9 | 10 | Standard Grant 11 | 12 | 13 | 03040000 14 | 15 | Direct For Mathematical & Physical Scien 16 | 17 | 18 | Division Of Mathematical Sciences 19 | 20 | 21 | 22 | Tomek Bartoszynski 23 | 24 | This project is a collaboration between mathematical researchers at five universities, including young mathematicians at the early stages of their careers, who are joining forces to tackle fundamental problems at the confluence of mathematical logic, algebra, and computer science. The overall goal is to deepen understanding about how to recognize the complexity of certain types of computational problems. The project focuses on a suite of mathematical problems whose solutions will yield new information about the complexity of Constraint Satisfaction Problems. 
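A brief usage sketch for the NSERC pathway above; the file name is a placeholder (the detector expects it to start with 'NSERC_'), and note that rows sharing a 'Cle' key are merged into a single NSERCGrant through update() during parsing.

import metaknowledge
from metaknowledge.grants.nsercGrant import isNSERCfile, parserNSERCfile

path = "NSERC_awards_2015.csv"  # placeholder; isNSERCfile checks the 'NSERC_' prefix
if isNSERCfile(path):
    grants, error = parserNSERCfile(path)
    print("{} unique NSERC grants".format(len(grants)))

# The same file, or a directory of grant files, can also be loaded directly:
GC = metaknowledge.GrantCollection(path, extension='.csv')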
These problems (CSP's) include scheduling problems, resource allocation problems, and problems reducible to solving systems of linear equations. CSP's are theoretically solvable, but some are not solvable efficiently. The research will be aimed at identifying a clear boundary between the tractable and intractable cases, and at providing efficient algorithms for solutions in the tractable cases. Many fundamental problems in mathematics and computer science can be formulated as CSP's, and progress here would have both practical and theoretical significance. A second component of the project investigates classical computational problems in algebra in order to determine whether they are algorithmically solvable. A third component of the project is the further development of the software UACalc, which is a proof assistant developed to handle computations involving algebraic structures.<br/><br/>The researchers shall work to decide the truth of the CSP Dichotomy Conjecture of Feder and Vardi, which states that every Constraint Satisfaction Problem with a finite template is solvable in polynomial time or is NP complete. They will further develop the algebraic approach to CSP's by refining knowledge about relations compatible with weak idempotent Maltsev conditions and about algebras with finitely related clones. A second goal of the project concerns the computable recognition of properties of finite algebras connected with the varieties they generate, such as whether a finite algebra with a finite residual bound is finitely axiomatizable, or whether a finite algebra can serve as the algebra of character values for a natural duality. One of the more tangible accomplishments of this project will be a broadening and strengthening of the applicability of the UACalc software. The agenda for this part of the project includes parallelizing the important subroutines, building in conjecture-testing and search features, adding further algorithms, and further developing the community of users and contributors. 25 | 09/11/2015 26 | 09/11/2015 27 | 28 | 1500216 29 | 30 | George 31 | McNulty 32 | mcnulty@math.sc.edu 33 | 09/11/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | University of South Carolina at Columbia 39 | COLUMBIA 40 | 292080001 41 | 8037777093 42 | Sponsored Awards Management 43 | United States 44 | South Carolina 45 | SC 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /notebooks/Lesson-1-Getting-Started/Getting-Started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# About Jupyter Notebooks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This document was made from a [jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells, each cell is either code, output, or markdown (text). For example this cell is markdown, which means it is plain text with a couple small formatting things, like the link in the first sentence. 
You can change the cell type using the dropdown menu at the top of the page.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false, 22 | "jupyter": { 23 | "outputs_hidden": false 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "#This cell is python\n", 29 | "#The cell below it is output\n", 30 | "print(\"This is an output cell\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "The code cells contain python code that you can edit and run your self. Try changing the one above." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# Importing" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "First you need to import the _metaknowledge_ package\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false, 59 | "jupyter": { 60 | "outputs_hidden": false 61 | } 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "import metaknowledge as mk" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "And you will often need the [_networkx_](https://networkx.github.io/documentation/networkx-1.9.1/) package\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false, 80 | "jupyter": { 81 | "outputs_hidden": false 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "import networkx as nx" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "And [_matplotlib_](http://matplotlib.org/) to display the graphs and to make them look nice when displayed\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false, 101 | "jupyter": { 102 | "outputs_hidden": false 103 | } 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "import matplotlib.pyplot as plt\n", 108 | "%matplotlib inline" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "_metaknowledge_ also has a _matplotlib_ based graph [visualizer](http://networkslab.org/metaknowledge/docs/visual#visual) that will be used sometimes\n" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false, 123 | "jupyter": { 124 | "outputs_hidden": false 125 | } 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "import metaknowledge.contour.plotting as mkv" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "These lines of code will be at the top of all the other lessons as they are what let us use _metaknowledge_." 
137 | ] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 3", 143 | "language": "python", 144 | "name": "python3" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.7.4" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 4 161 | } 162 | -------------------------------------------------------------------------------- /metaknowledge/tests/test_graphhelpers.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 2 | import unittest 3 | import metaknowledge 4 | import os 5 | import io 6 | import sys 7 | from metaknowledge.progressBar import _ProgressBar 8 | 9 | fileShortName = 'testNetworks' 10 | fileEName = 'testNetworks_edgeList.tst' 11 | fileNName = 'testNetworks_nodeAttributes.tst' 12 | filesuffix = 'tst' 13 | 14 | class TestHelpers(unittest.TestCase): 15 | @classmethod 16 | def setUpClass(cls): 17 | cls.RCmain = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi") 18 | cls.Gmain = cls.RCmain.networkCoCitation() 19 | 20 | def setUp(self): 21 | metaknowledge.VERBOSE_MODE = False 22 | self.RC = self.RCmain.copy() 23 | self.G = self.Gmain.copy() 24 | 25 | def test_graphwrite(self): 26 | metaknowledge.writeGraph(self.G, fileShortName, suffix = filesuffix) 27 | tmpG = metaknowledge.readGraph(fileEName, fileNName) 28 | self.assertEqual(len(tmpG.edges()), len(self.G.edges())) 29 | self.assertEqual(len(tmpG.nodes()), len(self.G.nodes())) 30 | os.remove(fileEName) 31 | os.remove(fileNName) 32 | 33 | def test_tnetWriter(self): 34 | fName = fileShortName + "_tnet.csv" 35 | G = self.RC.networkTwoMode('AF', 'WC', edgeAttribute = 'PY') 36 | metaknowledge.writeTnetFile(G, fName, 'type', weighted = True, timeString = 'key') 37 | self.assertAlmostEqual(os.path.getsize(fName), 1015, delta=100) 38 | os.remove(fName) 39 | metaknowledge.writeTnetFile(G, fName, 'type') 40 | self.assertAlmostEqual(os.path.getsize(fName), 378, delta=50) 41 | os.remove(fName) 42 | 43 | def test_progress(self): 44 | metaknowledge.VERBOSE_MODE = True 45 | tmpIO = io.StringIO() 46 | P = _ProgressBar(0, "testing", output = tmpIO, dummy = True) 47 | metaknowledge.writeEdgeList(self.G, fileEName, _progBar = P, ) 48 | tmpIO.seek(0) 49 | s = ''.join(tmpIO.readlines()) 50 | self.assertEqual(len(s), 0) 51 | P = _ProgressBar(0, "testing", output = tmpIO) 52 | metaknowledge.writeEdgeList(self.G, fileEName, _progBar = P) 53 | tmpIO.seek(0) 54 | os.remove(fileEName) 55 | s = ''.join(tmpIO.readlines()) 56 | self.assertEqual(s[-14], '[') 57 | self.assertEqual(s[-1], '%') 58 | P.finish("done test") 59 | tmpIO.seek(0) 60 | s = ''.join(tmpIO.readlines()) 61 | self.assertEqual(s[-81:-3], 'done test 0.') 62 | metaknowledge.VERBOSE_MODE = False 63 | 64 | def test_dropEdges(self): 65 | metaknowledge.dropEdges(self.G, minWeight = 1, maxWeight = 3, dropSelfLoops = True) 66 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 493 nodes, 12711 edges, 0 isolates, 0 self loops, a density of 0.104809 and a transitivity of 0.588968") 67 | self.assertTrue(self.G.edges['Imbert C, 1975, NOUV REV OPT', 'Fainman Y, 1984, APPL OPTICS']['weight'] == 1) 68 | 69 | def test_dropNodeByCount(self): 70 | 
metaknowledge.dropNodesByCount(self.G, minCount = 2, maxCount = 5) 71 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 106 nodes, 1205 edges, 0 isolates, 17 self loops, a density of 0.218149 and a transitivity of 0.751036") 72 | self.assertTrue(self.G.node['Shih H, 1971, PHYS REV A']['count'] == 2) 73 | 74 | def test_dropNodesByDegree(self): 75 | metaknowledge.dropNodesByDegree(self.G, minDegree = 20, maxDegree = 100) 76 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 385 nodes, 5923 edges, 0 isolates, 11 self loops, a density of 0.0802083 and a transitivity of 0.954487") 77 | self.assertTrue(self.G.edges['Mazur P, 1953, MEM ACAD ROY BELG', 'Livens Gh, 1948, P CAMB PHILOS SOC']['weight'] == 1) 78 | 79 | def test_mergeGraphs(self): 80 | RC1 = self.RC.yearSplit(0,1978) 81 | RC2 = self.RC.yearSplit(1979,10000) 82 | G1 = RC1.networkCoCitation() 83 | G2 = RC2.networkCoCitation() 84 | metaknowledge.mergeGraphs(G1,G2) 85 | for node, attr in G1.nodes(data = True): 86 | self.assertEqual(self.G.node[node]['count'], attr['count']) 87 | for node1, node2, attr in G1.edges(data = True): 88 | self.assertEqual(self.G.edges[node1, node2]['weight'], attr['weight']) 89 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500186.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | DDRIG: Expanding Phonological Typology through Kaco' Sound Patterns 6 | 06/01/2015 7 | 11/30/2017 8 | 14500 9 | 10 | Standard Grant 11 | 12 | 13 | 04040000 14 | 15 | Direct For Social, Behav & Economic Scie 16 | 17 | 18 | Division Of Behavioral and Cognitive Sci 19 | 20 | 21 | 22 | Colleen Fitzgerald 23 | 24 | Emily Olsen, under the direction of Juliette Blevins of the City University of New York, will conduct a study of the sound system of Kaco', an undescribed minority language spoken in rural Ratanakiri Province, Cambodia. Kaco' is an Austro-Asiatic language of the Mon Khmer family and, like other languages of this family, Kaco' is phonologically complex and unusual. Olsen's study of Kaco' will therefore advance knowledge of what types of phonological systems are possible for human languages. In addition, because sounds and sound processes provide important clues to language relatedness, Olsen's description of Kaco' sound patterns will facilitate hypotheses on the features that are unique to or characteristic of the Austroasiatic language family and will help disambiguate historical relationships between languages and speaker groups of the region.<br/><br/>The primary research questions involve the description of the sound patterns of the Kaco' language including contrastive vowels and consonants and their phonetic variants, the structure of Kaco' syllables; laryngeal contrasts in Kaco'syllables and their phonetic properties and phonological domains. Olsen's study will focus on documenting complex syllable structure and the role between syllables within a word. She will document phonation (vocal fold vibration) and tonogenesis (e.g., the birth of tone through consonant weakening). <br/><br/>In order to pursue these research questions, Olsen will collect speech samples from Kaco' speakers from several villages. Her data samples will include recordings of freeform narratives, wordlists, songs, and community folklore. The resulting corpus will be the first cultural and linguistic resource of its kind for use by Kaco' people. 
This resource will enable development of literacy materials for the community, including a Kaco' orthography, children's books and recordings, and a dictionary.<br/><br/>Data from this project will be archived at the Lund University's Repository &<br/>Workspace for Austroasiatic Intangible Heritage. 25 | 06/01/2015 26 | 06/01/2015 27 | 28 | 1500186 29 | 30 | Juliette 31 | Blevins 32 | jblevins@gc.cuny.edu 33 | 06/01/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | Emily 39 | Olsen 40 | elong1@gc.cuny.edu 41 | 06/01/2015 42 | 43 | Co-Principal Investigator 44 | 45 | 46 | CUNY Graduate School University Center 47 | New York 48 | 100164309 49 | 2128177523 50 | 365 Fifth Avenue 51 | United States 52 | New York 53 | NY 54 | 55 | 56 | 7719 57 | DEL 58 | 59 | 60 | 7719 61 | DEL 62 | 63 | 64 | 9179 65 | GRADUATE INVOLVEMENT 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /metaknowledge/tests/nsfTestFiles/1500194.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | PFI:AIR - TT: Pulsed Shaped Magnetic Fields to Focus Therapy to Deep Tissue Targets 6 | 05/15/2015 7 | 10/31/2016 8 | 200000 9 | 10 | Standard Grant 11 | 12 | 13 | 07070000 14 | 15 | Directorate For Engineering 16 | 17 | 18 | Div Of Industrial Innovation & Partnersh 19 | 20 | 21 | 22 | Barbara H. Kenny 23 | 24 | This PFI: AIR Technology Translation (TT) project aims to enable a safe and effective magnetic focusing of magnetic particle therapies to address inoperable deep tissue tumors. The proposed technique of pulsed magnetic focusing will deliver nanotherapeutics to deep targets in order to direct chemotherapy to where it needs to go in the body. If successful, this technique would enable a technology that could improve treatment for a wide range of diseases. The project will result in a prototype device that will dynamically focus nanorods to deep targets in preclinical studies. In this research, biocompatible nanorods are first aligned in one direction by a fast magnetic pulse, and then before they can turn around a second shaped fast magnetic pulse applies forces on the rods that serve to focus them to a central target. Repeat magnetic pulsing brings all the rods to a central target between the magnets. These features provide the key advantage that therapy can now be focused to a deep target between magnets, for example to treat inoperable deep tissue tumors. Focusing of therapy to deep tissue targets has been a key goal in magnetic drug targeting, and prior efforts in this field have not yet been able to achieve this goal. <br/><br/>This project addresses the following technology gap(s) as it translates from research discovery toward commercial application. Dynamic magnetic focusing of nanorods to a target between magnets was shown in benchtop experiments. In this NSF AIR TT research, the technology will be tested in tissue samples, scaled up to an in-vivo system, and its safety and utility shall be optimized and verified. In addition, personnel involved in this project will receive innovation, entrepreneurship, and technology translation experiences through developing and helping commercialize this technology.<br/><br/>The project engages Weinberg Medical Physics who will act as an industry liaison and supply the effort with equipment, expertise, and with connections to strategic partners and future investors in this technology translation effort from research discovery toward commercial reality. 
25 | 05/06/2015 26 | 05/06/2015 27 | 28 | 1500194 29 | 30 | Benjamin 31 | Shapiro 32 | benshap@eng.umd.edu 33 | 05/06/2015 34 | 35 | Principal Investigator 36 | 37 | 38 | Irving 39 | Weinberg 40 | inweinberg@gmail.com 41 | 05/06/2015 42 | 43 | Co-Principal Investigator 44 | 45 | 46 | University of Maryland College Park 47 | COLLEGE PARK 48 | 207425141 49 | 3014056269 50 | 3112 LEE BLDG 7809 Regents Drive 51 | United States 52 | Maryland 53 | MD 54 | 55 | 56 | 8019 57 | Accelerating Innovation Rsrch 58 | 59 | 60 | 8019 61 | Accelerating Innovation Rsrch 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /metaknowledge/__init__.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016 2 | """_metaknowledge_ is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data. 3 | 4 | # Example 5 | 6 | To load the data from files and make a network: 7 | 8 | >>> import metaknowledge as mk 9 | >>> RC = mk.RecordCollection("records/") 10 | >>> print(RC) 11 | Collection of 33 records 12 | >>> G = RC.coCiteNetwork(nodeType = 'journal') 13 | Done making a co-citation network of files-from-records 1.1s 14 | >>> print(len(G.nodes())) 15 | 223 16 | >>> mk.writeGraph(G, "Cocitation-Network-of-Journals") 17 | 18 | There is also a simple command line program called `metaknowledge` that comes with the package. It allows for creating networks without any need to know Python. More information about it can be found at [networkslab.org/metaknowledge/cli]({{ site.baseurl }}/cli) 19 | 20 | # Overview 21 | 22 | This package can read the files downloaded from Thomson Reuters' [Web of Science](https://webofknowledge.com) (_WOS_), Elsevier's [Scopus](https://www.scopus.com/), [ProQuest](www.proquest.com/) and Medline files from [PubMed](www.ncbi.nlm.nih.gov/pubmed). These files contain entries on the metadata of scientific records, such as authors, title, and citations. _metaknowledge_ can also read grants from various organizations including _NSF_ and _NSERC_, which are handled similarly to records. 23 | 24 | The [metaknowledge.RecordCollection](./documentation/classes/RecordCollection.html#recordcollection-collectionwithids) class can take a path to one or more of these files, then load and parse them. The object is the main way for work to be done on multiple records. For each individual record it creates an instance of the [metaknowledge.Record](./documentation/classes/Record.html#record-mapping-hashable) class that contains the results of the parsing of the record. 25 | 26 | The files read by _metaknowledge_ are databases containing a series of tags (implicitly or explicitly), e.g. `'TI'` is the title for WOS. Each tag has one or more values and metaknowledge can read them and extract useful information. As the tags differ between providers, a small set of values can be accessed by special tags; these tags are listed in `commonRecordFields`. These special tags can act on the whole `Record` and as such may contain information provided by any number of other tags. 27 | 28 | Citations are handled by a special [Citation](./documentation/classes/Citation.html#module-metaknowledge.citation) class. This class can parse the citations given by _WOS_ and journals cited by _Scopus_ and allows for better comparisons when they are used in graphs. 
29 | 30 | Note for those reading the docstrings: metaknowledge's docs are written in markdown and are processed to produce the documentation found at [metaknowledge.readthedocs.io](https://metaknowledge.readthedocs.io/en/latest/), but you should have no problem reading them from the help function. 31 | """ 32 | 33 | from .constants import VERBOSE_MODE, __version__, commonRecordFields, FAST_CITES 34 | from .mkExceptions import BadCitation, BadGrant, BadInputFile, BadProQuestFile, BadProQuestRecord, BadPubmedFile, BadPubmedRecord, BadRecord, BadWOSFile, BadWOSRecord, CollectionTypeError, GrantCollectionException, RCTypeError, RCValueError, RecordsNotCompatible, UnknownFile, cacheError, mkException, TagError, BadScopusRecord 35 | 36 | from .graphHelpers import writeEdgeList, writeNodeAttributeFile, writeGraph, readGraph, dropEdges, dropNodesByDegree, dropNodesByCount, mergeGraphs, graphStats, writeTnetFile 37 | from .diffusion import diffusionGraph, diffusionCount, diffusionAddCountsFromSource 38 | 39 | from .citation import Citation, filterNonJournals 40 | from .mkCollection import Collection, CollectionWithIDs 41 | from .mkRecord import Record, ExtendedRecord 42 | 43 | from .grantCollection import GrantCollection 44 | from .grants import NSERCGrant, CIHRGrant, MedlineGrant, NSFGrant, Grant, FallbackGrant 45 | 46 | from .recordCollection import RecordCollection 47 | from .WOS import WOSRecord 48 | from .medline import MedlineRecord 49 | from .proquest import ProQuestRecord 50 | from .scopus import ScopusRecord 51 | 52 | from .journalAbbreviations.backend import updatej9DB 53 | from .genders.nameGender import downloadData 54 | 55 | def downloadExtras(): 56 | """Downloads all the external files used by metaknowledge. This will overwrite existing files 57 | """ 58 | print("Downloading journal abbreviations data") 59 | updatej9DB() 60 | print("Downloading gender name data") 61 | downloadData() 62 | -------------------------------------------------------------------------------- /metaknowledge/tests/OnePaper2.isi: -------------------------------------------------------------------------------- 1 | FN Thomson Reuters Web of Science™ 2 | VR 1.0 3 | PT J 4 | AU Hu, SZ 5 | Li, SF 6 | Fu, XW 7 | Li, YJ 8 | Wang, TD 9 | AF Hu, S. Z. 10 | Li, S. F. 11 | Fu, X. W. 12 | Li, Y. J. 13 | Wang, T. D. 14 | TI Identification of Highly to Over Mature Gas Interval Using Reservoir 15 | Bitumen Features: Sichuan Basin of Southwest China 16 | SO PETROLEUM SCIENCE AND TECHNOLOGY 17 | LA English 18 | DT Article 19 | DE reservoir geochemistry; highly to over mature; reservoir bitumen; gas 20 | geochemistry; China; Sichuan Basin; gas interval 21 | ID FIELD 22 | AB The identification of oil and gas intervals has been mainly constrained by the interpretation on well logging data. However, this has met great difficulties with petroleum accumulation cases becoming more and more complex. The authors discuss an identification of highly to over mature gas interval using bitumen features, based on a case study in the Sichuan Basin of southwest China. The bitumen in the gas interval is characterized by relatively rich low-weight n-alkanes with a single pre-peak. In addition, pregnane, homopregnane, and tricyclic terpanes are enriched. Thus, the indicated maturity is highly to over mature. The late-arrived highly mature oils were cracked into gas, with little filling of bitumen in reservoir space. By contrast, the bitumen in the interval with little bearing of gas has opposite features. 
It is relatively enriched in high-weight n-alkanes and extremely low abundance of pregnane, homopregnane, and tricyclic terpanes. Thus the oils in such dry intervals are mostly low-mature early arrived oils, which tightly adsorbed on the carbonate grains. The pore space is almost fully filled by bitumen, being unfavorable for gas charge and preservation. As these features are all collected from routine analyses, the method can be applicable. The results can provide new fundamental information for regional oil testing and petroleum exploration, thereby reducing economic loss and exploration risk. In addition, the results also have wide implications for the other works with similar aims. 23 | C1 [Hu, S. Z.; Li, S. F.] China Univ Geosci, Minist Educ, Key Lab Tecton & Petr Resources, Wuhan 430074, Peoples R China. 24 | [Fu, X. W.; Li, Y. J.; Wang, T. D.] Southwest Petr Univ, Sch Resources & Environm, Chengdu, Peoples R China. 25 | RP Hu, SZ (reprint author), China Univ Geosci, Minist Educ, Key Lab Tecton & Petr Resources, Wuhan 430074, Peoples R China. 26 | EM hushzh@cug.edu.cn 27 | FU "973" project of China [2012CB214804]; National Natural Science 28 | Foundation of China [41273052, 40902037] 29 | FX This study was jointly funded by the "973" project of China (Grant No. 30 | 2012CB214804) and the National Natural Science Foundation of China 31 | (Grant Nos. 41273052 and 40902037). 32 | CR BASKIN DK, 1995, AAPG BULL, V79, P337 33 | Cai CF, 2010, ORG GEOCHEM, V41, P871, DOI 10.1016/j.orggeochem.2010.03.009 34 | 陈世加, 1998, 沉积学报, V16, P150 35 | Chen S. J., 2001, WELL LOGGING TECHNOL, V25, P136 36 | Chen S.J., 2001, NAT GAS IND, V21, P39 37 | Fang YX, 2011, J ASIAN EARTH SCI, V41, P147, DOI 10.1016/j.jseaes.2011.01.012 38 | Gao C. X., 2010, TUHA OIL GAS, V5, P326 39 | Hu AP, 2010, ORG GEOCHEM, V41, P924, DOI 10.1016/j.orggeochem.2010.01.001 40 | [胡守志 Hu Shouzhi], 2005, [石油实验地质, Petroleum Geology & Experiment], V27, P222 41 | Jones P. J., 2010, European Patent, Patent No. [EP1719875, 1719875] 42 | Kim D, 2010, AAPG BULL, V94, P1031, DOI 10.1306/12090909096 43 | Larijani G. R., 2010, 136318 SPE 44 | [李潮流 Li Chaoliu], 2004, [测井技术, Well Logging Technology], V28, P128 45 | Liu Dehan, 1994, PETROLEUM EXPLORATIO, V21, P113 46 | Liu SG, 2008, J CHINA UNIV GEOSCI, V19, P700 47 | LOMANDO AJ, 1992, AAPG BULL, V76, P1137 48 | Mathur N, 2001, AAPG BULL, V85, P309 49 | [沈慧萍 Shen Huiping], 2005, [天然气工业, Natural Gas Industry], V25, P47 50 | Simon R. K., 2010, GEOCHIM COSMOCHIM AC, V74, P5305 51 | Xiao D. S., 2010, FAULT BLOCK OIL GAS, V17, P509 52 | [张津海 Zhang Jinhai], 2011, [石油与天然气地质, Oil & Gas Geology], V32, P593 53 | [赵军 Zhao Jun], 2011, [石油与天然气地质, Oil & Gas Geology], V32, P245 54 | Zhao W. Z., 2010, PETROL SCI, V7, P289 55 | NR 23 56 | TC 0 57 | Z9 0 58 | PU TAYLOR & FRANCIS INC 59 | PI PHILADELPHIA 60 | PA 520 CHESTNUT STREET, STE 850, PHILADELPHIA, PA 19106 USA 61 | SN 1091-6466 62 | EI 1532-2459 63 | J9 PETROL SCI TECHNOL 64 | JI Pet. Sci. Technol. 
65 | PD JUN 18 66 | PY 2014 67 | VL 32 68 | IS 12 69 | BP 1437 70 | EP 1442 71 | DI 10.1080/10916466.2012.664227 72 | PG 6 73 | WC Energy & Fuels; Engineering, Chemical; Engineering, Petroleum 74 | SC Energy & Fuels; Engineering 75 | GA AE4LR 76 | UT WOS:000333954400006 77 | ER 78 | 79 | EF -------------------------------------------------------------------------------- /docs/examples/Reading-Files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading Files" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First we need to import _metaknowledge_ like we saw in lesson 1.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import metaknowledge as mk" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "we only need _metaknowledge_ for now so no need to import everything" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "The files from the Web of Science (WOS) can be loaded into a [RecordCollections](../documentation/classes/RecordCollection.html#metaknowledge.RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string.\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "'savedrecs'" 53 | ] 54 | }, 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "RC = mk.RecordCollection(\"savedrecs.txt\")\n", 62 | "repr(RC)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "You can also read a whole directory, in this case it is reading the current working directory\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "'files-from-.'" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "RC = mk.RecordCollection(\".\")\n", 92 | "repr(RC)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "_metaknowledge_ can detect if a file is a valid WOS file or not and will read the entire directory and load only those that have the right header. 
You can also tell it to only read a certain type of file by using the extension argument.\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 4, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "'txt-files-from-.'" 113 | ] 114 | }, 115 | "execution_count": 4, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "RC = mk.RecordCollection(\".\", extension = \"txt\")\n", 122 | "repr(RC)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Now you have a `RecordCollection` composed of all the WOS records in the selected file(s).\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "RC is a Collection of 32 records\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "print(\"RC is a \" + str(RC))" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "You might have noticed I used two different ways to display the `RecordCollection`. `repr(RC)` will give you where _metaknowledge_ thinks the collection came from, while `str(RC)` will give you a nice string containing the number of `Records`." 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.4.0" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 0 180 | } 181 | -------------------------------------------------------------------------------- /metaknowledge/bin/metaknowledgeMdToNb.py: -------------------------------------------------------------------------------- 1 | #Written by Reid McIlroy-Young for Dr. 
John McLevey, University of Waterloo 2015 2 | import argparse 3 | import re 4 | import os.path 5 | import subprocess 6 | 7 | args = argparse.Namespace() 8 | codeRegex = re.compile(r'\[([0-9]*)\](.*)') 9 | 10 | startString = """{ 11 | "cells": [ 12 | """ 13 | 14 | endString = """ ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 3", 18 | "language": "python", 19 | "name": "python3" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 3 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython3", 31 | "version": "3.4.3" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 0 36 | }""" 37 | 38 | def argumentParser(): 39 | parser = argparse.ArgumentParser(description="A simple script to convert markdown (.md) files to iPython Notebooks (.ipynb)") 40 | #parser.add_argument("--output", "-o") 41 | #parser.add_argument("--execute", "-e", action = "store_true", default = False) 42 | parser.add_argument("files", type=argparse.FileType('r'), default = [], nargs = '*') 43 | 44 | return parser.parse_args() 45 | 46 | def convertString(file): 47 | currentExNum = float('inf') 48 | currentBufferType = '' 49 | stringBuffer = '' 50 | stringResult = [] 51 | for line in file.readlines(): 52 | code = re.match(codeRegex,line) 53 | if stringBuffer == '': 54 | if code: 55 | stringBuffer = code.group(2) + '\n' 56 | currentExNum = code.group(1) 57 | currentBufferType = 'py' 58 | else: 59 | stringBuffer = line 60 | currentBufferType = 'md' 61 | currentExNum = float('inf') 62 | else: 63 | if code: 64 | if currentBufferType == 'py' and currentExNum == code.group(1): 65 | stringBuffer += code.group(2) + '\n' 66 | elif currentBufferType == 'py': 67 | stringResult.append(writePYcell(stringBuffer, currentExNum)) 68 | stringBuffer = code.group(2)+ '\n' 69 | currentExNum = code.group(1) 70 | currentBufferType = 'py' 71 | else: 72 | stringResult.append(writeMDcell(stringBuffer)) 73 | stringBuffer = code.group(2)+ '\n' 74 | currentExNum = code.group(1) 75 | currentBufferType = 'py' 76 | else: 77 | if currentBufferType == 'md': 78 | if line == '\n' and stringBuffer[-2:] == '\n\n': 79 | stringResult.append(writeMDcell(stringBuffer[:-1])) 80 | stringBuffer = '' 81 | currentBufferType = '' 82 | else: 83 | stringBuffer += line 84 | else: 85 | stringResult.append(writePYcell(stringBuffer, currentExNum)) 86 | stringBuffer = line 87 | currentBufferType = 'md' 88 | currentExNum = float('inf') 89 | if stringBuffer != '': 90 | if currentBufferType == 'md': 91 | stringResult.append(writeMDcell(stringBuffer)) 92 | else: 93 | stringResult.append(writePYcell(stringBuffer, excount = currentExNum)) 94 | return startString + ',\n '.join(stringResult) + endString 95 | 96 | def convert(file): 97 | nameCompts = os.path.splitext( os.path.expanduser(os.path.normpath(file.name))) 98 | fileName = nameCompts[0] + '.ipynb' 99 | outFile = open(fileName, 'w+') 100 | outFile.write(convertString(file)) 101 | return fileName 102 | 103 | 104 | def stringPreprossesing(s): 105 | s = s.lstrip('\n') 106 | s = s.replace(r'"', r'\"') 107 | s = s.replace('\n', '\\n",\n "')[:-11] 108 | return s 109 | 110 | 111 | def writeMDcell(s): 112 | return """{{ 113 | "cell_type": "markdown", 114 | "metadata": {{}}, 115 | "source": [ 116 | "{0}" 117 | ] 118 | }}""".format(stringPreprossesing(s)) 119 | 120 | def writePYcell(s, excount = ''): 121 | return """{{ 122 | "cell_type": "code", 123 | 
"execution_count": null, 124 | "metadata": {{ 125 | "collapsed": false 126 | }}, 127 | "outputs": [], 128 | "source": [ 129 | "{}" 130 | ] 131 | }}""".format(stringPreprossesing(s)) 132 | 133 | def mkMdToNb(): 134 | args = argumentParser() 135 | for f in args.files: 136 | fname = convert(f) 137 | if __name__ =='__main__': 138 | mkMdToNb() 139 | --------------------------------------------------------------------------------