├── metaknowledge
│   ├── WOS
│   │   ├── tagProcessing
│   │   │   ├── __init__.py
│   │   │   ├── helpFuncs.py
│   │   │   └── funcDicts.py
│   │   ├── __init__.py
│   │   └── wosHandlers.py
│   ├── proquest
│   │   ├── tagProcessing
│   │   │   ├── __init__.py
│   │   │   ├── specialFunctions.py
│   │   │   └── tagFunctions.py
│   │   └── __init__.py
│   ├── scopus
│   │   ├── tagProcessing
│   │   │   ├── __init__.py
│   │   │   ├── specialFunctions.py
│   │   │   └── tagFunctions.py
│   │   ├── __init__.py
│   │   └── scopusHandlers.py
│   ├── medline
│   │   ├── tagProcessing
│   │   │   ├── __init__.py
│   │   │   ├── specialFunctions.py
│   │   │   └── tagNames.py
│   │   ├── __init__.py
│   │   └── medlineHandlers.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── nsfTestFiles
│   │   │   ├── source.md
│   │   │   ├── 6800031.xml
│   │   │   ├── 6800030.xml
│   │   │   ├── 6800097.xml
│   │   │   ├── 6800025.xml
│   │   │   ├── noID.xml
│   │   │   ├── 6800104.xml
│   │   │   ├── 6800077.xml
│   │   │   ├── 69W3551.xml
│   │   │   ├── 69W3546.xml
│   │   │   ├── 69W3548.xml
│   │   │   ├── 69W3550.xml
│   │   │   ├── 69W3547.xml
│   │   │   ├── 69W3549.xml
│   │   │   ├── badXMLfile.xml
│   │   │   ├── 6800039.xml
│   │   │   ├── 1500217.xml
│   │   │   ├── 1500219.xml
│   │   │   ├── 1500201.xml
│   │   │   ├── twoAwardFile.xml
│   │   │   ├── 1500216.xml
│   │   │   ├── 1500186.xml
│   │   │   └── 1500194.xml
│   │   ├── isitopythonHelper.py
│   │   ├── SimplePaper.isi
│   │   ├── test_constants.py
│   │   ├── test_proquest.py
│   │   ├── test_wos.py
│   │   ├── test_grants.py
│   │   ├── OnePaperNoCites.isi
│   │   ├── test_medline.py
│   │   ├── test_scopus.py
│   │   ├── test_citation.py
│   │   ├── test_grantCollection.py
│   │   ├── test_diffusion.py
│   │   ├── TwoPaper.isi
│   │   ├── test_graphhelpers.py
│   │   └── OnePaper2.isi
│   ├── genders
│   │   ├── __init__.py
│   │   └── nameGender.py
│   ├── bin
│   │   ├── __init__.py
│   │   └── metaknowledgeMdToNb.py
│   ├── contour
│   │   └── __init__.py
│   ├── constants.py
│   ├── grants
│   │   ├── scopusGrant.py
│   │   ├── medlineGrant.py
│   │   ├── cihrGrant.py
│   │   ├── __init__.py
│   │   └── nsercGrant.py
│   ├── journalAbbreviations
│   │   └── __init__.py
│   ├── fileHandlers.py
│   ├── mkExceptions.py
│   ├── RCglimpse.py
│   └── __init__.py
├── setup.cfg
├── vagrant
│   ├── win_run.cmd
│   ├── linux_run.sh
│   ├── updates.sh
│   ├── mac_run
│   ├── Vagrantfile
│   └── bootstrap
├── docs
│   ├── requirements.txt
│   ├── documentation
│   │   ├── classes
│   │   │   ├── CIHRGrant.rst
│   │   │   ├── Citation.rst
│   │   │   ├── Record.rst
│   │   │   ├── NSFGrant.rst
│   │   │   ├── ExtendedRecord.rst
│   │   │   ├── NSERCGrant.rst
│   │   │   ├── WOSRecord.rst
│   │   │   ├── FallbackGrant.rst
│   │   │   ├── MedlineGrant.rst
│   │   │   ├── Collection.rst
│   │   │   ├── ScopusRecord.rst
│   │   │   ├── CollectionWithIDs.rst
│   │   │   ├── MedlineRecord.rst
│   │   │   ├── GrantCollection.rst
│   │   │   ├── ProQuestRecord.rst
│   │   │   ├── RecordCollection.rst
│   │   │   ├── Grant.rst
│   │   │   └── index.rst
│   │   ├── exceptions
│   │   │   └── index.rst
│   │   ├── modules
│   │   │   ├── index.rst
│   │   │   ├── contour.rst
│   │   │   ├── journalAbbreviations.rst
│   │   │   ├── scopus.rst
│   │   │   ├── medline.rst
│   │   │   ├── proquest.rst
│   │   │   ├── WOS.rst
│   │   │   └── grants.rst
│   │   ├── index.rst
│   │   ├── example.rst
│   │   ├── overview.rst
│   │   └── functions_methods
│   │       └── index.rst
│   ├── Makefile
│   ├── make.bat
│   ├── examples
│   │   ├── index.rst
│   │   ├── Getting-Started.ipynb
│   │   └── Reading-Files.ipynb
│   ├── mkdsupport.py
│   ├── CLI.rst
│   └── index.rst
├── README.md
├── .github
│   └── workflows
│       └── python-publish.yml
├── notebooks
│   ├── Lesson-2-Reading-Files
│   │   ├── Reading-Files.md
│   │   └── Reading-Files.ipynb
│   └── Lesson-1-Getting-Started
│       ├── Getting-Started.md
│       └── Getting-Started.ipynb
├── .gitignore
├── inheritance-structure.dot
└── setup.py
/metaknowledge/WOS/tagProcessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/metaknowledge/proquest/tagProcessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/metaknowledge/scopus/tagProcessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/metaknowledge/medline/tagProcessing/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/vagrant/win_run.cmd:
--------------------------------------------------------------------------------
1 | @Echo OFF
2 | vagrant up
3 | start "" http://localhost:1159
4 | pause
5 |
--------------------------------------------------------------------------------
/vagrant/linux_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | cd "$(dirname "$0")"
4 |
5 | vagrant up
6 | vagrant ssh
7 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | nbsphinx
2 | pypandoc
3 | cloud_sptheme
4 | sphinx==1.8.2
5 | sphinx_rtd_theme
6 | ipython
--------------------------------------------------------------------------------
/vagrant/updates.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | echo "Updating"
4 | echo "No updates found"
5 |
6 | exit 0
7 |
--------------------------------------------------------------------------------
/metaknowledge/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 |
--------------------------------------------------------------------------------
/vagrant/mac_run:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | cd "$(dirname "$0")"
4 |
5 | vagrant up
6 | sleep 1
7 | open http://localhost:1159
8 | tput bel
9 | vagrant ssh
10 |
--------------------------------------------------------------------------------
/docs/documentation/classes/CIHRGrant.rst:
--------------------------------------------------------------------------------
1 | CIHRGrant(Grant)
2 | ================
3 |
4 |
5 |
6 | .. automodule:: metaknowledge.grants.cihrGrant
7 | :members:
8 | :undoc-members:
9 |
--------------------------------------------------------------------------------
/docs/documentation/classes/Citation.rst:
--------------------------------------------------------------------------------
1 | Citation(Hashable)
2 | ==================
3 |
4 |
5 | .. automodule:: metaknowledge.citation
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/metaknowledge/genders/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | from .nameGender import nameStringGender, recordGenders, downloadData, getMapping
3 |
--------------------------------------------------------------------------------
/docs/documentation/classes/Record.rst:
--------------------------------------------------------------------------------
1 | Record(Mapping, Hashable)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.Record
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/source.md:
--------------------------------------------------------------------------------
1 | These files are originally from [https://www.nsf.gov/awardsearch/download.jsp](https://www.nsf.gov/awardsearch/download.jsp) and have been modified to aid testing.
2 |
--------------------------------------------------------------------------------
/docs/documentation/classes/NSFGrant.rst:
--------------------------------------------------------------------------------
1 | NSFGrant(Grant)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.grants.NSFGrant
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/exceptions/index.rst:
--------------------------------------------------------------------------------
1 | Exceptions
2 | ==========
3 |
4 | The exceptions defined by *metaknowledge* are:
5 |
6 | .. automodule:: metaknowledge.mkExceptions
7 | :members:
8 | :undoc-members:
9 |
--------------------------------------------------------------------------------
/docs/documentation/classes/ExtendedRecord.rst:
--------------------------------------------------------------------------------
1 | ExtendedRecord(Record)
2 | ======================
3 |
4 |
5 | .. autoclass:: metaknowledge.ExtendedRecord
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/NSERCGrant.rst:
--------------------------------------------------------------------------------
1 | NSERCGrant(Grant)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.grants.NSERCGrant
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/WOSRecord.rst:
--------------------------------------------------------------------------------
1 | WOSRecord(ExtendedRecord)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.WOS.WOSRecord
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/FallbackGrant.rst:
--------------------------------------------------------------------------------
1 | FallbackGrant(Grant)
2 | ======================
3 |
4 |
5 | .. autoclass:: metaknowledge.grants.FallbackGrant
6 | :members:
7 | :special-members:
8 | :private-members:
9 |
--------------------------------------------------------------------------------
/docs/documentation/classes/MedlineGrant.rst:
--------------------------------------------------------------------------------
1 | MedlineGrant(Grant)
2 | ==================================
3 |
4 |
5 | .. autoclass:: metaknowledge.MedlineGrant
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/modules/index.rst:
--------------------------------------------------------------------------------
1 | Modules
2 | *******
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | contour
8 | grants
9 | journalAbbreviations
10 | medline
11 | proquest
12 | scopus
13 | WOS
--------------------------------------------------------------------------------
/docs/documentation/classes/Collection.rst:
--------------------------------------------------------------------------------
1 | Collection(MutableSet, Hashable)
2 | ================================
3 |
4 |
5 | .. autoclass:: metaknowledge.Collection
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/ScopusRecord.rst:
--------------------------------------------------------------------------------
1 | ScopusRecord(ExtendedRecord)
2 | ============================
3 |
4 |
5 | .. autoclass:: metaknowledge.scopus.ScopusRecord
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/CollectionWithIDs.rst:
--------------------------------------------------------------------------------
1 | CollectionWithIDs(Collection)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.CollectionWithIDs
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/MedlineRecord.rst:
--------------------------------------------------------------------------------
1 | MedlineRecord(ExtendedRecord)
2 | =============================
3 |
4 |
5 | .. autoclass:: metaknowledge.medline.MedlineRecord
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/modules/contour.rst:
--------------------------------------------------------------------------------
1 | contour
2 | =======
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.contour
7 |
8 | Functions
9 | ---------
10 | .. automodule:: metaknowledge.contour.plotting
11 | :members:
--------------------------------------------------------------------------------
/docs/documentation/classes/GrantCollection.rst:
--------------------------------------------------------------------------------
1 | GrantCollection(CollectionWithIDs)
2 | ==================================
3 |
4 |
5 | .. autoclass:: metaknowledge.GrantCollection
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/ProQuestRecord.rst:
--------------------------------------------------------------------------------
1 | ProQuestRecord(ExtendedRecord)
2 | ==============================
3 |
4 |
5 | .. autoclass:: metaknowledge.proquest.ProQuestRecord
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/metaknowledge/bin/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | from .metaknowledgeCLI import mkCLI
3 | from .metaknowledgeMdToNb import mkMdToNb
4 | from .metaknowledgeDocsGen import mkDocs
5 |
--------------------------------------------------------------------------------
/docs/documentation/classes/RecordCollection.rst:
--------------------------------------------------------------------------------
1 | RecordCollection(CollectionWithIDs)
2 | ===================================
3 |
4 |
5 | .. autoclass:: metaknowledge.RecordCollection
6 | :members:
7 | :special-members:
8 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/classes/Grant.rst:
--------------------------------------------------------------------------------
1 | Grant(Record, MutableMapping)
2 | =============================
3 |
4 | .. automodule:: metaknowledge.grants
5 |
6 | .. autoclass:: metaknowledge.grants.Grant
7 | :members:
8 | :special-members:
9 | :private-members:
--------------------------------------------------------------------------------
/docs/documentation/index.rst:
--------------------------------------------------------------------------------
1 | Documentation
2 | *************
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 1
7 |
8 |
9 | example
10 | overview
11 | modules/index
12 | classes/index
13 | functions_methods/index
14 | exceptions/index
--------------------------------------------------------------------------------
/docs/documentation/modules/journalAbbreviations.rst:
--------------------------------------------------------------------------------
1 | journalAbbreviations
2 | ==========================================
3 |
4 |
5 | Overview
6 | ---------------
7 | .. automodule:: metaknowledge.journalAbbreviations
8 |
9 | Functions
10 | ---------
11 | .. automodule:: metaknowledge.journalAbbreviations.backend
12 | :members:
13 |
--------------------------------------------------------------------------------
/metaknowledge/tests/isitopythonHelper.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import metaknowledge
3 |
4 | if __name__ == "__main__":
5 | rlst = metaknowledge.wosParser("testFile.isi")
6 | s = '['
7 | for R in rlst:
8 | s +=(str(R.__getstate__()) + ',\n')
9 | s += ']'
10 | print(s)
11 |
--------------------------------------------------------------------------------
/docs/documentation/classes/index.rst:
--------------------------------------------------------------------------------
1 | Classes
2 | *******
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | CIHRGrant
8 | Citation
9 | Collection
10 | CollectionWithIDs
11 | ExtendedRecord
12 | FallbackGrant
13 | Grant
14 | GrantCollection
15 | MedlineGrant
16 | MedlineRecord
17 | NSERCGrant
18 | NSFGrant
19 | ProQuestRecord
20 | Record
21 | RecordCollection
22 | ScopusRecord
23 | WOSRecord
--------------------------------------------------------------------------------
/metaknowledge/tests/SimplePaper.isi:
--------------------------------------------------------------------------------
1 | PT J
2 | AU John, D
3 | AF John, Doe
4 | TI Example Paper
5 | SO TOPICS IN COGNITIVE SCIENCE
6 | LA English
7 | DT Article
8 | DE Example; testing
9 | ID REAL; TIME
10 | AB This is a test.
11 | C1 UW, Ontario, Canada.
12 | RP John, D (reprint author), UW, Ontario, Canada.
13 | CR John D., 1999, TOPICS IN COGNITIVE SCIENCE
14 | J9 EXAMPLE
15 | JI examaple
16 | PD APR
17 | PY 2015
18 | UT WOS:123317623000007
19 | ER
20 |
--------------------------------------------------------------------------------
/metaknowledge/contour/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | """Two functions based on _matplotlib_ for generating nicer looking graphs
3 |
4 | This is the only module that depends on anything besides _networkx_; it depends on [numpy](http://www.numpy.org/), [scipy](https://www.scipy.org/), and [matplotlib](http://matplotlib.org/).
5 | """
6 |
7 | from .plotting import graphDensityContourPlot, quickVisual
8 |
--------------------------------------------------------------------------------
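A minimal sketch of calling the two functions this module exports, assuming numpy, scipy, matplotlib and networkx are installed (the karate-club graph is only an illustrative input):

    import networkx as nx
    from metaknowledge.contour import quickVisual, graphDensityContourPlot

    G = nx.karate_club_graph()   # any networkx graph works here
    quickVisual(G)               # quick node-and-edge drawing
    graphDensityContourPlot(G)   # density contour plot of the graph's layout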
/metaknowledge/medline/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | These are the functions used to process MEDLINE (PubMed) files at the backend. They are meant for internal use by metaknowledge.
3 | """
4 | from .recordMedline import MedlineRecord, medlineRecordParser
5 | from .medlineHandlers import isMedlineFile, medlineParser
6 | from .tagProcessing.tagNames import tagNameDict, authorBasedTags, tagNameConverterDict
7 | from .tagProcessing.specialFunctions import medlineSpecialTagToFunc
8 | from .tagProcessing.tagFunctions import *
9 |
--------------------------------------------------------------------------------
/metaknowledge/WOS/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | """These are the functions used to process Web of Science (WOS) files at the backend. They are meant for internal use by metaknowledge."""
3 |
4 | from .tagProcessing.tagFunctions import *
5 | from .tagProcessing.funcDicts import tagToFullDict, fullToTagDict, tagNameConverterDict, tagsAndNameSet, knownTagsList
6 |
7 | from .recordWOS import WOSRecord, recordParser
8 | from .wosHandlers import isWOSFile, wosParser
9 |
--------------------------------------------------------------------------------
/metaknowledge/proquest/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | """These are the functions used to process ProQuest files at the backend. They are meant for internal use by metaknowledge.
3 | """
4 | from .recordProQuest import ProQuestRecord, proQuestRecordParser
5 | from .proQuestHandlers import isProQuestFile, proQuestParser
6 | from .tagProcessing.specialFunctions import proQuestSpecialTagToFunc
7 | from .tagProcessing.tagFunctions import proQuestTagToFunc
8 |
--------------------------------------------------------------------------------
/metaknowledge/scopus/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | """These are the functions used to process Scopus CSV files at the backend. They are meant for internal use by metaknowledge.
3 | """
4 | from .recordScopus import ScopusRecord, scopusRecordParser, scopusHeader
5 | from .scopusHandlers import isScopusFile, scopusParser
6 |
7 | from .tagProcessing.tagFunctions import scopusTagToFunction
8 | from .tagProcessing.specialFunctions import scopusSpecialTagToFunc
9 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = _build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/metaknowledge/proquest/tagProcessing/specialFunctions.py:
--------------------------------------------------------------------------------
1 | proQuestSpecialTagToFunc = {
2 | #'DOI' : lambda R : None,
3 | 'authorsShort' : lambda R : R['Author'],
4 | #'grants' : lambda R : [],
5 | #'address' : '',
6 | 'selfCitation' : lambda R: R.createCitation(),
7 | 'beginningPage' : lambda R : 0,
8 | 'keywords' : lambda R : R['Identifier / keyword'],
9 | 'abstract' : lambda R : R['Abstract'],
10 | #'citations' : '',
11 | 'authorsFull' : lambda R : 'Author',
12 | #'volume' : '',
13 | 'year' : lambda R : R['Degree date'],
14 | #'j9' : '',
15 | #'journal' : '',
16 | 'title' : lambda R : R['Title'],
17 | 'id' : lambda R : R.id,
18 | }
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # metaknowledge
4 |
5 | `metaknowledge` is a Python3 package that simplifies bibliometric research using data from various sources. It reads a directory of plain-text files containing metadata on publications and citations, and writes to a variety of data structures suitable for quantitative, network, and text analyses. It handles large datasets (e.g. several million records) efficiently. Full [documentation](https://metaknowledge.readthedocs.io/) is available online.
6 |
7 | ## Installing
8 |
9 | To install run `python3 setup.py install`
10 |
--------------------------------------------------------------------------------
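A minimal usage sketch of the workflow the README describes, mirroring the example in docs/documentation/example.rst (the "records/" directory is illustrative):

    import metaknowledge as mk

    # Load every valid WOS plain-text file found in the directory
    RC = mk.RecordCollection("records/")
    print(RC)  # e.g. "Collection of 33 records"

    # Build a journal co-citation network and write it out for later analysis
    G = RC.coCiteNetwork(nodeType='journal')
    mk.writeGraph(G, "Cocitation-Network-of-Journals")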
/vagrant/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
4 |
5 | Vagrant.configure(2) do |config|
6 | config.vm.provider "virtualbox" do |vb, override|
7 | config.vm.box = "ubuntu/trusty64"
8 | override.vm.box_url = "https://atlas.hashicorp.com/ubuntu/trusty64"
9 | vb.name = "Networks_Labs"
10 | vb.memory = "2048"
11 | override.vm.network :forwarded_port, host: 1159, guest: 8888, auto_correct: true
12 | end
13 | config.ssh.insert_key = false
14 | config.vm.synced_folder ".", "/vagrant", disabled: true
15 | config.vm.synced_folder '..', "/vagrant"
16 | config.vm.synced_folder '../notebooks', "/notebooks"
17 | config.vm.provision :shell, path: "bootstrap"
18 |
19 | end
20 |
--------------------------------------------------------------------------------
/docs/documentation/modules/scopus.rst:
--------------------------------------------------------------------------------
1 | scopus
2 | ======
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.scopus
7 |
8 | Functions
9 | ---------
10 |
11 | .. automodule:: metaknowledge.scopus.scopusHandlers
12 | :members:
13 |
14 | Special Functions
15 | -----------------
16 |
17 | .. automodule:: metaknowledge.scopus.tagProcessing.specialFunctions
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 | :inherited-members:
22 |
23 | Tag Functions
24 | -------------
25 |
26 | .. automodule:: metaknowledge.scopus.tagProcessing.tagFunctions
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 | :inherited-members:
31 |
32 | Backend
33 | -------
34 |
35 | .. automodule:: metaknowledge.scopus.recordScopus
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 | :inherited-members:
40 |
--------------------------------------------------------------------------------
/metaknowledge/scopus/tagProcessing/specialFunctions.py:
--------------------------------------------------------------------------------
1 | scopusSpecialTagToFunc = {
2 | 'year' : lambda R : R['Year'],
3 | 'volume' : lambda R : R['Volume'],
4 | 'beginningPage' : lambda R : R['Page start'],
5 | #'DOI' : lambda R : R['DOI'], causes recursion errors if not commented out
6 | #'address' : lambda R : R[''],
7 | 'j9' : lambda R : R['Abbreviated Source Title'],
8 | 'citations' : lambda R : R['References'],
9 | #'grants' : lambda R : R['References'],
10 | 'selfCitation' : lambda R : R.createCitation(),
11 | 'authorsShort' : lambda R : R['Authors'],
12 | 'authorsFull' : lambda R : R['Authors'],
13 | 'title' : lambda R : R['Title'],
14 | 'journal' : lambda R : R['Source title'],
15 | 'keywords' : lambda R : R['Index Keywords'],
16 | 'abstract' : lambda R : R['Abstract'],
17 | 'id' : lambda R : R['EID'],
18 | }
19 |
--------------------------------------------------------------------------------
/docs/documentation/modules/medline.rst:
--------------------------------------------------------------------------------
1 | medline
2 | =======
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.medline
7 |
8 | Functions
9 | ---------
10 |
11 | .. automodule:: metaknowledge.medline.medlineHandlers
12 | :members:
13 |
14 | Special Functions
15 | -----------------
16 |
17 | .. automodule:: metaknowledge.medline.tagProcessing.specialFunctions
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 | :inherited-members:
22 |
23 | Tag Functions
24 | -------------
25 |
26 | .. automodule:: metaknowledge.medline.tagProcessing.tagFunctions
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 | :inherited-members:
31 |
32 | Backend
33 | -------
34 |
35 | .. automodule:: metaknowledge.medline.recordMedline
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 | :inherited-members:
40 |
41 |
--------------------------------------------------------------------------------
/docs/documentation/modules/proquest.rst:
--------------------------------------------------------------------------------
1 | proquest
2 | ========
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.proquest
7 |
8 | Functions
9 | ---------
10 |
11 | .. automodule:: metaknowledge.proquest.proQuestHandlers
12 | :members:
13 |
14 | Special Functions
15 | -----------------
16 |
17 | .. automodule:: metaknowledge.proquest.tagProcessing.specialFunctions
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 | :inherited-members:
22 |
23 | Tag Functions
24 | -------------
25 |
26 | .. automodule:: metaknowledge.proquest.tagProcessing.tagFunctions
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 | :inherited-members:
31 |
32 | Backend
33 | -------
34 |
35 | .. automodule:: metaknowledge.proquest.recordProQuest
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 | :inherited-members:
40 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/documentation/example.rst:
--------------------------------------------------------------------------------
1 | ##############
2 | Basic Example
3 | ##############
4 |
5 | *metaknowledge* is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data.
6 |
7 | To load the data from files and make a network: ::
8 | >>> import metaknowledge as mk
9 | >>> RC = mk.RecordCollection("records/")
10 | >>> print(RC)
11 | Collection of 33 records
12 | >>> G = RC.coCiteNetwork(nodeType = 'journal')
13 | Done making a co-citation network of files-from-records 1.1s
14 | >>> print(len(G.nodes()))
15 | 223
16 | >>> mk.writeGraph(G, "Cocitation-Network-of-Journals")
17 |
18 | There is also a simple command line program called ``metaknowledge`` that comes with the package. It allows for creating networks without any need to know Python. More information about it can be found `here <../CLI.html>`_.
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 | :caption: Example:
23 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflows will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/metaknowledge/constants.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import os
3 | import sys
4 |
5 | __version__ = '3.4.1'
6 |
7 | commonRecordFields = [
8 | 'year',
9 | 'volume',
10 | 'beginningPage',
11 | 'DOI',
12 | 'address',
13 | 'j9',
14 | 'citations',
15 | 'grants',
16 | 'selfCitation',
17 | 'authorsShort',
18 | 'authorsFull',
19 | 'title',
20 | 'journal',
21 | 'keywords',
22 | 'abstract',
23 | 'id',
24 | ]
25 |
26 |
27 | def isInteractive():
28 | """
29 | A basic check of whether the program is running in interactive mode
30 | """
31 | if sys.stdout.isatty() and os.name != 'nt':
32 | #Hopefully everything but ms supports '\r'
33 | try:
34 | import threading
35 | except ImportError:
36 | return False
37 | else:
38 | return True
39 | else:
40 | return False
41 |
42 | VERBOSE_MODE = isInteractive()
43 |
44 | FAST_CITES = False
45 |
--------------------------------------------------------------------------------
/docs/documentation/modules/WOS.rst:
--------------------------------------------------------------------------------
1 | WOS
2 | ===
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.WOS
7 |
8 | Functions
9 | ---------
10 |
11 | .. automodule:: metaknowledge.WOS.wosHandlers
12 | :members:
13 |
14 | Help Functions
15 | --------------
16 |
17 | .. automodule:: metaknowledge.WOS.tagProcessing.helpFuncs
18 | :members:
19 | :undoc-members:
20 | :show-inheritance:
21 | :inherited-members:
22 |
23 | Tag Functions
24 | -------------
25 |
26 | .. automodule:: metaknowledge.WOS.tagProcessing.tagFunctions
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 | :inherited-members:
31 |
32 | Dict Functions
33 | --------------
34 |
35 | .. automodule:: metaknowledge.WOS.tagProcessing.funcDicts
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 | :inherited-members:
40 |
41 | Backend
42 | -------
43 |
44 | .. automodule:: metaknowledge.WOS.recordWOS
45 | :members:
46 | :undoc-members:
47 | :show-inheritance:
48 | :inherited-members:
--------------------------------------------------------------------------------
/docs/documentation/modules/grants.rst:
--------------------------------------------------------------------------------
1 | grants
2 | ======
3 |
4 | Overview
5 | --------
6 | .. automodule:: metaknowledge.grants
7 |
8 | baseGrant
9 | ---------
10 |
11 | .. automodule:: metaknowledge.grants.baseGrant
12 | :members:
13 | :undoc-members:
14 | :noindex:
15 |
16 | cihrGrant
17 | ---------
18 |
19 | .. automodule:: metaknowledge.grants.cihrGrant
20 | :members:
21 | :undoc-members:
22 | :noindex:
23 |
24 | medlineGrant
25 | ------------
26 |
27 | .. automodule:: metaknowledge.grants.medlineGrant
28 | :members:
29 | :undoc-members:
30 | :noindex:
31 |
32 | nsercGrant
33 | ----------
34 |
35 | .. automodule:: metaknowledge.grants.nsercGrant
36 | :members:
37 | :undoc-members:
38 | :noindex:
39 |
40 | nsfGrant
41 | --------
42 |
43 | .. automodule:: metaknowledge.grants.nsfGrant
44 | :members:
45 | :undoc-members:
46 | :noindex:
47 |
48 | scopusGrant
49 | -----------
50 |
51 | .. automodule:: metaknowledge.grants.scopusGrant
52 | :members:
53 | :undoc-members:
54 | :noindex:
55 |
56 |
--------------------------------------------------------------------------------
/docs/examples/index.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 |
4 | **Note:** for a more recent example of using *metaknowledge*, please visit `the NetLab blog `_.
5 |
6 | *metaknowledge* is a python library for creating and analyzing scientific metadata. It uses records obtained from Web of Science (WOS), Scopus and other sources. It is intended to be usable by those who do not know much python. This page will be a short overview of its capabilities, to allow you to use it for your own work.
7 |
8 | This document was made from a `jupyter `_ notebook. If you know how to use them, you can download the notebook |notebook|_, and the sample file is `here `_, if you wish to have an interactive version of this page. Now let's begin.
9 |
10 | .. _notebook: networkslab.org/metaknowledge/examples/metaknowledgeExamples.ipynb
11 | .. |notebook| replace:: here
12 |
13 | .. toctree::
14 | :maxdepth: 1
15 |
16 | Getting-Started.ipynb
17 | Reading-Files.ipynb
18 | Objects.ipynb
19 | Making-Networks.ipynb
--------------------------------------------------------------------------------
/metaknowledge/tests/test_constants.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import unittest.mock
4 | import builtins
5 | import importlib
6 | import sys
7 | import metaknowledge.constants
8 |
9 | class TestConstants(unittest.TestCase):
10 | def test_VerboseMode(self):
11 | self.assertFalse(metaknowledge.constants.isInteractive())
12 | sys.stdout.isatty = lambda : True
13 | self.assertTrue(metaknowledge.constants.isInteractive())
14 | class ImportMock(unittest.mock.Mock):
15 | def __call__(self, *args, **kwargs):
16 | if args[0] == 'threading':
17 | raise ImportError
18 | else:
19 | return importlib.__import__(*args, **kwargs)
20 | with unittest.mock.patch('builtins.__import__', new_callable = ImportMock):#, NoThreadingImport):
21 | #builtins.__import__ =
22 | self.assertFalse(metaknowledge.constants.isInteractive()) #This will fail for setup.py test
23 | #Failure for setup.py is what is supposed to happen as that would be an interactive environment
24 |
--------------------------------------------------------------------------------
/metaknowledge/grants/scopusGrant.py:
--------------------------------------------------------------------------------
1 | import collections
2 |
3 | from .baseGrant import Grant
4 | from ..mkExceptions import BadGrant
5 |
6 | class ScopusGrant(Grant):
7 | def __init__(self, grantString):
8 |
9 | grantDict = collections.OrderedDict()
10 |
11 | bad = False
12 | error = None
13 |
14 | split = grantString.split(', ')
15 | try:
16 | grantDict['agency'] = split.pop()
17 | except IndexError:
18 | bad = True
19 | grantDict['agency'] = grantString
20 | error = BadGrant("The grant string '{}' does not contain enough comma-spaces (', ') to be a scopus grant.".format(grantString))
21 | else:
22 | try:
23 | grantDict['agencyCode'] = split.pop()
24 | except IndexError:
25 | pass
26 | else:
27 | try:
28 | grantDict['code'] = split.pop()
29 | except IndexError:
30 | pass
31 | idValue = "{}-{}-{}".format(grantDict.get('code', ''), grantDict.get('agencyCode', ''), grantDict.get('agency', ''))
32 | Grant.__init__(self, grantString, grantDict, idValue, bad, error)
33 |
--------------------------------------------------------------------------------
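A short illustration of the parsing above; the grant string is made up, but shows how the comma-separated pieces are popped from the end (agency, then agency code, then code):

    from metaknowledge.grants.scopusGrant import ScopusGrant

    g = ScopusGrant("1234567, ABC, Example Funding Agency")
    # split(', ') then pop() from the end gives:
    #   agency = 'Example Funding Agency', agencyCode = 'ABC', code = '1234567'
    # and the id becomes '1234567-ABC-Example Funding Agency'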
/notebooks/Lesson-2-Reading-Files/Reading-Files.md:
--------------------------------------------------------------------------------
1 | # Reading Files
2 |
3 |
4 | First we need to import _metaknowledge_ like we saw in lesson 1.
5 |
6 | []import metaknowledge as mk
7 |
8 | We only need _metaknowledge_ for now, so there is no need to import everything.
9 |
10 |
11 | The files from the Web of Science (WOS) can be loaded into a [`RecordCollection`](http://networkslab.org/metaknowledge/docs/RecordCollection#RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string.
12 |
13 | []RC = mk.RecordCollection("savedrecs.txt")
14 | []repr(RC)
15 |
16 | You can also read a whole directory; in this case it is reading the current working directory.
17 |
18 | []RC = mk.RecordCollection(".")
19 | []repr(RC)
20 |
21 | _metaknowledge_ can detect if a file is a valid WOS file or not and will read the entire directory and load only those that have the right header. You can also tell it to only read a certain type of file, by using the extension argument.
22 |
23 | []RC = mk.RecordCollection(".", extension = "txt")
24 | []repr(RC)
25 |
26 | Now you have a `RecordCollection` composed of all the WOS records in the selected file(s).
27 |
28 | []print("RC is a " + str(RC))
29 |
30 | You might have noticed I used two different ways to display the `RecordCollection`. `repr(RC)` will give you where _metaknowledge_ thinks the collection came from, while `str(RC)` will give you a nice string containing the number of `Records`.
31 |
--------------------------------------------------------------------------------
/metaknowledge/journalAbbreviations/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | """This module handles the abbreviations, known as J9 abbreviations, that are given by the J9 tag in WOS Records and used by WOS for journal titles in citations.
3 |
4 | The citations provided by WOS use abbreviated journal titles instead of the full names. The full list of abbreviations can be found on a series of pages, divided by letter, starting at [images.webofknowledge.com/WOK46/help/WOS/A_abrvjt.html](http://images.webofknowledge.com/WOK46/help/WOS/A_abrvjt.html). The function [updatej9DB()](#metaknowledge.journalAbbreviations.backend.getj9dict) is used to scrape and parse those pages; it must run without error before the other features can be used. If the database is requested by `getj9dict()`, which is what [Citations](../classes/Citation.html#metaknowledge.citation.Citation) use, and the database is not found or is corrupted, then [updatej9DB()](#metaknowledge.journalAbbreviations.backend.updatej9DB) will be run to download the database; if this fails an `mkException` will be raised. The download and parsing usually take less than a second on a good internet connection.
5 |
6 | The other functions of the module are for manually adding and removing abbreviations from the database. It is recommended that this be done with the command-line tool `metaknowledge` instead of with a script.
7 | """
8 |
9 | from .backend import getj9dict, abrevDBname, manualDBname, addToDB
10 |
--------------------------------------------------------------------------------
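A small sketch of the lookup flow described in the docstring above; getj9dict is the function exported here, and the J9 string queried is only illustrative:

    from metaknowledge.journalAbbreviations import getj9dict

    # Loads the local abbreviation database, downloading and parsing the WOS
    # pages first if it is missing or corrupted
    abbrevs = getj9dict()
    print(abbrevs.get('PHYS REV LETT'))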
/metaknowledge/tests/test_proquest.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import metaknowledge
4 |
5 | import os
6 |
7 | class TestProQuest(unittest.TestCase):
8 |
9 | def setUp(self):
10 | metaknowledge.VERBOSE_MODE = False
11 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/ProQuest_TestFile.testtxt")
12 | self.R = self.RC.peek()
13 |
14 | def test_isCollection(self):
15 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection)
16 |
17 | def test_isProQuest(self):
18 | self.assertIsInstance(self.R, metaknowledge.ProQuestRecord)
19 |
20 | def test_specials(self):
21 | for R in self.RC:
22 | for s in metaknowledge.proquest.proQuestSpecialTagToFunc.keys():
23 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation))
24 |
25 | def test_allFields(self):
26 | for R in self.RC:
27 | for k,v in R.items():
28 | self.assertIsInstance(k, str)
29 | self.assertIsInstance(v, (str, list, int))
30 |
31 | def test_graphs(self):
32 | self.assertEqual(metaknowledge.graphStats(self.RC.networkMultiMode(self.RC.tags()), sentenceString = True), "The graph has 1928 nodes, 50833 edges, 0 isolates, 114 self loops, a density of 0.0273952 and a transitivity of 0.0815136")
33 |
34 | def test_write(self):
35 | #No writer currently implemented
36 | pass
37 |
--------------------------------------------------------------------------------
/notebooks/Lesson-1-Getting-Started/Getting-Started.md:
--------------------------------------------------------------------------------
1 | # About Jupyter Notebooks
2 |
3 |
4 | This document was made from a [jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells; each cell is either code, output, or markdown (text). For example, this cell is markdown, which means it is plain text with a couple of small formatting things, like the link in the first sentence. You can change the cell type using the dropdown menu at the top of the page.
5 |
6 | []#This cell is python
7 | []#The cell below it is output
8 | []print("This is an output cell")
9 |
10 | The code cells contain python code that you can edit and run yourself. Try changing the one above.
11 |
12 |
13 | # Importing
14 |
15 |
16 | First you need to import the _metaknowledge_ package
17 |
18 | []import metaknowledge as mk
19 |
20 |
21 | And you will often need the [_networkx_](https://networkx.github.io/documentation/networkx-1.9.1/) package
22 |
23 | []import networkx as nx
24 |
25 | And [_matplotlib_](http://matplotlib.org/) to display the graphs and to make them look nice when displayed
26 |
27 | []import matplotlib.pyplot as plt
28 | []%matplotlib inline
29 |
30 | _metaknowledge_ also has a _matplotlib_ based graph [visualizer](http://networkslab.org/metaknowledge/docs/visual#visual) that will be used sometimes
31 |
32 | []import metaknowledge.visual as mkv
33 |
34 | These lines of code will be at the top of all the other lessons as they are what let us use _metaknowledge_.
35 |
--------------------------------------------------------------------------------
/metaknowledge/WOS/tagProcessing/helpFuncs.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | monthDict = {'SPR': 3, 'SUM': 6, 'FAL': 9, 'WIN': 12, 'JAN' : 1, 'FEB' : 2, 'MAR' : 3, 'APR' : 4, 'MAY' : 5, 'JUN' : 6 , 'JUL' : 7, 'AUG' : 8, 'SEP' : 9, 'OCT' : 10, 'NOV' : 11, 'DEC' : 12}
3 |
4 | def getMonth(s):
5 | """
6 | Known formats:
7 | Month ("%b")
8 | Month Day ("%b %d")
9 | Month-Month ("%b-%b") --- this gets coerced to the first %b, dropping the month range
10 | Season ("%s") --- this gets coerced to use the first month of the given season
11 | Month Day Year ("%b %d %Y")
12 | Month Year ("%b %Y")
13 | Year Month Day ("%Y %m %d")
14 | """
15 | monthOrSeason = s.split('-')[0].upper()
16 | if monthOrSeason in monthDict:
17 | return monthDict[monthOrSeason]
18 | else:
19 | monthOrSeason = s.split('-')[-1].upper()
20 | if monthOrSeason.isdigit():
21 | return monthOrSeason
22 |         elif monthOrSeason in monthDict:
23 |             return monthDict[monthOrSeason]
24 |
25 | raise ValueError("Month format not recognized: " + s)
26 |
27 | def makeBiDirectional(d):
28 | """
29 | Helper for generating tagNameConverter
30 | Makes dict that maps from key to value and back
31 | """
32 | dTmp = d.copy()
33 | for k in d:
34 | dTmp[d[k]] = k
35 | return dTmp
36 |
37 | def reverseDict(d):
38 | """
39 | Helper for generating fullToTag
40 | Makes dict of value to key
41 | """
42 | retD = {}
43 | for k in d:
44 | retD[d[k]] = k
45 | return retD
46 |
--------------------------------------------------------------------------------
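A few calls illustrating the coercions listed in getMonth's docstring; the expected values follow directly from monthDict above:

    from metaknowledge.WOS.tagProcessing.helpFuncs import getMonth

    getMonth('APR')      # 4
    getMonth('SPR')      # 3, a season maps to its first month
    getMonth('JUN-JUL')  # 6, a month range is coerced to its first month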
/docs/mkdsupport.py:
--------------------------------------------------------------------------------
1 | """A Sphinx extension supporting sphinx.ext.autodoc for modules containing docstrings in Markdown
2 | """
3 |
4 | import pypandoc
5 | import platform
6 |
7 | # Since pypandoc.convert_text will always return strings ending with \r\n, the separator should also be set to it
8 |
9 | if platform.system() == 'Windows':
10 | SEP = u'\r\n'
11 | else:
12 | SEP = u'\n'
13 | #SEP = u'\r\n'
14 |
15 |
16 | def setup(app):
17 | """Add extension's default value and set new function to ```autodoc-process-docstring``` event"""
18 |
19 | # The 'rebuild' parameter should be set to 'html' rather than 'env' since this extension needs a full rebuild of HTML
20 | # document
21 | app.add_config_value('mkdsupport_use_parser', 'markdown_github', 'html')
22 | app.connect('autodoc-process-docstring', pandoc_process)
23 |
24 |
25 |
26 | def pandoc_process(app, what, name, obj, options, lines):
27 | """Convert docstrings in Markdown into reStructuredText using pandoc
28 | """
29 |
30 | if not lines:
31 | return None
32 |
33 | input_format = app.config.mkdsupport_use_parser
34 | output_format = 'rst'
35 |
36 | # Since the default encoding for sphinx.ext.autodoc is unicode and pypandoc.convert_text, which will always return a
37 | # unicode string, expects a unicode or utf-8 encoded string, there is no need to deal with encoding
38 | text = SEP.join(lines)
39 | text = pypandoc.convert_text(text, output_format, format=input_format)
40 |
41 | # The 'lines' argument in Sphinx is a list of strings and must be modified in place
42 | del lines[:]
43 | lines.extend(text.split(SEP))
44 |
--------------------------------------------------------------------------------
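A minimal sketch of how a Sphinx conf.py might enable this extension; the sys.path line assumes conf.py sits in the same directory as mkdsupport.py, and mkdsupport_use_parser is the config value registered in setup() above:

    # conf.py (sketch)
    import os
    import sys
    sys.path.insert(0, os.path.abspath('.'))

    extensions = [
        'sphinx.ext.autodoc',
        'mkdsupport',  # converts Markdown docstrings to reST via pandoc
    ]
    mkdsupport_use_parser = 'markdown_github'  # pandoc input format (the default)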
/.gitignore:
--------------------------------------------------------------------------------
1 | #isi Files and logins
2 | *.txt
3 | !requirements.txt
4 | *.sh
5 | *.graphml
6 | *.csv
7 | *_cites/ #<----------------------------------------------This might cause issues
8 | *.log
9 | cover/
10 | profiling/
11 | special.py
12 | .ipynb_checkpoints/
13 | .vagrant/
14 | .ipython/
15 | security/
16 | !vagrant/*.sh
17 | vagrant/logs/
18 | profile_wosserver/
19 | !profile_wosserver/ipython_*
20 | !Docs/*.sh
21 | metaknowledgeDocs.md
22 | j9Raws/
23 | /manualj9Abbreviations*
24 | metaknowledge/journalAbbreviations/j9Abbreviations.bak
25 | metaknowledge/journalAbbreviations/j9Abbreviations.dir
26 | metaknowledge/journalAbbreviations/j9Abbreviations.dat
27 | !savedrecs.txt
28 | *.bib
29 | TODO.md
30 | testing_nb/
31 |
32 | *.DS_Store
33 |
34 | # Byte-compiled / optimized / DLL files
35 | __pycache__/
36 | *.py[cod]
37 |
38 | # C extensions
39 | *.so
40 |
41 | # Distribution / packaging
42 | .Python
43 | env/
44 | build/
45 | develop-eggs/
46 | dist/
47 | downloads/
48 | eggs/
49 | .eggs/
50 | lib/
51 | lib64/
52 | parts/
53 | sdist/
54 | var/
55 | *.egg-info/
56 | .installed.cfg
57 | *.egg
58 |
59 | # PyInstaller
60 | # Usually these files are written by a python script from a template
61 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
62 | *.manifest
63 | *.spec
64 |
65 | # Installer logs
66 | pip-log.txt
67 | pip-delete-this-directory.txt
68 |
69 | # Unit test / coverage reports
70 | htmlcov/
71 | .tox/
72 | .coverage
73 | .coverage.*
74 | .cache
75 | nosetests.xml
76 | coverage.xml
77 | *,cover
78 |
79 | # Translations
80 | *.mo
81 | *.pot
82 |
83 | # Django stuff:
84 | *.log
85 |
86 | # Sphinx documentation
87 | docs/_build/
88 | docs/_static/
89 | docs/_templates
90 |
91 | # PyBuilder
92 | target/
93 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800031.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Down Deep Bore Hole Carbon Dating Project at Byrd Station, Antarctica
6 | 08/15/1968
7 | 08/31/1969
8 | 12600
9 |
10 | Standard Grant
11 |
12 |
13 | 07000000
14 |
15 | Directorate For Engineering
16 |
17 |
18 | Directorate For Engineering
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 06/10/1968
26 | 06/10/1968
27 |
28 | 6800031
29 |
30 | Hans
31 | Oeschger
32 |
33 | 08/15/1968
34 |
35 | Principal Investigator
36 |
37 |
38 | University of Bern
39 | Bern
40 | 3007
41 | 0316314847
42 | Hochschulstrasse 4
43 | Switzerland
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800030.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Participation in Usarp Expeditions
6 | 07/01/1968
7 | 12/31/1977
8 | 49203
9 |
10 | Interagency Agreement
11 |
12 |
13 | 07000000
14 |
15 | Directorate For Engineering
16 |
17 |
18 | Directorate For Engineering
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 05/23/1968
26 | 01/25/1974
27 |
28 | 6800030
29 |
30 | H.
31 | Fehlmann
32 |
33 | 07/01/1968
34 |
35 | Principal Investigator
36 |
37 |
38 | Smithsonian Institution
39 | Arlington
40 | 222023709
41 | 2026337110
42 | Office of Sponsored Projects
43 | United States
44 | Virginia
45 | VA
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800097.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Genera of Chrysomelid Beetles of the World
6 | 06/01/1969
7 | 05/31/1971
8 | 30900
9 |
10 | Standard Grant
11 |
12 |
13 | 08000000
14 |
15 | Direct For Biological Sciences
16 |
17 |
18 | Direct For Biological Sciences
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 05/26/1969
26 | 05/26/1969
27 |
28 | 6800097
29 |
30 | J.Linsley
31 | Gressitt
32 |
33 | 06/01/1969
34 |
35 | Principal Investigator
36 |
37 |
38 | Bernice P Bishop Museum
39 | Honolulu
40 | 968172704
41 | 8088478204
42 | 1525 Bernice Street
43 | United States
44 | Hawaii
45 | HI
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800025.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Chemical and Mineralogical Investigations of Puerto Rican Spilites
6 | 10/15/1968
7 | 06/30/1972
8 | 20300
9 |
10 | Standard Grant
11 |
12 |
13 | 06030003
14 |
15 | Directorate For Geosciences
16 |
17 |
18 | Division Of Earth Sciences
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 10/17/1968
26 | 10/17/1968
27 |
28 | 6800025
29 |
30 |
31 | DATA NOT AVAILABLE
32 |
33 | 10/15/1968
34 |
35 | Principal Investigator
36 |
37 |
38 | University of Pittsburgh
39 | Pittsburgh
40 | 152132303
41 | 4126247400
42 | University Club
43 | United States
44 | Pennsylvania
45 | PA
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/grants/medlineGrant.py:
--------------------------------------------------------------------------------
1 | import collections
2 |
3 | from .baseGrant import Grant
4 | from ..mkExceptions import BadGrant
5 |
6 | class MedlineGrant(Grant):
7 | def __init__(self, grantString):
8 |
9 | grantDict = collections.OrderedDict()
10 |
11 | bad = False
12 | error = None
13 |
14 | split = grantString.split('/')
15 | try:
16 | grantDict['country'] = split.pop()
17 | grantDict['agency'] = split.pop()
18 | except IndexError:
19 | bad = True
20 | grantDict['country'] = grantString
21 | error = BadGrant("The grant string '{}' does not contain enough slashes (/) to be a medline grant.".format(grantString))
22 | else:
23 | if len(split) == 1:
24 | code = split.pop()
25 | if len(code) == 2:
26 | grantDict['code'] = code
27 | else:
28 | grantDict['number'] = code
29 | elif len(split) == 2:
30 | code = split.pop()
31 | if len(code) == 2:
32 | grantDict['code'] = code
33 | grantDict['number'] = split.pop()
34 | else:
35 | grantDict['number'] = "{}/{}".format(split.pop(), code)
36 | else:
37 | grantDict['number'] = '/'.join(split)
38 | if 'number' in grantDict:
39 | idValue = "{}/{}-{}".format(grantDict.get('number', ''), grantDict.get('code', ''), grantDict.get('country', ''))
40 | else:
41 | idValue = "{}-{}-{}".format(grantDict.get('code', ''), grantDict.get('agency', ''), grantDict.get('country', ''))
42 | Grant.__init__(self, grantString, grantDict, idValue, bad, error)
43 |
--------------------------------------------------------------------------------
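A short illustration of the parsing above; the grant string is made up, but has the slash-separated shape the code expects:

    from metaknowledge.grants.medlineGrant import MedlineGrant

    g = MedlineGrant("R01 HL999999/HL/NHLBI NIH HHS/United States")
    # split('/') pops 'United States' as the country and 'NHLBI NIH HHS' as the agency;
    # of the two remaining pieces, 'HL' (two characters long) becomes the code
    # and 'R01 HL999999' the number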
/metaknowledge/tests/nsfTestFiles/noID.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Task Order For Partial Support of the Committee on Geography
6 | 12/08/1967
7 | 06/30/1971
8 | 30000
9 |
10 | BOA/Task Order
11 |
12 |
13 | 06030003
14 |
15 | Directorate For Geosciences
16 |
17 |
18 | Division Of Earth Sciences
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 12/08/1967
26 | 05/14/1969
27 |
28 |
29 | Jeremy
30 | Taylor
31 | jeremygtaylor@compuserve.com
32 | 12/08/1967
33 |
34 | Principal Investigator
35 |
36 |
37 | National Research Council
38 | WASHINGTON
39 | 204180006
40 |
41 | 2101 CONSTITUTION AVE NW
42 | United States
43 | District of Columbia
44 | DC
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800104.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Task Order For Partial Support of the Committee on Geography
6 | 12/08/1967
7 | 06/30/1971
8 | 30000
9 |
10 | BOA/Task Order
11 |
12 |
13 | 06030003
14 |
15 | Directorate For Geosciences
16 |
17 |
18 | Division Of Earth Sciences
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 12/08/1967
26 | 05/14/1969
27 |
28 | 6800104
29 |
30 | Jeremy
31 | Taylor
32 | jeremygtaylor@compuserve.com
33 | 12/08/1967
34 |
35 | Principal Investigator
36 |
37 |
38 | National Research Council
39 | WASHINGTON
40 | 204180006
41 |
42 | 2101 CONSTITUTION AVE NW
43 | United States
44 | District of Columbia
45 | DC
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/proquest/tagProcessing/tagFunctions.py:
--------------------------------------------------------------------------------
1 | singleLineEntries = {
2 | #The inverse cannot be done as new tags may be added that occupy multiple lines
3 | 'Author',
4 | 'Title',
5 | 'Advisor',
6 | 'Name',
7 | 'Committee member',
8 | 'Copyright',
9 | 'Country of publication',
10 | 'Database',
11 | 'Degree',
12 | 'Degree date',
13 | 'Department',
14 | 'Dissertation/thesis number',
15 | 'Document type',
16 | 'Language',
17 | 'Number of pages',
18 | 'Place of publication',
19 | 'ProQuest document ID',
20 | 'Publication year',
21 | 'School code',
22 | 'Source',
23 | 'Source type',
24 | 'University location',
25 | 'University/institution',
26 | 'ISBN',
27 | 'Publication subject',
28 | }
29 |
30 | def proQuestSubject(value):
31 | return value[0].split('; ')
32 |
33 | def proQuestIdentifier_Keyword(value):
34 | return value[0].split(', ')
35 |
36 | def proQuestClassification(value):
37 | return [tuple(s.split(': ')) for s in value[0].split('; ')]
38 |
39 | customTags = {
40 | 'Classification' : proQuestClassification,
41 | 'Identifier / keyword' : proQuestIdentifier_Keyword,
42 | 'Subject' : proQuestSubject
43 | }
44 |
45 | def proQuestTagToFunc(tag):
46 | """Takes a tag string, _tag_, and returns the processing function for its data. If their is not a predefined function returns the identity function (`lambda x : x`).
47 |
48 | # Parameters
49 |
50 | _tag_ : `str`
51 |
52 | > The requested tag
53 |
54 | # Returns
55 |
56 | `function`
57 |
58 | > A function to process the tag's data
59 | """
60 | if tag in singleLineEntries:
61 | return lambda x : x[0]
62 | elif tag in customTags:
63 | return customTags[tag]
64 | else:
65 | return lambda x : x
66 |
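#Illustrative behaviour (the values are hypothetical; each value is the list of
#lines collected for the tag):
# proQuestTagToFunc('Title')(['An Example Title'])       -> 'An Example Title'
# proQuestTagToFunc('Subject')(['Mathematics; Physics']) -> ['Mathematics', 'Physics']
# proQuestTagToFunc('Unknown tag')(['line1', 'line2'])   -> ['line1', 'line2']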
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800077.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Cytogenetics of Coccid Chromosome Systems and Related Nutritional and Biochemical Studies
6 | 10/01/1968
7 | 11/30/1974
8 | 99600
9 |
10 | Standard Grant
11 |
12 |
13 | 08070100
14 |
15 | Direct For Biological Sciences
16 |
17 |
18 | Div Of Molecular and Cellular Bioscience
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 10/04/1968
26 | 02/03/1975
27 |
28 | 6800077
29 |
30 | Spencer
31 | Brown
32 |
33 | 10/01/1968
34 |
35 | Principal Investigator
36 |
37 |
38 | University of California-Berkeley
39 | BERKELEY
40 | 947045940
41 | 5106428109
42 | Sponsored Projects Office
43 | United States
44 | California
45 | CA
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_wos.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import metaknowledge
4 |
5 | class TestWOS(unittest.TestCase):
6 | def setUp(self):
7 | self.R = metaknowledge.WOSRecord(simplePaperString)
8 | self.Rbad = metaknowledge.WOSRecord(simplePaperString[:-3])
9 |
10 | def test_creation(self):
11 | R = metaknowledge.WOSRecord(self.R._fieldDict)
12 | self.assertEqual(R, self.R)
13 | with open("metaknowledge/tests/testFile.isi") as f:
14 | f.readline()
15 | f.readline()
16 | R = metaknowledge.WOSRecord(f)
17 | self.assertEqual(R.id, 'WOS:A1979GV55600001')
18 | with self.assertRaises(TypeError):
19 | R = metaknowledge.WOSRecord(123456789)
20 |
21 | def test_badwrite(self):
22 | with self.assertRaises(metaknowledge.BadWOSRecord):
23 | self.Rbad.writeRecord('not a file object.txt')
24 |
25 | def test_dupDetection(self):
26 | s = simplePaperString[:-3] + "DE Example; testing\nPD APR\nER\n"
27 | R = metaknowledge.WOSRecord(s)
28 | self.assertTrue(R.bad)
29 |
30 | def test_WOSNum(self):
31 | self.assertEqual(self.R.UT, 'WOS:123317623000007')
32 | self.assertEqual(self.R.wosString, 'WOS:123317623000007')
33 |
34 |
35 | simplePaperString = """PT J
36 | AU John, D
37 | AF John, Doe
38 | TI Example Paper
39 | SO TOPICS IN COGNITIVE SCIENCE
40 | LA English
41 | DT Article
42 | DE Example; testing
43 | ID REAL; TIME
44 | AB This is a test.
45 | C1 UW, Ontario, Canada.
46 | RP John, D (reprint author), UW, Ontario, Canada.
47 | CR John D. 1999, TOPICS IN COGNITIVE SCIENCE
48 | J9 EXAMPLE
49 | JI examaple
50 | PD APR
51 | PY 2015
52 | VL 1
53 | BP 1
54 | EP 2
55 | DI 10.1111
56 | UT WOS:123317623000007
57 | ER
58 | """
59 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3551.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Mathematics for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 57778
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | Stanford University
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 2
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3551
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3546.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Mathematics for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 33462
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | San Jose State University Foundation
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 1
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3546
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3548.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Engineering Concepts (ECCP) for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 36489
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | Harvey Mudd College
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 1
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3548
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3550.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Field Biology and Ecology for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 56375
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | Sonoma State University
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 2
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3550
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3547.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Mathematics for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 66126
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | California State University-Fresno Foundation
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 2
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3547
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/69W3549.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Biological Science for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 61335
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | Humboldt State University Foundation
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 2
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3549
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/badXMLfile.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 | Summer Institute in Mathematics for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 66126
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | California State University-Fresno Foundation
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 2
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3547
56 |
57 |
58 |
--------------------------------------------------------------------------------
/metaknowledge/fileHandlers.py:
--------------------------------------------------------------------------------
1 | try:
2 | import collections.abc
3 | except ImportError:
4 | import collections
5 | collections.abc = collections
6 |
7 | from .mkExceptions import UnknownFile
8 |
9 | from .grants.cihrGrant import parserCIHRfile, isCIHRfile
10 | from .grants.nsercGrant import parserNSERCfile, isNSERCfile
11 | from .grants.nsfGrant import parserNSFfile, isNSFfile
12 | from .grants.baseGrant import parserFallbackGrantFile, isFallbackGrantFile
13 |
14 | from .WOS.wosHandlers import isWOSFile, wosParser
15 | from .medline.medlineHandlers import isMedlineFile, medlineParser
16 | from .proquest.proQuestHandlers import isProQuestFile, proQuestParser
17 | from .scopus.scopusHandlers import isScopusFile, scopusParser
18 |
19 | ProccessorTuple = collections.namedtuple("ProccessorTuple", ("type", "processor", "detector"))
20 |
21 | def unrecognizedFileHandler(fileName):
22 | raise UnknownFile("'{}' is not recognized by metaknowledge.".format(fileName))
23 |
24 | grantProcessors = [
25 | ProccessorTuple("NSFGrant", parserNSFfile, isNSFfile),
26 | ProccessorTuple("CIHRGrant", parserCIHRfile, isCIHRfile),
27 | ProccessorTuple("NSERCGrant", parserNSERCfile, isNSERCfile),
28 | ProccessorTuple("FallbackGrant", parserFallbackGrantFile, isFallbackGrantFile),
29 | #Raises exception if reached, to indicate the end of the list
30 | #This simplifies things at the other end
31 | ProccessorTuple("Invalid File", None, unrecognizedFileHandler),
32 | ]
33 |
34 | recordHandlers = [
35 | ProccessorTuple("WOSRecord", wosParser, isWOSFile),
36 | ProccessorTuple("MedlineRecord", medlineParser, isMedlineFile),
37 | ProccessorTuple("ProQuestRecord", proQuestParser, isProQuestFile),
38 | ProccessorTuple("ScopusRecord", scopusParser, isScopusFile),
39 | #Raises exception if reached, to indicate the end of the list
40 | #This simplifies things at the other end
41 | ProccessorTuple("Invalid File", None, unrecognizedFileHandler),
42 | ]
43 |
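#A sketch of how these lists are meant to be consumed (not the canonical caller):
#each file is offered to the detectors in order and handed to the first matching
#processor; the trailing "Invalid File" entry guarantees an UnknownFile is raised
#when nothing matches.
#
# def processFile(fileName, handlers = recordHandlers):
#     for recordType, processor, detector in handlers:
#         if detector(fileName):
#             return recordType, processor(fileName)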
--------------------------------------------------------------------------------
/metaknowledge/tests/test_grants.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import metaknowledge
4 |
5 | class TestGrants(unittest.TestCase):
6 | def setUp(self):
7 | self.Grant1 = metaknowledge.MedlineGrant("U10 HD04267/HG/NICHD NHI HHS/Unit State")
8 | self.Grant2 = metaknowledge.MedlineGrant("HG/NICHD NHI HHS/Unit State")
9 |
10 | def test_isGrant(self):
11 | self.assertIsInstance(self.Grant1, metaknowledge.Grant)
12 | self.assertIsInstance(self.Grant2, metaknowledge.Grant)
13 |
14 | def test_init(self):
15 | Gshort = metaknowledge.MedlineGrant("U10 HD04267/NICHD NHI HHS/Unit State")
16 | Gmid = metaknowledge.MedlineGrant("U10 /HD04267HG/NICHD NHI HHS/Unit State")
17 | Glong = metaknowledge.MedlineGrant("U/10 /HD042/67HG/NICHD NHI HHS/Unit State")
18 | self.assertNotEqual(Gshort, Glong)
19 | self.assertNotEqual(Gmid, Glong)
20 |
21 | def test_bad(self):
22 | G = metaknowledge.MedlineGrant("NICHD NHI HHSUnit State")
23 | self.assertTrue(G.bad)
24 |
25 | def test_eq(self):
26 | self.assertNotEqual(1, self.Grant2)
27 | self.assertNotEqual(self.Grant1, self.Grant2)
28 |
29 | def test_hash(self):
30 | self.assertIsInstance(hash(self.Grant1), int)
31 |
32 | def test_origin(self):
33 | self.assertEqual("U10 HD04267/HG/NICHD NHI HHS/Unit State", self.Grant1.original)
34 |
35 | def test_repr(self):
36 | self.assertEqual(repr(self.Grant1), "")
37 |
38 | def test_NSF(self):
39 | GC = metaknowledge.GrantCollection("metaknowledge/tests/nsfTestFiles")
40 | G = GC.networkMultiMode(GC.tags())
41 | self.assertEqual(metaknowledge.graphStats(G, sentenceString = True), "The graph has 244 nodes, 2077 edges, 0 isolates, 19 self loops, a density of 0.0703974 and a transitivity of 0.497237")
42 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/6800039.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Tides and Tidal Friction in the Bight of Abaco, Bahamas
6 | 11/01/1968
7 | 10/31/1970
8 | 62700
9 |
10 | Standard Grant
11 |
12 |
13 | 06040303
14 |
15 | Directorate For Geosciences
16 |
17 |
18 | Division Of Ocean Sciences
19 |
20 |
21 |
22 | name not available
23 |
24 |
25 | 11/08/1968
26 | 11/08/1968
27 |
28 | 6800039
29 |
30 |
31 | DATA NOT AVAILABLE
32 |
33 | 11/01/1968
34 |
35 | Principal Investigator
36 |
37 |
38 |
39 | DATA NOT AVAILABLE
40 |
41 | 11/01/1968
42 |
43 | Co-Principal Investigator
44 |
45 |
46 | Nova Southeastern University
47 | FORT LAUDERDALE
48 | 333147796
49 | 9542625366
50 | 3301 COLLEGE AVE
51 | United States
52 | Florida
53 | FL
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/docs/CLI.rst:
--------------------------------------------------------------------------------
1 | ######################
2 | Command Line Tool
3 | ######################
4 |
5 | metaknowledge comes with a command-line application named :code:`metaknowledge`. This provides a simple interface to the python package and allows the generation of most of the networks along with ways to manage the records themselves.
6 |
7 | Overview
8 | ^^^^^^^^
9 | To start the tool run: ::
10 |
11 | $ metaknowledge
12 |
13 | You will be asked for the location of the file or files to use. These can be given by paths to the files or paths to directories with the files. Note: if a directory is used all files with the proper header will be read.
14 |
15 | You will then be asked what to do with the records: ::
16 |
17 | A collection of 537 WOS records has been created
18 | What do you wish to do with it:
19 | 1) Make a graph
20 | 2) Write the collection as a single WOS style file
21 | 3) Write the collection as a single WOS style file and make a graph
22 | 4) Write the collection as a single csv file
23 | 5) Write the collection as a single csv file and make a graph
24 | 6) Write all the citations to a single file
25 | 7) Go over non-journal citations
26 | i) open python console
27 | q) quit
28 | What is your selection:
29 |
30 | Select the option you want by typing the corresponding number or character and pressing enter. The menus after this step are controlled this way as well.
31 |
32 | The second-to-last option :code:`i)` will start an interactive Python session with all the objects you have created thus far accessible; their names will be given when it starts.
33 |
34 | The last option :code:`q)` will cause the program to exit. You can also quit at any time by pressing :code:`ctrl-c`.
35 |
36 | Questions?
37 | ^^^^^^^^^^
38 | If you find bugs, or have questions, please write to:
39 |
40 | | Reid McIlroy-Young reid@reidmcy.com
41 | | John McLevey john.mclevey@uwaterloo.ca
42 |
43 | License
44 | ^^^^^^^
45 | *metaknowledge* is free and open source software, distributed under the GPL License.
46 |
47 | .. toctree::
48 | :maxdepth: 2
49 | :caption: CLI:
50 |
--------------------------------------------------------------------------------
/docs/documentation/overview.rst:
--------------------------------------------------------------------------------
1 | ########
2 | Overview
3 | ########
4 |
5 | This package can read the files downloaded from Thomson Reuters' Web of Science (*WOS*), Elsevier's Scopus, ProQuest and Medline files from PubMed. These files contain entries on the metadata of scientific records, such as authors, title, and citations. *metaknowledge* can also read grants from various organizations, including *NSF* and *NSERC*, which are handled similarly to records.
6 |
7 | The `metaknowledge.RecordCollection <./classes/RecordCollection.html#recordcollection-collectionwithids>`_ class can take a path to one or more of these files and load and parse them. The object is the main way to work with multiple records. For each individual record it creates an instance of the `metaknowledge.Record <./classes/Record.html#record-mapping-hashable>`_ class that contains the results of parsing the record.
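
A minimal sketch of this workflow (the file name ``savedrecs.isi`` is illustrative and assumes a *WOS* export)::

    import metaknowledge

    RC = metaknowledge.RecordCollection('savedrecs.isi')
    for R in RC:
        print(R['TI'])  # 'TI' is the WOS title tag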
8 |
9 | The files read by *metaknowledge* are databases containing a series of tags (implicitly or explicitly), e.g. ``'TI'`` is the title for WOS. Each tag has one or more values and metaknowledge can read them and extract useful information. As the tags differ between providers, a small set of values can be accessed by special tags; these tags are listed in ``commonRecordFields``. These special tags can act on the whole ``Record`` and as such may contain information provided by any number of other tags.
10 |
11 | Citations are handled by a special `Citation <./classes/Citation.html#module-metaknowledge.citation>`_ class. This class can parse the citations given by *WOS* and journals cited by *Scopus* and allows for better comparisons when they are used in graphs.
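
For instance, a *WOS* style citation string can be parsed directly (a minimal sketch; the example values mirror the test suite)::

    import metaknowledge

    C = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1")
    C.author   # 'John D'
    C.year     # 2015
    C.journal  # 'TOPICS IN COGNITIVE SCIENCE'
    C.ID()     # 'John D, 2015, TOPICS IN COGNITIVE SCIENCE'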
12 |
13 | A note for those reading the docstrings: metaknowledge's docs are written in Markdown and are processed to produce the documentation found at metaknowledge.readthedocs.io, but you should have no problem reading them with the help function.
14 |
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 | :caption: Overview:
19 |
--------------------------------------------------------------------------------
/metaknowledge/tests/OnePaperNoCites.isi:
--------------------------------------------------------------------------------
1 | FN Thomson Reuters Web of Science™
2 | VR 1.0
3 | PT J
4 | AU Marghetis, T
5 | Nunez, R
6 | AF Marghetis, Tyler
7 | Nunez, Rafael
8 | TI The Motion Behind the Symbols: A Vital Role for Dynamism in the
9 | Conceptualization of Limits and Continuity in Expert Mathematics
10 | SO TOPICS IN COGNITIVE SCIENCE
11 | LA English
12 | DT Article
13 | DE Mathematical practice; Metaphor; Fictive motion; Gesture; Cauchy;
14 | Calculus; Conceptualization
15 | ID REPRESENTATIONS; GESTURE; REAL; TIME
16 | AB The canonical history of mathematics suggests that the late 19th-century arithmetization of calculus marked a shift away from spatial-dynamic intuitions, grounding concepts in static, rigorous definitions. Instead, we argue that mathematicians, both historically and currently, rely on dynamic conceptualizations of mathematical concepts like continuity, limits, and functions. In this article, we present two studies of the role of dynamic conceptual systems in expert proof. The first is an analysis of co-speech gesture produced by mathematics graduate students while proving a theorem, which reveals a reliance on dynamic conceptual resources. The second is a cognitive-historical case study of an incident in 19th-century mathematics that suggests a functional role for such dynamism in the reasoning of the renowned mathematician Augustin Cauchy. Taken together, these two studies indicate that essential concepts in calculus that have been defined entirely in abstract, static terms are nevertheless conceptualized dynamically, in both contemporary and historical practice.
17 | C1 [Marghetis, Tyler; Nunez, Rafael] Univ Calif San Diego, Dept Cognit Sci, La Jolla, CA 92093 USA.
18 | RP Marghetis, T (reprint author), Univ Calif San Diego, Dept Cognit Sci, La Jolla, CA 92093 USA.
19 | EM tmarghet@cogsci.ucsd.edu
20 | NR 45
21 | TC 4
22 | Z9 4
23 | PU WILEY-BLACKWELL
24 | PI HOBOKEN
25 | PA 111 RIVER ST, HOBOKEN 07030-5774, NJ USA
26 | SN 1756-8757
27 | J9 TOP COGN SCI
28 | JI Top. Cogn. Sci.
29 | PD APR
30 | PY 2013
31 | VL 5
32 | IS 2
33 | BP 299
34 | EP 316
35 | DI 10.1111/tops.12013
36 | PG 18
37 | WC Psychology, Experimental
38 | SC Psychology
39 | GA 126NA
40 | UT WOS:000317623000007
41 | PM 23460466
42 | ER
43 |
44 | EF
45 |
--------------------------------------------------------------------------------
/metaknowledge/scopus/tagProcessing/tagFunctions.py:
--------------------------------------------------------------------------------
1 | from ...grants.scopusGrant import ScopusGrant
2 | from ...citation import Citation
3 |
4 | def commaSpaceSeperated(val):
5 | return val.split(', ')
6 |
7 | def semicolonSpaceSeperated(val):
8 | return val.split('; ')
9 |
10 | def semicolonSeperated(val):
11 | return val.split(';')
12 |
13 | def stringValue(val):
14 | return val
15 |
16 | def integralValue(val):
17 | return int(val)
18 |
19 | def grantValue(val):
20 | return [ScopusGrant(s) for s in val.split('; ')]
21 |
22 | def citeValue(val):
23 | return [Citation(s, scopusMode = True) for s in val.split('; ')]
24 |
25 | scopusTagToFunction = {
26 | 'Authors' : commaSpaceSeperated,
27 | 'Title' : stringValue,
28 | 'Year' : integralValue,
29 | 'Source title' : stringValue,
30 | 'Volume' : stringValue,
31 | 'Issue' : stringValue,
32 | 'Art. No.' : stringValue,
33 | 'Page start' : stringValue,
34 | 'Page end' : stringValue,
35 | 'Page count' : integralValue,
36 | 'Cited by' : integralValue,
37 | 'DOI' : stringValue,
38 | 'Link' : stringValue,
39 | 'Affiliations' : stringValue,
40 | 'Authors with affiliations' : semicolonSpaceSeperated,
41 | 'Abstract' : stringValue,
42 | 'Author Keywords' : semicolonSpaceSeperated,
43 | 'Index Keywords' : semicolonSpaceSeperated,
44 | 'Molecular Sequence Numbers' : stringValue,
45 | 'Chemicals/CAS' : stringValue,
46 | 'Tradenames' : semicolonSpaceSeperated,
47 | 'Manufacturers' : semicolonSpaceSeperated,
48 | 'Funding Details' : grantValue,
49 | 'References' : citeValue,
50 | 'Correspondence Address' : semicolonSpaceSeperated,
51 | 'Editors' : stringValue,
52 | 'Sponsors' : semicolonSeperated,
53 | 'Publisher' : stringValue,
54 | 'Conference name' : stringValue,
55 | 'Conference date' : stringValue,
56 | 'Conference location' : stringValue,
57 | 'Conference code' : integralValue,
58 | 'ISSN' : stringValue,
59 | 'ISBN' : semicolonSpaceSeperated,
60 | 'CODEN' : stringValue,
61 | 'PubMed ID' : stringValue,
62 | 'Language of Original Document' : semicolonSpaceSeperated,
63 | 'Abbreviated Source Title' : stringValue,
64 | 'Document Type' : stringValue,
65 | 'Source' : stringValue,
66 | 'EID' : stringValue,
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/inheritance-structure.dot:
--------------------------------------------------------------------------------
1 | /*
2 | Class diagrams for metaknowledge
3 | */
4 |
5 | digraph Records {
6 | charset="utf-8";
7 | rankdir=BT;
8 | node [shape=record, fontname="Source Code Pro"];
9 | edge [arrowhead=empty, arrowsize=1];
10 | /*
11 | Object [label="{Object|builtin}"];
12 | */
13 | Mapping [label="{Mapping|collections.abc}"]
14 | Hashable [label="{Hashable|collections.abc}"]
15 | ABCMeta [label="{ABCMeta|abc}", style=dashed];
16 | MutableMapping [label="{MutableMapping|collections.abc}"]
17 |
18 | Record [label="{Record|metaknowledge}"];
19 | ExtendedRecord [label="{ExtendedRecord|metaknowledge}"];
20 | Grant [label="{Grant|metaknowledge}"];
21 |
22 | DefaultGrant [label="{DefaultGrant|metaknowledge}"];
23 | CIHRGrant [label="{CIHRGrant|metaknowledge}"];
24 | MedlineGrant [label="{MedlineGrant|metaknowledge}"];
25 | NSERCGrant [label="{NSERCGrant|metaknowledge}"];
26 | ScopusGrant [label="{ScopusGrant|metaknowledge}"];
27 |
28 | WOSRecord [label="{WOSRecord|metaknowledge.WOS}"];
29 | ProQuestRecord [label="{ProQuestRecord|metaknowledge.proquest}"];
30 | MedlineRecord [label="{MedlineRecord|metaknowledge.medline}"];
31 | ScopusRecord [label="{ScopusRecord|metaknowledge.scopus}"];
32 |
33 | MutableSet [label="{MutableSet|collections.abc}"];
34 | Collection [label="{Collection|metaknowledge}"];
35 | CollectionWithIDs [label="{CollectionWithIDs|metaknowledge}"];
36 | RecordCollection [label="{RecordCollection|metaknowledge}"];
37 | GrantCollection [label="{GrantCollection|metaknowledge}"];
38 |
39 | /*
40 | Mapping -> Object;
41 | Hashable -> Object;
42 | */
43 |
44 | Record -> Hashable;
45 | Record -> Mapping;
46 |
47 | Collection -> MutableSet;
48 | Collection -> Hashable;
49 |
50 | Grant -> Record;
51 | Grant -> MutableMapping;
52 |
53 | DefaultGrant -> Grant;
54 | CIHRGrant -> Grant;
55 | MedlineGrant -> Grant;
56 | NSERCGrant -> Grant;
57 | ScopusGrant -> Grant
58 |
59 | ExtendedRecord -> ABCMeta [style=dashed];
60 | ExtendedRecord -> Record;
61 | WOSRecord -> ExtendedRecord;
62 | ProQuestRecord -> ExtendedRecord;
63 | MedlineRecord -> ExtendedRecord;
64 | ScopusRecord -> ExtendedRecord;
65 |
66 | CollectionWithIDs -> Collection;
67 | RecordCollection -> CollectionWithIDs;
68 | GrantCollection -> CollectionWithIDs;
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/metaknowledge/medline/tagProcessing/specialFunctions.py:
--------------------------------------------------------------------------------
1 | from ...WOS.tagProcessing.helpFuncs import getMonth
2 |
3 | import re
4 |
5 | def year(R):
6 | try:
7 | return int(R['DP'].split(' ')[0])
8 | except ValueError:
9 | yVal = re.search(r'-?\d{1,4}', R['DP'].split(' ')[0])
10 | if yVal is None:
11 | return 0
12 | else:
13 | return(int(yVal.group(0)))
14 |
15 | def month(R):
16 | try:
17 | m = R['DP'].split(' ')[1]
18 | except IndexError:
19 | raise KeyError("Unable to extract a month")
20 | else:
21 | return getMonth(m)
22 |
23 | def volume(R):
24 | """Returns the first number/word of the volume field, hopefully trimming something like: `'49 Suppl 20'` to `49`"""
25 | return R['VI'].split(' ')[0]
26 |
27 | def beginningPage(R):
28 | """As pages may not be given as numbers this is the most accurate this function can be"""
29 | p = R['PG']
30 | if p.startswith('suppl '):
31 | p = p[6:]
32 | return p.split(' ')[0].split('-')[0].replace(';', '')
33 |
34 |
35 | def DOI(R):
36 | ids = R['AID']
37 | for a in ids:
38 | if a.endswith(' [doi]'):
39 | return a[:-6]
40 | raise KeyError("No DOI number found")
41 |
42 | def address(R):
43 | """Gets the first address of the first author"""
44 | return R['AD'][R['AU'][0]][0]
45 |
46 | medlineSpecialTagToFunc = {
47 | 'year' : year,
48 | 'month' : month,
49 | 'volume' : volume,
50 | 'beginningPage' : beginningPage,
51 | 'DOI' : DOI,
52 | 'address' : address,
53 |
54 | 'j9' : lambda R : R['TA'], #remaps to the closest field TA, but J9 != TA
55 |
56 | #'citations' : lambda R: None, #Medline does not have citations
57 |
58 | 'grants' : lambda R: R['GR'],#This is the basis for the 'grants' special function
59 |
60 | 'selfCitation' : lambda R: R.createCitation(), #just remaps to the correct function
61 | 'authorsShort' : lambda R: R['AU'], #just remaps to the correct name
62 | 'authorsFull' : lambda R : R['FAU'], #just remaps to the correct name
63 | 'title' : lambda R : R['TI'], #just remaps to the correct name
64 | 'journal' : lambda R : R['JT'], #just remaps to the correct name
65 | 'keywords' : lambda R : R['OT'], #just remaps to the correct name
66 | 'abstract' : lambda R : R['AB'], #just remaps to the correct name
67 | 'id' : lambda R : R.id, #just remaps to the correct name
68 | }
69 |
--------------------------------------------------------------------------------
/metaknowledge/genders/nameGender.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import zipfile
3 | import io
4 | import csv
5 | import os.path
6 | import urllib.request
7 |
8 | from ..mkExceptions import GenderException
9 |
10 | dataURL = 'https://github.com/UWNETLAB/globalnamedata/archive/0.3.zip'
11 | americanNamesPath = 'globalnamedata-0.3/assets/usprocessed.csv'
12 | ukNamesPath = 'globalnamedata-0.3/assets/ukprocessed.csv'
13 |
14 | targetFilePath = os.path.join(os.path.normpath(os.path.dirname(__file__)), 'namesData.csv')
15 |
16 | csvFields = [
17 | 'Name',
18 | 'years.appearing',
19 | 'count.male',
20 | 'count.female',
21 | 'prob.gender',
22 | 'obs.male',
23 | 'est.male',
24 | 'upper',
25 | 'lower'
26 | ]
27 |
28 | #global to reduce need to reload dict
29 | mappingDict = None
30 |
31 | def downloadData(useUK = False):
32 | zipFile = io.BytesIO(urllib.request.urlopen(dataURL).read())
33 | if useUK:
34 | namesFile = zipfile.ZipFile(zipFile).open(ukNamesPath)
35 | else:
36 | namesFile = zipfile.ZipFile(zipFile).open(americanNamesPath)
37 | try:
38 | with open(targetFilePath, 'wb') as f:
39 | f.write(namesFile.read())
40 | except PermissionError:
41 | raise PermissionError("Cannot write to {}, try rerunning with higher privileges".format(targetFilePath))
42 |
43 | def getMapping(useUK = False):
44 | if not os.path.isfile(targetFilePath):
45 | downloadData(useUK)
46 | retDict = {}
47 | with open(targetFilePath) as f:
48 | reader = csv.DictReader(f, fieldnames = csvFields)
49 | next(reader)
50 | for line in reader:
51 | retDict[line['Name'].title()] = line['prob.gender']
52 | return retDict
53 |
54 | def nameStringGender(s, noExcept = False):
55 | """Expects `first, last`"""
56 | global mappingDict
57 | try:
58 | first = s.split(', ')[1].split(' ')[0].title()
59 | except IndexError:
60 | if noExcept:
61 | return 'Unknown'
62 | else:
63 | raise GenderException("The given string: '{}' does not have a 'last name, first name' pair separated by ', '.".format(s))
64 | if mappingDict is None:
65 | mappingDict = getMapping()
66 | return mappingDict.get(first, 'Unknown')
67 |
68 | def recordGenders(R):
69 | return {auth : nameStringGender(auth, noExcept = True) for auth in R.get('authorsFull', [])}
70 |
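#Illustrative usage (the name is hypothetical; the first call may download the
#name data if it is not already cached):
# nameStringGender('Smith, John') -> a value from the 'prob.gender' column of the
#                                    downloaded csv, or 'Unknown' if the name is absent
# recordGenders(R) -> a dict mapping each name in R.get('authorsFull', []) to its
#                     nameStringGender() result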
--------------------------------------------------------------------------------
/metaknowledge/grants/cihrGrant.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import csv
3 |
4 | from .baseGrant import Grant, csvAndLinesReader
5 | from ..mkExceptions import BadGrant
6 |
7 | class CIHRGrant(Grant):
8 | def __init__(self, original, grantdDict, sFile, sLine):
9 | bad = False
10 | error = None
11 | if grantdDict.get('PI Names', '') == '':
12 | bad = True
13 | error = BadGrant("Missing 'PI Names'")
14 |
15 | #Source file - line number - 20 character long numeric hash
16 | idValue = "{}-l:{}-{:0=20}".format(os.path.basename(sFile), sLine, hash(original))
17 |
18 | Grant.__init__(self, original, grantdDict, idValue, bad, error, sFile = sFile, sLine = sLine)
19 |
20 | def isCIHRfile(fileName, useFileName = True):
21 | if useFileName and not os.path.basename(fileName).startswith('cihr_'):
22 | return False
23 | try:
24 | with open(fileName, 'r', encoding = 'latin-1') as openfile:
25 | if not openfile.readline().startswith('Search Criteria'):
26 | return False
27 | elif not openfile.readline().endswith(',,,,,,,,,\n'):
28 | return False
29 | elif not openfile.readline().endswith(',,,,,,,,,\n'):
30 | return False
31 | reader = csv.DictReader(openfile, fieldnames = None, dialect = 'excel')
32 | for row in reader:
33 | if 'PI Names' not in row:
34 | return False
35 | except (StopIteration, UnicodeDecodeError):
36 | return False
37 | else:
38 | return True
39 |
40 | def parserCIHRfile(fileName):
41 | grantSet = set()
42 | error = None
43 | try:
44 | with open(fileName, 'r', encoding = 'latin-1') as openfile:
45 | f = enumerate(openfile, start = 1)
46 | next(f)
47 | next(f)
48 | next(f)
49 | reader = csvAndLinesReader(f, fieldnames = None, dialect = 'excel')
50 | for lineNum, lineString, lineDict in reader:
51 | grantSet.add(CIHRGrant(lineString, lineDict, sFile = fileName, sLine = lineNum))
52 | except Exception:
53 | if error is None:
54 | error = BadGrant("The file '{}' is having decoding issues. It may have been modified since it was downloaded or not be a CIHR grant file.".format(fileName))
55 | except KeyboardInterrupt as e:
56 | error = e
57 | finally:
58 | if isinstance(error, KeyboardInterrupt):
59 | raise error
60 | return grantSet, error
61 |
--------------------------------------------------------------------------------
/metaknowledge/mkExceptions.py:
--------------------------------------------------------------------------------
1 | class mkException(Exception):
2 | pass
3 |
4 | class CollectionTypeError(mkException, TypeError):
5 | pass
6 |
7 | class RCTypeError(mkException, TypeError):
8 | pass
9 |
10 | class TagError(mkException):
11 | pass
12 |
13 | class RCValueError(mkException):
14 | pass
15 |
16 | class BadInputFile(mkException):
17 | pass
18 |
19 | class BadRecord(mkException):
20 | pass
21 |
22 | class BadPubmedRecord(mkException):
23 | pass
24 |
25 | class BadPubmedFile(mkException):
26 | pass
27 |
28 | class BadScopusRecord(mkException):
29 | pass
30 |
31 | class BadScopusFile(mkException):
32 | pass
33 |
34 | class BadProQuestRecord(mkException):
35 | pass
36 |
37 | class BadProQuestFile(mkException):
38 | pass
39 |
40 | class RecordsNotCompatible(mkException):
41 | pass
42 |
43 | class JournalDataBaseError(mkException):
44 | pass
45 |
46 | class GenderException(mkException):
47 | pass
48 |
49 | class cacheError(mkException):
50 | """Exception raised when loading a cached RecordCollection fails, should only be seen inside metaknowledge and always be caught."""
51 | pass
52 |
53 | class BadWOSRecord(BadRecord):
54 | """Exception thrown by the [record parser](../modules/WOS.html#metaknowledge.WOS.recordWOS.recordParser) to indicate a mis-formated record. This occurs when some component of the record does not parse. The messages will be any of:
55 |
56 | * _Missing field on line (line Number):(line)_, which indicates a line was too short; there should have been a tag followed by information
57 |
58 | * _End of file reached before ER_, which indicates the file ended before the 'ER' indicator appeared, 'ER' indicates the end of a record. This is often due to a copy and paste error.
59 |
60 | * _Duplicate tags in record_, which indicates the record had 2 or more lines with the same tag.
61 |
62 | * _Missing WOS number_, which indicates the record did not have a 'UT' tag.
63 |
64 | Records with a BadWOSRecord error are likely incomplete or the combination of two or more single records.
65 | """
66 | pass
67 |
68 | class BadWOSFile(Warning):
69 | """Exception thrown by wosParser for mis-formated files
70 | """
71 | pass
72 |
73 | class BadCitation(Warning):
74 | """
75 | Exception thrown by Citation
76 | """
77 | pass
78 |
79 | class BadGrant(mkException):
80 | pass
81 |
82 |
83 | class GrantCollectionException(mkException):
84 | pass
85 |
86 | class UnknownFile(mkException):
87 | pass
88 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import os.path
3 | import re
4 | from setuptools import setup, find_packages
5 |
6 | with open('metaknowledge/constants.py') as f:
7 | versionString = re.search(r"__version__ = '(.+)'", f.read()).group(1)
8 |
9 | long_descriptionLOC = "README.md"
10 | if os.path.isfile(long_descriptionLOC):
11 | long_description = open(long_descriptionLOC).read()
12 | else:
13 | long_description = ''
14 |
15 | if __name__ == '__main__':
16 |
17 | setup(name='metaknowledge',
18 | version = versionString,
19 | description = "A library for handling Web of Science files",
20 | long_description = long_description,
21 | long_description_content_type = 'text/markdown',
22 | author="Reid McIlroy-Young, John McLevey",
23 | author_email = "rmcilroy@uwaterloo.ca, john.mclevey@uwaterloo.ca",
24 | license = 'GPL',
25 | url="https://github.com/networks-lab/metaknowledge",
26 | download_url = "https://github.com/networks-lab/metaknowledge/archive/{}.tar.gz".format(versionString),
27 | keywords= 'WOS',
28 | classifiers = [
29 | 'Development Status :: 5 - Production/Stable',
30 | 'Environment :: Console',
31 | 'Environment :: MacOS X',
32 | 'Intended Audience :: Science/Research',
33 | 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
34 | 'Operating System :: MacOS :: MacOS X',
35 | 'Operating System :: POSIX',
36 | 'Operating System :: Microsoft :: Windows',
37 | 'Programming Language :: Python :: 3 :: Only',
38 | 'Topic :: Education',
39 | 'Topic :: Scientific/Engineering :: Information Analysis',
40 | 'Topic :: Sociology',
41 | 'Topic :: Text Processing',
42 | ],
43 | install_requires= ['networkx'],
44 | extras_require={'contour' : ['matplotlib', 'scipy', 'numpy']},
45 | packages = find_packages(),
46 | entry_points={'console_scripts': [
47 | 'metaknowledge = metaknowledge.bin:mkCLI',
48 | 'metaknowledge-mdToNb = metaknowledge.bin:mkMdToNb',
49 | 'metaknowledge-DocsGen = metaknowledge.bin:mkDocs',
50 | ]},
51 | test_suite='metaknowledge.tests',
52 | )
53 | print("metaknowledge installed\nIf you intend to use the gender name data or journal abbreviations facilities it is\nadvisable to download and setup the required files now.\nRunning following line in your interpreter will do it:\nimport metaknowledge;metaknowledge.downloadExtras()")
54 |
--------------------------------------------------------------------------------
/vagrant/bootstrap:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
3 | apt-get update
4 | apt-get install -y git python3 libfreetype6-dev libpng12-dev python3-setuptools python3-dev pkg-config python3-numpy python3-scipy r-base libzmq3-dev
5 | sudo easy_install3 pip
6 | echo "alias python='python3'" >> ~/.bashrc
7 | echo "alias pip='pip3'" >> ~/.bashrc
8 | echo "alias ipython='ipython3'" >> ~/.bashrc
9 | echo "alias easy_install='easy_install3'" >> ~/.bashrc
10 | source ~/.bashrc
11 |
12 | echo "alias python='python3'" >> /home/vagrant/.bashrc
13 | echo "alias pip='pip3'" >> /home/vagrant/.bashrc
14 | echo "alias ipython='ipython3'" >> /home/vagrant/.bashrc
15 | echo "alias easy_install='easy_install3'" >> /home/vagrant/.bashrc
16 |
17 | pip3 install networkx ipython matplotlib pandas seaborn igraph jupyter metaknowledge #Add to pip here
18 | echo "Getting WOS J29 database"
19 | python3 -c "import metaknowledge.journalAbbreviations; metaknowledge.journalAbbreviations.updatej9DB()"
20 |
21 | echo "Setting up git repo"
22 | cd /vagrant
23 |
24 | git init
25 | git config user.name "Student"
26 | git config user.email "Student@uwaterloo.ca" #Not a real address
27 | git add .
28 | git commit -m "Setting up local student repo"
29 | git remote add origin https://github.com/networks-lab/metaknowledge.git
30 | git fetch --all
31 | git reset --hard origin/master
32 | git pull origin master
33 | git commit -m "Syncing student with github"
34 |
35 | mkdir -p /vagrant/vagrant/logs/
36 |
37 | echo "#!/bin/bash" > /etc/rc.local
38 | echo "while [ ! -d /vagrant/vagrant ] ; do sleep 1 ; done" >> /etc/rc.local
39 | echo "/usr/local/bin/jupyter-notebook --no-browser --ip='*' --port=8888 --notebook-dir=/notebooks 2>&1 | tee -a /vagrant/vagrant/logs/ipythonNoteBook.log /home/vagrant/ipythonNoteBook.log &" >> /etc/rc.local
40 | echo "echo 'Starting Notebook server'" >> /etc/rc.local
41 | echo "echo 'Pulling metaknowledge'" >> /etc/rc.local
42 | echo "git -C /vagrant pull -q origin master" >> /etc/rc.local
43 | echo "/vagrant/vagrant/updates.sh" >> /etc/rc.local
44 | echo "echo 'Updating python packages'" >> /etc/rc.local
45 | echo "pip3 install --upgrade networkx ipython matplotlib pandas seaborn igraph jupyter metaknowledge"
46 | echo "exit 0" >> /etc/rc.local
47 | sudo chown root /etc/rc.local
48 | sudo chmod 755 /etc/rc.local
49 |
50 | echo "#!/bin/bash -e" > /home/vagrant/ipythonStartup.sh
51 | echo "/etc/rc.local" >> /home/vagrant/ipythonStartup.sh
52 | chmod +x /home/vagrant/ipythonStartup.sh
53 |
54 | /etc/rc.local
55 |
56 | echo "provisioning done"
57 | echo "Notebook Server running at http://localhost:1159"
58 | exit 0
59 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_medline.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import os.path
4 | import os
5 |
6 | import metaknowledge
7 | import metaknowledge.medline
8 |
9 |
10 | class TestMedline(unittest.TestCase):
11 | def setUp(self):
12 | metaknowledge.VERBOSE_MODE = False
13 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/medline_test.medline")
14 | self.R = self.RC.peek()
15 |
16 | def test_creation(self):
17 | Rstart = self.RC.peek()
18 | R = metaknowledge.MedlineRecord(Rstart._fieldDict)
19 | self.assertEqual(R, Rstart)
20 | with open("metaknowledge/tests/medline_test.medline") as f:
21 | f.readline()
22 | R = metaknowledge.MedlineRecord(f)
23 | self.assertEqual(R.id, 'PMID:26524502')
24 | s = f.read()
25 | R = metaknowledge.MedlineRecord(s)
26 | self.assertEqual(R.id, 'PMID:25802386')
27 | with self.assertRaises(TypeError):
28 | R = metaknowledge.MedlineRecord(12345678)
29 | R = metaknowledge.MedlineRecord("PMID- 25802386\njhgjhghjbgjhgjghhjgjh\nhdghjdfgjdfsgjh\n")
30 | self.assertTrue(R.bad)
31 | with self.assertRaises(metaknowledge.BadPubmedRecord):
32 | R.writeRecord('not a file')
33 |
34 | def test_isCollection(self):
35 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection)
36 |
37 | def test_ismedline(self):
38 | self.assertIsInstance(self.R, metaknowledge.MedlineRecord)
39 |
40 | def test_bibWrite(self):
41 | fileName = "tempFile.bib.tmp"
42 | self.RC.writeBib(fileName)
43 | self.assertEqual(os.path.getsize(fileName), 606182)
44 | self.RC.writeBib(fileName, wosMode = True, reducedOutput = True)
45 | self.assertEqual(os.path.getsize(fileName), 456151)
46 | os.remove("tempFile.bib.tmp")
47 |
48 | def test_specials(self):
49 | for R in self.RC:
50 | for s in metaknowledge.medline.medlineSpecialTagToFunc.keys():
51 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation))
52 |
53 | def test_allFields(self):
54 | for R in self.RC:
55 | for k,v in R.items():
56 | self.assertIsInstance(k, str)
57 | self.assertIsInstance(v, (str, list, dict))
58 |
59 | def test_write(self):
60 | fileName = 'tempFile.medline.tmp'
61 | self.RC.writeFile(fileName)
62 | self.assertEqual(os.path.getsize(fileName), os.path.getsize("metaknowledge/tests/medline_test.medline") + 526) #Not quite identical
63 | os.remove(fileName)
64 |
--------------------------------------------------------------------------------
/metaknowledge/medline/tagProcessing/tagNames.py:
--------------------------------------------------------------------------------
1 | from ...WOS.tagProcessing.helpFuncs import makeBiDirectional
2 |
3 |
4 | authorBasedTags = [
5 | 'AD',
6 | 'AUID',
7 | ]
8 |
9 | tagNameDict = {
10 | "Abstract" : "AB",
11 | "CopyrightInformation" : "CI",
12 | "Affiliation" : "AD",
13 | "InvestigatorAffiliation" : "IRAD",
14 | "ArticleIdentifier" : "AID",
15 | "Author" : "AU",
16 | "AuthorIdentifier" : "AUID",
17 | "FullAuthor" : "FAU",
18 | "BookTitle" : "BTI",
19 | "CollectionTitle" : "CTI",
20 | "CorporateAuthor" : "CN",
21 | "CreateDate" : "CRDT",
22 | "DateCompleted" : "DCOM",
23 | "DateCreated" : "DA",
24 | "DateLastRevised" : "LR",
25 | "DateElectronicPublication" : "DEP",
26 | "DatePublication" : "DP",
27 | "Edition" : "EN",
28 | "Editor" : "ED",
29 | "Editor" : "FED",
30 | "EntrezDate" : "EDAT",
31 | "GeneSymbol" : "GS",
32 | "GeneralNote" : "GN",
33 | "GrantNumber" : "GR",
34 | "Investigator" : "IR",
35 | "InvestigatorFull" : "FIR",
36 | "ISBN" : "ISBN",
37 | "ISSN" : "IS",
38 | "Issue" : "IP",
39 | "JournalTitleAbbreviation" : "TA",
40 | "JournalTitle" : "JT",
41 | "Language" : "LA",
42 | "LocationIdentifier" : "LID",
43 | "ManuscriptIdentifier" : "MID",
44 | "MeSHDate" : "MHDA",
45 | "MeSHTerms" : "MH",
46 | "NLMID" : "JID",
47 | "NumberReferences" : "RF",
48 | "OtherAbstract" : "OAB",
49 | "OtherAbstract" : "OABL",
50 | "OtherCopyright" : "OCI",
51 | "OtherID" : "OID",
52 | "OtherTerm" : "OT",
53 | "OtherTermOwner" : "OTO",
54 | "Owner" : "OWN",
55 | "Pagination" : "PG",
56 | "PersonalNameSubject" : "PS",
57 | "FullPersonalNameSubject" : "FPS",
58 | "PlacePublication" : "PL",
59 | "PublicationHistoryStatus" : "PHST",
60 | "PublicationStatus" : "PST",
61 | "PublicationType" : "PT",
62 | "PublishingModel" : "PUBM",
63 | "PubMedCentralIdentifier" : "PMC",
64 | "PubMedCentralRelease" : "PMCR",
65 | "PubMedUniqueIdentifier" : "PMID",
66 | "RegistryNumber" : "RN",
67 | "SubstanceName" : "NM",
68 | "SecondarySourceID" : "SI",
69 | "Source" : "SO",
70 | "SpaceFlightMission" : "SFM",
71 | "Status" : "STAT",
72 | "Subset" : "SB",
73 | "Title" : "TI",
74 | "TransliteratedTitle" : "TT",
75 | "Volume" : "VI",
76 | "VolumeTitle" : "VTI",
77 | "CommentIn" : "CIN",
78 | "ErratumIn" : "EIN",
79 | "ErratumFor" : "EFR",
80 | "CorrectedRepublishedIn" : "CRI",
81 | "CorrectedRepublishedFrom" : "CRF",
82 | "DatasetIn" : "DDIN",
83 | "DatasetUseReportedIn" : "DRIN",
84 | "PartialRetractionIn" : "PRIN",
85 | "PartialRetractionOf" : "PROF",
86 | "RepublishedIn" : "RPI",
87 | "RepublishedFrom" : "RPF",
88 | "RetractionIn" : "RIN",
89 | "RetractionOf" : "ROF",
90 | "UpdateIn" : "UIN",
91 | "UpdateOf" : "UOF",
92 | "SummaryForPatients" : "SPIN",
93 | "OriginalReportIn" : "ORI",
94 | }
95 |
96 | tagNameConverterDict = makeBiDirectional(tagNameDict)
97 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_scopus.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import metaknowledge
4 |
5 | import os
6 |
7 | class TestScopus(unittest.TestCase):
8 |
9 | def setUp(self):
10 | metaknowledge.VERBOSE_MODE = False
11 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/scopus_testing.csv.scopus")
12 | self.R = self.RC.peek()
13 |
14 | def test_creation(self):
15 | Rstart = self.RC.peek()
16 | R = metaknowledge.ScopusRecord(Rstart._fieldDict)
17 | self.assertEqual(R, Rstart)
18 | with open("metaknowledge/tests/scopus_testing.csv.scopus") as f:
19 | f.read(1)
20 | header = f.readline()[:-1].split(',')
21 | R = metaknowledge.ScopusRecord(f.readline(), header = header)
22 | self.assertEqual(R.id, 'EID:2-s2.0-84963944162')
23 | R = metaknowledge.ScopusRecord(f.readline(), header = header)
24 | self.assertEqual(R.id, 'EID:2-s2.0-84943362392')
25 | with self.assertRaises(TypeError):
26 | R = metaknowledge.ScopusRecord(12345678)
27 | R = metaknowledge.ScopusRecord(",2132,4,3fdgf,fgdgdfdg,dgfdg,,,,,,,,,,,,,,,,,,,2e5r6t789765432\n")
28 | self.assertTrue(R.bad)
29 | with self.assertRaises(metaknowledge.BadScopusRecord):
30 | R.writeRecord('not a file')
31 |
32 | def test_isCollection(self):
33 | self.assertIsInstance(self.RC, metaknowledge.RecordCollection)
34 |
35 | def test_isScopus(self):
36 | self.assertIsInstance(self.R, metaknowledge.ScopusRecord)
37 |
38 | def test_specials(self):
39 | for R in self.RC:
40 | for s in metaknowledge.scopus.scopusSpecialTagToFunc.keys():
41 | self.assertIsInstance(R.get(s), (str, type(None), list, int, metaknowledge.Citation))
42 |
43 | def test_allFields(self):
44 | for R in self.RC:
45 | for k,v in R.items():
46 | self.assertIsInstance(k, str)
47 | self.assertIsInstance(v, (str, list, int))
48 |
49 | def test_graphs(self):
50 | self.assertEqual(metaknowledge.graphStats(self.RC.networkCoAuthor(), sentenceString = True), "The graph has 1798 nodes, 89236 edges, 36 isolates, 15 self loops, a density of 0.0552422 and a transitivity of 0.994673")
51 | self.assertEqual(metaknowledge.graphStats(self.RC.networkCitation(), sentenceString = True), "The graph has 10026 nodes, 10362 edges, 0 isolates, 0 self loops, a density of 0.000103094 and a transitivity of 0")
52 |
53 | def test_write(self):
54 | fileName = 'tempFile.scopus.tmp'
55 | self.RC.writeFile(fileName)
56 | self.assertEqual(os.path.getsize(fileName), os.path.getsize("metaknowledge/tests/scopus_testing.csv.scopus") + 11511) #Not quite identical due to double quotes
57 | os.remove(fileName)
58 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_citation.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import unittest
3 | import metaknowledge
4 |
5 | class TestCitation(unittest.TestCase):
6 | def setUp(self):
7 | self.Cite = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1, DOI 0.1063/1.1695064")
8 |
9 | def test_citation_author(self):
10 | self.assertEqual(self.Cite.author, "John D")
11 |
12 | def test_citation_year(self):
13 | self.assertEqual(self.Cite.year, 2015)
14 |
15 | def test_citation_journal(self):
16 | self.assertEqual(self.Cite.journal, "TOPICS IN COGNITIVE SCIENCE")
17 |
18 | def test_citation_v(self):
19 | self.assertEqual(self.Cite.V, "V1")
20 |
21 | def test_citation_p(self):
22 | self.assertEqual(self.Cite.P, "P1")
23 |
24 | def test_citation_DOI(self):
25 | self.assertEqual(self.Cite.DOI, "0.1063/1.1695064")
26 |
27 | def test_citation_id(self):
28 | self.assertEqual(self.Cite.ID(), "John D, 2015, TOPICS IN COGNITIVE SCIENCE")
29 |
30 | def test_citation_str(self):
31 | self.assertEqual(str(self.Cite), "John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1, DOI 0.1063/1.1695064")
32 |
33 | def test_citation_extra(self):
34 | self.assertEqual(self.Cite.Extra(), "V1, P1, 0.1063/1.1695064")
35 |
36 | def test_citation_badDetection(self):
37 | self.assertTrue(metaknowledge.Citation("").bad)
38 |
39 | def test_citation_equality(self):
40 | c1 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, P1, DOI 0.1063/1.1695064")
41 | c2 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1")
42 | c3 = metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P2")
43 | self.assertTrue(c1 == self.Cite)
44 | self.assertTrue(c2 == self.Cite)
45 | self.assertFalse(c1 != c2)
46 | self.assertFalse(c3 != c1)
47 |
48 | def test_citation_hash(self):
49 | self.assertTrue(bool(hash(self.Cite)))
50 | self.assertTrue(bool(hash(metaknowledge.Citation("John D., 2015, TOPICS IN COGNITIVE SCIENCE, V1, P1"))))
51 | self.assertTrue(bool(hash(metaknowledge.Citation("John D., 2015"))))
52 |
53 | def test_citation_badLength(self):
54 | c = metaknowledge.Citation("ab, c")
55 | self.assertTrue(c.bad)
56 | self.assertEqual(str(c.error), "Not a complete set of author, year and journal")
57 | self.assertEqual(c.Extra(),'')
58 | self.assertEqual(c.author,'Ab')
59 | self.assertEqual(c.ID(),'Ab, C')
60 |
61 | def test_citation_badNumbers(self):
62 | c = metaknowledge.Citation("1, 2, 3, 4")
63 | self.assertTrue(c.bad)
64 | self.assertEqual(c.ID(), '1, 2')
65 | self.assertEqual(str(c.error), "The citation did not fully match the expected pattern")
66 |
--------------------------------------------------------------------------------
/metaknowledge/WOS/tagProcessing/funcDicts.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | from .tagFunctions import tagToFunc
3 | from .helpFuncs import reverseDict, makeBiDirectional
4 |
5 | tagToFullDict = {k : v.__name__ for k, v in tagToFunc.items()}
6 |
7 | fullToTagDict = reverseDict(tagToFullDict) #Reverses tagToFull
8 |
9 | fullToTagDictUpper = {k.upper() : v for k,v in fullToTagDict.items()}
10 |
11 |
12 | tagNameConverterDict = makeBiDirectional(tagToFullDict) #tagToFull made reversible
13 |
14 | tagsAndNameSet = set(tagNameConverterDict.keys()) #set of WOS tags and their names
15 |
16 | tagsAndNameSetUpper = set(([c.upper() for c in tagsAndNameSet]))
17 |
18 | knownTagsList = list(tagToFullDict.keys()) #list of all the known tags
19 |
20 | def tagToFull(tag):
21 | """A wrapper for `tagToFullDict`, it maps 2 character tags to their full names.
22 |
23 | # Parameters
24 |
25 | _tag_: `str`
26 |
27 | > A two character string giving the tag
28 |
29 | # Returns
30 |
31 | `str`
32 |
33 | > The full name of _tag_
34 | """
35 | try:
36 | return tagToFullDict[tag]
37 | except KeyError:
38 | raise("Tag not in list of known tags")
39 |
40 |
41 | def normalizeToTag(val):
42 | """Converts tags or full names to 2 character tags, case insensitive
43 |
44 | # Parameters
45 |
46 | _val_: `str`
47 |
48 | > A two character string giving the tag or its full name
49 |
50 | # Returns
51 |
52 | `str`
53 |
54 | > The short name of _val_
55 | """
56 | try:
57 | val = val.upper()
58 | except AttributeError:
59 | raise KeyError("{} is not a tag or name string".format(val))
60 | if val not in tagsAndNameSetUpper:
61 | raise KeyError("{} is not a tag or name string".format(val))
62 | else:
63 | try:
64 | return fullToTagDictUpper[val]
65 | except KeyError:
66 | return val
67 |
68 | def normalizeToName(val):
69 | """Converts tags or full names to full names, case sensitive
70 |
71 | # Parameters
72 |
73 | _val_: `str`
74 |
75 | > A two character string giving the tag or its full name
76 |
77 | # Returns
78 |
79 | `str`
80 |
81 | > The full name of _val_
82 | """
83 | if val not in tagsAndNameSet:
84 | raise KeyError("{} is not a tag or name string".format(val))
85 | else:
86 | try:
87 | return tagToFullDict[val]
88 | except KeyError:
89 | return val
90 |
91 | def isTagOrName(val):
92 | """Checks if _val_ is a tag or full name of tag if so returns `True`
93 |
94 | # Parameters
95 |
96 | _val_: `str`
97 |
98 | > A string possibly forming a tag or name
99 |
100 | # Returns
101 |
102 | `bool`
103 |
104 | > `True` if _val_ is a tag or name, otherwise `False`
105 | """
106 | return val in tagsAndNameSet
107 |
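#Illustrative usage (assuming 'TI' is among the known WOS tags):
# normalizeToTag('ti') -> 'TI'
# isTagOrName('TI') -> True
# isTagOrName('not a tag') -> False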
--------------------------------------------------------------------------------
/metaknowledge/tests/test_grantCollection.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | import unittest
3 | import shutil
4 | import os
5 |
6 | import metaknowledge
7 |
8 | class TestGrantCollection(unittest.TestCase):
9 |
10 | @classmethod
11 | def setUpClass(cls):
12 | metaknowledge.VERBOSE_MODE = False
13 | cls.GCmain = metaknowledge.GrantCollection("metaknowledge/tests/", cached = True)
14 |
15 | def setUp(self):
16 | self.GC = self.GCmain.copy()
17 |
18 | def test_empty(self):
19 | GCempty = metaknowledge.GrantCollection()
20 | self.assertEqual(len(GCempty), 0)
21 | self.assertEqual(GCempty.name, "Empty")
22 |
23 | def test_creationErrors(self):
24 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException):
25 | GCbad = metaknowledge.GrantCollection("README.md", extension = '.csv')
26 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile):
27 | GCbad = metaknowledge.GrantCollection("README.md")
28 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile):
29 | GCbad = metaknowledge.GrantCollection("README.md", extension = '.md')
30 | with self.assertRaises(metaknowledge.mkExceptions.BadInputFile):
31 | GCbad = metaknowledge.GrantCollection(".", extension = '.md')
32 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException):
33 | GCbad = metaknowledge.GrantCollection("README")
34 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException):
35 | GCbad = metaknowledge.GrantCollection(1)
36 | with self.assertRaises(metaknowledge.mkExceptions.GrantCollectionException):
37 | GCbad = metaknowledge.GrantCollection({1})
38 |
39 | def test_creation(self):
40 | self.assertIsInstance(self.GC, metaknowledge.GrantCollection)
41 | self.assertIsInstance(self.GC, metaknowledge.Collection)
42 | self.assertAlmostEqual(len(self.GC), 2022, delta = 4)
43 | self.assertIsInstance(self.GC.peek(), metaknowledge.Record)
44 | self.assertEqual(metaknowledge.GrantCollection(self.GC), self.GC)
45 |
46 | def test_Caching(self):
47 | self.assertTrue(os.path.isfile("metaknowledge/tests/tests.[].mkGrantDirCache"))
48 | os.remove("metaknowledge/tests/tests.[].mkGrantDirCache")
49 |
50 | def test_fallback(self):
51 | fname = "DefaultGrantTestFile.csv"
52 | shutil.copyfile("metaknowledge/tests/NSERC_TEST_PARTNER.testcsv", fname)
53 | GC = metaknowledge.GrantCollection(fname, extension = '.csv')
54 | self.assertEqual(GC._collectedTypes, {"FallbackGrant"})
55 | os.remove(fname)
56 |
57 | def test_CoInstitution(self):
58 | G = self.GC.networkCoInvestigatorInstitution()
59 | self.assertEqual(metaknowledge.graphStats(G), 'Nodes: 641\nEdges: 2034\nIsolates: 79\nSelf loops: 0\nDensity: 0.00991615\nTransitivity: 0.273548')
60 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_diffusion.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import unittest
3 | import metaknowledge
4 |
5 |
6 | class TestDiffusion(unittest.TestCase):
7 | def setUp(self):
8 | metaknowledge.VERBOSE_MODE = False
9 | self.RC = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi")
10 |
11 | def test_diffusionGraph(self):
12 | G = metaknowledge.diffusionGraph(self.RC, self.RC)
13 | Gcr_ut = metaknowledge.diffusionGraph(self.RC, self.RC, sourceType = "CR", targetType = "UT")
14 | self.assertEqual(metaknowledge.graphStats(G, sentenceString = True), 'The graph has 42 nodes, 1569 edges, 0 isolates, 35 self loops, a density of 0.91115 and a transitivity of 0.894934')
15 | self.assertEqual(metaknowledge.graphStats(Gcr_ut, sentenceString = True), 'The graph has 528 nodes, 3591 edges, 246 isolates, 0 self loops, a density of 0.0129054 and a transitivity of 0')
16 |
17 | def test_multiGraph(self):
18 | G = metaknowledge.diffusionGraph(self.RC, self.RC, labelEdgesBy = 'PY')
19 | metaknowledge.dropEdges(G, dropSelfLoops = True)
20 | #multigraphs have issues: their edge counts are somewhat unpredictable
21 | self.assertEqual(metaknowledge.graphStats(G, stats = ('nodes', 'isolates', 'loops'), sentenceString = True), 'The graph has 42 nodes, 0 isolates and 0 self loops')
22 |
23 | def test_diffusionCounts(self):
24 | d = metaknowledge.diffusionCount(self.RC, self.RC)
25 | dc = metaknowledge.diffusionCount(self.RC, self.RC, compareCounts = True)
26 | dWC = metaknowledge.diffusionCount(self.RC, self.RC, sourceType = "WC")
27 | self.assertIsInstance(d.keys().__iter__().__next__(), metaknowledge.Record)
28 | self.assertTrue(-1 < d.values().__iter__().__next__() < 10)
29 | self.assertIsInstance(list(dWC.keys())[0], str)
30 | self.assertTrue(-1 < dWC.values().__iter__().__next__() < 24)
31 | for t in dc.values():
32 | self.assertEqual(t[0], t[1])
33 |
34 | def test_diffusionPandas(self):
35 | d = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True)
36 | dwc = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True, sourceType = "WC", compareCounts = True)
37 | dyear = metaknowledge.diffusionCount(self.RC, self.RC, pandasFriendly = True, extraValue = 'year')
38 | self.assertTrue("TI" in d.keys())
39 | self.assertEqual(len(d), 44)
40 | self.assertTrue(len(d["UT"]), len(self.RC))
41 | self.assertTrue("WC" in dwc)
42 | self.assertEqual(3, len(dwc))
43 | self.assertEqual(len(dwc["TargetCount"]), 9)
44 | self.assertEqual(dwc["TargetCount"], dwc["SourceCount"])
45 | self.assertEqual(len(dyear), len(d) + 1)
46 | self.assertNotEqual(dyear["TargetCount"], dwc["SourceCount"])
47 | self.assertEqual(len([c for c in dyear["TargetCount"] if c > 1]), 9)
48 | self.assertTrue(1979 in dyear['year'])
49 |
--------------------------------------------------------------------------------
/docs/documentation/functions_methods/index.rst:
--------------------------------------------------------------------------------
1 | Functions
2 | =========
3 |
4 |
5 | .. automodule:: metaknowledge.citation
6 | :members:
7 | :private-members:
8 | :special-members:
9 | :show-inheritance:
10 | :noindex:
11 | :exclude-members: Citation
12 |
13 | .. automodule:: metaknowledge.constants
14 | :members:
15 | :private-members:
16 | :special-members:
17 |
18 | .. automodule:: metaknowledge.diffusion
19 | :members:
20 | :private-members:
21 | :special-members:
22 |
23 | .. automodule:: metaknowledge.fileHandlers
24 | :members:
25 | :private-members:
26 | :special-members:
27 | :exclude-members: ProccessorTuple
28 |
29 | .. automodule:: metaknowledge.grantCollection
30 | :members:
31 | :private-members:
32 | :special-members:
33 | :exclude-members: GrantCollection
34 |
35 | .. automodule:: metaknowledge.graphHelpers
36 | :members:
37 | :private-members:
38 | :special-members:
39 |
40 | .. automodule:: metaknowledge.mkCollection
41 | :members:
42 | :private-members:
43 | :special-members:
44 | :exclude-members: Collection, CollectionWithIDs
45 |
46 | .. automodule:: metaknowledge.mkRecord
47 | :members:
48 | :private-members:
49 | :special-members:
50 | :exclude-members: ExtendedRecord, Record
51 |
52 | .. automodule:: metaknowledge.progressBar
53 | :members:
54 | :private-members:
55 | :special-members:
56 |
57 | .. automodule:: metaknowledge.RCglimpse
58 | :members:
59 | :private-members:
60 | :special-members:
61 |
62 | .. automodule:: metaknowledge.recordCollection
63 | :members:
64 | :private-members:
65 | :special-members:
66 | :exclude-members: RecordCollection
67 |
68 | .. automodule:: metaknowledge.genders
69 | :members:
70 | :private-members:
71 | :special-members:
72 |
73 | .. automodule:: metaknowledge.genders.nameGender
74 | :members:
75 | :private-members:
76 | :special-members:
77 |
78 | .. automodule:: metaknowledge.grants.baseGrant
79 | :members:
80 | :private-members:
81 | :special-members:
82 | :exclude-members: FallbackGrant, Grant
83 |
84 | .. automodule:: metaknowledge.grants.cihrGrant
85 | :members:
86 | :private-members:
87 | :special-members:
88 | :exclude-members: CIHRGrant
89 |
90 | .. automodule:: metaknowledge.grants.medlineGrant
91 | :members:
92 | :private-members:
93 | :special-members:
94 | :exclude-members: MedlineGrant
95 |
96 | .. automodule:: metaknowledge.grants.nsercGrant
97 | :members:
98 | :private-members:
99 | :special-members:
100 | :exclude-members: NSERCGrant
101 |
102 | .. automodule:: metaknowledge.grants.nsfGrant
103 | :members:
104 | :private-members:
105 | :special-members:
106 | :exclude-members: NSFGrant
107 |
108 | .. automodule:: metaknowledge.grants.scopusGrant
109 | :members:
110 | :private-members:
111 | :special-members:
112 | :exclude-members: ScopusGrant
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | metaknowledge
2 | =========================================
3 | *A Python3 package for doing computational research on knowledge*
4 |
5 | *metaknowledge* is a Python3_ package for doing computational research in bibliometrics, scientometrics, and network analysis. It can also be easily used to simplify the process of doing systematic reviews in any disciplinary context.
6 |
7 | *metaknowledge* reads a directory of plain text files containing meta-data on publications and citations, and writes to a variety of data structures that are suitable for longitudinal research, computational text analysis (e.g. topic models and burst analysis), Reference Publication Year Spectroscopy (RPYS), and network analysis (including multi-modal, multi-level, and dynamic). It handles large datasets (e.g. several million records) efficiently.
8 |
9 | metaknowledge currently handles data from the Web of Science, PubMed, Scopus, Proquest Dissertations & Theses, and administrative data from the National Science Foundation and the Canadian tri-council granting agencies: SSHRC, CIHR, and NSERC.
10 |
11 | Datasets created with metaknowledge can be analyzed using NetworkX_ and the `standard libraries `_ for data analysis in Python. It is also easy to write data to :code:`csv` or :code:`graphml` files for analysis and visualization in `R `_, `Stata `_, `Visone `_, `Gephi `_, or any other tools for data analysis.
12 |
13 | *metaknowledge* also has a simple command line tool for extracting quantitative datasets and network files from Web of Science files. This makes the library more accessible to researchers who do not know Python, and makes it easier to quickly explore new datasets.
14 |
15 | Contact
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 | | **Reid McIlroy-Young**, `reid@reidmcy.com `_
18 | | *University of Chicago, Chicago, IL, USA*
19 |
20 | | **John McLevey**, `john.mclevey@uwaterloo.ca `_
21 | | *University of Waterloo, Waterloo, ON, Canada*
22 |
23 | | **Jillian Anderson**, `jillianderson8@gmail.com `_
24 | | *University of Waterloo, Waterloo, ON, Canada*
25 |
26 | Citation
27 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
28 | If you are using metaknowledge for research that will be published or publicly distributed, please acknowledge us with the following citation:
29 |
30 | *Reid McIlroy-Young, John McLevey, and Jillian Anderson. 2015. metaknowledge: open source software for social networks, bibliometrics, and sociology of knowledge research. URL: http://www.networkslab.org/metaknowledge.*
31 |
32 | `Download .bib file: `_
33 |
34 | License
35 | ^^^^^^^
36 | *metaknowledge* is free and open source software, distributed under the GPL License.
37 |
38 |
39 | .. toctree::
40 | :maxdepth: 1
41 |
42 | install
43 | documentation/index
44 | examples/index
45 | CLI
46 |
47 |
48 | Indices and tables
49 | ^^^^^^^^^^^^^^^^^^
50 |
51 | * :ref:`genindex`
52 | * :ref:`modindex`
53 | * :ref:`search`
54 |
55 | .. _Python3: https://www.python.org
56 | .. _NetworkX: https://networkx.github.io
--------------------------------------------------------------------------------
/metaknowledge/grants/__init__.py:
--------------------------------------------------------------------------------
1 | from .nsercGrant import NSERCGrant, isNSERCfile, parserNSERCfile
2 | from .medlineGrant import MedlineGrant
3 | from .baseGrant import Grant, FallbackGrant, isFallbackGrantFile, parserFallbackGrantFile
4 | from .cihrGrant import CIHRGrant, isCIHRfile, parserCIHRfile
5 | from .nsfGrant import NSFGrant, isNSFfile, parserNSFfile
6 |
7 |
8 | """# Creating new grants
9 |
10 | mk is intended to be expanded, as different researchers will require processing of files it does not currently support. To add a new grant type you need to write two simple functions and a class. For a basic example look at the `baseGrant.py` file: it contains the fallback grant processors and you should be able to reuse much of that code.
11 |
12 | When a GrantCollection is given a file or directory of files, it checks each file with the `detector` functions in the `grantProcessors` list found in `fileHandlers.py`; if one returns `True` it uses the matching `processor` function and adds that `type` string to its `collectedTypes` set. The `processor` must return a tuple, the first element being a set of all the Grants and the second `None` or an `Exception` object. `processor` should not raise an exception; if there is an issue the GrantCollection should still be given even a partial set of grants. GrantCollections have an errors attribute that contains all errors they encountered during the parsing.
13 |
14 |
15 | The first function, the detector, determines whether a given file path points to a collection of grants of the needed type. Determining if a file is of the needed type is usually done by reading the first few lines and checking that they match a known header template. For example, CIHR files start with the string `"Search Criteria,"` so the function `isCIHRfile()` checks that the first lines start that way.
16 |
17 | One thing to watch out for is the encoding: most grants are CSVs encoded with ISO-8859, which is what many Windows programs, most notably Excel, expect. Python will use that encoding (called `'latin-1'`) on Microsoft systems, but on Mac OS and Linux it will often default to `'utf-8'`, so you should always give the encoding explicitly as mk is intended for all 3 operating systems.
18 |
19 | The next function is the parser; this is the function that is called on the file to create the Grants. It is given a file path that the detector has already confirmed to be a correctly formatted grant file.
20 |
21 | The function must return a tuple, the first entry being a set of all the Grants and the second an `Exception` if an error occurred or `None` if not. If an error occurs the function should attempt to return as many grants as possible, including the one that had the error (with its error attributes correctly set). The GrantCollection will record the error and allow the user/script to decide what actions to take. Note, often not doing anything is appropriate, as errors have been found to most often occur at the end of the file so no data is actually lost.
22 |
23 | The `Grants` in the set returned by the processor should be instances of a new class that inherits from `Grant`, even if no new attributes are defined.
24 |
25 | Once the `detector` and `processor` functions have been created and tested, they can be added to the list of grant processors found in `fileHandlers.py`, called `grantProcessors`. Each entry is checked in order, and the last entry tells the `GrantCollection` to stop and that the file does not match, so do not add new entries after it; anything placed there will never be reached.
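
As a rough illustration, a minimal new grant type might look like the sketch below. The agency name, the `'GrantID'` field, and the file layout are invented for the example; a real implementation should follow the patterns in `baseGrant.py` (e.g. use `csvAndLinesReader` to keep the raw line for each row):

    import csv

    from .baseGrant import Grant
    from ..mkExceptions import BadGrant

    class MyAgencyGrant(Grant):
        def __init__(self, original, grantDict, sFile, sLine):
            bad = False
            error = None
            if grantDict.get('GrantID', '') == '':
                bad = True
                error = BadGrant("Missing 'GrantID'")
                idValue = "MyAgency:{}".format(hash(original))
            else:
                idValue = "MyAgency:{}".format(grantDict['GrantID'])
            Grant.__init__(self, original, grantDict, idValue, bad, error, sFile = sFile, sLine = sLine)

    def isMyAgencyFile(fileName):
        #Detector: cheaply check that the first line looks like the expected CSV header
        try:
            with open(fileName, 'r', encoding = 'latin-1') as openfile:
                return 'GrantID' in openfile.readline()
        except (OSError, UnicodeDecodeError):
            return False

    def parserMyAgencyFile(fileName):
        #Processor: must return (set of Grants, error or None) and never raise
        grantSet = set()
        error = None
        try:
            with open(fileName, 'r', encoding = 'latin-1') as openfile:
                #DictReader uses the header row for field names; data starts on line 2
                for lineNum, row in enumerate(csv.DictReader(openfile), start = 2):
                    grantSet.add(MyAgencyGrant(str(row), row, sFile = fileName, sLine = lineNum))
        except Exception:
            error = BadGrant("The file '{}' could not be fully parsed.".format(fileName))
        return grantSet, error

The new detector and processor would then be registered in `grantProcessors` in `fileHandlers.py`, keeping in mind the ordering caveat above.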
26 | """
27 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500217.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Radical Chemistry on Cloud and Aerosol Surfaces
6 | 09/01/2014
7 | 08/31/2016
8 | 269915
9 |
10 | Continuing grant
11 |
12 |
13 | 03090000
14 |
15 | Direct For Mathematical & Physical Scien
16 |
17 |
18 | Division Of Chemistry
19 |
20 |
21 |
22 | Tyrone D. Mitchell
23 |
24 | The Environmental Chemical Sciences Program in the Chemistry Division at the National Science Foundation supports the research of Professors Joseph S. Francisco and Sabre Kais both from Purdue University who will examine how free radicals contribute to numerous significant chemical processes in the atmosphere. Aerosols and cloud droplets play an important role in both the removal and the conversion of gases in the atmosphere. The interactions between gas-phase species on liquid surfaces are central to understanding chemistry at these interfaces. The overall goal of this award is to provide a theoretical framework, based on first principles including classical and ab initio molecular dynamics (MD) simulations, density functional theory combined with finite element methods for Car-Parrinello simulations, and finite size scaling for universal behavior of free energies and other thermodynamic quantities to understand how atmospheric free radicals accommodate and react at the gas-liquid interface.<br/><br/>Results of this project will help improve our understanding of the contribution to radical accommodation and uptake leading to more effective pollution control strategies as well as the improvement in air quality for pollutants whose chemistry is highly coupled to atmospheric free radicals. This multidisciplinary project will bring both undergraduate and graduate students from the departments of Chemistry and Earth and Atmospheric Sciences into the research environment. Moreover, this project will promote and support broader efforts to recruit minority and underrepresented graduate students to the chemical physics program at Purdue University.
25 | 01/23/2015
26 | 01/23/2015
27 |
28 | 1500217
29 |
30 | Joseph
31 | Francisco
32 | francisco3@unl.edu
33 | 01/23/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | University of Nebraska-Lincoln
39 | Lincoln
40 | 685031435
41 | 4024723171
42 | 2200 Vine St, 151 Whittier
43 | United States
44 | Nebraska
45 | NE
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500219.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Extremal graph theory, graph limits, and algebraic invariants
6 | 06/01/2015
7 | 05/31/2018
8 | 151604
9 |
10 | Standard Grant
11 |
12 |
13 | 03040000
14 |
15 | Direct For Mathematical & Physical Scien
16 |
17 |
18 | Division Of Mathematical Sciences
19 |
20 |
21 |
22 | Tomek Bartoszynski
23 |
24 | In this project, the PI aims to study very large networks using algebraic and analytic tools. Large networks like the Internet, molecular lattices and social networks (such as Facebook) naturally arise in many different areas of real life. The PI aims to look at these from a new perspective: we consider them as approximations of an infinite object. For molecular lattices this is a very natural approach, but via a recently developed theory of sparse graph convergence we can tackle a much broader class of problems, creating new links between mathematics, statistical physics and computer science.<br/><br/>The PI will investigate two essentially different, but still related topics. The first one is the study of extremal values of algebraic invariants of graphs with a special emphasis on those problems where the conjectured extremal graphs are not finite. Despite the lack of finite extremal solutions, using the recently emerging language of Benjamini--Schramm convergence, one can find and analyze the extremal solutions. This then leads to new asymptotic results on finite graphs. The second topic is the study of certain special infinite graphs and lattices via graph limit theory and analytic and algebraic combinatorics. The general theme is to consider a graph invariant of algebraic nature and analyze its limiting behaviour using analytic tools. Often the invariants come from graph polynomials like the matching, chromatic and independence polynomials and have various ties to statistical mechanics.
25 | 04/24/2015
26 | 04/24/2015
27 |
28 | 1500219
29 |
30 | Peter
31 | Csikvari
32 | csikvari@mit.edu
33 | 04/24/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | Massachusetts Institute of Technology
39 | Cambridge
40 | 021394301
41 | 6172531000
42 | 77 MASSACHUSETTS AVE
43 | United States
44 | Massachusetts
45 | MA
46 |
47 |
48 | 7970
49 | Combinatorics
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/metaknowledge/tests/TwoPaper.isi:
--------------------------------------------------------------------------------
1 | FN Thomson Reuters Web of Science™
2 | VR 1.0
3 | PT J
4 | AU Kim, S
5 | AF Kim, S
6 | TI Supervenience and causation: A probabilistic approach
7 | SO SYNTHESE
8 | LA English
9 | DT Article
10 | AB It is often argued that if a mental property supervenes on a physical property, then (1) the mental property M "inherits'' its causal efficacy from the physical property P and (2) the causal efficacy of M reduces to that of P. However, once we understand the supervenience thesis and the concept of causation probabilistically, it turns out that we can infer the causal efficacy of M from that of P and vice versa if and only if a certain condition, which I call the "line-up'' thesis, holds. I argue that the supervenience thesis entails neither this condition nor its denial. I also argue that even when the line-up thesis holds true, reductionism about the causal efficacy of the mental property doesn't follow.
11 | C1 Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA.
12 | RP Kim, S (reprint author), Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA.
13 | CR Eells E, 1991, PROBABILISTIC CAUSAL
14 | ENC B, 1983, J PHILOS, V80, P279, DOI 10.2307/2026499
15 | Fodor J.A., 1997, PHILOS PERSPECTIVES, P149
16 | Hausman Daniel, 1998, CAUSAL ASYMMETRIES
17 | KIM J, 1997, PHILOS PERSPECTIVES, V11, P185
18 | KIM JW, 1992, PHILOS PHENOMEN RES, V52, P1, DOI 10.2307/2107741
19 | Kim J., 1989, PHILOS PERSPECTIVES, V3, P77, DOI 10.2307/2214264
20 | Kim J., 1993, SUPERVENIENCE MIND, P358, DOI 10.1017/CBO9780511625220.019
21 | Kim J., 1989, P ADDRESSES AM PHILO, V63, P31, DOI DOI 10.2307/3130081
22 | Kim J., 1993, MENTAL CAUSATION, P189
23 | Kim Jaegwon, 1996, PHILOS MIND
24 | Shoemaker S., 1980, TIME CAUSE, P109
25 | SOBER E, 1999, PHILOS STUDIES
26 | NR 13
27 | TC 1
28 | Z9 1
29 | PU KLUWER ACADEMIC PUBL
30 | PI DORDRECHT
31 | PA SPUIBOULEVARD 50, PO BOX 17, 3300 AA DORDRECHT, NETHERLANDS
32 | SN 0039-7857
33 | J9 SYNTHESE
34 | JI Synthese
35 | PD MAR
36 | PY 2000
37 | VL 122
38 | IS 3
39 | BP 245
40 | EP 259
41 | DI 10.1023/A:1005282128866
42 | PG 15
43 | WC History & Philosophy Of Science; Philosophy
44 | SC History & Philosophy of Science; Philosophy
45 | GA 312WP
46 | UT WOS:000086967200001
47 | ER
48 |
49 | PT J
50 | AU Kim, S
51 | AF Kim, S
52 | TI Physical process theories and token-probabilistic causation
53 | SO ERKENNTNIS
54 | LA English
55 | DT Article
56 | ID CAUSALITY
57 | C1 Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA.
58 | RP Kim, S (reprint author), Univ Wisconsin, Dept Philosophy, Madison, WI 53706 USA.
59 | CR DOWE P, 1995, PHILOS SCI, V62, P321, DOI 10.1086/289859
60 | Dowe P., 1992, ERKENNTNIS, V37, P179
61 | DOWE P, 1992, PHILOS SCI, V59, P195, DOI 10.1086/289662
62 | Eells E, 1991, PROBABILISTIC CAUSAL
63 | Hausman Daniel, 1998, CAUSAL ASYMMETRIES
64 | HITCHCOCK CR, 1995, PHILOS SCI, V62, P304, DOI 10.1086/289858
65 | KITCHER P, 1989, MINN STUD PHILOS SCI, V13, P410
66 | MACKIE J, 1947, CEMENT UNVIERSE
67 | SALMON WC, 1990, TOPOI-INT REV PHILOS, V9, P95, DOI 10.1007/BF00135890
68 | Salmon W. C., 1984, SCI EXPLANATION CAUS
69 | SALMON W. C., 1998, CAUSALITY EXPLANATIO
70 | SALMON WC, 1994, PHILOS SCI, V61, P297, DOI 10.1086/289801
71 | Salmon WC, 1997, PHILOS SCI, V64, P461, DOI 10.1086/392561
72 | SOBER E, 1987, BRIT J PHILOS SCI, V38, P243, DOI 10.1093/bjps/38.2.243
73 | NR 14
74 | TC 0
75 | Z9 0
76 | PU KLUWER ACADEMIC PUBL
77 | PI DORDRECHT
78 | PA SPUIBOULEVARD 50, PO BOX 17, 3300 AA DORDRECHT, NETHERLANDS
79 | SN 0165-0106
80 | J9 ERKENNTNIS
81 | JI Erkenntnis
82 | PY 2001
83 | VL 54
84 | IS 2
85 | BP 235
86 | EP 245
87 | DI 10.1023/A:1005677609556
88 | PG 11
89 | WC Philosophy
90 | SC Philosophy
91 | GA 420JH
92 | UT WOS:000168001500006
93 | ER
94 |
95 | EF
96 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500201.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Redox, Electronic, and Rectifying Response of Five- and Six-coordinate Metallosurfactants in Solution, as Films, and on Electrodes
6 | 05/01/2015
7 | 04/30/2018
8 | 449000
9 |
10 | Standard Grant
11 |
12 |
13 | 03090000
14 |
15 | Direct For Mathematical & Physical Scien
16 |
17 |
18 | Division Of Chemistry
19 |
20 |
21 |
22 | James Lisy
23 |
24 | In this project funded by the Macromolecular, Supramolecular and Nanochemistry Program of the Division of Chemistry, Professor Cláudio N. Verani and his research group at Wayne State University in Detroit are studying metal-based molecules able to act as diodes for electric current rectification. Rectification, or directional current flow from an electrode A to an electrode B (but not from B back to A) is fundamental to the conversion of alternating into direct current, and is absolutely necessary for electronic data computation. This interdisciplinary proposal seeks to enhance our fundamental understanding on the use of metallosurfactants for molecular diodes. Broader impacts include scientific outreach to fourth and fifth graders and effort to promote Latino-student inclusion in STEM research.<br/><br/>Verani and collaborators are studying the redox, electronic, and rectifying behavior of metallosurfactants, both in solution and as Langmuir-Blodgett monolayer films deposited onto gold electrodes. Therefore, this interdisciplinary program focuses on the use of amphiphilic coordination complexes towards current-rectifying assemblies as measured by the asymmetry of current/potential (I/V) curves aiming to understand (i) the predominant rectification mechanisms in metallosurfactants; (ii) the possibility of electron-transfer mediation in metal-based singly occupied molecular orbitals (SOMOs); (iii) the viability of electron transfer mediation by metals between ligand-centered lowest unoccupied & highest occupied molecular orbitals (LUMOS & HOMOs); (iv) the role of metallosurfactant orientation in the mechanism of rectification; (v) the influence of the metallosurfactant geometry in observed symmetric conduction, unimolecular or asymmetric rectification, or insulation. This research is multi-faceted, incluidng efforts to make strides in synthetic methodologies, and in electrochemical, spectroscopic, computational, isothermal compression, and microscopy methods.
25 | 04/23/2015
26 | 04/23/2015
27 |
28 | 1500201
29 |
30 | Claudio
31 | Verani
32 | cnverani@chem.wayne.edu
33 | 04/23/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | Wayne State University
39 | Detroit
40 | 482023622
41 | 3135772424
42 | 5057 Woodward
43 | United States
44 | Michigan
45 | MI
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/metaknowledge/scopus/scopusHandlers.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 |
3 | import csv
4 |
5 | from .recordScopus import ScopusRecord, scopusHeader
6 |
7 | from ..mkExceptions import BadScopusFile
8 |
9 | def isScopusFile(infile, checkedLines = 2, maxHeaderDiff = 3):
10 | """Determines if _infile_ is the path to a Scopus csv file. A file is considered to be a Scopus file if it has the correct encoding (`utf-8` with BOM (Byte Order Mark)) and, within the first _checkedLines_, a line contains the complete header; the list of all header entries in order is found in [`scopus.scopusHeader`](#metaknowledge.scopus).
11 |
12 | **Note** this is for csv files, _not_ plain text files from Scopus; the plain text files are not complete.
13 |
14 | # Parameters
15 |
16 | _infile_ : `str`
17 |
18 | > The path to the target file
19 |
20 | _checkedLines_ : `optional [int]`
21 |
22 | > default 2, the number of lines to check for the header
23 |
24 | _maxHeaderDiff_ : `optional [int]`
25 |
26 | > default 3, the maximum number of entries by which the potential file's header may differ from the current known header `metaknowledge.scopus.scopusHeader`; if exceeded, `False` will be returned
27 |
28 | # Returns
29 |
30 | `bool`
31 |
32 | > `True` if the file is a Scopus csv file
33 | """
34 | try:
35 | with open(infile, 'r', encoding='utf-8') as openfile:
36 | if openfile.read(1) != "\ufeff":
37 | return False
38 | for i in range(checkedLines):
39 | if len(set(openfile.readline()[:-1].split(',')) ^ set(scopusHeader)) < maxHeaderDiff:
40 | return True
41 | except (StopIteration, UnicodeDecodeError):
42 | return False
43 | else:
44 | return False
45 |
46 | def scopusParser(scopusFile):
47 | """Parses a scopus file, _scopusFile_, to extract the individual lines as [ScopusRecords](../classes/ScopusRecord.html#metaknowledge.scopus.ScopusRecord).
48 |
49 | A Scopus file is a csv (Comma-separated values) with a complete header, see [`scopus.scopusHeader`](#metaknowledge.scopus) for the entries, and each line after it containing a record's entry. The string valued entries are quoted with double quotes which means double quotes inside them can cause issues, see [scopusRecordParser()](#metaknowledge.scopus.recordScopus.scopusRecordParser) for more information.
50 |
51 | # Parameters
52 |
53 | _scopusFile_ : `str`
54 |
55 | > A path to a valid scopus file, use [isScopusFile()](#metaknowledge.scopus.scopusHandlers.isScopusFile) to verify
56 |
57 | # Returns
58 |
59 | `set[ScopusRecord]`
60 |
61 | > Records for each of the entries
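
A hedged sketch of calling the detector and parser in this module directly (the file name is illustrative):

    if isScopusFile('scopus_export.csv'):
        records, error = scopusParser('scopus_export.csv')
        if error is not None:
            print('Only a partial parse was possible:', error)
        print('{} records parsed'.format(len(records)))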
62 | """
63 | #assumes the file is Scopus
64 | recSet = set()
65 | error = None
66 | lineNum = 0
67 | try:
68 | with open(scopusFile, 'r', encoding = 'utf-8') as openfile:
69 | #Get rid of the BOM
70 | openfile.read(1)
71 | header = openfile.readline()[:-1].split(',')
72 | if len(set(header) ^ set(scopusHeader)) == 0:
73 | header = None
74 | lineNum = 0
75 | try:
76 | for line, row in enumerate(openfile, start = 2):
77 | lineNum = line
78 | recSet.add(ScopusRecord(row, header = header, sFile = scopusFile, sLine = line))
79 | except BadScopusFile as e:
80 | if error is None:
81 | error = BadScopusFile("The file '{}' becomes unparsable after line: {}, due to the error: {} ".format(scopusFile, lineNum, e))
82 | except (csv.Error, UnicodeDecodeError):
83 | if error is None:
84 | error = BadScopusFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(scopusFile, lineNum))
85 | return recSet, error
86 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/twoAwardFile.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Summer Institute in Mathematics for Secondary School Teachers
6 | 01/01/1969
7 | 12/01/1969
8 | 33462
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 04/13/2004
34 |
35 | 1
36 |
37 |
38 | San Jose State University Foundation
39 |
40 | CA
41 |
42 |
43 |
44 |
45 |
46 |
47 | California
48 | 1
49 |
50 | 1
51 | -2660400
52 | 1081828800
53 |
54 | -31518000
55 | 69W3546aaaaaaaaa
56 |
57 |
58 | Summer Institute in Mathematics for Secondary School Teachers
59 | 01/01/1969
60 | 12/01/1969
61 | 33462
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 | 04/13/2004
87 |
88 | 1
89 |
90 |
91 | San Jose State University Foundation
92 |
93 | CA
94 |
95 |
96 |
97 |
98 |
99 |
100 | California
101 | 1
102 |
103 | 1
104 | -2660400
105 | 1081828800
106 |
107 | -31518000
108 | 69W3sdfghj546
109 |
110 |
111 |
--------------------------------------------------------------------------------
/metaknowledge/RCglimpse.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import collections
3 | import datetime
4 |
5 | from .mkExceptions import mkException
6 |
7 | glimpseTags = collections.OrderedDict([
8 | ('Top Authors','authorsFull'),
9 | ('Top Journals','journal'),
10 | ('Top Cited','citations'),
11 | ])
12 |
13 | descriptionString1 = 'Columns are ranked by num. of occurrences'
14 | descriptionString2 = 'and are independent of one another'
15 |
16 | descriptionStringFull = descriptionString1 + ' ' + descriptionString2
17 |
18 | def _glimpse(RC, *tags, compact = False):
19 | tColumns, tRows = tuple(shutil.get_terminal_size())
20 | if len(tags) < 1:
21 | targetTags = glimpseTags
22 | else:
23 | targetTags = {t: t for t in tags}
24 | #If it can't fit just go with the usual settings
25 | if tColumns < 55:
26 | tColumns = 80
27 | if tRows < 6:
28 | tRows = 24
29 | glimpseVals = collections.OrderedDict()
30 | if len(descriptionStringFull) > tColumns:
31 | maxRows = tRows - 7
32 | else:
33 | maxRows = tRows - 6
34 | for name, tag in targetTags.items():
35 | glimpseVals[name] = RC.rankedSeries(tag, giveCounts = False, giveRanks = True, pandasMode = False)
36 | return makeHeader(RC, tColumns, targetTags, compact) + makeTable(glimpseVals, maxRows, tColumns, compact)
37 |
38 | def makeHeader(RC, width, glimpseVals, compact):
39 | now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
40 | firstLine = "{} glimpse made at: {}".format(type(RC).__name__, now)
41 | secondLine = "{} Records from {}".format(len(RC), RC.name[:30])
42 | if compact:
43 | if len(descriptionStringFull) > width - 2:
44 | thirdLine = '|{1:+<{0}}|\n|{2:+<{0}}|\n'.format(width - 2, descriptionString1, descriptionString2)
45 | else:
46 | thirdLine = '|{1:+<{0}}|\n'.format(width - 2, descriptionStringFull)
47 | return '+{2:+<{0}}\n|{3:+<{1}}|\n{4}'.format(width - 1, width - 2, firstLine, secondLine, thirdLine)
48 | else:
49 | return '{}\n{}\n'.format(firstLine, secondLine)
50 |
51 | def makeTable(values, height, width, compact):
52 | retLines = []
53 | if compact:
54 | lines = [[] for i in range(height + 1)]
55 | firstRowString = "|{}" + "+{}" * (len(values) - 1) + '|'
56 | rowString = "|{}" * len(values) + '|'
57 | cWidth = (width // len(values)) - 1
58 | cRemainder = width % len(values) - 1
59 | for title, rows in values.items():
60 | if cRemainder > 0:
61 | heading = "{1:-^{0}}".format(cWidth + 1, title)
62 | cRemainder -= 1
63 | elif cRemainder < 0:
64 | heading = "{1:-^{0}}".format(cWidth - 1, title)
65 | cRemainder += 1
66 | else:
67 | heading = "{1:-^{0}}".format(cWidth, title)
68 | hWidth = len(heading)
69 | lines[0].append(heading)
70 | if len(rows) < height:
71 | for i in range(height - len(rows)):
72 | rows.append(('NA', -1))
73 | for index, entry in enumerate((prepEntry(hWidth, *s) for s in rows[:height]), start = 1):
74 | lines[index].append(entry)
75 | retLines.append(firstRowString.format(*tuple(lines[0])))
76 | for line in lines[1:]:
77 | retLines.append(rowString.format(*tuple(line)))
78 | else:
79 | for title, rows in values.items():
80 | retLines.append('')
81 | retLines.append(title)
82 | retLines += ['{} {}'.format(c, str(s)[:width - len(str(c)) - 1]) for s, c in rows[:height // 2]]
83 | return '\n'.join(retLines)
84 |
85 | def prepEntry(maxLength, valString, rank):
86 | valString = str(valString)
87 | if len(valString) <= maxLength - 2:
88 | valString = valString.rjust(maxLength - 2, ' ')
89 | else:
90 | valString = "{}.".format(valString[:maxLength - 3])
91 | return "{:<2.0f}{}".format(rank, valString)
92 |
--------------------------------------------------------------------------------
/metaknowledge/medline/medlineHandlers.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from ..mkExceptions import BadPubmedFile
4 |
5 | from .recordMedline import MedlineRecord
6 |
7 | def isMedlineFile(infile, checkedLines = 2):
8 | """Determines if _infile_ is the path to a Medline file. A file is considered to be a Medline file if it has the correct encoding (`latin-1`) and, within the first _checkedLines_, a line starts with `"PMID- "`.
9 |
10 | # Parameters
11 |
12 | _infile_ : `str`
13 |
14 | > The path to the target file
15 |
16 | _checkedLines_ : `optional [int]`
17 |
18 | > default 2, the number of lines to check for the header
19 |
20 | # Returns
21 |
22 | `bool`
23 |
24 | > `True` if the file is a Medline file
25 | """
26 | try:
27 | with open(infile, 'r', encoding='latin-1') as openfile:
28 | f = enumerate(openfile, start = 0)
29 | for i in range(checkedLines):
30 | if f.__next__()[1].startswith("PMID- "):
31 | #Only indicator I could find
32 | return True
33 | except (StopIteration, UnicodeDecodeError):
34 | return False
35 | else:
36 | return False
37 |
38 | def medlineParser(pubFile):
39 | """Parses a medline file, _pubFile_, to extract the individual entries as [MedlineRecords](#metaknowledge.medline.recordMedline.MedlineRecord).
40 |
41 | A medline file is a series of entries, and each entry is a series of tags. A tag is a 2 to 4 character string padded with spaces on the right to make it 4 characters, followed by a dash and a space (`'- '`). Everything after the tag, and on all following lines not starting with a tag, is considered associated with that tag. Each entry's first tag is `PMID`, so a first line looks something like `PMID- 26524502`. Entries end with a single blank line.
42 |
43 | # Parameters
44 |
45 | _pubFile_ : `str`
46 |
47 | > A path to a valid medline file, use [isMedlineFile](#metaknowledge.medline.medlineHandlers.isMedlineFile) to verify
48 |
49 | # Returns
50 |
51 | `set[MedlineRecord]`
52 |
53 | > Records for each of the entries
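
For illustration only, the opening lines of one entry follow this shape (the title and author values are invented; the `PMID` value is the one used above):

    PMID- 26524502
    TI  - An example title that wraps
          onto a following indented line
    AU  - Example A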
54 | """
55 | #assumes the file is MEDLINE
56 | recSet = set()
57 | error = None
58 | lineNum = 0
59 | try:
60 | with open(pubFile, 'r', encoding = 'latin-1') as openfile:
61 | f = enumerate(openfile, start = 1)
62 | lineNum, line = next(f)
63 | try:
64 | while True:
65 | if line.startswith("PMID- "):
66 | try:
67 | r = MedlineRecord(itertools.chain([(lineNum, line)], f), sFile = pubFile, sLine = lineNum)
68 | recSet.add(r)
69 | except BadPubmedFile as e:
70 | badLine = lineNum
71 | try:
72 | lineNum, line = next(f)
73 | while not line.startswith("PMID- "):
74 | lineNum, line = next(f)
75 | except (StopIteration, UnicodeDecodeError) as e:
76 | if error is None:
77 | error = BadPubmedFile("The file '{}' becomes unparsable after line: {}, due to the error: {} ".format(pubFile, badLine, e))
78 | raise e
79 | elif line != '\n':
80 | if error is None:
81 | error = BadPubmedFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(pubFile, lineNum))
82 | lineNum, line = next(f)
83 | except StopIteration:
84 | #End of the file has been reached
85 | pass
86 | except UnicodeDecodeError:
87 | if error is None:
88 | error = BadPubmedFile("The file '{}' has parts of it that are unparsable starting at line: {}.".format(pubFile, lineNum))
89 | return recSet, error
90 |
--------------------------------------------------------------------------------
/docs/examples/Getting-Started.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# About Jupyter Notebooks"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This document was made from a [jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells, each cell is either code, output, or markdown (text). For example this cell is markdown, which means it is plain text with a couple small formatting things, like the link in the first sentence. You can change the cell type using the dropdown menu at the top of the page.\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": false
22 | },
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "This is an output cell\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "#This cell is python\n",
34 | "#The cell below it is output\n",
35 | "print(\"This is an output cell\")"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "The code cells contain python code that you can edit and run yourself. Try changing the one above."
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# Importing"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "First you need to import the _metaknowledge_ package\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 2,
62 | "metadata": {
63 | "collapsed": false
64 | },
65 | "outputs": [],
66 | "source": [
67 | "import metaknowledge as mk"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "And you will often need the [networkx](https://networkx.github.io/documentation/networkx-1.9.1/) package\n"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {
81 | "collapsed": false
82 | },
83 | "outputs": [],
84 | "source": [
85 | "import networkx as nx"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "And [matplotlib](http://matplotlib.org/) to display the graphs and to make them look nice when displayed\n"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 4,
98 | "metadata": {
99 | "collapsed": false
100 | },
101 | "outputs": [],
102 | "source": [
103 | "import matplotlib.pyplot as plt\n",
104 | "%matplotlib inline"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "_metaknowledge_ also has a _matplotlib_ based graph [visualizer](../documentation/modules/contour.html#metaknowledge.contour.plotting.quickVisual) that will be used sometimes\n"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 5,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [],
121 | "source": [
122 | "import metaknowledge.contour.plotting as mkv"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "These lines of code will be at the top of all the other lessons as they are what let us use _metaknowledge_."
130 | ]
131 | }
132 | ],
133 | "metadata": {
134 | "kernelspec": {
135 | "display_name": "Python 3",
136 | "language": "python",
137 | "name": "python3"
138 | },
139 | "language_info": {
140 | "codemirror_mode": {
141 | "name": "ipython",
142 | "version": 3
143 | },
144 | "file_extension": ".py",
145 | "mimetype": "text/x-python",
146 | "name": "python",
147 | "nbconvert_exporter": "python",
148 | "pygments_lexer": "ipython3",
149 | "version": "3.4.0"
150 | }
151 | },
152 | "nbformat": 4,
153 | "nbformat_minor": 0
154 | }
155 |
--------------------------------------------------------------------------------
/metaknowledge/WOS/wosHandlers.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from .recordWOS import WOSRecord
4 | from ..mkExceptions import cacheError, BadWOSFile, BadWOSRecord
5 |
6 | def isWOSFile(infile, checkedLines = 3):
7 | """Determines if _infile_ is the path to a WOS file. A file is considered to be a WOS file if it has the correct encoding (`utf-8` with a BOM) and, within the first _checkedLines_, a line starts with `"VR 1.0"`.
8 |
9 | # Parameters
10 |
11 | _infile_ : `str`
12 |
13 | > The path to the target file
14 |
15 | _checkedLines_ : `optional [int]`
16 |
17 | > default 3, the number of lines to check for the header
18 |
19 | # Returns
20 |
21 | `bool`
22 |
23 | > `True` if the file is a WOS file
24 | """
25 | try:
26 | with open(infile, 'r', encoding='utf-8-sig') as openfile:
27 | f = enumerate(openfile, start = 0)
28 | for i in range(checkedLines):
29 | if "VR 1.0" in f.__next__()[1]:
30 | return True
31 | except (StopIteration, UnicodeDecodeError):
32 | return False
33 | else:
34 | return False
35 |
36 | def wosParser(isifile):
37 | """This is a function that is used to create [RecordCollections](../classes/RecordCollection.html#metaknowledge.RecordCollection) from files.
38 |
39 | **wosParser**() reads the file given by the path isifile, checks that the header is correct, then reads until it reaches EF. All WOS records it encounters are parsed with [recordParser()](#metaknowledge.WOS.recordWOS.recordParser) and converted into [Records](../classes/Record.html#metaknowledge.Record). A set of these `Records` is returned.
40 |
41 | `BadWOSFile` is raised if an issue is found with the file.
42 |
43 | # Parameters
44 |
45 | _isifile_ : `str`
46 |
47 | > The path to the target file
48 |
49 | # Returns
50 |
51 | `set[WOSRecord]`
52 |
53 | > All the `Records` found in _isifile_
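
This is normally invoked for you when a RecordCollection is built, but a hedged sketch of calling it directly (the file name is illustrative):

    records, error = wosParser('savedrecs.txt')
    if error is not None:
        print('The file had problems:', error)
    print('{} WOS records parsed'.format(len(records)))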
54 | """
55 | plst = set()
56 | error = None
57 | try:
58 | with open(isifile, 'r', encoding='utf-8-sig') as openfile:
59 | f = enumerate(openfile, start = 0)
60 | while "VR 1.0" not in f.__next__()[1]:
61 | pass
62 | notEnd = True
63 | while notEnd:
64 | line = f.__next__()
65 | if line[1] == '':
66 | error = BadWOSFile("'{}' does not have an 'EF', lines 1 to {} were checked".format(isifile, line[0] + 1))
67 | elif line[1].isspace():
68 | continue
69 | elif 'EF' in line[1][:2]:
70 | notEnd = False
71 | continue
72 | else:
73 | try:
74 | plst.add(WOSRecord(itertools.chain([line], f), sFile = isifile, sLine = line[0]))
75 | except BadWOSFile as e:
76 | try:
77 | s = f.__next__()[1]
78 | while s[:2] != 'ER':
79 | s = f.__next__()[1]
80 | except:
81 | error = BadWOSFile("The file {} was not terminated correctly, which caused the following error:\n{}".format(isifile, str(e)))
82 | try:
83 | f.__next__()
84 | except StopIteration:
85 | pass
86 | else:
87 | error = BadWOSFile("EF not at end of " + isifile)
88 | except UnicodeDecodeError:
89 | try:
90 | error = BadWOSFile("'{}' has a unicode issue on line: {}.".format(isifile, f.__next__()[0]))
91 | except:
92 | #Fallback needed in case f.__next__() causes issues
93 | error = BadWOSFile("'{}' has a unicode issue. Probably when being opened or possibly on the first line".format(isifile))
94 | except StopIteration:
95 | error = BadWOSFile("The file '{}' ends before EF was found".format(isifile))
96 | except KeyboardInterrupt as e:
97 | error = e
98 | finally:
99 | if isinstance(error, KeyboardInterrupt):
100 | raise error
101 | return plst, error
102 |
--------------------------------------------------------------------------------
/notebooks/Lesson-2-Reading-Files/Reading-Files.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Reading Files"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "First we need to import _metaknowledge_ like we saw in lesson 1.\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {
21 | "collapsed": false,
22 | "jupyter": {
23 | "outputs_hidden": false
24 | }
25 | },
26 | "outputs": [],
27 | "source": [
28 | "import metaknowledge as mk"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "We only need _metaknowledge_ for now, so there is no need to import everything."
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "The files from the Web of Science (WOS) can be loaded into a [`RecordCollection`](http://networkslab.org/metaknowledge/docs/RecordCollection#RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string.\n"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {
49 | "collapsed": false,
50 | "jupyter": {
51 | "outputs_hidden": false
52 | }
53 | },
54 | "outputs": [],
55 | "source": [
56 | "RC = mk.RecordCollection(\"savedrecs.txt\")\n",
57 | "repr(RC)"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "You can also read a whole directory, in this case it is reading the current working directory\n"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false,
72 | "jupyter": {
73 | "outputs_hidden": false
74 | }
75 | },
76 | "outputs": [],
77 | "source": [
78 | "RC = mk.RecordCollection(\".\")\n",
79 | "repr(RC)"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "_metaknowledge_ can detect whether a file is a valid WOS file, so it will read an entire directory and load only those files that have the right header. You can also tell it to read only a certain type of file by using the extension argument.\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {
93 | "collapsed": false,
94 | "jupyter": {
95 | "outputs_hidden": false
96 | }
97 | },
98 | "outputs": [],
99 | "source": [
100 | "RC = mk.RecordCollection(\".\", extension = \"txt\")\n",
101 | "repr(RC)"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "Now you have a `RecordCollection` composed of all the WOS records in the selected file(s).\n"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "collapsed": false,
116 | "jupyter": {
117 | "outputs_hidden": false
118 | }
119 | },
120 | "outputs": [],
121 | "source": [
122 | "print(\"RC is a \" + str(RC))"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "You might have noticed I used two different ways to display the `RecordCollection`. `repr(RC)` will give you where _metaknowledge_ thinks the collection came from, while `str(RC)` will give you a nice string containing the number of `Records`."
130 | ]
131 | }
132 | ],
133 | "metadata": {
134 | "kernelspec": {
135 | "display_name": "Python 3",
136 | "language": "python",
137 | "name": "python3"
138 | },
139 | "language_info": {
140 | "codemirror_mode": {
141 | "name": "ipython",
142 | "version": 3
143 | },
144 | "file_extension": ".py",
145 | "mimetype": "text/x-python",
146 | "name": "python",
147 | "nbconvert_exporter": "python",
148 | "pygments_lexer": "ipython3",
149 | "version": "3.7.4"
150 | }
151 | },
152 | "nbformat": 4,
153 | "nbformat_minor": 4
154 | }
155 |
--------------------------------------------------------------------------------
/metaknowledge/grants/nsercGrant.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os.path
3 |
4 | from .baseGrant import Grant, csvAndLinesReader
5 | from ..mkExceptions import BadGrant
6 |
7 | class NSERCGrant(Grant):
8 | def __init__(self, original, grantdDict, sFile, sLine):
9 | bad = False
10 | error = None
11 | if grantdDict.get('Cle', '') == '':
12 | bad = True
13 | error = BadGrant("Missing 'CLE'")
14 | idValue = "NSERC:{}".format(hash(original))
15 | else:
16 | idValue = "NSERC:{}".format(grantdDict.get('Cle', ''))
17 | Grant.__init__(self, original, grantdDict, idValue, bad, error, sFile = sFile, sLine = sLine)
18 |
19 | def update(self, other):
20 | for field, value in other._fieldDict.items():
21 | if value == '':
22 | continue
23 | elif self._fieldDict.get(field, '') == '':
24 | self._fieldDict[field] = value
25 | else:
26 | self._fieldDict[field] += "; {}".format(value)
27 |
28 | def getInvestigators(self, tags = None, seperator = ";", _getTag = False):
29 | """Returns a list of the names of investigators. The optional arguments are ignored.
30 |
31 | # Returns
32 |
33 | `list [str]`
34 |
35 | > A list of all the found investigator's names
36 | """
37 | if tags is None:
38 | tags = []
39 | elif isinstance(tags, str):
40 | tags = [tags]
41 | for k in self.keys():
42 | if 'name-' in k.lower() and k not in tags:
43 | tags.append(k)
44 | return super().getInvestigators(tags = tags, seperator = seperator, _getTag = _getTag)
45 |
46 | def getInstitutions(self, tags = None, seperator = ";", _getTag = False):
47 | """Returns a list with the names of the institution. The optional arguments are ignored.
48 |
49 | # Returns
50 |
51 | `list [str]`
52 |
53 | > A list with 1 entry, the name of the institution
54 | """
55 | if tags is None:
56 | tags = []
57 | elif isinstance(tags, str):
58 | tags = [tags]
59 | for k in self.keys():
60 | if 'institution' in k.lower() and k not in tags:
61 | tags.append(k)
62 | return super().getInstitutions(tags = tags, seperator = seperator, _getTag = _getTag)
63 |
64 | def isNSERCfile(fileName, useFileName = True):
65 | if useFileName and not os.path.basename(fileName).startswith('NSERC_'):
66 | return False
67 | try:
68 | with open(fileName, 'r', encoding = 'latin-1') as openfile:
69 | reader = csv.DictReader(openfile, fieldnames=None, dialect='excel')
70 | length = 0
71 | for row in reader:
72 | length += 1
73 | if set(row.keys()) != set(reader.fieldnames):
74 | return False
75 | if length < 1:
76 | return False
77 | except (StopIteration, UnicodeDecodeError, KeyError):
78 | return False
79 | else:
80 | return True
81 |
82 | def parserNSERCfile(fileName):
83 | grantSet = set()
84 | error = None
85 | try:
86 | with open(fileName, 'r', encoding = 'latin-1') as openfile:
87 | f = enumerate(openfile, start = 1)
88 | reader = csvAndLinesReader(f, fieldnames = None, dialect = 'excel')
89 | for lineNum, lineString, lineDict in reader:
90 | G = NSERCGrant(lineString, lineDict, sFile = fileName, sLine = lineNum)
91 | if G in grantSet:
92 | for Gin in grantSet:
93 | if Gin == G:
94 | Gin.update(G)
95 | break
96 | else:
97 | grantSet.add(G)
98 | except Exception:
99 | if error is None:
100 | error = BadGrant("The file '{}' is having decoding issues. It may have been modified since it was downloaded or may not be an NSERC grant file.".format(fileName))
101 | except KeyboardInterrupt as e:
102 | error = e
103 | finally:
104 | if isinstance(error, KeyboardInterrupt):
105 | raise error
106 | return grantSet, error
107 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500216.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Collaborative Research: Algebra and Algorithms, Structure and Complexity Theory
6 | 09/15/2015
7 | 08/31/2018
8 | 67889
9 |
10 | Standard Grant
11 |
12 |
13 | 03040000
14 |
15 | Direct For Mathematical & Physical Scien
16 |
17 |
18 | Division Of Mathematical Sciences
19 |
20 |
21 |
22 | Tomek Bartoszynski
23 |
24 | This project is a collaboration between mathematical researchers at five universities, including young mathematicians at the early stages of their careers, who are joining forces to tackle fundamental problems at the confluence of mathematical logic, algebra, and computer science. The overall goal is to deepen understanding about how to recognize the complexity of certain types of computational problems. The project focuses on a suite of mathematical problems whose solutions will yield new information about the complexity of Constraint Satisfaction Problems. These problems (CSP's) include scheduling problems, resource allocation problems, and problems reducible to solving systems of linear equations. CSP's are theoretically solvable, but some are not solvable efficiently. The research will be aimed at identifying a clear boundary between the tractable and intractable cases, and at providing efficient algorithms for solutions in the tractable cases. Many fundamental problems in mathematics and computer science can be formulated as CSP's, and progress here would have both practical and theoretical significance. A second component of the project investigates classical computational problems in algebra in order to determine whether they are algorithmically solvable. A third component of the project is the further development of the software UACalc, which is a proof assistant developed to handle computations involving algebraic structures.<br/><br/>The researchers shall work to decide the truth of the CSP Dichotomy Conjecture of Feder and Vardi, which states that every Constraint Satisfaction Problem with a finite template is solvable in polynomial time or is NP complete. They will further develop the algebraic approach to CSP's by refining knowledge about relations compatible with weak idempotent Maltsev conditions and about algebras with finitely related clones. A second goal of the project concerns the computable recognition of properties of finite algebras connected with the varieties they generate, such as whether a finite algebra with a finite residual bound is finitely axiomatizable, or whether a finite algebra can serve as the algebra of character values for a natural duality. One of the more tangible accomplishments of this project will be a broadening and strengthening of the applicability of the UACalc software. The agenda for this part of the project includes parallelizing the important subroutines, building in conjecture-testing and search features, adding further algorithms, and further developing the community of users and contributors.
25 | 09/11/2015
26 | 09/11/2015
27 |
28 | 1500216
29 |
30 | George
31 | McNulty
32 | mcnulty@math.sc.edu
33 | 09/11/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | University of South Carolina at Columbia
39 | COLUMBIA
40 | 292080001
41 | 8037777093
42 | Sponsored Awards Management
43 | United States
44 | South Carolina
45 | SC
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/notebooks/Lesson-1-Getting-Started/Getting-Started.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# About Jupyter Notebooks"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This document was made from a [jupyter](https://jupyter.org) notebook and can show and run python code. The document is broken up into what are called cells, each cell is either code, output, or markdown (text). For example this cell is markdown, which means it is plain text with a couple small formatting things, like the link in the first sentence. You can change the cell type using the dropdown menu at the top of the page.\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {
21 | "collapsed": false,
22 | "jupyter": {
23 | "outputs_hidden": false
24 | }
25 | },
26 | "outputs": [],
27 | "source": [
28 | "#This cell is python\n",
29 | "#The cell below it is output\n",
30 | "print(\"This is an output cell\")"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 |     "The code cells contain Python code that you can edit and run yourself. Try changing the one above."
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "# Importing"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "First you need to import the _metaknowledge_ package\n"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false,
59 | "jupyter": {
60 | "outputs_hidden": false
61 | }
62 | },
63 | "outputs": [],
64 | "source": [
65 | "import metaknowledge as mk"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "And you will often need the [_networkx_](https://networkx.github.io/documentation/networkx-1.9.1/) package\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "collapsed": false,
80 | "jupyter": {
81 | "outputs_hidden": false
82 | }
83 | },
84 | "outputs": [],
85 | "source": [
86 | "import networkx as nx"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 |     "And [_matplotlib_](http://matplotlib.org/) to display the graphs and make them look nice.\n"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "collapsed": false,
101 | "jupyter": {
102 | "outputs_hidden": false
103 | }
104 | },
105 | "outputs": [],
106 | "source": [
107 | "import matplotlib.pyplot as plt\n",
108 | "%matplotlib inline"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 |     "_metaknowledge_ also has a _matplotlib_-based graph [visualizer](http://networkslab.org/metaknowledge/docs/visual#visual) that will be used in some of the lessons.\n"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "collapsed": false,
123 | "jupyter": {
124 | "outputs_hidden": false
125 | }
126 | },
127 | "outputs": [],
128 | "source": [
129 | "import metaknowledge.contour.plotting as mkv"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 |     "These lines of code will be at the top of all the other lessons, as they are what let us use _metaknowledge_."
137 | ]
138 | }
139 | ],
140 | "metadata": {
141 | "kernelspec": {
142 | "display_name": "Python 3",
143 | "language": "python",
144 | "name": "python3"
145 | },
146 | "language_info": {
147 | "codemirror_mode": {
148 | "name": "ipython",
149 | "version": 3
150 | },
151 | "file_extension": ".py",
152 | "mimetype": "text/x-python",
153 | "name": "python",
154 | "nbconvert_exporter": "python",
155 | "pygments_lexer": "ipython3",
156 | "version": "3.7.4"
157 | }
158 | },
159 | "nbformat": 4,
160 | "nbformat_minor": 4
161 | }
162 |
--------------------------------------------------------------------------------
/metaknowledge/tests/test_graphhelpers.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import unittest
3 | import metaknowledge
4 | import os
5 | import io
6 | import sys
7 | from metaknowledge.progressBar import _ProgressBar
8 |
9 | fileShortName = 'testNetworks'
10 | fileEName = 'testNetworks_edgeList.tst'
11 | fileNName = 'testNetworks_nodeAttributes.tst'
12 | filesuffix = 'tst'
13 |
14 | class TestHelpers(unittest.TestCase):
15 | @classmethod
16 | def setUpClass(cls):
17 | cls.RCmain = metaknowledge.RecordCollection("metaknowledge/tests/testFile.isi")
18 | cls.Gmain = cls.RCmain.networkCoCitation()
19 |
20 | def setUp(self):
21 | metaknowledge.VERBOSE_MODE = False
22 | self.RC = self.RCmain.copy()
23 | self.G = self.Gmain.copy()
24 |
25 | def test_graphwrite(self):
26 | metaknowledge.writeGraph(self.G, fileShortName, suffix = filesuffix)
27 | tmpG = metaknowledge.readGraph(fileEName, fileNName)
28 | self.assertEqual(len(tmpG.edges()), len(self.G.edges()))
29 | self.assertEqual(len(tmpG.nodes()), len(self.G.nodes()))
30 | os.remove(fileEName)
31 | os.remove(fileNName)
32 |
33 | def test_tnetWriter(self):
34 | fName = fileShortName + "_tnet.csv"
35 | G = self.RC.networkTwoMode('AF', 'WC', edgeAttribute = 'PY')
36 | metaknowledge.writeTnetFile(G, fName, 'type', weighted = True, timeString = 'key')
37 | self.assertAlmostEqual(os.path.getsize(fName), 1015, delta=100)
38 | os.remove(fName)
39 | metaknowledge.writeTnetFile(G, fName, 'type')
40 | self.assertAlmostEqual(os.path.getsize(fName), 378, delta=50)
41 | os.remove(fName)
42 |
43 | def test_progress(self):
44 | metaknowledge.VERBOSE_MODE = True
45 | tmpIO = io.StringIO()
46 | P = _ProgressBar(0, "testing", output = tmpIO, dummy = True)
47 |         metaknowledge.writeEdgeList(self.G, fileEName, _progBar = P)
48 | tmpIO.seek(0)
49 | s = ''.join(tmpIO.readlines())
50 | self.assertEqual(len(s), 0)
51 | P = _ProgressBar(0, "testing", output = tmpIO)
52 | metaknowledge.writeEdgeList(self.G, fileEName, _progBar = P)
53 | tmpIO.seek(0)
54 | os.remove(fileEName)
55 | s = ''.join(tmpIO.readlines())
56 | self.assertEqual(s[-14], '[')
57 | self.assertEqual(s[-1], '%')
58 | P.finish("done test")
59 | tmpIO.seek(0)
60 | s = ''.join(tmpIO.readlines())
61 | self.assertEqual(s[-81:-3], 'done test 0.')
62 | metaknowledge.VERBOSE_MODE = False
63 |
64 | def test_dropEdges(self):
65 | metaknowledge.dropEdges(self.G, minWeight = 1, maxWeight = 3, dropSelfLoops = True)
66 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 493 nodes, 12711 edges, 0 isolates, 0 self loops, a density of 0.104809 and a transitivity of 0.588968")
67 | self.assertTrue(self.G.edges['Imbert C, 1975, NOUV REV OPT', 'Fainman Y, 1984, APPL OPTICS']['weight'] == 1)
68 |
69 | def test_dropNodeByCount(self):
70 | metaknowledge.dropNodesByCount(self.G, minCount = 2, maxCount = 5)
71 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 106 nodes, 1205 edges, 0 isolates, 17 self loops, a density of 0.218149 and a transitivity of 0.751036")
72 | self.assertTrue(self.G.node['Shih H, 1971, PHYS REV A']['count'] == 2)
73 |
74 | def test_dropNodesByDegree(self):
75 | metaknowledge.dropNodesByDegree(self.G, minDegree = 20, maxDegree = 100)
76 | self.assertEqual(metaknowledge.graphStats(self.G, sentenceString = True), "The graph has 385 nodes, 5923 edges, 0 isolates, 11 self loops, a density of 0.0802083 and a transitivity of 0.954487")
77 | self.assertTrue(self.G.edges['Mazur P, 1953, MEM ACAD ROY BELG', 'Livens Gh, 1948, P CAMB PHILOS SOC']['weight'] == 1)
78 |
79 | def test_mergeGraphs(self):
80 | RC1 = self.RC.yearSplit(0,1978)
81 | RC2 = self.RC.yearSplit(1979,10000)
82 | G1 = RC1.networkCoCitation()
83 | G2 = RC2.networkCoCitation()
84 | metaknowledge.mergeGraphs(G1,G2)
85 | for node, attr in G1.nodes(data = True):
86 | self.assertEqual(self.G.node[node]['count'], attr['count'])
87 | for node1, node2, attr in G1.edges(data = True):
88 | self.assertEqual(self.G.edges[node1, node2]['weight'], attr['weight'])
89 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500186.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | DDRIG: Expanding Phonological Typology through Kaco' Sound Patterns
6 | 06/01/2015
7 | 11/30/2017
8 | 14500
9 |
10 | Standard Grant
11 |
12 |
13 | 04040000
14 |
15 | Direct For Social, Behav & Economic Scie
16 |
17 |
18 | Division Of Behavioral and Cognitive Sci
19 |
20 |
21 |
22 | Colleen Fitzgerald
23 |
24 | Emily Olsen, under the direction of Juliette Blevins of the City University of New York, will conduct a study of the sound system of Kaco', an undescribed minority language spoken in rural Ratanakiri Province, Cambodia. Kaco' is an Austro-Asiatic language of the Mon Khmer family and, like other languages of this family, Kaco' is phonologically complex and unusual. Olsen's study of Kaco' will therefore advance knowledge of what types of phonological systems are possible for human languages. In addition, because sounds and sound processes provide important clues to language relatedness, Olsen's description of Kaco' sound patterns will facilitate hypotheses on the features that are unique to or characteristic of the Austroasiatic language family and will help disambiguate historical relationships between languages and speaker groups of the region.<br/><br/>The primary research questions involve the description of the sound patterns of the Kaco' language including contrastive vowels and consonants and their phonetic variants, the structure of Kaco' syllables; laryngeal contrasts in Kaco'syllables and their phonetic properties and phonological domains. Olsen's study will focus on documenting complex syllable structure and the role between syllables within a word. She will document phonation (vocal fold vibration) and tonogenesis (e.g., the birth of tone through consonant weakening). <br/><br/>In order to pursue these research questions, Olsen will collect speech samples from Kaco' speakers from several villages. Her data samples will include recordings of freeform narratives, wordlists, songs, and community folklore. The resulting corpus will be the first cultural and linguistic resource of its kind for use by Kaco' people. This resource will enable development of literacy materials for the community, including a Kaco' orthography, children's books and recordings, and a dictionary.<br/><br/>Data from this project will be archived at the Lund University's Repository &<br/>Workspace for Austroasiatic Intangible Heritage.
25 | 06/01/2015
26 | 06/01/2015
27 |
28 | 1500186
29 |
30 | Juliette
31 | Blevins
32 | jblevins@gc.cuny.edu
33 | 06/01/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | Emily
39 | Olsen
40 | elong1@gc.cuny.edu
41 | 06/01/2015
42 |
43 | Co-Principal Investigator
44 |
45 |
46 | CUNY Graduate School University Center
47 | New York
48 | 100164309
49 | 2128177523
50 | 365 Fifth Avenue
51 | United States
52 | New York
53 | NY
54 |
55 |
56 | 7719
57 | DEL
58 |
59 |
60 | 7719
61 | DEL
62 |
63 |
64 | 9179
65 | GRADUATE INVOLVEMENT
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/metaknowledge/tests/nsfTestFiles/1500194.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | PFI:AIR - TT: Pulsed Shaped Magnetic Fields to Focus Therapy to Deep Tissue Targets
6 | 05/15/2015
7 | 10/31/2016
8 | 200000
9 |
10 | Standard Grant
11 |
12 |
13 | 07070000
14 |
15 | Directorate For Engineering
16 |
17 |
18 | Div Of Industrial Innovation & Partnersh
19 |
20 |
21 |
22 | Barbara H. Kenny
23 |
24 | This PFI: AIR Technology Translation (TT) project aims to enable a safe and effective magnetic focusing of magnetic particle therapies to address inoperable deep tissue tumors. The proposed technique of pulsed magnetic focusing will deliver nanotherapeutics to deep targets in order to direct chemotherapy to where it needs to go in the body. If successful, this technique would enable a technology that could improve treatment for a wide range of diseases. The project will result in a prototype device that will dynamically focus nanorods to deep targets in preclinical studies. In this research, biocompatible nanorods are first aligned in one direction by a fast magnetic pulse, and then before they can turn around a second shaped fast magnetic pulse applies forces on the rods that serve to focus them to a central target. Repeat magnetic pulsing brings all the rods to a central target between the magnets. These features provide the key advantage that therapy can now be focused to a deep target between magnets, for example to treat inoperable deep tissue tumors. Focusing of therapy to deep tissue targets has been a key goal in magnetic drug targeting, and prior efforts in this field have not yet been able to achieve this goal. <br/><br/>This project addresses the following technology gap(s) as it translates from research discovery toward commercial application. Dynamic magnetic focusing of nanorods to a target between magnets was shown in benchtop experiments. In this NSF AIR TT research, the technology will be tested in tissue samples, scaled up to an in-vivo system, and its safety and utility shall be optimized and verified. In addition, personnel involved in this project will receive innovation, entrepreneurship, and technology translation experiences through developing and helping commercialize this technology.<br/><br/>The project engages Weinberg Medical Physics who will act as an industry liaison and supply the effort with equipment, expertise, and with connections to strategic partners and future investors in this technology translation effort from research discovery toward commercial reality.
25 | 05/06/2015
26 | 05/06/2015
27 |
28 | 1500194
29 |
30 | Benjamin
31 | Shapiro
32 | benshap@eng.umd.edu
33 | 05/06/2015
34 |
35 | Principal Investigator
36 |
37 |
38 | Irving
39 | Weinberg
40 | inweinberg@gmail.com
41 | 05/06/2015
42 |
43 | Co-Principal Investigator
44 |
45 |
46 | University of Maryland College Park
47 | COLLEGE PARK
48 | 207425141
49 | 3014056269
50 | 3112 LEE BLDG 7809 Regents Drive
51 | United States
52 | Maryland
53 | MD
54 |
55 |
56 | 8019
57 | Accelerating Innovation Rsrch
58 |
59 |
60 | 8019
61 | Accelerating Innovation Rsrch
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/metaknowledge/__init__.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2 | """_metaknowledge_ is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data.
3 |
4 | # Example
5 |
6 | To load the data from files and make a network:
7 |
8 | >>> import metaknowledge as mk
9 | >>> RC = mk.RecordCollection("records/")
10 | >>> print(RC)
11 | Collection of 33 records
12 | >>> G = RC.coCiteNetwork(nodeType = 'journal')
13 | Done making a co-citation network of files-from-records 1.1s
14 | >>> print(len(G.nodes()))
15 | 223
16 | >>> mk.writeGraph(G, "Cocitation-Network-of-Journals")
17 |
18 | There is also a simple command line program called `metaknowledge` that comes with the package. It allows for creating networks without any need to know Python. More information about it can be found at [networkslab.org/metaknowledge/cli](http://networkslab.org/metaknowledge/cli).
19 |
20 | # Overview
21 |
22 | This package can read the files downloaded from Thomson Reuters' [Web of Science](https://webofknowledge.com) (_WOS_), Elsevier's [Scopus](https://www.scopus.com/), [ProQuest](http://www.proquest.com/), and Medline files from [PubMed](https://www.ncbi.nlm.nih.gov/pubmed). These files contain metadata about scientific records, such as the authors, title, and citations. _metaknowledge_ can also read grants from various organizations, including _NSF_ and _NSERC_, which are handled similarly to records.
23 |
24 | The [metaknowledge.RecordCollection](./documentation/classes/RecordCollection.html#recordcollection-collectionwithids) class can take a path to one or more of these files and will load and parse them. The resulting object is the main way to work with multiple records. For each individual record it creates an instance of the [metaknowledge.Record](./documentation/classes/Record.html#record-mapping-hashable) class that contains the results of parsing that record.
25 |
26 | The files read by _metaknowledge_ are databases containing a series of tags (implicit or explicit), e.g. `'TI'` is the title tag for WOS. Each tag has one or more values, and _metaknowledge_ can read them and extract useful information. Because the tags differ between providers, a small set of values can also be accessed through provider-independent special tags, which are listed in `commonRecordFields`. These special tags act on the whole `Record` and so may combine information provided by any number of other tags.
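
For example, assuming `RC` is the `RecordCollection` from the example above, one `Record` can be pulled out and queried with a special tag. This is only an illustrative sketch (the output shown is made up); the exact field names available are the ones listed in `commonRecordFields`:

>>> R = RC.peek()
>>> R.get('title')
'Some Example Article Title'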
27 |
28 | Citations are handled by a special [Citation](./documentation/classes/Citation.html#module-metaknowledge.citation) class. This class can parse the citations given by _WOS_ and the journals cited by _Scopus_, and it allows for better comparisons when citations are used in graphs.
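
As a minimal sketch, a WOS-style citation string (similar to those in the test files) can be parsed by constructing a `Citation` directly; the attribute used here is an assumption about the parsed fields, so check the `Citation` documentation for the authoritative list:

>>> C = mk.Citation("Hu SZ, 2014, PETROL SCI TECHNOL, V32, P1437")
>>> C.journal
'PETROL SCI TECHNOL'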
29 |
30 | A note for those reading the docstrings: _metaknowledge_'s docs are written in Markdown and are processed to produce the documentation found at [metaknowledge.readthedocs.io](https://metaknowledge.readthedocs.io/en/latest/), but you should have no problem reading them with the `help()` function.
31 | """
32 |
33 | from .constants import VERBOSE_MODE, __version__, commonRecordFields, FAST_CITES
34 | from .mkExceptions import BadCitation, BadGrant, BadInputFile, BadProQuestFile, BadProQuestRecord, BadPubmedFile, BadPubmedRecord, BadRecord, BadWOSFile, BadWOSRecord, CollectionTypeError, GrantCollectionException, RCTypeError, RCValueError, RecordsNotCompatible, UnknownFile, cacheError, mkException, TagError, BadScopusRecord
35 |
36 | from .graphHelpers import writeEdgeList, writeNodeAttributeFile, writeGraph, readGraph, dropEdges, dropNodesByDegree, dropNodesByCount, mergeGraphs, graphStats, writeTnetFile
37 | from .diffusion import diffusionGraph, diffusionCount, diffusionAddCountsFromSource
38 |
39 | from .citation import Citation, filterNonJournals
40 | from .mkCollection import Collection, CollectionWithIDs
41 | from .mkRecord import Record, ExtendedRecord
42 |
43 | from .grantCollection import GrantCollection
44 | from .grants import NSERCGrant, CIHRGrant, MedlineGrant, NSFGrant, Grant, FallbackGrant
45 |
46 | from .recordCollection import RecordCollection
47 | from .WOS import WOSRecord
48 | from .medline import MedlineRecord
49 | from .proquest import ProQuestRecord
50 | from .scopus import ScopusRecord
51 |
52 | from .journalAbbreviations.backend import updatej9DB
53 | from .genders.nameGender import downloadData
54 |
55 | def downloadExtras():
56 |     """Downloads all the external files used by metaknowledge. This will overwrite existing files.
57 | """
58 | print("Downloading journal abbreviations data")
59 | updatej9DB()
60 | print("Downloading gender name data")
61 | downloadData()
62 |
--------------------------------------------------------------------------------
/metaknowledge/tests/OnePaper2.isi:
--------------------------------------------------------------------------------
1 | FN Thomson Reuters Web of Science™
2 | VR 1.0
3 | PT J
4 | AU Hu, SZ
5 | Li, SF
6 | Fu, XW
7 | Li, YJ
8 | Wang, TD
9 | AF Hu, S. Z.
10 | Li, S. F.
11 | Fu, X. W.
12 | Li, Y. J.
13 | Wang, T. D.
14 | TI Identification of Highly to Over Mature Gas Interval Using Reservoir
15 | Bitumen Features: Sichuan Basin of Southwest China
16 | SO PETROLEUM SCIENCE AND TECHNOLOGY
17 | LA English
18 | DT Article
19 | DE reservoir geochemistry; highly to over mature; reservoir bitumen; gas
20 | geochemistry; China; Sichuan Basin; gas interval
21 | ID FIELD
22 | AB The identification of oil and gas intervals has been mainly constrained by the interpretation on well logging data. However, this has met great difficulties with petroleum accumulation cases becoming more and more complex. The authors discuss an identification of highly to over mature gas interval using bitumen features, based on a case study in the Sichuan Basin of southwest China. The bitumen in the gas interval is characterized by relatively rich low-weight n-alkanes with a single pre-peak. In addition, pregnane, homopregnane, and tricyclic terpanes are enriched. Thus, the indicated maturity is highly to over mature. The late-arrived highly mature oils were cracked into gas, with little filling of bitumen in reservoir space. By contrast, the bitumen in the interval with little bearing of gas has opposite features. It is relatively enriched in high-weight n-alkanes and extremely low abundance of pregnane, homopregnane, and tricyclic terpanes. Thus the oils in such dry intervals are mostly low-mature early arrived oils, which tightly adsorbed on the carbonate grains. The pore space is almost fully filled by bitumen, being unfavorable for gas charge and preservation. As these features are all collected from routine analyses, the method can be applicable. The results can provide new fundamental information for regional oil testing and petroleum exploration, thereby reducing economic loss and exploration risk. In addition, the results also have wide implications for the other works with similar aims.
23 | C1 [Hu, S. Z.; Li, S. F.] China Univ Geosci, Minist Educ, Key Lab Tecton & Petr Resources, Wuhan 430074, Peoples R China.
24 | [Fu, X. W.; Li, Y. J.; Wang, T. D.] Southwest Petr Univ, Sch Resources & Environm, Chengdu, Peoples R China.
25 | RP Hu, SZ (reprint author), China Univ Geosci, Minist Educ, Key Lab Tecton & Petr Resources, Wuhan 430074, Peoples R China.
26 | EM hushzh@cug.edu.cn
27 | FU "973" project of China [2012CB214804]; National Natural Science
28 | Foundation of China [41273052, 40902037]
29 | FX This study was jointly funded by the "973" project of China (Grant No.
30 | 2012CB214804) and the National Natural Science Foundation of China
31 | (Grant Nos. 41273052 and 40902037).
32 | CR BASKIN DK, 1995, AAPG BULL, V79, P337
33 | Cai CF, 2010, ORG GEOCHEM, V41, P871, DOI 10.1016/j.orggeochem.2010.03.009
34 | 陈世加, 1998, 沉积学报, V16, P150
35 | Chen S. J., 2001, WELL LOGGING TECHNOL, V25, P136
36 | Chen S.J., 2001, NAT GAS IND, V21, P39
37 | Fang YX, 2011, J ASIAN EARTH SCI, V41, P147, DOI 10.1016/j.jseaes.2011.01.012
38 | Gao C. X., 2010, TUHA OIL GAS, V5, P326
39 | Hu AP, 2010, ORG GEOCHEM, V41, P924, DOI 10.1016/j.orggeochem.2010.01.001
40 | [胡守志 Hu Shouzhi], 2005, [石油实验地质, Petroleum Geology & Experiment], V27, P222
41 | Jones P. J., 2010, European Patent, Patent No. [EP1719875, 1719875]
42 | Kim D, 2010, AAPG BULL, V94, P1031, DOI 10.1306/12090909096
43 | Larijani G. R., 2010, 136318 SPE
44 | [李潮流 Li Chaoliu], 2004, [测井技术, Well Logging Technology], V28, P128
45 | Liu Dehan, 1994, PETROLEUM EXPLORATIO, V21, P113
46 | Liu SG, 2008, J CHINA UNIV GEOSCI, V19, P700
47 | LOMANDO AJ, 1992, AAPG BULL, V76, P1137
48 | Mathur N, 2001, AAPG BULL, V85, P309
49 | [沈慧萍 Shen Huiping], 2005, [天然气工业, Natural Gas Industry], V25, P47
50 | Simon R. K., 2010, GEOCHIM COSMOCHIM AC, V74, P5305
51 | Xiao D. S., 2010, FAULT BLOCK OIL GAS, V17, P509
52 | [张津海 Zhang Jinhai], 2011, [石油与天然气地质, Oil & Gas Geology], V32, P593
53 | [赵军 Zhao Jun], 2011, [石油与天然气地质, Oil & Gas Geology], V32, P245
54 | Zhao W. Z., 2010, PETROL SCI, V7, P289
55 | NR 23
56 | TC 0
57 | Z9 0
58 | PU TAYLOR & FRANCIS INC
59 | PI PHILADELPHIA
60 | PA 520 CHESTNUT STREET, STE 850, PHILADELPHIA, PA 19106 USA
61 | SN 1091-6466
62 | EI 1532-2459
63 | J9 PETROL SCI TECHNOL
64 | JI Pet. Sci. Technol.
65 | PD JUN 18
66 | PY 2014
67 | VL 32
68 | IS 12
69 | BP 1437
70 | EP 1442
71 | DI 10.1080/10916466.2012.664227
72 | PG 6
73 | WC Energy & Fuels; Engineering, Chemical; Engineering, Petroleum
74 | SC Energy & Fuels; Engineering
75 | GA AE4LR
76 | UT WOS:000333954400006
77 | ER
78 |
79 | EF
--------------------------------------------------------------------------------
/docs/examples/Reading-Files.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Reading Files"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 |     "First we need to import _metaknowledge_, as we saw in Lesson 1.\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": false
22 | },
23 | "outputs": [],
24 | "source": [
25 | "import metaknowledge as mk"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 |     "We only need _metaknowledge_ for now, so there is no need to import everything from Lesson 1."
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 |     "The files from the Web of Science (WOS) can be loaded into a [RecordCollection](../documentation/classes/RecordCollection.html#metaknowledge.RecordCollection) by creating a `RecordCollection` with the path to the files given to it as a string.\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "collapsed": false
47 | },
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "'savedrecs'"
53 | ]
54 | },
55 | "execution_count": 2,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "RC = mk.RecordCollection(\"savedrecs.txt\")\n",
62 | "repr(RC)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 |     "You can also read a whole directory; in this case it is reading the current working directory.\n"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 3,
75 | "metadata": {
76 | "collapsed": false
77 | },
78 | "outputs": [
79 | {
80 | "data": {
81 | "text/plain": [
82 | "'files-from-.'"
83 | ]
84 | },
85 | "execution_count": 3,
86 | "metadata": {},
87 | "output_type": "execute_result"
88 | }
89 | ],
90 | "source": [
91 | "RC = mk.RecordCollection(\".\")\n",
92 | "repr(RC)"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 |     "_metaknowledge_ can detect whether a file is a valid WOS file, so it will read the entire directory and load only those files that have the right header. You can also tell it to read only a certain type of file by using the extension argument.\n"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 4,
105 | "metadata": {
106 | "collapsed": false
107 | },
108 | "outputs": [
109 | {
110 | "data": {
111 | "text/plain": [
112 | "'txt-files-from-.'"
113 | ]
114 | },
115 | "execution_count": 4,
116 | "metadata": {},
117 | "output_type": "execute_result"
118 | }
119 | ],
120 | "source": [
121 | "RC = mk.RecordCollection(\".\", extension = \"txt\")\n",
122 | "repr(RC)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "Now you have a `RecordCollection` composed of all the WOS records in the selected file(s).\n"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 5,
135 | "metadata": {
136 | "collapsed": false
137 | },
138 | "outputs": [
139 | {
140 | "name": "stdout",
141 | "output_type": "stream",
142 | "text": [
143 | "RC is a Collection of 32 records\n"
144 | ]
145 | }
146 | ],
147 | "source": [
148 | "print(\"RC is a \" + str(RC))"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 |     "You might have noticed that I used two different ways to display the `RecordCollection`: `repr(RC)` will give you where _metaknowledge_ thinks the collection came from, while `str(RC)` will give you a nice string containing the number of `Records`."
156 | ]
157 | }
158 | ],
159 | "metadata": {
160 | "kernelspec": {
161 | "display_name": "Python 3",
162 | "language": "python",
163 | "name": "python3"
164 | },
165 | "language_info": {
166 | "codemirror_mode": {
167 | "name": "ipython",
168 | "version": 3
169 | },
170 | "file_extension": ".py",
171 | "mimetype": "text/x-python",
172 | "name": "python",
173 | "nbconvert_exporter": "python",
174 | "pygments_lexer": "ipython3",
175 | "version": "3.4.0"
176 | }
177 | },
178 | "nbformat": 4,
179 | "nbformat_minor": 0
180 | }
181 |
--------------------------------------------------------------------------------
/metaknowledge/bin/metaknowledgeMdToNb.py:
--------------------------------------------------------------------------------
1 | #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2 | import argparse
3 | import re
4 | import os.path
5 | import subprocess
6 |
7 | args = argparse.Namespace()
8 | codeRegex = re.compile(r'\[([0-9]*)\](.*)')
9 |
10 | startString = """{
11 | "cells": [
12 | """
13 |
14 | endString = """ ],
15 | "metadata": {
16 | "kernelspec": {
17 | "display_name": "Python 3",
18 | "language": "python",
19 | "name": "python3"
20 | },
21 | "language_info": {
22 | "codemirror_mode": {
23 | "name": "ipython",
24 | "version": 3
25 | },
26 | "file_extension": ".py",
27 | "mimetype": "text/x-python",
28 | "name": "python",
29 | "nbconvert_exporter": "python",
30 | "pygments_lexer": "ipython3",
31 | "version": "3.4.3"
32 | }
33 | },
34 | "nbformat": 4,
35 | "nbformat_minor": 0
36 | }"""
37 |
38 | def argumentParser():
39 | parser = argparse.ArgumentParser(description="A simple script to convert markdown (.md) files to iPython Notebooks (.ipynb)")
40 | #parser.add_argument("--output", "-o")
41 | #parser.add_argument("--execute", "-e", action = "store_true", default = False)
42 | parser.add_argument("files", type=argparse.FileType('r'), default = [], nargs = '*')
43 |
44 | return parser.parse_args()
45 |
46 | def convertString(file):
47 | currentExNum = float('inf')
48 | currentBufferType = ''
49 | stringBuffer = ''
50 | stringResult = []
51 | for line in file.readlines():
52 | code = re.match(codeRegex,line)
53 | if stringBuffer == '':
54 | if code:
55 | stringBuffer = code.group(2) + '\n'
56 | currentExNum = code.group(1)
57 | currentBufferType = 'py'
58 | else:
59 | stringBuffer = line
60 | currentBufferType = 'md'
61 | currentExNum = float('inf')
62 | else:
63 | if code:
64 | if currentBufferType == 'py' and currentExNum == code.group(1):
65 | stringBuffer += code.group(2) + '\n'
66 | elif currentBufferType == 'py':
67 | stringResult.append(writePYcell(stringBuffer, currentExNum))
68 | stringBuffer = code.group(2)+ '\n'
69 | currentExNum = code.group(1)
70 | currentBufferType = 'py'
71 | else:
72 | stringResult.append(writeMDcell(stringBuffer))
73 | stringBuffer = code.group(2)+ '\n'
74 | currentExNum = code.group(1)
75 | currentBufferType = 'py'
76 | else:
77 | if currentBufferType == 'md':
78 | if line == '\n' and stringBuffer[-2:] == '\n\n':
79 | stringResult.append(writeMDcell(stringBuffer[:-1]))
80 | stringBuffer = ''
81 | currentBufferType = ''
82 | else:
83 | stringBuffer += line
84 | else:
85 | stringResult.append(writePYcell(stringBuffer, currentExNum))
86 | stringBuffer = line
87 | currentBufferType = 'md'
88 | currentExNum = float('inf')
89 | if stringBuffer != '':
90 | if currentBufferType == 'md':
91 | stringResult.append(writeMDcell(stringBuffer))
92 | else:
93 | stringResult.append(writePYcell(stringBuffer, excount = currentExNum))
94 | return startString + ',\n '.join(stringResult) + endString
95 |
96 | def convert(file):
97 |     nameCompts = os.path.splitext(os.path.expanduser(os.path.normpath(file.name)))
98 |     fileName = nameCompts[0] + '.ipynb'
99 |     with open(fileName, 'w+') as outFile: #ensure the notebook file is closed once written
100 |         outFile.write(convertString(file))
101 |     return fileName
102 |
103 |
104 | def stringPreprossesing(s):
105 | s = s.lstrip('\n')
106 | s = s.replace(r'"', r'\"')
107 | s = s.replace('\n', '\\n",\n "')[:-11]
108 | return s
109 |
110 |
111 | def writeMDcell(s):
112 | return """{{
113 | "cell_type": "markdown",
114 | "metadata": {{}},
115 | "source": [
116 | "{0}"
117 | ]
118 | }}""".format(stringPreprossesing(s))
119 |
120 | def writePYcell(s, excount = ''):
121 | return """{{
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {{
125 | "collapsed": false
126 | }},
127 | "outputs": [],
128 | "source": [
129 | "{}"
130 | ]
131 | }}""".format(stringPreprossesing(s))
132 |
133 | def mkMdToNb():
134 | args = argumentParser()
135 | for f in args.files:
136 | fname = convert(f)
137 | if __name__ == '__main__':
138 | mkMdToNb()
139 |
--------------------------------------------------------------------------------