├── .gitignore
├── LICENSE
├── Makefile
├── consolidate.py
├── get_invpat.py
├── integrate.py
├── integration
├── clean_integration.sh
├── consolidate_integration.sh
├── parse_integration.sh
└── run_integration_tests.sh
├── lib
├── .gitignore
├── LICENSE_WC.txt
├── __init__.py
├── alchemy
│ ├── README.md
│ ├── __init__.py
│ ├── config.ini
│ ├── match.py
│ ├── schema.py
│ └── schema_func.py
├── argconfig_parse.py
├── assignee_disambiguation.py
├── config_parser.py
├── geoalchemy.py
├── geoalchemy_util.py
├── handlers
│ ├── README.md
│ ├── __init__.py
│ ├── application_handler_v41.py
│ ├── application_handler_v42.py
│ ├── application_handler_v43.py
│ ├── grant_handler_v42.py
│ ├── grant_handler_v44.py
│ ├── handler.py
│ ├── xml_driver.py
│ └── xml_util.py
├── lawyer_disambiguation.py
├── manual_replacement_library.txt
├── nber_substitutions.json
├── state_abbreviations.txt
├── tasks.py
└── util
│ ├── __init__.py
│ ├── csv_reader.py
│ └── getpatent.py
├── nber_substitutions.json
├── notebooks
├── MySQL.ipynb
├── Patent Database Stat Report.ipynb
├── README.md
├── buildpdf
└── latex_nocode.tplx
├── parse.py
├── process.cfg
├── readme.md
├── requirements.txt
├── run_clean.sh
├── run_consolidation.sh
├── starcluster
├── README.md
├── built_tsv.py
├── config.ini
├── fetch_xml.py
├── load.sql
├── load_drop.sql
├── load_pre.sh
└── urls.pickle
├── start.py
├── test
├── .gitignore
├── Makefile
├── __init__.py
├── colortest.py
├── colortest.rb
├── config.ini
├── fixtures
│ ├── GNS
│ │ └── geonames_10.txt
│ ├── SAS
│ │ └── national_file_head_20120204.txt
│ ├── alchemy
│ │ ├── alchemy.raw
│ │ ├── ipg120103.xml
│ │ └── ipg130416.xml
│ ├── csv
│ │ ├── .gitignore
│ │ ├── gen_sample.csv
│ │ ├── test.v0.csv
│ │ ├── test.v10.csv
│ │ ├── test.v2.csv
│ │ ├── test.v3.csv
│ │ ├── test.v4.csv
│ │ ├── test.v5.csv
│ │ ├── test.v6.csv
│ │ ├── test.v6_alex.csv
│ │ ├── test.v7.csv
│ │ ├── test.v9.csv
│ │ ├── test.v9_UC.csv
│ │ └── typos.csv
│ ├── goldstandard
│ │ ├── .gitignore
│ │ ├── benchmark.csv
│ │ ├── benchmark.sh
│ │ ├── benchmark_confirm.py
│ │ ├── berkeleyinventors.csv
│ │ ├── gs2011.sh
│ │ ├── jamesrhunt.csv
│ │ └── readme.md
│ ├── ipgxml
│ │ ├── ipg050104.small.xml
│ │ ├── ipg060103.small.xml
│ │ ├── ipg070102.small.xml
│ │ ├── ipg080101.small.xml
│ │ ├── ipg090106.small.xml
│ │ ├── ipg100105.small.xml
│ │ ├── ipg110104.small.xml
│ │ ├── ipg120103.small.xml
│ │ └── ipg130416.small.xml
│ ├── sqlite3
│ │ └── combined.sqlite3
│ ├── text
│ │ └── accented_characters.txt
│ ├── unittest
│ │ ├── .gitignore
│ │ └── patent_two_parsed_general.xml
│ └── xml
│ │ ├── .gitignore
│ │ ├── 2009_1.xml
│ │ ├── 2009_10.xml
│ │ ├── 2009_2.xml
│ │ ├── 2009_3.xml
│ │ ├── 2009_4.xml
│ │ ├── 2009_5.xml
│ │ ├── 2009_6.xml
│ │ ├── 2009_7.xml
│ │ ├── 2009_8.xml
│ │ ├── 2009_9.xml
│ │ ├── 2010_1.xml
│ │ ├── 2010_10.xml
│ │ ├── 2010_2.xml
│ │ ├── 2010_3.xml
│ │ ├── 2010_4.xml
│ │ ├── 2010_5.xml
│ │ ├── 2010_6.xml
│ │ ├── 2010_7.xml
│ │ ├── 2010_8.xml
│ │ ├── 2010_9.xml
│ │ ├── 2011_1.xml
│ │ ├── 2011_10.xml
│ │ ├── 2011_2.xml
│ │ ├── 2011_3.xml
│ │ ├── 2011_4.xml
│ │ ├── 2011_5.xml
│ │ ├── 2011_6.xml
│ │ ├── 2011_7.xml
│ │ ├── 2011_8.xml
│ │ ├── 2011_9.xml
│ │ ├── 2012_1.xml
│ │ ├── 2012_10.xml
│ │ ├── 2012_2.xml
│ │ ├── 2012_3.xml
│ │ ├── 2012_4.xml
│ │ ├── 2012_5.xml
│ │ ├── 2012_6.xml
│ │ ├── 2012_7.xml
│ │ ├── 2012_8.xml
│ │ ├── 2012_9.xml
│ │ ├── basic.xml
│ │ ├── ipa061228.one.xml
│ │ ├── ipa130117.one.xml
│ │ ├── ipg100824-hyphenated.xml
│ │ ├── ipg120327.18.xml
│ │ ├── ipg120327.196.xml
│ │ ├── ipg120327.one.xml
│ │ ├── ipg120327.two.xml
│ │ ├── pa040101.two.xml
│ │ ├── patent_eight_parsed_.xml
│ │ ├── patent_eight_unparsed_dna.xml
│ │ ├── patent_five_parsed_.xml
│ │ ├── patent_five_unparsed_dna.xml
│ │ ├── patent_four_parsed_.xml
│ │ ├── patent_four_unparsed_.xml
│ │ ├── patent_nine_parsed_.xml
│ │ ├── patent_nine_unparsed_dna.xml
│ │ ├── patent_one_parsed_.xml
│ │ ├── patent_one_unparsed_dna.xml
│ │ ├── patent_seven_parsed_.xml
│ │ ├── patent_seven_unparsed_dna.xml
│ │ ├── patent_six_parsed_.xml
│ │ ├── patent_six_unparsed_.xml
│ │ ├── patent_ten_parsed_.xml
│ │ ├── patent_ten_unparsed_dna.xml
│ │ ├── patent_three_parsed_.xml
│ │ ├── patent_three_unparsed_.xml
│ │ └── patent_two_unparsed_.xml
├── integration
│ ├── .gitignore
│ ├── clean
│ │ ├── ipg120327.18
│ │ │ ├── assignee.csv
│ │ │ ├── lawyer.csv
│ │ │ └── location.csv
│ │ ├── ipg120327.one
│ │ │ ├── assignee.csv
│ │ │ ├── lawyer.csv
│ │ │ └── location.csv
│ │ └── ipg120327.two
│ │ │ ├── assignee.csv
│ │ │ ├── lawyer.csv
│ │ │ └── location.csv
│ ├── consolidate
│ │ ├── ipg120327.18
│ │ │ └── disambiguator.csv
│ │ └── ipg120327.two
│ │ │ └── disambiguator.csv
│ ├── parse
│ │ ├── ipa061228.one
│ │ │ ├── application.csv
│ │ │ ├── claim.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ │ ├── ipa130117.one
│ │ │ ├── application.csv
│ │ │ ├── claim.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ │ ├── ipg120327.18
│ │ │ ├── application.csv
│ │ │ ├── citation.csv
│ │ │ ├── claim.csv
│ │ │ ├── foreigncitation.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── otherreference.csv
│ │ │ ├── patent.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlawyer.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── usapplicationcitation.csv
│ │ │ ├── uspatentcitation.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ │ ├── ipg120327.one
│ │ │ ├── application.csv
│ │ │ ├── citation.csv
│ │ │ ├── claim.csv
│ │ │ ├── foreigncitation.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── otherreference.csv
│ │ │ ├── patent.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlawyer.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── usapplicationcitation.csv
│ │ │ ├── uspatentcitation.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ │ ├── ipg120327.two
│ │ │ ├── application.csv
│ │ │ ├── citation.csv
│ │ │ ├── claim.csv
│ │ │ ├── foreigncitation.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── otherreference.csv
│ │ │ ├── patent.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlawyer.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── usapplicationcitation.csv
│ │ │ ├── uspatentcitation.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ │ └── pa040101.two
│ │ │ ├── application.csv
│ │ │ ├── claim.csv
│ │ │ ├── ipcr.csv
│ │ │ ├── mainclass.csv
│ │ │ ├── rawassignee.csv
│ │ │ ├── rawinventor.csv
│ │ │ ├── rawlocation.csv
│ │ │ ├── subclass.csv
│ │ │ ├── uspc.csv
│ │ │ └── usreldoc.csv
│ └── readme.md
├── make_test_databases.py
├── patenttest.sh
├── process.cfg
├── readme.md
├── sqlitetest.py
├── test_alchemy.py
├── test_ascit.py
├── test_configuration.py
├── test_fwork.py
├── test_keylist.py
├── test_parse_file.py
├── test_separate_row_geocode.py
├── test_sqlite.py
├── test_sqlite_index.py
├── test_sqlite_merge.py
├── test_xml_driver.py
└── test_xml_util.py
└── vm
├── Vagrantfile
└── manifests
└── default.pp
/.gitignore:
--------------------------------------------------------------------------------
1 | *-journal
2 | bmschema.txt
3 | dump.sql
4 | tmp*
5 | results*.txt
6 | .sass*
7 | .DS_*
8 | *~
9 | *.swp
10 | *.csv
11 | tmp/
12 | *.log
13 | invpat*.csv
14 | *.pyc
15 | *.sqlite3
16 | data/
17 | fibo.py
18 | NBER_asg
19 | loctbl
20 | latex/
21 | html/
22 | *.project
23 | .pydevproject
24 | hashTbl
25 | test1/
26 | test_loc.sql
27 | patentroot/
28 | .settings/
29 | XML/*
30 | *.s3
31 | *.db
32 | lib/alchemy/config.ini
33 | *.pickle
34 | build/
35 | tags
36 | *.cmd
37 |
38 | *.pid # pid files
39 | dump.rdb # redis dump
40 |
41 | *.aux
42 | *.idx
43 | *.out
44 | *.pdf
45 | *.tex
46 | *_files
47 | *.ipynb_checkpoints
48 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
# Remove editor backups, compiled Python files, and log files.
 2 | clean:
 3 | 	rm -rf *~ *.pyc *.log
 4 | 
# Everything `clean` removes, plus generated databases and
# integration-test artifacts (SQLite files, tmp/, disambiguator output).
 5 | spotless: clean
 6 | 	rm -rf *.sqlite3 tmp grant.db application.db *-journal disambiguator.csv
 7 | 
 8 | 
--------------------------------------------------------------------------------
/get_invpat.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
29 | """
30 | Creates the invpat file as seen and constructed in the Harvard DVN project
31 | """
32 | from lib import alchemy
33 | import pandas as pd
34 | 
# Open a DB session through the project's alchemy wrapper; the backend
# (MySQL vs. SQLite) is chosen by lib/alchemy/config.ini.
35 | session_generator = alchemy.session_generator
36 | session = session_generator()
37 | 
# NOTE(review): the block below is an earlier LEFT JOIN formulation of the
# same query, kept for reference; consider deleting this commented-out code
# once the inner-join version below is confirmed to cover the intended rows.
38 | #res = session.execute('select rawinventor.name_first, rawinventor.name_last, rawlocation.city, rawlocation.state, \
39 | #                      rawlocation.country, rawinventor.sequence, patent.id, \
40 | #                      year(application.date), year(patent.date), rawassignee.organization, uspc.mainclass_id, inventor.id \
41 | #                      from rawinventor left join patent on patent.id = rawinventor.patent_id \
42 | #                      left join application on application.patent_id = patent.id \
43 | #                      left join rawlocation on rawlocation.id = rawinventor.rawlocation_id \
44 | #                      left join rawassignee on rawassignee.patent_id = patent.id \
45 | #                      left join uspc on uspc.patent_id = patent.id \
46 | #                      left join inventor on inventor.id = rawinventor.inventor_id \
47 | #                      where uspc.sequence = 0;')
# Inner-join query: one row per (inventor, patent, assignee, uspc) combination,
# joining raw records to their disambiguated location/inventor counterparts.
# NOTE(review): year(...) is a MySQL function — presumably this script is run
# against the MySQL backend; verify before running on SQLite.
48 | res = session.execute('select rawinventor.name_first, rawinventor.name_last, location.city, location.state, \
49 |                       location.country, rawinventor.sequence, patent.id, year(application.date), \
50 |                       year(patent.date), rawassignee.organization, uspc.mainclass_id, inventor.id \
51 |                       from rawinventor, rawlocation, patent, application, rawassignee, uspc, inventor,location \
52 |                       where rawinventor.patent_id = patent.id and \
53 |                       application.patent_id = patent.id and \
54 |                       rawlocation.id = rawinventor.rawlocation_id and \
55 |                       location.id = rawlocation.location_id and \
56 |                       rawassignee.patent_id = patent.id and \
57 |                       uspc.patent_id = patent.id and \
58 |                       inventor.id = rawinventor.inventor_id;')
# Columns are positional (0..11) in the SELECT order above until renamed below.
59 | data = pd.DataFrame.from_records(res.fetchall())
# Deduplicate on (patent id, inventor id) = positional columns 6 and 11.
# NOTE(review): a tuple subset works on old pandas (treated as a sequence of
# labels), but newer pandas treats a tuple as a single column label — use
# [6, 11] if the pandas dependency is ever upgraded.
60 | data = data.drop_duplicates((6,11))
61 | data.columns = ['first_name', 'last_name', 'city', 'state', 'country', 'sequence', 'patent', 'app_year', 'grant_year', 'assignee', 'mainclass', 'inventorid']
# Write the invpat file (Harvard DVN format) as UTF-8 CSV without the index.
62 | data.to_csv('invpat.csv',index=False,encoding='utf8')
63 | 
--------------------------------------------------------------------------------
/integration/clean_integration.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Integration testing for the clean.py script
 4 | 
# The script is expected to be launched from the integration/ directory;
# all paths below are relative to the repository root.
 5 | cd ..
 6 | 
# For each fixture: wipe state, parse the XML into grant.db, run the clean
# step, then diff the cleaned tables against checked-in expected CSVs.
# NOTE(review): the three stanzas below differ only in the fixture name
# (ipg120327.one / .two / .18); a shell function taking the name would
# remove the duplication.
 7 | echo 'Testing test/fixtures/xml/ipg120327.one.xml'
 8 | make spotless > /dev/null
 9 | mkdir -p tmp/integration/ipg120327.one
10 | ./parse.py -p test/fixtures/xml/ -x ipg120327.one.xml -o .
11 | ./run_clean.sh grant
12 | 
13 | for table in assignee lawyer location
14 | do
15 |     echo $table 'diffs...'
16 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.one/${table}.csv
17 |     # remove UUIDs from database dump because these change each time
18 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.one/${table}.csv
19 |     diff test/integration/clean/ipg120327.one/${table}.csv tmp/integration/ipg120327.one/${table}.csv
20 | done
21 | 
22 | echo 'Testing test/fixtures/xml/ipg120327.two.xml'
23 | make spotless > /dev/null
24 | mkdir -p tmp/integration/ipg120327.two
25 | ./parse.py -p test/fixtures/xml/ -x ipg120327.two.xml -o .
26 | ./run_clean.sh grant
27 | 
28 | for table in assignee lawyer location
29 | do
30 |     echo $table 'diffs...'
31 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.two/${table}.csv
32 |     # remove UUIDs from database dump because these change each time
33 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.two/${table}.csv
34 |     diff test/integration/clean/ipg120327.two/${table}.csv tmp/integration/ipg120327.two/${table}.csv
35 | done
36 | 
37 | echo 'Testing test/fixtures/xml/ipg120327.18.xml'
38 | make spotless > /dev/null
39 | mkdir -p tmp/integration/ipg120327.18
40 | ./parse.py -p test/fixtures/xml/ -x ipg120327.18.xml -o .
41 | ./run_clean.sh grant
42 | 
43 | for table in assignee lawyer location
44 | do
45 |     echo $table 'diffs...'
46 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.18/${table}.csv
47 |     # remove UUIDs from database dump because these change each time
48 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.18/${table}.csv
49 |     diff test/integration/clean/ipg120327.18/${table}.csv tmp/integration/ipg120327.18/${table}.csv
50 | done
51 | 
52 | # clean up after we're done
53 | rm -rf tmp
54 | make spotless > /dev/null
55 | 
--------------------------------------------------------------------------------
/integration/consolidate_integration.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Integration testing for the consolidate.py script
4 |
5 | cd ..
6 |
7 | ##### Two rows
8 |
9 | make spotless > /dev/null
10 | ./parse.py -p test/fixtures/xml/ -x ipg120327.two.xml -o .
11 | mkdir -p tmp/integration/ipg120327.two
12 |
13 | echo Starting clean...
14 | ./run_clean.sh grant
15 |
16 | echo Starting consolidate...
17 | python consolidate.py
18 |
19 | perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}\t//' disambiguator.csv
20 | diff test/integration/consolidate/ipg120327.two/disambiguator.csv disambiguator.csv
21 |
22 | ### 18 rows
23 |
24 | make spotless > /dev/null
25 | ./parse.py -p test/fixtures/xml/ -x ipg120327.18.xml -o .
26 | mkdir -p tmp/integration/ipg120327.18
27 |
28 | echo Starting clean...
29 | ./run_clean.sh grant
30 |
31 | echo Starting consolidate...
32 | python consolidate.py
33 |
34 | perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}\t//' disambiguator.csv
35 | diff test/integration/consolidate/ipg120327.18/disambiguator.csv disambiguator.csv
36 |
37 | ## clean up after we're done
38 | rm -rf tmp
39 | make spotless > /dev/null
40 |
--------------------------------------------------------------------------------
/integration/parse_integration.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
# Integration testing for the parse.py script: for each XML fixture, wipe
# state, parse into grant.db (default) or application.db (-d application),
# dump every table to CSV, strip run-varying UUID columns, and diff against
# the expected CSVs under test/integration/parse/.
# Launched from the integration/ directory; paths are relative to repo root.
 3 | cd ..
 4 | 
 5 | echo 'Testing test/fixtures/xml/ipg120327.one.xml'
 6 | make spotless > /dev/null
 7 | mkdir -p tmp/integration/ipg120327.one
 8 | ./parse.py -p test/fixtures/xml/ -x ipg120327.one.xml -o .
 9 | 
10 | for table in application uspatentcitation usapplicationcitation foreigncitation ipcr mainclass otherreference patent rawassignee rawinventor rawlawyer rawlocation subclass uspc usreldoc claim
11 | do
12 |     echo $table 'diffs...'
# NOTE(review): grant.db dumps omit -header while the application.db dumps
# below include it — this matches the checked-in fixtures, but the
# inconsistency is worth confirming as intentional.
13 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.one/${table}.csv
14 |     # remove UUIDs from database dump because these change each time
15 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.one/${table}.csv
16 |     diff test/integration/parse/ipg120327.one/${table}.csv tmp/integration/ipg120327.one/${table}.csv
17 | done
18 | 
19 | echo 'Testing test/fixtures/xml/ipg120327.two.xml'
20 | make spotless > /dev/null
21 | mkdir -p tmp/integration/ipg120327.two
22 | ./parse.py -p test/fixtures/xml/ -x ipg120327.two.xml -o .
23 | 
24 | for table in application uspatentcitation usapplicationcitation foreigncitation ipcr mainclass otherreference patent rawassignee rawinventor rawlawyer rawlocation subclass uspc usreldoc claim
25 | do
26 |     echo $table 'diffs...'
27 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.two/${table}.csv
28 |     # remove UUIDs from database dump because these change each time
29 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.two/${table}.csv
30 |     diff test/integration/parse/ipg120327.two/${table}.csv tmp/integration/ipg120327.two/${table}.csv
31 | done
32 | 
33 | echo 'Testing test/fixtures/xml/ipg120327.18.xml'
34 | make spotless > /dev/null
35 | mkdir -p tmp/integration/ipg120327.18
36 | ./parse.py -p test/fixtures/xml/ -x ipg120327.18.xml -o .
37 | 
38 | for table in application uspatentcitation usapplicationcitation foreigncitation ipcr mainclass otherreference patent rawassignee rawinventor rawlawyer rawlocation subclass uspc usreldoc claim
39 | do
40 |     echo $table 'diffs...'
41 |     sqlite3 -csv grant.db "select * from ${table}" > tmp/integration/ipg120327.18/${table}.csv
42 |     # remove UUIDs from database dump because these change each time
43 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipg120327.18/${table}.csv
44 |     diff test/integration/parse/ipg120327.18/${table}.csv tmp/integration/ipg120327.18/${table}.csv
45 | done
46 | 
# Application fixtures: parsed with -d application into application.db,
# which has a smaller table set than the grant database.
47 | echo 'Testing test/fixtures/xml/pa040101.two.xml'
48 | make spotless > /dev/null
49 | mkdir -p tmp/integration/pa040101.two
50 | ./parse.py -p test/fixtures/xml/ -x pa040101.two.xml -d application -o .
51 | 
52 | for table in application mainclass subclass ipcr uspc claim usreldoc rawlocation rawinventor rawassignee
53 | do
54 |     echo $table 'diffs...'
55 |     sqlite3 -csv -header application.db "select * from ${table}" > tmp/integration/pa040101.two/${table}.csv
56 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/pa040101.two/${table}.csv
57 |     diff test/integration/parse/pa040101.two/${table}.csv tmp/integration/pa040101.two/${table}.csv
58 | done
59 | 
60 | echo 'Testing test/fixtures/xml/ipa061228.one.xml'
61 | make spotless > /dev/null
62 | mkdir -p tmp/integration/ipa061228.one
63 | ./parse.py -p test/fixtures/xml/ -x ipa061228.one.xml -d application -o .
64 | 
65 | for table in application mainclass subclass ipcr uspc claim usreldoc rawlocation rawinventor rawassignee
66 | do
67 |     echo $table 'diffs...'
68 |     sqlite3 -csv -header application.db "select * from ${table}" > tmp/integration/ipa061228.one/${table}.csv
69 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipa061228.one/${table}.csv
70 |     diff test/integration/parse/ipa061228.one/${table}.csv tmp/integration/ipa061228.one/${table}.csv
71 | done
72 | 
73 | echo 'Testing test/fixtures/xml/ipa130117.one.xml'
74 | make spotless > /dev/null
75 | mkdir -p tmp/integration/ipa130117.one
76 | ./parse.py -p test/fixtures/xml/ -x ipa130117.one.xml -d application -o .
77 | 
78 | for table in application mainclass subclass ipcr uspc claim usreldoc rawlocation rawinventor rawassignee
79 | do
80 |     echo $table 'diffs...'
81 |     sqlite3 -csv -header application.db "select * from ${table}" > tmp/integration/ipa130117.one/${table}.csv
82 |     perl -pi -e 's/^[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12},//' tmp/integration/ipa130117.one/${table}.csv
83 |     diff test/integration/parse/ipa130117.one/${table}.csv tmp/integration/ipa130117.one/${table}.csv
84 | done
85 | 
86 | # clean up after we're done
87 | rm -rf tmp
88 | make spotless > /dev/null
89 | 
--------------------------------------------------------------------------------
/integration/run_integration_tests.sh:
--------------------------------------------------------------------------------
# Run the full integration suite in pipeline order: parse populates the
# database from XML fixtures, clean normalizes it, consolidate builds the
# disambiguator input. Each script diffs its output against the expected
# files under test/integration/.
1 | bash parse_integration.sh
2 | bash clean_integration.sh
3 | bash consolidate_integration.sh
4 | 
--------------------------------------------------------------------------------
/lib/.gitignore:
--------------------------------------------------------------------------------
1 | patXML.py
2 |
--------------------------------------------------------------------------------
/lib/LICENSE_WC.txt:
--------------------------------------------------------------------------------
1 | OPEN DATA LICENSE for MaxMind WorldCities and Postal Code Databases
2 |
3 | Copyright (c) 2008 MaxMind Inc. All Rights Reserved.
4 |
5 | The database uses toponymic information, based on the Geographic Names Data Base, containing official standard names approved by
6 | the United States
7 | Board on Geographic Names and maintained by the National Geospatial-Intelligence Agency. More information is available at the Maps and
8 | Geodata link at www.nga.mil. The National Geospatial-Intelligence Agency name, initials, and seal
9 | are protected by 10 United States
10 | Code Section 445.
11 |
12 | It also uses free population data from Stefan Helders www.world-gazetteer.com.
13 | Visit his website to download the free population data. Our database
14 | combines Stefan's population data with the list of all cities in the world.
15 |
16 | All advertising materials and documentation mentioning features or use of
17 | this database must display the following acknowledgment:
18 | "This product includes data created by MaxMind, available from
19 | http://www.maxmind.com/"
20 |
21 | Redistribution and use with or without modification, are permitted provided
22 | that the following conditions are met:
23 | 1. Redistributions must retain the above copyright notice, this list of
24 | conditions and the following disclaimer in the documentation and/or other
25 | materials provided with the distribution.
26 | 2. All advertising materials and documentation mentioning features or use of
27 | this database must display the following acknowledgement:
28 | "This product includes data created by MaxMind, available from
29 | http://www.maxmind.com/"
30 | 3. "MaxMind" may not be used to endorse or promote products derived from this
31 | database without specific prior written permission.
32 |
33 | THIS DATABASE IS PROVIDED BY MAXMIND.COM ``AS IS'' AND ANY
34 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | DISCLAIMED. IN NO EVENT SHALL MAXMIND.COM BE LIABLE FOR ANY
37 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | DATABASE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 |
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/lib/__init__.py
--------------------------------------------------------------------------------
/lib/alchemy/README.md:
--------------------------------------------------------------------------------
1 | README
2 | ======
3 |
4 | #### Installation:
5 |
 6 | If using an Ubuntu 12/13 environment, several packages are required (or beneficial) to be installed before cloning the repository.
7 |
8 | ```
9 | sudo apt-get install -y git
10 | sudo apt-get install -y redis-server
11 | sudo apt-get install -y python-pip
12 | sudo apt-get install -y python-zmq
13 | sudo apt-get install -y p7zip-full
14 | sudo apt-get install -y python-mysqldb
15 | sudo apt-get install -y python-Levenshtein
16 | ```
17 |
18 | #### Installing the repository
19 |
20 | ```
21 | git clone git@github.com:funginstitute/patentprocessor
22 | ```
23 |
24 | After cloning, install the packages via PIP
25 |
26 | ```
27 | cd patentprocessor
28 | sudo pip install -r requirements.txt
29 | ```
30 |
31 | Download:
32 |
33 | * [Location Table](https://s3.amazonaws.com/funginstitute/geolocation_data.sqlite3). Place this file in the `lib` directory
34 |
35 | #### Collaborating to the repository
36 |
37 | Rather than cloning the repository, fork it and issue pull requests. To keep your personal repository up to date, we set up `.git/config` to include upstream as follows:
38 |
39 | ```
40 | ...
41 |
42 | [remote "upstream"]
43 | url = https://github.com/funginstitute/patentprocessor.git
44 | fetch = +refs/heads/*:refs/remotes/upstream/*
45 | [remote "origin"]
46 | fetch = +refs/heads/*:refs/remotes/origin/*
47 | url = git@github.com:[your_username]/patentprocessor.git
48 |
49 | ...
50 | ```
51 |
52 | Once that is complete, we can fetch and merge.
53 |
54 | ```
55 | git fetch upstream
56 | git merge upstream/[branch]
57 | ```
58 |
59 | Issue pull requests to the [FungInstitute GitHub](https://github.com/funginstitute/patentprocessor) repository and the originators will take a look at the code being modified.
60 |
61 | #### Some MySQL recipes specific to AWS:
62 |
63 | Export files into CSV
64 |
65 | ```
66 | mysql -u [user] -p [passwd] --database=[db] --host=[host] --batch -e "select * from [table] limit 10" | sed 's/\t/","/g;s/^/"/;s/$/"/;s/\n//g' > [table].csv
67 | ```
68 |
69 | Allow local file reading (local-infile must be 1 for security purposes)
70 |
71 | ```
72 | mysql -u [user] -p --local-infile=1 -h [db] [tbl]
73 | ```
74 |
75 | #### Other notes
76 |
77 | * [Adding Indices to SQLAlchemy](http://stackoverflow.com/questions/6626810/multiple-columns-index-when-using-the-declarative-orm-extension-of-sqlalchemy)
78 | * [Ignoring Files in GIT](https://help.github.com/articles/ignoring-files)
79 | * [Permanently removing files in GIT](http://dalibornasevic.com/posts/2-permanently-remove-files-and-folders-from-a-git-repository)
--------------------------------------------------------------------------------
/lib/alchemy/config.ini:
--------------------------------------------------------------------------------
1 | [global]
2 | database = sqlite
3 | echo = False
4 |
5 | [mysql]
6 | host =
7 | user =
8 | password =
9 | grant-database =
10 | application-database =
11 |
12 | [sqlite]
13 | grant-database = grant.db
14 | application-database = application.db
15 | path = .
16 | refresh = True
17 |
18 | [assignee]
19 | threshold = 0.90
20 |
21 | [location]
22 | database = geolocation_data.sqlite3
23 | path = lib
24 | commit_frequency = 10000
25 |
26 | [lawyer]
27 | threshold = 0.9
28 |
29 | [parse]
30 | # if not specified, defaults to 0 (commits after all rows added)
31 | commit_frequency = 1000
32 |
--------------------------------------------------------------------------------
/lib/alchemy/match.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
29 | from collections import defaultdict
30 | from collections import Counter
31 | from sqlalchemy.sql.expression import bindparam
32 | from sqlalchemy import create_engine, MetaData, Table, inspect, VARCHAR, Column
33 | from sqlalchemy.orm import sessionmaker
34 |
35 | from datetime import datetime
36 |
37 | def commit_inserts(session, insert_statements, table, is_mysql, commit_frequency = 1000):
38 |     """
39 |     Executes bulk inserts for a given table. This is typically much faster than going through
40 |     the SQLAlchemy ORM. The insert_statement list of dictionaries may fall victim to SQLAlchemy
41 |     complaining that certain columns are null, if you did not specify a value for every single
42 |     column for a table.
43 | 
44 |     Args:
45 |     session -- alchemy session object
46 |     insert_statements -- list of dictionaries where each dictionary contains key-value pairs of the object
47 |     table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table__
48 |     is_mysql -- adjusts syntax based on if we are committing to MySQL or SQLite. You can use alchemy.is_mysql() to get this
49 |     commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items
50 |     """
# Pick the duplicate-skipping prefix for the backend; MySQL additionally
# gets its integrity checks disabled for the duration of the bulk load.
51 |     if is_mysql:
52 |         ignore_prefix = ("IGNORE",)
53 |         session.execute("set foreign_key_checks = 0; set unique_checks = 0;")
54 |         session.commit()
55 |     else:
56 |         ignore_prefix = ("OR IGNORE",)
# Number of full-size chunks. NOTE(review): Python 2 integer division —
# under Python 3 this yields a float and range() below would raise.
57 |     numgroups = len(insert_statements) / commit_frequency
58 |     for ng in range(numgroups):
# NOTE(review): dead code — when numgroups == 0, range(0) is already
# empty, so this branch can never execute.
59 |         if numgroups == 0:
60 |             break
61 |         chunk = insert_statements[ng*commit_frequency:(ng+1)*commit_frequency]
62 |         session.connection().execute(table.insert(prefixes=ignore_prefix), chunk)
# Python 2 print statement (file predates Python 3 support).
63 |         print "committing chunk",ng+1,"of",numgroups,"with length",len(chunk),"at",datetime.now()
64 |         session.commit()
# Flush whatever remains after the last full chunk.
65 |     last_chunk = insert_statements[numgroups*commit_frequency:]
66 |     if last_chunk:
67 |         print "committing last",len(last_chunk),"records at",datetime.now()
68 |         session.connection().execute(table.insert(prefixes=ignore_prefix), last_chunk)
69 |         session.commit()
70 |
71 | def commit_updates(session, update_key, update_statements, table, commit_frequency = 1000):
72 | """
73 | Executes bulk updates for a given table. This is typically much faster than going through
74 | the SQLAlchemy ORM. In order to be flexible, the update statements must be set up in a specific
75 | way. You can only update one column at a time. The dictionaries in the list `update_statements`
76 | must have two keys: `pk`, which is the primary_key for the record to be updated, and `update`
77 | which is the new value for the column you want to change. The column you want to change
78 | is specified as a string by the argument `update_key`.
79 |
80 | This method will work regardless if you run it over MySQL or SQLite, but with MySQL, it is
81 | usually faster to use the bulk_commit_updates method (see lib/tasks.py), because it uses
82 | a table join to do the updates instead of executing individual statements.
83 |
84 | Args:
85 | session -- alchemy session object
86 | update_key -- the name of the column we want to update
87 | update_statements -- list of dictionaries of updates. See above description
88 | table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table
89 | commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items
90 | """
91 | primary_key = table.primary_key.columns.values()[0]
92 | update_key = table.columns[update_key]
93 | u = table.update().where(primary_key==bindparam('pk')).values({update_key: bindparam('update')})
94 | numgroups = len(update_statements) / commit_frequency
95 | for ng in range(numgroups):
96 | if numgroups == 0:
97 | break
98 | chunk = update_statements[ng*commit_frequency:(ng+1)*commit_frequency]
99 | session.connection().execute(u, *chunk)
100 | print "committing chunk",ng+1,"of",numgroups,"with length",len(chunk),"at",datetime.now()
101 | session.commit()
102 | last_chunk = update_statements[numgroups*commit_frequency:]
103 | if last_chunk:
104 | print "committing last",len(last_chunk),"records at",datetime.now()
105 | session.connection().execute(u, *last_chunk)
106 | session.commit()
107 |
--------------------------------------------------------------------------------
/lib/alchemy/schema_func.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
29 | """
30 | These functions support schema so it doesn't get too bloated
31 | """
32 |
33 |
def fetch(clean, matchSet, session, default):
    """
    Look up an existing instance of [clean] matching the values in [default].

    For each candidate key set in [matchSet]: if every key is present in
    [default], query for records whose columns equal those values.  The first
    key set that is fully present and yields at least one match wins, and its
    first matching record is returned.  Returns None when nothing matches.
    """
    for keys in matchSet:
        query = session.query(clean)
        keep = True
        for k in keys:
            if k not in default:
                keep = False
                break
            # Bug fix: Query.filter is generative -- it returns a NEW query
            # and does not mutate in place.  The original discarded the
            # return value, so no filter was ever applied.
            query = query.filter(clean.__dict__[k] == default[k])
        if keep and query.count() > 0:
            return query.first()
    return None
54 |
--------------------------------------------------------------------------------
/lib/argconfig_parse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | """
27 | """
28 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
29 | """
30 | """
31 | Handles argument parsing for parse.py
32 | """
33 |
34 | import sys
35 | import os
36 | import argparse
37 | import logging
38 |
class ArgHandler(object):
    """
    Parses the command-line arguments for parse.py and exposes the resolved
    settings (xml regex, patent root, verbosity, output directory, document
    type) through accessor methods.
    """

    def __init__(self, arglist):
        """
        Build the argparse parser, parse [arglist], and resolve defaults.

        Args:
          arglist -- list of raw argument strings (e.g. sys.argv[1:])
        """
        self.arglist = arglist

        # setup argparse
        self.parser = argparse.ArgumentParser(description=\
                'Specify source directory/directories for xml files to be parsed')
        # `'KEY' in os.environ` works on both Python 2 and 3;
        # dict.has_key() was removed in Python 3.
        self.parser.add_argument('--patentroot','-p', type=str, nargs='?',
                default=os.environ['PATENTROOT'] \
                        if 'PATENTROOT' in os.environ else '.',
                help='root directory of all patent files')
        self.parser.add_argument('--xmlregex','-x', type=str,
                nargs='?',
                help='regex used to match xml files in the PATENTROOT directory.\
                        Defaults to ipg\d{6}.xml')
        self.parser.add_argument('--verbosity', '-v', type = int,
                nargs='?', default=0,
                help='Set the level of verbosity for the computation. The higher the \
                verbosity level, the less restrictive the print policy. 0 (default) \
                = error, 1 = warning, 2 = info, 3 = debug')
        self.parser.add_argument('--output-directory', '-o', type=str, nargs='?',
                default=os.environ['PATENTOUTPUTDIR'] \
                        if 'PATENTOUTPUTDIR' in os.environ else '.',
                help='Set the output directory for the resulting sqlite3 files. Defaults\
                        to the current directory "."')
        self.parser.add_argument('--document-type', '-d', type=str, nargs='?',
                default='grant',
                help='Set the type of patent document to be parsed: grant (default) \
                        or application')

        # parse arguments and assign values
        args = self.parser.parse_args(self.arglist)
        self.xmlregex = args.xmlregex
        self.patentroot = args.patentroot
        self.output_directory = args.output_directory
        self.document_type = args.document_type
        # `is None` rather than `== None`: identity test is the correct idiom
        if self.xmlregex is None: # set defaults for xmlregex here depending on doctype
            if self.document_type == 'grant':
                self.xmlregex = r"ipg\d{6}.xml"
            else:
                self.xmlregex = r"i?pa\d{6}.xml"

        # adjust verbosity levels based on specified input
        logging_levels = {0: logging.ERROR,
                          1: logging.WARNING,
                          2: logging.INFO,
                          3: logging.DEBUG}
        self.verbosity = logging_levels[args.verbosity]

    def get_xmlregex(self):
        return self.xmlregex

    def get_patentroot(self):
        return self.patentroot

    def get_verbosity(self):
        return self.verbosity

    def get_output_directory(self):
        return self.output_directory

    def get_document_type(self):
        return self.document_type

    def get_help(self):
        # print usage and abort; used when the caller detects bad arguments
        self.parser.print_help()
        sys.exit(1)
107 |
--------------------------------------------------------------------------------
/lib/config_parser.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
29 | """
30 | Parses the process.cfg file
31 | """
32 | import importlib
33 | from ConfigParser import ConfigParser
34 |
# Fallback option values handed to ConfigParser(defaults) so that every
# section lookup succeeds even when the option is absent from process.cfg.
# `years` / `downloaddir` default to None, meaning "not configured".
defaults = {'parse': 'defaultparse',
            'clean': 'True',
            'consolidate': 'True',
            'datadir': '/data/patentdata/patents/2013',
            'grantregex': 'ipg\d{6}.xml',
            'applicationregex': 'ipa\d{6}.xml',
            'years': None,
            'downloaddir' : None}
43 |
def extract_process_options(handler, config_section):
    """
    Pull the top-level pipeline options out of the [process] section of the
    configuration file, plus the doctype from [config_section].
    Returns them as a dictionary; `clean` and `consolidate` are coerced to
    booleans by comparison against the literal string 'True'.
    """
    return {
        'parse': handler.get('process', 'parse'),
        'clean': handler.get('process', 'clean') == 'True',
        'consolidate': handler.get('process', 'consolidate') == 'True',
        'doctype': handler.get(config_section, 'doctype'),
    }
55 |
def extract_parse_options(handler, config_section):
    """
    Read the parser-specific options out of [config_section] (the section
    named by the [process] section's `parse` option).  When both `years`
    and `downloaddir` are configured, the download directory doubles as the
    data directory.
    """
    options = {name: handler.get(config_section, name)
               for name in ('datadir', 'grantregex', 'applicationregex',
                            'years', 'downloaddir')}
    if options['years'] and options['downloaddir']:
        options['datadir'] = options['downloaddir']
    return options
70 |
def get_config_options(configfile):
    """
    Takes in a filepath to a configuration file, returns
    two dicts representing the process and parse configuration options.
    See `process.cfg` for explanation of the options.
    """
    handler = ConfigParser(defaults)
    # Bug fix: ConfigParser.read does not raise IOError for missing or
    # unreadable files -- it silently skips them and returns the list of
    # files successfully parsed.  Check that return value instead of
    # wrapping the call in a dead try/except.
    if not handler.read(configfile):
        print('Error reading config file ' + configfile)
        exit()
    process_config = extract_process_options(handler, 'process')
    parse_config = extract_parse_options(handler, process_config['parse'])
    return process_config, parse_config
86 |
def get_dates(yearstring):
    """
    Expand a comma-separated [yearstring] into a list of date specs.
    Accepted forms per comma-separated piece:
        year1            -> (year1, inf)
        year1-year2      -> (year1, year2)
        default          -> the literal string 'default'
    Each year is right-padded with zeros to 8 digits so bare years compare
    correctly against dates that carry month/day components.
    """
    dates = []
    for piece in yearstring.split(','):
        if piece == 'default':
            dates.append('default')
            continue
        bounds = piece.split('-')
        lo = int(bounds[0].ljust(8, '0'))
        # open-ended range: no upper bound given means "forever"
        hi = int(bounds[1].ljust(8, '0')) if len(bounds) > 1 else float('inf')
        dates.append((lo, hi))
    return dates
108 |
109 |
def get_xml_handlers(configfile, document_type='grant'):
    """
    Called by parse.py to generate a lookup dictionary for which parser should
    be used for a given file.

    Reads the [grant-xml-handlers] or [application-xml-handlers] section of
    [configfile]; each entry maps a year-range string (see get_dates) to an
    importable handler module name.
    """
    parser = ConfigParser()
    parser.read(configfile)
    xmlhandlers = {}
    config_item = 'grant-xml-handlers' if document_type == 'grant' \
                    else 'application-xml-handlers'
    # Renamed the loop variable: the original rebound `handler` (the parser
    # object) inside its own items() loop.
    for yearrange, modulename in parser.items(config_item):
        for year in get_dates(yearrange):
            try:
                xmlhandlers[year] = importlib.import_module(modulename)
            except ImportError:
                # Catch only ImportError (the old bare `except:` hid real
                # errors); retry with the parent directory on sys.path.
                importlib.sys.path.append('..')
                xmlhandlers[year] = importlib.import_module(modulename)
    return xmlhandlers
128 |
--------------------------------------------------------------------------------
/lib/handlers/README.md:
--------------------------------------------------------------------------------
1 | # XML Handlers
2 |
3 | A handler for parsing USPTO XML files must provide the following interface in
4 | order to be immediately compatible with the rest of the toolchain.
5 |
6 | DOCUMENTATION COMING
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/lib/handlers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/lib/handlers/__init__.py
--------------------------------------------------------------------------------
/lib/handlers/handler.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
class Patobj(object):
    """Bare attribute container for a detached snapshot of parsed patent data."""


class PatentHandler(object):
    """
    Base class for XML patent handlers.  Subclasses are expected to define
    `self.attributes`, an iterable of attribute names to copy when building
    the detached patent object.
    """
    def get_patobj(self):
        """Copy every attribute named in self.attributes onto a fresh Patobj."""
        snapshot = Patobj()
        for name in self.attributes:
            setattr(snapshot, name, getattr(self, name))
        return snapshot
38 |
--------------------------------------------------------------------------------
/lib/handlers/xml_util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | """
27 | """
28 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
29 | """
30 |
31 | """
32 | Collection of useful functions and tools for working with XML documents
33 | """
34 |
35 | import re
36 | from itertools import izip
37 | from unicodedata import normalize
38 | from cgi import escape
39 |
40 |
def flatten(ls_of_ls):
    """
    Transpose a list of lists: the i-th list of the result gathers the i-th
    element of every input list (truncated to the shortest input, since izip
    stops at the shortest iterable).
    """
    return [list(column) for column in izip(*ls_of_ls)]
48 |
def extend_padding(ls_of_ls, padding=''):
    """
    Pad every inner list, in place, with [padding] until it matches the
    length of the longest inner list.  Returns a new outer list containing
    the same (mutated) inner list objects in their original order.
    """
    target = max(len(inner) for inner in ls_of_ls)
    padded = []
    for inner in ls_of_ls:
        shortfall = target - len(inner)
        if shortfall:
            inner.extend([padding] * shortfall)
        padded.append(inner)
    return padded
62 |
def escape_html_nosub(string):
    """
    Escape stray HTML special characters while preserving the known
    subscript idiom used in USPTO text.

    NOTE(review): the patterns and replacement strings below appear to have
    been mangled by an HTML-stripping pass on this source.  In particular
    the `gt` pattern '(?=.)*(?' is not a valid regular expression and
    re.compile will raise at call time, and the sub() replacements replace
    characters with themselves.  Confirm against the original upstream
    source before relying on this function.
    """
    # case-insensitive match of '<' not followed by an (opening/closing) sub tag
    lt = re.compile('<(?!/?sub>)',flags=re.I)
    # NOTE(review): invalid regex as written -- see docstring
    gt = re.compile('(?=.)*(?',flags=re.I)
    # '&' not already starting a known entity
    amp = re.compile('&(?!(amp;|lt;|gt;))',flags=re.I)
    string = re.sub(amp,'&',string)
    string = re.sub(lt,"<",string)
    string = re.sub(gt,">",string)
    return string
75 |
def has_content(l):
    """
    Return True if [l] contains at least one truthy (non-null, non-empty)
    element.  `any` already ignores falsy items, so no explicit filter is
    needed.
    """
    return any(l)
81 |
def normalize_utf8(string):
    """
    Return [string] NFC-normalized as a unicode object.  Byte strings are
    decoded as UTF-8 first; unicode input is normalized directly.
    (Python 2 only: relies on the `unicode` builtin.)
    """
    if not isinstance(string, unicode):
        string = string.decode('utf-8')
    return normalize('NFC', string)
91 |
def remove_escape_sequences(string):
    """
    Collapse every contiguous run of whitespace/control characters
    (carriage return, newline, tab, vertical tab, backspace, form feed,
    bell, and space) into a single space character.
    """
    return re.sub(r'[\r\n\t\v\b\f\a ]+', ' ', string)
99 |
def translate_underscore(string, lower=False):
    """
    Replace dash-like characters (em dash, hyphen, U+2014) with the literal
    underscore character `_`; optionally lowercase the string first.
    """
    result = string.lower() if lower else string
    result = result.replace('—', '_')
    result = result.replace('-', '_')
    return result.replace(u'\u2014', '_')
108 |
109 |
def escape_html(string):
    """
    Apply translate_underscore to [string], then escape the result with
    cgi.escape.
    """
    return escape(translate_underscore(string))
116 |
def normalize_document_identifier(identifier):
    """
    Strip a single leading zero following the (possibly empty) uppercase
    letter prefix of a document-id string, e.g. 'US07654321' -> 'US7654321'.
    Falsy input (empty string or None) yields ''.
    """
    if not identifier:
        return ''
    pattern = re.compile(r'([A-Z]*)0?')
    # count=1: only the first occurrence is rewritten
    return pattern.sub(r'\g<1>', identifier, count=1)
124 |
def associate_prefix(firstname, lastname):
    """
    Move everything after the first space-delimited word of [firstname] onto
    the front of [lastname].

    Returns (name, last) where `name` is the first word of [firstname] and
    `last` is the remaining words (if any) joined in front of [lastname].
    """
    if ' ' in firstname:
        name, prefix = firstname.split(' ',1) # split on first space
    else:
        name, prefix = firstname, ''
    # Bug fix: the original used `prefix is not ''` -- an identity comparison
    # with a string literal, which is implementation-dependent (and a
    # SyntaxWarning on modern Python).  Truthiness expresses the intent.
    space = ' ' if prefix else ''
    last = prefix + space + lastname
    return name, last
137 |
def clean(string, upper=True):
    """
    Apply the normalization helpers above in order (UTF-8 normalize, collapse
    escape sequences, translate dashes to underscores, HTML-escape) and
    return the result, uppercased by default.

    NOTE(review): the replacement literals on the `.replace(...)` line below
    render as identical characters in this source -- they were likely HTML
    entities (e.g. &nbsp; -> ' ', &amp; -> '&') mangled by an extraction
    pass.  Left byte-identical; confirm against the original upstream source.
    """
    string = normalize_utf8(string)
    string = remove_escape_sequences(string)
    string = translate_underscore(string)
    string = escape_html(string)
    string = string.replace(" ", " ").replace("&", "&")
    if upper:
        return string.upper()
    else:
        return string
154 |
def augment_class(string):
    """
    Convert a raw USPTO classification string into mainclass/subclass
    notation: characters 0-2 form the main class, 3-5 the subclass, and any
    remaining characters a decimal extension, yielding either
    "main/sub" or "main/sub.ext".
    """
    main, sub, ext = string[:3], string[3:6], string[6:]
    if ext:
        return "{0}/{1}.{2}".format(main, sub, ext)
    return "{0}/{1}".format(main, sub)
168 |
--------------------------------------------------------------------------------
/lib/manual_replacement_library.txt:
--------------------------------------------------------------------------------
1 | # a
2 | .ANG.|Å
3 | .ang.|å
4 | â|å
5 | â ;|å
6 | Å|Å
7 | {dot over (A)}|Å
8 | {dot over (a)}|å
9 | #Inaccurate, but too many possibilities - this should be easier to debug
10 | {hacek over (a)}|a
11 | .circle.|Å
12 | {overscore (A)}|Ä
13 | #Inaccurate, but too many possibilities - this should be easier to debug
14 | {overscore (a)}|a
15 | {umlaut over (Aa)}|Ää
16 | {acute over (Å)}|Å
17 | /e,uml/a/ |ä
18 | /a/ |ä
19 | /a/|ä
20 | #Inaccurate, but too many possibilities - this should be easier to debug
21 | {haeck over (a)}|a
22 | # b
23 | # c
24 | ć|ć
25 | Ć|Ć
26 | {hacek over (C)}|Č
27 | {haeck over (C)}|Č
28 | {hacek over (c)}|č
29 | # d
30 | {hacek over (D)}|Ď
31 | {hacek over (d)}|ď
32 | # e
33 | ē|ē
34 | {haeck over (e)}|ě
35 | {hacek over (e)}|ě
36 | {overscore (e)}|è
37 | # f
38 | # g
39 | ǵ|ǵ
40 | # h
41 | # i
42 | {hacek over (i)}|i
43 | # j
44 | # k
45 | {umlaut over (K)}|K
46 | # l
47 | {umlaut over (L)}|L
48 | # m
49 | {umlaut over (M)}|M
50 | {umlaut over (m)}|m
51 | # n
52 | ń|ń
53 | {haeck over (n)}|ñ
54 | {overscore (n)}|ñ
55 | # o
56 | ō|ō
57 | #{hacek over (o)}|
58 | #{overscore (o)}|
59 | #{umlaut over (oo)}|
60 | uml/O/ |Ö
61 | uml/o/ |ö
62 | .0.|ø
63 | .O slashed.|Ø
64 | .o slashed.|ø
65 | /o/ |ö
66 | /o/|ő
67 | {hacek over (o)}|ö
68 | {overscore (o)}|ö
69 | #Inaccurate, but too many possibilities - this should be easier to debug
70 | {dot over (o)}|o
71 | {acute over (ø)}|ø
72 | # p
73 | # q
74 | # r
75 | ŕ|ŕ
76 | {hacek over (r)}|ř
77 | {haeck over (r)} |ř
78 | {haeck over (r)}|ř
79 | {grave over (R)}|R
80 | {grave over (r)}|r
81 | # s
82 | ś|ś
83 | ŝ|ŝ
84 | {haeck over (S)}|Š
85 | {hacek over (S)}|Š
86 | {hacek over (s)}|š
87 | {haeck over (s)}|š
88 | {umlaut over (S)}|S
89 | /s/ | sous
90 | /s/|-sur-
91 | /S/|-sur-
92 | {dot over (s)}|s
93 | # t
94 | {dot over (T)}okyo|Tokyo
95 | # u
96 | ũ|ũ
97 | /U/ |Ü
98 | /u/ |ü
99 | /u/|ü
100 | {overscore (u)}|ü
101 | {hacek over (u)}|ǔ
102 | {dot over (u)}|u
103 | # v
104 | # w
105 | # x
106 | # y
107 | # z
108 | Ź|Ź
109 | ź|ź
110 | {hacek over (Z)}|Ž
111 | # Misspellings
112 | a/d/|aan den
113 | {hacek over (A)}lta|Älta
114 | {acute over (B)}uehl|Bühl
115 | {umlaut over (C)}ernilov|Černilov
116 | O/ d.ANG.kra|Ödåkra
117 | {haeck over (u)}ttenre{haeck over (u)}th|Uttenreuth
118 | D{haeck over (u)}sseldorf|Düsseldorf
119 | P{haeck over (u)}choen|Pocheon
120 | Gro{burgwedel|Großburgwedel
121 | B{umlaut over (j)}rringbro|Bjerringbro
122 | B{acute over (j)}árred|Bjärred
123 | Defreggerstra{e|Defreggerstraße
124 | Vaster{s|Västerås
125 | # other
126 | .cent.|
127 | #∘|∘
128 | =|=
129 | #|#
130 | +|+
131 | & Engraving;|& Engraving
132 | {umlaut over ( )}|
133 | {acute over (æ)}|æ
134 | “A”-Cdad. de|
135 | all of|
136 | all Of|
137 | All of|
138 | al of |
139 | Both of|
140 | BOTH OF|
141 | both of|
142 | bot of |
143 | both ot|
144 | late of |
145 | LATE OF |
146 | # greek
147 | &agr;|α
148 | &bgr;|β
149 | [|[
150 | &mgr;|μ
151 | &phgr;|φ
152 | &pgr;|π
153 | ]|]
154 | &thgr;|θ
155 | # URL custom characters
156 | |Ç
157 | |Ç
158 | |i
159 | |i
160 | |Ł
161 | |ș
162 | |ș
163 | |ș
164 | |ș
165 | |ș
166 | |ș
167 | |ș
168 | #Note: should be º, but this is not a valid character so ignore it
169 | |
--------------------------------------------------------------------------------
/lib/state_abbreviations.txt:
--------------------------------------------------------------------------------
1 | Alabama|AL
2 | Alaska|AK
3 | Arizona|AZ
4 | Arkansas|AR
5 | California|CA
6 | Colorado|CO
7 | Connecticut|CT
8 | Delaware|DE
9 | Florida|FL
10 | Georgia|GA
11 | Hawaii|HI
12 | Idaho|ID
13 | Illinois|IL
14 | Indiana|IN
15 | Iowa|IA
16 | Kansas|KS
17 | Kentucky|KY
18 | Louisiana|LA
19 | Maine|ME
20 | Maryland|MD
21 | Massachusetts|MA
22 | Michigan|MI
23 | Minnesota|MN
24 | Mississippi|MS
25 | Missouri|MO
26 | Montana|MT
27 | Nebraska|NE
28 | Nevada|NV
29 | New Hampshire|NH
30 | New Jersey|NJ
31 | New Mexico|NM
32 | New York|NY
33 | North Carolina|NC
34 | North Dakota|ND
35 | Ohio|OH
36 | Oklahoma|OK
37 | Oregon|OR
38 | Pennsylvania|PA
39 | Rhode Island|RI
40 | South Carolina|SC
41 | South Dakota|SD
42 | Tennessee|TN
43 | Texas|TX
44 | Utah|UT
45 | Vermont|VT
46 | Virginia|VA
47 | Washington|WA
48 | West Virginia|WV
49 | Wisconsin|WI
50 | Wyoming|WY
51 |
--------------------------------------------------------------------------------
/lib/tasks.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | """
26 | """
27 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
28 | """
29 | """
30 | Functions for doing bulk inserts and bulk updates
31 | """
32 | from alchemy.match import commit_inserts, commit_updates
33 | from alchemy import session_generator
34 | from alchemy.schema import temporary_update, app_temporary_update
35 | from sqlalchemy import create_engine, MetaData, Table, inspect, VARCHAR, Column
36 | from sqlalchemy.orm import sessionmaker
37 |
38 | # fetch reference to temporary_update table.
39 |
def bulk_commit_inserts(insert_statements, table, is_mysql, commit_frequency = 1000, dbtype='grant'):
    """
    Dispatch a bulk-insert job: obtain a scoped session for the requested
    schema and hand the work to lib.alchemy.match.commit_inserts, which is
    typically much faster than inserting through the SQLAlchemy ORM.  Note
    that SQLAlchemy may complain about null columns if a dictionary omits a
    value for any column of the table.

    Args:
      insert_statements -- list of dictionaries where each dictionary contains key-value pairs of the object
      table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table__
      is_mysql -- adjusts syntax based on if we are committing to MySQL or SQLite. You can use alchemy.is_mysql() to get this
      commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items
      dbtype -- which base schema to use. Either 'grant' or 'application'
    """
    db_session = session_generator(dbtype=dbtype)
    commit_inserts(db_session, insert_statements, table, is_mysql, commit_frequency)
59 |
def bulk_commit_updates(update_key, update_statements, table, is_mysql, commit_frequency = 1000, dbtype='grant'):
    """
    Executes bulk updates for a given table. This is typically much faster than going through
    the SQLAlchemy ORM. In order to be flexible, the update statements must be set up in a specific
    way. You can only update one column at a time. The dictionaries in the list `update_statements`
    must have two keys: `pk`, which is the primary_key for the record to be updated, and `update`
    which is the new value for the column you want to change. The column you want to change
    is specified as a string by the argument `update_key`.

    If is_mysql is True, then the update will be performed by inserting the record updates
    into the table temporary_update and then executing an UPDATE/JOIN. If is_mysql is False,
    then SQLite is assumed, and traditional updates are used (lib.alchemy.match.commit_updates)

    A session is generated using the scoped_session factory through SQLAlchemy, and then
    the actual task is dispatched.

    Args:
        update_key -- the name of the column we want to update
        update_statements -- list of dictionaries of updates. See above description
        table -- SQLAlchemy table object. If you have a table reference, you can use TableName.__table__
        is_mysql -- adjusts syntax based on if we are committing to MySQL or SQLite. You can use alchemy.is_mysql() to get this
        commit_frequency -- tune this for speed. Runs "session.commit" every `commit_frequency` items
        dbtype -- which base schema to use. Either 'grant' or 'application'
    """
    session = session_generator(dbtype=dbtype)
    if not is_mysql:
        # SQLite path: plain row-by-row updates through the ORM helper.
        commit_updates(session, update_key, update_statements, table, commit_frequency)
        return
    # MySQL path from here on (the early return above guarantees is_mysql is
    # truthy, so the old `else: delete from temporary_update` branches were
    # unreachable and have been removed).
    session.rollback()
    # Make sure the scratch table is empty before staging this batch of updates.
    session.execute('truncate temporary_update;')
    if dbtype == 'grant':
        commit_inserts(session, update_statements, temporary_update, is_mysql, 10000)
    else:
        commit_inserts(session, update_statements, app_temporary_update, is_mysql, 10000)
    # now update using the join against the staged rows
    primary_key = table.primary_key.columns.values()[0]
    # resolve the target column from its name (avoid shadowing the parameter)
    update_column = table.columns[update_key]
    session.execute("UPDATE {0} join temporary_update ON temporary_update.pk = {1} SET {2} = temporary_update.update;".format(table.name, primary_key.name, update_column.name ))
    session.commit()
    # leave the scratch table empty for the next caller
    session.execute('truncate temporary_update;')
    session.commit()
107 |
--------------------------------------------------------------------------------
/lib/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/lib/util/__init__.py
--------------------------------------------------------------------------------
/lib/util/csv_reader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | """
27 | """
28 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
29 | """
30 |
31 | """
32 | Simplifies the process for reading in unicode CSV files
33 | """
34 |
35 | import csv
36 | from unicodedata import normalize
37 | import codecs
38 |
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """
    Wrap csv.reader so that it yields rows of unicode cells.

    The incoming unicode lines are first encoded as UTF-8 byte strings
    (the Python 2 csv module cannot consume unicode directly), parsed,
    and then every cell is decoded back to unicode.
    """
    encoded_rows = csv.reader(utf_8_encoder(unicode_csv_data), dialect=dialect, **kwargs)
    for encoded_row in encoded_rows:
        yield [unicode(col, 'utf-8') for col in encoded_row]
46 |
def utf_8_encoder(unicode_csv_data):
    """
    Generator that re-encodes each unicode line as a UTF-8 byte string.
    """
    for text_line in unicode_csv_data:
        yield text_line.encode('utf-8')
53 |
def read_file(filename):
    """
    Given a string [filename], yield the parsed rows of that UTF-8 CSV file
    one at a time (the file is opened for the duration of the iteration).
    """
    with codecs.open(filename, encoding='utf-8') as infile:
        for parsed_row in unicode_csv_reader(infile):
            yield parsed_row
62 |
--------------------------------------------------------------------------------
/lib/util/getpatent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Copyright (c) 2013 The Regents of the University of California, AMERICAN INSTITUTES FOR RESEARCH
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | """
27 | """
28 | @author Gabe Fierro gt.fierro@berkeley.edu github.com/gtfierro
29 | """
30 |
31 | import sys
32 | import re
33 | import time
34 | import mechanize
35 | from BeautifulSoup import BeautifulSoup
36 |
# Command-line tool: given a patent id, scrape its Google Patents page for the
# issue date, then download the matching USPTO weekly grant zipfile.
if len(sys.argv) < 2:
    print "Given a patent id number, will download the relevant zipfile"
    print "Usage: ./getpatent.py "
    print "Example: ./getpatent.py 7783348"
    sys.exit(0)

patent_name = sys.argv[1]

# Google Patents URLs use 'US'-prefixed ids; add the prefix when missing.
if patent_name[:2].upper() != 'US':
    patent_name = 'US'+patent_name

BASE_URL = 'http://www.google.com/patents/'
ZIP_BASE_URL = 'http://commondatastorage.googleapis.com/patents/grant_full_text/'
# Spoof a Google feedfetcher user agent and ignore robots.txt so the page loads.
br = mechanize.Browser()
br.addheaders = [('User-agent', 'Feedfetcher-Google-iGoogleGadgets;\
(+http://www.google.com/feedfetcher.html)')]
br.set_handle_robots(False)
html = br.open(BASE_URL+patent_name).read()

print 'Got HTML for patent page'

# Pull the "Issue date: Mon DD, YYYY" string out of the bibliographic sidebar.
soup = BeautifulSoup(html)
sidebar = soup.find('div', {'class': 'patent_bibdata'})
text = str(sidebar.text)
date = re.search(r'(?<=Issue date: )[A-Za-z]{3} [0-9]{1,2}, [0-9]{4}', text).group()
date_struct = time.strptime(date, '%b %d, %Y')
year = str(date_struct.tm_year)[2:]
month = str(date_struct.tm_mon).zfill(2)
day = str(date_struct.tm_mday).zfill(2)

# Weekly grant archives are named ipgYYMMDD.zip under a per-year directory.
zipfile = 'ipg{0}{1}{2}.zip'.format(year,month,day)

zipurl = '{0}{1}/{2}'.format(ZIP_BASE_URL,date_struct.tm_year,zipfile)

print 'Downloading ZIP file: ',zipurl

res = br.retrieve(zipurl, zipfile)
print res

print 'Finished downloading'
77 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # IPython Notebooks
2 |
3 | To run a notebook, you need
4 | [iPython](http://ipython.org/ipython-doc/dev/index.html) as well as
5 | [matplotlib](http://matplotlib.org/). Copy the notebook to the directory
6 | containing the sqlite3 files and open using
7 |
8 | ```
9 | ipython notebook
10 | ```
11 |
12 | It should open in your browser.
13 |
--------------------------------------------------------------------------------
/notebooks/buildpdf:
--------------------------------------------------------------------------------
# Convert the MySQL notebook to LaTeX (input cells suppressed by the
# latex_nocode template), patch the generated .tex, and build the PDF report.
ipython nbconvert --to=latex --template=latex_nocode.tplx --SphinxTransformer.author='Fung Institute' MySQL.ipynb
# Drop nbconvert's negative vertical spacing, then retitle the document.
sed -e 's/\\vspace{-0\.5\\baselineskip}//g' MySQL.tex > tmp
sed -e 's/\\title{MySQL}/\\title{Patent Database Report}/' tmp > tmp2
mv tmp2 MySQL.tex
pdflatex MySQL.tex
# tmp2 was already renamed to MySQL.tex above, so only tmp is left to remove
# (the old `rm tmp tmp2` always errored on the missing tmp2).
rm tmp
7 |
--------------------------------------------------------------------------------
/notebooks/latex_nocode.tplx:
--------------------------------------------------------------------------------
1 | % Disable input cells
2 | ((* extends 'latex_article.tplx' *))
3 | ((* block input *))
4 | ((* endblock input *))
5 | ((* block output_group *))
% Add remainder of the document contents below.
7 | ((* for output in cell.outputs *))
8 | ((( render_output(output) )))
9 | ((* endfor *))
10 | ((* endblock *))
11 |
--------------------------------------------------------------------------------
/process.cfg:
--------------------------------------------------------------------------------
1 | # This is a sample file that configures the environment for the preprocessing
2 | # steps of parsing, cleaning, consolidation
3 |
# [process] defines which configured steps the current run of the preprocessor
# will perform. Accepts 4 options:
6 | # parse: defines which parsing configuration will be run
7 | # clean: if True, runs the cleaning step on the output of parse
# consolidate: if True, runs the consolidation step on the output of clean
9 | # doctype: can be grant, application, or all, and processing will proceed accordingly.
10 | # Note: make sure that the value for grantregex and/or applicationregex
11 | # is defined if you wish to use a value other than the default for either
12 |
13 | [process]
14 | parse=download
15 | clean=True
16 | consolidate=True
17 | doctype=all
18 |
19 | #[defaultparse]
20 | ## 'datadir' specifies the path to the directory containing the XML files that
21 | ## we want to parse. This path will be evaluated relative to the main directory
22 | ## of preprocessor. Defaults to '/data/patentdata/patents/2013'
23 | #
24 | # datadir=/path/to/patent/data
25 |
26 | ## 'grantregex' and 'applicationregex' specify the regular expression that
27 | ## matches the XML files that we want to parse. If you are downloading data
28 | ## from the USPTO, then the default value should be fine. Defaults to
29 | ## 'ipg\d{6}.xml', the format found for most USPTO files since 2005
30 | #
31 | # grantregex=ipg\d{6}.xml
32 | # applicationregex=ipa\d{6}.xml
33 |
34 | ## 'years' specifies the range of years for which you want to download and
35 | ## parse. If the current year is specified, the script will download all
36 | ## possible files. Specifying the 'years' option will ignore the 'datadir'
37 | ## option and just download the relevant files to 'downloaddir' (see below)
38 | ## Specify years as:
39 | ## year1
40 | ## year1-year2
41 | ## year1,year2,year3
42 | ## year1-year2,year3-year4
43 | ## latest (downloads the most recent week's data)
44 | ## If this option is NOT specified, the parse will run on the contents of 'datadir'
45 | #
46 | # years=2010-2013
47 |
48 | ## 'downloaddir' specifies the target base directory into which the weekly
49 | ## patent files will be downloaded. Note that the preprocessor will create
50 | ## directories named for each year inside 'downloaddir', and if they already
51 | ## exist, will look inside for previously downloaded files
52 | ## If this option is NOT specified, the parse will run on the contents of 'datadir'
53 | #
54 | # downloaddir=/path/to/base/directory/for/downloads
55 |
56 | # example configuration for a parse of 2012 data. Note that the 'grantregex'
57 | # option is not specified because the default value is sufficient
58 | [2012parse]
59 | datadir=/data/patentdata/patents/2012
60 |
61 | # example configuration to test the parsing
62 | [test]
63 | datadir=test/fixtures/xml
64 | grantregex=\d{4}_\d.xml
65 | applicationregex=ipa\d{6}.*.xml
66 |
67 | # example configuration for a parse of the latest data. Note that the
68 | # regexes for grants and applications will be used if 'all' is specified
69 | # for doctype in [process], and otherwise only the appropriate one will be used.
70 | [download]
71 | years=latest
72 | downloaddir=./data
73 | grantregex=i?pg\d{6}.xml
74 | applicationregex=i?pa\d{6}.xml
75 |
76 | # This section specifies which grant_handler is to be used for each date of the
77 | # released patent. This section should only have to be touched when a new parser is
78 | # introduced. In the case where a year cannot be parsed from the filename (the
79 | # format `ipgYYMMDD` is assumed), then the default parser is used.
80 | # The dates in the ranges are either YYYY or YYYYMMDD. If only one date is provided,
81 | # then the corresponding handler is assumed for all subsequent patents
82 | [grant-xml-handlers]
83 | 2005-20130108=lib.handlers.grant_handler_v42
84 | 20130115=lib.handlers.grant_handler_v44
85 | default=lib.handlers.grant_handler_v42
86 |
87 | [application-xml-handlers]
88 | 2001-20060822=lib.handlers.application_handler_v41
89 | 20060823-20130116=lib.handlers.application_handler_v42
90 | 20130117=lib.handlers.application_handler_v43
91 | default=lib.handlers.application_handler_v42
92 |
93 | # schema changes were in 20010131 (v15), 20020101 (v16),
94 | # 20050825 (v41), 20060823 (v42), 20130121 (v43)
95 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Python scripts for processing USPTO inventor and patent data
2 |
3 | The following collection of scripts performs pre- and post-processing on patent
4 | data as part of the patent inventor disambiguation process. Raw patent data is
5 | obtained from [Google Bulk Patent
6 | Download](http://www.google.com/googlebooks/uspto-patents-grants-text.html).
7 |
8 | For a high-level overview of the patentprocessor toolchain, please see [our
9 | technical
10 | report](https://github.com/funginstitute/publications/raw/master/patentprocessor/patentprocessor.pdf).
11 |
12 | For a description of configuration of the patentprocessor toolchain, please see
13 | [this technical
14 | report](https://github.com/funginstitute/publications/raw/master/weeklyupdate/weeklyupdate.pdf).
15 |
16 | To follow development, subscribe to
17 | [RSS feed](https://github.com/funginstitute/patentprocessor/commits/master.atom).
18 |
19 | ## Patentprocessor Overview
20 |
21 | There are several steps in the patentprocessor toolchain:
22 |
23 | 1. Retrieve/locate parsing target
24 | 2. Execute parsing phase
25 | 3. Run preliminary disambiguations:
26 | * assignee disambiguation
27 | * location disambiguation
28 | 4. Prepare input for inventor disambiguation
29 | 5. Disambiguate inventors (external process)
30 | 6. Ingest disambiguated inventors into database
31 |
32 | For the preliminary disambiguations, you need the [location
33 | database](https://s3.amazonaws.com/fungpatdownloads/geolocation_data.7z). File
34 | requires [7zip](http://www.7-zip.org/) to unpack.
35 |
36 | ## Installation and Configuration of the Preprocessing Environment
37 |
38 | The python-based preprocessor is tested on Ubuntu 12.04 and MacOSX 10.6. Any
39 | flavor of Unix with the following installed should work, though it is possible
40 | to get the toolchain running on Windows.
41 |
42 | If you have [`pip`](http://www.pip-installer.org/en/latest/index.html)
43 | installed, you can simplify the installation process by just running `sudo pip
44 | install -r requirements.txt` from within the patentprocessor directory.
45 |
46 | Please [file an issue](https://github.com/funginstitute/patentprocessor/issues) if you find another dependency.
47 |
48 | ### Ubuntu
49 |
50 | ```
51 | sudo apt-get update
52 | sudo apt-get install python-dev
53 | sudo apt-get install python-setuptools
54 | sudo easy_install -U distribute
55 | sudo apt-get install -y python-Levenshtein make libmysqlclient-dev python-mysqldb python-pip python-zmq python-numpy gfortran libopenblas-dev liblapack-dev g++ sqlite3 libsqlite3-dev python-sqlite redis-server
56 | sudo pip install -r requirements.txt
57 | ```
58 |
59 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | MySQL-python==1.2.4
2 | SQLAlchemy==0.8.3
3 | Unidecode==0.04.14
4 | beautifulsoup4==4.3.2
5 | ipython==1.1.0
6 | numpy==1.8.0
7 | python-Levenshtein==0.10.2
8 | pyzmq==14.0.0
9 | requests==2.0.1
10 | wsgiref==0.1.2
11 | pandas==0.12.0
12 |
--------------------------------------------------------------------------------
/run_clean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo 'Running assignee disambiguation'
4 | python lib/assignee_disambiguation.py
5 |
6 | # TODO: fixup lawyer disambiguation
7 | #echo 'Running lawyer disambiguation'
8 | #python lib/lawyer_disambiguation.py 'grant'
9 |
10 | echo 'Running geo disambiguation'
11 | python lib/geoalchemy.py
12 |
--------------------------------------------------------------------------------
/run_consolidation.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Rebuild disambiguator.csv, the input file for the inventor disambiguation.
# The first script argument is forwarded to consolidate.py.

# -f: don't error out when the file doesn't exist yet (e.g. on a first run)
rm -f disambiguator.csv
echo 'Running consolidation for disambiguator'
python consolidate.py $1
6 |
--------------------------------------------------------------------------------
/starcluster/README.md:
--------------------------------------------------------------------------------
1 | README
2 | ======
3 |
4 | #### StarCluster and Batch Parsing
5 |
6 | We take advantage of the fantastic [StarCluster](http://star.mit.edu/cluster/) for batch processing of the Patent XML files. Defaults (such as sgeadmin) are assumed.
7 |
8 | We use StarCluster mostly for distributed jobs.
9 | The StarCluster machines are configured to replicate the environment necessary to parse
10 |
11 | 1. `python fetch_xml.py` Fetching the XML files from USPTO. For now this fetches files specified in `urls.pickle`. As of 8/1/2013, `urls.pickle` contains files from 2005-mid 2013
12 | 2. Login to the starcluster using root
13 | 3. Point the `config.ini` file so it points to the MySQL database
14 | 4. `cd /home/sgeadmin/patentprocessor/starcluster; sh load_pre.sh > ../tar/[num].log` execute the shell script
15 | 5. Transfer the tar files to a separate location (or server) to begin the MySQL ingestion process.
6. Execute `built_tsv.py` and specify the location of the `tar.gz` files. This builds several text files which can be later ingested.
17 | 7. Modify `config.ini` file and set the proper credentials to the desired database. `from lib import alchemy` so the schema is fully updated.
18 | 8. Log into mysql. If it is a remote server, such as on Amazon RDS, `mysql -u [user] -p --local-infile=1 -h [db] [tbl]` and execute `source load.sql`. The default database is assumed to be `uspto_new` so if this should be something else, please make the appropriate adjustments.
19 |
--------------------------------------------------------------------------------
/starcluster/built_tsv.py:
--------------------------------------------------------------------------------
import os
import glob
import sys

# specify the input directory
# something like 20* is fine if we want to take
# care of the directories that begin in the 2000s

search = sys.argv[1]

# Clear out any leftover extracted text files before processing.
os.system("rm *.txt")
for f in glob.glob("{0}/*.tar.gz".format(search)):
    # archive basename without its extensions (e.g. 'ipg050104')
    s = f.split("/")[-1].split(".")[0]
    # NOTE(review): `s` and `y` are computed but never used below — the format
    # string on the `cat` line only consumes its first argument. Confirm
    # whether per-year output files were intended here.
    y = f.split("/")[0]
    # Unpack the archive's per-table .txt dumps into the current directory...
    os.system("tar -xzf {0}".format(f))
    for t in glob.glob("*.txt"):
        # ...append each dump onto the accumulated file of the same name under new/
        os.system("cat {0} >> new/{0}".format(t, y))
    # remove this archive's dumps so they don't bleed into the next iteration
    os.system("rm *.txt")
19 |
--------------------------------------------------------------------------------
/starcluster/config.ini:
--------------------------------------------------------------------------------
1 | [directory]
2 | home = /home/sgeadmin/patentprocessor
3 | sqlalchemy = /home/sgeadmin/patentprocessor/lib/alchemy
4 | xml = /home/sgeadmin/patentprocessor/XML
5 | local = /mnt/sgeadmin
--------------------------------------------------------------------------------
/starcluster/fetch_xml.py:
--------------------------------------------------------------------------------
import os
import pickle
from datetime import datetime
from ConfigParser import ConfigParser
from IPython.parallel import Client

# Distribute the download of USPTO XML zipfiles across the IPython.parallel
# cluster engines: the URL list is read from urls.pickle and scattered so each
# engine fetches and unzips its own share into its node-local directory.

config = ConfigParser()
config.read('{0}/config.ini'.format(os.path.dirname(os.path.realpath(__file__))))

rc = Client(packer="pickle")
dview = rc[:]
print rc.ids


@dview.remote(block=True)
def fetch():
    # Runs on each engine. `node` and `files` are injected into the engine
    # namespace by the dview assignments / scatter below.
    import os
    os.chdir(node)
    for i, f in enumerate(files):
        fname = f.split("/")[-1].split(".")[0]
        # skip files already downloaded and extracted on this node
        if not os.path.exists("{0}.xml".format(fname)):
            os.system("wget {0}".format(f))
            os.system("unzip {0}.zip".format(fname))

fname = open("urls.pickle", "rb")
urls = pickle.load(fname)

master = config.get('directory', 'home')
node = config.get('directory', 'local')
# make sure the master's tar output directory exists
if not os.path.exists("{0}/tar".format(master)):
    os.makedirs("{0}/tar".format(master))

# push the directory paths to every engine, then split the URL list across them
dview["master"] = master
dview["node"] = node
full = []
for year in urls.keys():
    full.extend(urls[year])
dview.scatter("files", full)
fetch()
40 |
--------------------------------------------------------------------------------
/starcluster/load.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Merge together two disctinct MySQL tables
3 | mysqldump [options] uspto -T /var/lib/mysql/uspto
4 | mysql -u [user] -p --local-infile=1 -h [db] [tbl]
5 |
6 | READ THIS: http://dev.mysql.com/doc/refman/5.5/en/optimizing-innodb-bulk-data-loading.html
7 | */
8 |
9 |
10 | SELECT "new base", NOW();
11 | SET FOREIGN_KEY_CHECKS = 0;
12 | SET UNIQUE_CHECKS = 0;
13 | SET SESSION tx_isolation='READ-UNCOMMITTED';
14 | SET innodb_lock_wait_timeout = 500;
15 | SET autocommit=0;
16 |
17 | SELECT "patent";
18 | LOAD DATA LOCAL INFILE 'new/patent.txt' INTO TABLE uspto_new.patent FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
19 | SELECT "rawlocation";
20 | LOAD DATA LOCAL INFILE 'new/rawlocation.txt' IGNORE INTO TABLE uspto_new.rawlocation FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
21 | SELECT "subclass";
22 | LOAD DATA LOCAL INFILE 'new/subclass.txt' IGNORE INTO TABLE uspto_new.subclass FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
23 | SELECT "mainclass";
24 | LOAD DATA LOCAL INFILE 'new/mainclass.txt' IGNORE INTO TABLE uspto_new.mainclass FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
25 | SELECT "application";
26 | LOAD DATA LOCAL INFILE 'new/application.txt' INTO TABLE uspto_new.application FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
27 | SELECT "rawassignee";
28 | LOAD DATA LOCAL INFILE 'new/rawassignee.txt' INTO TABLE uspto_new.rawassignee FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
29 | SELECT "rawinventor";
30 | LOAD DATA LOCAL INFILE 'new/rawinventor.txt' INTO TABLE uspto_new.rawinventor FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
31 | SELECT "ipcr";
32 | LOAD DATA LOCAL INFILE 'new/ipcr.txt' INTO TABLE uspto_new.ipcr FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
33 | SELECT "rawlawyer";
34 | LOAD DATA LOCAL INFILE 'new/rawlawyer.txt' INTO TABLE uspto_new.rawlawyer FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
35 | SELECT "usreldoc";
36 | LOAD DATA LOCAL INFILE 'new/usreldoc.txt' INTO TABLE uspto_new.usreldoc FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
37 | SELECT "uspc";
38 | LOAD DATA LOCAL INFILE 'new/uspc.txt' INTO TABLE uspto_new.uspc FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
39 |
40 | COMMIT;
41 | SET autocommit=1;
42 | SET innodb_lock_wait_timeout = 50;
43 | SET UNIQUE_CHECKS = 1;
44 | SET FOREIGN_KEY_CHECKS = 1;
45 | SET SESSION tx_isolation='REPEATABLE-READ';
46 | SELECT NOW();
47 |
48 | /* ------------------------------- */
49 |
50 | SELECT "new citatons", NOW();
51 | SET FOREIGN_KEY_CHECKS = 0;
52 | SET UNIQUE_CHECKS = 0;
53 | SET SESSION tx_isolation='READ-UNCOMMITTED';
54 | SET innodb_lock_wait_timeout = 500;
55 | SET autocommit=0;
56 |
57 | SELECT "foreigncitation";
58 | LOAD DATA LOCAL INFILE 'new/foreigncitation.txt' INTO TABLE uspto_new.foreigncitation FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
59 | SELECT "otherreference";
60 | LOAD DATA LOCAL INFILE 'new/otherreference.txt' INTO TABLE uspto_new.otherreference FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
61 | SELECT "usapplicationcitation";
62 | LOAD DATA LOCAL INFILE 'new/usapplicationcitation.txt' INTO TABLE uspto_new.usapplicationcitation FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
63 | SELECT "uspatentcitation";
64 | LOAD DATA LOCAL INFILE 'new/uspatentcitation.txt' INTO TABLE uspto_new.uspatentcitation FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
65 |
66 | COMMIT;
67 | SET autocommit=1;
68 | SET innodb_lock_wait_timeout = 50;
69 | SET UNIQUE_CHECKS = 1;
70 | SET FOREIGN_KEY_CHECKS = 1;
71 | SET SESSION tx_isolation='REPEATABLE-READ';
72 | SELECT NOW();
73 |
74 | /* ------------------------------- */
75 |
76 | SELECT "new claims", NOW();
77 | SET FOREIGN_KEY_CHECKS = 0;
78 | SET UNIQUE_CHECKS = 0;
79 | SET SESSION tx_isolation='READ-UNCOMMITTED';
80 | SET innodb_lock_wait_timeout = 500;
81 | SET autocommit=0;
82 |
83 | SELECT "citation";
84 | LOAD DATA LOCAL INFILE 'new/claim.txt' INTO TABLE uspto_new.claim FIELDS TERMINATED by '\t' ENCLOSED BY '\"';
85 |
86 | COMMIT;
87 | SET autocommit=1;
88 | SET innodb_lock_wait_timeout = 50;
89 | SET UNIQUE_CHECKS = 1;
90 | SET FOREIGN_KEY_CHECKS = 1;
91 | SET SESSION tx_isolation='REPEATABLE-READ';
92 | SELECT NOW();
93 |
--------------------------------------------------------------------------------
/starcluster/load_drop.sql:
--------------------------------------------------------------------------------
/* Drop every patentprocessor table so the schema can be rebuilt from scratch.
   Association and raw tables are dropped before the parent tables they
   reference (patent, location, mainclass, subclass come last). */
drop table ipcr;
drop table patent_assignee;
drop table patent_inventor;
drop table patent_lawyer;
drop table location_assignee;
drop table location_inventor;
drop table rawassignee;
drop table rawinventor;
drop table rawlawyer;
drop table rawlocation;
drop table application;
drop table assignee;
drop table inventor;
drop table lawyer;
drop table otherreference;
drop table foreigncitation;
drop table uspatentcitation;
drop table usapplicationcitation;
drop table claim;
drop table uspc;
drop table usreldoc;
drop table patent;
drop table location;
drop table mainclass;
drop table subclass;
26 |
--------------------------------------------------------------------------------
/starcluster/load_pre.sh:
--------------------------------------------------------------------------------
#cd /home/sgeadmin/patentprocessor/starcluster; sh load_pre.sh > ../tar/2.log

# For each weekly patent XML file under /mnt/sgeadmin: clear the previous
# dump, reset the uspto schema, parse the XML into MySQL, dump every table to
# tab-separated text, and archive the dump under patentprocessor/tar.
cd /mnt/sgeadmin
for i in `ls *.xml`
do echo $i
cd /var/lib/mysql/uspto
echo " - remove txt"
# remove the previous iteration's per-table dumps
rm *.txt

cd /home/sgeadmin/patentprocessor
echo " - drop database"
# NOTE(review): '-root' is likely intended as '-u root' — confirm credentials.
# The drop script appears to be run twice deliberately (presumably to catch
# tables skipped on the first pass) — verify before changing.
mysql -root uspto < starcluster/load_drop.sql
mysql -root uspto < starcluster/load_drop.sql

cd /home/sgeadmin/patentprocessor
echo " - python"
# recreate the schema and parse this week's XML file into MySQL
python parse.py -p /mnt/sgeadmin -x $i
echo " - mysqldump"
# dump each table as a .txt file into /var/lib/mysql/uspto
mysqldump -root uspto -T /var/lib/mysql/uspto

echo " - duplicate"
cd /var/lib/mysql/uspto
# bundle the per-table dumps for transfer to the ingestion server
tar -czf $i.tar.gz *.txt
mv $i.tar.gz /home/sgeadmin/patentprocessor/tar

done
27 |
--------------------------------------------------------------------------------
/test/.gitignore:
--------------------------------------------------------------------------------
1 | results.csv
2 | *.db
3 | tmp
4 | err
5 | fibotest.py
6 | *.sqlite3
7 |
--------------------------------------------------------------------------------
/test/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
# Remove test artifacts: sqlite databases, editor backups, bytecode, err logs.
clean:
	rm -rf *.sqlite3 *~ *.pyc err
5 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/__init__.py
--------------------------------------------------------------------------------
/test/colortest.py:
--------------------------------------------------------------------------------
# ANSI 256-color foreground escape sequences for xterm-compatible terminals.
# COLORn selects palette entry n (16-255) via the SGR sequence ESC[38;5;<n>m.
# \33 is the octal escape for the ESC control character (0x1B).
# The original file spelled out all 240 assignments by hand; they are
# generated here instead, producing byte-identical module attributes.
for _code in range(16, 256):
    globals()["COLOR%d" % _code] = "\33[38;5;%dm" % _code
del _code

# Reset all terminal attributes.
# Fix: the original value was "[0m" (missing the leading ESC byte), so
# printing RESET_COLOR emitted the literal text "[0m" instead of actually
# resetting the terminal colors.
RESET_COLOR = "\33[0m"
--------------------------------------------------------------------------------
/test/config.ini:
--------------------------------------------------------------------------------
1 | [global]
2 | database = sqlite
3 |
4 | [sqlite]
5 | database = test.db
6 | path = fixtures/alchemy
7 |
8 | [sqlite2]
9 | database = test.db
10 | path = fixtures/alchemy
11 |
--------------------------------------------------------------------------------
/test/fixtures/GNS/geonames_10.txt:
--------------------------------------------------------------------------------
1 | RC UFI UNI LAT LONG DMS_LAT DMS_LONG MGRS JOG FC DSG PC CC1 ADM1 POP ELEV CC2 NT LC SHORT_FORM GENERIC SORT_NAME_RO FULL_NAME_RO FULL_NAME_ND_RO SORT_NAME_RG FULL_NAME_RG FULL_NAME_ND_RG NOTE MODIFY_DATE
2 | 1 -1307834 -1891810 12.516667 -69.983333 123100 -695900 19PCP9315983885 ND19-14 P PPLL AA 00 N PAVIA Pavía Pavia PAVIA Pavía Pavia 1993-12-21
3 | 1 -1307889 -1891862 12.566667 -70.033333 123400 -700200 19PCP8774789436 ND19-14 P PPL AA 00 V SANTAANNA Santa Anna Santa Anna SANTAANNA Santa Anna Santa Anna 1993-12-21
4 | 1 -1307889 -1891878 12.566667 -70.033333 123400 -700200 19PCP8774789436 ND19-14 P PPL AA 00 N SINTANNA Sint Anna Sint Anna SINTANNA Sint Anna Sint Anna 1993-12-21
5 | 1 -1307793 -1891762 12.483333 -69.95 122900 -695700 19PCP9676780186 ND19-14 T HLL AA 00 N KLEINEJAMANOTA Kleine Jamanota Kleine Jamanota KLEINEJAMANOTA Kleine Jamanota Kleine Jamanota 1993-12-21
6 | 1 -1307696 -1891642 12.6 -70.05 123600 -700300 19PCP8595193130 ND19-14 H COVE AA 00 N ARASHI Arashi Arashi ARASHI Arashi Arashi 1993-12-21
7 | 1 -1307696 -1891879 12.6 -70.05 123600 -700300 19PCP8595193130 ND19-14 H COVE AA 00 V SINTARASJI Sint Arasji Sint Arasji SINTARASJI Sint Arasji Sint Arasji 1993-12-21
8 | 1 -1307696 -1891643 12.6 -70.05 123600 -700300 19PCP8595193130 ND19-14 H COVE AA 00 V ARASJI Arasji Arasji ARASJI Arasji Arasji 1993-12-21
9 | 1 -1307748 -1891711 12.55 -69.983333 123300 -695900 19PCP9317287572 ND19-14 T HLL AA 00 V CERUCRISTAL Ceru Cristal Ceru Cristal CRISTAL CERU Cristal, Ceru Cristal, Ceru 1993-12-21
10 | 1 -1307748 -1891712 12.55 -69.983333 123300 -695900 19PCP9317287572 ND19-14 T HLL AA 00 N SEROCRISTAL Sero Cristal Sero Cristal CRISTAL SERO Cristal, Sero Cristal, Sero 1993-12-21
11 |
--------------------------------------------------------------------------------
/test/fixtures/SAS/national_file_head_20120204.txt:
--------------------------------------------------------------------------------
1 | FEATURE_ID|FEATURE_NAME|FEATURE_CLASS|STATE_ALPHA|STATE_NUMERIC|COUNTY_NAME|COUNTY_NUMERIC|PRIMARY_LAT_DMS|PRIM_LONG_DMS|PRIM_LAT_DEC|PRIM_LONG_DEC|SOURCE_LAT_DMS|SOURCE_LONG_DMS|SOURCE_LAT_DEC|SOURCE_LONG_DEC|ELEV_IN_M|ELEV_IN_FT|MAP_NAME|DATE_CREATED|DATE_EDITED
2 | 399|Agua Sal Creek|Stream|AZ|04|Apache|001|362740N|1092842W|36.4611122|-109.4784394|362053N|1090915W|36.3480582|-109.1542662|1645|5397|Fire Dance Mesa|02/08/1980|
3 | 400|Agua Sal Wash|Valley|AZ|04|Apache|001|363246N|1093103W|36.546112|-109.5176069|362740N|1092842W|36.4611122|-109.4784394|1597|5239|Little Round Rock|02/08/1980|
4 | 401|Aguaje Draw|Valley|AZ|04|Apache|001|343417N|1091313W|34.5714281|-109.2203696|344308N|1085826W|34.7188|-108.9739|1750|5741|Kearn Lake|02/08/1980|01/14/2008
5 | 402|Arlington State Wildlife Area|Park|AZ|04|Maricopa|013|331455N|1124625W|33.2486547|-112.7735045|||||231|758|Spring Mountain|02/08/1980|
6 | 403|Bar X Wash|Stream|AZ|04|Graham|009|322815N|1095610W|32.4709038|-109.9361853|323048N|1095233W|32.5134024|-109.8759075|1339|4393|West of Greasewood Mountain|02/08/1980|
7 | 404|Bis Ii Ah Wash|Stream|AZ|04|Apache|001|355230N|1093239W|35.8750096|-109.5442721|354903N|1093001W|35.8175|-109.5002778|1799|5902|Beautiful Valley Well|02/08/1980|
8 | 405|Brawley Wash|Stream|AZ|04|Pima|019|322540N|1111726W|32.4278489|-111.2906617|315820N|1112329W|31.972302|-111.3914941|591|1939|West of Marana|02/08/1980|
9 | 406|Cement Trough Canyon|Valley|AZ|04|Navajo|017|335942N|1103045W|33.9950482|-110.5126118|340437N|1103304W|34.0769908|-110.5512265|1494|4902|Blue House Mountain|02/08/1980|
10 | 407|Corn Creek Wash|Stream|AZ|04|Coconino|005|351621N|1105537W|35.2725114|-110.9268068|351958N|1105231W|35.3327883|-110.8751392|1435|4708|Old Leupp|02/08/1980|
11 |
--------------------------------------------------------------------------------
/test/fixtures/alchemy/alchemy.raw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/fixtures/alchemy/alchemy.raw
--------------------------------------------------------------------------------
/test/fixtures/csv/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
--------------------------------------------------------------------------------
/test/fixtures/csv/gen_sample.csv:
--------------------------------------------------------------------------------
1 | 0,foo,bar,01234567
2 |
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/.gitignore:
--------------------------------------------------------------------------------
ikhlaq.csv
dt*
Def*
7 | patentlist.txt
8 | .#bm-gs.py
9 | .goutputstream-5G9IGW
10 | benchmark_errors.txt
11 | grep_scrpt.sh
12 | nancy_list.txt
13 | berkeleyinventors1.csv
14 | berkeleyinventors2.csv
15 | csv/
16 | ikhlaq.csv
17 | benchmark.sh
18 |
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/benchmark.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Extract benchmark inventor data from benchmark.csv.
#
# Fix: this file contained an unresolved git merge conflict
# (<<<<<<< / ======= / >>>>>>> markers), which is not valid shell and
# would fail when executed.  Both sides of the conflict are kept: the
# 3-column extract feeds patentlist.txt and the 4-column extract feeds
# dt5.csv, matching the outputs the rest of the fixtures expect.

gawk -F, '{print $2",",$3",",$4}' < benchmark.csv > patentlist.txt
gawk -F, '{print $1",", $2",",$3",",$4}' < benchmark.csv > dt5.csv

#gawk -F, '{print $2}' < benchmark.csv > patentlist.txt
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/benchmark_confirm.py:
--------------------------------------------------------------------------------
"""Verify gold-standard inventor rows against the invpat table.

Reads "patent, lastname, firstname" lines from patentlist.txt, looks up
each (lastname, firstname) pair in the invpat table of the gold-standard
sqlite3 database, and logs every line whose patent number is not among
the matches.  Totals are written to benchmark_results.log.
"""
import sqlite3 as sql
import os
import sys
import logging

# bmVerify(['final_r7', 'final_r8'], filepath="/home/ysun/disambig/newcode/all/", outdir = "/home/ayu/results_v2/")

# Text Files
txt_file = 'patentlist.txt'
log_file = 'benchmark_results.log'

# Logging.  filemode='w' truncates the log on each run; the original code
# instead issued a separate open(log_file, "w") AFTER basicConfig, racing
# with (and clobbering) logging's own open file handle.
logging.basicConfig(filename=log_file, level=logging.DEBUG, filemode='w')

# Set Up SQL Connections
con = sql.connect('/test/goldstandard/invnum_N_zardoz_with_invpat.sqlite3')

with con:

    con_cur = con.cursor()
    logging.info("Beginning to query database")
    # Indexes make the per-line name lookups below fast on large tables.
    con_cur.execute("CREATE INDEX IF NOT EXISTS index_invnum ON invpat (Invnum)")
    con_cur.execute("CREATE INDEX IF NOT EXISTS index_lastname ON invpat (Lastname)")
    con_cur.execute("CREATE INDEX IF NOT EXISTS index_firstname ON invpat (Firstname)")
    count = 0
    errors = 0
    success = 0

    # 'with' guarantees the input file is closed; the original opened it at
    # module top and never closed it.  Iterating the file object replaces
    # the manual readline()/break loop.
    with open(txt_file, 'U') as opened_file:
        for line_read in opened_file:
            count = count + 1
            if count % 100 == 0:
                print("starting patent %d" % count)

            split_lines = line_read.split(', ')

            # Strip out weird characters/formatting.
            # Need to add leading "0" to Patent if not Design/Util/etc..
            patent_to_match = split_lines[0].strip(' \t\n\r')
            if len(patent_to_match) == 7:
                patent_to_match = "0" + patent_to_match
            last_name = split_lines[1].strip(' \t\n\r')
            first_name = split_lines[2].strip(' \t\n\r')

            # Parameterized query.  The original interpolated the names
            # straight into the SQL text with %, which breaks on any name
            # containing a quote and is an injection hazard.
            con_cur.execute(
                "SELECT Patent FROM invpat WHERE Lastname = ? AND Firstname = ?",
                (last_name, first_name))

            patents_matched_from_SQL = con_cur.fetchall()
            # any() counts each input line at most once; the original
            # incremented `success` once per duplicate matching row.
            match_found = any(row[0] == patent_to_match
                              for row in patents_matched_from_SQL)
            if match_found:
                success = success + 1
            else:
                logging.error("Did not find a match for %s, %s, %s"
                              % (first_name, last_name, patent_to_match))
                errors = errors + 1

    print("EXITING")
    logging.info("Total Patents: %d" % count)
    logging.info("Patents ran successfully: %d" % success)
    logging.info("Patents FAILED: %d" % errors)
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/berkeleyinventors.csv:
--------------------------------------------------------------------------------
1 | UniqueID,Patent,Lastname,Firstname
2 | ,VARCHAR,,
3 | ,%08d,,
4 | UNIQUE,EXACT,FUZZY,FUZZY
5 | 1,5241635,ARVIND,
6 | 2,5241635,CULLER,DAVID
7 | 2,5123095,CULLER,DAVID
8 | 2,5018062,CULLER,DAVID
9 | 3,5018576,SITAR,NICHOLAS
10 | 4,5241635,PAPADOPOULOS,GREGORY M
11 | 4,5123095,PAPADOPOULOS,GREGORY M
12 | 4,5018062,PAPADOPOULOS,GREGORY M
13 | 5,5123095,PINKERTON,JAMES T
14 | 6,5018062,SCHNEIDER,RICHARD P
15 | 7,8194655,PISTER,KRISTOPHER
16 | 7,8059629,PISTER,KRISTOPHER
17 | 7,7961664,PISTER,KRISTOPHER
18 | 7,5726480,PISTER,KRISTOPHER
19 | 7,7873043,PISTER,KRISTOPHER
20 | 7,7420980,PISTER,KRISTOPHER
21 | 7,7529217,PISTER,KRISTOPHER
22 | 7,7881239,PISTER,KRISTOPHER
23 | 7,6517734,PISTER,KRISTOPHER
24 | 7,6517734,PISTER,KRISTOPHER
25 | 7,5659195,PISTER,KRISTOPHER
26 | 8,7873043,SHEAR,ROBERT M
27 | 8,7420980,SHEAR,ROBERT M
28 | 9,7881239,ZATS,YURI S
29 | 9,7529217,ZATS,YURI S
30 | 10,7881239,CONTNAT,ROBERT A
31 | 10,7529217,CONTNAT,ROBERT A
32 | 11,7881239,TREUHAFT,N
33 | 11,7529217,TREUHAFT,N
34 | 12,6517734,MULLER,LILAC
35 | 13,6517734,ARNETT,KENNETH E
36 | 14,6517734,FABINY,LARRY
37 | 15,5659195,STAFSUDD,OSCAR M
38 | 16,5659195,KAISER,WILLIAM J
39 | 17,5659195,NELSON,PHILLIS R
40 | 18,7913472,PISTER,JACINTA
41 | 19,7913472,LIN,JIN-JIN
42 | 20,7913472,TROTH,STEVE
43 | 21,7099871,DOOLIN,DAVID M
44 | 21,7013303,DOOLIN,DAVID M
45 | 21,7171415,DOOLIN,DAVID M
46 | 22,7171415,KAN,GENE H
47 | 22,7099871,KAN,GENE H
48 | 22,7013303,KAN,GENE H
49 | 23,7099871,FAYBISHENKO,YAROSLAY
50 | 23,7013303,FAYBISHENKO,YAROSLAY
51 | 23,7171415,FAYBISHENKO,YAROSLAY
52 | 24,7099871,CUTTING,DOUGLASS R
53 | 24,7171415,CUTTING,DOUGLASS R
54 | 25,5018576,HUNT,JAMES R
55 | 26,7099871,CAMARDA,THOMAS J
56 | 26,7013303,CAMARDA,THOMAS J
57 | 26,7171415,CAMARDA,THOMAS J
58 | 27,7099871,WATERHOUSE,STEVE
59 | 27,7013303,WATERHOUSE,STEVE
60 | 27,7171415,WATERHOUSE,STEVE
61 | 28,7013303,BEATTY,JOHN
62 | 29,5136185,FLEMING,LEE
63 | 29,5029133,FLEMING,LEE
64 | 30,5029133,LA FETRA,ROSS V
65 | 31,6453319,HAINES,MATTHEW
66 | 31,6209003,HAINES,MATTHEW
67 | 31,6128627,HAINES,MATTHEW
68 | 31,6128623,HAINES,MATTHEW
69 | 31,6289358,HAINES,MATTHEW
70 | 31,6292880,HAINES,MATTHEW
71 | 31,6913307,HAINES,MATTHEW
72 | 32,6209003,GOURLEY,DAVID
73 | 32,6128627,GOURLEY,DAVID
74 | 32,6128623,GOURLEY,DAVID
75 | 32,6289358,GOURLEY,DAVID
76 | 32,6292880,GOURLEY,DAVID
77 | 32,6453319,GOURLEY,DAVID
78 | 32,6913307,GOURLEY,DAVID
79 | 33,6209003,TOTTY,BRIAN
80 | 33,6128627,TOTTY,BRIAN
81 | 33,6128623,TOTTY,BRIAN
82 | 33,6289358,TOTTY,BRIAN
83 | 33,6292880,TOTTY,BRIAN
84 | 33,6453319,TOTTY,BRIAN
85 | 34,6913307,TOTTY,BRIAN
86 | 35,6209003,BEGUELIN,ADAM
87 | 35,6128627,BEGUELIN,ADAM
88 | 35,6128623,BEGUELIN,ADAM
89 | 35,6289358,BEGUELIN,ADAM
90 | 35,6292880,BEGUELIN,ADAM
91 | 35,6453319,BEGUELIN,ADAM
92 | 35,6913307,BEGUELIN,ADAM
93 | 36,6913307,PLEVYAK,JOHN
94 | 36,6292880,PLEVYAK,JOHN
95 | 36,6209003,PLEVYAK,JOHN
96 | 36,6128627,PLEVYAK,JOHN
97 | 36,6128623,PLEVYAK,JOHN
98 | 36,6289358,PLEVYAK,JOHN
99 | 36,6453319,PLEVYAK,JOHN
100 | 37,6128627,MATTHIS,PETER
101 | 37,6128623,MATTHIS,PETER
102 | 37,6209003,MATTHIS,PETER
103 | 37,6292880,MATTHIS,PETER
104 | 37,6289358,MATTHIS,PETER
105 | 37,6453319,MATTHIS,PETER
106 | 37,6913307,MATTHIS,PETER
107 | 38,6158781,AARON III,JOHN W
108 | 39,7625697,SHALON,TIDHAR D
109 | 39,7378236,SHALON,TIDHAR D
110 | 39,7442499,SHALON,TIDHAR D
111 | 39,7323298,SHALON,TIDHAR D
112 | 39,6110426,SHALON,TIDHAR D
113 | 39,5807522,SHALON,TIDHAR D
114 | 40,7442499,BROWN,PATRICK O
115 | 40,7625697,BROWN,PATRICK O
116 | 40,7378236,BROWN,PATRICK O
117 | 40,7323298,BROWN,PATRICK O
118 | 40,6110426,BROWN,PATRICK O
119 | 40,5807522,BROWN,PATRICK O
120 | 41,7573873,SIDHU,IKHLAQ S
121 | 41,7453815,SIDHU,IKHLAQ S
122 | 41,7032242,SIDHU,IKHLAQ S
123 | 41,7016675,SIDHU,IKHLAQ S
124 | 41,6954454,SIDHU,IKHLAQ S
125 | 41,6937699,SIDHU,IKHLAQ S
126 | 41,6937610,SIDHU,IKHLAQ S
127 | 41,6914897,SIDHU,IKHLAQ S
128 | 41,6870830,SIDHU,IKHLAQ S
129 | 41,6857072,SIDHU,IKHLAQ S
130 | 41,6857021,SIDHU,IKHLAQ S
131 | 41,6856616,SIDHU,IKHLAQ S
132 | 41,6822957,SIDHU,IKHLAQ S
133 | 41,6804224,SIDHU,IKHLAQ S
134 | 41,6795429,SIDHU,IKHLAQ S
135 | 41,6785261,SIDHU,IKHLAQ S
136 | 41,6771674,SIDHU,IKHLAQ S
137 | 41,6744759,SIDHU,IKHLAQ S
138 | 41,6741586,SIDHU,IKHLAQ S
139 | 41,6732314,SIDHU,IKHLAQ S
140 | 41,6731642,SIDHU,IKHLAQ S
141 | 41,6731630,SIDHU,IKHLAQ S
142 | 41,6697354,SIDHU,IKHLAQ S
143 | 41,6681252,SIDHU,IKHLAQ S
144 | 41,6678250,SIDHU,IKHLAQ S
145 | 41,6675218,SIDHU,IKHLAQ S
146 | 41,6674745,SIDHU,IKHLAQ S
147 | 41,6650901,SIDHU,IKHLAQ S
148 | 41,6650619,SIDHU,IKHLAQ S
149 | 41,6625119,SIDHU,IKHLAQ S
150 | 41,6587433,SIDHU,IKHLAQ S
151 | 41,6584490,SIDHU,IKHLAQ S
152 | 41,6577622,SIDHU,IKHLAQ S
153 | 41,6570606,SIDHU,IKHLAQ S
154 | 41,6567405,SIDHU,IKHLAQ S
155 | 41,6567399,SIDHU,IKHLAQ S
156 | 41,6542504,SIDHU,IKHLAQ S
157 | 41,6512761,SIDHU,IKHLAQ S
158 | 41,6487690,SIDHU,IKHLAQ S
159 | 41,6487603,SIDHU,IKHLAQ S
160 | 41,6446127,SIDHU,IKHLAQ S
161 | 41,7012141,SIDHU,IKHLAQ S
162 | 41,6434606,SIDHU,IKHLAQ S
163 | 41,6381638,SIDHU,IKHLAQ S
164 | 41,6366959,SIDHU,IKHLAQ S
165 | 41,6363053,SIDHU,IKHLAQ S
166 | 41,6360271,SIDHU,IKHLAQ S
167 | 41,6353614,SIDHU,IKHLAQ S
168 | 41,6351524,SIDHU,IKHLAQ S
169 | 41,6269099,SIDHU,IKHLAQ S
170 | 41,6243846,SIDHU,IKHLAQ S
171 | 41,6226769,SIDHU,IKHLAQ S
172 | 41,6182125,SIDHU,IKHLAQ S
173 | 41,6175871,SIDHU,IKHLAQ S
174 | 41,6170075,SIDHU,IKHLAQ S
175 | 41,6169744,SIDHU,IKHLAQ S
176 | 41,6151636,SIDHU,IKHLAQ S
177 | 41,6145109,SIDHU,IKHLAQ S
178 | 41,6055236,SIDHU,IKHLAQ S
179 | 41,6006271,SIDHU,IKHLAQ S
180 | 41,5870412,SIDHU,IKHLAQ S
181 | 42,5018576,UDELL,KENT S
182 | 43,5018576,STEWARD JR,LLOYD D
183 |
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/gs2011.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Stub wrapper for the 2011 gold-standard inventor queries (see readme.md);
# currently only announces what it will do.

echo "Queries the sqlite3 database with 2011 gold standard inventor data"
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/jamesrhunt.csv:
--------------------------------------------------------------------------------
1 | 1,5362759,HUNT,JAMES R
2 | 1,5860770,HUNT,JAMES R
3 | 1,7195753,HUNT,JAMES R
4 | 1,7482021,HUNT,JAMES R
5 | 1,7195753,HUNT,JAMES R
6 | 1,6783767,HUNT,JAMES R
7 | 1,6770268,HUNT,JAMES R
8 | 1,6579514,HUNT,JAMES R
9 | 1,6071043,HUNT,JAMES R
10 | 1,5951947,HUNT,JAMES R
11 | 1,5353449,HUNT,JAMES R
12 | 1,5578700,HUNT,JAMES R
13 | 1,5675882,HUNT,JAMES R
14 | 1,5613221,HUNT,JAMES R
15 | 1,5611492,HUNT,JAMES R
16 | 1,D329481,HUNT,JAMES R
17 | 1,D322932,HUNT,JAMES R
18 | 1,D322931,HUNT,JAMES R
19 | 1,D322215,HUNT,JAMES R
20 | 1,D321253,HUNT,JAMES R
21 | 1,4977893,HUNT,JAMES R
22 | 1,4571850,HUNT,JAMES R
23 | 1,D271911,HUNT,JAMES R
24 | 1,4040424,HUNT,JAMES R
25 |
--------------------------------------------------------------------------------
/test/fixtures/goldstandard/readme.md:
--------------------------------------------------------------------------------
1 | # "Gold Standard" processing for verified patent data
2 |
3 | ## Benchmark files
4 |
5 | * `benchmark.csv` is the csv export from the benchmark.xlsx spreadsheet
6 | file, converted from Windows `crlf` to unix line convention.
7 | * `benchmark.sh` is a wrapper around some `gawk` which processes the
8 | csv file to acquire relevant data.
9 |
10 | ## Gold standard files
11 |
12 | * `gs2011.sh` wraps various operations
13 | * `goldstandard.csv` is an input file in a format acceptable to the
14 | disambiguator.
15 |
16 |
--------------------------------------------------------------------------------
/test/fixtures/sqlite3/combined.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/fixtures/sqlite3/combined.sqlite3
--------------------------------------------------------------------------------
/test/fixtures/text/accented_characters.txt:
--------------------------------------------------------------------------------
1 | réâ∑œ®\üñµ
2 |
--------------------------------------------------------------------------------
/test/fixtures/unittest/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 |
--------------------------------------------------------------------------------
/test/fixtures/xml/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
--------------------------------------------------------------------------------
/test/fixtures/xml/basic.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | hello
4 | world
5 |
6 |
7 | 1
8 | 2
9 | 3
10 |
11 |
12 |
--------------------------------------------------------------------------------
/test/integration/.gitignore:
--------------------------------------------------------------------------------
1 | !*.csv
2 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.18/assignee.csv:
--------------------------------------------------------------------------------
1 | a1e27fa698dd2383ac0e0c5a85bf8e07,2,"","","Acushnet Company","",""
2 | 5729cdc83217097b2de5fd057f6c01d4,2,"","","BBC International LLC","",""
3 | 32fecc3fbbbb2058c5efd7746f8c92ce,2,"","","Devi Kroell Inc.","",""
4 | 917c17490595190472a0c880d468d1f8,3,"","","Design Sportswears","",""
5 | 6b7f48f21749641c4f67f138da7983bf,2,"","","Frito-Lay North America, Inc.","",""
6 | 8e480b469958f8a97a4d98fecc53aa46,3,"","","Hermes Sellier (Societe Par Actions Simplifiee)","",""
7 | 2067d04477dd28ed9fa8dd2c8a338352,2,"","","Icon IP, Inc.","",""
8 | 2e06355f463a868c3830ada75c071757,3,"","","Kybun AG","",""
9 | 70d61f29771e8463211d94090414205e,2,"","","OrthoCor Medical, Inc.","",""
10 | a020612580b3d66a7a11f16cd3edc6a3,2,"","","Times Three Clothier, LLC","",""
11 | de9c78a8eae461883b1bcf5ebdd57612,2,"","","Thompson-Weiler Enterprises, LLC","",""
12 | 769084ad4f0ed13b7c09ed62767e4789,2,"","","The Vermont Teddy Bear Company, Inc.","",""
13 | 6ee34b4c1a39003733e2f3959e9afb10,3,"","","Zero1.tv GmbH","",""
14 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.18/lawyer.csv:
--------------------------------------------------------------------------------
1 | 9577d947a8cdebd0b2958ceccb68ffdf,Andrew,"F. Young, Esq.","",UNKNOWN
2 | a338d00e25226144b10c43ad50091feb,"","","Abelman, Frayne & Schwab",UNKNOWN
3 | 50f3e5a0ef8631495862f78ed762e466,Barry,"G. Magidoff","",UNKNOWN
4 | 197f4d96eccda10e78fdd24c5da8bcd4,Celina,"M. Corr","",UNKNOWN
5 | 953a70cad377ba2df2236a1e6815dcd1,"","","Carston & Cahoon, LLP",UNKNOWN
6 | 8b7ce792ee91fb499f7da344adbb7549,Colin,"P. Cahon","",UNKNOWN
7 | f46fac8a1c76017b4e20ffaafbc97832,D.,"Michael Burns","",UNKNOWN
8 | e3412e962e62f82664c4d92fedcf75d3,"","","Foley & Lardner LLP",UNKNOWN
9 | 1c33aced610a38673c47bdc668e30c4c,"","","GrayRobinson, P.A.",UNKNOWN
10 | 81fcca8ea1a38185db2abfb2ab016e36,H.,"Jay Spiegel","",UNKNOWN
11 | 02efc1400155615d9d936adee32d8892,Jerome,"V. Sartain","",UNKNOWN
12 | 84404f72b783c58c5f3efbb10511961c,Justin,"G. Sanders","",UNKNOWN
13 | fe79f99fb866c9015456cbd7bb4dfcdb,"","","Lackenbach Siegel, LLP",UNKNOWN
14 | 4f546f39addcde5bb509f1708717ca61,"","","Minn Law Firm",UNKNOWN
15 | 5b244d5a5242cbce914c9917814efdb9,"","","Perkins Coie LLP",UNKNOWN
16 | a79f4f0e80347f7e349acb351dfd04c5,Richard,"K. C. Chang, II","",UNKNOWN
17 | b3bb5efb58647ceabe00d01fbb8331db,"","","Schwegman, Lunberg & Woessner, P.A.",UNKNOWN
18 | c1864223e8ed094d1cd8365973cbe3ec,"","","Shoemaker and Mattare",UNKNOWN
19 | 60f4caadf31bf7d7dfcf14ac3da9674e,"","","Stroock & Stroock & Lavan LLP",UNKNOWN
20 | 8bbc28f3b70f7c0d6bca1e459a018e0a,"","","Thomas & Karceski, P.C.",UNKNOWN
21 | ce2ffb0caf580bc53370e856ea4ae87a,Veronica-Adele,"R. Cao","",UNKNOWN
22 | 59817b1e6a20c836b1fa7507497b2f02,Walter,"A. Hackler","",UNKNOWN
23 | 0f029da8bb9147c7b3980b3c219d2ee7,"","","Weiss & Moy, P.C.",UNKNOWN
24 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.18/location.csv:
--------------------------------------------------------------------------------
1 | 26.3586885|-80.0830984,"Boca Raton",FL,US,26.3586885,-80.0830984
2 | 33.0198431|-96.6988856,Plano,TX,US,33.0198431,-96.6988856
3 | 33.1506744|-96.8236116,Frisco,TX,US,33.1506744,-96.8236116
4 | 33.599722|-117.699444,"Laguna Hills",CA,US,33.599722,-117.699444
5 | 33.660297|-117.9992265,"Huntington Beach",CA,US,33.660297,-117.9992265
6 | 36.1988592|-115.1175013,"North Las Vegas",NV,US,36.1988592,-115.1175013
7 | 37.5407246|-77.4360481,Richmond,VA,US,37.5407246,-77.4360481
8 | 39.070388|-76.5452409,"Severna Park",MD,US,39.070388,-76.5452409
9 | 40.2968979|-111.6946475,Orem,UT,US,40.2968979,-111.6946475
10 | 40.7143528|-74.0059731,NYC,NY,US,40.7143528,-74.0059731
11 | 41.1628731|-73.8615246,Ossining,NY,US,41.1628731,-73.8615246
12 | 41.2042616|-73.7270761,"Mount Kisco",NY,US,41.2042616,-73.7270761
13 | 41.6376043|-70.9036487,Fairhaven,MA,US,41.6376043,-70.9036487
14 | 41.7369803|-111.8338359,Logan,UT,US,41.7369803,-111.8338359
15 | 41.7|-70.7633333,Marion,MA,US,41.7,-70.7633333
16 | 42.0714925|-70.8092,Pembroke,MA,US,42.0714925,-70.8092
17 | 42.2495321|-71.0661653,Milton,MA,US,42.2495321,-71.0661653
18 | 42.5792583|-71.4378411,Westford,MA,US,42.5792583,-71.4378411
19 | 44.3806065|-73.227626,Shelburne,VT,US,44.3806065,-73.227626
20 | 44.983334|-93.26667,Minneapolis,MN,US,44.983334,-93.26667
21 | 45.0791325|-93.1471667,Shoreview,MN,US,45.0791325,-93.1471667
22 | 45.775491|12.0439904,Montebelluna,Veneto,IT,45.775491,12.0439904
23 | 47.240075|7.822812,Roggwil,Bern,CH,47.240075,7.822812
24 | 48.856614|2.3522219,Paris,"Île-de-France",FR,48.856614,2.3522219
25 | 52.519171|13.4060912,Berlin,Berlin,DE,52.519171,13.4060912
26 | 53.5510846|9.9936818,Hamburg,Hamburg,DE,53.5510846,9.9936818
27 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.one/assignee.csv:
--------------------------------------------------------------------------------
1 | 6b7f48f21749641c4f67f138da7983bf,2,"","","Frito-Lay North America, Inc.","",""
2 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.one/lawyer.csv:
--------------------------------------------------------------------------------
1 | 197f4d96eccda10e78fdd24c5da8bcd4,Celina,"M. Corr","",UNKNOWN
2 | 953a70cad377ba2df2236a1e6815dcd1,"","","Carston & Cahoon, LLP",UNKNOWN
3 | 8b7ce792ee91fb499f7da344adbb7549,Colin,"P. Cahon","",UNKNOWN
4 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.one/location.csv:
--------------------------------------------------------------------------------
1 | 33.0198431|-96.6988856,Plano,TX,US,33.0198431,-96.6988856
2 | 33.1506744|-96.8236116,Frisco,TX,US,33.1506744,-96.8236116
3 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.two/assignee.csv:
--------------------------------------------------------------------------------
1 | 70d61f29771e8463211d94090414205e,2,"","","OrthoCor Medical, Inc.","",""
2 | 6ee34b4c1a39003733e2f3959e9afb10,3,"","","Zero1.tv GmbH","",""
3 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.two/lawyer.csv:
--------------------------------------------------------------------------------
1 | 5b244d5a5242cbce914c9917814efdb9,"","","Perkins Coie LLP",UNKNOWN
2 | b3bb5efb58647ceabe00d01fbb8331db,"","","Schwegman, Lunberg & Woessner, P.A.",UNKNOWN
3 |
--------------------------------------------------------------------------------
/test/integration/clean/ipg120327.two/location.csv:
--------------------------------------------------------------------------------
1 | 44.983334|-93.26667,Minneapolis,MN,US,44.983334,-93.26667
2 | 45.0791325|-93.1471667,Shoreview,MN,US,45.0791325,-93.1471667
3 | 52.519171|13.4060912,Berlin,Berlin,DE,52.519171,13.4060912
4 | 53.5510846|9.9936818,Hamburg,Hamburg,DE,53.5510846,9.9936818
5 |
--------------------------------------------------------------------------------
/test/integration/consolidate/ipg120327.18/disambiguator.csv:
--------------------------------------------------------------------------------
1 | Nicole Cavin D656296 D1 D1/128 Frisco TX US Frito-Lay North America, Inc. Frito-Lay North America, Inc.
2 | Divya Paruchuri D656296 D1 D1/128 Frisco TX US Frito-Lay North America, Inc. Frito-Lay North America, Inc.
3 | Michael Zbuchalski D656296 D1 D1/128 Frisco TX US Frito-Lay North America, Inc. Frito-Lay North America, Inc.
4 | Michaela M. Christian D656297 D2 D2/627 North Las Vegas NV US
5 | Heather Thomson Schindler D656298 D2 D2/703 New York NY US Times Three Clothier, LLC Times Three Clothier, LLC
6 | Brian Jeffery Peters D656299 D2 D2/742 Huntington Beach CA US
7 | Debi Purcell D656300 D2 D2/742 Laguna Hills CA US
8 | Michael Scott Randall D656301 D2 D2/858 Shelburne VT US The Vermont Teddy Bear Company, Inc. The Vermont Teddy Bear Company, Inc.
9 | Wade Driggers D656302 D2 D2/946 Richmond VA US
10 | Karl Muller D656303 D2 D2/947 Roggwil CH Kybun AG Kybun AG
11 | Claudio Franco D656303 D2 D2/947 Roggwil CH Kybun AG Kybun AG
12 | Gilberto Debiase D656304 D2 D2/960 Boca Raton FL US BBC International LLC BBC International LLC
13 | Kenneth Golden Harper D656305 D2 D2/960 Orem UT US Icon IP, Inc. Icon IP, Inc.
14 | Jonathan G. Bacon D656306 D2 D2/969 Westford MA US Acushnet Company Acushnet Company
15 | James M. Feeney D656307 D2 D2/969 Marion MA US Acushnet Company Acushnet Company
16 | Richard A. Mochen D656307 D2 D2/969 Marion MA US Acushnet Company Acushnet Company
17 | Paul O. Teeter D656307 D2 D2/969 Marion MA US Acushnet Company Acushnet Company
18 | Kin-Joe Sham D656308 D3 D3/2031 Shoreview MN US OrthoCor Medical, Inc. OrthoCor Medical, Inc.
19 | Oliver Renelt D656309 D3 D3/218 Hamburg DE Zero1.tv GmbH Zero1.tv GmbH
20 | Alexander Gruber D656309 D3 D3/218 Hamburg DE Zero1.tv GmbH Zero1.tv GmbH
21 | Valerie M. Ciptak D656310 D3 D3/226 Ossining NY US Thompson-Weiler Enterprises, LLC Thompson-Weiler Enterprises, LLC
22 | Justin S. Werner D656310 D3 D3/226 Ossining NY US Thompson-Weiler Enterprises, LLC Thompson-Weiler Enterprises, LLC
23 | Gracelia Chiurazzi D656311 D3 D3/232 New York NY US Devi Kroell Inc. Devi Kroell Inc.
24 | Valérie Gerbi D656312 D3 D3/232 Paris FR Design Sportswears Design Sportswears
25 | Jean-Louis Dumas D656313 D3 D3/243 Paris FR Hermes Sellier (Societe Par Actions Simplifiee) Hermes Sellier (Societe Par Actions Simplifiee)
26 | Pierre-Alexis Dumas, legal representative D656313 D3 D3/243 Paris FR Hermes Sellier (Societe Par Actions Simplifiee) Hermes Sellier (Societe Par Actions Simplifiee)
27 | Sandrine Brekke-Dumas, legal representative D656313 D3 D3/243 Paris FR Hermes Sellier (Societe Par Actions Simplifiee) Hermes Sellier (Societe Par Actions Simplifiee)
28 | Couli Jobert D656313 D3 D3/243 Paris FR Hermes Sellier (Societe Par Actions Simplifiee) Hermes Sellier (Societe Par Actions Simplifiee)
29 |
--------------------------------------------------------------------------------
/test/integration/consolidate/ipg120327.two/disambiguator.csv:
--------------------------------------------------------------------------------
1 | Kin-Joe Sham D656308 D3 D3/2031 Shoreview MN US OrthoCor Medical, Inc. OrthoCor Medical, Inc.
2 | Oliver Renelt D656309 D3 D3/218 Hamburg DE Zero1.tv GmbH Zero1.tv GmbH
3 | Alexander Gruber D656309 D3 D3/218 Hamburg DE Zero1.tv GmbH Zero1.tv GmbH
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/application.csv:
--------------------------------------------------------------------------------
1 | id,type,number,country,date,abstract,title,granted,num_claims
2 | 2006/20060288462,utility,20060288462,US,2006-12-28,"A plurality of substantially flexible skeletal members disposed in the form of a portion of a garment fabric panel is disclosed. A plurality of connecting members secure the skeletal member in a configuration corresponding to the shape of the portion of the fabric panel. A plurality of candy members is disposed on the skeletal member. A plurality of knots may be employed in the skeletal members at least at some points between the candy members, whereby limited removal of the candy members may be implemented. A plurality of skeletal lengths may be implemented to provide limited removal of the candy members. ","Garments composed of fabric panels incorporating edible cells",,10
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/claim.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,text,dependent,sequence
2 | 2006/20060288462,"A garment, comprising: ",,1
3 | 2006/20060288462,"A garment as in , wherein said candy members comprise hard candy. ",1,2
4 | 2006/20060288462,"A garment as in , wherein said candy members are disposed in a configuration to allow ventilation between said hard candy members. ",1,3
5 | 2006/20060288462,"A garment as in , wherein knots in the skeletal members are provided at least at said some points between said candy members whereby limited removal of said candy members may be implemented. ",1,4
6 | 2006/20060288462,"A garment as in , wherein skeletal members are of various lengths, whereby limited removal of said candy members may be implemented. ",1,5
7 | 2006/20060288462,"A garment as in , further comprising a coating disposed over said candy members whereby the action of moisture is limited. ",1,6
8 | 2006/20060288462,"A garment as in , wherein said candy members comprise an anti-stick filler material. ",1,7
9 | 2006/20060288462,"A garment as in , wherein said candy members comprise a compacted material. ",1,8
10 | 2006/20060288462,"A garment as in , wherein said candy members comprise a compacted xylitol. ",1,9
11 | 2006/20060288462,"A garment as in , wherein said skeletal members are cross-linked.",1,10
12 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/ipcr.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,classification_level,section,subclass,main_group,subgroup,symbol_position,classification_value,classification_status,classification_data_source,action_date,ipc_version_indicator,sequence
2 | 2006/20060288462,A,A,D,31,00,F,I,B,H,2006-12-28,2007-01-01,0
3 | 2006/20060288462,A,A,G,1,50,L,I,B,H,2006-12-28,2007-01-01,1
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/mainclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 002,,
3 | 426,,
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/rawassignee.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,assignee_id,rawlocation_id,type,name_first,name_last,organization,residence,nationality,sequence
2 | 2006/20060288462,,"new york|ny|us",,"","","AMERICAN EXPRESS TRAVEL RELATED SERVICES COMPANY, INC.","","",0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/rawinventor.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,inventor_id,rawlocation_id,name_first,name_last,nationality,sequence
2 | 2006/20060288462,,koln||de,Yvonne,Schroeder,DE,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/rawlocation.csv:
--------------------------------------------------------------------------------
1 | id,location_id,city,state,country
2 | "new york|ny|us",,"New York",NY,US
3 | koln||de,,Koln,"",DE
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/subclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 002/001000,,
3 | 426/104000,,
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/uspc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,mainclass_id,subclass_id,sequence
2 | 2006/20060288462,002,002/001000,0
3 | 2006/20060288462,426,426/104000,1
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa061228.one/usreldoc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,rel_id,doctype,status,date,number,kind,country,relationship,sequence
2 | 2006/20060288462,29232669,continuation_in_part,PENDING,2005-06-22,29232669,"",US,parent_doc,0
3 | 2006/20060288462,11197279,continuation_in_part,"",2005-08-04,11197279,A1,US,child_doc,1
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/application.csv:
--------------------------------------------------------------------------------
1 | id,type,number,country,date,abstract,title,granted,num_claims
2 | 2013/20130014308,utility,20130014308,US,2013-01-17,"A finger nail tip cover which acts as a typing aid for users when typing with long finger nails. It is made of rubber material and has one hard end, to prevent the nail tip from passing thru when typing and a semi-oval stretchy cavity at the other end where the finger nail tip is placed in.",TYPING-MATE,,12
3 | 2013/20130014306,utility,20130014306,US,2013-01-17,"A glove employable for mixed martial arts, including striking and grappling, employs a dorsal pad having distal and lateral extensions with an integral bend for covering both the dorsal and lateral sides of the user's metacarpal/proximal phalange joints. The integral bend conforms with the user's metacarpal/proximal phalange joints with the user's hand in the clinched fist position. The glove affords protection to the user's metacarpal/proximal phalange joints while striking. On the other hand, the integral bend is capable of easily unbending to an open position for grappling. The integral bend is unbent by unclenching the user's fingers by flexion from the clinched fist position to the open position. The glove also includes a wrist pad for protecting the wrist against heavy strikes.","TACTICAL MIXED MARTIAL ARTS GLOVE",,14
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/claim.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,text,dependent,sequence
2 | 2013/20130014308,"A finger nail tip cover appliance that has a first hard end and a second more soften end with a stretchy cavity to be placed on the finger nail tips of long nails when typing on any keyboard.",,1
3 | 2013/20130014308,"The appliance of wherein a shape that encircles 3/16 of an inches depth of a long finger nail tip.",1,2
4 | 2013/20130014308,"The appliance of wherein facilitates data entry with long natural, acrylic, gel or artificial finger nails.",1,3
5 | 2013/20130014308,"The appliance of wherein has a semi-oval cavity in one end where the finger nail tip is place in.",1,4
6 | 2013/20130014308,"The appliance of wherein has a stretch cavity to fit all sizes of the different long finger nail tips including natural, acrylic, gel or artificial finger nail tips.",1,5
7 | 2013/20130014308,"The appliance of wherein said is designed to be worn on the tips of all long finger nails of both hands, not including the thumbs.",1,6
8 | 2013/20130014308,"The appliance of wherein said a hard end comprising a symmetrical shape that simulates the actual finger tip typing action.",1,7
9 | 2013/20130014308,"The appliance of wherein has a hard end that prevents the finger nail tip from passing thru when typing.",7,8
10 | 2013/20130014308,"The appliance of wherein said is made of rubber material that allows the appliance to be hard on one end and stretchy on the other end.",1,9
11 | 2013/20130014308,"The appliance of wherein compromises a high friction end.",9,10
12 | 2013/20130014308,"The appliance on wherein said to be use for typing on any keyboard with long finger nails that can be natural, acrylic, gel or artificial nails.",1,11
13 | 2013/20130014308,"The appliance of wherein said makes typing with long finger nails faster, comfortable and accurate.",1,12
14 | 2013/20130014306,"An improved glove employable by a user for mixed martial arts, the glove being an open fingered type and of a type having a dorsal pad for protecting the dorsal side of the metacarpals of a user's hand against shock and of a type being capable of assuming an open position for grappling and a clinched fist position for striking, wherein the improvement includes:",,1
15 | 2013/20130014306,"The improved glove of wherein the improvement further includes the dorsal pad having a composition including molded foam, the integral bend being formed by the molded foam.",1,2
16 | 2013/20130014306,"The improved glove of wherein the improvement further includes the dorsal pad having a composition including a first layer of low density foam and a second layer of high density foam, the first and second layers being glued to one another for forming the integral bend.",1,3
17 | 2013/20130014306,"The improved glove of wherein the improvement further includes the distal extension extending sufficiently for covering and protecting the user's Intermediate phalanges.",1,4
18 | 2013/20130014306,"The improved glove of wherein the improvement further includes the dorsal pad extending distally for covering and protecting the user's distal phalanges.",1,5
19 | 2013/20130014306,"The improved glove of wherein the improvement further includes the dorsal pad and the integral bend having an extension extending laterally beyond the metacarpal/proximal phalange joint of the user's index finger for affording impact protection to the lateral side of the metacarpal/proximal phalanges joint of the user's index finger against a sloop hook punch.",1,6
20 | 2013/20130014306,"The improved glove of wherein the improvement further comprises:",1,7
21 | 2013/20130014306,"An improved glove employable by a user for mixed martial arts, the glove being of an opened fingered type and of a type having a dorsal pad for protecting the dorsal side of the metacarpals of a user's hand against shock, wherein the improvement comprising:",,8
22 | 2013/20130014306,"The improved glove of wherein the improvement further including the lateral strike pad having a composition including foam.",8,9
23 | 2013/20130014306,"An improved glove employable by a user for mixed martial arts, the glove being of an open fingered type having a dorsal pad for protecting the dorsal side of the metacarpals of a user's hand against shock, the glove being of a type capable of assuming an open position for grappling and a clinched fist position for striking, wherein the improvement including:",,10
24 | 2013/20130014306,"The improved glove of wherein the improvement further including the dorsal pad, include the integral bend, having a composition selected from the group consisting of molded foam and layered foam.",10,11
25 | 2013/20130014306,"The improved glove of wherein the improvement further including the dorsal pad extending distally for covering and protecting the user's distal phalanges against shock.",10,12
26 | 2013/20130014306,"The improved glove of , the improvement further compromising:",10,13
27 | 2013/20130014306,"An improved method for manufacturing a glove employable by a user for mixed martial arts, wherein the improvement comprises the step of making a dorsal pad having an integral bend by means of a molding process, according to . ",2,14
28 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/ipcr.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,classification_level,section,subclass,main_group,subgroup,symbol_position,classification_value,classification_status,classification_data_source,action_date,ipc_version_indicator,sequence
2 | 2013/20130014308,A,A,D,13,08,F,I,B,H,2013-01-17,2006-01-01,0
3 | 2013/20130014306,A,A,D,13,08,F,I,B,H,2013-01-17,2006-01-01,0
4 | 2013/20130014306,A,A,D,19,02,L,I,B,H,2013-01-17,2006-01-01,1
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/mainclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 2,,
3 | 21,,
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/rawassignee.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,assignee_id,rawlocation_id,type,name_first,name_last,organization,residence,nationality,sequence
2 | 2013/20130014308,,kissimmee|fl|us,,Yennifer,Feliciano,"",US,US,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/rawinventor.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,inventor_id,rawlocation_id,name_first,name_last,nationality,sequence
2 | 2013/20130014308,,kissimmee|fl|us,Yennifer,Feliciano,US,0
3 | 2013/20130014306,,"san diego|ca|us",Christopher,Mechling,US,0
4 | 2013/20130014306,,"san diego|ca|us",Nicholas,Mechling,US,1
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/rawlocation.csv:
--------------------------------------------------------------------------------
1 | id,location_id,city,state,country
2 | kissimmee|fl|us,,Kissimmee,FL,US
3 | "san diego|ca|us",,"San Diego",CA,US
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/subclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 2/21,,
3 | 2/20,,
4 | 21/69,,
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/uspc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,mainclass_id,subclass_id,sequence
2 | 2013/20130014308,2,2/21,0
3 | 2013/20130014306,2,2/20,0
4 | 2013/20130014306,21,21/69,1
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipa130117.one/usreldoc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,rel_id,doctype,status,date,number,kind,country,relationship,sequence
2 | 2013/20130014306,61433841,us_provisional_application,,2011-01-18,61433841,"",US,,0
3 | 2013/20130014306,61473378,us_provisional_application,,2011-04-08,61473378,"",US,,1
4 | 2013/20130014306,61526999,us_provisional_application,,2011-08-24,61526999,"",US,,2
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/application.csv:
--------------------------------------------------------------------------------
1 | 2010/29381217,D656296,29,29381217,US,2010-12-16,,
2 | 2009/29335910,D656297,29,29335910,US,2009-04-24,,
3 | 2009/29352440,D656298,29,29352440,US,2009-12-21,,
4 | 2011/29383856,D656299,29,29383856,US,2011-01-24,,
5 | 2011/29383977,D656300,29,29383977,US,2011-01-25,,
6 | 2011/29371812,D656301,29,29371812,US,2011-10-12,,
7 | 2010/29367468,D656302,29,29367468,US,2010-08-09,,
8 | 2009/29350925,D656303,29,29350925,US,2009-11-25,,
9 | 2011/29385973,D656304,29,29385973,US,2011-02-23,,
10 | 2011/29392441,D656305,29,29392441,US,2011-05-20,,
11 | 2010/29378661,D656306,29,29378661,US,2010-11-08,,
12 | 2010/29378662,D656307,29,29378662,US,2010-11-08,,
13 | 2010/29379369,D656308,29,29379369,US,2010-11-18,,
14 | 2011/29391097,D656309,29,29391097,US,2011-05-03,,
15 | 2011/29389305,D656310,29,29389305,US,2011-04-08,,
16 | 2010/29370848,D656311,29,29370848,US,2010-09-14,,
17 | 2011/29391518,D656312,29,29391518,US,2011-05-10,,
18 | 2010/29381881,D656313,29,29381881,US,2010-12-23,,
19 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/claim.csv:
--------------------------------------------------------------------------------
1 | D656296,"The ornamental design for a ready to eat snack piece, as shown and described.",,1
2 | D656297,"The ornamental design for a fashion belt, as shown and described.",,1
3 | D656298,"The ornamental design for a garment, as shown and described.",,1
4 | D656299,"The ornamental design for pants, as shown and described.",,1
5 | D656300,"The ornamental design for a pants, as shown and described.",,1
6 | D656301,"The ornamental design for apparel sleeve ends, as shown and described.",,1
7 | D656302,"The ornamental design for footwear, as shown and described.",,1
8 | D656303,"The ornamental design for a shoe sole, as shown and described.",,1
9 | D656304,"The ornamental design for a footwear outsole, as shown and described.",,1
10 | D656305,"The ornamental design for a shoe tread, as shown and described.",,1
11 | D656306,"The ornamental design for a golf shoe upper, as shown and described.",,1
12 | D656307,"The ornamental design for a golf shoe upper, as shown and described.",,1
13 | D656308,"The ornamental design for the replaceable cartridge for a pain management system, as shown and described.",,1
14 | D656309,"The ornamental design for a universal remote control accessory for a mobile device, as shown and described.",,1
15 | D656310,"The ornamental design for a wrist pouch, as shown and described.",,1
16 | D656311,"The ornamental design for a handbag with clasp, as shown and described.",,1
17 | D656312,"The ornamental design for a handbag, as shown and described.",,1
18 | D656313,"The ornamental design for a handbag, as shown and described.",,1
19 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/foreigncitation.csv:
--------------------------------------------------------------------------------
1 | D656296,2008-03-01,"","",8290305.5,EP,"cited by examiner",35
2 | D656298,1985-08-01,"","",G8513103.2,DE,"cited by other",123
3 | D656298,2005-04-01,"",U1,202005000716,DE,"cited by other",124
4 | D656298,2007-10-01,"","",00798897-0021,EM,"cited by examiner",125
5 | D656298,1986-03-01,"","",174179,EP,"cited by other",126
6 | D656298,1997-05-01,"","",774241,EP,"cited by other",127
7 | D656298,2001-03-01,"","",1082951,EP,"cited by other",128
8 | D656298,2001-08-01,"","",1125566,EP,"cited by other",129
9 | D656298,2005-07-01,"","",00385562-0106,EP,"cited by other",130
10 | D656298,2005-12-01,"","",00454202-0027,EP,"cited by other",131
11 | D656298,2005-12-01,"","",00454202-0121,EP,"cited by other",132
12 | D656298,2006-12-01,"","",00633755-0027,EP,"cited by other",133
13 | D656298,2007-05-01,"","",00730403-0023,EP,"cited by other",134
14 | D656298,1918-08-01,"","",116526,GB,"cited by other",135
15 | D656298,1924-03-01,"","",212307,GB,"cited by other",136
16 | D656298,1981-11-01,"","",1603600,GB,"cited by other",137
17 | D656298,2005-07-01,"","",3020687,GB,"cited by other",138
18 | D656298,2006-11-01,"","",4000420,GB,"cited by examiner",139
19 | D656298,2007-10-01,"","",4004524,GB,"cited by other",140
20 | D656298,1990-06-01,"","",2-82707,JP,"cited by other",141
21 | D656298,2001-06-01,"","",2001-172806,JP,"cited by other",142
22 | D656298,2002-05-01,"","",2002-138302,JP,"cited by other",143
23 | D656298,2003-05-01,"",A,2003-129303,JP,"cited by other",144
24 | D656298,2005-10-01,"",A,2005-281893,JP,"cited by other",145
25 | D656298,2006-11-01,"","",2006-316359,JP,"cited by other",146
26 | D656298,2006-11-01,"",A,2006-316359,JP,"cited by other",147
27 | D656298,2007-06-01,"","",2007-146337,JP,"cited by other",148
28 | D656298,2007-06-01,"",A,2007-146337,JP,"cited by other",149
29 | D656298,2007-11-01,"","",2007-303002,JP,"cited by other",150
30 | D656298,2008-07-01,"",A,2008-156812,JP,"cited by other",151
31 | D656298,1996-03-01,"","",96/08217,WO,"cited by other",152
32 | D656298,2001-10-01,"","",1/75201,WO,"cited by other",153
33 | D656300,2007-06-01,"",A,2007154394,JP,"cited by examiner",9
34 | D656300,2008-05-01,"",A,2008106395,JP,"cited by examiner",10
35 | D656303,1999-06-01,"","",99/29203,WO,"cited by other",48
36 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/ipcr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.18/ipcr.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/mainclass.csv:
--------------------------------------------------------------------------------
1 | D1,,
2 | D2,,
3 | D3,,
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/otherreference.csv:
--------------------------------------------------------------------------------
1 | D656296,"Football Shaped Cookies, posted Sep. 3, 2008 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://throwingwaffles.com/2008/09.",0
2 | D656296,"Super Bowel Sugar Cookies, posted Feb. 1, 2009 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://fodder-and-libations.blogspot.com/2009/02/super-bowl-sugar-cookies-with-royal.html.",1
3 | D656296,"Football Oreos, posted Sep. 18, 2010 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://puertabella.blogspot.com/2010/09/football-oreos.html.",2
4 | D656296,"Football Shaped Tortilla Crisps, posted Sep. 23, 2010 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://www.landolakes.com.",3
5 | D656296,"Football with Laces Cookie Cutter; www.karenscookies.net/Football-with-Laces-Cookie-Cutter_p_1068.html.",4
6 | D656296,"Football Cookie Cutter Set; www.michaels.com/Football-Cookie-Cutter-Set/bk0176,default,pd.html.",5
7 | D656298,"Email from Christine Conforte to Heather Schindler “FW: Tummy Tee,” sent Dec. 1, 2006.",0
8 | D656298,"Email from Christine Conforte to Heather Schindler “Liz Lange,” sent Dec. 13, 2006.",1
9 | D656298,"Email from Heather Schindler to Chistine Conforte “Liz Lange,” sent Dec. 13, 2006.",2
10 | D656298,"Email from Christine Conforte “Liz Lange,” sent Dec. 11, 2006.",3
11 | D656298,"Email from Heather Schindler to Christine Conforte “Liz Lange,” sent Dec. 9, 2006.",4
12 | D656298,"Email from Christine Conforte to Heather Schindler “Liz Lange,” sent Dec. 7, 2006.",5
13 | D656298,"Email from Heather Schindler to Christine Conforte “Liz Lange,” sent Dec. 7, 2006.",6
14 | D656298,"Email from Christine Conforte to Heather Schindler, “Liz Lange,” sent Dec. 6, 2006.",7
15 | D656298,"Email from Heather Schindler to Liz Lange copying Michelle Mooring and Christine Conforte, “Follow Up,” sent Jan. 24, 2007.",8
16 | D656298,"Email from Liz Lange to Heather Schindler copying Christine Conforte and Michelle Mooring, “Follow Up,” sent Jan. 23, 2007.",9
17 | D656298,"Email from Heather Schindler to Liz Lange copying Christine Conforte and Michelle Mooring, “Follow Up,” sent Jan. 23, 2007.",10
18 | D656298,"Email from Liz Lange to Heather Schindler copying Christine Conforte “Follow Up,” sent Jan. 19, 2007.",11
19 | D656298,"Email from Heather Schindler to Christine Conforte copying Michelle Mooring “FW: TummyTube,” sent Jan. 31, 2007.",12
20 | D656298,"Email from Christine Conforte to Heather Schindler copying Liz Lange “Tummy Tube,” sent Jan. 30, 2011.",13
21 | D656298,"Email from Heather Schindler to Liz Lange copying Michelle Mooring and Christine Conforte “TummyTube,” sent Feb. 6, 2007.",14
22 | D656298,"Email from Liz Lange to Heather Schindler and Christine Conforte “TummyTube,” sent Feb. 2, 2007.",15
23 | D656298,"Email from Heather Schindler to Christine Conforte copying Liz Lange “TummyTube,” sent Feb. 2, 2007.",16
24 | D656298,"Email from Christine Conforte to Heather Schindler copying Liz Lange “TummyTube,” sent Jan. 30, 2007.",17
25 | D656298,"“Style Spy,” Lucky Magazine, p. 26, Jan. 2002 issue.",18
26 | D656298,"LeCove swimwear catalog, p. 26.",19
27 | D656298,"JC Penney maternity catalog, p. 165.",20
28 | D656298,"“Fashion Q & A,” Shape Magazine, p. 68. Jan. 2008 issue.",21
29 | D656298,"“Self Selects,” Self Magazine, p. 162. Jun. 2000 issue.",22
30 | D656298,"“Do good while you shop,” Lucky Magazine, p. 286. Oct. 2006 issue.",23
31 | D656298,"“Photo Finish,” WWD Intimates catalog. 2006.",24
32 | D656298,"“Shapewear Report,” In Style Magazine, pp. 338-346. Sep. 2007 issue.",25
33 | D656298,"Self Magazine, p. 33. Dec. 2007 issue.",26
34 | D656298,"“My best stress bust is . . . ,” Self Magazine, p. 200. Apr. 2002 issue.",27
35 | D656298,"Times Three Clothier, LLC Sales Order No. 1000, Sales Order Date Apr. 30, 2007.",28
36 | D656298,"Email from Ivan A. Saperstein to Heather Schindler copying Michelle Mooring and J. Schindler “Gatsby's” sent Jul. 20, 2007.",29
37 | D656298,"Email from Heather Schindler to Ivan A. Saperstein copying Michelle Mooring and J. Schindler “Gatsby's” Jul. 20, 2007.",30
38 | D656298,"Email from Ivan A. Saperstein to Michelle Mooring, Heather Schindler and J. Schindler “Gatsby's” Jul. 20, 2007.",31
39 | D656298,"Invoice billed and shipped to Seams Beutiful-Carolyn Weaver, Invoice # 00006138, Aug. 11, 2006.",32
40 | D656298,"Invoice billed and shipped to Seams Beutiful-Carolyn Weaver, Invoice # 00006155, Aug. 25, 2006.",33
41 | D656298,"Packing Slip to Seams Beautiful, Aug. 25, 2006.",34
42 | D656298,"Maidenform v. Times Three Clothier LLC d/b/a Yummie Tummie, Case No. 10-cv-1661 (GBD)—Skinny Cami Declarations. pp. 1-118 (submitted in 9 parts).",35
43 | D656298,"Specifications. pp. 1-3 (submitted in 1 part).",36
44 | D656298,"Maidenform's Initial Disclosures—Jul. 9, 2010. pp. 1-13 (submitted in 1 part).",37
45 | D656298,"Maidenform's Responses and Objections to TTC's First Set of Interrogatories (Nos. 1-18)—Aug. 24, 2010. pp. 1-12 (submitted in 1 part).",38
46 | D656298,"Yummie Tummie and Maidenform Settle Patent Infringement and Invalidity Lawsuits, Fox News 44, PR Newswire (2011). http://www.fox44now.com/story/15242333/yummie-tummie-and-maidenform-settle-patent-infringement-and-invalidity-lawsuits?clienttype=printable—printed Aug. 12, 2011.",39
47 | D656298,"Maidenform Brands, Inc. Reports Second Quarter 2011 Results and Provides Guidance for Full Year 2011, News Releases—General News, Iselin, NJ—/PRNewswire via COMTEX/ Aug. 10, 2011.",40
48 | D656298,"Associated Press, “Maidenform 2Q net income drops on settlement,” Forbes.com (Aug. 10, 2011). http://www.forbes.com/feeds/ap/2011/08/10/business-specialized-consumer-services-us-earns-maidenform_8611962.html?partner=email—printed Aug. 12, 2011.",41
49 | D656298,"Maidenform's Responses and Objections to TTC's First Set of Requests for Production of Documents (Nos. 1-144)—Aug. 24, 2010.",42
50 | D656298,"Maidenform's Second Set of Requests for the Production of Documents and Things (Nos. 78-123)—Sep. 3, 2010.",43
51 | D656298,"Maidenform's Second Set of Interrogatories (Nos. 16-17)—Sep. 3, 2010.",44
52 | D656298,"Maidenform's First Supplemental Responses and Objections to TTC's First Set of Interrogatories —Sep. 10, 2010.",45
53 | D656298,"TTC's Responses to Maidenform's Second Set of Requests for Production (Nos. 78-123)—Oct. 12, 2010.",46
54 | D656298,"TTC's Written Responses to Maidenform's Second Set of Interrogatories (Nos. 16-17). San Francisco Chronicle article—MF0001240-1242. Women's Wear Daily article—MF0001087-1088. Oct. 12, 2010.",47
55 | D656298,"Maidenform's Responses and Objections to TTC's Supplemental First Set of Interrogatories (Nos. 1A-18A) —Nov. 26, 2010.",48
56 | D656298,"I. Donner's Objections to Maidenform's Request for Production of Documents Included in the Subpoena to I. Donner—Feb. 22, 2011.",49
57 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/patent.csv:
--------------------------------------------------------------------------------
1 | D656296,design,D656296,US,2012-03-27,"","Ready to eat snack piece",S1,1
2 | D656297,design,D656297,US,2012-03-27,"","Fashion belt",S1,1
3 | D656298,design,D656298,US,2012-03-27,"",Garment,S1,1
4 | D656299,design,D656299,US,2012-03-27,"",Pants,S1,1
5 | D656300,design,D656300,US,2012-03-27,"",Pants,S1,1
6 | D656301,design,D656301,US,2012-03-27,"","Apparel sleeve ends",S1,1
7 | D656302,design,D656302,US,2012-03-27,"",Footwear,S1,1
8 | D656303,design,D656303,US,2012-03-27,"","Shoe soles",S1,1
9 | D656304,design,D656304,US,2012-03-27,"","Footwear outsole",S1,1
10 | D656305,design,D656305,US,2012-03-27,"","Shoe tread",S1,1
11 | D656306,design,D656306,US,2012-03-27,"","Golf shoe upper",S1,1
12 | D656307,design,D656307,US,2012-03-27,"","Golf shoe upper",S1,1
13 | D656308,design,D656308,US,2012-03-27,"","Replaceable cartridge for a pain management system",S1,1
14 | D656309,design,D656309,US,2012-03-27,"","Universal remote control accessory for a mobile device",S1,1
15 | D656310,design,D656310,US,2012-03-27,"","Wrist pouch",S1,1
16 | D656311,design,D656311,US,2012-03-27,"","Handbag with clasp",S1,1
17 | D656312,design,D656312,US,2012-03-27,"",Handbag,S1,1
18 | D656313,design,D656313,US,2012-03-27,"",Handbag,S1,1
19 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/rawassignee.csv:
--------------------------------------------------------------------------------
1 | D656296,,plano|tx|us,2,"","","Frito-Lay North America, Inc.","","",0
2 | D656298,,"new york|ny|us",2,"","","Times Three Clothier, LLC","","",0
3 | D656301,,||,2,"","","The Vermont Teddy Bear Company, Inc.","","",0
4 | D656303,,roggwil||ch,3,"","","Kybun AG","","",0
5 | D656304,,"boca raton|fl|us",2,"","","BBC International LLC","","",0
6 | D656305,,logan|ut|us,2,"","","Icon IP, Inc.","","",0
7 | D656306,,fairhaven|ma|us,2,"","","Acushnet Company","","",0
8 | D656307,,fairhaven|ma|us,2,"","","Acushnet Company","","",0
9 | D656308,,minneapolis|mn|us,2,"","","OrthoCor Medical, Inc.","","",0
10 | D656309,,berlin||de,3,"","","Zero1.tv GmbH","","",0
11 | D656310,,"mt. kisco|ny|us",2,"","","Thompson-Weiler Enterprises, LLC","","",0
12 | D656311,,"new york|ny|us",2,"","","Devi Kroell Inc.","","",0
13 | D656312,,paris||fr,3,"","","Design Sportswears","","",0
14 | D656313,,paris||fr,3,"","","Hermes Sellier (Societe Par Actions Simplifiee)","","",0
15 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/rawinventor.csv:
--------------------------------------------------------------------------------
1 | D656296,,frisco|tx|us,Nicole,Cavin,OMITTED,0
2 | D656296,,plano|tx|us,Divya,Paruchuri,OMITTED,1
3 | D656296,,plano|tx|us,Michael,Zbuchalski,OMITTED,2
4 | D656297,,"north las vegas|nv|us",Michaela,"M. Christian",OMITTED,0
5 | D656298,,"new york|ny|us",Heather,"Thomson Schindler",OMITTED,0
6 | D656299,,"huntington beach|ca|us",Brian,"Jeffery Peters",OMITTED,0
7 | D656300,,"laguna hills|ca|us",Debi,Purcell,OMITTED,0
8 | D656301,,shelburne|vt|us,Michael,"Scott Randall",OMITTED,0
9 | D656302,,richmond|va|us,Wade,Driggers,OMITTED,0
10 | D656303,,roggwil||ch,Karl,Muller,OMITTED,0
11 | D656303,,montebelluna||it,Claudio,Franco,OMITTED,1
12 | D656304,,"boca raton|fl|us",Gilberto,Debiase,OMITTED,0
13 | D656305,,orem|ut|us,Kenneth,"Golden Harper",OMITTED,0
14 | D656306,,westford|ma|us,Jonathan,"G. Bacon",OMITTED,0
15 | D656307,,marion|ma|us,James,"M. Feeney",OMITTED,0
16 | D656307,,milton|ma|us,Richard,"A. Mochen",OMITTED,1
17 | D656307,,pembroke|ma|us,Paul,"O. Teeter",OMITTED,2
18 | D656308,,shoreview|mn|us,Kin-Joe,Sham,OMITTED,0
19 | D656309,,hamburg||de,Oliver,Renelt,OMITTED,0
20 | D656309,,berlin||de,Alexander,Gruber,OMITTED,1
21 | D656310,,ossining|ny|us,Valerie,"M. Ciptak",OMITTED,0
22 | D656310,,"severna park|md|us",Justin,"S. Werner",OMITTED,1
23 | D656311,,"new york|ny|us",Gracelia,Chiurazzi,OMITTED,0
24 | D656312,,paris||fr,"Valérie",Gerbi,OMITTED,0
25 | D656313,,paris||fr,Jean-Louis,Dumas,OMITTED,0
26 | D656313,,paris||fr,Pierre-Alexis,"Dumas, legal representative",OMITTED,1
27 | D656313,,paris||fr,Sandrine,"Brekke-Dumas, legal representative",OMITTED,2
28 | D656313,,paris||fr,Couli,Jobert,OMITTED,3
29 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/rawlawyer.csv:
--------------------------------------------------------------------------------
1 | ,D656296,Colin,"P. Cahon","",UNKNOWN,
2 | ,D656296,Celina,"M. Corr","",UNKNOWN,
3 | ,D656296,"","","Carston & Cahoon, LLP",UNKNOWN,
4 | ,D656297,Veronica-Adele,"R. Cao","",UNKNOWN,
5 | ,D656297,"","","Weiss & Moy, P.C.",UNKNOWN,
6 | ,D656298,"","","Stroock & Stroock & Lavan LLP",UNKNOWN,
7 | ,D656299,"","","Minn Law Firm",UNKNOWN,
8 | ,D656299,Jerome,"V. Sartain","",UNKNOWN,
9 | ,D656299,Justin,"G. Sanders","",UNKNOWN,
10 | ,D656300,Walter,"A. Hackler","",UNKNOWN,
11 | ,D656301,H.,"Jay Spiegel","",UNKNOWN,
12 | ,D656302,"","","Thomas & Karceski, P.C.",UNKNOWN,
13 | ,D656303,"","","Shoemaker and Mattare",UNKNOWN,
14 | ,D656304,"","","GrayRobinson, P.A.",UNKNOWN,
15 | ,D656305,Richard,"K. C. Chang, II","",UNKNOWN,
16 | ,D656306,D.,"Michael Burns","",UNKNOWN,
17 | ,D656307,D.,"Michael Burns","",UNKNOWN,
18 | ,D656308,"","","Schwegman, Lunberg & Woessner, P.A.",UNKNOWN,
19 | ,D656309,"","","Perkins Coie LLP",UNKNOWN,
20 | ,D656310,Andrew,"F. Young, Esq.","",UNKNOWN,
21 | ,D656310,"","","Lackenbach Siegel, LLP",UNKNOWN,
22 | ,D656311,Barry,"G. Magidoff","",UNKNOWN,
23 | ,D656312,"","","Abelman, Frayne & Schwab",UNKNOWN,
24 | ,D656313,"","","Foley & Lardner LLP",UNKNOWN,
25 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/rawlocation.csv:
--------------------------------------------------------------------------------
1 | plano|tx|us,,Plano,TX,US
2 | frisco|tx|us,,Frisco,TX,US
3 | "north las vegas|nv|us",,"North Las Vegas",NV,US
4 | "new york|ny|us",,"New York",NY,US
5 | "huntington beach|ca|us",,"Huntington Beach",CA,US
6 | "laguna hills|ca|us",,"Laguna Hills",CA,US
7 | ||,,"","",""
8 | shelburne|vt|us,,Shelburne,VT,US
9 | richmond|va|us,,Richmond,VA,US
10 | roggwil||ch,,Roggwil,"",CH
11 | montebelluna||it,,Montebelluna,"",IT
12 | "boca raton|fl|us",,"Boca Raton",FL,US
13 | logan|ut|us,,Logan,UT,US
14 | orem|ut|us,,Orem,UT,US
15 | fairhaven|ma|us,,Fairhaven,MA,US
16 | westford|ma|us,,Westford,MA,US
17 | marion|ma|us,,Marion,MA,US
18 | milton|ma|us,,Milton,MA,US
19 | pembroke|ma|us,,Pembroke,MA,US
20 | minneapolis|mn|us,,Minneapolis,MN,US
21 | shoreview|mn|us,,Shoreview,MN,US
22 | berlin||de,,Berlin,"",DE
23 | hamburg||de,,Hamburg,"",DE
24 | "mt. kisco|ny|us",,"Mt. Kisco",NY,US
25 | ossining|ny|us,,Ossining,NY,US
26 | "severna park|md|us",,"Severna Park",MD,US
27 | paris||fr,,Paris,"",FR
28 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/subclass.csv:
--------------------------------------------------------------------------------
1 | D1/128,,
2 | D1/106,,
3 | D2/627,,
4 | D2/703,,
5 | D2/742,,
6 | D2/858,,
7 | D2/946,,
8 | D2/947,,
9 | D2/956,,
10 | D2/960,,
11 | D2/951,,
12 | D2/958,,
13 | D2/954,,
14 | D2/969,,
15 | D3/2031,,
16 | D3/218,,
17 | D3/226,,
18 | D3/232,,
19 | D3/243,,
20 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/usapplicationcitation.csv:
--------------------------------------------------------------------------------
1 | D656296,2002/0043158,2002-04-01,"Stevenson et al.",A1,2002/0043158,US,"cited by examiner",30
2 | D656296,2004/0258806,2004-12-01,Khazaal,A1,2004/0258806,US,"cited by examiner",31
3 | D656296,2006/0073240,2006-04-01,"David et al.",A1,2006/0073240,US,"cited by examiner",32
4 | D656296,2009/0035433,2009-02-01,"France et al.",A1,2009/0035433,US,"cited by examiner",33
5 | D656296,2011/0111105,2011-05-01,"Besse et al.",A1,2011/0111105,US,"cited by examiner",34
6 | D656298,2004/0045325,2004-03-01,"Rabinowicz et al.",A1,2004/0045325,US,"cited by examiner",116
7 | D656298,2006/0166600,2006-07-01,"Ravoiu et al.",A1,2006/0166600,US,"cited by examiner",117
8 | D656298,2006/0242748,2006-11-01,Martz,A1,2006/0242748,US,"cited by other",118
9 | D656298,2007/0050881,2007-03-01,Kasprzak,A1,2007/0050881,US,"cited by examiner",119
10 | D656298,2007/0094765,2007-05-01,"Summers et al.",A1,2007/0094765,US,"cited by other",120
11 | D656298,2008/0134409,2008-06-01,Karasina,A1,2008/0134409,US,"cited by other",121
12 | D656298,2008/0244805,2008-10-01,Griffin,A1,2008/0244805,US,"cited by other",122
13 | D656299,2005/0246819,2005-11-01,Tucker,A1,2005/0246819,US,"cited by other",31
14 | D656299,2007/0022510,2007-02-01,"Chapuis et al.",A1,2007/0022510,US,"cited by examiner",32
15 | D656303,2004/0003513,2004-01-01,"Crane et al.",A1,2004/0003513,US,"cited by examiner",40
16 | D656303,2007/0028485,2007-02-01,"Crane et al.",A1,2007/0028485,US,"cited by examiner",41
17 | D656303,2010/0122472,2010-05-01,"Wilson et al.",A1,2010/0122472,US,"cited by examiner",42
18 | D656303,2010/0251565,2010-10-01,"Litchfield et al.",A1,2010/0251565,US,"cited by examiner",43
19 | D656303,2011/0023215,2011-02-01,Obradovic,A1,2011/0023215,US,"cited by examiner",44
20 | D656303,2011/0072684,2011-03-01,Stubblefield,A1,2011/0072684,US,"cited by examiner",45
21 | D656303,2011/0113646,2011-05-01,"Merritt et al.",A1,2011/0113646,US,"cited by examiner",46
22 | D656303,2011/0113649,2011-05-01,"Merritt et al.",A1,2011/0113649,US,"cited by examiner",47
23 | D656304,2008/0148598,2008-06-01,Schoenborn,A1,2008/0148598,US,"cited by examiner",21
24 | D656304,2011/0192054,2011-08-01,"Wojnar et al.",A1,2011/0192054,US,"cited by examiner",22
25 | D656305,2010/0293811,2010-11-01,Truelsen,A1,2010/0293811,US,"cited by examiner",28
26 | D656305,2010/0307025,2010-12-01,"Truelsen et al.",A1,2010/0307025,US,"cited by examiner",29
27 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/uspc.csv:
--------------------------------------------------------------------------------
1 | D656296,D1,D1/128,0
2 | D656296,D1,D1/106,1
3 | D656297,D2,D2/627,0
4 | D656298,D2,D2/703,0
5 | D656299,D2,D2/742,0
6 | D656300,D2,D2/742,0
7 | D656301,D2,D2/858,0
8 | D656302,D2,D2/946,0
9 | D656303,D2,D2/947,0
10 | D656303,D2,D2/956,1
11 | D656304,D2,D2/960,0
12 | D656304,D2,D2/951,1
13 | D656304,D2,D2/958,2
14 | D656305,D2,D2/960,0
15 | D656305,D2,D2/951,1
16 | D656305,D2,D2/954,2
17 | D656305,D2,D2/958,3
18 | D656306,D2,D2/969,0
19 | D656307,D2,D2/969,0
20 | D656308,D3,D3/2031,0
21 | D656309,D3,D3/218,0
22 | D656310,D3,D3/226,0
23 | D656311,D3,D3/232,0
24 | D656312,D3,D3/232,0
25 | D656313,D3,D3/243,0
26 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.18/usreldoc.csv:
--------------------------------------------------------------------------------
1 | D656297,29285560,continuation_in_part,ABANDONED,2007-04-02,29285560,"",US,parent_doc,0
2 | D656297,29335910,continuation_in_part,"",,29335910,"",US,child_doc,1
3 | D656298,29350198,continuation_in_part,"",2009-11-12,29350198,"",US,parent_doc,0
4 | D656298,D616627,continuation_in_part,"",,D616627,"",US,parent_grant_document,1
5 | D656298,29352440,continuation_in_part,"",,29352440,"",US,child_doc,2
6 | D656298,29302500,continuation,"",2008-01-17,29302500,"",US,parent_doc,3
7 | D656298,D606285,continuation,"",,D606285,"",US,parent_grant_document,4
8 | D656298,29350198,continuation,"",,29350198,"",US,child_doc,5
9 | D656299,29324767,continuation_in_part,"",2008-09-19,29324767,"",US,parent_doc,0
10 | D656299,D633280,continuation_in_part,"",,D633280,"",US,parent_grant_document,1
11 | D656299,29383856,continuation_in_part,"",,29383856,"",US,child_doc,2
12 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/application.csv:
--------------------------------------------------------------------------------
1 | 2010/29381217,D656296,29,29381217,US,2010-12-16,,
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/citation.csv:
--------------------------------------------------------------------------------
1 | D656296,,1906-08-01,Allen,S,D38206,US,"cited by examiner",0
2 | D656296,,1920-10-01,Shores,S,D56478,US,"cited by examiner",1
3 | D656296,,1959-02-01,Bedenk,A,2874052,US,"cited by examiner",2
4 | D656296,,1959-09-01,"Anderson et al.",A,2905559,US,"cited by examiner",3
5 | D656296,,1971-01-01,"Holtz et al.",S,D219637,US,"cited by examiner",4
6 | D656296,,1972-04-01,"Ball et al.",A,3656966,US,"cited by examiner",5
7 | D656296,,1983-07-01,Zonnenberg,S,D269729,US,"cited by examiner",6
8 | D656296,,1987-12-01,"Gagliardi, Jr.",S,D293040,US,"cited by examiner",7
9 | D656296,,1989-12-01,"Willard et al.",A,4889737,US,"cited by examiner",8
10 | D656296,,1991-05-01,Saks,S,D317171,US,"cited by examiner",9
11 | D656296,,1991-08-01,"Brewer et al.",A,5038201,US,"cited by examiner",10
12 | D656296,,1995-03-01,"Patoskie et al.",S,D355975,US,"cited by examiner",11
13 | D656296,,1995-04-01,"Novak et al.",S,D357710,US,"cited by examiner",12
14 | D656296,,1996-05-01,Tashiro,A,5518391,US,"cited by examiner",13
15 | D656296,,1996-11-01,"Song et al.",A,5571543,US,"cited by examiner",14
16 | D656296,,1997-12-01,"Wilson et al.",S,D388235,US,"cited by examiner",15
17 | D656296,,2001-03-01,Renda,B1,6197334,US,"cited by examiner",16
18 | D656296,,2001-12-01,Teras,S,D452360,US,"cited by examiner",17
19 | D656296,,2003-06-01,"Bell et al.",S,D475451,US,"cited by examiner",18
20 | D656296,,2003-11-01,Manville,S,D482736,US,"cited by examiner",19
21 | D656296,,2004-04-01,Childress,S,D488611,US,"cited by examiner",20
22 | D656296,,2004-07-01,"Bhaskar et al.",S,D493271,US,"cited by examiner",21
23 | D656296,,2004-11-01,"Mihalos et al.",S,D497702,US,"cited by examiner",22
24 | D656296,,2005-05-01,Childress,S,D505531,US,"cited by examiner",23
25 | D656296,,2005-06-01,Childress,S,D506051,US,"cited by examiner",24
26 | D656296,,2007-04-01,"Aleman et al.",S,D540507,US,"cited by examiner",25
27 | D656296,,2007-05-01,"Cocco et al.",S,D543006,US,"cited by examiner",26
28 | D656296,,2007-10-01,"Aleman et al.",S,D552327,US,"cited by examiner",27
29 | D656296,,2008-01-01,"Yuengling et al.",S,D560538,US,"cited by examiner",28
30 | D656296,,2009-10-01,Hodges,S,D601690,US,"cited by examiner",29
31 | D656296,,2002-04-01,"Stevenson et al.",A1,2002/0043158,US,"cited by examiner",30
32 | D656296,,2004-12-01,Khazaal,A1,2004/0258806,US,"cited by examiner",31
33 | D656296,,2006-04-01,"David et al.",A1,2006/0073240,US,"cited by examiner",32
34 | D656296,,2009-02-01,"France et al.",A1,2009/0035433,US,"cited by examiner",33
35 | D656296,,2011-05-01,"Besse et al.",A1,2011/0111105,US,"cited by examiner",34
36 | D656296,,2008-03-01,"","",8290305.5,EP,"cited by examiner",35
37 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/claim.csv:
--------------------------------------------------------------------------------
1 | D656296,"The ornamental design for a ready to eat snack piece, as shown and described.",,1
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/foreigncitation.csv:
--------------------------------------------------------------------------------
1 | D656296,2008-03-01,"","",8290305.5,EP,"cited by examiner",35
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/ipcr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.one/ipcr.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/mainclass.csv:
--------------------------------------------------------------------------------
1 | D1,,
2 | D11,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/otherreference.csv:
--------------------------------------------------------------------------------
1 | D656296,"Football Shaped Cookies, posted Sep. 3, 2008 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://throwingwaffles.com/2008/09.",0
2 | D656296,"Super Bowel Sugar Cookies, posted Feb. 1, 2009 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://fodder-and-libations.blogspot.com/2009/02/super-bowl-sugar-cookies-with-royal.html.",1
3 | D656296,"Football Oreos, posted Sep. 18, 2010 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://puertabella.blogspot.com/2010/09/football-oreos.html.",2
4 | D656296,"Football Shaped Tortilla Crisps, posted Sep. 23, 2010 [online], [retrieved Jul. 26, 2011]. Retrieved from Internet, http://www.landolakes.com.",3
5 | D656296,"Football with Laces Cookie Cutter; www.karenscookies.net/Football-with-Laces-Cookie-Cutter _p _1068.html.",4
6 | D656296,"Football Cookie Cutter Set; www.michaels.com/Football-Cookie-Cutter-Set/bk0176,default,pd.html.",5
7 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/patent.csv:
--------------------------------------------------------------------------------
1 | D656296,design,D656296,US,2012-03-27,"","Ready to eat snack piece",S1,1
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/rawassignee.csv:
--------------------------------------------------------------------------------
1 | D656296,,plano|tx|us,2,"","","Frito-Lay North America, Inc.","","",0
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/rawinventor.csv:
--------------------------------------------------------------------------------
1 | D656296,,frisco|tx|us,Nicole,Cavin,OMITTED,0
2 | D656296,,plano|tx|us,Divya,Paruchuri,OMITTED,1
3 | D656296,,plano|tx|us,Michael,Zbuchalski,OMITTED,2
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/rawlawyer.csv:
--------------------------------------------------------------------------------
1 | ,D656296,Colin,"P. Cahon","",UNKNOWN,
2 | ,D656296,Celina,"M. Corr","",UNKNOWN,
3 | ,D656296,"","","Carston & Cahoon, LLP",UNKNOWN,
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/rawlocation.csv:
--------------------------------------------------------------------------------
1 | plano|tx|us,,Plano,TX,US
2 | frisco|tx|us,,Frisco,TX,US
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/subclass.csv:
--------------------------------------------------------------------------------
1 | D1/128,,
2 | D11/06,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/usapplicationcitation.csv:
--------------------------------------------------------------------------------
1 | D656296,2002/0043158,2002-04-01,"Stevenson et al.",A1,2002/0043158,US,"cited by examiner",30
2 | D656296,2004/0258806,2004-12-01,Khazaal,A1,2004/0258806,US,"cited by examiner",31
3 | D656296,2006/0073240,2006-04-01,"David et al.",A1,2006/0073240,US,"cited by examiner",32
4 | D656296,2009/0035433,2009-02-01,"France et al.",A1,2009/0035433,US,"cited by examiner",33
5 | D656296,2011/0111105,2011-05-01,"Besse et al.",A1,2011/0111105,US,"cited by examiner",34
6 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/uspatentcitation.csv:
--------------------------------------------------------------------------------
1 | D656296,D38206,1906-08-01,Allen,S,D38206,US,"cited by examiner",0
2 | D656296,D56478,1920-10-01,Shores,S,D56478,US,"cited by examiner",1
3 | D656296,2874052,1959-02-01,Bedenk,A,2874052,US,"cited by examiner",2
4 | D656296,2905559,1959-09-01,"Anderson et al.",A,2905559,US,"cited by examiner",3
5 | D656296,D219637,1971-01-01,"Holtz et al.",S,D219637,US,"cited by examiner",4
6 | D656296,3656966,1972-04-01,"Ball et al.",A,3656966,US,"cited by examiner",5
7 | D656296,D269729,1983-07-01,Zonnenberg,S,D269729,US,"cited by examiner",6
8 | D656296,D293040,1987-12-01,"Gagliardi, Jr.",S,D293040,US,"cited by examiner",7
9 | D656296,4889737,1989-12-01,"Willard et al.",A,4889737,US,"cited by examiner",8
10 | D656296,D317171,1991-05-01,Saks,S,D317171,US,"cited by examiner",9
11 | D656296,5038201,1991-08-01,"Brewer et al.",A,5038201,US,"cited by examiner",10
12 | D656296,D355975,1995-03-01,"Patoskie et al.",S,D355975,US,"cited by examiner",11
13 | D656296,D357710,1995-04-01,"Novak et al.",S,D357710,US,"cited by examiner",12
14 | D656296,5518391,1996-05-01,Tashiro,A,5518391,US,"cited by examiner",13
15 | D656296,5571543,1996-11-01,"Song et al.",A,5571543,US,"cited by examiner",14
16 | D656296,D388235,1997-12-01,"Wilson et al.",S,D388235,US,"cited by examiner",15
17 | D656296,6197334,2001-03-01,Renda,B1,6197334,US,"cited by examiner",16
18 | D656296,D452360,2001-12-01,Teras,S,D452360,US,"cited by examiner",17
19 | D656296,D475451,2003-06-01,"Bell et al.",S,D475451,US,"cited by examiner",18
20 | D656296,D482736,2003-11-01,Manville,S,D482736,US,"cited by examiner",19
21 | D656296,D488611,2004-04-01,Childress,S,D488611,US,"cited by examiner",20
22 | D656296,D493271,2004-07-01,"Bhaskar et al.",S,D493271,US,"cited by examiner",21
23 | D656296,D497702,2004-11-01,"Mihalos et al.",S,D497702,US,"cited by examiner",22
24 | D656296,D505531,2005-05-01,Childress,S,D505531,US,"cited by examiner",23
25 | D656296,D506051,2005-06-01,Childress,S,D506051,US,"cited by examiner",24
26 | D656296,D540507,2007-04-01,"Aleman et al.",S,D540507,US,"cited by examiner",25
27 | D656296,D543006,2007-05-01,"Cocco et al.",S,D543006,US,"cited by examiner",26
28 | D656296,D552327,2007-10-01,"Aleman et al.",S,D552327,US,"cited by examiner",27
29 | D656296,D560538,2008-01-01,"Yuengling et al.",S,D560538,US,"cited by examiner",28
30 | D656296,D601690,2009-10-01,Hodges,S,D601690,US,"cited by examiner",29
31 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/uspc.csv:
--------------------------------------------------------------------------------
1 | D656296,D1,D1/128,0
2 | D656296,D11,D11/06,1
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.one/usreldoc.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.one/usreldoc.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/application.csv:
--------------------------------------------------------------------------------
1 | 2010/29379369,D656308,29,29379369,US,2010-11-18,,
2 | 2011/29391097,D656309,29,29391097,US,2011-05-03,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/citation.csv:
--------------------------------------------------------------------------------
1 | D656308,,2010-08-01,Traylor,S,D621603,US,"cited by examiner",0
2 | D656308,,2011-01-01,Barrass,S,D630009,US,"cited by examiner",1
3 | D656308,,2011-07-01,"Xu et al.",S,D641970,US,"cited by examiner",2
4 | D656308,,2011-09-01,Portney,S,D645657,US,"cited by examiner",3
5 | D656309,,1999-03-01,"Phillips et al.",S,D406191,US,"cited by examiner",0
6 | D656309,,1999-11-01,"Phillips et al.",A,5988577,US,"cited by examiner",1
7 | D656309,,2001-01-01,Lim,B1,6176401,US,"cited by examiner",2
8 | D656309,,2004-11-01,To,S,D497714,US,"cited by examiner",3
9 | D656309,,2005-05-01,"Robertson et al.",S,D504564,US,"cited by examiner",4
10 | D656309,,2006-09-01,McClaude,S,D528539,US,"cited by examiner",5
11 | D656309,,2010-02-01,Bullen,S,D609464,US,"cited by examiner",6
12 | D656309,,2011-07-01,Stampfli,S,D641974,US,"cited by examiner",7
13 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/claim.csv:
--------------------------------------------------------------------------------
1 | D656308,"The ornamental design for the replaceable cartridge for a pain management system, as shown and described.",,1
2 | D656309,"The ornamental design for a universal remote control accessory for a mobile device, as shown and described.",,1
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/foreigncitation.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.two/foreigncitation.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/ipcr.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.two/ipcr.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/mainclass.csv:
--------------------------------------------------------------------------------
1 | D3,,
2 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/otherreference.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.two/otherreference.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/patent.csv:
--------------------------------------------------------------------------------
1 | D656308,design,D656308,US,2012-03-27,"","Replaceable cartridge for a pain management system",S1,1
2 | D656309,design,D656309,US,2012-03-27,"","Universal remote control accessory for a mobile device",S1,1
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/rawassignee.csv:
--------------------------------------------------------------------------------
1 | D656308,,minneapolis|mn|us,2,"","","OrthoCor Medical, Inc.","","",0
2 | D656309,,berlin||de,3,"","","Zero1.tv GmbH","","",0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/rawinventor.csv:
--------------------------------------------------------------------------------
1 | D656308,,shoreview|mn|us,Kin-Joe,Sham,OMITTED,0
2 | D656309,,hamburg||de,Oliver,Renelt,OMITTED,0
3 | D656309,,berlin||de,Alexander,Gruber,OMITTED,1
4 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/rawlawyer.csv:
--------------------------------------------------------------------------------
1 | ,D656308,"","","Schwegman, Lunberg & Woessner, P.A.",UNKNOWN,
2 | ,D656309,"","","Perkins Coie LLP",UNKNOWN,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/rawlocation.csv:
--------------------------------------------------------------------------------
1 | minneapolis|mn|us,,Minneapolis,MN,US
2 | shoreview|mn|us,,Shoreview,MN,US
3 | berlin||de,,Berlin,"",DE
4 | hamburg||de,,Hamburg,"",DE
5 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/subclass.csv:
--------------------------------------------------------------------------------
1 | D3/2031,,
2 | D3/218,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/usapplicationcitation.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.two/usapplicationcitation.csv
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/uspatentcitation.csv:
--------------------------------------------------------------------------------
1 | D656308,D621603,2010-08-01,Traylor,S,D621603,US,"cited by examiner",0
2 | D656308,D630009,2011-01-01,Barrass,S,D630009,US,"cited by examiner",1
3 | D656308,D641970,2011-07-01,"Xu et al.",S,D641970,US,"cited by examiner",2
4 | D656308,D645657,2011-09-01,Portney,S,D645657,US,"cited by examiner",3
5 | D656309,D406191,1999-03-01,"Phillips et al.",S,D406191,US,"cited by examiner",0
6 | D656309,5988577,1999-11-01,"Phillips et al.",A,5988577,US,"cited by examiner",1
7 | D656309,6176401,2001-01-01,Lim,B1,6176401,US,"cited by examiner",2
8 | D656309,D497714,2004-11-01,To,S,D497714,US,"cited by examiner",3
9 | D656309,D504564,2005-05-01,"Robertson et al.",S,D504564,US,"cited by examiner",4
10 | D656309,D528539,2006-09-01,McClaude,S,D528539,US,"cited by examiner",5
11 | D656309,D609464,2010-02-01,Bullen,S,D609464,US,"cited by examiner",6
12 | D656309,D641974,2011-07-01,Stampfli,S,D641974,US,"cited by examiner",7
13 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/uspc.csv:
--------------------------------------------------------------------------------
1 | D656308,D3,D3/2031,0
2 | D656309,D3,D3/218,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/ipg120327.two/usreldoc.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funginstitute/patentprocessor/416a40ce348904af2463bc97144cd4d89dc2ca6d/test/integration/parse/ipg120327.two/usreldoc.csv
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/application.csv:
--------------------------------------------------------------------------------
1 | id,type,number,country,date,abstract,title,granted,num_claims
2 | 2004/20040000002,,20040000002,US,2004-01-01,"A garment adapted for wear by persons desiring to maintain their arms in a fixed position along their upper body. The garment includes an upper portion, such as a shirt, adapted for wear on the upper body, and at least one hand portion, such as a glove. The shirt is preferably made of a loop material and the glove includes a region of hook material. Utilizing the loop and hook materials, the glove can be releasably attached to the shirt to maintain the user's hand in a fixed position along the user's upper body. The garment is primarily intended for maintaining one or both of the user's arms close against the body to facilitate relaxation while in cramped or tight positions, such as in an airplane seat. However, the garment may also be used in a wide variety of applications wherein it is desired to support one or both of the user's arms in a releasably fixed position. ","Garment for preventing muscle strain",,1
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/claim.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,text,dependent,sequence
2 | 2004/20040000002,". A garment for releasably securing a user's hands at a fixed location along the user's body, comprising: ",,1
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/ipcr.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,classification_level,section,subclass,main_group,subgroup,symbol_position,classification_value,classification_status,classification_data_source,action_date,ipc_version_indicator,sequence
2 | 2004/20040000002,A,,B,001,00,,,,,,,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/mainclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 002,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/rawassignee.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,assignee_id,rawlocation_id,type,name_first,name_last,organization,residence,nationality,sequence
2 | 2004/20040000002,,"",,"","","Hill-Rom Services, Inc.",,,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/rawinventor.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,inventor_id,rawlocation_id,name_first,name_last,nationality,sequence
2 | 2004/20040000002,,tarzana|ca|us,Scott,Hollander,US,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/rawlocation.csv:
--------------------------------------------------------------------------------
1 | id,location_id,city,state,country
2 | "",,"","",
3 | tarzana|ca|us,,Tarzana,CA,US
4 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/subclass.csv:
--------------------------------------------------------------------------------
1 | id,title,text
2 | 002/069000,,
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/uspc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,mainclass_id,subclass_id,sequence
2 | 2004/20040000002,002,002/069000,0
3 |
--------------------------------------------------------------------------------
/test/integration/parse/pa040101.two/usreldoc.csv:
--------------------------------------------------------------------------------
1 | uuid,application_id,rel_id,doctype,status,date,number,kind,country,relationship,sequence
2 | 2004/20040000002,9919271,relation,GRANTED,2001-07-30,9919271,"",US,parent,0
3 | 2004/20040000002,10430371,relation,GRANTED,2003-05-05,10430371,A1,"",child,1
4 |
--------------------------------------------------------------------------------
/test/integration/readme.md:
--------------------------------------------------------------------------------
1 | # Integration testing for patent processing
2 |
3 | This integration directory stores known, good outputs
4 | from scripts running end to end.
5 |
6 | ### General procedure for generating a test
7 |
8 | 1. Run `preprocess.sh` on a limited, known input.
9 | 2. Export results from 1 or more inputs from 1 into csv.
10 | 3. Commit appropriate known, correct results into repo.
11 | 4. Write a wrapper script for running the integration test
12 | and checking output with (say) diff, automatically. This script
13 | could be written in sh, ruby or python, but preferably not sh.
14 |
15 |
--------------------------------------------------------------------------------
/test/make_test_databases.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # vim: set fileencoding=utf-8 :
3 |
4 | import os
5 | import sqlite3
6 | import csv
7 |
def my_sane_remove_wrapper(filename):
    """Remove *filename*, silently ignoring the case where it does not exist.

    Uses the builtin ``OSError`` rather than the legacy ``os.error`` alias
    (they are the same exception; the builtin name is the documented one).
    """
    try:
        os.remove(filename)
    except OSError:
        # Missing file (or any removal failure) is fine here: the goal is
        # simply "make sure this file is gone before the test run".
        pass
13 |
def remove_existing_databases():
    """Delete any leftover test databases from a previous run."""
    # NOTE(review): "hashTbl.sqlite4" looks like a typo for ".sqlite3" --
    # confirm against whatever code creates the hash-table database.
    for leftover in ("assignee.sqlite3", "inventor.sqlite3", "hashTbl.sqlite4"):
        my_sane_remove_wrapper(leftover)
18 |
19 |
def make_assignee_db():
    """Build assignee.sqlite3 from the schema file and the parsed CSV fixture.

    Reads the table definition from ../schemas/assignee.sql, then bulk-loads
    ./integration/parse/ipg120327.18/assignee.csv (9 columns per row).
    The original version never closed the schema file handle; both files are
    now managed with ``with`` and the connection with ``try/finally``.
    """
    conn = sqlite3.connect("assignee.sqlite3")
    try:
        with open('../schemas/assignee.sql', 'r') as schema_file:
            conn.cursor().executescript(schema_file.read())
        with open("./integration/parse/ipg120327.18/assignee.csv", 'r') as csvfile:
            # executemany consumes the reader lazily -- one pass, no list.
            conn.cursor().executemany(
                'INSERT INTO assignee VALUES (?,?,?,?,?,?,?,?,?)',
                csv.reader(csvfile))
        conn.commit()
    finally:
        conn.close()
33 |
def make_inventor_db():
    """Build inventor.sqlite3 from the schema file and the parsed CSV fixture.

    Mirrors make_assignee_db (10 columns per inventor row). File handles are
    closed deterministically; the original leaked the schema file handle.
    """
    conn = sqlite3.connect("inventor.sqlite3")
    try:
        with open('../schemas/inventor.sql', 'r') as schema_file:
            conn.cursor().executescript(schema_file.read())
        # Store raw byte strings -- inventor names may contain non-ASCII
        # bytes that the default text factory would reject.
        conn.text_factory = str
        with open("./integration/parse/ipg120327.18/inventor.csv", 'r') as csvfile:
            conn.cursor().executemany(
                'INSERT INTO inventor VALUES (?,?,?,?,?,?,?,?,?,?)',
                csv.reader(csvfile))
        conn.commit()
    finally:
        conn.close()
48 |
--------------------------------------------------------------------------------
/test/patenttest.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run every unit-test file (test_*.py) in this directory. Each file's output
# is captured, then echoed in green if the file passed or red if it failed;
# the terminal color is reset afterwards.

# Use a private temp file instead of the old fixed /tmp/res path, which
# could collide with another user or a concurrent run. trap guarantees
# cleanup even on early exit.
res="$(mktemp)"
trap 'rm -f "$res"' EXIT

for f in test_*.py
do
    printf "\e[0m"              # reset color before each file's banner
    echo "Processing $f file.."
    python "$f" &> "$res"
    if [[ $? == 0 ]] ;
        then printf "\e[32m" ;  # green: test file exited 0
        else printf "\e[31m" ;  # red: non-zero exit status
    fi
    cat "$res"
done
printf "\e[0m"

# Remove databases and compiled bytecode left behind by the tests, both
# here and in the project root. (The original second line repeated *.pyc
# for the current directory instead of cleaning ../*.pyc.)
rm -rf *.sqlite3 *.pyc
rm -rf ../*.sqlite3 ../*.pyc
26 |
--------------------------------------------------------------------------------
/test/process.cfg:
--------------------------------------------------------------------------------
1 | ## NOTE: this is a truncated version of the main 'process.cfg' file used by parse.py and start.py in the
2 | ## above directory. The following section(s) are needed for unittests
3 | # This section specifies which grant_handler is to be used for each year of the
4 | # parser. This section should only have to be touched when a new parser is
5 | # introduced. In the case where a year cannot be parsed from the filename (the
6 | # format `ipgYYMMDD` is assumed), then the default parser is used.
7 | [xml-handlers]
8 | 2005-2012=lib.handlers.grant_handler_v42
9 | 2013=lib.handlers.grant_handler_v44
10 | default=lib.handlers.grant_handler_v42
11 |
--------------------------------------------------------------------------------
/test/readme.md:
--------------------------------------------------------------------------------
1 | # Test suite for patent preprocessing
2 |
3 | #### tl;dr
4 |
5 | * `./patenttest.sh`
6 | * `./parse_integration.sh`
7 | * `./clean_integration.sh`
8 | * `./consolidate_integration.sh`
9 |
10 | ----
11 |
12 |
13 | We intend to conform to PEP guidelines;
14 | please note any place where the implementation
15 | does not meet a relevant PEP.
16 |
17 | Currently (January 30, 2013), we're running unit tests and integration
18 | tests. We do not have full coverage for unit tests. Unit tests are being
19 | constructed as part of the refactoring process, and all new code should
20 | be covered in unit tests.
21 |
22 | Integration tests will run end-to-end on the parsing, cleaning and consolidation
23 | phases, but the current data sets used in the integration tests are
24 | incomplete. Further, the location handling does not work correctly, so
25 | the integration test covering geocoding is broken by design.
26 |
27 | ## Running unit tests
28 |
29 | Unit tests are constructed for two specific reasons:
30 |
31 | 1. Prevent regression as code base is refactored, and
32 | 2. Ensure extensions to the current code work correctly.
33 |
34 | A general explanation of either refactoring or unit testing new code is
35 | beyond the scope of this readme. File an enhancement request with
36 | specific questions you would like to have answered in this readme.
37 |
38 | The unit tests are invoked automatically in the `./patenttest.sh`
39 | script.
40 |
41 |
42 | ### PATENTROOT
43 |
44 | Not having `PATENTROOT` set will produce this warning notice:
45 |
46 | ```sh
47 | Processing test_parse_config.py file..
48 | Cannot find PATENTROOT environment variable. Setting PATENTROOT to the
49 | patentprocessor directory for the scope of this test. Use `export
50 | PATENTROOT=/path/to/directory` to change
51 | ```
52 |
53 | This is easy to silence: `$ export PATENTROOT=.`
54 |
55 | You may want to export `PATENTROOT` in your shell initialization script
56 | for convenience.
57 |
58 |
59 | ## Running integration tests
60 |
61 | Integration testing for the patent preprocessor simulates running both
62 | preprocessor components and the entire preprocessor on a limited set of
63 | patent data. The goal is ensuring that for a given input, the output
64 | doesn't change from run to run as the code continues development.
65 |
66 | The integration tests require two types of databases:
67 |
68 | 1. A set of sqlite databases located in the test directory as a result
69 | of a successful parse, and
70 | 2. Databases `loctbl` and `NBER_asg` linked from elsewhere like so:
71 | * `ln -s /data/patentdata/NBER/NBER_asg .`
72 | * `ln -s /data/patentdata/location/loctbl.sqlite3 loctbl`
73 | (Your links may be different.)
74 |
75 | The databases mentioned in item 1 are constructed during the
76 | preprocessing, and require no initial setup.
77 |
78 | The databases mentioned in item 2 are used in the cleaning phase of the
79 | preprocessor.
80 |
81 | Fung Institute developers have access to both `loctbl` and `NBER_asg` on
82 | the server. These are read-only on the server, and should be copied into
83 | user's home areas with the soft links adjusted appropriately.
84 |
85 | External developers and other interested parties can download:
86 |
87 | * [loctbl](https://s3-us-west-1.amazonaws.com/fidownloads/loctbl.sqlite3)
88 | * [NBER_asg](https://s3-us-west-1.amazonaws.com/fidownloads/NBER_asg)
89 |
90 | Note: the integration tests pass, that is, run correctly, for data we
91 | know is not 100% correct. However, these tests allow evolving the code
92 | to correctness incrementally.
93 |
94 |
95 |
96 | #### Test speed
97 |
98 | The integration tests require correctly indexed tables to operate
99 | efficiently. The run time difference is roughly 5 minutes for each test
100 | over the geocoding with unindexed tables, versus about 6 seconds for
101 | correctly indexed tables.
102 |
--------------------------------------------------------------------------------
/test/sqlitetest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import sys
5 | import sqlite3
6 | sys.path.append('../')
7 | sys.path.append('../lib')
8 | import SQLite
9 |
10 | # TODO: Get a database connection for testing merge
11 |
def create_connections():
    """Create and return two independent in-memory SQLite connections.

    The original body assigned to an undefined name ``cls`` and raised
    NameError whenever it was called (which is why TestSQLite.setUp keeps
    the call commented out). Returning the pair lets the caller own them.
    """
    return sqlite3.connect(':memory:'), sqlite3.connect(':memory:')
15 |
def close_connections(conn1=None, conn2=None):
    """Close the given SQLite connections; ``None`` values are skipped.

    The original body read undefined globals ``conn1``/``conn2`` and raised
    NameError when called. Taking the connections as defaulted parameters
    fixes that while keeping the legacy zero-argument call a harmless no-op.
    """
    for conn in (conn1, conn2):
        if conn is not None:
            conn.close()
19 |
def create_assignee_schema(cursor):
    """Create the NBER-style assignee table and its unique (Patent, AsgSeq) index.

    Also drops several legacy indexes in case an earlier run left them
    behind. Safe to call repeatedly (IF NOT EXISTS / IF EXISTS throughout).
    """
    cursor.executescript("""
    CREATE TABLE IF NOT EXISTS assignee (
    Patent VARCHAR(8), AsgType INTEGER, Assignee VARCHAR(30),
    City VARCHAR(10), State VARCHAR(2), Country VARCHAR(2),
    Nationality VARCHAR(2), Residence VARCHAR(2), AsgSeq INTEGER);
    CREATE UNIQUE INDEX IF NOT EXISTS uqAsg ON assignee (Patent, AsgSeq);
    DROP INDEX IF EXISTS idx_pata;
    DROP INDEX IF EXISTS idx_patent;
    DROP INDEX IF EXISTS idx_asgtyp;
    DROP INDEX IF EXISTS idx_stt;
    DROP INDEX IF EXISTS idx_cty;
    """)
33 |
def initialize_assignees(conn):
    """Insert one known assignee row (Frito-Lay) into *conn* and commit."""
    sample_row = ('D0656296', 2, 'Frito-Lay North America, Inc.',
                  'Plano', 'TX', 'US', '', '', 0)
    cursor = conn.cursor()
    cursor.execute("""INSERT OR IGNORE INTO assignee VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", sample_row)
    conn.commit()
38 |
class TestSQLite(unittest.TestCase):
    """Smoke tests for the project's SQLite wrapper (lib/SQLite.py).

    Mostly verifies how the constructor resolves its ``db``/``tbl``/``table``
    arguments; ``test_index`` drives the module-level schema helpers against
    a real on-disk database.
    """

    @classmethod
    def setUp(cls):
        # NOTE(review): setUp/tearDown are per-test hooks but are declared
        # as classmethods, so the connections live on the class rather than
        # the instance. unittest still invokes them around each test, but
        # confirm the class-level sharing is intentional.
        #print "Setting up..."
        cls.conn1 = sqlite3.connect(':memory:')
        cls.conn2 = sqlite3.connect(':memory:')
        #create_connections()

    @classmethod
    def tearDown(cls):
        #print "Tearing down..."
        cls.conn1.close()
        cls.conn2.close()
        #close_connections()

    def test_constructor_empty(self):
        # Defaults: in-memory database, table 'main'.
        s = SQLite.SQLite()
        assert(s.db == ':memory:')
        assert(s.tbl == 'main')

    def test_constructor_dbname(self):
        s = SQLite.SQLite(db='foobar.sqlite3')
        assert(s.db == 'foobar.sqlite3')
        assert(s.tbl == 'main')

    def test_constructor_dbname_tbl(self):
        s = SQLite.SQLite(db='foobar.sqlite3', tbl='tbl_foo')
        assert(s.db == 'foobar.sqlite3')
        assert(s.tbl == 'tbl_foo')

    def test_constructor_dbname_tbl_table(self):
        # When both 'tbl' and 'table' are supplied, 'tbl' wins.
        s = SQLite.SQLite(db='foobar.sqlite3', tbl='tbl_foo', table='table_foo')
        assert(s.db == 'foobar.sqlite3')
        assert(s.tbl == 'tbl_foo')

    def test_constructor_dbname_table(self):
        # 'table' is honored as an alias when 'tbl' is absent.
        s = SQLite.SQLite(db='foobar.sqlite3', table='table_foo')
        assert(s.db == 'foobar.sqlite3')
        assert(s.tbl == 'table_foo')

    # def test_merge(self):
    #     s = SQLite.SQLite()
    #     s.merge(key=[['AsgNum', 'pdpass']], on=[['assigneeAsc', 'assignee']],
    #             keyType=['INTEGER'], tableFrom='main', db='db')
    #     assert(1 == 1)

    def test_index(self):
        # NOTE(review): this writes test.sqlite3 to disk and never deletes
        # it -- presumably relies on patenttest.sh's final cleanup pass.
        s = SQLite.SQLite('test.sqlite3')
        create_assignee_schema(s.c)
        initialize_assignees(s.conn)
        assert(1 == 1)
91 |
92 | if __name__ == '__main__':
93 | unittest.main()
94 |
--------------------------------------------------------------------------------
/test/test_ascit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # vim: set fileencoding=utf-8 :
3 |
4 | # The `ascit` function is used during the cleaning phase as
5 | # an sqlite3 function.
6 |
7 | """
8 | Macos keycodes for common utf characters found in patents.
9 |
10 | http://kb.iu.edu/data/anhf.html
11 |
12 | Keystroke Character
13 | Option-e [letter] acute (e.g., á)
14 | Option-` [letter] grave (e.g., è)
15 | Option-i [letter] circumflex (e.g., ô )
16 | Option-u [letter] umlaut or dieresis (e.g., ï )
17 | Option-n [letter] tilde (e.g., ñ )
18 | Option-q oe ligature ( œ )
19 | Option-c cedilla ( ç )
20 | Option-Shift-/ (forward slash) upside-down question mark ( ¿ )
21 | Option-1 (the number 1) upside-down exclamation point ( ¡ )
22 | """
23 |
24 | import unittest
25 | import sys
26 | sys.path.append( '.' )
27 | sys.path.append( '../lib/' )
28 | from fwork import ascit
29 | from fwork import remspace
30 |
class TestAscit(unittest.TestCase):
    """Pin down the character handling of fwork.ascit (and fwork.remspace).

    ``ascit`` is registered as an sqlite3 function during the cleaning
    phase; these tests document which characters it keeps (accented
    letters), strips (punctuation under strict=True), or collapses
    (runs of separator characters).
    """

    def setUp(self):
        self.foo = 'bar'

    def test_toupper(self):
        assert('FOO' == ascit('FOO'))

    def test_retain_acute_verite(self):
        assert('verité' == ascit('verité'))

    def test_retain_acute(self):
        assert('é' == ascit('é'))

    def test_retain_grave(self):
        assert('è' == ascit('è'))

    def test_retain_circumflex(self):
        assert('ô' == ascit('ô'))

    def test_retain_umlaut(self):
        assert('ü' == ascit('ü'))

    def test_retain_tilde(self):
        assert('ñ' == ascit('ñ'))

    def test_retain_oeligature(self):
        assert('œ' == ascit('œ'))

    def test_retain_cedilla(self):
        assert('ç' == ascit('ç'))

    def test_retain_usdq(self):
        assert('¿' == ascit('¿'))

    def test_int(self):
        assert('1' == ascit('1'))

    def test_float(self):
        # Default strict=True removes periods; strict=False keeps them.
        result = ascit('1.0', strict=False)
        assert('1.0' == result)

    def test_remove_period(self):
        # BUG FIX: this method used to be silently shadowed by a second
        # definition of the same name further down, so it never ran.
        assert('10' == ascit('1.0', strict=True))

    def test_remove_ampersand(self):
        assert('foobar' == ascit('foo&bar', strict=True))

    def test_remove_punctuation(self):
        assert('foobar' == ascit('f+=_oo@b!#$%^&*(){}ar', strict=True))

    def test_remove_space_plus(self):
        assert('' == ascit(' +', strict=True))

    def test_remove_spaces(self):
        # NOTE(review): the input contains no space, so this only checks
        # identity. test_remove_duplicates below shows ascit preserves a
        # single inner space, so the method name looks stale -- confirm
        # intended behavior before "fixing" the input.
        assert('foobar' == ascit('foobar'))

    def test_remove_duplicates(self):
        # Runs of commas/pipes/spaces collapse to a single space.
        assert('foo bar' == ascit('foo, |||,,, ,, |,,, bar'))

    def test_remove_braces(self):
        assert('' == ascit('{foo bar}', strict=True))

    def test_remspace(self):
        assert('foobar' == remspace('foo bar'))

    def test_remove_parentheses(self):
        assert('' == ascit('(foo bar)', strict=True))

    def test_remove_period_in_sentence(self):
        # Renamed from test_remove_period: the duplicate name shadowed the
        # strict-float test above and prevented it from ever executing.
        assert('hello there' == ascit('hello. there'))
        assert('hello there' == ascit('hello. there', strict=True))

    def test_remove_comma(self):
        assert('hello there' == ascit('hello, there'))
        assert('hello there' == ascit('hello, there', strict=True))
114 |
115 | if __name__ == '__main__':
116 | unittest.main()
117 |
--------------------------------------------------------------------------------
/test/test_configuration.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | import unittest
5 |
6 | sys.path.append('..')
7 | sys.path.append('../lib')
8 |
9 | from start import get_year_list
10 |
class Test_Configuration(unittest.TestCase):
    """Checks start.get_year_list against single years, ranges, and mixes."""

    def setUp(self):
        pass

    def _verify(self, yearstring, expected):
        # Shared check: parse the spec and compare against the expected
        # list, reporting both values on mismatch.
        years = get_year_list(yearstring)
        self.assertTrue(expected == years,
                        '\n{0} should be\n{1}'.format(years, expected))

    def test_get_year1(self):
        # A single year.
        self._verify('2013', [2013])

    def test_get_year2(self):
        # An inclusive range expands to every year it covers.
        self._verify('2010-2013', [2010, 2011, 2012, 2013])

    def test_get_year3(self):
        # A trailing single year is appended in spec order, not sorted.
        self._verify('2010-2013,2009', [2010, 2011, 2012, 2013, 2009])

    def test_get_year4(self):
        # Mixed singles and a range, preserving the order given.
        self._verify('2008,2010-2013,2009', [2008, 2010, 2011, 2012, 2013, 2009])

    def test_get_year5(self):
        # Two disjoint ranges.
        self._verify('1975-1978,2000-2002',
                     [1975, 1976, 1977, 1978, 2000, 2001, 2002])
50 |
51 | unittest.main()
52 |
--------------------------------------------------------------------------------
/test/test_fwork.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # `fwork.py` will probably be renamed to something a little
4 | # more suggestive as to its purpose.
5 |
6 | import unittest
7 | import sys
8 |
9 | sys.path.append( '.' )
10 | sys.path.append( '../lib/' )
11 |
12 | #import imp
13 | #from yaml import load, dump
14 |
15 | from fwork import *
16 |
class TestFWork(unittest.TestCase):
    """Unit tests for the helpers in lib/fwork.py (ascit, get_ctypes,
    is_real, text_type, have_schema_type, quickSQL2)."""

    def removeFile(self, path):
        """Delete *path* if it exists.

        Uses os.remove instead of shelling out to ``rm`` via os.system
        (portable, no subprocess, no word-splitting surprises).
        """
        if os.path.isfile(path):
            os.remove(path)

    def setUp(self):
        self.foo = 'bar'

    def test_dummy(self):
        assert(1 == 1)

    def test_int(self):
        assert('1' == ascit('1'))

    def test_float(self):
        # strict=True (the default) strips periods; strict=False keeps them.
        result = ascit('1.0', strict=False)
        assert('1.0' == result)

    def test_remove_period(self):
        assert('10' == ascit('1.0', strict=True))

    def test_retain_hyphen(self):
        assert('KIN-JOE' == ascit('KIN-JOE', strict=True))

    def test_get_ctypes(self):
        # get_ctypes maps a sample Python value to its SQL column type.
        assert('VARCHAR' == get_ctypes("FOO"))
        assert('REAL' == get_ctypes(4.2))
        assert('INTEGER' == get_ctypes(42))

    def get_quicksql_data(self):
        """Sample header + rows for the quickSQL2 test (not a test itself)."""
        return [
            [u'UniqueID', u'Patent', u'Lastname', u'Firstname'],
            [u'1', u'0.8194655', u'PISTER', u'KRISTOPHER S J'],
            [u'1', u'0.8190055', u'PISTER', u'KRISTOPHER S J']
        ]

    def test_is_real(self):
        # is_real: 1 for a decimal-looking string, 0 otherwise.
        data = u'0.1234'
        assert(1 == is_real(data))
        data = u'01234'
        assert(0 == is_real(data))

    def get_typelist(self):
        """Explicit column-type overrides handed to quickSQL2."""
        return [u'Lastname INTEGER']

    def test_text_type(self):
        # text_type is True only for str/unicode values, regardless of
        # whether the text happens to look numeric.
        data = 'foo'
        assert(True == text_type(data))
        data = 123
        assert(False == text_type(data))
        data = 1.23
        assert(False == text_type(data))
        data = u'123'
        assert(True == text_type(data))
        data = '1.23'
        assert(True == text_type(data))

    def test_create_column_labels(self):
        assert(1 == 1)

    def test_have_schema_type(self):
        # have_schema_type returns -1 when the column has no override.
        tl = self.get_typelist()
        assert(-1 == have_schema_type(tl, 'UNIQUEID'))
        assert(-1 == have_schema_type(tl, 'FIRSTNAME'))

    def test_quickSQL2(self):
        """quickSQL2 creates table 'test' and loads the sample rows.

        The connection and the database file are now cleaned up instead of
        being leaked (the old version left fwork.sqlite3 on disk with an
        open connection).
        """
        import sqlite3
        dbfilename = "fwork.sqlite3"
        self.removeFile(dbfilename)
        conn = sqlite3.connect(dbfilename)
        try:
            cursor = conn.cursor()
            data = self.get_quicksql_data()
            typelist = self.get_typelist()
            quickSQL2(cursor, data, table="test", header=True, typeList=typelist)
            conn.commit()
            cursor.close()
        finally:
            conn.close()
            self.removeFile(dbfilename)
111 |
112 | if __name__ == '__main__':
113 | unittest.main()
114 |
--------------------------------------------------------------------------------
/test/test_keylist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import os
5 | import sqlite3
6 | import sys
7 | sys.path.append( '../lib/' )
8 | import SQLite
9 |
class TestSQLite(unittest.TestCase):
    """Exercises SQLite._keyList from lib/SQLite.py (Python 2 test file)."""

    def removeFile(self, fname):
        # Delete fname if it exists.
        if os.path.isfile(fname):
            os.remove(fname)

    def createFile(self, fname, ftype=None, data="1,2,3"):
        # Create a fixture chosen by extension or explicit ftype: a SQLite
        # db with tables test/main (data inserted into both, two indexes on
        # test), or a one-line csv appended via the shell.
        if fname.split(".")[-1] == "db" or ftype == "db":
            conn = sqlite3.connect(fname)
            c = conn.cursor()
            c.executescript("""
            CREATE TABLE test (a, B, c);
            CREATE TABLE main (d, E, f);
            INSERT INTO test VALUES ({data});
            INSERT INTO main VALUES ({data});
            CREATE INDEX idx ON test (a);
            CREATE INDEX idy ON test (a, b);
            """.format(data=data)) #"""
            conn.commit()
            c.close()
            # Reopen so later users get a fresh handle to the populated db.
            conn = sqlite3.connect(fname)
        elif fname.split(".")[-1] == "csv" or ftype == "csv":
            os.system("echo '{data}' >> {fname}".\
                format(data=data, fname=fname))

    def setUp(self):
        # Clean slate, then build test.db, wrap it, and attach a second
        # database (test2.db) to the same wrapper.
        self.removeFile("test.db")
        self.removeFile("test.csv")
        self.removeFile("test2.db")
        self.removeFile("test2.csv")
        # create a really basic dataset
        self.createFile(fname="test.db")
        self.s = SQLite.SQLite(db="test.db", tbl="test")
        self.createFile("test2.db")
        s = SQLite.SQLite("test2.db", tbl="test")
        self.s.attach(s)

    def tearDown(self):
        # Close the wrapper and remove everything the tests may have made.
        self.s.close()
        self.removeFile("test.db")
        self.removeFile("test.csv")
        self.removeFile("test2.db")
        self.removeFile("test2.csv")
        self.removeFile("errlog")

    def test_keyList(self):
        # NOTE(review): {"bar",'baz'} is a *set* literal, not a dict -- the
        # commented attempts above suggest a mapping may have been meant;
        # confirm against SQLite._keyList's expected argument type.
        #key = self.s._keyList('foo', kwargs={'tbl': 'main'})
        #print "key from test: ", key
        #key = self.s._keyList('foo', kwargs={"keys": ['bar', 'baz'], 'tbl': 'main'})
        #print "key from test: ", key
        #key = self.s._keyList('foo', kwargs={"keys": 'bar', 'tbl': 'main'})
        #print "key from test: ", key
        #key = self.s._keyList('foo', keys={"bar": 'baz'})
        #print "key from test: ", key
        key = self.s._keyList('foo', keys={"bar",'baz'})
        print "key from test: ", key
        print "key[0] from test: ", key[0]

        self.assertEquals(1,1)
72 |
73 | if __name__ == '__main__':
74 | unittest.main()
75 |
76 |
--------------------------------------------------------------------------------
/test/test_parse_file.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import sys
5 | import unittest
6 | import logging
7 | import re
8 | from collections import Iterable
9 |
10 | sys.path.append('../')
11 | import parse
12 | import lib.handlers.grant_handler_v42 as grant_handler_v42
13 |
# Locate the XML fixtures relative to this test file.
basedir = os.path.dirname(__file__)
testdir = os.path.join(basedir, './fixtures/xml/')
testfileone = 'ipg120327.one.xml'  # fixture containing a single grant document
testfiletwo = 'ipg120327.two.xml'  # fixture containing two concatenated documents
# Matches one complete XML document: declaration, then a DOCTYPE whose name
# (group 2) reappears in the closing root tag. DOTALL so '.' spans newlines;
# IGNORECASE for the declaration keywords.
regex = re.compile(r"""([<][?]xml version.*?[>]\s*[<][!]DOCTYPE\s+([A-Za-z-]+)\s+.*?/\2[>])""", re.S+re.I)
19 |
class TestParseFile(unittest.TestCase):
    """Exercises parse.py's XML-splitting and file-listing helpers."""

    def setUp(self):
        pass

    def _assert_valid_docs(self, docs, expected_count):
        # Every extracted entry is a tuple whose second element is a full
        # XML document string (declaration + DOCTYPE + matching root tag).
        self.assertTrue(len(docs) == expected_count)
        for doc in docs:
            self.assertTrue(isinstance(doc, tuple))
            self.assertTrue(isinstance(doc[1], str))
        for doc in docs:
            self.assertTrue(regex.match(doc[1]))

    def test_extract_xml_strings_one(self):
        docs = parse.extract_xml_strings(testdir + testfileone)
        self.assertTrue(isinstance(docs, list))
        self._assert_valid_docs(docs, 1)

    def test_parse_files_one(self):
        docs = parse.parse_files([testdir + testfileone])
        self.assertTrue(isinstance(docs, Iterable))
        self._assert_valid_docs(list(docs), 1)

    def test_extract_xml_strings_two(self):
        docs = parse.extract_xml_strings(testdir + testfiletwo)
        self.assertTrue(isinstance(docs, Iterable))
        self._assert_valid_docs(list(docs), 2)

    def test_parse_files_two(self):
        docs = parse.parse_files([testdir + testfiletwo])
        self.assertTrue(isinstance(docs, Iterable))
        self._assert_valid_docs(list(docs), 2)

    def test_use_parse_files_one(self):
        # The extracted XML must be accepted by the v4.2 grant handler.
        docs = list(parse.parse_files([testdir + testfileone]))
        patobj = grant_handler_v42.PatentGrant(docs[0][1], True)
        self.assertTrue(patobj)

    def test_use_parse_files_two(self):
        for us_patent_grant in parse.parse_files([testdir + testfiletwo]):
            self.assertTrue(isinstance(us_patent_grant, tuple))
            self.assertTrue(isinstance(us_patent_grant[1], str))
            patobj = grant_handler_v42.PatentGrant(us_patent_grant[1], True)
            self.assertTrue(patobj)

    def test_list_files(self):
        # list_files returns existing paths matching the filename regex.
        xml_dir = os.path.join(basedir, './fixtures/xml')
        files = parse.list_files(xml_dir, r'ipg120327.one.xml')
        self.assertTrue(isinstance(files, list))
        self.assertTrue(len(files) == 1)
        self.assertTrue(all(filter(lambda x: isinstance(x, str), files)))
        self.assertTrue(all(map(lambda x: os.path.exists(x), files)))
92 |
93 | if __name__ == '__main__':
94 | unittest.main()
95 |
--------------------------------------------------------------------------------
/test/test_separate_row_geocode.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 |
4 | import unittest
5 | import sys
6 |
7 | sys.path.append( '.' )
8 | sys.path.append( '../lib/' )
9 |
10 | from geocode_setup import get_entry_from_row
11 |
12 | class TestSepWrd(unittest.TestCase):
13 |
14 | def test_get_entry_from_row_comma(self):
15 | assert("foo" == get_entry_from_row("foo,bar", 0))
16 | assert("bar" == get_entry_from_row("foo,bar", 1))
17 |
18 | def test_get_entry_from_row_pipe(self):
19 | assert("foo" == get_entry_from_row("foo|bar", 0))
20 | assert("bar" == get_entry_from_row("foo|bar", 1))
21 |
22 | def test_nosplit(self):
23 | result = get_entry_from_row("foo bar", 0)
24 | assert("foo bar" == result)
25 | result = get_entry_from_row("foo bar", 1)
26 | assert("" == result)
27 | # Check out of bounds index, really ought to fail
28 | assert("" == get_entry_from_row("foo bar", 2))
29 |
30 | def test_seq_neg1(self):
31 | assert("foo bar" == get_entry_from_row("foo bar", -1))
32 |
33 |
34 | if __name__ == '__main__':
35 | unittest.main()
36 |
--------------------------------------------------------------------------------
/test/test_sqlite_index.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import os
5 | import sqlite3
6 | import sys
7 | sys.path.append( '../lib/' )
8 | import SQLite
9 |
10 | class TestSQLite(unittest.TestCase):
11 |
    def removeFile(self, fname):
        """Delete ``fname``; a missing file (or any OSError) is ignored."""
        try:
            os.remove(fname)
        except OSError:
            pass
18 |
    def createFile(self, file, type=None, data="1,2,3"):
        """Create a fixture file: a SQLite db or a one-line csv.

        The db variant creates tables ``test`` and ``main`` (note one
        column of ``test`` is literally named ``cursor``), inserts ``data``
        into both, and adds indexes idx/idy on ``test``.

        NOTE(review): parameters ``file`` and ``type`` shadow builtins, and
        ``data`` is interpolated straight into SQL and a shell command --
        acceptable for fixed test fixtures only; never pass untrusted input.
        """
        if file.split(".")[-1] == "db" or type == "db":
            connection = sqlite3.connect(file)
            cursor = connection.cursor()
            cursor.executescript("""
            CREATE TABLE test (a, B, cursor);
            CREATE TABLE main (d, E, f);
            INSERT INTO test VALUES ({data});
            INSERT INTO main VALUES ({data});
            CREATE INDEX idx ON test (a);
            CREATE INDEX idy ON test (a, b);
            """.format(data=data)) #"""
            connection.commit()
            cursor.close()
            # Reopen so later users get a fresh handle to the populated db.
            connection = sqlite3.connect(file)
        elif file.split(".")[-1] == "csv" or type == "csv":
            os.system("echo '{data}' >> {file}".\
                format(data=data, file=file))
38 |
    def setUp(self):
        """Start each test with fresh test.db/test2.db and attach test2."""
        self.removeFile("test.db")
        self.removeFile("test.csv")
        self.removeFile("test2.db")
        self.removeFile("test2.csv")
        # create a really basic dataset
        self.createFile(file="test.db")
        self.s = SQLite.SQLite(db="test.db", tbl="test")
        self.createFile("test2.db")
        s = SQLite.SQLite("test2.db", tbl="test")
        self.s.attach(s)
50 |
51 | def tearDown(self):
52 | self.s.close()
53 | self.removeFile("test.db")
54 | self.removeFile("test.csv")
55 | self.removeFile("test2.db")
56 | self.removeFile("test2.csv")
57 | self.removeFile("errlog")
58 |
59 | def test_indexes(self):
60 | self.assertIn('idx', self.s.indexes())
61 | self.assertTrue(self.s.indexes(lookup="idx"))
62 | self.assertFalse(self.s.indexes(lookup="xdi"))
63 | self.assertEquals([0,0], self.s.indexes(seq="xdi"))
64 | self.assertEquals([1,1], self.s.indexes(seq="idx"))
65 | self.s.c.executescript("""
66 | CREATE INDEX idx1 ON test (b);
67 | CREATE INDEX idx2 ON test (cursor);
68 | CREATE INDEX idx5x3 ON test (a);
69 | CREATE INDEX idx10x ON test (a);
70 | """)
71 | self.assertEquals([1,3], self.s.indexes(seq="idx"))
72 |
73 |
74 | def test__baseIndex(self):
75 | self.assertItemsEqual(['test (a)', 'test (a,b)'],
76 | self.s._baseIndex(db="db"))
77 | self.assertEqual('test (a)',
78 | self.s._baseIndex(idx="idx"))
79 | self.assertEqual('foo (bar,foo)',
80 | self.s._baseIndex(idx="create index x on foo (foo, bar)"))
81 | self.assertEqual('unique foo (foo)',
82 | self.s._baseIndex(idx="create unique index x on foo (foo)"))
83 |
84 |
85 | def test_index(self):
86 | self.s.index([['a','cursor']])
87 | self.assertIn('test (a,cursor)', self.s._baseIndex())
88 |
89 | self.s.index('a', unique=True)
90 | self.assertIn('test (a)', self.s._baseIndex())
91 | self.assertFalse(self.s.index(['a','cursor']))
92 |
93 | self.s.index('f', tbl="main")
94 | self.assertIn('main (f)', self.s._baseIndex())
95 | self.assertFalse(self.s.index('a', tbl="main"))
96 |
97 | #self.s.index([['e', 'f']], combo=True, tbl="main")
98 | #self.assertIn('main (e)', self.s._baseIndex(tbl="main"))
99 | #self.assertIn('main (e,f)', self.s._baseIndex(tbl="main"))
100 |
101 | #self.s.index(['a','cursor'], db="db")
102 | #self.assertIn('test (a,cursor)', self.s._baseIndex(db="db"))
103 |
104 |
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
107 |
108 |
--------------------------------------------------------------------------------
/test/test_sqlite_merge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import os
5 | import sqlite3
6 | import sys
7 | sys.path.append( '../lib/' )
8 | import SQLite
9 |
10 | class TestSQLite(unittest.TestCase):
11 |
12 | def removeFile(self, fname):
13 | #delete a fname if it exists
14 | try:
15 | os.remove(fname)
16 | except OSError:
17 | pass
18 |
19 | def createFile(self, file, type=None, data="1,2,3"):
20 | #create a file db, csv
21 | if file.split(".")[-1] == "db" or type == "db":
22 | connection = sqlite3.connect(file)
23 | cursor = connection.cursor()
24 | cursor.executescript("""
25 | CREATE TABLE test (a, B, cursor);
26 | CREATE TABLE main (d, E, f);
27 | INSERT INTO test VALUES ({data});
28 | INSERT INTO main VALUES ({data});
29 | CREATE INDEX idx ON test (a);
30 | CREATE INDEX idy ON test (a, b);
31 | """.format(data=data)) #"""
32 | connection.commit()
33 | cursor.close()
34 | connection = sqlite3.connect(file)
35 | elif file.split(".")[-1] == "csv" or type == "csv":
36 | os.system("echo '{data}' >> {file}".\
37 | format(data=data, file=file))
38 |
39 | def setUp(self):
40 | self.removeFile("test.db")
41 | self.removeFile("test.csv")
42 | self.removeFile("test2.db")
43 | self.removeFile("test2.csv")
44 | # create a really basic dataset
45 | self.createFile(file="test.db")
46 | self.s = SQLite.SQLite(db="test.db", tbl="test")
47 | self.createFile("test2.db")
48 | s = SQLite.SQLite("test2.db", tbl="test")
49 | self.s.attach(s)
50 |
51 | def tearDown(self):
52 | self.s.close()
53 | self.removeFile("test.db")
54 | self.removeFile("test.csv")
55 | self.removeFile("test2.db")
56 | self.removeFile("test2.csv")
57 | self.removeFile("errlog")
58 |
59 |
60 | def test_indexes(self):
61 | self.assertIn('idx', self.s.indexes())
62 | self.assertTrue(self.s.indexes(lookup="idx"))
63 | self.assertFalse(self.s.indexes(lookup="xdi"))
64 | self.assertEquals([0,0], self.s.indexes(seq="xdi"))
65 | self.assertEquals([1,1], self.s.indexes(seq="idx"))
66 | self.s.c.executescript("""
67 | CREATE INDEX idx1 ON test (b);
68 | CREATE INDEX idx2 ON test (cursor);
69 | CREATE INDEX idx5x3 ON test (a);
70 | CREATE INDEX idx10x ON test (a);
71 | """)
72 | self.assertEquals([1,3], self.s.indexes(seq="idx"))
73 |
74 | def test__baseIndex(self):
75 | self.assertItemsEqual(['test (a)', 'test (a,b)'],
76 | self.s._baseIndex(db="db"))
77 | self.assertEqual('test (a)',
78 | self.s._baseIndex(idx="idx"))
79 | self.assertEqual('foo (bar,foo)',
80 | self.s._baseIndex(idx="create index x on foo (foo, bar)"))
81 | self.assertEqual('unique foo (foo)',
82 | self.s._baseIndex(idx="create unique index x on foo (foo)"))
83 |
84 |
85 | def test_index(self):
86 | self.s.index([['a','cursor']])
87 | self.assertIn('test (a,cursor)', self.s._baseIndex())
88 | self.s.index('a', unique=True)
89 | self.assertIn('test (a)', self.s._baseIndex())
90 | self.assertFalse(self.s.index(['a','cursor']))
91 | self.s.index('f', tbl="main")
92 | self.assertIn('main (f)', self.s._baseIndex())
93 | self.assertFalse(self.s.index('a', tbl="main"))
94 | #self.s.index(['e', 'f'], combo=True, tbl="main")
95 | #self.assertIn('main (e)', self.s._baseIndex(tbl="main"))
96 | #self.assertIn('main (e,f)', self.s._baseIndex(tbl="main"))
97 |
98 | self.s.index([['a','cursor']], db="db")
99 | self.assertIn('test (a,cursor)', self.s._baseIndex(db="db"))
100 |
101 | # def test_merge(self):
102 | # s = SQLite.SQLite()
103 | # s.merge(key=[['AsgNum', 'pdpass']], on=[['assigneeAsc', 'assignee']],
104 | # keyType=['INTEGER'], tableFrom='main', db='db')
105 | # assert(1 == 1)
106 |
107 |
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
110 |
111 |
--------------------------------------------------------------------------------
/test/test_xml_driver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import re
5 | import sys
6 | import unittest
7 | from xml.sax import make_parser, handler
8 | from cgi import escape as html_escape
9 |
10 | sys.path.append('../lib/handlers/')
11 | from xml_driver import XMLElement, XMLHandler
12 |
# Directory of test files
basedir = os.curdir
# XML fixtures live under fixtures/xml/ relative to the current directory,
# so tests must be run from the test/ directory for parsing to succeed.
testdir = os.path.join(basedir, 'fixtures/xml/')
16 |
class Test_XMLElement_Basic(unittest.TestCase):
    """Parses fixtures/xml/basic.xml and checks XMLElement navigation."""

    def setUp(self):
        # setup basic.xml parser/handler
        xmlhandler = XMLHandler()
        parser = make_parser()
        parser.setContentHandler(xmlhandler)
        # Do not fetch external general entities (no DTD/network access).
        parser.setFeature(handler.feature_external_ges, False)
        parser.parse(testdir + 'basic.xml')
        self.assertTrue(xmlhandler.root)
        self.root = xmlhandler.root

    def test_basic_xml_tag_counts(self):
        """Attribute access on the tree returns all matching descendants."""
        # assertEqual reports both values on failure, unlike assertTrue(==).
        self.assertEqual(1, len(self.root.a))
        self.assertEqual(2, len(self.root.a.b))
        self.assertEqual(3, len(self.root.a.b.c))
        self.assertEqual(2, len(self.root.a.b.d))
        self.assertEqual(3, len(self.root.a.c))

    def test_basic_xml_tag_contents(self):
        """get_content() returns the text of each matched element.

        Bug fix: the original hand-built "{0} should be {1}" messages for
        the d[0]/d[1] checks formatted c[0]/c[1] by copy-paste error;
        assertEqual makes the custom messages unnecessary entirely.
        """
        self.assertEqual('HELLO', self.root.a.b.c[0].get_content())
        self.assertEqual('WORLD', self.root.a.b.c[1].get_content())
        self.assertEqual('3', self.root.a.b.c[2].get_content())
        self.assertEqual('1', self.root.a.b.d[0].get_content())
        self.assertEqual('2', self.root.a.b.d[1].get_content())

    def test_basic_xml_contents_of(self):
        """contents_of(tag) collects get_content() across matching children."""
        self.assertEqual(['HELLO', 'WORLD', '3'],
                         self.root.a.b.contents_of('c'))
        # Restricting to the first <b> drops the '3' from the second <b>.
        self.assertEqual(['HELLO', 'WORLD'],
                         self.root.a.b[0].contents_of('c'))
51 |
52 | unittest.main()
53 |
--------------------------------------------------------------------------------
/vm/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 | $script = <