├── .gitignore ├── requirements.txt ├── sample_patents.txt ├── sample_config.cfg ├── json_to_csv.py ├── api_wrapper.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | __pycache__/ 3 | .idea/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PatentsView/PatentsView-APIWrapper/HEAD/requirements.txt -------------------------------------------------------------------------------- /sample_patents.txt: -------------------------------------------------------------------------------- 1 | 6541233 2 | 7861317 3 | 4712336 4 | 5012395 5 | 5491236 6 | 5612354 7 | 4123425 8 | 4123538 -------------------------------------------------------------------------------- /sample_config.cfg: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # PatentsView-APIWrapper Configuration File 4 | # American Institutes for Research 5 | # 6 | # 7 | # See the accompanying README.md file for instructions 8 | # to modify this configuration file 9 | # 10 | # 11 | 12 | 13 | [QUERY1] 14 | entity = "patents" 15 | input_file = "sample_patents.txt" 16 | directory = "/Users/jsennett/Code/PatentsView-APIWrapper" 17 | input_type = "patent_number" 18 | fields = ["patent_number", "patent_title", "patent_date"] 19 | criteria1 = {"_lte":{"patent_date":"2018-06-01"}} 20 | criteria2 = {"_gte":{"patent_date":"2000-01-01"}} 21 | sort = [{"patent_number":"asc"}, {"patent_title":"desc"}] 22 | -------------------------------------------------------------------------------- /json_to_csv.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import sys 5 | import csv 6 | import io 7 | 8 | ENCODING = "Latin-1" 9 | 10 | def convertToCSV(jsonData, keys): 11 | returnData = {} 12 | global counter 13 | row = [] 14 | 15 | groups = ["cited_patents","inventors","application_citations", 16 | "applications", "assignees","citedby_patents","coinventors", 17 | "cpc_subgroups", "cpc_subsections", "cpcs", "IPCs", 18 | "locations", "nber_subcategories","nbers","patents", 19 | "uspc_mainclasses", "uspc_subclasses","uspcs","years", 20 | "rawinventors","wipos","gov_interests"] 21 | 22 | common = list(set(groups).intersection(set(keys))) 23 | 24 | if(len(common)>0): 25 | # Generate the length of maximum results 26 | length_dict = len(jsonData[common[0]]) 27 | for group in common: 28 | if len(jsonData[group]) > length_dict: 29 | length_dict = len(jsonData[group]) 30 | else: 31 | length_dict = 1 32 | for i in range(0, length_dict): 33 | row = [] 34 | returnData[i] = {} 35 | for key in keys: 36 | if key in common: 37 | try: 38 | index = keys.index(key) 39 | tempData = jsonData[key][i] 40 | tempKeys = sorted(tempData.keys()) 41 | for k in tempKeys: 42 | returnData[i][k] = tempData[k] 43 | except: 44 | pass 45 | else: 46 | returnData[i][key] = jsonData[key] 47 | return returnData 48 | 49 | def writeCSV(a, filename): 50 | write = csv.writer(io.open(filename, 'w', newline='', encoding=ENCODING)) 51 | groups = ["cited_patents","inventors","application_citations", 52 | "applications", "assignees","citedby_patents","coinventors", 53 | "cpc_subgroups", "cpc_subsections", "cpcs", "IPCs", 54 | "locations", "nber_subcategories","nbers","patents", 55 | "uspc_mainclasses", "uspc_subclasses","uspcs","years", 56 | "rawinventors","wipos","gov_interests"] 57 | key = list(a.keys()) 58 | key = list(set(groups).intersection(set(key))) 59 | j = a[key[0]] 60 | i = 0 61 | prevRow = [] 62 | if (j is not None): 63 | for jsonData in j: 64 | k = 0 65 | keys = jsonData.keys() 66 | csvData = convertToCSV(jsonData, sorted(keys)) 67 | if (i==0): 68 | write.writerow(list(sorted(csvData[0].keys()))) 69 | for key in csvData.keys(): 70 | row = [] 71 | row2 = [] 72 | data = csvData[key] 73 | for k in sorted(csvData[0].keys()): 74 | try: 75 | row = row + [data[k]] 76 | except: 77 | row = row + [csvData[0][k]] 78 | flag = False 79 | for item in row: 80 | if item != "": 81 | flag = True 82 | if (flag): 83 | try: 84 | row = [str(s).encode(ENCODING, "replace").decode(ENCODING, errors='ignore') for s in row] 85 | except: 86 | pass 87 | write.writerow(row) 88 | i += 1 89 | 90 | def merge_csv(fd,q,requests): 91 | diri = [d for d in os.listdir(fd) if re.search(q+'_\d+.csv',d)] 92 | csv_out = open(os.path.join(fd, q+'.csv'), 'w') 93 | for line in open(os.path.join(fd,q+'_0.csv'), 'rb').read().decode(ENCODING, errors='ignore'): 94 | csv_out.write(line) 95 | for i in range(requests): 96 | f = open(os.path.join(fd, q+'_'+str(i)+'.csv'), 'r+', encoding=ENCODING) 97 | if sys.version_info >= (3,): 98 | next(f) 99 | else: 100 | f.next() 101 | for line in f: 102 | csv_out.write(line) 103 | f.close() 104 | csv_out.close() 105 | 106 | def main(fd, q, requests): 107 | diri = [d for d in os.listdir(fd) if re.search(q+'_\d+.json',d)] 108 | for d in diri: 109 | filename = fd + '/' + d 110 | data = open(filename, "r").read() 111 | try: 112 | b = json.loads(data) 113 | except: 114 | print("ERROR") 115 | sys.exit(1) 116 | filename = filename.replace('.json', '.csv') 117 | writeCSV(b, filename) 118 | merge_csv(fd, q, requests) 119 | 120 | # Remove individual component files 121 | for d in diri: 122 | os.remove(os.path.join(fd,d)) 123 | os.remove(os.path.join(fd,d.replace('.json','.csv'))) -------------------------------------------------------------------------------- /api_wrapper.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import configparser 3 | import json 4 | import os 5 | import requests 6 | import json_to_csv 7 | import sys 8 | import pandas as pd 9 | 10 | 11 | def query(configfile): 12 | # Query the PatentsView database using parameters specified in configfile 13 | parser = configparser.ConfigParser() 14 | parser.read(configfile) 15 | 16 | # Loop through the separate queries listed in the config file. 17 | for q in parser.sections(): 18 | 19 | print("Running query: ", q) 20 | 21 | # Parse parameters from config file 22 | entity = json.loads(parser.get(q, 'entity')) 23 | url = 'https://api.patentsview.org/'+entity+'/query?' 24 | 25 | input_file = json.loads(parser.get(q, 'input_file')) 26 | directory = json.loads(parser.get(q, 'directory')) 27 | input_type = json.loads(parser.get(q, 'input_type')) 28 | fields = json.loads(parser.get(q, 'fields')) 29 | 30 | 31 | try: 32 | # If specified, 'sort' should be a list of dictionaries, specifying 33 | # the order of keys and direction of each key. 34 | 35 | sort = json.loads(parser.get(q, 'sort')) 36 | sort_fields, sort_directions = [], [] 37 | for dct in sort: 38 | for field in dct: 39 | # We can only sort by fields that are in the data 40 | if field in fields: 41 | sort_fields.append(field) 42 | sort_directions.append(dct[field]) 43 | if len(sort_fields) == 0: 44 | sort_fields = [fields[0]] 45 | sort_directions = ["asc"] 46 | except: 47 | sort_fields = [fields[0]] 48 | sort_directions = ["asc"] 49 | 50 | criteria = {"_and": [json.loads(parser.get(q, option)) for option in 51 | parser.options(q) if option.startswith('criteria')]} 52 | 53 | # remove the last line's carriage return 54 | item_list = list(set(open(os.path.join(directory, input_file)).read().rstrip('\n').split('\n'))) 55 | results_found = 0 56 | 57 | item_list_len = len(item_list) 58 | # request the maximum of 10000 matches per query and page forward as necessary 59 | per_page = 10000 60 | 61 | for item in item_list: 62 | count = per_page 63 | page = 1 64 | while count == per_page: 65 | params = { 66 | 'q': {"_and": [{input_type: item}, criteria]}, 67 | 'f': fields, 68 | 'o': {"per_page": per_page, "page": page} 69 | } 70 | 71 | r = requests.post(url, data=json.dumps(params)) 72 | page += 1 73 | count = 0 74 | 75 | if 400 <= r.status_code <= 499: 76 | print("Client error when quering for value {}".format(item)) 77 | elif r.status_code >= 500: 78 | print("Server error when quering for value {}. You may be exceeding the maximum API request size (1GB).".format(item)) 79 | else: 80 | count = json.loads(r.text)['count'] 81 | if count != 0: 82 | outp = open(os.path.join(directory, q + '_' + \ 83 | str(results_found) + '.json'), 'w') 84 | print(r.text, end = '', file=outp) 85 | outp.close() 86 | results_found += 1 87 | 88 | if results_found == 0: 89 | print("Query {} returned no results".format(q)) 90 | else: 91 | # Output merged CSV of formatted results. 92 | json_to_csv.main(directory, q, results_found) 93 | 94 | # Clean csv: reorder columns, drop duplicates, sort, then save 95 | output_filename = os.path.join(directory, q+'.csv') 96 | df = pd.read_csv(output_filename, dtype=object, encoding='Latin-1') 97 | df = df[fields].drop_duplicates().sort_values(by=sort_fields, 98 | ascending=[direction != 'desc' for direction in sort_directions]) 99 | df.to_csv(output_filename, index=False) 100 | print('({} rows returned)'.format(len(df))) 101 | 102 | 103 | if __name__ == '__main__': 104 | if sys.version_info[0] != 3: 105 | print("Please use Python version 3; you are using version:", sys.version) 106 | sys.exit(1) 107 | 108 | if len(sys.argv) < 2: 109 | print("USAGE: python api_wrapper.py config_file") 110 | sys.exit(1) 111 | 112 | if not os.path.isfile(sys.argv[1]): 113 | print("File not found: ", sys.argv[1]) 114 | 115 | query(sys.argv[1]) 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Archival Notice 2 | **NOTICE**: The PatentsView Legacy API has been discontinued as of May 1, 2025 and any requests made to api.patentsview.org will return a “410 Gone” code. Consequently, the API wrapper has been discontinued and set to archival mode. We encourage all users to transition to the new PatentSearch API for continued access to our API services. Refer to the [code examples](https://github.com/PatentsView/PatentsView-Code-Examples/) for example snippets or read more about the transition in this [blog post](https://patentsview.org/data-in-action/patentsview-ends-support-legacy-api). 3 | 4 | PatentsView API Wrapper 5 | =================================== 6 | 7 | The purpose of this API Wrapper is to extend the functionality of the 8 | [PatentsView API](patentsview.org/api/). The wrapper can take in a list of 9 | values (such as patent numbers), retrieve multiple data points, and then convert 10 | and merge the results into a CSV file. 11 | 12 | ## How To Use the API Wrapper 13 | 1. Clone or download this repository 14 | ```bash 15 | git clone https://github.com/CSSIP-AIR/PatentsView-APIWrapper.git 16 | ``` 17 | 18 | 2. Install dependencies 19 | ```bash 20 | cd PatentsView-APIWrapper 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | 3. Modify the sample config file `sample_config.cfg` or create a copy with your own configuration settings 25 | 26 | 4. Run the API Wrapper using Python 3: 27 | ```bash 28 | python api_wrapper.py sample_config.cfg 29 | ``` 30 | 31 | ## How to modify your query configuration file 32 | The PatentsView API Wrapper reads in query specifications from the configuration file you point it to. The configuration file should define at least one query. Below is a description of each parameter that defines each query. 33 | 34 | ### Query Name 35 | The name of the query, and the name given to the resulting file (for example, [QUERY1] produces QUERY1.csv). If your configuration file contains multiple queries, each query should have a distinct name. Query parameters must directly follow the query name. 36 | 37 | ### Entity 38 | The type of object you want to return. This must be one of the PatentsView API endpoints: 39 | ``` 40 | "patents" 41 | "inventors" 42 | "assignees" 43 | "locations" 44 | "cpc_subsections" 45 | "uspc_mainclasses" 46 | "nber_subcategories" 47 | ``` 48 | 49 | ### Input File 50 | The name or relative path of the input file containing the values you want to query. For example, `sample_config.cfg` points to `sample_patents.txt`, which contains a list of patent numbers; the API wrapper will query for each of these patents. 51 | 52 | ### Directory 53 | The absolute path of the directory of your input file and results. Use forward slashes (`/`) instead of backward slashes (`\`). For Windows, this may look like: 54 | 55 | ```directory = "/Users/jsennett/Code/PatentsView-APIWrapper"``` 56 | 57 | For OSX/Unix systems: 58 | 59 | ```directory = "C:/Users/jsennett/Code/PatentsView-APIWrapper"``` 60 | 61 | ### Input Type 62 | The type of object represented in the input file. The full list of 63 | input types can be found in the [PatentsView API Documentation](https://api.patentsview.org/doc.html). 64 | Common input types include: 65 | 66 | ``` 67 | "patent_number" 68 | "inventor_id" 69 | "assignee_id" 70 | "cpc_subsection_id" 71 | "location_id" 72 | "uspc_mainclass_id" 73 | ``` 74 | 75 | ### Fields 76 | The fields that will be returned in the results. Valid fields for each endpoint can be found in the [PatentsView API Documentation](https://api.patentsview.org/doc.html). Fields should be specified as an array of strings, such as: 77 | 78 | ```fields = ["patent_number", "patent_title", "patent_date"]``` 79 | 80 | 81 | ### Criteria (optional) 82 | Additional rules, written in the PatentsView API syntax, to be applied to each query. Each criteria can specify multiple rules combined with OR or AND operators. If multiple criteria are listed, they will be combined with the AND operator. Multiple criteria should be named criteria1, criteria2, criteria3, etc. 83 | 84 | For example, the following criteria will limit results to patents from Jan 1 to Dec 31, 2015 with a patent abstract containing either "cell" or "mobile". 85 | ``` 86 | criteria1 = {"_gte":{"patent_date":"2015-1-1"}} 87 | criteria2 = {"_lte":{"patent_date":"2015-12-31"}} 88 | criteria3 = {"_or":[{"_contains":{"patent_abstract":"cell"}, {"_contains":{"patent_abstract":"mobile"}]} 89 | ``` 90 | 91 | ### Sort (optional) 92 | The fields and directions over which the output file will be sorted. This should be specified as an array of JSON objects, pairing the field with its direction. The sort order will follow the array order. 93 | To sort just by patent number (ascending): 94 | 95 | ```sort = [{"patent_number": "asc"}]``` 96 | 97 | To sort first by patent_date (descending), and then by patent title (ascending): 98 | 99 | ```sort = [{"patent_date": "desc"}, {"patent_title":, "asc"}]``` 100 | 101 | ## Compatibility 102 | 103 | The API wrapper is currently compatible with Python 3. 104 | 105 | ## License 106 | 107 | Users are free to use, share, or adapt the material for any purpose, subject to the standards of the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). 108 | 109 | Attribution should be given to PatentsView for use, distribution, or derivative works. 110 | 111 | ## See also 112 | 113 | [USPTO PatentsView](https://www.patentsview.org/web/#viz/relationships) 114 | 115 | [PatentsView API](https://api.patentsview.org/doc.html) 116 | 117 | [PatentsView Query Language](https://api.patentsview.org/query-language.html) 118 | --------------------------------------------------------------------------------