├── config.ini ├── README.md ├── stock_db_dump.sql └── Extract_financial_data_from_XBRL.ipynb /config.ini: -------------------------------------------------------------------------------- 1 | [Sql] 2 | uid: ******** 3 | pwd: ******** 4 | host: localhost 5 | port: 5432 6 | 7 | [Path] 8 | dir_data_file: data_files 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | In this project, I use Python to automate the retrieval of financial data by extracting it from XBRL instance documents. 2 | 3 | To view the project Extract_financial_data_from_XBRL.ipynb, click here, which will display the project using Jupyter Notebook Viewer, as GitHub is sometimes unable to load this project. 4 | 5 | The `data_files` folder contains the XBRL instance documents that are used in this project. 6 | 7 | The `stock_db_dump.sql` file is a PostgreSQL database backup that contains the table that this project inserts records into and updates. The command to restore the backup can be found in the PostgreSQL documentation. 8 | 9 | Below is the description of each column of the table that this project inserts records into and updates. 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 73 | 74 | 75 |
ColumnDescription
company_nameCompany name
filingsThe type of filing that the financial data is retrieved from (e.g. 10-K, 20-F, 40-F)
reporting_currencyReporting currency
fiscal_year_endedFiscal year ended
cash_and_equivalentsCash and cash equivalents
short_term_investmentsShort-term investments
current_debtCurrent portion of debt/notes
revenueRevenue
cost_of_revenueCost of revenue
revenue_1_fy_agoRevenue from one fiscal year ago
operating_cash_flowOperating cash flow
capital_expenditureCapital expenditure. It consists of the following items under "Cash flows from investing activities": 66 |
    67 |
  • Purchases of property, plant and equipment
  • 68 |
  • Capitalized software development costs
  • 69 |
  • Capitalized internal-use software
  • 70 |
  • Purchase of intangible assets
  • 71 |
72 |
76 | -------------------------------------------------------------------------------- /stock_db_dump.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- PostgreSQL database dump 3 | -- 4 | 5 | -- Dumped from database version 12.3 6 | -- Dumped by pg_dump version 12.3 7 | 8 | SET statement_timeout = 0; 9 | SET lock_timeout = 0; 10 | SET idle_in_transaction_session_timeout = 0; 11 | SET client_encoding = 'UTF8'; 12 | SET standard_conforming_strings = on; 13 | SELECT pg_catalog.set_config('search_path', '', false); 14 | SET check_function_bodies = false; 15 | SET xmloption = content; 16 | SET client_min_messages = warning; 17 | SET row_security = off; 18 | 19 | -- 20 | -- Name: tablefunc; Type: EXTENSION; Schema: -; Owner: - 21 | -- 22 | 23 | CREATE EXTENSION IF NOT EXISTS tablefunc WITH SCHEMA public; 24 | 25 | 26 | -- 27 | -- Name: EXTENSION tablefunc; Type: COMMENT; Schema: -; Owner: 28 | -- 29 | 30 | COMMENT ON EXTENSION tablefunc IS 'functions that manipulate whole tables, including crosstab'; 31 | 32 | 33 | SET default_tablespace = ''; 34 | 35 | SET default_table_access_method = heap; 36 | 37 | -- 38 | -- Name: company; Type: TABLE; Schema: public; Owner: postgres 39 | -- 40 | 41 | CREATE TABLE public.company ( 42 | filings character varying(10), 43 | reporting_currency character varying(10) NOT NULL, 44 | fiscal_year_ended date NOT NULL, 45 | cash_and_equivalents numeric(20,2) NOT NULL, 46 | short_term_investments numeric(20,2) NOT NULL, 47 | current_debt numeric(20,2) NOT NULL, 48 | revenue numeric(20,2) NOT NULL, 49 | cost_of_revenue numeric(20,2) NOT NULL, 50 | revenue_1_fy_ago numeric(20,2) NOT NULL, 51 | operating_cash_flow numeric(20,2) NOT NULL, 52 | capital_expenditure numeric(20,2) NOT NULL, 53 | central_index_key character varying(20) NOT NULL, 54 | company_name character varying(100) NOT NULL 55 | ); 56 | 57 | 58 | ALTER TABLE public.company OWNER TO postgres; 59 | 60 | -- 61 | -- Data for Name: 
company; Type: TABLE DATA; Schema: public; Owner: postgres 62 | -- 63 | 64 | COPY public.company (filings, reporting_currency, fiscal_year_ended, cash_and_equivalents, short_term_investments, current_debt, revenue, cost_of_revenue, revenue_1_fy_ago, operating_cash_flow, capital_expenditure, central_index_key, company_name) FROM stdin; 65 | 10-K USD 2020-01-31 268045000.00 499160000.00 187115000.00 389719000.00 139216000.00 260366000.00 68156000.00 11970000.00 0001385867 COUPA SOFTWARE INC 66 | 10-K USD 2019-12-28 20092000.00 60596000.00 0.00 23604000.00 9741000.00 13017000.00 -32286000.00 165000.00 0001756262 TRANSMEDICS GROUP, INC. 67 | 10-K USD 2020-03-31 192341000.00 250775000.00 0.00 840883000.00 151305000.00 769432000.00 314920000.00 44006000.00 0000815094 ABIOMED, INC. 68 | 10-K USD 2019-12-31 96576000.00 480290000.00 0.00 85071000.00 22274000.00 55663000.00 205404000.00 11200000.00 0001478320 ADAPTIVE BIOTECHNOLOGIES CORPORATION 69 | 10-K USD 2019-11-29 2650221000.00 1526755000.00 3149343000.00 11171297000.00 1672720000.00 9030008000.00 4421813000.00 394479000.00 0000796343 ADOBE INC. 70 | 10-K USD 2020-01-31 264798000.00 647266000.00 0.00 481413000.00 141627000.00 249824000.00 99943000.00 87487000.00 0001535527 CROWDSTRIKE HOLDINGS, INC. 71 | 10-K USD 2019-12-31 597297000.00 176674000.00 0.00 362780000.00 88949000.00 198077000.00 24234000.00 23443000.00 0001561550 DATADOG, INC. 72 | 10-K USD 2020-01-31 241203000.00 414939000.00 0.00 973971000.00 243234000.00 700969000.00 115696000.00 72046000.00 0001261333 DOCUSIGN, INC. 73 | 10-K USD 2019-12-31 177254000.00 146401000.00 834000.00 876293000.00 216717000.00 454462000.00 -115010000.00 171802000.00 0001124140 EXACT SCIENCES CORPORATION 74 | 10-K USD 2019-12-31 16142000.00 114967000.00 4472000.00 200462000.00 88322000.00 144563000.00 -31303000.00 20100000.00 0001517413 FASTLY, INC. 
75 | 10-K USD 2019-12-31 269670000.00 691834000.00 0.00 674860000.00 129958000.00 512980000.00 118973000.00 53846000.00 0001404655 HUBSPOT, INC. 76 | 10-K USD 2019-12-31 1167600000.00 2054100000.00 0.00 4478500000.00 1368300000.00 3724200000.00 1598200000.00 425600000.00 0001035267 INTUITIVE SURGICAL, INC. 77 | 10-K USD 2019-12-31 143228000.00 379574000.00 0.00 214375000.00 70720000.00 90639000.00 -47134000.00 21217000.00 0001576280 GUARDANT HEALTH, INC. 78 | 10-K USD 2019-12-31 241738000.00 150000000.00 0.00 170198000.00 46158000.00 68431000.00 -59396000.00 7194000.00 0001639225 LIVONGO HEALTH, INC. 79 | 10-K USD 2019-12-28 567687000.00 120000000.00 0.00 937837000.00 308665000.00 858289000.00 221640000.00 72492000.00 0000937556 MASIMO CORP 80 | 10-K USD 2020-01-31 706192000.00 280326000.00 0.00 421720000.00 125356000.00 267016000.00 -29540000.00 3564000.00 0001441816 MONGODB, INC. 81 | 10-K USD 2019-12-31 177321000.00 148769000.00 0.00 351318000.00 88606000.00 248069000.00 26620000.00 10485000.00 0001645113 NOVOCURE LIMITED 82 | 10-K USD 2020-01-31 520048000.00 882976000.00 100703000.00 586067000.00 159382000.00 399254000.00 55603000.00 27919000.00 0001660134 OKTA, INC. 83 | 10-K USD 2020-01-31 124024000.00 227375000.00 0.00 166351000.00 24579000.00 117823000.00 -173000.00 5174000.00 0001568100 PAGERDUTY, INC. 84 | 10-K USD 2019-12-31 528392000.00 0.00 0.00 270245000.00 119099000.00 194032000.00 67216000.00 23154000.00 0000730272 REPLIGEN CORP 85 | 10-K USD 2020-01-31 4145000000.00 3802000000.00 0.00 17098000000.00 4235000000.00 13282000000.00 4331000000.00 643000000.00 0001108524 SALESFORCE.COM, INC. 86 | 20-F USD 2019-12-31 3118988000.00 102324000.00 30739000.00 2175378000.00 1570458000.00 826968000.00 69865000.00 247098000.00 0001703399 SEA LIMITED 87 | 10-K USD 2019-12-31 139045000.00 56304000.00 6667000.00 42927000.00 17159000.00 12263000.00 -48107000.00 3817000.00 0001642545 SHOCKWAVE MEDICAL, INC. 
88 | 40-F USD 2019-12-31 649916000.00 1805278000.00 0.00 1578173000.00 712530000.00 1073229000.00 70615000.00 62397000.00 0001594805 SHOPIFY INC. 89 | 10-K USD 2019-12-31 39181000.00 51508000.00 0.00 63354000.00 15927000.00 34557000.00 -29610000.00 535000.00 0001397702 SILK ROAD MEDICAL, INC. 90 | 10-K USD 2020-01-31 498999000.00 269593000.00 0.00 630422000.00 97191000.00 400552000.00 -12389000.00 52126000.00 0001764925 SLACK TECHNOLOGIES, INC. 91 | 10-K USD 2019-12-29 2042000000.00 1372000000.00 0.00 3543000000.00 1076000000.00 3333000000.00 1051000000.00 209000000.00 0001110803 ILLUMINA, INC. 92 | 10-K USD 2019-12-31 514353000.00 2711000.00 0.00 553307000.00 184465000.00 417907000.00 29869000.00 10900000.00 0001477449 TELADOC HEALTH, INC. 93 | 10-K USD 2019-12-31 253660000.00 1599033000.00 0.00 1134468000.00 525551000.00 650067000.00 14048000.00 67290000.00 0001447669 TWILIO INC. 94 | 10-K USD 2020-01-31 476733000.00 610015000.00 0.00 1104081000.00 303369000.00 862210000.00 437375000.00 4321000.00 0001393052 VEEVA SYSTEMS INC 95 | 10-K USD 2019-12-31 3109322000.00 698972000.00 0.00 4162821000.00 547758000.00 3047597000.00 1569330000.00 75451000.00 0000875320 VERTEX PHARMACEUTICALS INC / MA 96 | 10-K USD 2019-12-31 196591000.00 286958000.00 0.00 816416000.00 234282000.00 598746000.00 89261000.00 46981000.00 0001463172 ZENDESK, INC. 97 | 10-K USD 2020-01-31 283134000.00 572060000.00 0.00 622658000.00 115396000.00 330517000.00 151892000.00 38225000.00 0001585521 ZOOM VIDEO COMMUNICATIONS, INC. 98 | 10-K USD 2019-12-31 409949000.00 376995000.00 68154000.00 417910000.00 39151000.00 253570000.00 34192000.00 11453000.00 0001689923 ALTERYX, INC. 99 | 10-K USD 2020-01-31 1774700000.00 69000000.00 449700000.00 3274300000.00 324900000.00 2569800000.00 1415100000.00 53200000.00 0000769397 AUTODESK, INC. 100 | 10-K USD 2019-12-31 120232000.00 487515000.00 0.00 288976000.00 58975000.00 227788000.00 29724000.00 9692000.00 0001666134 BLACKLINE, INC. 
101 | 10-K USD 2019-12-31 7741000.00 0.00 0.00 32767000.00 3661000.00 21491000.00 12728000.00 1698000.00 0001554859 SEMLER SCIENTIFIC, INC. 102 | 10-K USD 2019-12-31 22860000.00 126605000.00 0.00 82050000.00 13643000.00 50593000.00 -32846000.00 2739000.00 0001609550 INSPIRE MEDICAL SYSTEMS, INC. 103 | 10-K USD 2019-12-31 55046000.00 73243000.00 0.00 65207000.00 43127000.00 37774000.00 -18069000.00 8382000.00 0001527753 PERSONALIS, INC. 104 | 10-K USD 2019-12-31 52775000.00 0.00 12661000.00 534890000.00 214085000.00 522285000.00 134485000.00 27229000.00 0000353569 QUIDEL CORP /DE/ 105 | 20-F USD 2019-06-30 1268441000.00 445046000.00 853576000.00 1210127000.00 210285000.00 880978000.00 466342000.00 46302000.00 0001650372 ATLASSIAN CORP PLC 106 | 10-K USD 2019-09-30 7838000000.00 4236000000.00 0.00 22977000000.00 4165000000.00 20609000000.00 12784000000.00 756000000.00 0001403161 VISA INC. 107 | 10-K USD 2019-12-31 130876000.00 124112000.00 0.00 661058000.00 156180000.00 477294000.00 60205000.00 40604000.00 0001671933 TRADE DESK, INC. 108 | \. 
109 | 110 | 111 | -- 112 | -- Name: company company_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres 113 | -- 114 | 115 | ALTER TABLE ONLY public.company 116 | ADD CONSTRAINT company_pkey PRIMARY KEY (central_index_key); 117 | 118 | 119 | -- 120 | -- PostgreSQL database dump complete 121 | -- 122 | 123 | -------------------------------------------------------------------------------- /Extract_financial_data_from_XBRL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract Financial Data From XBRL Instance Document\n", 8 | "\n", 9 | "I have been manually noting the financial data from some companies' annual reports into Google Sheets, then downloading it as a CSV file for data analysis.\n", 10 | "\n", 11 | "I will try to automate the retrieval of financial data by extracting it from XBRL instance documents.\n", 12 | "\n", 13 | "# XBRL Instance Documents From SEC Website\n", 14 | "\n", 15 | "The XBRL instance documents are data files of companies' annual reports. They can be downloaded from the [SEC](https://www.sec.gov/edgar/searchedgar/companysearch.html) website.\n", 16 | "\n", 17 | "# Create Functions For Extracting Data From XBRL Instance Document\n", 18 | "\n", 19 | "First, I will create the functions that I need to extract data from an XBRL instance document. 
It will extract the following data that I need for analysis:\n", 20 | "* Company name\n", 21 | "* Filing type\n", 22 | "* Period end date\n", 23 | "* Reporting currency\n", 24 | "* Cash and cash equivalents\n", 25 | "* Short-term investments\n", 26 | "* Current portion of debt/notes\n", 27 | "* Revenue\n", 28 | "* Cost of revenue\n", 29 | "* Revenue from one fiscal year ago\n", 30 | "* Operating cash flow\n", 31 | "* Capital expenditure" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 1, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import scipy.stats as stats\n", 41 | "import xml.etree.ElementTree as ET\n", 42 | "\n", 43 | "def get_context_id_and_currency(root, ns, tag_name, fy_ended):\n", 44 | " context_id_length = 0\n", 45 | " context_id_list = []\n", 46 | " end_date_list = []\n", 47 | " unit_id_list = []\n", 48 | " \n", 49 | " tags = root.findall(tag_name, ns)\n", 50 | " \n", 51 | " # get shortest context id length\n", 52 | " for tag in tags:\n", 53 | " context_id = tag.attrib['contextRef']\n", 54 | " \n", 55 | " if context_id_length == 0 \\\n", 56 | " or len(context_id) <= context_id_length:\n", 57 | " context_id_length = len(context_id)\n", 58 | " \n", 59 | " for tag in tags:\n", 60 | " context_id = tag.attrib['contextRef']\n", 61 | " \n", 62 | " # only process for the shortest context id as that should be overall id\n", 63 | " if len(context_id) == context_id_length:\n", 64 | " if len(root.findall(\"xbrli:context[@id='{}']//xbrli:instant\" \\\n", 65 | " .format(context_id), ns)) > 0:\n", 66 | " end_date = root.find(\"xbrli:context[@id='{}']//xbrli:instant\" \\\n", 67 | " .format(context_id), ns)\n", 68 | " else:\n", 69 | " end_date = root.find(\"xbrli:context[@id='{}']//xbrli:endDate\" \\\n", 70 | " .format(context_id), ns)\n", 71 | " \n", 72 | " end_date_list.append(end_date.text)\n", 73 | "\n", 74 | " if end_date.text == fy_ended:\n", 75 | " context_id_list.append(context_id)\n", 76 | " 
unit_id_list.append(tag.attrib['unitRef'])\n", 77 | " \n", 78 | " context_id_current_fy = stats.mode(context_id_list)[0][0]\n", 79 | " \n", 80 | " # get currency\n", 81 | " unit_id = stats.mode(unit_id_list)[0][0]\n", 82 | " measure = root.find(\"xbrli:unit[@id='{}']//xbrli:measure\".format(unit_id), ns)\n", 83 | " currency = measure.text.split(':')[1]\n", 84 | " \n", 85 | " # get context id for previous FY\n", 86 | " previous_fy_ended = sorted(set(end_date_list))[-2]\n", 87 | " \n", 88 | " context_id_list = []\n", 89 | " \n", 90 | " for tag in tags:\n", 91 | " context_id = tag.attrib['contextRef']\n", 92 | " \n", 93 | " # only process for the shortest context id as that should be overall id\n", 94 | " if len(context_id) == context_id_length:\n", 95 | " if(len(root.findall(\"xbrli:context[@id='{}']//xbrli:instant\" \\\n", 96 | " .format(context_id), ns)) > 0):\n", 97 | " end_date = root.find(\"xbrli:context[@id='{}']//xbrli:instant\" \\\n", 98 | " .format(context_id), ns)\n", 99 | " else:\n", 100 | " end_date = root.find(\"xbrli:context[@id='{}']//xbrli:endDate\" \\\n", 101 | " .format(context_id), ns)\n", 102 | " \n", 103 | " if end_date.text == previous_fy_ended:\n", 104 | " context_id_list.append(context_id)\n", 105 | " \n", 106 | " context_id_previous_fy = stats.mode(context_id_list)[0][0]\n", 107 | " \n", 108 | " return (context_id_current_fy, context_id_previous_fy, currency)\n", 109 | "\n", 110 | "\n", 111 | "\n", 112 | "def get_value(root, ns, tag_name, context_id):\n", 113 | " text_list = []\n", 114 | " \n", 115 | " elements = root.findall(\"{tag_name}[@contextRef='{context_id}']\" \\\n", 116 | " .format(tag_name=tag_name\n", 117 | " , context_id=context_id)\n", 118 | " , ns)\n", 119 | " \n", 120 | " for e in elements:\n", 121 | " text_list.append(e.text)\n", 122 | " \n", 123 | " # get value\n", 124 | " value = stats.mode(text_list)[0][0]\n", 125 | " \n", 126 | " return value \n", 127 | "\n", 128 | "\n", 129 | "\n", 130 | "import datetime as dt\n", 131 | 
"import pandas as pd\n", 132 | "import re\n", 133 | "\n", 134 | "def extract_data_from_XBRL(file_path):\n", 135 | " company = {}\n", 136 | " tree = ET.parse(file_path)\n", 137 | " root = tree.getroot()\n", 138 | " \n", 139 | " # get namespaces\n", 140 | " namespaces = []\n", 141 | " for key, value in ET.iterparse(file_path, ['start-ns']):\n", 142 | " namespaces.append(value)\n", 143 | "\n", 144 | " ns = dict(namespaces)\n", 145 | " \n", 146 | " # if xbrl instance namespace has no prefix, then set a prefix\n", 147 | " for k in ns:\n", 148 | " if re.search(r'^http://www\\.xbrl\\.org/\\d+/instance$', ns[k]) \\\n", 149 | " and len(k) == 0:\n", 150 | " ns['xbrli'] = ns[k]\n", 151 | " break;\n", 152 | "\n", 153 | " # get central index key\n", 154 | " central_index_key = root.find('dei:EntityCentralIndexKey', ns)\n", 155 | " \n", 156 | " if central_index_key is None:\n", 157 | " raise Exception('Central index key not found.')\n", 158 | " \n", 159 | " # get company name\n", 160 | " company_name = root.find('dei:EntityRegistrantName', ns)\n", 161 | " \n", 162 | " if company_name is None:\n", 163 | " raise Exception('Entity registrant name not found.')\n", 164 | " \n", 165 | " company['company_name'] = company_name.text.upper()\n", 166 | " \n", 167 | " # get document type\n", 168 | " document_type = root.find('dei:DocumentType', ns)\n", 169 | " \n", 170 | " if document_type is None:\n", 171 | " raise Exception('Document type not found.')\n", 172 | " \n", 173 | " company['filings'] = document_type.text\n", 174 | " \n", 175 | " # get period end date\n", 176 | " document_period_end_date = root.find('dei:DocumentPeriodEndDate', ns)\n", 177 | " \n", 178 | " if document_period_end_date is None:\n", 179 | " raise Exception('Document period end date not found.')\n", 180 | " \n", 181 | " company['fiscal_year_ended'] = dt.datetime.strptime(document_period_end_date.text\n", 182 | " , '%Y-%m-%d')\n", 183 | " \n", 184 | " # get namespace of financial data because not all are 
'us-gaap'\n", 185 | " ns_fd = ''\n", 186 | " current_assets_tag_name = ''\n", 187 | " \n", 188 | " for child in root:\n", 189 | " namespace = child.tag.split('}')[0][1:]\n", 190 | " tag_name = child.tag.split('}')[1]\n", 191 | " \n", 192 | " if tag_name == 'AssetsCurrent' or tag_name == 'CurrentAssets':\n", 193 | " for k in ns:\n", 194 | " if ns[k] == namespace:\n", 195 | " ns_fd = k\n", 196 | " current_assets_tag_name = tag_name\n", 197 | "\n", 198 | " # get context id of balance sheet and balance sheet currency\n", 199 | " temp = get_context_id_and_currency(root, ns\n", 200 | " , ns_fd + ':' + current_assets_tag_name\n", 201 | " , document_period_end_date.text)\n", 202 | " bs_id_for_fy = temp[0]\n", 203 | " currency = temp[2]\n", 204 | " company['reporting_currency'] = currency\n", 205 | " \n", 206 | " # get current assets and current liabilities\n", 207 | " elements = root.findall(\"*[@contextRef='{context_id}']\" \\\n", 208 | " .format(context_id = bs_id_for_fy)\n", 209 | " , ns)\n", 210 | " \n", 211 | " reached_cash = False\n", 212 | " reached_total_current_assets = False\n", 213 | " reached_total_current_liabilities = False\n", 214 | " current_liabilities_started = False\n", 215 | " current_asset_list = ET.Element('CurrentAssetList')\n", 216 | " current_liability_list = ET.Element('CurrentLiabilityList')\n", 217 | " expected_bs_seqence = ['Cash', 'Total current assets', 'Total assets'\n", 218 | " , 'Total current liabilities']\n", 219 | " actual_bs_sequence = []\n", 220 | " \n", 221 | " for e in elements:\n", 222 | " tag_name = e.tag.split('}')[1]\n", 223 | " \n", 224 | " if tag_name == current_assets_tag_name:\n", 225 | " reached_total_current_assets = True\n", 226 | " actual_bs_sequence.append('Total current assets')\n", 227 | " elif tag_name.startswith('Cash') and \\\n", 228 | " not reached_total_current_assets:\n", 229 | " reached_cash = True\n", 230 | " actual_bs_sequence.append('Cash')\n", 231 | " elif tag_name == 'Assets':\n", 232 | " 
current_liabilities_started = True\n", 233 | " actual_bs_sequence.append('Total assets')\n", 234 | " continue\n", 235 | " elif tag_name == 'LiabilitiesCurrent':\n", 236 | " reached_total_current_liabilities = True\n", 237 | " actual_bs_sequence.append('Total current liabilities')\n", 238 | " \n", 239 | " if reached_cash and not reached_total_current_assets:\n", 240 | " current_asset_list.append(e)\n", 241 | " \n", 242 | " if current_liabilities_started \\\n", 243 | " and not reached_total_current_liabilities:\n", 244 | " current_liability_list.append(e)\n", 245 | " \n", 246 | " if reached_total_current_liabilities:\n", 247 | " break;\n", 248 | " \n", 249 | " if actual_bs_sequence != expected_bs_seqence:\n", 250 | " raise Exception('The balance sheet fields in file are not in the expected sequence.')\n", 251 | " \n", 252 | " # get cash and cash equivalents value\n", 253 | " # tag name may be 'CashAndCashEquivalentsAtCarryingValue'\n", 254 | " # or 'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'\n", 255 | " cash_and_equivalents = []\n", 256 | " tag_names = [ns_fd + ':CashAndCashEquivalentsAtCarryingValue'\n", 257 | " , ns_fd + ':CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents']\n", 258 | " \n", 259 | " if len(current_asset_list) > 0:\n", 260 | " for tag_name in tag_names:\n", 261 | " result = current_asset_list.find(tag_name, ns)\n", 262 | "\n", 263 | " if not(result is None):\n", 264 | " cash_and_equivalents.append(float(result.text))\n", 265 | " break\n", 266 | " else:\n", 267 | " for tag_name in tag_names:\n", 268 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 269 | " , bs_id_for_fy)\n", 270 | " , ns)) > 0:\n", 271 | " value = get_value(root, ns, tag_name, bs_id_for_fy)\n", 272 | " cash_and_equivalents.append(float(value))\n", 273 | " break\n", 274 | " \n", 275 | " if len(cash_and_equivalents) == 0:\n", 276 | " raise Exception('Cash and cash equivalents not found.')\n", 277 | " \n", 278 | " 
company['cash_and_equivalents'] = sum(cash_and_equivalents)\n", 279 | " \n", 280 | " # get short-term investments value, some company do not have short-term investment\n", 281 | " # tag name may be 'MarketableSecuritiesCurrent' \n", 282 | " # or 'AvailableForSaleSecuritiesDebtSecuritiesCurrent' \n", 283 | " # or 'AvailableForSaleSecuritiesCurrent'\n", 284 | " # or 'ShortTermInvestments' or 'HeldToMaturitySecuritiesCurrent'\n", 285 | " short_term_investments = []\n", 286 | " tag_names = [ns_fd + ':MarketableSecuritiesCurrent'\n", 287 | " , ns_fd + ':AvailableForSaleSecuritiesDebtSecuritiesCurrent'\n", 288 | " , ns_fd + ':AvailableForSaleSecuritiesCurrent'\n", 289 | " , ns_fd + ':ShortTermInvestments'\n", 290 | " , ns_fd + ':HeldToMaturitySecuritiesCurrent']\n", 291 | "\n", 292 | " if len(current_asset_list) > 0:\n", 293 | " for tag_name in tag_names:\n", 294 | " result = current_asset_list.find(tag_name, ns)\n", 295 | "\n", 296 | " if not(result is None):\n", 297 | " short_term_investments.append(float(result.text))\n", 298 | " else:\n", 299 | " for tag_name in tag_names:\n", 300 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 301 | " , bs_id_for_fy)\n", 302 | " , ns)) > 0:\n", 303 | " value = get_value(root, ns, tag_name, bs_id_for_fy)\n", 304 | " short_term_investments.append(float(value))\n", 305 | " \n", 306 | " company['short_term_investments'] = sum(short_term_investments)\n", 307 | " \n", 308 | " # get current debt value, some company do not have current debt\n", 309 | " # tag name may be 'NotesPayableCurrent' or 'ConvertibleDebtCurrent'\n", 310 | " # or 'ConvertibleNotesPayableCurrent' or 'LongTermDebtCurrent'\n", 311 | " # or 'LongTermDebtAndCapitalLeaseObligationsCurrent'\n", 312 | " # or 'LoansPayableToBankCurrent'\n", 313 | " current_debt_items = []\n", 314 | " tag_names = [ns_fd + ':NotesPayableCurrent'\n", 315 | " , ns_fd + ':ConvertibleDebtCurrent'\n", 316 | " , ns_fd + ':ConvertibleNotesPayableCurrent'\n", 317 | " , ns_fd + 
':LongTermDebtCurrent'\n", 318 | " , ns_fd + ':LongTermDebtAndCapitalLeaseObligationsCurrent'\n", 319 | " , ns_fd + ':LoansPayableToBankCurrent'\n", 320 | " , ns_fd + ':DebtCurrent'\n", 321 | " , ns_fd + ':LinesOfCreditCurrent']\n", 322 | " \n", 323 | " if len(current_liability_list) > 0:\n", 324 | " for tag_name in tag_names:\n", 325 | " result = current_liability_list.find(tag_name, ns)\n", 326 | "\n", 327 | " if not(result is None):\n", 328 | " current_debt_items.append(float(result.text))\n", 329 | " else:\n", 330 | " for tag_name in tag_names:\n", 331 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 332 | " , bs_id_for_fy)\n", 333 | " , ns)) > 0:\n", 334 | " value = get_value(root, ns, tag_name, bs_id_for_fy)\n", 335 | " current_debt_items.append(float(value))\n", 336 | "\n", 337 | " company['current_debt'] = sum(current_debt_items)\n", 338 | " \n", 339 | " \n", 340 | " # get context id of cash flow and income statement for current and previous FY\n", 341 | " temp = get_context_id_and_currency(root, ns\n", 342 | " , ns_fd + ':NetCashProvidedByUsedInOperatingActivities'\n", 343 | " , document_period_end_date.text)\n", 344 | "\n", 345 | " cf_in_id_curr_fy = temp[0]\n", 346 | " cf_in_id_prev_fy = temp[1]\n", 347 | " currency = temp[2]\n", 348 | " \n", 349 | " # if currency from income statement is different from balance sheet\n", 350 | " if currency != company['reporting_currency']:\n", 351 | " raise Exception('Currency ' + currency +' from income statement is ' \n", 352 | " + 'different from currency ' + company['reporting_currency'] \n", 353 | " + ' from balance sheet.')\n", 354 | "\n", 355 | " # get cash flows from investing activities and income statement\n", 356 | " elements = root.findall(\"*[@contextRef='{context_id}']\" \\\n", 357 | " .format(context_id = cf_in_id_curr_fy)\n", 358 | " , ns)\n", 359 | " \n", 360 | " investing_cash_flow_started = False\n", 361 | " end_of_investing_cash_flow = False\n", 362 | " reached_revenue = False\n", 
363 | " reached_operating_incomeloss = False\n", 364 | " expected_is_seqence = ['Total revenues', 'Operating income/loss']\n", 365 | " actual_is_sequence = []\n", 366 | " expected_cfs_seqence = ['Net cash provided by operating activities'\n", 367 | " , 'Net cash used in investing activities']\n", 368 | " actual_cfs_sequence = []\n", 369 | " \n", 370 | " cf_investing_activity_list = ET.Element('CashFlowInvestingActivities')\n", 371 | " income_statement_items = ET.Element('IncomeStatementItems')\n", 372 | " \n", 373 | " for e in elements:\n", 374 | " tag_name = e.tag.split('}')[1]\n", 375 | " \n", 376 | " if tag_name == 'NetCashProvidedByUsedInOperatingActivities':\n", 377 | " investing_cash_flow_started = True\n", 378 | " actual_cfs_sequence.append('Net cash provided by operating activities')\n", 379 | " continue\n", 380 | " elif tag_name == 'NetCashProvidedByUsedInInvestingActivities':\n", 381 | " end_of_investing_cash_flow = True\n", 382 | " actual_cfs_sequence.append('Net cash used in investing activities')\n", 383 | " elif (tag_name.startswith('Revenues') \\\n", 384 | " or tag_name.startswith('RevenueFromContractWithCustomer')) \\\n", 385 | " and not reached_operating_incomeloss:\n", 386 | " reached_revenue = True\n", 387 | " actual_is_sequence.append('Total revenues')\n", 388 | " elif tag_name == 'OperatingIncomeLoss':\n", 389 | " reached_operating_incomeloss = True\n", 390 | " actual_is_sequence.append('Operating income/loss')\n", 391 | " \n", 392 | " if investing_cash_flow_started \\\n", 393 | " and not end_of_investing_cash_flow:\n", 394 | " cf_investing_activity_list.append(e)\n", 395 | " \n", 396 | " if reached_revenue and not reached_operating_incomeloss:\n", 397 | " income_statement_items.append(e)\n", 398 | " \n", 399 | " if end_of_investing_cash_flow:\n", 400 | " break;\n", 401 | " \n", 402 | " if actual_is_sequence != expected_is_seqence:\n", 403 | " raise Exception('The income statement fields in file are not in the expected sequence')\n", 404 | " 
\n", 405 | " if actual_cfs_sequence != expected_cfs_seqence:\n", 406 | " raise Exception('The cash flow statement fields in file are not in the expected sequence.')\n", 407 | " \n", 408 | " # get revenue value\n", 409 | " # tag name may be 'RevenueFromContractWithCustomerExcludingAssessedTax'\n", 410 | " # or 'RevenueFromContractWithCustomerIncludingAssessedTax'\n", 411 | " # or 'Revenues'\n", 412 | " revenue_items = []\n", 413 | " tag_names = [ns_fd + ':RevenueFromContractWithCustomerExcludingAssessedTax'\n", 414 | " , ns_fd + ':RevenueFromContractWithCustomerIncludingAssessedTax'\n", 415 | " , ns_fd + ':Revenues']\n", 416 | " rev_tag_name = ''\n", 417 | " \n", 418 | " if len(income_statement_items) > 0:\n", 419 | " for tag_name in tag_names:\n", 420 | " result = income_statement_items.find(tag_name, ns)\n", 421 | "\n", 422 | " if not(result is None):\n", 423 | " rev_tag_name = tag_name\n", 424 | " revenue_items.append(float(result.text))\n", 425 | " break\n", 426 | " else:\n", 427 | " for tag_name in tag_names:\n", 428 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 429 | " , cf_in_id_curr_fy)\n", 430 | " , ns)) > 0:\n", 431 | " value = get_value(root, ns, tag_name, cf_in_id_curr_fy)\n", 432 | " revenue_items.append(float(value))\n", 433 | " rev_tag_name = tag_name\n", 434 | " break\n", 435 | " \n", 436 | " if len(revenue_items) == 0:\n", 437 | " raise Exception('Revenue not found.')\n", 438 | " \n", 439 | " company['revenue'] = sum(revenue_items)\n", 440 | " \n", 441 | " \n", 442 | " # get cost of revenue value\n", 443 | " # tag name may be 'CostOfRevenue' or 'CostOfGoodsAndServicesSold'\n", 444 | " cost_of_revenue_items = []\n", 445 | " tag_names = [ns_fd + ':CostOfRevenue'\n", 446 | " , ns_fd + ':CostOfGoodsAndServicesSold']\n", 447 | " \n", 448 | " if len(income_statement_items) > 0:\n", 449 | " for tag_name in tag_names:\n", 450 | " result = income_statement_items.find(tag_name, ns)\n", 451 | "\n", 452 | " if not(result is None):\n", 453 
| " cost_of_revenue_items.append(float(result.text))\n", 454 | " break\n", 455 | " else:\n", 456 | " for tag_name in tag_names:\n", 457 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 458 | " , cf_in_id_curr_fy)\n", 459 | " , ns)) > 0:\n", 460 | " value = get_value(root, ns, tag_name, cf_in_id_curr_fy)\n", 461 | " cost_of_revenue_items.append(float(value))\n", 462 | " break\n", 463 | " \n", 464 | " if len(cost_of_revenue_items) == 0:\n", 465 | " raise Exception('Cost of revenue not found.')\n", 466 | " \n", 467 | " company['cost_of_revenue'] = sum(cost_of_revenue_items)\n", 468 | " \n", 469 | " # get revenue value of previous FY\n", 470 | " value = get_value(root, ns, rev_tag_name, cf_in_id_prev_fy)\n", 471 | " \n", 472 | " company['revenue_1_fy_ago'] = float(value)\n", 473 | " \n", 474 | " # get operating cash flow value\n", 475 | " value = get_value(root, ns\n", 476 | " , ns_fd + ':NetCashProvidedByUsedInOperatingActivities'\n", 477 | " , cf_in_id_curr_fy)\n", 478 | " \n", 479 | " company['operating_cash_flow'] = float(value)\n", 480 | " \n", 481 | " # get capital expenditure value\n", 482 | " # tag name may be 'PaymentsForCapitalImprovements' \n", 483 | " # or 'PaymentsToAcquirePropertyPlantAndEquipment'\n", 484 | " # or 'PaymentsToDevelopSoftware' or 'PaymentsToAcquireProductiveAssets'\n", 485 | " # or 'PaymentsForSoftware' or 'PaymentsToAcquireIntangibleAssets'\n", 486 | " # or 'PaymentsToAcquireSoftware' or 'PaymentsToAcquireEquipmentOnLease'\n", 487 | " tag_names = [ns_fd + ':PaymentsForCapitalImprovements'\n", 488 | " , ns_fd + ':PaymentsToAcquirePropertyPlantAndEquipment'\n", 489 | " , ns_fd + ':PaymentsToDevelopSoftware'\n", 490 | " , ns_fd + ':PaymentsToAcquireProductiveAssets'\n", 491 | " , ns_fd + ':PaymentsForSoftware'\n", 492 | " , ns_fd + ':PaymentsToAcquireIntangibleAssets'\n", 493 | " , ns_fd + ':PaymentsToAcquireSoftware'\n", 494 | " , ns_fd + ':PaymentsToAcquireEquipmentOnLease']\n", 495 | " cap_ex_items = []\n", 496 | " 
\n", 497 | " if len(cf_investing_activity_list) > 0:\n", 498 | " for tag_name in tag_names:\n", 499 | " result = cf_investing_activity_list.find(tag_name, ns)\n", 500 | "\n", 501 | " if not(result is None):\n", 502 | " cap_ex_items.append(float(result.text))\n", 503 | " else:\n", 504 | " for tag_name in tag_names:\n", 505 | " if len(root.findall(\"{}[@contextRef='{}']\".format(tag_name\n", 506 | " , cf_in_id_curr_fy)\n", 507 | " , ns)) > 0:\n", 508 | " value = get_value(root, ns, tag_name, cf_in_id_curr_fy)\n", 509 | " cap_ex_items.append(float(value))\n", 510 | " \n", 511 | " if len(cap_ex_items) == 0:\n", 512 | " raise Exception('Capital expenditure not found.')\n", 513 | " \n", 514 | " company['capital_expenditure'] = sum(cap_ex_items)\n", 515 | " \n", 516 | " return pd.DataFrame(company, index=[central_index_key.text])\n" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "# Process XBRL Instance Documents\n", 524 | "\n", 525 | "Now that I have created the functions, I will process each XBRL instance document by passing their file paths as input to the function." 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 2, 531 | "metadata": { 532 | "scrolled": true 533 | }, 534 | "outputs": [ 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "Processing \"adsk-20210131_htm.xml\" ... completed.\n", 540 | "Processing \"ayx-20201231_htm.xml\" ... completed.\n", 541 | "Processing \"bl-20201231_htm.xml\" ... completed.\n", 542 | "Processing \"brhc10022673_20f_htm.xml\" ... completed.\n", 543 | "Processing \"bynd-20201231_htm.xml\" ... completed.\n", 544 | "Processing \"cdna-20201231_htm.xml\" ... ERROR OCCURRED: Cost of revenue not found.\n", 545 | "Processing \"cloud-20201231_htm.xml\" ... completed.\n", 546 | "Processing \"coup-20210131_htm.xml\" ... completed.\n", 547 | "Processing \"crm-20210131_htm.xml\" ... 
completed.\n", 548 | "Processing \"crwd-20210131_htm.xml\" ... completed.\n", 549 | "Processing \"d105808d10k_htm.xml\" ... completed.\n", 550 | "Processing \"ddog-20201231.xml\" ... completed.\n", 551 | "Processing \"docu-20210131_htm.xml\" ... completed.\n", 552 | "Processing \"exas-20201231_htm.xml\" ... completed.\n", 553 | "Processing \"fivn-20201231_htm.xml\" ... completed.\n", 554 | "Processing \"frpt20201231b_10k_htm.xml\" ... completed.\n", 555 | "Processing \"fsly-20201231_htm.xml\" ... completed.\n", 556 | "Processing \"gh-20201231_htm.xml\" ... ERROR OCCURRED: Cost of revenue not found.\n", 557 | "Processing \"hubs-10k_20201231_htm.xml\" ... completed.\n", 558 | "Processing \"insp-20201231_htm.xml\" ... completed.\n", 559 | "Processing \"isrg-20201231_htm.xml\" ... completed.\n", 560 | "Processing \"mdb-20210131_htm.xml\" ... completed.\n", 561 | "Processing \"meli-20201231x10k_htm.xml\" ... completed.\n", 562 | "Processing \"nvcr-20201231_htm.xml\" ... completed.\n", 563 | "Processing \"okta-20210131_htm.xml\" ... completed.\n", 564 | "Processing \"pd-20210131_htm.xml\" ... completed.\n", 565 | "Processing \"pins-20201231_htm.xml\" ... completed.\n", 566 | "Processing \"psnl-20201231.xml\" ... completed.\n", 567 | "Processing \"roku-10k_20201231_htm.xml\" ... completed.\n", 568 | "Processing \"sgen-20201231_htm.xml\" ... completed.\n", 569 | "Processing \"shop-20201231_htm.xml\" ... completed.\n", 570 | "Processing \"silk-20201231_htm.xml\" ... completed.\n", 571 | "Processing \"smar-20200131_htm.xml\" ... ERROR OCCURRED: ':'\n", 572 | "Processing \"smlr-20201231x10k_htm.xml\" ... completed.\n", 573 | "Processing \"swav-20201231.xml\" ... completed.\n", 574 | "Processing \"tdoc-20201231x10k_htm.xml\" ... completed.\n", 575 | "Processing \"team-20200630_htm.xml\" ... ERROR OCCURRED: The balance sheet fields in file are not in the expected sequence.\n", 576 | "Processing \"tmdx-20201231.xml\" ... 
ERROR OCCURRED: The income statement fields in file are not in the expected sequence\n", 577 | "Processing \"ttd-10k_20201231_htm.xml\" ... completed.\n", 578 | "Processing \"twlo-20201231_htm.xml\" ... completed.\n", 579 | "Processing \"unity-20201231_htm.xml\" ... completed.\n", 580 | "Processing \"veev-20210131_htm.xml\" ... completed.\n", 581 | "Processing \"vrtx-20201231_htm.xml\" ... completed.\n", 582 | "Processing \"work-20210131_htm.xml\" ... completed.\n", 583 | "Processing \"zen-20201231_htm.xml\" ... completed.\n", 584 | "Processing \"zm-20210131_htm.xml\" ... completed.\n", 585 | "Processing \"zs-20200731_htm.xml\" ... completed.\n", 586 | "\n", 587 | "Out of the 47 documents, 5 encountered error.\n" 588 | ] 589 | } 590 | ], 591 | "source": [ 592 | "import glob\n", 593 | "import pathlib as pl\n", 594 | "import os\n", 595 | "import configparser\n", 596 | "\n", 597 | "# get configuration from config file\n", 598 | "config = configparser.ConfigParser()\n", 599 | "config.read('config.ini')\n", 600 | "#config.read(os.path.dirname(__file__) + '/config.ini') # use this line when running from cron job\n", 601 | "uid = config['Sql']['uid']\n", 602 | "pwd = config['Sql']['pwd']\n", 603 | "host = config['Sql']['host']\n", 604 | "port = config['Sql']['port']\n", 605 | "dir_data_file = config['Path']['dir_data_file']\n", 606 | "\n", 607 | "company_df_list = []\n", 608 | "error_count = 0\n", 609 | "dir_completed = dir_data_file + '/completed'\n", 610 | "dir_error = dir_data_file + '/error'\n", 611 | "\n", 612 | "# create directory if not exist\n", 613 | "pl.Path(dir_completed).mkdir(exist_ok=True)\n", 614 | "pl.Path(dir_error).mkdir(exist_ok=True)\n", 615 | "\n", 616 | "# process each file\n", 617 | "file_paths = glob.glob(dir_data_file + '/*.xml')\n", 618 | "\n", 619 | "for file_path in sorted(file_paths):\n", 620 | " file_name = file_path.split('/')[-1]\n", 621 | " print('Processing \"{}\" ... 
'.format(file_name), end='')\n", 622 | " \n", 623 | " try:\n", 624 | " df = extract_data_from_XBRL(file_path)\n", 625 | " company_df_list.append(df)\n", 626 | " os.replace(dir_data_file + '/' + file_name, dir_completed + '/' + file_name)\n", 627 | " \n", 628 | " print('completed.')\n", 629 | " except Exception as e:\n", 630 | " print('ERROR OCCURRED:',str(e))\n", 631 | " error_count += 1\n", 632 | " os.replace(dir_data_file + '/' + file_name, dir_error + '/' + file_name)\n", 633 | " \n", 634 | "\n", 635 | "print()\n", 636 | "print('Out of the {} documents, {} encountered error.'.format(len(file_paths)\n", 637 | " , error_count))\n", 638 | "\n", 639 | "# combined the list of dataframes into a single dataframe\n", 640 | "if len(company_df_list) > 0:\n", 641 | " companies_financials = pd.concat(company_df_list).sort_values('company_name')" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "I noticed that `ERROR OCCURRED` was reported for some of the documents. After investigating, I found the following causes:\n", 649 | "* `Cost of revenue not found`: The income statement has no cost of revenue section.\n", 650 | "* `The balance sheet fields in file are not in the expected sequence`: The balance sheet fields do not appear in the file in the order that the extraction function expects. The `The income statement fields in file are not in the expected sequence` error is the same kind of mismatch, on the income statement.\n", 651 | "\n", 652 | "I will have to record the financial data for these companies manually.\n", 653 | "\n", 654 | "# Display Extracted Data For Verification\n", 655 | "\n", 656 | "I will display the extracted data to verify that the correct data were extracted." 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": 3, 662 | "metadata": {}, 663 | "outputs": [ 664 | { 665 | "data": { 666 | "text/html": [ 667 | "
\n", 668 | "\n", 681 | "\n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " 
\n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | 
" \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 
1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " 
\n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | "
company_namefilingsfiscal_year_endedreporting_currencycash_and_equivalentsshort_term_investmentscurrent_debtrevenuecost_of_revenuerevenue_1_fy_agooperating_cash_flowcapital_expenditure
0001689923ALTERYX, INC.10-K2020-12-31USD171,891,000.00584,445,000.0072,619,000.00495,308,000.0043,839,000.00417,910,000.0074,782,000.0026,358,000.00
0000769397AUTODESK, INC.10-K2021-01-31USD1,772,200,000.0085,000,000.000.003,790,400,000.00337,100,000.003,274,300,000.001,437,200,000.0095,900,000.00
0001655210BEYOND MEAT, INC.10-K2020-12-31USD159,127,000.000.0025,000,000.00406,785,000.00284,510,000.00297,897,000.00-39,995,000.0057,696,000.00
0001666134BLACKLINE, INC.10-K2020-12-31USD367,413,000.00175,206,000.000.00351,737,000.0068,972,000.00288,976,000.0054,735,000.0019,424,000.00
0001477333CLOUDFLARE, INC.10-K2020-12-31USD108,895,000.00923,201,000.000.00431,059,000.00101,055,000.00287,022,000.00-17,129,000.0074,962,000.00
0001385867COUPA SOFTWARE INC10-K2021-01-31USD323,284,000.00283,036,000.00609,068,000.00541,643,000.00221,701,000.00389,719,000.0078,202,000.0011,492,000.00
0001535527CROWDSTRIKE HOLDINGS, INC.10-K2021-01-31USD1,918,608,000.000.000.00874,438,000.00229,545,000.00481,413,000.00356,566,000.0063,843,000.00
0001561550DATADOG, INC.10-K2020-12-31USD224,927,000.001,292,532,000.000.00603,466,000.00130,197,000.00362,780,000.00109,091,000.0025,883,000.00
0001261333DOCUSIGN, INC.10-K2021-01-31USD566,055,000.00207,450,000.0020,469,000.001,453,047,000.00364,058,000.00973,971,000.00296,954,000.0082,395,000.00
0001124140EXACT SCIENCES CORPORATION10-K2020-12-31USD1,491,288,000.00348,699,000.00255,464,000.001,491,391,000.00354,324,000.00876,293,000.00136,482,000.0064,352,000.00
0001517413FASTLY, INC.10-K2020-12-31USD62,900,000.00131,283,000.000.00290,874,000.00120,007,000.00200,462,000.00-19,916,000.0037,511,000.00
0001288847FIVE9, INC.10-K2020-12-31USD220,372,000.00383,171,000.000.00434,908,000.00180,284,000.00328,006,000.0067,302,000.0030,422,000.00
0001611647FRESHPET INC10-K2020-12-31USD67,247,000.000.000.00318,790,000.00185,880,000.00245,862,000.0021,193,000.00134,568,000.00
0001404655HUBSPOT, INC.10-K2020-12-31USD378,123,000.00873,073,000.007,837,000.00883,026,000.00166,959,000.00674,860,000.0088,913,000.0058,873,000.00
0001609550INSPIRE MEDICAL SYSTEMS, INC.10-K2020-12-31USD190,518,000.0043,844,000.000.00115,381,000.0017,623,000.0082,050,000.00-53,045,000.002,455,000.00
0001035267INTUITIVE SURGICAL, INC.10-K2020-12-31USD1,622,600,000.003,488,800,000.000.004,358,400,000.001,497,200,000.004,478,500,000.001,484,800,000.00341,500,000.00
0001099590MERCADOLIBRE, INC.10-K2020-12-31USD1,856,394,000.001,241,306,000.000.003,973,465,000.002,264,255,000.002,296,314,000.001,182,552,000.00247,141,000.00
0001441816MONGODB, INC.10-K2021-01-31USD429,697,000.00528,045,000.000.00590,380,000.00177,076,000.00421,720,000.00-42,673,000.0011,773,000.00
0001645113NOVOCURE LIMITED10-K2020-12-31USD234,674,000.00607,902,000.000.00494,366,000.00106,501,000.00351,318,000.0099,148,000.0014,968,000.00
0001660134OKTA, INC.10-K2021-01-31USD434,607,000.002,121,584,000.00908,684,000.00835,424,000.00217,681,000.00586,067,000.00127,962,000.0017,368,000.00
0001568100PAGERDUTY, INC.10-K2021-01-31USD339,166,000.00221,112,000.000.00213,556,000.0030,686,000.00166,351,000.0010,095,000.004,848,000.00
0001527753PERSONALIS, INC.10-K2020-12-31USD68,525,000.00134,765,000.000.0078,648,000.0058,534,000.0065,207,000.00-42,653,000.003,246,000.00
0001506293PINTEREST, INC.10-K2020-12-31USD669,230,000.001,091,076,000.000.001,692,658,000.00449,358,000.001,142,761,000.0028,826,000.0017,401,000.00
0000730272REPLIGEN CORP10-K2020-12-31USD717,292,000.000.00243,737,000.00366,260,000.00156,634,000.00270,245,000.0062,625,000.0026,344,000.00
0001428439ROKU, INC10-K2020-12-31USD1,092,815,000.000.004,874,000.001,778,388,000.00970,169,000.001,128,921,000.00148,192,000.0082,382,000.00
0001108524SALESFORCE.COM, INC.10-K2021-01-31USD6,195,000,000.005,771,000,000.000.0021,252,000,000.005,438,000,000.0017,098,000,000.004,801,000,000.00710,000,000.00
0001703399SEA LIMITED20-F2020-12-31USD6,166,880,000.00126,099,000.000.004,375,664,000.003,026,759,000.002,175,378,000.00555,868,000.00357,054,000.00
0001060736SEAGEN INC.10-K2020-12-31USD558,424,000.002,000,996,000.000.002,175,536,000.00217,720,000.00916,713,000.00856,568,000.0082,409,000.00
0001554859SEMLER SCIENTIFIC, INC.10-K2020-12-31USD22,079,000.000.000.0038,603,000.003,356,000.0032,767,000.0015,417,000.001,061,000.00
0001642545SHOCKWAVE MEDICAL, INC.10-K2020-12-31USD50,423,000.00151,931,000.003,300,000.0067,789,000.0020,991,000.0042,927,000.00-71,184,000.0011,520,000.00
0001594805SHOPIFY INC.40-F2020-12-31USD2,703,597,000.003,684,370,000.000.002,929,491,000.001,387,971,000.001,578,173,000.00424,958,000.0041,995,000.00
0001397702SILK ROAD MEDICAL, INC.10-K2020-12-31USD69,466,000.0078,016,000.000.0075,227,000.0021,291,000.0063,354,000.00-42,068,000.00842,000.00
0001764925SLACK TECHNOLOGIES, INC.10-K2021-01-31USD1,081,357,000.00505,895,000.000.00902,610,000.00121,692,000.00630,422,000.0072,439,000.0012,607,000.00
0001477449TELADOC HEALTH, INC.10-K2020-12-31USD733,324,000.0053,245,000.0042,560,000.001,093,962,000.00390,829,000.00553,307,000.00-53,511,000.0026,042,000.00
0001671933TRADE DESK, INC.10-K2020-12-31USD437,353,000.00186,685,000.000.00836,033,000.00178,812,000.00661,058,000.00405,069,000.0080,114,000.00
0001447669TWILIO INC.10-K2020-12-31USD933,885,000.002,105,906,000.000.001,761,776,000.00846,115,000.001,134,468,000.0032,654,000.0059,133,000.00
0001810806UNITY SOFTWARE INC.10-K2020-12-31USD1,272,578,000.000.000.00772,445,000.00172,347,000.00541,779,000.0019,913,000.0040,906,000.00
0001393052VEEVA SYSTEMS INC.10-K2021-01-31USD730,504,000.00933,122,000.000.001,465,069,000.00408,928,000.001,104,081,000.00551,246,000.008,683,000.00
0000875320VERTEX PHARMACEUTICALS INC / MA10-K2020-12-31USD5,988,187,000.00670,710,000.000.006,205,683,000.00736,300,000.004,162,821,000.003,253,505,000.00259,798,000.00
0001463172ZENDESK, INC.10-K2020-12-31USD405,430,000.00565,593,000.00132,388,000.001,029,564,000.00251,255,000.00816,416,000.0026,428,000.0038,523,000.00
0001585521ZOOM VIDEO COMMUNICATIONS, INC.10-K2021-01-31USD2,240,303,000.002,004,410,000.000.002,651,368,000.00821,989,000.00622,658,000.001,471,177,000.0085,815,000.00
0001713683ZSCALER, INC.10-K2020-07-31USD141,851,000.001,228,722,000.000.00431,269,000.0095,733,000.00302,836,000.0079,317,000.0051,809,000.00
\n", 1332 | "
" 1333 | ], 1334 | "text/plain": [ 1335 | " company_name filings fiscal_year_ended \\\n", 1336 | "0001689923 ALTERYX, INC. 10-K 2020-12-31 \n", 1337 | "0000769397 AUTODESK, INC. 10-K 2021-01-31 \n", 1338 | "0001655210 BEYOND MEAT, INC. 10-K 2020-12-31 \n", 1339 | "0001666134 BLACKLINE, INC. 10-K 2020-12-31 \n", 1340 | "0001477333 CLOUDFLARE, INC. 10-K 2020-12-31 \n", 1341 | "0001385867 COUPA SOFTWARE INC 10-K 2021-01-31 \n", 1342 | "0001535527 CROWDSTRIKE HOLDINGS, INC. 10-K 2021-01-31 \n", 1343 | "0001561550 DATADOG, INC. 10-K 2020-12-31 \n", 1344 | "0001261333 DOCUSIGN, INC. 10-K 2021-01-31 \n", 1345 | "0001124140 EXACT SCIENCES CORPORATION 10-K 2020-12-31 \n", 1346 | "0001517413 FASTLY, INC. 10-K 2020-12-31 \n", 1347 | "0001288847 FIVE9, INC. 10-K 2020-12-31 \n", 1348 | "0001611647 FRESHPET INC 10-K 2020-12-31 \n", 1349 | "0001404655 HUBSPOT, INC. 10-K 2020-12-31 \n", 1350 | "0001609550 INSPIRE MEDICAL SYSTEMS, INC. 10-K 2020-12-31 \n", 1351 | "0001035267 INTUITIVE SURGICAL, INC. 10-K 2020-12-31 \n", 1352 | "0001099590 MERCADOLIBRE, INC. 10-K 2020-12-31 \n", 1353 | "0001441816 MONGODB, INC. 10-K 2021-01-31 \n", 1354 | "0001645113 NOVOCURE LIMITED 10-K 2020-12-31 \n", 1355 | "0001660134 OKTA, INC. 10-K 2021-01-31 \n", 1356 | "0001568100 PAGERDUTY, INC. 10-K 2021-01-31 \n", 1357 | "0001527753 PERSONALIS, INC. 10-K 2020-12-31 \n", 1358 | "0001506293 PINTEREST, INC. 10-K 2020-12-31 \n", 1359 | "0000730272 REPLIGEN CORP 10-K 2020-12-31 \n", 1360 | "0001428439 ROKU, INC 10-K 2020-12-31 \n", 1361 | "0001108524 SALESFORCE.COM, INC. 10-K 2021-01-31 \n", 1362 | "0001703399 SEA LIMITED 20-F 2020-12-31 \n", 1363 | "0001060736 SEAGEN INC. 10-K 2020-12-31 \n", 1364 | "0001554859 SEMLER SCIENTIFIC, INC. 10-K 2020-12-31 \n", 1365 | "0001642545 SHOCKWAVE MEDICAL, INC. 10-K 2020-12-31 \n", 1366 | "0001594805 SHOPIFY INC. 40-F 2020-12-31 \n", 1367 | "0001397702 SILK ROAD MEDICAL, INC. 10-K 2020-12-31 \n", 1368 | "0001764925 SLACK TECHNOLOGIES, INC. 
10-K 2021-01-31 \n", 1369 | "0001477449 TELADOC HEALTH, INC. 10-K 2020-12-31 \n", 1370 | "0001671933 TRADE DESK, INC. 10-K 2020-12-31 \n", 1371 | "0001447669 TWILIO INC. 10-K 2020-12-31 \n", 1372 | "0001810806 UNITY SOFTWARE INC. 10-K 2020-12-31 \n", 1373 | "0001393052 VEEVA SYSTEMS INC. 10-K 2021-01-31 \n", 1374 | "0000875320 VERTEX PHARMACEUTICALS INC / MA 10-K 2020-12-31 \n", 1375 | "0001463172 ZENDESK, INC. 10-K 2020-12-31 \n", 1376 | "0001585521 ZOOM VIDEO COMMUNICATIONS, INC. 10-K 2021-01-31 \n", 1377 | "0001713683 ZSCALER, INC. 10-K 2020-07-31 \n", 1378 | "\n", 1379 | " reporting_currency cash_and_equivalents short_term_investments \\\n", 1380 | "0001689923 USD 171,891,000.00 584,445,000.00 \n", 1381 | "0000769397 USD 1,772,200,000.00 85,000,000.00 \n", 1382 | "0001655210 USD 159,127,000.00 0.00 \n", 1383 | "0001666134 USD 367,413,000.00 175,206,000.00 \n", 1384 | "0001477333 USD 108,895,000.00 923,201,000.00 \n", 1385 | "0001385867 USD 323,284,000.00 283,036,000.00 \n", 1386 | "0001535527 USD 1,918,608,000.00 0.00 \n", 1387 | "0001561550 USD 224,927,000.00 1,292,532,000.00 \n", 1388 | "0001261333 USD 566,055,000.00 207,450,000.00 \n", 1389 | "0001124140 USD 1,491,288,000.00 348,699,000.00 \n", 1390 | "0001517413 USD 62,900,000.00 131,283,000.00 \n", 1391 | "0001288847 USD 220,372,000.00 383,171,000.00 \n", 1392 | "0001611647 USD 67,247,000.00 0.00 \n", 1393 | "0001404655 USD 378,123,000.00 873,073,000.00 \n", 1394 | "0001609550 USD 190,518,000.00 43,844,000.00 \n", 1395 | "0001035267 USD 1,622,600,000.00 3,488,800,000.00 \n", 1396 | "0001099590 USD 1,856,394,000.00 1,241,306,000.00 \n", 1397 | "0001441816 USD 429,697,000.00 528,045,000.00 \n", 1398 | "0001645113 USD 234,674,000.00 607,902,000.00 \n", 1399 | "0001660134 USD 434,607,000.00 2,121,584,000.00 \n", 1400 | "0001568100 USD 339,166,000.00 221,112,000.00 \n", 1401 | "0001527753 USD 68,525,000.00 134,765,000.00 \n", 1402 | "0001506293 USD 669,230,000.00 1,091,076,000.00 \n", 1403 | "0000730272 USD 
717,292,000.00 0.00 \n", 1404 | "0001428439 USD 1,092,815,000.00 0.00 \n", 1405 | "0001108524 USD 6,195,000,000.00 5,771,000,000.00 \n", 1406 | "0001703399 USD 6,166,880,000.00 126,099,000.00 \n", 1407 | "0001060736 USD 558,424,000.00 2,000,996,000.00 \n", 1408 | "0001554859 USD 22,079,000.00 0.00 \n", 1409 | "0001642545 USD 50,423,000.00 151,931,000.00 \n", 1410 | "0001594805 USD 2,703,597,000.00 3,684,370,000.00 \n", 1411 | "0001397702 USD 69,466,000.00 78,016,000.00 \n", 1412 | "0001764925 USD 1,081,357,000.00 505,895,000.00 \n", 1413 | "0001477449 USD 733,324,000.00 53,245,000.00 \n", 1414 | "0001671933 USD 437,353,000.00 186,685,000.00 \n", 1415 | "0001447669 USD 933,885,000.00 2,105,906,000.00 \n", 1416 | "0001810806 USD 1,272,578,000.00 0.00 \n", 1417 | "0001393052 USD 730,504,000.00 933,122,000.00 \n", 1418 | "0000875320 USD 5,988,187,000.00 670,710,000.00 \n", 1419 | "0001463172 USD 405,430,000.00 565,593,000.00 \n", 1420 | "0001585521 USD 2,240,303,000.00 2,004,410,000.00 \n", 1421 | "0001713683 USD 141,851,000.00 1,228,722,000.00 \n", 1422 | "\n", 1423 | " current_debt revenue cost_of_revenue \\\n", 1424 | "0001689923 72,619,000.00 495,308,000.00 43,839,000.00 \n", 1425 | "0000769397 0.00 3,790,400,000.00 337,100,000.00 \n", 1426 | "0001655210 25,000,000.00 406,785,000.00 284,510,000.00 \n", 1427 | "0001666134 0.00 351,737,000.00 68,972,000.00 \n", 1428 | "0001477333 0.00 431,059,000.00 101,055,000.00 \n", 1429 | "0001385867 609,068,000.00 541,643,000.00 221,701,000.00 \n", 1430 | "0001535527 0.00 874,438,000.00 229,545,000.00 \n", 1431 | "0001561550 0.00 603,466,000.00 130,197,000.00 \n", 1432 | "0001261333 20,469,000.00 1,453,047,000.00 364,058,000.00 \n", 1433 | "0001124140 255,464,000.00 1,491,391,000.00 354,324,000.00 \n", 1434 | "0001517413 0.00 290,874,000.00 120,007,000.00 \n", 1435 | "0001288847 0.00 434,908,000.00 180,284,000.00 \n", 1436 | "0001611647 0.00 318,790,000.00 185,880,000.00 \n", 1437 | "0001404655 7,837,000.00 883,026,000.00 
166,959,000.00 \n", 1438 | "0001609550 0.00 115,381,000.00 17,623,000.00 \n", 1439 | "0001035267 0.00 4,358,400,000.00 1,497,200,000.00 \n", 1440 | "0001099590 0.00 3,973,465,000.00 2,264,255,000.00 \n", 1441 | "0001441816 0.00 590,380,000.00 177,076,000.00 \n", 1442 | "0001645113 0.00 494,366,000.00 106,501,000.00 \n", 1443 | "0001660134 908,684,000.00 835,424,000.00 217,681,000.00 \n", 1444 | "0001568100 0.00 213,556,000.00 30,686,000.00 \n", 1445 | "0001527753 0.00 78,648,000.00 58,534,000.00 \n", 1446 | "0001506293 0.00 1,692,658,000.00 449,358,000.00 \n", 1447 | "0000730272 243,737,000.00 366,260,000.00 156,634,000.00 \n", 1448 | "0001428439 4,874,000.00 1,778,388,000.00 970,169,000.00 \n", 1449 | "0001108524 0.00 21,252,000,000.00 5,438,000,000.00 \n", 1450 | "0001703399 0.00 4,375,664,000.00 3,026,759,000.00 \n", 1451 | "0001060736 0.00 2,175,536,000.00 217,720,000.00 \n", 1452 | "0001554859 0.00 38,603,000.00 3,356,000.00 \n", 1453 | "0001642545 3,300,000.00 67,789,000.00 20,991,000.00 \n", 1454 | "0001594805 0.00 2,929,491,000.00 1,387,971,000.00 \n", 1455 | "0001397702 0.00 75,227,000.00 21,291,000.00 \n", 1456 | "0001764925 0.00 902,610,000.00 121,692,000.00 \n", 1457 | "0001477449 42,560,000.00 1,093,962,000.00 390,829,000.00 \n", 1458 | "0001671933 0.00 836,033,000.00 178,812,000.00 \n", 1459 | "0001447669 0.00 1,761,776,000.00 846,115,000.00 \n", 1460 | "0001810806 0.00 772,445,000.00 172,347,000.00 \n", 1461 | "0001393052 0.00 1,465,069,000.00 408,928,000.00 \n", 1462 | "0000875320 0.00 6,205,683,000.00 736,300,000.00 \n", 1463 | "0001463172 132,388,000.00 1,029,564,000.00 251,255,000.00 \n", 1464 | "0001585521 0.00 2,651,368,000.00 821,989,000.00 \n", 1465 | "0001713683 0.00 431,269,000.00 95,733,000.00 \n", 1466 | "\n", 1467 | " revenue_1_fy_ago operating_cash_flow capital_expenditure \n", 1468 | "0001689923 417,910,000.00 74,782,000.00 26,358,000.00 \n", 1469 | "0000769397 3,274,300,000.00 1,437,200,000.00 95,900,000.00 \n", 1470 | "0001655210 
297,897,000.00 -39,995,000.00 57,696,000.00 \n", 1471 | "0001666134 288,976,000.00 54,735,000.00 19,424,000.00 \n", 1472 | "0001477333 287,022,000.00 -17,129,000.00 74,962,000.00 \n", 1473 | "0001385867 389,719,000.00 78,202,000.00 11,492,000.00 \n", 1474 | "0001535527 481,413,000.00 356,566,000.00 63,843,000.00 \n", 1475 | "0001561550 362,780,000.00 109,091,000.00 25,883,000.00 \n", 1476 | "0001261333 973,971,000.00 296,954,000.00 82,395,000.00 \n", 1477 | "0001124140 876,293,000.00 136,482,000.00 64,352,000.00 \n", 1478 | "0001517413 200,462,000.00 -19,916,000.00 37,511,000.00 \n", 1479 | "0001288847 328,006,000.00 67,302,000.00 30,422,000.00 \n", 1480 | "0001611647 245,862,000.00 21,193,000.00 134,568,000.00 \n", 1481 | "0001404655 674,860,000.00 88,913,000.00 58,873,000.00 \n", 1482 | "0001609550 82,050,000.00 -53,045,000.00 2,455,000.00 \n", 1483 | "0001035267 4,478,500,000.00 1,484,800,000.00 341,500,000.00 \n", 1484 | "0001099590 2,296,314,000.00 1,182,552,000.00 247,141,000.00 \n", 1485 | "0001441816 421,720,000.00 -42,673,000.00 11,773,000.00 \n", 1486 | "0001645113 351,318,000.00 99,148,000.00 14,968,000.00 \n", 1487 | "0001660134 586,067,000.00 127,962,000.00 17,368,000.00 \n", 1488 | "0001568100 166,351,000.00 10,095,000.00 4,848,000.00 \n", 1489 | "0001527753 65,207,000.00 -42,653,000.00 3,246,000.00 \n", 1490 | "0001506293 1,142,761,000.00 28,826,000.00 17,401,000.00 \n", 1491 | "0000730272 270,245,000.00 62,625,000.00 26,344,000.00 \n", 1492 | "0001428439 1,128,921,000.00 148,192,000.00 82,382,000.00 \n", 1493 | "0001108524 17,098,000,000.00 4,801,000,000.00 710,000,000.00 \n", 1494 | "0001703399 2,175,378,000.00 555,868,000.00 357,054,000.00 \n", 1495 | "0001060736 916,713,000.00 856,568,000.00 82,409,000.00 \n", 1496 | "0001554859 32,767,000.00 15,417,000.00 1,061,000.00 \n", 1497 | "0001642545 42,927,000.00 -71,184,000.00 11,520,000.00 \n", 1498 | "0001594805 1,578,173,000.00 424,958,000.00 41,995,000.00 \n", 1499 | "0001397702 63,354,000.00 
-42,068,000.00 842,000.00 \n", 1500 | "0001764925 630,422,000.00 72,439,000.00 12,607,000.00 \n", 1501 | "0001477449 553,307,000.00 -53,511,000.00 26,042,000.00 \n", 1502 | "0001671933 661,058,000.00 405,069,000.00 80,114,000.00 \n", 1503 | "0001447669 1,134,468,000.00 32,654,000.00 59,133,000.00 \n", 1504 | "0001810806 541,779,000.00 19,913,000.00 40,906,000.00 \n", 1505 | "0001393052 1,104,081,000.00 551,246,000.00 8,683,000.00 \n", 1506 | "0000875320 4,162,821,000.00 3,253,505,000.00 259,798,000.00 \n", 1507 | "0001463172 816,416,000.00 26,428,000.00 38,523,000.00 \n", 1508 | "0001585521 622,658,000.00 1,471,177,000.00 85,815,000.00 \n", 1509 | "0001713683 302,836,000.00 79,317,000.00 51,809,000.00 " 1510 | ] 1511 | }, 1512 | "execution_count": 3, 1513 | "metadata": {}, 1514 | "output_type": "execute_result" 1515 | } 1516 | ], 1517 | "source": [ 1518 | "pd.options.display.float_format = '{:,.2f}'.format\n", 1519 | "companies_financials if len(company_df_list) > 0 else None" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "markdown", 1524 | "metadata": {}, 1525 | "source": [ 1526 | "# Insert The Extracted Data Into Database\n", 1527 | "\n", 1528 | "I will insert the extracted data into a database so that I can use it for analysis later." 
1529 | ] 1530 | }, 1531 | { 1532 | "cell_type": "code", 1533 | "execution_count": 4, 1534 | "metadata": {}, 1535 | "outputs": [ 1536 | { 1537 | "name": "stdout", 1538 | "output_type": "stream", 1539 | "text": [ 1540 | "Inserted/Updated \"ALTERYX, INC.\" into database.\n", 1541 | "Inserted/Updated \"AUTODESK, INC.\" into database.\n", 1542 | "Inserted/Updated \"BEYOND MEAT, INC.\" into database.\n", 1543 | "Inserted/Updated \"BLACKLINE, INC.\" into database.\n", 1544 | "Inserted/Updated \"CLOUDFLARE, INC.\" into database.\n", 1545 | "Inserted/Updated \"COUPA SOFTWARE INC\" into database.\n", 1546 | "Inserted/Updated \"CROWDSTRIKE HOLDINGS, INC.\" into database.\n", 1547 | "Inserted/Updated \"DATADOG, INC.\" into database.\n", 1548 | "Inserted/Updated \"DOCUSIGN, INC.\" into database.\n", 1549 | "Inserted/Updated \"EXACT SCIENCES CORPORATION\" into database.\n", 1550 | "Inserted/Updated \"FASTLY, INC.\" into database.\n", 1551 | "Inserted/Updated \"FIVE9, INC.\" into database.\n", 1552 | "Inserted/Updated \"FRESHPET INC\" into database.\n", 1553 | "Inserted/Updated \"HUBSPOT, INC.\" into database.\n", 1554 | "Inserted/Updated \"INSPIRE MEDICAL SYSTEMS, INC.\" into database.\n", 1555 | "Inserted/Updated \"INTUITIVE SURGICAL, INC.\" into database.\n", 1556 | "Inserted/Updated \"MERCADOLIBRE, INC.\" into database.\n", 1557 | "Inserted/Updated \"MONGODB, INC.\" into database.\n", 1558 | "Inserted/Updated \"NOVOCURE LIMITED\" into database.\n", 1559 | "Inserted/Updated \"OKTA, INC.\" into database.\n", 1560 | "Inserted/Updated \"PAGERDUTY, INC.\" into database.\n", 1561 | "Inserted/Updated \"PERSONALIS, INC.\" into database.\n", 1562 | "Inserted/Updated \"PINTEREST, INC.\" into database.\n", 1563 | "Inserted/Updated \"REPLIGEN CORP\" into database.\n", 1564 | "Inserted/Updated \"ROKU, INC\" into database.\n", 1565 | "Inserted/Updated \"SALESFORCE.COM, INC.\" into database.\n", 1566 | "Inserted/Updated \"SEA LIMITED\" into database.\n", 1567 | "Inserted/Updated \"SEAGEN 
import psycopg2


def insert_update_record(row):
    """Insert or update one company's financials in the ``company`` table.

    The record is looked up by ``central_index_key``: an existing row is
    updated, otherwise a new row is inserted. Each call opens its own
    connection to the ``stock`` database and closes it before returning.
    Connection settings are read from the names ``uid``, ``pwd``, ``host``
    and ``port``, which must be defined elsewhere in the notebook.

    Args:
        row: Mapping-like record (e.g. a row yielded by
            ``DataFrame.reset_index().iterrows()``) providing ``index``
            (used as the central index key) and every column named in
            ``value_columns`` below.

    Returns:
        None. Success and database errors are reported with ``print``;
        errors raised while talking to the database are caught so that one
        failing record does not stop the caller's loop.
    """
    value_columns = (
        'company_name',
        'filings',
        'fiscal_year_ended',
        'reporting_currency',
        'cash_and_equivalents',
        'short_term_investments',
        'current_debt',
        'revenue',
        'cost_of_revenue',
        'revenue_1_fy_ago',
        'operating_cash_flow',
        'capital_expenditure',
    )
    company = {column: row[column] for column in value_columns}
    company['central_index_key'] = row['index']

    # NOTE(review): a single INSERT ... ON CONFLICT would remove the separate
    # lookup, but it needs a unique constraint on central_index_key and the
    # table definition is not visible here -- confirm before switching.
    exists_query = """
        SELECT 1
        FROM company
        WHERE central_index_key = %s
        LIMIT 1;
    """
    upd_statement = """
        UPDATE company
        SET
            filings = %(filings)s
            , company_name = %(company_name)s
            , fiscal_year_ended = %(fiscal_year_ended)s
            , reporting_currency = %(reporting_currency)s
            , cash_and_equivalents = %(cash_and_equivalents)s
            , short_term_investments = %(short_term_investments)s
            , current_debt = %(current_debt)s
            , revenue = %(revenue)s
            , cost_of_revenue = %(cost_of_revenue)s
            , revenue_1_fy_ago = %(revenue_1_fy_ago)s
            , operating_cash_flow = %(operating_cash_flow)s
            , capital_expenditure = %(capital_expenditure)s
        WHERE central_index_key = %(central_index_key)s;
    """
    ins_statement = """
        INSERT INTO company
            (central_index_key, company_name
            , filings, fiscal_year_ended, reporting_currency
            , cash_and_equivalents, short_term_investments
            , current_debt, revenue, cost_of_revenue
            , revenue_1_fy_ago, operating_cash_flow
            , capital_expenditure)
        VALUES
            (%(central_index_key)s, %(company_name)s
            , %(filings)s, %(fiscal_year_ended)s, %(reporting_currency)s
            , %(cash_and_equivalents)s, %(short_term_investments)s
            , %(current_debt)s, %(revenue)s, %(cost_of_revenue)s
            , %(revenue_1_fy_ago)s, %(operating_cash_flow)s
            , %(capital_expenditure)s);
    """

    # Bound before the try block so the finally clause can test it safely
    # even when psycopg2.connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(user=uid, password=pwd, host=host,
                                port=port, dbname='stock')

        # The cursor is closed automatically when the with-block exits.
        with conn.cursor() as cur:
            # check whether the company exists in the table
            cur.execute(exists_query, (company['central_index_key'],))
            is_company_exists = cur.fetchone() is not None

            # if exists, update the existing record. Else, insert a record
            statement = upd_statement if is_company_exists else ins_statement
            cur.execute(statement, company)

        conn.commit()

        print('Inserted/Updated "{}" into database.'.format(company['company_name']))
    except Exception as e:
        # format() rather than "+" so a non-string company name cannot turn
        # error reporting into a second failure.
        print('{}:'.format(company['company_name']), str(e))
    finally:
        if conn is not None:
            conn.close()


# call function for each row
if len(company_df_list) > 0:
    for _, row in companies_financials.reset_index().iterrows():
        insert_update_record(row)