├── estimate-extract-transform-load.sh
├── dividend-calendar-extract-transform-load.sh
├── earnings-calendar-extract-transform-load.sh
├── financial-statement-transform-load-yesterday.sh
├── dividend-calendar-extract.rkt
├── earnings-calendar-extract.rkt
├── estimate-extract.rkt
├── dump-dolt-earnings-calendar.rkt
├── dump-dat.rkt
├── dividend-calendar-transform-load.rkt
├── financial-statement-extract.rkt
├── earnings-calendar-transform-load.rkt
├── README.md
├── dump-dolt-estimates.rkt
├── schema.sql
├── dump-dolt-statements.rkt
├── cash-flow-statement-transform-load.rkt
├── LICENSE
├── income-statement-transform-load.rkt
├── cash-flow-statement-transform-load.2024-02-01.rkt
├── estimate-transform-load.rkt
└── balance-sheet-transform-load.rkt

/estimate-extract-transform-load.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Runs estimate-extract.rkt, then estimate-transform-load.rkt, archives today's
4 | # downloaded *.html files with 7zr, and finally runs dump-dolt-estimates.rkt.
5 | #
6 | # Usage: estimate-extract-transform-load.sh DB_PASSWORD
7 | #   $1 is forwarded to each Racket program as its -p (database password) flag.
8 | 
9 | today=$(date "+%F")
10 | dir=$(dirname "$0")
11 | current_year=$(date "+%Y")
12 | 
13 | # ${dir} is quoted so an install path containing spaces is not word-split.
14 | racket -y "${dir}/estimate-extract.rkt" -p "$1"
15 | racket -y "${dir}/estimate-transform-load.rkt" -p "$1"
16 | 
17 | # Nothing else in this script creates the per-year archive folder, so make sure
18 | # it exists before 7zr writes into it (mkdir -p is a no-op when it already does).
19 | mkdir -p "/var/local/zacks/estimates/${current_year}"
20 | 
21 | # The *.html glob stays outside the quotes so it still expands.
22 | 7zr a "/var/local/zacks/estimates/${current_year}/${today}.7z" "/var/local/zacks/estimates/${today}"/*.html
23 | 
24 | racket -y "${dir}/dump-dolt-estimates.rkt" -p "$1"
25 | 
--------------------------------------------------------------------------------
/dividend-calendar-extract-transform-load.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Runs dividend-calendar-extract.rkt, then dividend-calendar-transform-load.rkt,
4 | # and adds today's download folder to the current year's 7z archive.
5 | #
6 | # Usage: dividend-calendar-extract-transform-load.sh DB_PASSWORD
7 | #   $1 is forwarded to the transform-load program as its -p flag.
8 | 
9 | today=$(date "+%F")
10 | dir=$(dirname "$0")
11 | current_year=$(date "+%Y")
12 | 
13 | # ${dir} is quoted so an install path containing spaces is not word-split.
14 | racket -y "${dir}/dividend-calendar-extract.rkt"
15 | racket -y "${dir}/dividend-calendar-transform-load.rkt" -p "$1"
16 | 
17 | 7zr a "/var/local/zacks/dividend-calendar/${current_year}.7z" "/var/local/zacks/dividend-calendar/${today}"
18 | 
19 | # racket -y ${dir}/dump-dolt-dividend-calendar.rkt -p "$1"
20 | 
--------------------------------------------------------------------------------
/earnings-calendar-extract-transform-load.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | today=$(date "+%F") 4 | dir=$(dirname "$0") 5 | current_year=$(date "+%Y") 6 | 7 | racket -y ${dir}/earnings-calendar-extract.rkt 8 | racket -y ${dir}/earnings-calendar-transform-load.rkt -p "$1" 9 | 10 | 7zr a /var/local/zacks/earnings-calendar/${current_year}.7z /var/local/zacks/earnings-calendar/${today} 11 | 12 | racket -y ${dir}/dump-dolt-earnings-calendar.rkt -p "$1" 13 | -------------------------------------------------------------------------------- /financial-statement-transform-load-yesterday.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | yesterday=$(date -d "-1 day" "+%F") 4 | dir=$(dirname "$0") 5 | 6 | racket -y ${dir}/balance-sheet-transform-load.rkt -d ${yesterday} -p "$1" 7 | racket -y ${dir}/cash-flow-statement-transform-load.2024-02-01.rkt -d ${yesterday} -p "$1" 8 | racket -y ${dir}/income-statement-transform-load.rkt -d ${yesterday} -p "$1" 9 | 10 | 7zr a /var/local/zacks/balance-sheet/${yesterday}.7z /var/local/zacks/balance-sheet/${yesterday}/*.html 11 | 7zr a /var/local/zacks/cash-flow-statement/${yesterday}.7z /var/local/zacks/cash-flow-statement/${yesterday}/*.html 12 | 7zr a /var/local/zacks/income-statement/${yesterday}.7z /var/local/zacks/income-statement/${yesterday}/*.html 13 | 14 | racket -y ${dir}/dump-dolt-statements.rkt -p "$1" 15 | -------------------------------------------------------------------------------- /dividend-calendar-extract.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require gregor 4 | gregor/period 5 | gregor/time 6 | net/http-easy 7 | racket/cmdline 8 | racket/file 9 | racket/list 10 | racket/port 11 | tasks 12 | threading) 13 | 14 | (define (download-day date) 15 | (make-directory* (string-append 
"/var/local/zacks/dividend-calendar/" (~t (today) "yyyy-MM-dd"))) 16 | (call-with-output-file* (string-append "/var/local/zacks/dividend-calendar/" (~t (today) "yyyy-MM-dd") "/" 17 | (~t date "yyyy-MM-dd") ".json") 18 | (λ (out) 19 | (with-handlers ([exn:fail? 20 | (λ (error) 21 | (displayln (string-append "Encountered error for " (~t date "yyyy-MM-dd"))) 22 | (displayln error))]) 23 | (~> (string-append "https://www.zacks.com/includes/classes/z2_class_calendarfunctions_data.php" 24 | "?calltype=eventscal&type=5&date=" 25 | (number->string (->posix (at-time date (time 6))))) 26 | (get _) 27 | (response-body _) 28 | (write-bytes _ out)))) 29 | #:exists 'replace)) 30 | 31 | (define end-date (make-parameter (+days (today) (* 7 6)))) 32 | 33 | (define start-date (make-parameter (today))) 34 | 35 | (command-line 36 | #:program "racket dividend-calendar-extract.rkt" 37 | #:once-each 38 | [("-e" "--end-date") ed 39 | "End date. Defaults to today + 6 weeks" 40 | (end-date (iso8601->date ed))] 41 | [("-s" "--start-date") sd 42 | "Start date. 
Defaults to today"
43 |   (start-date (iso8601->date sd))])
44 | 
45 | (define delay-interval 10)
46 | 
47 | (with-task-server (for-each (λ (i) (schedule-delayed-task (λ () (download-day (+days (start-date) i)))
48 |                                                           (* i delay-interval)))
49 |                             (range 0 (period-ref (period-between (start-date) (end-date) '(days)) 'days)))
50 |   ; add a final task that will halt the task server
51 |   (schedule-delayed-task
52 |    (λ () (schedule-stop-task)) (* delay-interval (period-ref (period-between (start-date) (end-date) '(days)) 'days)))
53 |   (run-tasks))
54 | 
--------------------------------------------------------------------------------
/earnings-calendar-extract.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (require gregor
4 |          gregor/period
5 |          gregor/time
6 |          net/http-easy
7 |          racket/cmdline
8 |          racket/file
9 |          racket/list
10 |          racket/port
11 |          tasks
12 |          threading)
13 | ;; Saves the Zacks earnings-calendar response for `date` as <today>/<date>.json; exn:fail errors are printed, not raised.
14 | (define (download-day date)
15 |   (define day (~t date "yyyy-MM-dd"))
16 |   ; resolve (today) once so the folder we create is the folder we write into, even across midnight
17 |   (define folder (string-append "/var/local/zacks/earnings-calendar/" (~t (today) "yyyy-MM-dd")))
18 |   (with-handlers ([exn:fail?
19 |                    (λ (e)
20 |                      (displayln (string-append "Encountered error for " day))
21 |                      (displayln e))])
22 |     (make-directory* folder)
23 |     ; download before opening the file so a failed request cannot leave an empty or truncated .json behind
24 |     (let ([body (~> (string-append "https://www.zacks.com/includes/classes/z2_class_calendarfunctions_data.php"
25 |                                    "?calltype=eventscal&type=1&date="
26 |                                    (number->string (->posix (at-time date (time 6)))))
27 |                     (get _)
28 |                     (response-body _))])
29 |       (call-with-output-file* (string-append folder "/" day ".json") (λ (out) (write-bytes body out)) #:exists 'replace))))
30 | 
31 | (define end-date (make-parameter (+days (today) (* 7 6))))
32 | 
33 | (define start-date (make-parameter (today)))
34 | 
35 | (command-line
36 |  #:program "racket earnings-calendar-extract.rkt"
37 |  #:once-each
38 |  [("-e" "--end-date") ed
39 |   "End date. 
Defaults to today + 6 weeks" 40 | (end-date (iso8601->date ed))] 41 | [("-s" "--start-date") sd 42 | "Start date. Defaults to today" 43 | (start-date (iso8601->date sd))]) 44 | 45 | (define delay-interval 10) 46 | 47 | (with-task-server (for-each (λ (i) (schedule-delayed-task (λ () (download-day (+days (start-date) i))) 48 | (* i delay-interval))) 49 | (range 0 (period-ref (period-between (start-date) (end-date) '(days)) 'days))) 50 | ; add a final task that will halt the task server 51 | (schedule-delayed-task 52 | (λ () (schedule-stop-task)) (* delay-interval (period-ref (period-between (start-date) (end-date) '(days)) 'days))) 53 | (run-tasks)) 54 | -------------------------------------------------------------------------------- /estimate-extract.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require db 4 | gregor 5 | net/http-easy 6 | racket/cmdline 7 | racket/file 8 | racket/list 9 | racket/port 10 | tasks 11 | threading) 12 | 13 | (define (download-estimates symbol) 14 | (make-directory* (string-append "/var/local/zacks/estimates/" (~t (today) "yyyy-MM-dd"))) 15 | (call-with-output-file (string-append "/var/local/zacks/estimates/" (~t (today) "yyyy-MM-dd") "/" symbol ".detailed-estimates.html") 16 | (λ (out) (with-handlers ([exn:fail? 
17 | (λ (error) 18 | (displayln (string-append "Encountered error for " symbol)) 19 | (displayln error))]) 20 | (~> (string-append "https://www.zacks.com/stock/quote/" symbol "/detailed-earning-estimates") 21 | (get _) 22 | (response-body _) 23 | (write-bytes _ out)))) 24 | #:exists 'replace)) 25 | 26 | (define db-user (make-parameter "user")) 27 | 28 | (define db-name (make-parameter "local")) 29 | 30 | (define db-pass (make-parameter "")) 31 | 32 | (define first-symbol (make-parameter "")) 33 | 34 | (define last-symbol (make-parameter "")) 35 | 36 | (command-line 37 | #:program "racket estimate-extract.2023-02-27.rkt" 38 | #:once-each 39 | [("-f" "--first-symbol") first 40 | "First symbol to query. Defaults to nothing" 41 | (first-symbol first)] 42 | [("-l" "--last-symbol") last 43 | "Last symbol to query. Defaults to nothing" 44 | (last-symbol last)] 45 | [("-n" "--db-name") name 46 | "Database name. Defaults to 'local'" 47 | (db-name name)] 48 | [("-p" "--db-pass") password 49 | "Database password" 50 | (db-pass password)] 51 | [("-u" "--db-user") user 52 | "Database user name. 
Defaults to 'user'"
53 |   (db-user user)])
54 | 
55 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
56 | 
57 | (define symbols (query-list dbc "
58 | select
59 |   act_symbol
60 | from
61 |   nasdaq.symbol
62 | where
63 |   is_etf = false and
64 |   is_test_issue = false and
65 |   is_next_shares = false and
66 |   security_name !~ 'ETN' and
67 |   nasdaq_symbol !~ '[-\\$\\+\\*#!@%\\^=~]' and
68 |   case when nasdaq_symbol ~ '[A-Z]{4}[L-Z]'
69 |     then security_name !~ '(Note|Preferred|Right|Unit|Warrant)'
70 |     else true
71 |   end and
72 |   last_seen = (select max(last_seen) from nasdaq.symbol) and
73 |   case when $1 != ''
74 |     then act_symbol >= $1
75 |     else true
76 |   end and
77 |   case when $2 != ''
78 |     then act_symbol <= $2
79 |     else true
80 |   end
81 | order by
82 |   act_symbol;
83 | "
84 |                            (first-symbol)
85 |                            (last-symbol)))
86 | 
87 | (disconnect dbc)
88 | 
89 | ; seconds between the start of consecutive downloads
90 | (define delay-interval 10)
91 | 
92 | (define delays (map (λ (x) (* delay-interval x)) (range 0 (length symbols))))
93 | 
94 | ; Every download runs in its own thread. Remember each one so the program can wait for
95 | ; stragglers: the stop task below fires one delay-interval after the last download starts.
96 | (define download-threads '())
97 | 
98 | (define (spawn-download symbol)
99 |   (set! download-threads (cons (thread (λ () (download-estimates symbol))) download-threads)))
100 | 
101 | (with-task-server (for-each (λ (l) (schedule-delayed-task (λ () (spawn-download (first l)))
102 |                                                           (second l)))
103 |                             (map list symbols delays))
104 |   ; add a final task that will halt the task server
105 |   (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval (length delays)))
106 |   (run-tasks))
107 | 
108 | ; NOTE(review): assumes run-tasks returns once the stop task has fired. Without this wait a
109 | ; download still in flight at that moment would be killed when the main thread exits.
110 | (for-each thread-wait download-threads)
111 | 
--------------------------------------------------------------------------------
/dump-dolt-earnings-calendar.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (require db
4 |          gregor
5 |          racket/cmdline
6 |          racket/string
7 |          racket/system)
8 | 
9 | (define base-folder (make-parameter "/var/local/dolt/earnings"))
10 | 
11 | (define as-of-date (make-parameter (~t (today) "yyyy-MM-dd")))
12 | 
13 | (define db-user (make-parameter "user"))
14 | 
15 | (define db-name (make-parameter "local"))
16 | 
17 | (define db-pass (make-parameter ""))
18 | 
19 | (command-line
20 |  #:program 
"racket dump-dolt-calendar.rkt" 21 | #:once-each 22 | [("-b" "--base-folder") folder 23 | "Base dolt folder. Defaults to /var/local/dolt/earnings" 24 | (base-folder folder)] 25 | [("-d" "--date") date 26 | "Final date for history retrieval. Defaults to today" 27 | (as-of-date date)] 28 | [("-n" "--db-name") name 29 | "Database name. Defaults to 'local'" 30 | (db-name name)] 31 | [("-p" "--db-pass") password 32 | "Database password" 33 | (db-pass password)] 34 | [("-u" "--db-user") user 35 | "Database user name. Defaults to 'user'" 36 | (db-user user)]) 37 | 38 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass))) 39 | 40 | ; earnings-calendar 41 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt sql -q \"delete from earnings_calendar where date >= date_sub('" (as-of-date) "', interval 7 day)\";")) 42 | 43 | (define earnings-calendar-file (string-append (base-folder) "/earnings-calendar-" (as-of-date) ".csv")) 44 | 45 | (call-with-output-file* earnings-calendar-file 46 | (λ (out) 47 | (displayln "act_symbol,date,when" out) 48 | (for-each (λ (row) 49 | (displayln (string-join (vector->list row) ",") out)) 50 | (query-rows dbc " 51 | select 52 | act_symbol::text, 53 | date::text, 54 | coalesce(\"when\"::text, '') 55 | from 56 | zacks.earnings_calendar 57 | where 58 | date >= $1::text::date - '7 days'::interval; 59 | " 60 | (as-of-date)))) 61 | #:exists 'replace) 62 | 63 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt sql -q \" 64 | with ecm (act_symbol, max_date, bsa_date) as ( 65 | select 66 | ec.act_symbol, 67 | max(ec.date), 68 | bsa.date 69 | from 70 | earnings_calendar ec 71 | join 72 | (select distinct 73 | act_symbol, 74 | date 75 | from 76 | balance_sheet_assets bsa 77 | union 78 | (select 79 | act_symbol, 80 | date_sub(date_add(date_add(max(date), interval 1 day), interval 3 month), interval 1 day) 81 | from 82 | balance_sheet_assets 83 | group by 84 | act_symbol) 85 | order by 86 | 
act_symbol, 87 | date) bsa 88 | on 89 | ec.act_symbol = bsa.act_symbol and 90 | ec.date > bsa.date and 91 | ec.date <= date_sub(date_add(date_add(bsa.date, interval 1 day), interval 3 month), interval 1 day) 92 | group by 93 | ec.act_symbol, 94 | bsa.date 95 | ) 96 | delete 97 | ec 98 | from 99 | earnings_calendar ec 100 | join 101 | ecm 102 | where 103 | ec.act_symbol = ecm.act_symbol and 104 | ec.date != ecm.max_date and 105 | ec.date > ecm.bsa_date and 106 | ec.date <= date_sub(date_add(date_add(ecm.bsa_date, interval 1 day), interval 3 month), interval 1 day); 107 | \"")) 108 | 109 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt table import -u --continue earnings_calendar earnings-calendar-" (as-of-date) ".csv")) 110 | 111 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt add earnings_calendar; " 112 | "/usr/local/bin/dolt commit -m 'earnings_calendar " (as-of-date) " update'; /usr/local/bin/dolt push --silent")) 113 | -------------------------------------------------------------------------------- /dump-dat.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require db 4 | gregor 5 | racket/cmdline 6 | racket/vector) 7 | 8 | (define start-date (make-parameter (~t (today) "yyyy-MM-dd"))) 9 | 10 | (define end-date (make-parameter (~t (today) "yyyy-MM-dd"))) 11 | 12 | (define db-user (make-parameter "user")) 13 | 14 | (define db-name (make-parameter "local")) 15 | 16 | (define db-pass (make-parameter "")) 17 | 18 | (command-line 19 | #:program "racket dump-dat.rkt" 20 | #:once-each 21 | [("-e" "--end-date") end 22 | "Final date for history retrieval. Defaults to today" 23 | (end-date end)] 24 | [("-n" "--db-name") name 25 | "Database name. Defaults to 'local'" 26 | (db-name name)] 27 | [("-p" "--db-pass") password 28 | "Database password" 29 | (db-pass password)] 30 | [("-s" "--start-date") start 31 | "Earliest date for history retrieval. 
Defaults to today"
32 |   (start-date start)]
33 |  [("-u" "--db-user") user
34 |   "Database user name. Defaults to 'user'"
35 |   (db-user user)])
36 | 
37 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
38 | ;; Joins the string fields of `vec` with commas. An empty vector yields "" and no intermediate vectors are copied.
39 | (define (vector->csv-line vec)
40 |   (for/fold ([line ""]) ([field (in-vector vec)] [i (in-naturals)])
41 |     ; the first field is passed through as-is; every later field is appended after a comma
42 |     (if (zero? i) field (string-append line "," field))))
43 | 
44 | (for-each (λ (date)
45 |             (call-with-output-file (string-append "/var/local/dat/zacks/eps-estimate/" date ".csv")
46 |               (λ (out)
47 |                 (displayln "act_symbol,date,period,period_end_date,consensus,recent,count,high,low,year_ago" out)
48 |                 (for-each (λ (row)
49 |                             (displayln (vector->csv-line row) out))
50 |                           (query-rows dbc "
51 | select
52 |   act_symbol::text,
53 |   date::text,
54 |   period::text,
55 |   period_end_date::text,
56 |   consensus::text,
57 |   recent::text,
58 |   count::text,
59 |   high::text,
60 |   low::text,
61 |   year_ago::text
62 | from
63 |   zacks.eps_estimate
64 | where
65 |   date = $1::text::date and
66 |   consensus is not null and
67 |   recent is not null and
68 |   count is not null and
69 |   high is not null and
70 |   low is not null and
71 |   year_ago is not null
72 | order by
73 |   act_symbol, date, period, period_end_date
74 | "
75 |                                       date)))
76 |               #:exists 'replace))
77 |           (query-list dbc "
78 | select distinct
79 |   date::text
80 | from
81 |   zacks.eps_estimate
82 | where
83 |   date >= $1::text::date and
84 |   date <= $2::text::date
85 | order by
86 |   date;
87 | "
88 |                       (start-date)
89 |                       (end-date)))
90 | 
91 | (for-each (λ (date)
92 |             (call-with-output-file (string-append "/var/local/dat/zacks/sales-estimate/" date ".csv")
93 |               (λ (out)
94 |                 (displayln "act_symbol,date,period,period_end_date,consensus,count,high,low,year_ago" out)
95 |                 (for-each (λ (row)
96 |                             (displayln (vector->csv-line row) out))
97 |                           (query-rows dbc "
98 | select
99 |   act_symbol::text,
100 |   date::text,
101 |   period::text,
102 |   period_end_date::text,
103 | 
consensus::text, 104 | count::text, 105 | high::text, 106 | low::text, 107 | year_ago::text 108 | from 109 | zacks.sales_estimate 110 | where 111 | date = $1::text::date and 112 | consensus is not null and 113 | count is not null and 114 | high is not null and 115 | low is not null and 116 | year_ago is not null 117 | order by 118 | act_symbol, date, period, period_end_date 119 | " 120 | date))) 121 | #:exists 'replace)) 122 | (query-list dbc " 123 | select distinct 124 | date::text 125 | from 126 | zacks.sales_estimate 127 | where 128 | date >= $1::text::date and 129 | date <= $2::text::date 130 | order by 131 | date; 132 | " 133 | (start-date) 134 | (end-date))) 135 | 136 | -------------------------------------------------------------------------------- /dividend-calendar-transform-load.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require db 4 | gregor 5 | json 6 | racket/cmdline 7 | racket/list 8 | racket/port 9 | racket/sequence 10 | racket/string 11 | threading) 12 | 13 | (define base-folder (make-parameter "/var/local/zacks/dividend-calendar")) 14 | 15 | (define folder-date (make-parameter (today))) 16 | 17 | (define db-user (make-parameter "user")) 18 | 19 | (define db-name (make-parameter "local")) 20 | 21 | (define db-pass (make-parameter "")) 22 | 23 | (command-line 24 | #:program "racket transform-load.rkt" 25 | #:once-each 26 | [("-b" "--base-folder") folder 27 | "Dividend Calendar base folder. Defaults to /var/local/zacks/dividend-calendar" 28 | (base-folder folder)] 29 | [("-d" "--folder-date") date 30 | "Dividend Calendar folder date. Defaults to today" 31 | (folder-date (iso8601->date date))] 32 | [("-n" "--db-name") name 33 | "Database name. Defaults to 'local'" 34 | (db-name name)] 35 | [("-p" "--db-pass") password 36 | "Database password" 37 | (db-pass password)] 38 | [("-u" "--db-user") user 39 | "Database user name. 
Defaults to 'user'" 40 | (db-user user)]) 41 | 42 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass))) 43 | 44 | ; we clean up the future part of the table in case dividend dates have been shifted 45 | (query-exec dbc " 46 | delete from 47 | zacks.dividend_calendar 48 | where 49 | ex_date >= $1::text::date; 50 | " 51 | (~t (folder-date) "yyyy-MM-dd")) 52 | 53 | (parameterize ([current-directory (string-append (base-folder) "/" (~t (folder-date) "yyyy-MM-dd") "/")]) 54 | (for ([p (sequence-filter (λ (p) (string-contains? (path->string p) ".json")) (in-directory (current-directory)))]) 55 | (let* ([file-name (path->string p)] 56 | [date-of-dividend (string-replace (string-replace file-name (path->string (current-directory)) "") ".json" "")]) 57 | (call-with-input-file file-name 58 | (λ (in) 59 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to parse " 60 | file-name 61 | " for date " 62 | date-of-dividend)) 63 | (displayln e))]) 64 | (~> (port->string in) 65 | (regexp-replace* #rx"<.*?>" _ "") 66 | (regexp-replace* #rx"[A-Z\\.]+ Quick Quote" _ "") 67 | (string-replace _ "window.app_data = " "") 68 | (string->jsexpr _) 69 | (hash-ref _ 'data) 70 | (for-each (λ (ticker-div-list) 71 | (with-handlers ([exn:fail? 
(λ (e) (displayln (string-append "Failed to insert " 72 | (first ticker-div-list) 73 | " for date " 74 | date-of-dividend)) 75 | (displayln e) 76 | (rollback-transaction dbc))]) 77 | (start-transaction dbc) 78 | ; if we have a record from last week for this symbol, move it forward 79 | (query-exec dbc " 80 | delete from 81 | zacks.dividend_calendar 82 | where 83 | act_symbol = $1 and 84 | ex_date >= $2::text::date - '7 days'::interval 85 | " 86 | (first ticker-div-list) 87 | (~t (folder-date) "yyyy-MM-dd")) 88 | (query-exec dbc " 89 | insert into zacks.dividend_calendar ( 90 | act_symbol, 91 | ex_date, 92 | amount, 93 | payable_date 94 | ) values ( 95 | $1, 96 | $2::text::date, 97 | $3::text::decimal, 98 | case 99 | when $4 = '--' then null 100 | else $4::text::date 101 | end 102 | ) on conflict do nothing; 103 | " 104 | (first ticker-div-list) 105 | (sixth ticker-div-list) 106 | (string-replace (fourth ticker-div-list) "$" "") 107 | (eighth ticker-div-list)) 108 | (commit-transaction dbc))) _)))))))) 109 | 110 | ; vacuum (garbage collect) and reindex table as we deleted from it earlier 111 | (query-exec dbc " 112 | vacuum full freeze analyze zacks.dividend_calendar; 113 | ") 114 | 115 | (query-exec dbc " 116 | reindex table zacks.dividend_calendar; 117 | ") 118 | 119 | (disconnect dbc) 120 | -------------------------------------------------------------------------------- /financial-statement-extract.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require db 4 | gregor 5 | net/http-easy 6 | racket/cmdline 7 | racket/file 8 | racket/list 9 | racket/port 10 | tasks 11 | threading) 12 | 13 | (define (download-income-statement symbol) 14 | (make-directory* (string-append "/var/local/zacks/income-statement/" (~t (today) "yyyy-MM-dd"))) 15 | (with-handlers ([exn:fail? 
16 |                    (λ (error)
17 |                      (displayln (string-append "Encountered error for " symbol))
18 |                      (displayln error))])
19 |     (call-with-output-file* (string-append "/var/local/zacks/income-statement/" (~t (today) "yyyy-MM-dd") "/" symbol ".income-statement.html")
20 |       (λ (out)
21 |         (~> (string-append "https://www.zacks.com/stock/quote/" symbol "/income-statement")
22 |             (get _)
23 |             (response-body _)
24 |             (write-bytes _ out)))
25 |       #:exists 'replace)))
26 | ;; Fetches the Zacks balance-sheet page for `symbol` and saves it under today's folder; exn:fail errors are printed, not raised.
27 | (define (download-balance-sheet symbol)
28 |   ; resolve (today) once so the folder we create is the folder we write into, even across midnight
29 |   (define folder (string-append "/var/local/zacks/balance-sheet/" (~t (today) "yyyy-MM-dd")))
30 |   (with-handlers ([exn:fail?
31 |                    (λ (e)
32 |                      (displayln (string-append "Encountered error for " symbol))
33 |                      (displayln e))])
34 |     (make-directory* folder)
35 |     ; download before opening the file so a failed request cannot leave an empty or truncated page behind
36 |     (let ([page (response-body (get (string-append "https://www.zacks.com/stock/quote/" symbol "/balance-sheet")))])
37 |       (call-with-output-file* (string-append folder "/" symbol ".balance-sheet.html")
38 |         (λ (out) (write-bytes page out))
39 |         #:exists 'replace))))
40 | 
41 | (define (download-cash-flow-statement symbol)
42 |   (make-directory* (string-append "/var/local/zacks/cash-flow-statement/" (~t (today) "yyyy-MM-dd")))
43 |   (with-handlers ([exn:fail? 
44 | (λ (error) 45 | (displayln (string-append "Encountered error for " symbol)) 46 | (displayln error))]) 47 | (call-with-output-file* (string-append "/var/local/zacks/cash-flow-statement/" (~t (today) "yyyy-MM-dd") "/" symbol ".cash-flow-statement.html") 48 | (λ (out) 49 | (~> (string-append "https://www.zacks.com/stock/quote/" symbol "/cash-flow-statements") 50 | (get _) 51 | (response-body _) 52 | (write-bytes _ out))) 53 | #:exists 'replace))) 54 | 55 | (define db-user (make-parameter "user")) 56 | 57 | (define db-name (make-parameter "local")) 58 | 59 | (define db-pass (make-parameter "")) 60 | 61 | (define first-symbol (make-parameter "")) 62 | 63 | (define last-symbol (make-parameter "")) 64 | 65 | (command-line 66 | #:program "racket financial-statement-extract.rkt" 67 | #:once-each 68 | [("-f" "--first-symbol") first 69 | "First symbol to query. Defaults to nothing" 70 | (first-symbol first)] 71 | [("-l" "--last-symbol") last 72 | "Last symbol to query. Defaults to nothing" 73 | (last-symbol last)] 74 | [("-n" "--db-name") name 75 | "Database name. Defaults to 'local'" 76 | (db-name name)] 77 | [("-p" "--db-pass") password 78 | "Database password" 79 | (db-pass password)] 80 | [("-u" "--db-user") user 81 | "Database user name. 
Defaults to 'user'"
82 |   (db-user user)])
83 | 
84 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
85 | 
86 | (define symbols (query-list dbc "
87 | select
88 |   act_symbol
89 | from
90 |   nasdaq.symbol
91 | where
92 |   is_etf = false and
93 |   is_test_issue = false and
94 |   is_next_shares = false and
95 |   security_name !~ 'ETN' and
96 |   nasdaq_symbol !~ '[-\\$\\+\\*#!@%\\^=~]' and
97 |   case when nasdaq_symbol ~ '[A-Z]{4}[L-Z]'
98 |     then security_name !~ '(Note|Preferred|Right|Unit|Warrant)'
99 |     else true
100 |   end and
101 |   last_seen = (select max(last_seen) from nasdaq.symbol) and
102 |   case when $1 != ''
103 |     then act_symbol >= $1
104 |     else true
105 |   end and
106 |   case when $2 != ''
107 |     then act_symbol <= $2
108 |     else true
109 |   end
110 | order by
111 |   act_symbol;
112 | "
113 |                            (first-symbol)
114 |                            (last-symbol)))
115 | 
116 | (disconnect dbc)
117 | 
118 | ; seconds between consecutive symbols; the three statement downloads for a symbol start 0, 4 and 8 seconds in
119 | (define delay-interval 12)
120 | 
121 | (define delays (map (λ (x) (* delay-interval x)) (range 0 (length symbols))))
122 | 
123 | ; Every download runs in its own thread. Remember each one so the program can wait for
124 | ; stragglers: the stop task below fires only 4 seconds after the last download starts.
125 | (define download-threads '())
126 | 
127 | (define (spawn-download download symbol)
128 |   (set! download-threads (cons (thread (λ () (download symbol))) download-threads)))
129 | 
130 | (with-task-server (for-each (λ (l) (schedule-delayed-task (λ () (spawn-download download-income-statement (first l)))
131 |                                                           (second l))
132 |                               (schedule-delayed-task (λ () (spawn-download download-balance-sheet (first l)))
133 |                                                      (+ 4 (second l)))
134 |                               (schedule-delayed-task (λ () (spawn-download download-cash-flow-statement (first l)))
135 |                                                      (+ 8 (second l))))
136 |                             (map list symbols delays))
137 |   ; add a final task that will halt the task server
138 |   (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval (length delays)))
139 |   (run-tasks))
140 | 
141 | ; NOTE(review): assumes run-tasks returns once the stop task has fired. Without this wait a
142 | ; download still in flight at that moment would be killed when the main thread exits.
143 | (for-each thread-wait download-threads)
144 | 
--------------------------------------------------------------------------------
/earnings-calendar-transform-load.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (require db
4 |          gregor
5 |          json
6 |          racket/cmdline
7 |          racket/list
8 |          racket/port
9 |          racket/sequence
10 |          racket/string
11 |          threading)
12 | 
13 | 
(define base-folder (make-parameter "/var/local/zacks/earnings-calendar"))
14 | 
15 | (define folder-date (make-parameter (today)))
16 | 
17 | (define db-user (make-parameter "user"))
18 | 
19 | (define db-name (make-parameter "local"))
20 | 
21 | (define db-pass (make-parameter ""))
22 | ; command-line flags override the parameter defaults above; #:program is the name shown in the usage text
23 | (command-line
24 |  #:program "racket earnings-calendar-transform-load.rkt"
25 |  #:once-each
26 |  [("-b" "--base-folder") folder
27 |   "Earnings Calendar base folder. Defaults to /var/local/zacks/earnings-calendar"
28 |   (base-folder folder)]
29 |  [("-d" "--folder-date") date
30 |   "Earnings Calendar folder date. Defaults to today"
31 |   (folder-date (iso8601->date date))]
32 |  [("-n" "--db-name") name
33 |   "Database name. Defaults to 'local'"
34 |   (db-name name)]
35 |  [("-p" "--db-pass") password
36 |   "Database password"
37 |   (db-pass password)]
38 |  [("-u" "--db-user") user
39 |   "Database user name. Defaults to 'user'"
40 |   (db-user user)])
41 | 
42 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
43 | ; NOTE(review): this delete runs before we know the download folder exists or holds any .json files; confirm that is intended
44 | ; we clean up the future part of the table in case earnings dates have been shifted
45 | (query-exec dbc "
46 | delete from
47 |   zacks.earnings_calendar
48 | where
49 |   date >= $1::text::date;
50 | "
51 |             (~t (folder-date) "yyyy-MM-dd"))
52 | ; only paths ending in .json are processed; a bare substring test would also accept names such as x.json.bak
53 | (parameterize ([current-directory (string-append (base-folder) "/" (~t (folder-date) "yyyy-MM-dd") "/")])
54 |   (for ([p (sequence-filter (λ (p) (string-suffix? (path->string p) ".json")) (in-directory (current-directory)))])
55 |     (let* ([file-name (path->string p)]
56 |            [date-of-earnings (string-replace (string-replace file-name (path->string (current-directory)) "") ".json" "")])
57 |       (call-with-input-file file-name
58 |         (λ (in)
59 |           (with-handlers ([exn:fail? 
(λ (e) (displayln (string-append "Failed to parse " 60 | file-name 61 | " for date " 62 | date-of-earnings)) 63 | (displayln e))]) 64 | (~> (port->string in) 65 | (regexp-replace* #rx"<.*?>" _ "") 66 | (regexp-replace* #rx"[A-Z\\.]+ Quick Quote" _ "") 67 | (string-replace _ "window.app_data = " "") 68 | (string->jsexpr _) 69 | (hash-ref _ 'data) 70 | (for-each (λ (ticker-when-list) 71 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to insert " 72 | (first ticker-when-list) 73 | " for date " 74 | date-of-earnings)) 75 | (displayln e) 76 | (rollback-transaction dbc))]) 77 | (start-transaction dbc) 78 | ; if we have a record from last week for this symbol, move it forward 79 | (query-exec dbc " 80 | delete from 81 | zacks.earnings_calendar 82 | where 83 | act_symbol = $1 and 84 | date >= $2::text::date - '7 days'::interval 85 | " 86 | (first ticker-when-list) 87 | (~t (folder-date) "yyyy-MM-dd")) 88 | (query-exec dbc " 89 | insert into zacks.earnings_calendar ( 90 | act_symbol, 91 | date, 92 | \"when\" 93 | ) values ( 94 | $1, 95 | $2::text::date, 96 | case $3 97 | when 'amc' then 'After market close'::zacks.when 98 | when 'bmo' then 'Before market open'::zacks.when 99 | when '--' then NULL 100 | end 101 | ) on conflict do nothing; 102 | " 103 | (first ticker-when-list) 104 | date-of-earnings 105 | (fourth ticker-when-list)) 106 | (commit-transaction dbc))) _)))))))) 107 | 108 | ; remove estimated dates when the estimate moves 109 | (query-exec dbc " 110 | delete from 111 | zacks.earnings_calendar ec 112 | using 113 | (select 114 | ec.act_symbol, 115 | max(ec.date) as max_date, 116 | bsa.date as bsa_date 117 | from 118 | zacks.earnings_calendar ec 119 | join 120 | (select distinct 121 | act_symbol, 122 | date 123 | from 124 | zacks.balance_sheet_assets bsa 125 | union 126 | (select 127 | act_symbol, 128 | (((max(date) + '1 day'::interval) + '3 months'::interval) - '1 day'::interval)::date 129 | from 130 | zacks.balance_sheet_assets 131 | group by 
132 | act_symbol) 133 | order by 134 | act_symbol, 135 | date) bsa 136 | on 137 | ec.act_symbol = bsa.act_symbol and 138 | ec.date > bsa.date and 139 | ec.date <= ((bsa.date + '1 day'::interval) + '3 months'::interval) - '1 day'::interval 140 | group by 141 | ec.act_symbol, 142 | bsa.date) ecm 143 | where 144 | ec.act_symbol = ecm.act_symbol and 145 | ec.date != max_date and 146 | ec.date > bsa_date and 147 | ec.date <= ((bsa_date + '1 day'::interval) + '3 months'::interval) - '1 day'::interval; 148 | ") 149 | 150 | ; vacuum (garbage collect) and reindex table as we deleted from it earlier 151 | (query-exec dbc " 152 | vacuum full freeze analyze zacks.earnings_calendar; 153 | ") 154 | 155 | (query-exec dbc " 156 | reindex table zacks.earnings_calendar; 157 | ") 158 | 159 | (disconnect dbc) 160 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zacks-estimates-financial-statements 2 | 3 | These Racket programs will download the Zacks "Detailed Estimates" and "Financials" HTML documents and insert the 4 | estimates/statement data into a PostgreSQL database. The intended usage is: 5 | 6 | ```bash 7 | $ racket estimate-extract.rkt 8 | $ racket estimate-transform-load.rkt 9 | ``` 10 | 11 | ```bash 12 | $ racket financial-statement-extract.rkt 13 | $ racket balance-sheet-transform-load.rkt 14 | $ racket cash-flow-statement-transform-load.rkt 15 | $ racket income-statement-transform-load.rkt 16 | ``` 17 | 18 | ```bash 19 | $ racket earnings-calendar-extract.rkt 20 | $ racket earnings-calendar-transform-load.rkt 21 | ``` 22 | 23 | You will need to provide a database password for many of the above programs. The available parameters are: 24 | 25 | ```bash 26 | $ racket estimate-extract.rkt -h 27 | racket estimate-extract.rkt [