├── estimate-extract-transform-load.sh
├── dividend-calendar-extract-transform-load.sh
├── earnings-calendar-extract-transform-load.sh
├── financial-statement-transform-load-yesterday.sh
├── dividend-calendar-extract.rkt
├── earnings-calendar-extract.rkt
├── estimate-extract.rkt
├── dump-dolt-earnings-calendar.rkt
├── dump-dat.rkt
├── dividend-calendar-transform-load.rkt
├── financial-statement-extract.rkt
├── earnings-calendar-transform-load.rkt
├── README.md
├── dump-dolt-estimates.rkt
├── schema.sql
├── dump-dolt-statements.rkt
├── cash-flow-statement-transform-load.rkt
├── LICENSE
├── income-statement-transform-load.rkt
├── cash-flow-statement-transform-load.2024-02-01.rkt
├── estimate-transform-load.rkt
└── balance-sheet-transform-load.rkt
/estimate-extract-transform-load.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Daily estimates pipeline: download the Zacks estimate pages, load them into the
4 | # database, archive the raw HTML, then export to Dolt.
5 | # Usage: estimate-extract-transform-load.sh DB_PASSWORD
6 | # No errexit is set, so every step runs even if an earlier one failed.
7 |
8 | today=$(date "+%F")
9 | dir=$(dirname "$0")
10 | current_year=$(date "+%Y")
11 |
12 | # Expansions are quoted so that a checkout path containing whitespace stays one word.
13 | racket -y "${dir}/estimate-extract.rkt" -p "$1"
14 | racket -y "${dir}/estimate-transform-load.rkt" -p "$1"
15 |
16 | # The glob is kept outside the quotes so it still expands to the day's HTML files.
17 | 7zr a "/var/local/zacks/estimates/${current_year}/${today}.7z" "/var/local/zacks/estimates/${today}"/*.html
18 |
19 | racket -y "${dir}/dump-dolt-estimates.rkt" -p "$1"
20 |
--------------------------------------------------------------------------------
/dividend-calendar-extract-transform-load.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Daily dividend calendar pipeline: download the calendar JSON, load it into the
4 | # database, then add the day's download folder to the yearly archive.
5 | # Usage: dividend-calendar-extract-transform-load.sh DB_PASSWORD
6 | # No errexit is set, so every step runs even if an earlier one failed.
7 |
8 | today=$(date "+%F")
9 | dir=$(dirname "$0")
10 | current_year=$(date "+%Y")
11 |
12 | # Expansions are quoted so that a checkout path containing whitespace stays one word.
13 | racket -y "${dir}/dividend-calendar-extract.rkt"
14 | racket -y "${dir}/dividend-calendar-transform-load.rkt" -p "$1"
15 |
16 | 7zr a "/var/local/zacks/dividend-calendar/${current_year}.7z" "/var/local/zacks/dividend-calendar/${today}"
17 |
18 | # racket -y ${dir}/dump-dolt-dividend-calendar.rkt -p "$1"
19 |
--------------------------------------------------------------------------------
/earnings-calendar-extract-transform-load.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Daily earnings calendar pipeline: download the calendar JSON, load it into the
4 | # database, add the day's download folder to the yearly archive, then export to Dolt.
5 | # Usage: earnings-calendar-extract-transform-load.sh DB_PASSWORD
6 | # No errexit is set, so every step runs even if an earlier one failed.
7 |
8 | today=$(date "+%F")
9 | dir=$(dirname "$0")
10 | current_year=$(date "+%Y")
11 |
12 | # Expansions are quoted so that a checkout path containing whitespace stays one word.
13 | racket -y "${dir}/earnings-calendar-extract.rkt"
14 | racket -y "${dir}/earnings-calendar-transform-load.rkt" -p "$1"
15 |
16 | 7zr a "/var/local/zacks/earnings-calendar/${current_year}.7z" "/var/local/zacks/earnings-calendar/${today}"
17 |
18 | racket -y "${dir}/dump-dolt-earnings-calendar.rkt" -p "$1"
19 |
--------------------------------------------------------------------------------
/financial-statement-transform-load-yesterday.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Load yesterday's downloaded financial statements into the database, archive the
4 | # raw HTML of each statement type, then export to Dolt.
5 | # Usage: financial-statement-transform-load-yesterday.sh DB_PASSWORD
6 | # No errexit is set, so every step runs even if an earlier one failed.
7 |
8 | # The relative date string passed to -d relies on GNU date.
9 | yesterday=$(date -d "-1 day" "+%F")
10 | dir=$(dirname "$0")
11 |
12 | # Expansions are quoted so that a checkout path containing whitespace stays one word.
13 | racket -y "${dir}/balance-sheet-transform-load.rkt" -d "${yesterday}" -p "$1"
14 | racket -y "${dir}/cash-flow-statement-transform-load.2024-02-01.rkt" -d "${yesterday}" -p "$1"
15 | racket -y "${dir}/income-statement-transform-load.rkt" -d "${yesterday}" -p "$1"
16 |
17 | # The globs are kept outside the quotes so they still expand to the day's HTML files.
18 | 7zr a "/var/local/zacks/balance-sheet/${yesterday}.7z" "/var/local/zacks/balance-sheet/${yesterday}"/*.html
19 | 7zr a "/var/local/zacks/cash-flow-statement/${yesterday}.7z" "/var/local/zacks/cash-flow-statement/${yesterday}"/*.html
20 | 7zr a "/var/local/zacks/income-statement/${yesterday}.7z" "/var/local/zacks/income-statement/${yesterday}"/*.html
21 |
22 | racket -y "${dir}/dump-dolt-statements.rkt" -p "$1"
23 |
--------------------------------------------------------------------------------
/dividend-calendar-extract.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require gregor
4 | gregor/period
5 | gregor/time
6 | net/http-easy
7 | racket/cmdline
8 | racket/file
9 | racket/list
10 | racket/port
11 | tasks
12 | threading)
13 | ;; Save the Zacks dividend calendar for `date` as RUN-DATE/DATE.json; failures are printed, not raised.
14 | (define (download-day date)
15 |   ; format the run date once so the folder and the file inside it always agree
16 |   (define folder (string-append "/var/local/zacks/dividend-calendar/" (~t (today) "yyyy-MM-dd")))
17 |   (make-directory* folder)
18 |   (with-handlers ([exn:fail?
19 |                    (λ (error)
20 |                      (displayln (string-append "Encountered error for " (~t date "yyyy-MM-dd")))
21 |                      (displayln error))])
22 |     ; download before opening the file: a failed request must not truncate an earlier good copy
23 |     (let ([body (~> (string-append "https://www.zacks.com/includes/classes/z2_class_calendarfunctions_data.php"
24 |                                    "?calltype=eventscal&type=5&date="
25 |                                    (number->string (->posix (at-time date (time 6)))))
26 |                     (get _)
27 |                     (response-body _))])
28 |       (call-with-output-file* (string-append folder "/" (~t date "yyyy-MM-dd") ".json")
29 |         (λ (out) (write-bytes body out)) #:exists 'replace))))
30 | ; Date window to download, held as gregor dates; -s and -e parse their argument with iso8601->date.
31 | (define end-date (make-parameter (+days (today) (* 7 6))))
32 |
33 | (define start-date (make-parameter (today)))
34 |
35 | (command-line
36 | #:program "racket dividend-calendar-extract.rkt"
37 | #:once-each
38 | [("-e" "--end-date") ed
39 | "End date. Defaults to today + 6 weeks"
40 | (end-date (iso8601->date ed))]
41 | [("-s" "--start-date") sd
42 | "Start date. Defaults to today"
43 | (start-date (iso8601->date sd))])
44 | ; Schedule one download per day in [start-date, end-date); `in-range` stops before the end date itself.
45 | (define delay-interval 10) ; gap between consecutive downloads, in schedule-delayed-task's units
46 |
47 | (let ([day-count (period-ref (period-between (start-date) (end-date) '(days)) 'days)])
48 |   (with-task-server
49 |     (for ([offset (in-range 0 day-count)])
50 |       (schedule-delayed-task (λ () (download-day (+days (start-date) offset))) (* offset delay-interval)))
51 |     ; add a final task that will halt the task server
52 |     (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval day-count))
53 |     (run-tasks)))
54 |
--------------------------------------------------------------------------------
/earnings-calendar-extract.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require gregor
4 | gregor/period
5 | gregor/time
6 | net/http-easy
7 | racket/cmdline
8 | racket/file
9 | racket/list
10 | racket/port
11 | tasks
12 | threading)
13 | ;; Save the Zacks earnings calendar for `date` as RUN-DATE/DATE.json; failures are printed, not raised.
14 | (define (download-day date)
15 |   ; format the run date once so the folder and the file inside it always agree
16 |   (define folder (string-append "/var/local/zacks/earnings-calendar/" (~t (today) "yyyy-MM-dd")))
17 |   (make-directory* folder)
18 |   (with-handlers ([exn:fail?
19 |                    (λ (error)
20 |                      (displayln (string-append "Encountered error for " (~t date "yyyy-MM-dd")))
21 |                      (displayln error))])
22 |     ; download before opening the file: a failed request must not truncate an earlier good copy
23 |     (let ([body (~> (string-append "https://www.zacks.com/includes/classes/z2_class_calendarfunctions_data.php"
24 |                                    "?calltype=eventscal&type=1&date="
25 |                                    (number->string (->posix (at-time date (time 6)))))
26 |                     (get _)
27 |                     (response-body _))])
28 |       (call-with-output-file* (string-append folder "/" (~t date "yyyy-MM-dd") ".json")
29 |         (λ (out) (write-bytes body out)) #:exists 'replace))))
30 | ; Date window to download, held as gregor dates; -s and -e parse their argument with iso8601->date.
31 | (define end-date (make-parameter (+days (today) (* 7 6))))
32 |
33 | (define start-date (make-parameter (today)))
34 |
35 | (command-line
36 | #:program "racket earnings-calendar-extract.rkt"
37 | #:once-each
38 | [("-e" "--end-date") ed
39 | "End date. Defaults to today + 6 weeks"
40 | (end-date (iso8601->date ed))]
41 | [("-s" "--start-date") sd
42 | "Start date. Defaults to today"
43 | (start-date (iso8601->date sd))])
44 | ; Schedule one download per day in [start-date, end-date); `in-range` stops before the end date itself.
45 | (define delay-interval 10) ; gap between consecutive downloads, in schedule-delayed-task's units
46 |
47 | (let ([day-count (period-ref (period-between (start-date) (end-date) '(days)) 'days)])
48 |   (with-task-server
49 |     (for ([offset (in-range 0 day-count)])
50 |       (schedule-delayed-task (λ () (download-day (+days (start-date) offset))) (* offset delay-interval)))
51 |     ; add a final task that will halt the task server
52 |     (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval day-count))
53 |     (run-tasks)))
54 |
--------------------------------------------------------------------------------
/estimate-extract.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | net/http-easy
6 | racket/cmdline
7 | racket/file
8 | racket/list
9 | racket/port
10 | tasks
11 | threading)
12 | ;; Save the Zacks detailed-earning-estimates page for `symbol` in today's folder; failures are printed.
13 | (define (download-estimates symbol)
14 |   ; format the date once so the folder and the file inside it always agree
15 |   (define folder (string-append "/var/local/zacks/estimates/" (~t (today) "yyyy-MM-dd")))
16 |   (make-directory* folder)
17 |   (with-handlers ([exn:fail? (λ (error)
18 |                                (displayln (string-append "Encountered error for " symbol))
19 |                                (displayln error))])
20 |     ; download before opening the file: a failed request must not truncate an earlier good copy
21 |     (let ([body (~> (string-append "https://www.zacks.com/stock/quote/" symbol "/detailed-earning-estimates")
22 |                     (get _) (response-body _))])
23 |       (call-with-output-file* (string-append folder "/" symbol ".detailed-estimates.html")
24 |         (λ (out) (write-bytes body out)) #:exists 'replace))))
25 | ; Connection settings and optional symbol range, all overridable from the command line.
26 | (define db-user (make-parameter "user"))
27 |
28 | (define db-name (make-parameter "local"))
29 |
30 | (define db-pass (make-parameter ""))
31 |
32 | (define first-symbol (make-parameter "")) ; "" leaves the lower bound of the symbol query open
33 |
34 | (define last-symbol (make-parameter "")) ; "" leaves the upper bound of the symbol query open
35 |
36 | (command-line
37 |  #:program "racket estimate-extract.rkt" ; usage text now names this file (was estimate-extract.2023-02-27.rkt)
38 |  #:once-each
39 |  [("-f" "--first-symbol") first
40 |   "First symbol to query. Defaults to nothing"
41 |   (first-symbol first)]
42 |  [("-l" "--last-symbol") last
43 |   "Last symbol to query. Defaults to nothing"
44 |   (last-symbol last)]
45 |  [("-n" "--db-name") name
46 |   "Database name. Defaults to 'local'"
47 |   (db-name name)]
48 |  [("-p" "--db-pass") password
49 |   "Database password"
50 |   (db-pass password)]
51 |  [("-u" "--db-user") user
52 |   "Database user name. Defaults to 'user'"
53 |   (db-user user)])
54 | ; Read the symbols to download from nasdaq.symbol; -f and -l optionally bound the act_symbol range.
55 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
56 |
57 | (define symbols (query-list dbc "
58 | select
59 | act_symbol
60 | from
61 | nasdaq.symbol
62 | where
63 | is_etf = false and
64 | is_test_issue = false and
65 | is_next_shares = false and
66 | security_name !~ 'ETN' and
67 | nasdaq_symbol !~ '[-\\$\\+\\*#!@%\\^=~]' and
68 | case when nasdaq_symbol ~ '[A-Z]{4}[L-Z]'
69 | then security_name !~ '(Note|Preferred|Right|Unit|Warrant)'
70 | else true
71 | end and
72 | last_seen = (select max(last_seen) from nasdaq.symbol) and
73 | case when $1 != ''
74 | then act_symbol >= $1
75 | else true
76 | end and
77 | case when $2 != ''
78 | then act_symbol <= $2
79 | else true
80 | end
81 | order by
82 | act_symbol;
83 | "
84 | (first-symbol)
85 | (last-symbol)))
86 | ; the connection is only used for the symbol query above
87 | (disconnect dbc)
88 | ; Stagger the downloads: the k-th symbol is scheduled k * delay-interval after the first one.
89 | (define delay-interval 10)
90 |
91 | (define delays (for/list ([k (in-range (length symbols))]) (* delay-interval k)))
92 | (with-task-server
93 |   (for ([symbol (in-list symbols)] [wait (in-list delays)])
94 |     ; each task merely spawns a thread; the download itself runs in that thread
95 |     (schedule-delayed-task (λ () (thread (λ () (download-estimates symbol)))) wait))
96 |   ; add a final task that will halt the task server
97 |   (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval (length delays)))
98 |   (run-tasks))
99 |
--------------------------------------------------------------------------------
/dump-dolt-earnings-calendar.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | racket/cmdline
6 | racket/string
7 | racket/system)
8 | ; Settings for the dolt export, all overridable from the command line; as-of-date is a yyyy-MM-dd string.
9 | (define base-folder (make-parameter "/var/local/dolt/earnings"))
10 |
11 | (define as-of-date (make-parameter (~t (today) "yyyy-MM-dd")))
12 |
13 | (define db-user (make-parameter "user"))
14 |
15 | (define db-name (make-parameter "local"))
16 |
17 | (define db-pass (make-parameter ""))
18 |
19 | (command-line
20 |  #:program "racket dump-dolt-earnings-calendar.rkt" ; usage text now names this file (was dump-dolt-calendar.rkt)
21 |  #:once-each
22 |  [("-b" "--base-folder") folder
23 |   "Base dolt folder. Defaults to /var/local/dolt/earnings"
24 |   (base-folder folder)]
25 |  [("-d" "--date") date
26 |   "Final date for history retrieval. Defaults to today"
27 |   (as-of-date date)]
28 |  [("-n" "--db-name") name
29 |   "Database name. Defaults to 'local'"
30 |   (db-name name)]
31 |  [("-p" "--db-pass") password
32 |   "Database password"
33 |   (db-pass password)]
34 |  [("-u" "--db-user") user
35 |   "Database user name. Defaults to 'user'"
36 |   (db-user user)])
37 | ; PostgreSQL connection used to read the rows that are exported to the dolt repository
38 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
39 |
40 | ; earnings-calendar: delete the dolt rows dated from 7 days before as-of-date onward
41 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt sql -q \"delete from earnings_calendar where date >= date_sub('" (as-of-date) "', interval 7 day)\";"))
42 |
43 | (define earnings-calendar-file (string-append (base-folder) "/earnings-calendar-" (as-of-date) ".csv"))
44 | ; export the same window from PostgreSQL as CSV with a header row; a null "when" becomes an empty field
45 | (call-with-output-file* earnings-calendar-file
46 | (λ (out)
47 | (displayln "act_symbol,date,when" out)
48 | (for-each (λ (row)
49 | (displayln (string-join (vector->list row) ",") out))
50 | (query-rows dbc "
51 | select
52 | act_symbol::text,
53 | date::text,
54 | coalesce(\"when\"::text, '')
55 | from
56 | zacks.earnings_calendar
57 | where
58 | date >= $1::text::date - '7 days'::interval;
59 | "
60 | (as-of-date))))
61 | #:exists 'replace)
62 | ; in dolt, for each symbol and each three-month window after a balance sheet date, keep only the latest earnings date
63 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt sql -q \"
64 | with ecm (act_symbol, max_date, bsa_date) as (
65 | select
66 | ec.act_symbol,
67 | max(ec.date),
68 | bsa.date
69 | from
70 | earnings_calendar ec
71 | join
72 | (select distinct
73 | act_symbol,
74 | date
75 | from
76 | balance_sheet_assets bsa
77 | union
78 | (select
79 | act_symbol,
80 | date_sub(date_add(date_add(max(date), interval 1 day), interval 3 month), interval 1 day)
81 | from
82 | balance_sheet_assets
83 | group by
84 | act_symbol)
85 | order by
86 | act_symbol,
87 | date) bsa
88 | on
89 | ec.act_symbol = bsa.act_symbol and
90 | ec.date > bsa.date and
91 | ec.date <= date_sub(date_add(date_add(bsa.date, interval 1 day), interval 3 month), interval 1 day)
92 | group by
93 | ec.act_symbol,
94 | bsa.date
95 | )
96 | delete
97 | ec
98 | from
99 | earnings_calendar ec
100 | join
101 | ecm
102 | where
103 | ec.act_symbol = ecm.act_symbol and
104 | ec.date != ecm.max_date and
105 | ec.date > ecm.bsa_date and
106 | ec.date <= date_sub(date_add(date_add(ecm.bsa_date, interval 1 day), interval 3 month), interval 1 day);
107 | \""))
108 | ; load the exported CSV into the dolt table, then commit and push the result
109 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt table import -u --continue earnings_calendar earnings-calendar-" (as-of-date) ".csv"))
110 |
111 | (system (string-append "cd " (base-folder) "; /usr/local/bin/dolt add earnings_calendar; "
112 |                        "/usr/local/bin/dolt commit -m 'earnings_calendar " (as-of-date) " update'; /usr/local/bin/dolt push --silent"))
113 | (disconnect dbc) ; release the PostgreSQL connection, which was previously left open until exit
--------------------------------------------------------------------------------
/dump-dat.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | racket/cmdline
6 | racket/vector)
7 | ; Date range (yyyy-MM-dd strings) and connection settings, all overridable from the command line.
8 | (define start-date (make-parameter (~t (today) "yyyy-MM-dd")))
9 |
10 | (define end-date (make-parameter (~t (today) "yyyy-MM-dd")))
11 |
12 | (define db-user (make-parameter "user"))
13 |
14 | (define db-name (make-parameter "local"))
15 |
16 | (define db-pass (make-parameter ""))
17 |
18 | (command-line
19 | #:program "racket dump-dat.rkt"
20 | #:once-each
21 | [("-e" "--end-date") end
22 | "Final date for history retrieval. Defaults to today"
23 | (end-date end)]
24 | [("-n" "--db-name") name
25 | "Database name. Defaults to 'local'"
26 | (db-name name)]
27 | [("-p" "--db-pass") password
28 | "Database password"
29 | (db-pass password)]
30 | [("-s" "--start-date") start
31 | "Earliest date for history retrieval. Defaults to today"
32 | (start-date start)]
33 | [("-u" "--db-user") user
34 | "Database user name. Defaults to 'user'"
35 | (db-user user)])
36 | ; connection shared by the eps-estimate and sales-estimate exports
37 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
38 | ;; Join the fields of `vec` with commas; an empty vector now yields "" instead of raising.
39 | (define (vector->csv-line vec)
40 |   ; one left-to-right pass: no recursion and no vector copy per field
41 |   (for/fold ([line ""]) ([field (in-vector vec)] [position (in-naturals)])
42 |     (if (zero? position) field (string-append line "," field))))
43 | ; EPS estimates: one CSV per distinct date in [start-date, end-date]; rows with a null value column are left out
44 | (for-each (λ (date)
45 | (call-with-output-file (string-append "/var/local/dat/zacks/eps-estimate/" date ".csv")
46 | (λ (out)
47 | (displayln "act_symbol,date,period,period_end_date,consensus,recent,count,high,low,year_ago" out)
48 | (for-each (λ (row)
49 | (displayln (vector->csv-line row) out))
50 | (query-rows dbc "
51 | select
52 | act_symbol::text,
53 | date::text,
54 | period::text,
55 | period_end_date::text,
56 | consensus::text,
57 | recent::text,
58 | count::text,
59 | high::text,
60 | low::text,
61 | year_ago::text
62 | from
63 | zacks.eps_estimate
64 | where
65 | date = $1::text::date and
66 | consensus is not null and
67 | recent is not null and
68 | count is not null and
69 | high is not null and
70 | low is not null and
71 | year_ago is not null
72 | order by
73 | act_symbol, date, period, period_end_date
74 | "
75 | date)))
76 | #:exists 'replace))
77 | (query-list dbc "
78 | select distinct
79 | date::text
80 | from
81 | zacks.eps_estimate
82 | where
83 | date >= $1::text::date and
84 | date <= $2::text::date
85 | order by
86 | date;
87 | "
88 | (start-date)
89 | (end-date)))
90 | ; Sales estimates: one CSV per distinct date in [start-date, end-date]; rows with a null value column are left out
91 | (for-each (λ (date)
92 |             (call-with-output-file (string-append "/var/local/dat/zacks/sales-estimate/" date ".csv")
93 |               (λ (out)
94 |                 (displayln "act_symbol,date,period,period_end_date,consensus,count,high,low,year_ago" out)
95 |                 (for-each (λ (row)
96 |                             (displayln (vector->csv-line row) out))
97 |                           (query-rows dbc "
98 | select
99 | act_symbol::text,
100 | date::text,
101 | period::text,
102 | period_end_date::text,
103 | consensus::text,
104 | count::text,
105 | high::text,
106 | low::text,
107 | year_ago::text
108 | from
109 | zacks.sales_estimate
110 | where
111 | date = $1::text::date and
112 | consensus is not null and
113 | count is not null and
114 | high is not null and
115 | low is not null and
116 | year_ago is not null
117 | order by
118 | act_symbol, date, period, period_end_date
119 | "
120 |                                       date)))
121 |               #:exists 'replace))
122 |           (query-list dbc "
123 | select distinct
124 | date::text
125 | from
126 | zacks.sales_estimate
127 | where
128 | date >= $1::text::date and
129 | date <= $2::text::date
130 | order by
131 | date;
132 | "
133 |                       (start-date)
134 |                       (end-date)))
135 | (disconnect dbc) ; both exports are done; release the connection, which was previously left open until exit
136 |
--------------------------------------------------------------------------------
/dividend-calendar-transform-load.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | json
6 | racket/cmdline
7 | racket/list
8 | racket/port
9 | racket/sequence
10 | racket/string
11 | threading)
12 | ; Input folder, folder date (a gregor date) and connection settings, all overridable from the command line.
13 | (define base-folder (make-parameter "/var/local/zacks/dividend-calendar"))
14 |
15 | (define folder-date (make-parameter (today)))
16 |
17 | (define db-user (make-parameter "user"))
18 |
19 | (define db-name (make-parameter "local"))
20 |
21 | (define db-pass (make-parameter ""))
22 |
23 | (command-line
24 |  #:program "racket dividend-calendar-transform-load.rkt" ; usage text now names this file (was transform-load.rkt)
25 |  #:once-each
26 |  [("-b" "--base-folder") folder
27 |   "Dividend Calendar base folder. Defaults to /var/local/zacks/dividend-calendar"
28 |   (base-folder folder)]
29 |  [("-d" "--folder-date") date
30 |   "Dividend Calendar folder date. Defaults to today"
31 |   (folder-date (iso8601->date date))]
32 |  [("-n" "--db-name") name
33 |   "Database name. Defaults to 'local'"
34 |   (db-name name)]
35 |  [("-p" "--db-pass") password
36 |   "Database password"
37 |   (db-pass password)]
38 |  [("-u" "--db-user") user
39 |   "Database user name. Defaults to 'user'"
40 |   (db-user user)])
41 | ; connection used for every statement in this script; it is closed at the very end
42 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
43 |
44 | ; we clean up the future part of the table (ex_date on or after the folder date) in case dividend dates have been shifted
45 | (query-exec dbc "
46 | delete from
47 | zacks.dividend_calendar
48 | where
49 | ex_date >= $1::text::date;
50 | "
51 | (~t (folder-date) "yyyy-MM-dd"))
52 | ; Parse every .json file in the folder for folder-date and insert its rows, one transaction per row.
53 | (parameterize ([current-directory (string-append (base-folder) "/" (~t (folder-date) "yyyy-MM-dd") "/")])
54 | (for ([p (sequence-filter (λ (p) (string-contains? (path->string p) ".json")) (in-directory (current-directory)))])
55 | (let* ([file-name (path->string p)]
56 | [date-of-dividend (string-replace (string-replace file-name (path->string (current-directory)) "") ".json" "")])
57 | (call-with-input-file file-name
58 | (λ (in)
59 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to parse "
60 | file-name
61 | " for date "
62 | date-of-dividend))
63 | (displayln e))])
64 | (~> (port->string in)
65 | (regexp-replace* #rx"<.*?>" _ "") ; drop anything between angle brackets
66 | (regexp-replace* #rx"[A-Z\\.]+ Quick Quote" _ "") ; drop the "... Quick Quote" text
67 | (string-replace _ "window.app_data = " "") ; drop the JavaScript assignment prefix
68 | (string->jsexpr _)
69 | (hash-ref _ 'data) ; the rows are under the 'data key; each row is used as a list below
70 | (for-each (λ (ticker-div-list)
71 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to insert "
72 | (first ticker-div-list)
73 | " for date "
74 | date-of-dividend))
75 | (displayln e)
76 | (rollback-transaction dbc))])
77 | (start-transaction dbc)
78 | ; if we have a record from last week for this symbol, move it forward
79 | (query-exec dbc "
80 | delete from
81 | zacks.dividend_calendar
82 | where
83 | act_symbol = $1 and
84 | ex_date >= $2::text::date - '7 days'::interval
85 | "
86 | (first ticker-div-list)
87 | (~t (folder-date) "yyyy-MM-dd"))
88 | (query-exec dbc "
89 | insert into zacks.dividend_calendar (
90 | act_symbol,
91 | ex_date,
92 | amount,
93 | payable_date
94 | ) values (
95 | $1,
96 | $2::text::date,
97 | $3::text::decimal,
98 | case
99 | when $4 = '--' then null
100 | else $4::text::date
101 | end
102 | ) on conflict do nothing;
103 | "
104 | (first ticker-div-list) ; act_symbol
105 | (sixth ticker-div-list) ; ex_date
106 | (string-replace (fourth ticker-div-list) "$" "") ; amount with the dollar sign removed
107 | (eighth ticker-div-list)) ; payable_date; "--" is stored as null
108 | (commit-transaction dbc))) _))))))))
109 | ; Table maintenance after the deletes above, then close the connection.
110 | ; vacuum (garbage collect) and reindex table as we deleted from it earlier
111 | (query-exec dbc "
112 | vacuum full freeze analyze zacks.dividend_calendar;
113 | ")
114 |
115 | (query-exec dbc "
116 | reindex table zacks.dividend_calendar;
117 | ")
118 |
119 | (disconnect dbc)
120 |
--------------------------------------------------------------------------------
/financial-statement-extract.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | net/http-easy
6 | racket/cmdline
7 | racket/file
8 | racket/list
9 | racket/port
10 | tasks
11 | threading)
12 |
13 | ;; Shared downloader behind the three statement-specific functions below.
14 | ;;   symbol     - ticker whose page is fetched
15 | ;;   statement  - folder name and file suffix, e.g. "balance-sheet"
16 | ;;   url-suffix - last URL path segment of the page
17 | ;; The page is written to /var/local/zacks/STATEMENT/TODAY/SYMBOL.STATEMENT.html.
18 | ;; Any exn:fail is printed and swallowed, so one failing symbol does not abort the run.
19 | ;; The request completes before the file is opened, so a failed download neither creates
20 | ;; an empty file nor replaces a copy saved earlier the same day.
21 | ;; Returns the result of write-bytes, or the handler's result after a failure; callers ignore it.
22 | (define (download-statement symbol statement url-suffix)
23 |   ; format the date once so the folder and the file inside it always agree
24 |   (define folder (string-append "/var/local/zacks/" statement "/" (~t (today) "yyyy-MM-dd")))
25 |   ; as before, folder creation stays outside the handler: a folder that cannot be created still raises
26 |   (make-directory* folder)
27 |   (with-handlers ([exn:fail?
28 |                    (λ (error)
29 |                      (displayln (string-append "Encountered error for " symbol))
30 |                      (displayln error))])
31 |     ; fetch the whole body first, then open and write the file in one step
32 |     (let ([body (~> (string-append "https://www.zacks.com/stock/quote/" symbol "/" url-suffix)
33 |                     (get _)
34 |                     (response-body _))])
35 |       (call-with-output-file* (string-append folder "/" symbol "." statement ".html")
36 |         (λ (out) (write-bytes body out))
37 |         #:exists 'replace))))
38 |
39 | ;; Save the income statement page for `symbol`;
40 | ;; see download-statement for the output location and the error handling.
41 | (define (download-income-statement symbol)
42 |   (download-statement symbol "income-statement" "income-statement"))
43 |
44 | ;; Save the balance sheet page for `symbol`;
45 | ;; see download-statement for the output location and the error handling.
46 | (define (download-balance-sheet symbol)
47 |   (download-statement symbol "balance-sheet" "balance-sheet"))
48 |
49 | ;; Save the cash flow statement page for `symbol`;
50 | ;; see download-statement for the output location and the error handling.
51 | ;; The URL segment is plural ("cash-flow-statements") while the folder and file suffix are singular.
52 | (define (download-cash-flow-statement symbol)
53 |   (download-statement symbol "cash-flow-statement" "cash-flow-statements"))
54 | ; Connection settings and optional symbol range, all overridable from the command line.
55 | (define db-user (make-parameter "user"))
56 |
57 | (define db-name (make-parameter "local"))
58 |
59 | (define db-pass (make-parameter ""))
60 |
61 | (define first-symbol (make-parameter "")) ; "" leaves the lower bound of the symbol query open
62 |
63 | (define last-symbol (make-parameter "")) ; "" leaves the upper bound of the symbol query open
64 |
65 | (command-line
66 | #:program "racket financial-statement-extract.rkt"
67 | #:once-each
68 | [("-f" "--first-symbol") first
69 | "First symbol to query. Defaults to nothing"
70 | (first-symbol first)]
71 | [("-l" "--last-symbol") last
72 | "Last symbol to query. Defaults to nothing"
73 | (last-symbol last)]
74 | [("-n" "--db-name") name
75 | "Database name. Defaults to 'local'"
76 | (db-name name)]
77 | [("-p" "--db-pass") password
78 | "Database password"
79 | (db-pass password)]
80 | [("-u" "--db-user") user
81 | "Database user name. Defaults to 'user'"
82 | (db-user user)])
83 | ; Read the symbols to download from nasdaq.symbol; -f and -l optionally bound the act_symbol range.
84 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
85 |
86 | (define symbols (query-list dbc "
87 | select
88 | act_symbol
89 | from
90 | nasdaq.symbol
91 | where
92 | is_etf = false and
93 | is_test_issue = false and
94 | is_next_shares = false and
95 | security_name !~ 'ETN' and
96 | nasdaq_symbol !~ '[-\\$\\+\\*#!@%\\^=~]' and
97 | case when nasdaq_symbol ~ '[A-Z]{4}[L-Z]'
98 | then security_name !~ '(Note|Preferred|Right|Unit|Warrant)'
99 | else true
100 | end and
101 | last_seen = (select max(last_seen) from nasdaq.symbol) and
102 | case when $1 != ''
103 | then act_symbol >= $1
104 | else true
105 | end and
106 | case when $2 != ''
107 | then act_symbol <= $2
108 | else true
109 | end
110 | order by
111 | act_symbol;
112 | "
113 | (first-symbol)
114 | (last-symbol)))
115 | ; the connection is only used for the symbol query above
116 | (disconnect dbc)
117 | ; Each symbol gets its own delay-interval slot; within the slot its three pages are
118 | ; requested at offsets 0 (income statement), 4 (balance sheet) and 8 (cash flow statement).
119 | (define delay-interval 12)
120 |
121 | (define delays (for/list ([slot (in-range (length symbols))]) (* delay-interval slot)))
122 |
123 | (with-task-server
124 |   (for ([symbol (in-list symbols)] [base-delay (in-list delays)])
125 |     ; each task merely spawns a thread; the download itself runs in that thread
126 |     (schedule-delayed-task (λ () (thread (λ () (download-income-statement symbol)))) base-delay)
127 |     (schedule-delayed-task (λ () (thread (λ () (download-balance-sheet symbol)))) (+ 4 base-delay))
128 |     (schedule-delayed-task (λ () (thread (λ () (download-cash-flow-statement symbol)))) (+ 8 base-delay)))
129 |   ; add a final task that will halt the task server
130 |   (schedule-delayed-task (λ () (schedule-stop-task)) (* delay-interval (length delays)))
131 |   (run-tasks))
132 |
--------------------------------------------------------------------------------
/earnings-calendar-transform-load.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require db
4 | gregor
5 | json
6 | racket/cmdline
7 | racket/list
8 | racket/port
9 | racket/sequence
10 | racket/string
11 | threading)
12 | ; Input folder, folder date (a gregor date) and connection settings, all overridable from the command line.
13 | (define base-folder (make-parameter "/var/local/zacks/earnings-calendar"))
14 |
15 | (define folder-date (make-parameter (today)))
16 |
17 | (define db-user (make-parameter "user"))
18 |
19 | (define db-name (make-parameter "local"))
20 |
21 | (define db-pass (make-parameter ""))
22 |
23 | (command-line
24 |  #:program "racket earnings-calendar-transform-load.rkt" ; usage text now names this file (was transform-load.rkt)
25 |  #:once-each
26 |  [("-b" "--base-folder") folder
27 |   "Earnings Calendar base folder. Defaults to /var/local/zacks/earnings-calendar"
28 |   (base-folder folder)]
29 |  [("-d" "--folder-date") date
30 |   "Earnings Calendar folder date. Defaults to today"
31 |   (folder-date (iso8601->date date))]
32 |  [("-n" "--db-name") name
33 |   "Database name. Defaults to 'local'"
34 |   (db-name name)]
35 |  [("-p" "--db-pass") password
36 |   "Database password"
37 |   (db-pass password)]
38 |  [("-u" "--db-user") user
39 |   "Database user name. Defaults to 'user'"
40 |   (db-user user)])
41 | ; connection used for every statement in this script; it is closed at the very end
42 | (define dbc (postgresql-connect #:user (db-user) #:database (db-name) #:password (db-pass)))
43 |
44 | ; we clean up the future part of the table (date on or after the folder date) in case earnings dates have been shifted
45 | (query-exec dbc "
46 | delete from
47 | zacks.earnings_calendar
48 | where
49 | date >= $1::text::date;
50 | "
51 | (~t (folder-date) "yyyy-MM-dd"))
52 | ; Parse every .json file in the folder for folder-date and insert its rows, one transaction per row.
53 | (parameterize ([current-directory (string-append (base-folder) "/" (~t (folder-date) "yyyy-MM-dd") "/")])
54 | (for ([p (sequence-filter (λ (p) (string-contains? (path->string p) ".json")) (in-directory (current-directory)))])
55 | (let* ([file-name (path->string p)]
56 | [date-of-earnings (string-replace (string-replace file-name (path->string (current-directory)) "") ".json" "")])
57 | (call-with-input-file file-name
58 | (λ (in)
59 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to parse "
60 | file-name
61 | " for date "
62 | date-of-earnings))
63 | (displayln e))])
64 | (~> (port->string in)
65 | (regexp-replace* #rx"<.*?>" _ "") ; drop anything between angle brackets
66 | (regexp-replace* #rx"[A-Z\\.]+ Quick Quote" _ "") ; drop the "... Quick Quote" text
67 | (string-replace _ "window.app_data = " "") ; drop the JavaScript assignment prefix
68 | (string->jsexpr _)
69 | (hash-ref _ 'data) ; the rows are under the 'data key; each row is used as a list below
70 | (for-each (λ (ticker-when-list)
71 | (with-handlers ([exn:fail? (λ (e) (displayln (string-append "Failed to insert "
72 | (first ticker-when-list)
73 | " for date "
74 | date-of-earnings))
75 | (displayln e)
76 | (rollback-transaction dbc))])
77 | (start-transaction dbc)
78 | ; if we have a record from last week for this symbol, move it forward
79 | (query-exec dbc "
80 | delete from
81 | zacks.earnings_calendar
82 | where
83 | act_symbol = $1 and
84 | date >= $2::text::date - '7 days'::interval
85 | "
86 | (first ticker-when-list)
87 | (~t (folder-date) "yyyy-MM-dd"))
88 | (query-exec dbc "
89 | insert into zacks.earnings_calendar (
90 | act_symbol,
91 | date,
92 | \"when\"
93 | ) values (
94 | $1,
95 | $2::text::date,
96 | case $3
97 | when 'amc' then 'After market close'::zacks.when
98 | when 'bmo' then 'Before market open'::zacks.when
99 | when '--' then NULL
100 | end
101 | ) on conflict do nothing;
102 | "
103 | (first ticker-when-list) ; act_symbol
104 | date-of-earnings ; date, taken from the file name rather than from the row
105 | (fourth ticker-when-list)) ; "amc", "bmo" or "--"; the case expression has no else, so other values give null
106 | (commit-transaction dbc))) _))))))))
107 | ; For each symbol and each three-month window after a balance sheet date, keep only the latest earnings date.
108 | ; remove estimated dates when the estimate moves
109 | (query-exec dbc "
110 | delete from
111 | zacks.earnings_calendar ec
112 | using
113 | (select
114 | ec.act_symbol,
115 | max(ec.date) as max_date,
116 | bsa.date as bsa_date
117 | from
118 | zacks.earnings_calendar ec
119 | join
120 | (select distinct
121 | act_symbol,
122 | date
123 | from
124 | zacks.balance_sheet_assets bsa
125 | union
126 | (select
127 | act_symbol,
128 | (((max(date) + '1 day'::interval) + '3 months'::interval) - '1 day'::interval)::date
129 | from
130 | zacks.balance_sheet_assets
131 | group by
132 | act_symbol)
133 | order by
134 | act_symbol,
135 | date) bsa
136 | on
137 | ec.act_symbol = bsa.act_symbol and
138 | ec.date > bsa.date and
139 | ec.date <= ((bsa.date + '1 day'::interval) + '3 months'::interval) - '1 day'::interval
140 | group by
141 | ec.act_symbol,
142 | bsa.date) ecm
143 | where
144 | ec.act_symbol = ecm.act_symbol and
145 | ec.date != max_date and
146 | ec.date > bsa_date and
147 | ec.date <= ((bsa_date + '1 day'::interval) + '3 months'::interval) - '1 day'::interval;
148 | ")
149 | ; Table maintenance after the deletes above, then close the connection.
150 | ; vacuum (garbage collect) and reindex table as we deleted from it earlier
151 | (query-exec dbc "
152 | vacuum full freeze analyze zacks.earnings_calendar;
153 | ")
154 |
155 | (query-exec dbc "
156 | reindex table zacks.earnings_calendar;
157 | ")
158 |
159 | (disconnect dbc)
160 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zacks-estimates-financial-statements
2 |
3 | These Racket programs download the Zacks "Detailed Estimates" and "Financials" HTML documents, as well as the earnings
4 | calendar, and insert the extracted data into a PostgreSQL database. The intended usage is:
5 |
6 | ```bash
7 | $ racket estimate-extract.rkt
8 | $ racket estimate-transform-load.rkt
9 | ```
10 |
11 | ```bash
12 | $ racket financial-statement-extract.rkt
13 | $ racket balance-sheet-transform-load.rkt
14 | $ racket cash-flow-statement-transform-load.rkt
15 | $ racket income-statement-transform-load.rkt
16 | ```
17 |
18 | ```bash
19 | $ racket earnings-calendar-extract.rkt
20 | $ racket earnings-calendar-transform-load.rkt
21 | ```
22 |
23 | You will need to provide a database password for many of the above programs. The available parameters are:
24 |
25 | ```bash
26 | $ racket estimate-extract.rkt -h
27 | racket estimate-extract.rkt [