├── .gitignore ├── src ├── util │ ├── cursor.ts │ ├── file.ts │ ├── Sequence.ts │ ├── index.ts │ ├── string.ts │ ├── Printer.ts │ ├── assert.ts │ ├── BufferList.ts │ ├── array.ts │ ├── test.ts │ ├── event.ts │ ├── command.ts │ ├── time.ts │ └── counters.ts ├── database │ ├── KVStore.ts │ ├── index.ts │ ├── JsonFile.ts │ ├── ObjectStore.ts │ ├── CsvFile.ts │ ├── Tabular.ts │ └── FilesystemObjectStore.ts ├── app │ ├── crypto │ │ ├── rules.ts │ │ ├── setup.ts │ │ ├── config.ts │ │ ├── coins.ts │ │ ├── index.ts │ │ ├── streamer.ts │ │ ├── README.md │ │ ├── converseon.ts │ │ ├── streamerDB.ts │ │ ├── apiDB.ts │ │ ├── api.ts │ │ └── utils.ts │ ├── twidl │ │ ├── config.ts │ │ └── README.md │ └── vsa │ │ ├── rules.ts │ │ ├── index.ts │ │ ├── setup.ts │ │ ├── config.ts │ │ ├── api.ts │ │ ├── visua │ │ ├── test.ts │ │ ├── fakeserver.ts │ │ └── visua.ts │ │ ├── README.md │ │ └── streamer.ts ├── twitter │ ├── README.md │ ├── index.ts │ ├── TwitterBase.ts │ ├── test │ │ ├── messagedrop.ts │ │ ├── streamerror.ts │ │ ├── streamtimeout.ts │ │ ├── index.ts │ │ └── disconnect.ts │ ├── TwitterStreamInterface.ts │ ├── examples │ │ ├── simple.ts │ │ ├── groups.ts │ │ ├── tsv.ts │ │ ├── frequency.ts │ │ ├── longcount.ts │ │ └── backfill.ts │ ├── TwitterStreamProxyServer.ts │ ├── TwitterAccount.ts │ ├── TwitterDynamoDBTweetSentimentTable.ts │ ├── TwitterDynamoDBTweetTable.ts │ ├── Tweet.ts │ ├── FakeTwitterStream.ts │ ├── TwitterStreamGroups.ts │ ├── TwitterSearch.ts │ ├── TwitterStream.ts │ └── TwitterStreamer.ts └── http │ ├── examples │ ├── promisifiedclient.ts │ ├── client.ts │ ├── secureclient.ts │ ├── timeout.ts │ ├── pipelinedclient.ts │ ├── example-cert.pem │ ├── continuousclient.ts │ ├── server.ts │ ├── example-key.pem │ └── secureserver.ts │ ├── index.ts │ ├── HttpEndpoint.ts │ ├── HttpProtocol.ts │ ├── server │ ├── HttpProxy.ts │ ├── HttpServerCertificates.ts │ ├── HttpServer.ts │ └── HttpRouter.ts │ ├── client │ ├── HttpPromisifiedRequestPool.ts │ ├── HttpConnectionPool.ts │ 
├── HttpRequestPool.ts │ └── HttpRequest.ts │ └── stream │ ├── HttpStream.ts │ └── HttpResilientStream.ts ├── tsconfig.json ├── CONTRIBUTING.md ├── package.json ├── README.md └── CODE_OF_CONDUCT.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | .vscode 4 | TODO 5 | build 6 | node_modules 7 | package-lock.json 8 | yarn.lock 9 | yarn-error.log 10 | .data -------------------------------------------------------------------------------- /src/util/cursor.ts: -------------------------------------------------------------------------------- 1 | export function hideCursor() { 2 | process.stdout.write('\x1b[?25l') 3 | } 4 | 5 | export function showCursor() { 6 | process.stdout.write('\x1b[?25h') 7 | } 8 | -------------------------------------------------------------------------------- /src/util/file.ts: -------------------------------------------------------------------------------- 1 | import { promises as fs } from 'fs' 2 | 3 | export async function exists(filename: string): Promise { 4 | try { 5 | await fs.access(filename) 6 | return true 7 | } catch { 8 | return false 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/database/KVStore.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Obj } from '../util' 5 | 6 | export interface KVStore { 7 | get(key: string): Promise 8 | set(key: string, value: Obj): Promise 9 | } 10 | -------------------------------------------------------------------------------- /src/database/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export * from './CsvFile' 5 | export * from './DynamoDB' 6 | export * from './FilesystemObjectStore' 7 | export * from './JsonFile' 8 | export * from './KVStore' 9 | export * from './ObjectStore' 10 | export * from './Tabular' 11 | -------------------------------------------------------------------------------- /src/app/crypto/rules.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export function createStreamRules(): Record { 5 | const rules: Record = { bitcoin: '(bitcoin OR btc) -cash -is:retweet' } 6 | return rules 7 | } 8 | 9 | export function createStreamProbabilities(): Record { 10 | return { bitcoin: 1 } 11 | } 12 | -------------------------------------------------------------------------------- /src/app/twidl/config.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export const PROGRESS_BAR_WIDTH = 20 5 | 6 | export const TWITTER_ACCOUNT = process.env.TWITTER_ACCOUNT || '' 7 | export const TWITTER_EMAIL = process.env.TWITTER_EMAIL || '' 8 | export const TWITTER_LABEL = process.env.TWITTER_LABEL 9 | export const TWITTER_PASSWORD = process.env.TWITTER_PASSWORD || '' 10 | -------------------------------------------------------------------------------- /src/twitter/README.md: -------------------------------------------------------------------------------- 1 | # Partial Twitter SDK 2 | 3 | This folder includes helper classes for accessing the 4 | [Twitter Premium v1.1 API](https://developer.twitter.com/en/docs/twitter-api/premium). This is far from a comprehensive 5 | SDK for this API and focuses on PowerTrack streaming support. 6 | 7 | For additional functionality, please look to Twitter's 8 | newer [v2 API](https://developer.twitter.com/en/docs/twitter-api). 
9 | -------------------------------------------------------------------------------- /src/twitter/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export * from './FakeTwitterStream' 5 | export * from './Tweet' 6 | export * from './TwitterAccount' 7 | export * from './TwitterBase' 8 | export * from './TwitterDynamoDBTweetTable' 9 | export * from './TwitterSearch' 10 | export * from './TwitterStream' 11 | export * from './TwitterStreamer' 12 | export * from './TwitterStreamGroups' 13 | export * from './TwitterStreamInterface' 14 | export * from './TwitterStreamProxyServer' 15 | -------------------------------------------------------------------------------- /src/http/examples/promisifiedclient.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { HttpPromisifiedRequestPool } from '..' 5 | 6 | async function main() { 7 | const pool = new HttpPromisifiedRequestPool('https://localhost:3000') 8 | const responses = await Promise.all([ 9 | pool.GET('/'), 10 | pool.GET('/ping'), 11 | pool.GET('/wait/1000'), 12 | ]) 13 | pool.close() 14 | console.log(responses) 15 | } 16 | 17 | main().catch(e => { 18 | console.error(e) 19 | process.exit(1) 20 | }) 21 | -------------------------------------------------------------------------------- /src/util/Sequence.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { assertInteger } from './assert' 5 | 6 | export class Sequence { 7 | private sequence = 0 8 | 9 | constructor(private readonly limit = Number.MAX_SAFE_INTEGER) { 10 | assertInteger(limit, 1, Number.MAX_SAFE_INTEGER, 'Sequence limit') 11 | } 12 | 13 | public get next(): number { 14 | const seq = this.sequence 15 | if (++this.sequence === this.limit) { 16 | this.sequence = 0 17 | } 18 | return seq 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/util/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export * from './file' 5 | export * from './time' 6 | export * from './assert' 7 | export * from './cursor' 8 | export * from './string' 9 | export * from './command' 10 | export * from './Printer' 11 | export * from './counters' 12 | export * from './Sequence' 13 | export * from './BufferList' 14 | export * as event from './event' 15 | 16 | export type Obj = Record 17 | 18 | export function safe(func: () => T): T | undefined { 19 | try { 20 | return func() 21 | } catch {} 22 | } 23 | -------------------------------------------------------------------------------- /src/http/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export * from './client/HttpConnection' 5 | export * from './client/HttpPromisifiedRequestPool' 6 | export * from './client/HttpRequest' 7 | export * from './client/HttpRequestPool' 8 | export * from './HttpEndpoint' 9 | export * from './HttpProtocol' 10 | export * from './server/HttpProxy' 11 | export * from './server/HttpRouter' 12 | export * from './server/HttpServer' 13 | export * from './server/HttpServerCertificates' 14 | export * from './stream/HttpResilientStream' 15 | export * from './stream/HttpStream' 16 | -------------------------------------------------------------------------------- /src/util/string.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export function pad(content: any, length: number, chr: string): string { 5 | const str = String(content) 6 | const spaces = chr.substr(0, 1).repeat(Math.max(0, Math.abs(Math.round(length)) - str.length)) 7 | return length < 0 ? str + spaces : spaces + str 8 | } 9 | 10 | export function splitOnce(str: string, delimiter: string): [string, string] | undefined { 11 | const index = str.indexOf(delimiter) 12 | return 0 <= index ? [str.substr(0, index), str.substr(index + delimiter.length)] : undefined 13 | } 14 | -------------------------------------------------------------------------------- /src/twitter/TwitterBase.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { TwitterAccount } from './TwitterAccount' 5 | 6 | export interface TwitterBaseOptions { 7 | label?: string 8 | } 9 | 10 | export class TwitterBase { 11 | protected label: string 12 | 13 | constructor(protected account: TwitterAccount, options: TwitterBaseOptions = {}) { 14 | if (options.label !== undefined && typeof options.label !== 'string') { 15 | throw new Error(`Invalid Twitter label: ${options.label}`) 16 | } 17 | this.label = (options.label as string) || 'prod' 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowJs": true, 4 | "declaration": true, 5 | "downlevelIteration": true, 6 | "esModuleInterop": true, 7 | "experimentalDecorators": true, 8 | "lib":[ 9 | "dom", 10 | "es2019", 11 | "esnext.bigint" 12 | ], 13 | "module": "commonjs", 14 | "noImplicitAny": true, 15 | "outDir": "./build", 16 | "resolveJsonModule": true, 17 | "rootDir": "./src", 18 | "sourceMap": true, 19 | "strict": true, 20 | "target": "es2020" 21 | }, 22 | "include": [ 23 | "./src/**/*.ts", 24 | "./src/**/*.d.ts" 25 | ], 26 | "typeRoots": [ 27 | "./custom.d.ts", 28 | "./node_modules/@types" 29 | ] 30 | } -------------------------------------------------------------------------------- /src/util/Printer.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { assertInteger } from './assert' 5 | 6 | export class Printer { 7 | private spacer = '' 8 | private lineCount = 0 9 | 10 | constructor(spacing = 8) { 11 | this.setSpacing(spacing) 12 | } 13 | 14 | public printLines(...lines: string[]): void { 15 | process.stdout.write('\x1b[A'.repeat(this.lineCount)) 16 | for (const line of lines) { 17 | console.log(line + this.spacer) 18 | } 19 | this.lineCount = lines.length 20 | } 21 | 22 | public setSpacing(spacing: number) { 23 | assertInteger(spacing, 0, 128, 'Spacing') 24 | this.spacer = ' '.repeat(spacing) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/http/examples/client.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { HttpConnection, HttpConnectionError, HttpTransaction } from '..' 5 | 6 | const con = new HttpConnection('localhost', { port: 3000 }) 7 | 8 | con.addResponseListener(({ response }: HttpTransaction) => { 9 | console.log('Received response:') 10 | console.log(response) 11 | process.exit(0) 12 | }) 13 | 14 | con.addErrorListener((error: HttpConnectionError) => { 15 | console.error('Http connection error. Terminating') 16 | console.log(error) 17 | process.exit(1) 18 | }) 19 | 20 | con.addReadyListener(() => { 21 | console.log('Http connection ready. Sending request') 22 | con.GET('/', undefined) 23 | }) 24 | -------------------------------------------------------------------------------- /src/app/crypto/setup.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as config from './config' 5 | import { createStreamRules } from './rules' 6 | import { TwitterAccount, TwitterStream } from '../../twitter' 7 | 8 | export async function setTwitterStreamRules() { 9 | const twitterAccount = new TwitterAccount( 10 | config.TWITTER_ACCOUNT, 11 | config.TWITTER_EMAIL, 12 | config.TWITTER_PASSWORD, 13 | ) 14 | const twitterStream = new TwitterStream(twitterAccount) 15 | const rules = createStreamRules() 16 | console.log('Setting Twitter stream rules:') 17 | for (const rule in rules) { 18 | console.log(` ${rule}: ${rules[rule]}`) 19 | } 20 | await twitterStream.setStreamRules(rules) 21 | } 22 | -------------------------------------------------------------------------------- /src/http/examples/secureclient.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { HttpConnection, HttpConnectionError, HttpTransaction } from '..' 5 | 6 | const con = new HttpConnection('localhost', { port: 3000, useTLS: true }) 7 | 8 | con.addResponseListener(({ response }: HttpTransaction) => { 9 | console.log('Received response:') 10 | console.log(response) 11 | process.exit(0) 12 | }) 13 | 14 | con.addErrorListener((error: HttpConnectionError) => { 15 | console.error('Http connection error. Terminating') 16 | console.log(error) 17 | process.exit(1) 18 | }) 19 | 20 | con.addReadyListener(() => { 21 | console.log('Http connection ready. Sending request') 22 | con.GET('/', undefined) 23 | }) 24 | -------------------------------------------------------------------------------- /src/app/vsa/rules.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as config from './config' 5 | 6 | export function createStreamRules(): Record { 7 | const rules: Record = { images: 'has:images -is:retweet' } 8 | for (const [brand, context] of Object.entries(config.BRANDS)) { 9 | rules[`@${brand}`] = `context:${context} -is:retweet` 10 | } 11 | return rules 12 | } 13 | 14 | export function createStreamProbabilities(): Record { 15 | const rules: Record = { images: Math.round(3 / config.VISUA_DETECTION_RATE) } 16 | for (const [brand, context] of Object.entries(config.BRANDS)) { 17 | rules[`@${brand}`] = 1 18 | } 19 | return rules 20 | } 21 | -------------------------------------------------------------------------------- /src/http/examples/timeout.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { HttpConnection, HttpConnectionError, HttpTransaction } from '..' 5 | 6 | const con = new HttpConnection('localhost', { port: 3000, activityTimeoutMs: 1000 }) 7 | 8 | con.addResponseListener(({ response }: HttpTransaction) => { 9 | console.log('Received response:') 10 | console.log(response) 11 | process.exit(0) 12 | }) 13 | 14 | con.addErrorListener((error: HttpConnectionError) => { 15 | console.error(`HTTP connection error${error.originalError ? ': ' + error.originalError : ''}`) 16 | process.exit(1) 17 | }) 18 | 19 | con.addReadyListener(() => { 20 | console.log('Http connection ready. Sending request') 21 | con.GET('/wait/2000', undefined) 22 | }) 23 | -------------------------------------------------------------------------------- /src/util/assert.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export function assert(condition: boolean, message: string): void { 5 | if (!condition) { 6 | throw new Error(message) 7 | } 8 | } 9 | 10 | export function assertInteger(num: number, min?: number, max?: number, label?: string): number { 11 | assert(typeof num === 'number', `${label ? `${label} n` : 'N'}ot a number: ${num}`) 12 | assert(Math.floor(num) === num, `${label ? `${label} n` : 'N'}ot an integer: ${num}`) 13 | if (min !== undefined) { 14 | assert(min <= num, `${label ? `${label} t` : 'T'}oo small: ${num} < ${min}`) 15 | } 16 | if (max !== undefined) { 17 | assert(num <= max, `${label ? `${label} t` : 'T'}oo large: ${max} < ${num}`) 18 | } 19 | return num 20 | } 21 | -------------------------------------------------------------------------------- /src/twitter/test/messagedrop.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { sleep } from '../../util' 5 | import { Tweet, TwitterAccount, TwitterStream } from '..' 6 | 7 | import { testSet } from '../../util/test' 8 | testSet('MESSAGE_TIMEOUT') 9 | 10 | const twitterAccount = new TwitterAccount( 11 | process.env.TWITTER_ACCOUNT, 12 | process.env.TWITTER_EMAIL, 13 | process.env.TWITTER_PASSWORD, 14 | ) 15 | 16 | export async function main() { 17 | const twitterStream = new TwitterStream(twitterAccount) 18 | await twitterStream.setStreamRules({ rule: 'has:images lang:en -is:retweet' }) 19 | twitterStream.addListener((tweet: Tweet) => console.log(tweet.id)) 20 | twitterStream.connect() 21 | 22 | await sleep(41000) 23 | 24 | console.log('Disconnecting stream') 25 | twitterStream.disconnect() 26 | } 27 | -------------------------------------------------------------------------------- /src/twitter/TwitterStreamInterface.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Tweet } from './Tweet' 5 | import { HttpStreamEventType } from '../http' 6 | import { EventListener, SimpleEventListener } from '../util/event' 7 | 8 | export interface StreamedTweet extends Tweet { 9 | rules: string[] 10 | } 11 | 12 | export type TwitterStreamEventType = HttpStreamEventType | 'tweet' 13 | 14 | export interface TwitterStreamInterface { 15 | addListener( 16 | typeOrListener: TwitterStreamEventType | SimpleEventListener, 17 | listener?: EventListener | SimpleEventListener, 18 | ): void 19 | 20 | connect(): void 21 | 22 | disconnect(): void 23 | 24 | setStreamRules(newRulesRecord: Record, force?: boolean): Promise 25 | } 26 | -------------------------------------------------------------------------------- /src/twitter/test/streamerror.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { sleep } from '../../util' 5 | import { Tweet, TwitterAccount, TwitterStream } from '..' 6 | 7 | import { testSet } from '../../util/test' 8 | testSet('STREAM_SERVER_ERROR') 9 | 10 | const twitterAccount = new TwitterAccount( 11 | process.env.TWITTER_ACCOUNT, 12 | process.env.TWITTER_EMAIL, 13 | process.env.TWITTER_PASSWORD, 14 | ) 15 | 16 | export async function main() { 17 | const twitterStream = new TwitterStream(twitterAccount) 18 | await twitterStream.setStreamRules({ rule: 'has:images lang:en -is:retweet' }) 19 | twitterStream.addListener((tweet: Tweet) => console.log(tweet.id)) 20 | twitterStream.connect() 21 | 22 | await sleep(7000) 23 | 24 | console.log('Disconnecting stream') 25 | twitterStream.disconnect() 26 | } 27 | -------------------------------------------------------------------------------- /src/twitter/test/streamtimeout.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { sleep } from '../../util' 5 | import { Tweet, TwitterAccount, TwitterStream } from '..' 6 | 7 | import { testSet } from '../../util/test' 8 | testSet('STREAM_SERVER_TIMEOUT') 9 | 10 | const twitterAccount = new TwitterAccount( 11 | process.env.TWITTER_ACCOUNT, 12 | process.env.TWITTER_EMAIL, 13 | process.env.TWITTER_PASSWORD, 14 | ) 15 | 16 | export async function main() { 17 | const twitterStream = new TwitterStream(twitterAccount) 18 | await twitterStream.setStreamRules({ rule: 'has:images lang:en -is:retweet' }) 19 | twitterStream.addListener((tweet: Tweet) => console.log(tweet.id)) 20 | twitterStream.connect() 21 | 22 | await sleep(13000) 23 | 24 | console.log('Disconnecting stream') 25 | twitterStream.disconnect() 26 | } 27 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to get patches from you! 4 | 5 | ## Getting Started 6 | 7 | We follow the [GitHub Flow Workflow](https://guides.github.com/introduction/flow/) 8 | 9 | 1. Fork the project 10 | 1. Check out the `main` branch 11 | 1. Create a feature branch 12 | 1. Write code and tests for your change 13 | 1. From your branch, make a pull request against `twitterdev/twitter-streaming-framework/main` 14 | 1. Work with repo maintainers to get your change reviewed 15 | 1. Wait for your change to be pulled into `twitterdev/twitter-streaming-framework/main` 16 | 1. Delete your feature branch 17 | 18 | ## License 19 | 20 | By contributing your code, you agree to license your contribution under the 21 | terms of the APLv2: https://github.com/twitterdev/twitter-streaming-framework/blob/main/LICENSE 22 | 23 | ## Code of Conduct 24 | 25 | Read our [Code of Conduct](CODE_OF_CONDUCT.md) for the project. 
26 | -------------------------------------------------------------------------------- /src/twitter/examples/simple.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { sleep } from '../../util' 5 | import { Tweet, TwitterAccount, TwitterStream } from '..' 6 | 7 | async function main() { 8 | const twitterAccount = new TwitterAccount( 9 | process.env.TWITTER_ACCOUNT, 10 | process.env.TWITTER_EMAIL, 11 | process.env.TWITTER_PASSWORD, 12 | ) 13 | const twitterStream = new TwitterStream(twitterAccount) 14 | await twitterStream.setStreamRules({ streaming: 'streaming -is:retweet' }) 15 | 16 | twitterStream.addListener((tweet: Tweet) => { 17 | console.log(tweet.id, tweet.text.replace(/[\r\n]/g, '.')) 18 | }) 19 | 20 | twitterStream.connect() 21 | 22 | console.log('Streaming for 15 seconds...') 23 | await sleep(15000) 24 | 25 | twitterStream.disconnect() 26 | } 27 | 28 | main().catch(e => { 29 | console.error(e) 30 | process.exit(1) 31 | }) 32 | -------------------------------------------------------------------------------- /src/util/BufferList.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export class BufferList { 5 | private readonly buffers: Buffer[] 6 | private _length: number 7 | 8 | constructor(other?: BufferList) { 9 | this.buffers = other ? [...other.buffers] : [] // copy the array: sharing it would let append() on one list grow the other's storage while its _length stays stale 10 | this._length = other ? 
other._length : 0 11 | } 12 | 13 | public append(buffer: Buffer): void { 14 | this.buffers.push(buffer) 15 | this._length += buffer.length 16 | } 17 | 18 | public appendList(other: BufferList): void { 19 | for (const buffer of other.buffers) { 20 | this.append(buffer) 21 | } 22 | } 23 | 24 | public get length(): number { 25 | return this._length 26 | } 27 | 28 | public toBuffer(): Buffer { 29 | return Buffer.concat(this.buffers) 30 | } 31 | 32 | public toString(encoding: BufferEncoding = 'utf-8'): string { 33 | return this.toBuffer().toString(encoding) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/http/HttpEndpoint.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { URL } from 'url' 5 | import { Agent } from 'https' 6 | 7 | export interface HttpEndpointOptions { 8 | rejectUnauthorized?: boolean 9 | } 10 | 11 | export class HttpEndpoint { 12 | public readonly agent?: Agent 13 | 14 | constructor(public readonly url: string, options: HttpEndpointOptions = {}) { 15 | if (typeof url !== 'string') { 16 | throw new Error(`HTTP URL must be a string: ${url}`) 17 | } 18 | const u = new URL(url) 19 | if (u.protocol !== 'https:' || !u.hostname) { 20 | throw new Error(`Invalid HTTPS URL: ${url}`) 21 | } 22 | 23 | if (options.rejectUnauthorized === false) { 24 | this.agent = new Agent({ 25 | host: u.hostname, 26 | port: parseInt(u.port || '443'), 27 | path: '/', 28 | rejectUnauthorized: false, 29 | }) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/twitter/test/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { testClear } from '../../util/test' 5 | 6 | const tests: Record | undefined = 7 | typeof process.env.TESTS === 'string' 8 | ? process.env.TESTS.split(',').reduce( 9 | (obj: Record, str: string) => { obj[str] = true; return obj }, 10 | {}, 11 | ) 12 | : undefined 13 | 14 | async function run(name: string) { 15 | if (!tests || tests[name]) { 16 | console.log(`\n\x1b[35m***** Starting test: ${name} *****\x1b[0m\n`) 17 | testClear() 18 | const { main } = require('./' + name) 19 | await main() 20 | } 21 | } 22 | 23 | ;(async () => { 24 | 25 | await run('disconnect') 26 | await run('messagedrop') 27 | await run('streamerror') 28 | await run('streamtimeout') 29 | 30 | })().catch(e => { 31 | console.error(e) 32 | console.error(JSON.stringify(e)) 33 | process.exit(1) 34 | }) 35 | -------------------------------------------------------------------------------- /src/http/HttpProtocol.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Obj } from '../util' 5 | 6 | export type HttpHeaders = Record 7 | 8 | export function httpStringifyHeaders(headers: HttpHeaders): string { 9 | return Object.keys(headers).map(key => `${key}: ${headers[key]}\r\n`).join('') 10 | } 11 | 12 | export type HttpMethod = 13 | | 'CONNECT' 14 | | 'DELETE' 15 | | 'GET' 16 | | 'HEAD' 17 | | 'OPTIONS' 18 | | 'PATCH' 19 | | 'POST' 20 | | 'PUT' 21 | | 'TRACE' 22 | 23 | export interface HttpRequest { 24 | method: HttpMethod 25 | path: string 26 | headers?: HttpHeaders 27 | body?: string 28 | } 29 | 30 | export interface HttpResponse { 31 | statusCode: number 32 | statusText: string 33 | responseHeaders: HttpHeaders 34 | responseText: string 35 | responseBody?: Obj 36 | } 37 | 38 | export interface HttpTransaction { 39 | request: HttpRequest 40 | response: HttpResponse 41 | } 42 | -------------------------------------------------------------------------------- /src/twitter/examples/groups.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { sleep } from '../../util' 5 | import { TweetGroup, TwitterAccount, TwitterStreamGroups } from '..' 
6 | 7 | async function main() { 8 | const twitterAccount = new TwitterAccount( 9 | process.env.TWITTER_ACCOUNT, 10 | process.env.TWITTER_EMAIL, 11 | process.env.TWITTER_PASSWORD, 12 | ) 13 | const twitterStreamGroups = new TwitterStreamGroups(twitterAccount) 14 | twitterStreamGroups.setStreamRules({ images: 'has:images -is:retweet' }) 15 | 16 | twitterStreamGroups.addListener(({ groupId, tweets }: TweetGroup) => { 17 | console.log(`Group ${groupId}: ${tweets.length} Tweets`) 18 | }) 19 | 20 | twitterStreamGroups.connect() 21 | 22 | console.log('Streaming for 20 seconds...') 23 | await sleep(20000) 24 | 25 | twitterStreamGroups.disconnect() 26 | } 27 | 28 | main().catch(e => { 29 | console.error(e) 30 | process.exit(1) 31 | }) 32 | -------------------------------------------------------------------------------- /src/util/array.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | interface Array { 5 | random(): T 6 | remove(element: T): Array 7 | toDict(): Record 8 | uniq(): Array 9 | } 10 | 11 | Array.prototype.random = function random(): T | undefined { 12 | return this.length === 0 ? 
undefined : this[Math.floor(Math.random() * this.length)] 13 | } 14 | 15 | Array.prototype.remove = function remove(element: T): Array { 16 | const index = this.indexOf(element) 17 | if (index < 0) { 18 | throw new Error('Array element not found') 19 | } 20 | this.splice(index, 1) 21 | return this 22 | } 23 | 24 | Array.prototype.toDict = function toDict(): Record { 25 | return this.map(e => e.toString()).reduce((res, key)=> (res[key] = true, res), {} as Record) 26 | } 27 | 28 | Array.prototype.uniq = function uniq(): Array { 29 | return this.filter((element: any, index: number) => index === 0 || element !== this[index - 1]) 30 | } 31 | -------------------------------------------------------------------------------- /src/database/JsonFile.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { promises as fs } from 'fs' 5 | import { Obj, exists } from '../util' 6 | 7 | export class JsonFile { 8 | constructor(private readonly filename: string) { 9 | } 10 | 11 | private record(values: Obj): string { 12 | return JSON.stringify(values, null, ' ') 13 | } 14 | 15 | public async close(): Promise { 16 | await fs.appendFile(this.filename, '\n]') 17 | } 18 | 19 | public async open(): Promise { 20 | if (!(await exists(this.filename))) { 21 | await fs.writeFile(this.filename, '[\n') 22 | } 23 | } 24 | 25 | public async appendOne(values: Obj): Promise { 26 | const size = (await fs.stat(this.filename)).size 27 | const prefix = 2 < size ? 
',\n' : '' 28 | await fs.appendFile(this.filename, prefix + this.record(values)) 29 | } 30 | 31 | // Appends several records at once. Uses the same separator rule as appendOne (a file larger than the 2-byte '[\n' header already holds a record) so the output stays a valid JSON array. An empty array writes nothing. 32 | public async appendArray(valuesArray: Obj[]): Promise { 33 | if (valuesArray.length === 0) { 34 | return 35 | } 36 | const size = (await fs.stat(this.filename)).size 37 | const prefix = 2 < size ? ',\n' : '' 38 | await fs.appendFile(this.filename, prefix + valuesArray.map(values => this.record(values)).join(',\n')) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/http/examples/pipelinedclient.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { pad } from '../../util' 5 | import { HttpConnection, HttpConnectionError, HttpTransaction } from '..' 6 | 7 | const con = new HttpConnection( 8 | 'https://localhost:3000', 9 | { defaultRequestHeaders: { 'content-type': 'text/plain' } }, 10 | ) 11 | 12 | con.addErrorListener((error: HttpConnectionError) => { 13 | console.error('Http connection error. Terminating') 14 | console.log(error) 15 | process.exit(1) 16 | }) 17 | 18 | let count = 10 19 | 20 | con.addResponseListener((tx: HttpTransaction) => { 21 | console.log('Received response:', tx.request.body, '=>', tx.response.responseText) 22 | if (--count === 0) { 23 | process.exit(0) 24 | } 25 | }) 26 | 27 | con.addReadyListener(() => { 28 | console.log('Http connection ready. Sending requests:') 29 | for (let i = 0; i < count; i++) { 30 | const req = pad(i, 2, '0') 31 | console.log('Sending request:', req) 32 | con.POST('/echo/3000', undefined, req) 33 | } 34 | console.log() 35 | }) 36 | -------------------------------------------------------------------------------- /src/twitter/TwitterStreamProxyServer.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { TwitterAccount } from './TwitterAccount' 5 | import { createTwitterStreamEndpoint } from './TwitterStream' 6 | import { TwitterBase, TwitterBaseOptions } from './TwitterBase' 7 | import { HttpProxy, HttpServer, HttpServerOptions } from '../http' 8 | 9 | // 10 | // Twitter stream proxy server. Mostly used for testing disconnects. 11 | // 12 | export class TwitterStreamProxyServer extends TwitterBase { 13 | private server: HttpServer 14 | 15 | constructor( 16 | account: TwitterAccount, 17 | twitterOptions: TwitterBaseOptions = {}, 18 | serverOptions: HttpServerOptions = {}, 19 | ) { 20 | super(account, twitterOptions) 21 | const url = createTwitterStreamEndpoint(this.account, this.label) 22 | const proxy = new HttpProxy(url, this.account.auth) 23 | this.server = new HttpServer(proxy, serverOptions) 24 | } 25 | 26 | public start(): void { 27 | this.server.start() 28 | } 29 | 30 | public async stop(): Promise { 31 | return this.server.stop() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/database/ObjectStore.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc.
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export interface ObjectListing { 5 | bucketName: string 6 | objectName: string 7 | size: number 8 | timeCreated: number 9 | timeModified: number 10 | } 11 | 12 | export interface ObjectStore { 13 | doesObjectExist(bucketName: string, objectName: string): Promise 14 | getObjectInfo(bucketName: string, objectName: string): Promise 15 | getObject(bucketName: string, objectName: string): Promise 16 | listObjects(bucketName: string, options?: { namesOnly: boolean }): Promise 17 | putObject(bucketName: string, objectName: string, data: Buffer): Promise 18 | } 19 | 20 | function isValidName(name: string): boolean { 21 | return typeof name === 'string' && /^[.\w\-\:\_\$]+$/.test(name) 22 | } 23 | 24 | export function isValidBucketName(bucketName: string): boolean { 25 | return isValidName(bucketName) 26 | } 27 | 28 | export function isValidObjectName(objectName: string): boolean { 29 | return isValidName(objectName) 30 | } 31 | -------------------------------------------------------------------------------- /src/app/crypto/config.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export const API_MAX_RESPONSE_SIZE = 512 * 1024 5 | export const API_PORT = 4000 6 | export const BATCH_INTERVAL = 60000 7 | export const COIN_API_MAX_ATTEMPTS = 3 8 | export const EXPECTED_TWEET_RATE = 10 9 | export const HEARTBEAT_INTERVAL_MS = 1000 10 | export const OBJECT_STORE_BASE_PATH = '.' 
11 | export const OBJECT_STORE_BUCKET_NAME = '.data' 12 | export const PRINT_COUNTERS_INTERVAL_MS: undefined /* never */ | 0 /* immediate */ | number = 500 13 | export const PRINT_COUNTERS_LEVEL = 'debug' 14 | 15 | export const CONVERSEON_API_KEY = process.env.CONVERSEON_API_KEY 16 | export const AWS_REGION = process.env.AWS_REGION 17 | export const AWS_DYNAMODB_ENDPOINT = process.env.AWS_DYNAMODB_ENDPOINT 18 | export const CRYPTO_SENTIMENT_TABLE_NAME = process.env.CRYPTO_SENTIMENT_TABLE_NAME || 'crypto-sentiment' 19 | 20 | export const TWITTER_ACCOUNT = process.env.TWITTER_ACCOUNT 21 | export const TWITTER_EMAIL = process.env.TWITTER_EMAIL 22 | export const TWITTER_PASSWORD = process.env.TWITTER_PASSWORD 23 | export const TWITTER_USE_FAKE_STREAM = false 24 | -------------------------------------------------------------------------------- /src/http/examples/example-cert.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIC9jCCAd4CCQDt8nzQjKCJRzANBgkqhkiG9w0BAQUFADA9MQswCQYDVQQGEwJ1 3 | czELMAkGA1UECAwCY2ExEjAQBgNVBAcMCXN1bm55dmFsZTENMAsGA1UECgwEYWNt 4 | ZTAeFw0yMTEwMDEyMTM4MzNaFw00OTAyMTUyMTM4MzNaMD0xCzAJBgNVBAYTAnVz 5 | MQswCQYDVQQIDAJjYTESMBAGA1UEBwwJc3Vubnl2YWxlMQ0wCwYDVQQKDARhY21l 6 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9ffyOA+z+t4tROOiehd7 7 | 48+Z2vA3+X0U6FkdWy+trmbU4dk2/5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13 8 | HTy4hOiS7CX/cBY4RrewWu0ezkFk3h6Eslc7GSirDWEQE9ar645HiSYt47Kszkdn 9 | R46YURPYwZEp2/lPnL8XNmalWJCVm+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOf 10 | hsX89mBYP/SDX7Yxwc/uWx0w+FkisaIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN 11 | 8ZhpNOMseTZyRqXnN5Ii7Qr74SrhYPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNX 12 | twIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQCnwVNmuaPKMXtIGdSzOtpONR2nHRj+ 13 | dux5z9T0IuEB4wqeTXFPClTVt5uI0yfq0L6bXwwiBGwDJsJaQOw03fDRx/BEsTRT 14 | YTX7QI7ipWcLS6yNWHCj0nS1KjqWRcXPL9DikPXxZwDmh2OAiuBGDIcI0YmAB0oa 15 | ywYe/5ABM60poF74izNe+mLDy0+Zqs3YijltoMzWcVnEgmZO352O1olgKUxrCbdZ 16 | 
yj94ML8zyAmLMEJz9nx8Sk1wJQA7/z4ZGKHbdMtZmqvnnetY1zKS6ilb6UCIuaJ/ 17 | EkhiKgh94g1X6McmizmQDZEOeF1572FLIMd/WFKzcAOLdjw2NDL/5Cd3 18 | -----END CERTIFICATE----- 19 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "twitter-streaming-framework", 3 | "version": "0.0.1", 4 | "description": "TypeScript/Node.js framework for processing Twitter data stream.", 5 | "scripts": { 6 | "build": "rimraf build && tsc -b", 7 | "copyright": "[ `find src -name \"*.[jt]s\" | wc -l` -eq `find src -name \"*.[jt]s\" -exec grep \"Copyright 2021 Twitter\" {} \\; | wc -l` ] || echo Error: Missing copyright header", 8 | "clean": "rimraf build node_modules package-lock.json", 9 | "watch": "rimraf build && tsc -b tsconfig.json -w", 10 | "crypto:start": "node build/app/crypto --stream --api", 11 | "crypto:start:db": "node build/app/crypto --streamdb --apidb" 12 | }, 13 | "author": "Avner Braverman", 14 | "license": "Apache-2.0", 15 | "devDependencies": { 16 | "@types/memory-cache": "^0.2.2", 17 | "@types/node": "^16.11.4", 18 | "@types/rimraf": "^3.0.2", 19 | "rimraf": "^3.0.2", 20 | "ts-node": "^10.4.0", 21 | "typescript": "^4.4.4" 22 | }, 23 | "engines": { 24 | "node": "16.x" 25 | }, 26 | "dependencies": { 27 | "@aws-sdk/client-dynamodb": "^3.39.0", 28 | "dotenv": "^16.0.0", 29 | "express": "^4.18.1", 30 | "memory-cache": "^0.2.0" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/app/vsa/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { ApiRouter } from './api' 5 | import * as config from './config' 6 | import { stream } from './streamer' 7 | import { HttpServer } from '../../http' 8 | import { counters, getCommandLineOptions } from '../../util' 9 | import { createDynamoDBTables, setTwitterStreamRules } from './setup' 10 | 11 | async function main(): Promise { 12 | const options = getCommandLineOptions({ 13 | api: 'Start API server', 14 | backfill: 'Backfill Tweets from last heartbeat', 15 | stream: 'Start streaming', 16 | setup: 'Setup streaming rules and create DynamoDB tables', 17 | }) 18 | 19 | if (options.api) { 20 | counters.monitor(config.PRINT_COUNTERS_INTERVAL_MS, config.PRINT_COUNTERS_LEVEL) 21 | const server = new HttpServer(new ApiRouter(), { port: config.API_PORT }) 22 | server.start() 23 | } else if (options.setup) { 24 | await setTwitterStreamRules() 25 | await createDynamoDBTables() 26 | } else if (options.stream) { 27 | stream(options.backfill !== undefined) 28 | } 29 | } 30 | 31 | if (typeof require === 'function' && require.main === module) { 32 | main().catch(e => { console.error(e); process.exit(1) }) 33 | } 34 | -------------------------------------------------------------------------------- /src/http/examples/continuousclient.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Printer } from '../../util' 5 | import { HttpRequestPool, HttpRequestPoolResponse } from '..' 
6 | 7 | const CONNECTION_COUNT = 10 8 | const SERVER_LATENCY_MS = 43 9 | 10 | const pool = new HttpRequestPool('https://localhost:3000', { connectionCount: CONNECTION_COUNT }) 11 | 12 | let count = 0 13 | const printer = new Printer(4) 14 | 15 | setInterval(() => { 16 | printer.printLines( 17 | `Active connections: ${pool.getConnectionsCount()}`, 18 | `Requests in flight: ${pool.getInflightCount()}`, 19 | `Requests per second: ${count}`, 20 | ) 21 | count = 0 22 | }, 1000) 23 | 24 | const sendRequest = () => pool.GET(`/wait/${SERVER_LATENCY_MS}`, undefined) 25 | 26 | pool.addResponseListener(({ response }: HttpRequestPoolResponse) => { 27 | if (!response) { 28 | console.error('Disconnected') 29 | process.exit(1) 30 | } 31 | if (response.statusCode !== 200) { 32 | console.error('Server error:', response.statusCode, response.statusText) 33 | process.exit(1) 34 | } 35 | count++ 36 | sendRequest() 37 | }) 38 | 39 | console.log(`Sending ${CONNECTION_COUNT} concurrent requests, each with ${SERVER_LATENCY_MS}ms added latency`) 40 | for (let i = 0; i < CONNECTION_COUNT; i++) { 41 | sendRequest() 42 | } 43 | -------------------------------------------------------------------------------- /src/http/examples/server.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Printer } from '../../util' 5 | import { 6 | HttpRouter, 7 | httpRouterMethod, 8 | HttpRouterRequest, 9 | HttpRouterResponse, 10 | HttpServer, 11 | } from '..' 
12 | 13 | let count = 0 14 | const printer = new Printer(4) 15 | setInterval(() => { 16 | printer.printLines(`Serving ${count} requests per second`) 17 | count = 0 18 | }, 1000) 19 | 20 | class Server extends HttpRouter { 21 | @httpRouterMethod('POST', /^\/echo(\/(\d{1,4}))?\/?$/) 22 | public echo(req: HttpRouterRequest, res: HttpRouterResponse) { 23 | const wait = req.params![1] 24 | if (wait === undefined) { 25 | return [200, req.body] 26 | } 27 | setTimeout(() => res.respond(200, req.body), parseInt(wait)) 28 | } 29 | 30 | @httpRouterMethod('GET', '/') 31 | public index() { 32 | count++ 33 | return [200, 'Hello, HTTP!\n'] 34 | } 35 | 36 | @httpRouterMethod('GET') 37 | public ping() { 38 | count++ 39 | return [200, { oops: 'pong' }] 40 | } 41 | 42 | @httpRouterMethod('GET', /^\/wait\/(\d{1,4})\/?$/) 43 | public wait(req: HttpRouterRequest, res: HttpRouterResponse) { 44 | count++ 45 | setTimeout(() => res.respond(200, 'Wait is over'), parseInt(req.params![0])) 46 | } 47 | } 48 | 49 | new HttpServer(new Server(), { port: 3000 }).start() 50 | -------------------------------------------------------------------------------- /src/app/vsa/setup.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
// SPDX-License-Identifier: Apache-2.0

import * as config from './config'
import { createStreamRules } from './rules'
import { getDynamoDBClient, DynamoDBKVStore } from '../../database'
import { TwitterAccount, TwitterDynamoDBTweetTable, TwitterStream } from '../../twitter'

/**
 * Creates the two DynamoDB tables used by the app: the Tweets table and the
 * control key/value table. For each table, logs its name, awaits `create()`
 * and logs the value that `create()` resolved with.
 */
export async function createDynamoDBTables() {
  const dynamoDB = getDynamoDBClient(config.AWS_REGION, config.AWS_DYNAMODB_ENDPOINT)

  const tweetTable = new TwitterDynamoDBTweetTable(dynamoDB, config.TWEET_TABLE_NAME)
  console.log('Creating DynamoDB Tweets table:', tweetTable.tableName)
  const tweetTableArn = await tweetTable.create()
  console.log('DynamoDB Tweets table ARN:', tweetTableArn)

  const controlTable = new DynamoDBKVStore(dynamoDB, config.CONTROL_TABLE_NAME)
  console.log('Creating DynamoDB control table:', controlTable.tableName)
  const controlTableArn = await controlTable.create()
  console.log('DynamoDB control table ARN:', controlTableArn)
}

/**
 * Builds the stream rules with `createStreamRules()`, prints each rule, and
 * installs them on a `TwitterStream` created from the account configured in
 * `config` (TWITTER_ACCOUNT / TWITTER_EMAIL / TWITTER_PASSWORD).
 */
export async function setTwitterStreamRules() {
  const account = new TwitterAccount(
    config.TWITTER_ACCOUNT,
    config.TWITTER_EMAIL,
    config.TWITTER_PASSWORD,
  )
  const stream = new TwitterStream(account)
  const streamRules = createStreamRules()

  console.log('Setting Twitter stream rules:')
  for (const name in streamRules) {
    const definition = streamRules[name]
    console.log(` ${name}: ${definition}`)
  }

  await stream.setStreamRules(streamRules)
}

// --------------------------------------------------------------------------------
// /src/http/server/HttpProxy.ts:
// --------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import net from 'net'
import http from 'http'
import https from 'https'
import { Sequence } from '../../util'
import { HttpHeaders } from '../HttpProtocol'
import { HttpEndpoint } from '../HttpEndpoint'
import { HttpServerHandler } from './HttpServer'

/**
 * HTTP server handler that forwards every incoming request to a fixed HTTPS
 * endpoint and streams the upstream response body back to the client.
 *
 * The handler also tracks every accepted client socket so that `onStop()` can
 * forcibly drop all open connections.
 *
 * NOTE(review): only the upstream response *body* is piped back; the upstream
 * status code and headers are not copied onto the client response.
 */
export class HttpProxy implements HttpServerHandler {
  // Options passed to every upstream https.request(): fixed headers plus the
  // endpoint's agent when it has one.
  private requestOptions: Record<string, any>

  // Open client sockets, keyed by the sequence id assigned in onConnection().
  private sockets: Record<string, net.Socket> = {}
  private sequence = new Sequence()

  /**
   * @param endpoint upstream endpoint; `endpoint.url` is requested and
   *   `endpoint.agent`, when set, is used for the upstream connection.
   * @param requestHeaders headers sent with every upstream request.
   */
  constructor(private readonly endpoint: HttpEndpoint, requestHeaders: HttpHeaders) {
    this.requestOptions = { headers: requestHeaders, ...(endpoint.agent ? { agent: endpoint.agent } : {}) }
  }

  /** Registers a new client socket and forgets it again once it closes. */
  public onConnection(socket: net.Socket): void {
    const sid = this.sequence.next
    this.sockets[sid] = socket
    socket.on('close', () => delete this.sockets[sid])
  }

  /**
   * Forwards the client request body upstream and pipes the upstream response
   * body back to the client.
   *
   * Upstream failures are handled explicitly. Without an 'error' listener a
   * failed upstream connection would raise an unhandled 'error' event and
   * take the whole process down.
   */
  public onRequest(clientReq: http.IncomingMessage, serverRes: http.ServerResponse): void {
    const req = https.request(this.endpoint.url, this.requestOptions, (res: http.IncomingMessage) => {
      // pipe() does not forward source errors to the destination, so a broken
      // upstream response must terminate the client response explicitly.
      res.on('error', () => serverRes.destroy())
      res.pipe(serverRes, { end: true })
    })

    req.on('error', () => {
      if (serverRes.headersSent) {
        // Part of the response is already on the wire; all we can do is cut it.
        serverRes.destroy()
      } else {
        serverRes.statusCode = 502
        serverRes.end()
      }
    })

    clientReq.pipe(req, { end: true })
  }

  /** Destroys every tracked client socket and clears the registry. */
  public onStop(): void {
    for (const sid in this.sockets) {
      this.sockets[sid].destroy()
      delete this.sockets[sid]
    }
  }
}

// --------------------------------------------------------------------------------
// /src/app/crypto/coins.ts:
// --------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import * as config from './config'
import { assert } from '../../util'
import { request } from '../../http'

interface Ticker {
  converted_last?: {
    usd?: number
  }
  converted_volume?: {
    usd?: number
  }
  trust_score?: string
}

interface Transaction {
  usd: number
  volume: number
}

/**
 * Performs a single attempt at fetching the latest BTC price in USD, rounded
 * to the nearest integer.
 *
 * NOTE(review): `coin` is currently unused; the request and the lookup are
 * hard-wired to the `BTC` symbol.
 *
 * @throws if the request fails, the body is not valid JSON, the response has
 *   no items, there is no BTC item, or its price is not a finite number.
 */
async function getLatestCoinToUSDRateOnce(coin: string): Promise<number> {
  //const res = await request(`https://api.coingecko.com/api/v3/coins/${coin}/tickers`)
  const response = await request(
    'https://nomics.com/data/currencies-ticker?filter=any&interval=1d&quote-currency=USD&symbols=BTC'
  )
  const res = JSON.parse(response as string)
  assert(
    // `typeof null` is 'object', so null has to be excluded explicitly.
    typeof res === 'object' && res !== null && Array.isArray(res.items) && 0 < res.items.length,
    `Empty response: ${JSON.stringify(res)}`
  )

  const btc = res.items.find((item: { id: string; price: string }) => item.id === 'BTC')
  assert(btc !== undefined && btc !== null, `No BTC item in response: ${JSON.stringify(res)}`)

  const price = Number(btc.price)
  assert(Number.isFinite(price), `Invalid BTC price: ${JSON.stringify(btc.price)}`)

  return Math.round(price)
}

/**
 * Fetches the latest coin-to-USD rate, retrying up to
 * `config.COIN_API_MAX_ATTEMPTS` times.
 *
 * @throws Error wrapping the last failure once every attempt has failed.
 */
export async function getLatestCoinToUSDRate(coin: string): Promise<number> {
  let error
  for (let attempts = 0; attempts < config.COIN_API_MAX_ATTEMPTS; attempts++) {
    try {
      // The await is required: returning the bare promise would let a
      // rejection escape this try/catch and skip the remaining attempts.
      return await getLatestCoinToUSDRateOnce(coin)
    } catch (err) {
      error = err
    }
  }
  throw new Error(`Error getting coin rate: ${error}`)
}

// --------------------------------------------------------------------------------
// /src/http/client/HttpPromisifiedRequestPool.ts:
// --------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { HttpHeaders, HttpRequest, HttpResponse } from '../HttpProtocol' 5 | import { HttpRequestPool, HttpRequestPoolOptions, HttpRequestPoolResponse } from './HttpRequestPool' 6 | 7 | export interface HttpPromisifiedRequestPoolResponse { 8 | attempts: number 9 | elapsed: number 10 | request: HttpRequest 11 | response?: HttpResponse 12 | } 13 | 14 | type Resolver = (value: any) => void 15 | 16 | export class HttpPromisifiedRequestPool { 17 | private pool: HttpRequestPool 18 | 19 | constructor(public readonly host: string, options: HttpRequestPoolOptions = {}) { 20 | this.pool = new HttpRequestPool(host, options) 21 | this.pool.addResponseListener(({ userp, ...rest }: HttpRequestPoolResponse) => userp(rest)) 22 | } 23 | 24 | public close(): void { 25 | this.pool.close() 26 | } 27 | 28 | public async request(httpRequest: HttpRequest): Promise { 29 | return new Promise(resolve => this.pool.request({ ...httpRequest, userp: resolve })) 30 | } 31 | 32 | public GET(path: string, headers?: HttpHeaders): Promise { 33 | return this.request({ method: 'GET', path, headers }) 34 | } 35 | 36 | public POST(path: string, body?: string, headers?: HttpHeaders): Promise { 37 | return this.request({ method: 'POST', path, headers, body }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/database/CsvFile.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { promises as fs } from 'fs' 5 | import { Obj, exists } from '../util' 6 | import { Tabular, TabularOptions } from './Tabular' 7 | 8 | export class CsvFile extends Tabular { 9 | constructor(private readonly filename: string, fields: string[], options: TabularOptions = {}) { 10 | super(fields, options) 11 | } 12 | 13 | private record(values: Obj): string { 14 | this.validate(values) 15 | return this.originalFieldNames.map(field => this.escape(values[field])).join(',') 16 | } 17 | 18 | protected escape(value: any): string { 19 | if (value === undefined && this.allowEmptyFields) { 20 | return '' 21 | } 22 | const str = String(value).trim().replace(/\n/g, ' ') 23 | return str.includes('"') || str.includes(',') ? `"${str.replace(/"/g, '""')}"` : str 24 | } 25 | 26 | protected transformFieldName(originalFieldName: string): string { 27 | return this.escape(originalFieldName) 28 | } 29 | 30 | public async close(): Promise { 31 | } 32 | 33 | public async open(): Promise { 34 | if (!(await exists(this.filename))) { 35 | await fs.writeFile(this.filename, this.transformedFieldNames.join(',') + '\n') 36 | } 37 | } 38 | 39 | public async appendOne(values: Obj): Promise { 40 | await fs.appendFile(this.filename, this.record(values) + '\n') 41 | } 42 | 43 | public async appendArray(valuesArray: Obj[]): Promise { 44 | await fs.appendFile(this.filename, valuesArray.map(values => this.record(values)).join('\n') + '\n') 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/util/test.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Obj } from '.' 
// Test switches by name; a test hook only fires while its name maps to true.
let tests: Record<string, boolean> = {}

type Handler = () => void

/**
 * Builds an object on which every property access `obj.<name>` yields
 * `tester('<name>')`, so hooks can be written as `testRun.someTest(handler)`.
 */
function generateTest(tester: (name: string) => (...args: any[]) => any): Obj {
  const traps = {
    get: (target: any, name: string) => tester(name)
  }
  return new Proxy({}, traps)
}

// Per test name, per tag: how many times testAfter.<name>(tag, ...) was reached.
let testAfterCounters: Record<string, Record<string, number>> = {}

/**
 * `testAfter.<name>(tag, threshold, handler)`: while test `<name>` is enabled,
 * counts calls per tag and runs `handler` on every call after the first
 * `threshold` calls, returning the handler's result.
 */
export const testAfter = generateTest((name: string) =>
  (tag: string, threshold: number, handler: Handler) => {
    if (!tests[name]) {
      return
    }
    const counters = testAfterCounters[name] || (testAfterCounters[name] = {})
    if (counters[tag] === undefined) {
      counters[tag] = 0
    }
    counters[tag] += 1
    if (threshold < counters[tag]) {
      return handler()
    }
  }
)

// Test names whose testOnce hook has already fired.
let testOnceFlags: Record<string, boolean> = {}

/**
 * `testOnce.<name>(handler)`: runs `handler` the first time it is reached
 * while test `<name>` is enabled, and never again until `testClear()`.
 */
export const testOnce = generateTest((name: string) =>
  (handler: Handler) => {
    if (!tests[name] || testOnceFlags[name]) {
      return
    }
    testOnceFlags[name] = true
    return handler()
  }
)

/** `testRun.<name>(handler)`: runs `handler` whenever test `<name>` is enabled. */
export const testRun = generateTest((name: string) =>
  (handler: Handler) => {
    if (!tests[name]) {
      return
    }
    return handler()
  }
)

/** Disables every test and resets all counters and once-flags. */
export function testClear() {
  tests = {}
  testAfterCounters = {}
  testOnceFlags = {}
}

/** Enables (default) or disables the test called `name`. */
export function testSet(name: string, enable = true) {
  tests[name] = enable
}

// --------------------------------------------------------------------------------
// /src/twitter/TwitterAccount.ts:
// --------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | const emailRegex = new RegExp( 5 | '(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:' + 6 | '[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-' + 7 | '\\x09\\x0b\\x0c\\x0e-\\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+' + 8 | '[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|' + 9 | '[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|' + 10 | '[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]' + 11 | '|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])' 12 | ) 13 | 14 | // 15 | // Twitter v1.1 account information. 16 | // 17 | export class TwitterAccount { 18 | private _auth: { Authorization: string } 19 | private _name: string 20 | 21 | constructor(account?: string, email?: string, password?: string) { 22 | if (typeof account !== 'string' || account.trim().length === 0) { 23 | throw new Error(`Invalid Twitter account: ${account}`) 24 | } 25 | if (typeof email !== 'string' || !emailRegex.test(email)) { 26 | throw new Error(`Invalid email: ${email}`) 27 | } 28 | if (typeof password !== 'string' || password.trim().length === 0) { 29 | throw new Error(`Invalid Twitter password: ${password}`) 30 | } 31 | 32 | this._auth = { Authorization: `Basic ${Buffer.from(`${email}:${password}`).toString('base64')}` } 33 | this._name = account 34 | } 35 | 36 | public get auth(): { Authorization: string } { 37 | return this._auth 38 | } 39 | 40 | public get name(): string { 41 | return this._name 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/http/examples/example-key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEowIBAAKCAQEA9ffyOA+z+t4tROOiehd748+Z2vA3+X0U6FkdWy+trmbU4dk2 3 | /5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13HTy4hOiS7CX/cBY4RrewWu0ezkFk 4 | 
3h6Eslc7GSirDWEQE9ar645HiSYt47KszkdnR46YURPYwZEp2/lPnL8XNmalWJCV 5 | m+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOfhsX89mBYP/SDX7Yxwc/uWx0w+Fki 6 | saIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN8ZhpNOMseTZyRqXnN5Ii7Qr74Srh 7 | YPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNXtwIDAQABAoIBAAsJtm/3nG8Mm2F1 8 | CayK8z9U6KWflBN5HoASx7N430VtTe5YrhQoWekwxlVGCF+3Z358CbfOWEOkH+cj 9 | CFCwAYFpYSptuXIpfl4MUTgzNBHQhEpjOuCQ5AmQypEldw+hpHQPdSWb++/Wq8aF 10 | FSYopCTc7E7vIhIFrqg1dvIkzSjeudTZAiYnQ2vvgPsvnEO3YAqUo0AkwHl/bQeN 11 | VOC46aqMZAPf1Y5UmW3/0ua8HTuVbHk5QMZlWGbEPe2RbR3ILeGAvruIxSyhk9yO 12 | PXRsYUj5uLHAAoAPXXF5hsngHxkpY9VSOv1C3LhBL6HyNf2MmDhsOZglyU6rxgB4 13 | 2tf/FpkCgYEA/cQw0TyhVFlmdSkG7aN54ibnkkjnOr2onl95mUBZxea2hvozP8zZ 14 | BjCW2A9oo8z6HK6BJy1B0M8d9pUlUcHAMJ2eB/36tih2temytMbigyf2g0uvmUEP 15 | YLCaHBSSBKh2Y0CxzTJkqZEAh3PewiOOhhU9q66Rv+8Aifipx/FjOD0CgYEA+CIv 16 | SH4jPW9CtvGs45oZ9MaaZkhcjuDQ2R4iCSKW1cFOhQVUOnhRCu26vBiuJhwM9vwi 17 | 12yD/qJOm73wbN0iIoZTDRqoWCUur3mgIXw5HGe0uoV5MIEFbOkW/9nj6vk+a/9v 18 | CqaypGJT2Usd95BWHxlxPwfF9FtxW+czOdJMWwMCgYB0sJqiHHczCkkK5urAq8OI 19 | MsuZgNyTLlMzQEPyLJ0bW5PjTXnzhIbnScCTacJ8T+1S8wuAsFbrZdIpaTvX9Hgj 20 | 4tagZjG7QbAUxnnelvXhyaaZiVwd5MTleU/kSbE7YxvNWBpqeRnAv2S25JkyPJd1 21 | IJ9TKtrqn0RoLWglAOLXIQKBgCbWovQD2lw5WAXumhMeAcYQeAZeeS5b/hSd5NHt 22 | OhLHKRUlGmP0hSrivwHGEywf49+c4484iwiGOyuhdUp06mzg/Yrli0gQudf5f5j+ 23 | KqpJiT5QugFfkIvViCYP4t7amGyrFKRkJz4XrewrF8uyKejAQLuO6esvjPTHoXsB 24 | cbYlAoGBAJ7kP4V9koRS63AudjMqZa3BL9edM8yi/T/B51eKMltbxnd7Aa6e+nKj 25 | djA+bw2+54DG+ygZtQ2DNdM+VeKzBd0j2Q66egKBOm39psQaMMktxsFoW+MiYc1T 26 | Q9XAQiN0PQ/aPPvVfmY1z9LxvUvJLtsiXVpYhlgKEyQql/b0UJeY 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TWItter STreaming (TWIST) Framework 2 | 3 | The TWItter STreaming (TWIST) framework contains [Node.js](https://nodejs.org/) utilities written in 4 | 
[TypeScript](https://www.typescriptlang.org/) for implementing Extract, Transform and Load (ETL) 5 | pipelines. The framework handles streaming of Tweets from Twitter's 6 | [PowerTrack API](https://developer.twitter.com/en/docs/twitter-api/enterprise/powertrack-api/overview), 7 | and provides tools for sending high volumes of data to HTTP APIs for transformation and loading the 8 | results into a database. 9 | 10 | The framework also includes basic tools for building APIs on top of the data loaded into the 11 | database. 12 | 13 | ## What's in the box? 14 | 15 | The `src/` directory contains the following folders: 16 | 17 | * **`database/`** base classes for accessing databases and tables. 18 | * **`http/`** classes for streaming HTTP data, managing highly parallel requests and serving APIs. 19 | * **`twitter/`** classes for streaming and searching Tweets through the Twitter API. 20 | * **`util/`** useful helpers. 21 | 22 | Some of these folders (e.g. `http` and `twitter`) include an `examples` subfolder with specific examples 23 | for each package. In addition, the `src/app` directory includes a number of sample applications: 24 | 25 | * [**crypto/**](src/app/crypto/README.md) a dashboard showing public conversation trends around various crypto coins. 26 | * [**twidl/**](src/app/twidl/README.md) a downloader tool for large batches of historic Tweets. 27 | * [**vsa/**](src/app/vsa/README.md) a Visual Search API (VSA) for searching Tweets with logos or textual 28 | brand mentions. 29 | 30 | ## Contact us 31 | 32 | ### Issues? 33 | 34 | Please open tickets and pull requests on GitHub. 35 | 36 | ### Security Issues? 37 | 38 | Please report sensitive security issues via Twitter's bug-bounty program (https://hackerone.com/twitter) 39 | rather than GitHub.
40 | -------------------------------------------------------------------------------- /src/http/server/HttpServerCertificates.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import fs from 'fs' 5 | 6 | export interface HttpServerCertificatesOptions { 7 | 8 | // Raw key 9 | key?: Buffer | string 10 | 11 | // Key file name, see examples/example-key.pem 12 | keyfile?: string 13 | 14 | // Raw certificate 15 | cert?: Buffer | string 16 | 17 | // Certificate file name, see examples/example-cert.pem 18 | certfile?: string 19 | } 20 | 21 | export class HttpServerCertificates { 22 | public readonly key: Buffer 23 | public readonly cert: Buffer 24 | 25 | constructor(options: HttpServerCertificatesOptions) { 26 | this.key = this.load('key', options.key, options.keyfile, 'RSA PRIVATE KEY', 24) 27 | this.cert = this.load('certificate', options.cert, options.certfile, 'CERTIFICATE', 15) 28 | } 29 | 30 | private load( 31 | name: string, 32 | value: Buffer | string | undefined, 33 | file: string | undefined, 34 | head: string, 35 | lines: number, 36 | ): Buffer { 37 | if (value === undefined && file === undefined) { 38 | throw new Error(`Neither ${name} value or ${name} file specified`) 39 | } 40 | if (value !== undefined && file !== undefined) { 41 | throw new Error(`Both ${name} value and ${name} file specified. Can't decide which one to use`) 42 | } 43 | 44 | const val: string = value === undefined 45 | ? fs.readFileSync(file!, 'utf-8') 46 | : typeof value === 'object' 47 | ? 
value.toString('utf-8') 48 | : value.replace(/\n\s*/g, '\n') 49 | 50 | const regex = new RegExp( 51 | `^\\n?-----BEGIN ${head}-----\\n([\\w\\+\\/]{64}\\n){${ 52 | lines}}[\\w\\+\\/]{48,62}=?=?\\n-----END ${head}-----\\n?$` 53 | ) 54 | if (!regex.test(val)) { 55 | throw new Error(`Invalid ${name}`) 56 | } 57 | 58 | return Buffer.from(val) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/app/vsa/config.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | export const CONTROL_TABLE_NAME = 'twist-vsa-control' 5 | export const TWEET_TABLE_NAME = 'twist-vsa-tweets' 6 | 7 | export const API_MAX_RESULTS = 500 8 | export const API_PORT = 4000 9 | export const AWS_REGION = process.env.AWS_REGION 10 | export const AWS_DYNAMODB_ENDPOINT = process.env.AWS_DYNAMODB_ENDPOINT 11 | export const EXPECTED_IMAGE_RATE = 100 12 | export const HEARTBEAT_INTERVAL_MS = 1000 13 | export const PRINT_COUNTERS_INTERVAL_MS: undefined /* never */ | 0 /* immediate */ | number = 500 14 | export const PRINT_COUNTERS_LEVEL = 'debug' 15 | 16 | export const TWITTER_ACCOUNT = process.env.TWITTER_ACCOUNT 17 | export const TWITTER_EMAIL = process.env.TWITTER_EMAIL 18 | export const TWITTER_PASSWORD = process.env.TWITTER_PASSWORD 19 | export const TWITTER_USE_FAKE_STREAM = false 20 | 21 | export const VISUA_ACTIVITY_TIMEOUT_MS = 2000 22 | export const VISUA_API_LATENCY_MS = 1000 23 | export const VISUA_DETECTION_RATE = 0.05 24 | export const VISUA_DEVELOPER_KEY = process.env.VISUA_DEVELOPER_KEY 25 | export const VISUA_ENDPOINT = process.env.VISUA_ENDPOINT 26 | export const VISUA_FAKE_API_DELAY = Math.round(VISUA_API_LATENCY_MS * 0.9) 27 | export const VISUA_FAKE_JOB_TIME_SEC = 3 28 | export const VISUA_MAX_ATTEMPTS = 3 29 | export const VISUA_PIPELINE_DEPTH = Math.round(EXPECTED_IMAGE_RATE / 10) 30 | export const 
VISUA_PIPELINED_CONNECTION_COUNT = 31 | Math.round(2 * EXPECTED_IMAGE_RATE * (VISUA_API_LATENCY_MS / 1000) / VISUA_PIPELINE_DEPTH) 32 | export const VISUA_WAIT_TIME = 15000 33 | 34 | export const BRANDS = { 35 | adidas: '47.10026773952', 36 | asics: '47.10026876714', 37 | columbia: '47.10043412809', 38 | newbalance: '47.10027577872', 39 | nike: '47.10026482869', 40 | patagonia: '47.10042735382', 41 | puma: '47.10024011568', 42 | reebok: '47.10026482134', 43 | underarmour: '47.10024011486', 44 | } 45 | -------------------------------------------------------------------------------- /src/twitter/examples/tsv.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { promises as fs } from 'fs' 5 | import { Tweet, TwitterAccount, TwitterStream } from '..' 6 | 7 | const SECONDS = 3600 8 | const RULES = { bitcoin: '(bitcoin OR btc) -cash -is:retweet' } 9 | const FILENAME = 'out.tsv' 10 | const BATCH = 10 11 | 12 | let totalTweets = 0 13 | let seconds: number 14 | let tweets: Tweet[] = [] 15 | 16 | async function onInterval() { 17 | process.stdout.write(`\r${totalTweets += tweets.length} Tweets in ${++seconds} seconds`) 18 | const isDone = SECONDS <= seconds 19 | if (BATCH <= tweets.length || isDone) { 20 | await fs.appendFile( 21 | FILENAME, 22 | tweets 23 | .map(t => `${t.id}\t${t.date.toISOString()}\t${ 24 | t.text.replace(/\s/g, ' ')}\t${t.type}\t${t.user}\t${JSON.stringify(t.full).replace(/\s/g, ' ')}\n`) 25 | .join('') 26 | ) 27 | tweets = [] 28 | } 29 | if (isDone) { 30 | console.log() 31 | process.exit(0) 32 | } 33 | } 34 | 35 | function onTweet(tweet: Tweet) { 36 | if (seconds === undefined) { 37 | console.log() 38 | seconds = 0 39 | setInterval(onInterval, 1000) 40 | } 41 | tweets.push(tweet) 42 | } 43 | 44 | async function main() { 45 | const twitterAccount = new TwitterAccount( 46 | process.env.TWITTER_ACCOUNT, 47 | 
/**
 * Searches the Tweet table for a brand within a time range.
 *
 * @param brand     Brand name; must match BRAND_REGEX.
 * @param startTime Start of the range, passed through to the timed prefix search.
 * @param endTime   Optional end of the range.
 * @param full      When true, results keep the `tweetFull` attribute.
 * @returns Search results, capped at `config.API_MAX_RESULTS` by the underlying search.
 * @throws If `brand` does not match BRAND_REGEX.
 */
export async function search(
  brand: string,
  startTime: string,
  endTime?: string,
  full = false,
): Promise<DynamoDBSearchResults<TwitterDynamoDBPartialTweetRecord>> {
  assert(BRAND_REGEX.test(brand), `Invalid brand: ${brand}`)

  // Query function for one minute: fetch the records and drop the table's
  // key attributes (`brand`, `uid`) so that only Tweet data is returned.
  const qf = async (minute: Minutes) => {
    const records = await tweetTable.query(brand, minute)
    return records?.map(record => {
      const { brand: _brand, uid: _uid, ...data } = record as any
      return data
    })
  }

  return twitterDynamoDBTweetSearch(startTime, endTime, full, config.API_MAX_RESULTS, qf)
}
/**
 * Periodic flush: turns the Tweets collected since the previous tick into one
 * payload and writes it to the object store, keyed by the payload timestamp.
 * Failures are logged and counted; they never propagate to the timer.
 */
async function onInterval() {
  try {
    counters.info.streamer.writes.inc()
    counters.info.streamer.tweetsInBatch.set(0)

    // Detach the current batch before any await so new Tweets start a fresh one
    const batch = streamedTweets
    streamedTweets = []

    const record = await getDataToStore(batch, 'bitcoin')
    const objectName = String(record.timeMs)
    const contents = Buffer.from(JSON.stringify(record))
    await fos.putObject(config.OBJECT_STORE_BUCKET_NAME, objectName, contents)
  } catch (err) {
    console.log(err)
    counters.warn.streamer.errors.inc()
  }
}
onStreamedTweet(streamedTweet: StreamedTweet): void { 34 | if (!interval) { 35 | interval = setInterval(onInterval, config.BATCH_INTERVAL) 36 | } 37 | counters.info.streamer.tweetsInBatch.inc() 38 | streamedTweets.push(streamedTweet) 39 | } 40 | 41 | export function stream(shouldBackfill = false) { 42 | const streamer = new TwitterStreamer( 43 | config.TWITTER_USE_FAKE_STREAM 44 | ? { twitterStream: new FakeTwitterStream(config.EXPECTED_TWEET_RATE, createStreamProbabilities()) } 45 | : { 46 | heartbeatIntervalMs: config.HEARTBEAT_INTERVAL_MS, 47 | heartbeatMonitoringIntervalMs: config.PRINT_COUNTERS_INTERVAL_MS, 48 | heartbeatMonitoringLevel: config.PRINT_COUNTERS_LEVEL, 49 | twitterAccount: new TwitterAccount(config.TWITTER_ACCOUNT, config.TWITTER_EMAIL, config.TWITTER_PASSWORD), 50 | } 51 | ) 52 | 53 | streamer.addListener(onStreamedTweet) 54 | streamer.connect(shouldBackfill) 55 | } 56 | -------------------------------------------------------------------------------- /src/app/crypto/README.md: -------------------------------------------------------------------------------- 1 | # Crypto Dashboard Application 2 | 3 | This application displays a dashboard of pricing and Twitter trends around notable crypto coins. It contains 4 | a streaming engine for loading Twitter data and coin prices into an object store. It implements an API 5 | for exposing Tweets and trends from the object store. 6 | 7 | The app currently uses a filesystem based object store, configured by default to create a directory named 8 | `crypto` inside the current directory and put object files in there. You can change these settings and others 9 | in the app's configuration file `config.ts`. 10 | 11 | ## Prepare 12 | 13 | Make sure you have [Node.js](https://nodejs.org/) installed. 14 | 15 | This application uses Twitter's 16 | [PowerTrack API](https://developer.twitter.com/en/docs/twitter-api/enterprise/powertrack-api/overview) to 17 | stream Tweets.
Configure the following environment variables with your Twitter Enterprise account credentials: 18 | 19 | * `TWITTER_ACCOUNT` 20 | * `TWITTER_EMAIL` 21 | * `TWITTER_PASSWORD` 22 | 23 | ## Build 24 | 25 | Build the application by running the following commands in the root of the project: 26 | 27 | ```bash 28 | npm install 29 | npm run build 30 | ``` 31 | 32 | ## Setup 33 | 34 | Run the following command in the root of the project: 35 | 36 | ```bash 37 | node build/app/crypto --setup 38 | ``` 39 | 40 | This will configure the required streaming rules through the Twitter API. 41 | 42 | ## Stream 43 | 44 | The following command will stream Tweets and coin prices into the object store: 45 | 46 | ```bash 47 | node build/app/crypto --stream 48 | ``` 49 | 50 | Let it run for at least a few minutes to load a meaningful amount of data. 51 | 52 | ### Explore 53 | 54 | First, start the API server with the following command: 55 | 56 | ```bash 57 | node build/app/crypto --api 58 | ``` 59 | 60 | The API should now be available on `http://localhost:4000/`. 61 | 62 | You can now get Tweets and dollar rates with 63 | 64 | ``` 65 | curl http://localhost:4000/bitcoin/<start time>(/<end time>)? 66 | ``` 67 | 68 | where `<start time>` and `<end time>` are UTC timestamps in one-minute resolution in the form `yyyy-mm-ddThh:mm`. 69 | -------------------------------------------------------------------------------- /src/database/Tabular.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc.
/**
 * Base class for append-only tabular stores with a fixed set of named fields.
 *
 * The constructor records the field names (and their transformed variants, see
 * `transformFieldName`) and rejects empty or duplicate field lists. Subclasses
 * implement the actual appending and can use `validate` / `isValid` to check a
 * row against the declared fields.
 *
 * Both options default to `true`; only an explicit `false` turns them off.
 */
export abstract class Tabular {
  protected readonly allowEmptyFields: boolean
  protected readonly ignoreUnrecognizedFields: boolean
  protected readonly fieldCount: number
  protected readonly originalFieldNames: string[] = []
  protected readonly transformedFieldNames: string[] = []
  protected readonly fieldRecord: Record<string, boolean> = {}

  /**
   * @param fields  Field names, in column order. Must be non-empty and unique.
   * @param options Validation behaviour; see `TabularOptions`.
   * @throws If `fields` is empty or contains a duplicate name.
   */
  constructor(fields: string[], options: TabularOptions = {}) {
    this.allowEmptyFields = options.allowEmptyFields !== false
    this.ignoreUnrecognizedFields = options.ignoreUnrecognizedFields !== false
    this.fieldCount = fields.length
    assert(0 < this.fieldCount, 'No fields')
    for (const originalFieldName of fields) {
      const transformedFieldName = this.transformFieldName(originalFieldName)
      assert(!this.hasField(originalFieldName), `Duplicate field: ${originalFieldName}`)
      this.fieldRecord[originalFieldName] = true
      this.originalFieldNames.push(originalFieldName)
      this.transformedFieldNames.push(transformedFieldName)
    }
  }

  // Own-property lookup: names inherited from Object.prototype (`constructor`,
  // `toString`, ...) must not be mistaken for declared fields.
  private hasField(name: string): boolean {
    return Object.prototype.hasOwnProperty.call(this.fieldRecord, name)
  }

  /**
   * Checks a row against the declared fields.
   *
   * @returns An error message, or `undefined` when the row is acceptable.
   */
  protected isValid(values: Obj): string | undefined {
    let matches = 0
    for (const field of Object.keys(values)) {
      if (this.hasField(field)) {
        matches++
      } else if (!this.ignoreUnrecognizedFields) {
        return `Unrecognized field: ${field}`
      }
    }
    if (matches < this.fieldCount && !this.allowEmptyFields) {
      return 'Empty fields'
    }
    return undefined
  }

  /** Hook for subclasses that store fields under a different name. Identity by default. */
  protected transformFieldName(originalFieldName: string): string {
    return originalFieldName
  }

  /** Like `isValid`, but throws instead of returning the error message. */
  protected validate(values: Obj): void {
    const error = this.isValid(values)
    assert(error === undefined, error!)
  }

  public abstract appendOne(values: Obj): Promise<void>

  public abstract appendArray(valuesArray: Obj[]): Promise<void>
}
/**
 * Classifies the sentiment of each text.
 *
 * Texts are sent in batches of at most MAX_BATCH_SIZE, all batches in
 * parallel, and the per-batch results are concatenated in input order.
 *
 * The input array is only read (the batches are copies), so callers can keep
 * using `texts` afterwards. An empty input resolves to an empty array without
 * sending a request.
 *
 * @param texts Texts to classify.
 * @returns One sentiment per input text, in the same order.
 */
public async sentiment(texts: string[]): Promise<ConverseonSentiment[]> {
  if (texts.length === 0) {
    return []
  }

  const batches: string[][] = []
  for (let start = 0; start < texts.length; start += MAX_BATCH_SIZE) {
    batches.push(texts.slice(start, start + MAX_BATCH_SIZE))
  }

  const outputs = await Promise.all(batches.map(batch => this.runSentimentBatch(batch)))
  return ([] as ConverseonSentiment[]).concat(...outputs)
}
// Dispatcher for a single kind of event. It wraps an EventDispatcher and
// registers, fires and counts everything under the `undefined` event type,
// so callers never have to name a type.
export class SimpleEventDispatcher {
  private dispatcher = new EventDispatcher()

  // Registers a listener; it is stored under the `undefined` event type.
  public addListener(listener: SimpleEventListener): void {
    this.dispatcher.addListener(undefined, listener)
  }

  // Fires the event to the listeners registered through addListener.
  public fire(event: Event): void {
    this.dispatcher.fire(undefined, event)
  }

  // Number of listeners registered through addListener.
  public getListenerCount(): number {
    return this.dispatcher.getListenerCount(undefined)
  }
}
6 | 7 | const DURATION = 30 8 | 9 | async function main() { 10 | if ( 11 | process.argv.length < 3 || 12 | 4 < process.argv.length || 13 | (process.argv.length === 3 && (process.argv[2] === '-f' || process.argv[2] === '--force')) || 14 | (process.argv.length === 4 && process.argv[2] !== '-f' && process.argv[2] !== '--force') 15 | ) { 16 | const exe = ospath.basename(process.argv[1]) 17 | console.error(`Usage: node ${exe} [-f|--force] `) 18 | console.error() 19 | console.error('Examples:') 20 | console.error(` node ${exe} twitter`) 21 | console.error(` node ${exe} 'hello world'`) 22 | console.error(` node ${exe} 'has:images lang:en -is:retweet'`) 23 | console.error(` node ${exe} --force 'has:images -is:retweet'`) 24 | console.error() 25 | console.error('Learn more:') 26 | console.error(' https://developer.twitter.com/en/docs/twitter-api/enterprise/rules-and-filtering/building-a-rule') 27 | console.error() 28 | process.exit(1) 29 | } 30 | const force = process.argv.length === 4 31 | const rule = process.argv[process.argv.length - 1] 32 | 33 | const twitterAccount = new TwitterAccount( 34 | process.env.TWITTER_ACCOUNT, 35 | process.env.TWITTER_EMAIL, 36 | process.env.TWITTER_PASSWORD, 37 | ) 38 | const twitterStream = new TwitterStream(twitterAccount) 39 | 40 | console.log('Setting rule:', rule) 41 | await twitterStream.setStreamRules({ rule }, force) 42 | 43 | let count: number 44 | twitterStream.addListener(() => { 45 | if (count !== undefined) { 46 | return count++ 47 | } 48 | 49 | count = 0 50 | let countdown = DURATION 51 | function onTimeout() { 52 | process.stdout.write(`\rMeasuring for ${countdown} seconds... 
/**
 * Measures the latency of the two Visua API calls, 1000 sequential requests
 * each, and prints the 90th/99th/99.9th percentiles, maximum and average.
 *
 * @param visua Client to measure.
 */
async function measureLatency(visua: Visua): Promise<void> {

  // Runs `func` 1000 times back to back and prints the latency statistics
  async function perf(label: string, func: () => Promise<unknown>): Promise<void> {
    const latencies: number[] = []
    for (let i = 1000; 0 < i; i--) {
      const start = Date.now()
      await func()
      latencies.push(Date.now() - start)
      process.stdout.write(`\r${label}: ${i} `)
    }
    console.log(`\r${label}: `)

    // Numeric comparator is required: the default sort compares the numbers
    // as strings (e.g. 100 < 20), which scrambles percentiles and maximum.
    latencies.sort((a, b) => a - b)

    const last = latencies.length - 1
    console.log(' 90%: ', latencies[Math.round(last * 0.9)])
    console.log(' 99%: ', latencies[Math.round(last * 0.99)])
    console.log(' 99.9%: ', latencies[Math.round(last * 0.999)])
    console.log(' Maximum: ', latencies[last])
    console.log(' Average: ', latencies.reduce((acc, val) => acc + val, 0) / latencies.length)
    console.log()
  }

  await perf('Start detection', () => visua.startDetection(img))
  // A real job token is needed to measure polling
  const token = await visua.startDetection(img)
  await perf('Poll detection', () => visua.pollDetection(token))
}
/**
 * Entry point: creates a Visua client from the VISUA_DEVELOPER_KEY and
 * VISUA_ENDPOINT environment variables, runs a single detection job and
 * closes the client.
 */
async function main() {
  const { VISUA_DEVELOPER_KEY: developerKey, VISUA_ENDPOINT: apiEndpoint } = process.env
  const visua = new Visua(developerKey, { apiEndpoint })

  await runOneJob(visua)
  // await measureLatency(visua)
  visua.close()
}
/**
 * Called once per INTERVAL: appends one CSV row (timestamp plus per-second
 * rates for the three counters) to `longcount.csv`, resets the counters and
 * exits the process after SAMPLES rows have been written.
 */
function onInterval() {
  const timestamp = getTimestamp()
  const rates = [
    perSecond(counts.tweetsWithImages),
    perSecond(counts.images),
    perSecond(counts.tweetsWithVideos),
  ]
  const row = [timestamp, ...rates].join(',') + '\n'

  // Start the next sample from zero
  counts.tweetsWithImages = 0
  counts.images = 0
  counts.tweetsWithVideos = 0

  samples += 1
  const isLastSample = SAMPLES <= samples
  process.stdout.write(`\r${samples} samples`)

  fs.appendFile('longcount.csv', row, (error) => {
    if (error) {
      console.error(error)
      process.exit(1)
    }
    // Exit only after the final row has reached the file
    if (isLastSample) {
      console.log('\nDone.')
      process.exit(0)
    }
  })
}
/**
 * Stores one sentiment record in the DynamoDB sentiment table.
 *
 * `totalWrites` counts every attempt. `activeWrites` tracks writes currently
 * in flight, so it is decremented in a `finally` block: a rejected store call
 * must not leave the gauge permanently raised.
 *
 * @param record Record to store.
 * @throws Whatever the table's `store` call rejects with.
 */
async function saveSentimentResults(record: TwitterDynamoDBTweetSentimentRecord): Promise<void> {
  counters.info.streamer.totalWrites.inc()
  counters.debug.streamer.activeWrites.inc()
  try {
    await tweetSentimentTable.store(record)
  } finally {
    counters.debug.streamer.activeWrites.dec()
  }
}
/**
 * Starts streaming Tweets into the batching listener.
 *
 * Uses a fake Twitter stream when `config.TWITTER_USE_FAKE_STREAM` is set,
 * otherwise a real account with heartbeat monitoring.
 *
 * @param shouldBackfill Passed through to the streamer's `connect`.
 */
export function stream(shouldBackfill = false) {
  const createStreamer = (): TwitterStreamer => {
    if (config.TWITTER_USE_FAKE_STREAM) {
      const twitterStream = new FakeTwitterStream(config.EXPECTED_TWEET_RATE, createStreamProbabilities())
      return new TwitterStreamer({ twitterStream })
    }
    return new TwitterStreamer({
      heartbeatIntervalMs: config.HEARTBEAT_INTERVAL_MS,
      heartbeatMonitoringIntervalMs: config.PRINT_COUNTERS_INTERVAL_MS,
      heartbeatMonitoringLevel: config.PRINT_COUNTERS_LEVEL,
      twitterAccount: new TwitterAccount(config.TWITTER_ACCOUNT, config.TWITTER_EMAIL, config.TWITTER_PASSWORD),
    })
  }

  const streamer = createStreamer()
  streamer.addListener(onStreamedTweet)
  streamer.connect(shouldBackfill)
}
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import {Minutes, Obj} from '../util' 5 | import { DynamoDBClient } from '@aws-sdk/client-dynamodb' 6 | import { 7 | DynamoDBHashKey, 8 | DynamoDBRangeKey, 9 | DynamoDBSearchResults, 10 | DynamoDBTable, 11 | DynamoDBTimedPrefixQueryFunction, 12 | dynamodDBTimedPrefixSearch, 13 | } from '../database' 14 | import {ConverseonSentiment} from "../app/crypto/converseon"; 15 | 16 | 17 | export interface TwitterDynamoDBTweetSentiments { 18 | tweetIds: Array 19 | sentiment: Obj 20 | sentimentByFollowers: Obj 21 | } 22 | 23 | export interface TwitterDynamoDBTweetSentimentsStored { 24 | tweetIds: string; 25 | sentiment: string 26 | sentimentByFollowers: string 27 | } 28 | 29 | export type TwitterDynamoDBSentMetadata = { 30 | coin: string; 31 | timeMs: number; 32 | usdRate: number; 33 | score: string 34 | scoreByFollowers: string 35 | } 36 | 37 | export type TwitterDynamoDBTweetSentimentRaw = TwitterDynamoDBTweetSentimentsStored & TwitterDynamoDBSentMetadata 38 | 39 | export type TwitterDynamoDBTweetSentimentRecord = TwitterDynamoDBTweetSentiments & TwitterDynamoDBSentMetadata 40 | 41 | export class TwitterDynamoDBTweetSentimentTable extends DynamoDBTable { 42 | constructor(client: DynamoDBClient, tableName: string, timeToLiveHours?: number) { 43 | super( 44 | client, 45 | tableName, 46 | new DynamoDBHashKey('coin'), 47 | new DynamoDBRangeKey('timeMs'), 48 | timeToLiveHours === undefined ? undefined : 'expirationTime', 49 | timeToLiveHours, 50 | ) 51 | } 52 | 53 | public async queryTimeRange(coin: string, startTime: number, endTime: number): Promise { 54 | const res = await this.doQueryTimeRange(coin, startTime.toString(), endTime.toString()) 55 | return res ? 
/**
 * Parses `process.argv` against a table of `--name` options.
 *
 * Each entry of `options` is either a description string (a flag without an
 * argument) or a full `CommandLineOption`. Flags map to `true`; options with an
 * argument map to the raw argument string, or to the parser's return value when
 * a parser is given. Options that were not passed are absent from the result.
 *
 * Prints a usage message and exits the process with status 1 when no options
 * are given, an option is unknown or malformed, an argument is missing or
 * rejected by its parser, or a required option is absent.
 *
 * @param options Recognised options, keyed by name (without the leading `--`).
 * @returns The parsed values, keyed by option name.
 */
export function getCommandLineOptions(options: Record<string, string | CommandLineOption>): Record<string, any> {
  // Own-property test: `name in obj` would also accept names inherited from
  // Object.prototype, so `--constructor` would pass as a known option.
  const hasOwn = (obj: object, key: string): boolean => Object.prototype.hasOwnProperty.call(obj, key)

  const opts: Record<string, CommandLineOption> = {}
  for (const [name, val] of Object.entries(options)) {
    opts[name] = typeof val === 'string' ? { description: val } : val
  }

  // Prints an optional error followed by the option list, then exits
  function usage(error?: string) {
    if (error) {
      console.error(error)
      console.error()
    }
    console.error('usage:', path.basename(process.argv[1]), '[options]')
    console.error()
    console.error(' Options:')
    const names = Object.keys(opts).sort()
    const keys = names.map(name => opts[name].argument === undefined ? name : `${name} <${opts[name].argument}>`)
    const width = keys.reduce((a, v) => Math.max(a, v.length), 0)
    for (let i = 0; i < names.length; i++) {
      const opt = opts[names[i]]
      console.error(` --${pad(keys[i], -width, ' ')} ${opt.required ? '(required) ' : ''}${opt.description}`)
    }
    console.error()
    process.exit(1)
  }

  if (process.argv.length === 2) {
    usage()
  }

  const values: Record<string, any> = {}
  for (let i = 2; i < process.argv.length; i++) {
    const arg = process.argv[i]
    const name = arg.substring(2)
    if (!arg.startsWith('--')) {
      usage(`Option should begin with '--': ${arg}`)
    }
    if (!hasOwn(opts, name)) {
      usage(`Unrecognized option: ${name}`)
    }
    const opt = opts[name]
    if (opt.argument) {
      if (i === process.argv.length - 1) {
        usage(`Option ${name} requires an argument`)
      }
      const raw = process.argv[++i]
      let parsed: unknown = raw
      if (opt.parser) {
        try {
          parsed = opt.parser(raw)
        } catch (e: unknown) {
          usage(`Option ${name} argument error: ${e instanceof Error ? e.message : String(e)}`)
        }
      }
      values[name] = parsed
    } else {
      values[name] = true
    }
  }

  let missing = false
  for (const name of Object.keys(opts).sort()) {
    if (!hasOwn(values, name) && opts[name].required) {
      console.error(`Missing required option: ${name}`)
      missing = true
    }
  }
  if (missing) {
    console.error()
    usage()
  }

  return values
}
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Tweet } from './Tweet' 5 | import { Minutes } from '../util' 6 | import { DynamoDBClient } from '@aws-sdk/client-dynamodb' 7 | import { 8 | DynamoDBHashKey, 9 | DynamoDBRangeKey, 10 | DynamoDBSearchResults, 11 | DynamoDBTable, 12 | DynamoDBTimedPrefixQueryFunction, 13 | dynamodDBTimedPrefixSearch, 14 | } from '../database' 15 | 16 | export interface TwitterDynamoDBTweetRecord { 17 | imageUrl?: string 18 | tweetId: string 19 | tweetMedia?: string[] 20 | tweetText: string 21 | tweetTime: string 22 | tweetType: string 23 | tweetUser: string 24 | tweetFull: string 25 | } 26 | 27 | export type TwitterDynamoDBPartialTweetRecord = { 28 | imageUrl?: string 29 | tweetId: string 30 | tweetMedia?: string[] 31 | tweetText: string 32 | tweetTime: string 33 | tweetType: string 34 | tweetUser: string 35 | tweetFull?: string 36 | } 37 | 38 | export class TwitterDynamoDBTweetTable extends DynamoDBTable { 39 | constructor(client: DynamoDBClient, tableName: string, timeToLiveHours?: number) { 40 | super( 41 | client, 42 | tableName, 43 | new DynamoDBHashKey('brand'), 44 | new DynamoDBRangeKey('uid'), 45 | timeToLiveHours === undefined ? undefined : 'expirationTime', 46 | timeToLiveHours, 47 | ) 48 | } 49 | 50 | public async query(brand: string, minutes: Minutes): Promise { 51 | return this.doPrefixQuery(brand, `${minutes.toShortISOString()}`) 52 | } 53 | 54 | public async store(pkey: string, tweet: Tweet): Promise { 55 | await this.doStore( 56 | pkey, 57 | `${(new Minutes(tweet.date)).toShortISOString()}=${tweet.id}`, 58 | { 59 | tweetId: tweet.id, 60 | ...(0 < tweet.media.length ? 
{ tweetMedia: tweet.media } : {}), 61 | tweetText: tweet.text, 62 | tweetTime: tweet.date.toISOString(), 63 | tweetType: tweet.type, 64 | tweetUser: tweet.user, 65 | tweetFull: JSON.stringify(tweet.full), 66 | }, 67 | ) 68 | } 69 | } 70 | 71 | export async function twitterDynamoDBTweetSearch( 72 | startTime: string, 73 | endTime: string | undefined, 74 | full: boolean, 75 | maxResults: number, 76 | qf: DynamoDBTimedPrefixQueryFunction, 77 | ): Promise> { 78 | const res = await dynamodDBTimedPrefixSearch(startTime, endTime, maxResults, qf) 79 | return full 80 | ? res 81 | : { 82 | nextStartTime: res.nextStartTime, 83 | results: res.results.map(t => { const { tweetFull, ...rest } = t; return rest }), 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/twitter/Tweet.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Obj } from '../util' 5 | 6 | export type TweetType = 'original' | 'quote' | 'reply' | 'retweet' 7 | 8 | // 9 | // This is a subset of the information streamed from Twitter, which we 10 | // find convenient and sufficient for most applications. 11 | // 12 | export class Tweet { 13 | public readonly id: string 14 | public readonly date: Date 15 | public readonly media: string[] 16 | public readonly text: string 17 | public readonly type: TweetType 18 | public readonly user: string 19 | 20 | constructor(public readonly full: Obj) { 21 | if (typeof full.id_str !== 'string' || !/^\d{1,20}$/.test(full.id_str)) { 22 | throw new Error(`Invalid Tweet id: ${full.id_str}`) 23 | } 24 | this.id = full.id_str 25 | this.date = new Date(full.created_at) 26 | this.media = getMediaObjects(full).map(getMediaLink) 27 | this.text = full.extended_tweet ? 
full.extended_tweet.full_text : full.text 28 | this.type = getTweetType(full) 29 | this.user = full.user.screen_name 30 | } 31 | } 32 | 33 | function getTweetType(full: Obj): TweetType { 34 | if (full.in_reply_to_status_id) { 35 | return 'reply' 36 | } 37 | if (full.is_quote_status && !full.text.startsWith('RT')) { 38 | return 'quote' 39 | } 40 | if (full.retweeted_status && full.text.startsWith('RT')) { 41 | return 'retweet' 42 | } 43 | return 'original' 44 | } 45 | 46 | function getMediaObjects(full: Obj): { media_url: string }[] { 47 | if (full.extended_entities && full.extended_entities.media) { 48 | return full.extended_entities.media 49 | } 50 | if (full.entities.media) { 51 | return full.entities.media 52 | } 53 | if (full.extended_tweet && full.extended_tweet.entities.media) { 54 | return full.extended_tweet.entities.media 55 | } 56 | if (full.quoted_status) { 57 | if (full.quoted_status.entities.media) { 58 | return full.quoted_status.entities.media 59 | } 60 | if (full.quoted_status.extended_tweet && full.quoted_status.extended_tweet.entities.media) { 61 | return full.quoted_status.extended_tweet.entities.media 62 | } 63 | } 64 | return [] 65 | } 66 | 67 | function getMediaLink(media: Obj): string { 68 | if ( 69 | media.type !== 'video' || 70 | !media.video_info || 71 | !media.video_info.variants || 72 | !Array.isArray(media.video_info.variants) || 73 | media.video_info.variants.length === 0 74 | ) { 75 | return media.media_url 76 | } 77 | 78 | const variants = [...media.video_info.variants] as Obj[] 79 | variants.sort((x: Obj, y: Obj) => (x.bitrate || 0) < (y.bitrate || 0) ? 1 : -1) 80 | 81 | const url = variants[0].url 82 | const index = url.indexOf('?') 83 | return 0 < index ? 
url.substring(0, index) : url 84 | } 85 | -------------------------------------------------------------------------------- /src/app/twidl/README.md: -------------------------------------------------------------------------------- 1 | # Twidl: A Tweet Downloader Application 2 | 3 | Tweets go back to [March 21 2006](https://twitter.com/jack/status/20). In many cases it can be useful to 4 | search for past Tweets that match certain 5 | [search queries](https://developer.twitter.com/en/docs/twitter-api/enterprise/search-api/guides/operators), 6 | and in some cases results can include tens or even hundreds of thousands of Tweets. 7 | 8 | This Tweet Downloader (**twidl**) application is designed to run such large batch downloads. It first connects 9 | to the Twitter API to get a count estimating the number of Tweets that match a certain search query over the 10 | specified time range, then proceeds to download the Tweets. 11 | 12 | The application includes mechanisms for reconnecting to the API in case of network disconnects or if the download 13 | hits the API's rate limits. It also tracks the progress of the download and continuously displays the time of the 14 | last Tweet downloaded successfully. This timestamp can be used to restart the download in case the application 15 | itself fails. 16 | 17 | ## Prepare 18 | 19 | Make sure you have [Node.js](https://nodejs.org/) installed. 20 | 21 | This application uses Twitter's 22 | [Search API](https://developer.twitter.com/en/docs/twitter-api/enterprise/search-api/overview) to 23 | download Tweets. Configure the following environment variables with your Twitter Enterprise account credentials: 24 | 25 | * `TWITTER_ACCOUNT` 26 | * `TWITTER_EMAIL` 27 | * `TWITTER_PASSWORD` 28 | 29 | You can also configure the `TWITTER_LABEL` variable if you don't want to use the standard `prod` label. 
30 | 31 | ## Build 32 | 33 | Build the application by running the following commands in the root of the project: 34 | 35 | ```bash 36 | npm install 37 | npm run build 38 | ``` 39 | 40 | ## Download 41 | 42 | Run **twidl** using the following command: 43 | 44 | ```bash 45 | node build/app/twidl 46 | ``` 47 | 48 | It will display a detailed help message with the required and optional command line arguments. 49 | 50 | As an example, the following command will download Tweets containing the term "web3" from the first day of 2022: 51 | 52 | ```bash 53 | node build/app/twidl --query web3 --start 2022-01-01T00:00:00Z --end 2022-01-02T00:00:00Z --csv ./tweets.csv 54 | ``` 55 | 56 | This query matches slightly over 100,000 Tweets. 57 | 58 | Add the `--count` option to your command if you only want to get the number of matches without actually downloading 59 | any data. 60 | 61 | ### Customize your results 62 | 63 | The API returns a lot of 64 | [information](https://developer.twitter.com/en/docs/twitter-api/enterprise/data-dictionary/native-enriched-objects/tweet) 65 | per Tweet. Currently the application saves out a small subset of this information. You can edit the function 66 | `transformTweet` in [index.ts](index.ts) to add or change the data written out by the downloader. 67 | -------------------------------------------------------------------------------- /src/app/vsa/visua/fakeserver.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import '../util/array' 5 | import * as config from '../config' 6 | import { Obj, Printer, Sequence, pad } from '../../../util' 7 | import { HttpRouter, httpRouterMethod, HttpRouterRequest, HttpRouterResponse, HttpServer } from '../../../http' 8 | 9 | class FakeVisuaBackend { 10 | private sequence = new Sequence(10000) 11 | private detetionCompletionTimes: Record = {} 12 | 13 | private detectLogos() { 14 | return config.VISUA_DETECTION_RATE <= Math.random() 15 | ? [] 16 | : [ { type: 'logo', name: Object.keys(config.BRANDS).random() } ] 17 | } 18 | 19 | public startDetection(url: string): Obj { 20 | const hash = pad(this.sequence.next, 4, '0') 21 | this.detetionCompletionTimes[hash] = Date.now() + 1000 * config.VISUA_FAKE_JOB_TIME_SEC 22 | return { data: { requestHash: hash } } 23 | } 24 | 25 | public pollDetection(hash: string): Obj | undefined { 26 | const completionTime = this.detetionCompletionTimes[hash] 27 | if (completionTime === undefined) { 28 | return 29 | } 30 | const isComplete = completionTime <= Date.now() 31 | return isComplete 32 | ? 
{ data: { mediaInfo: { width: 400, height: 300 }, detections: this.detectLogos() } } 33 | : { errorMessage: 'Processing still in progress:' } 34 | } 35 | } 36 | 37 | class FakeVisuaServer extends HttpRouter { 38 | private readonly visua = new FakeVisuaBackend() 39 | private readonly printer = new Printer() 40 | private startDetectionRequests = 0 41 | private pollDetectionRequests = 0 42 | 43 | constructor() { 44 | super() 45 | setInterval(() => this.onInterval(), 1000) 46 | } 47 | 48 | private onInterval() { 49 | this.printer.printLines( 50 | `Detection requests per second: \x1b[33m${this.startDetectionRequests}\x1b[0m`, 51 | `Poll requests per second: \x1b[33m${this.pollDetectionRequests}\x1b[0m`, 52 | ) 53 | this.startDetectionRequests = 0 54 | this.pollDetectionRequests = 0 55 | } 56 | 57 | @httpRouterMethod('POST', '/detect') 58 | public startDetection(req: HttpRouterRequest, res: HttpRouterResponse) { 59 | this.startDetectionRequests += 1 60 | if (typeof req.body !== 'object' || typeof req.body.mediaUrl !== 'string') { 61 | return [400, 'No media URL'] 62 | } 63 | const body = this.visua.startDetection(req.body.mediaUrl) 64 | setTimeout(() => res.respond(200, body), config.VISUA_FAKE_API_DELAY) 65 | } 66 | 67 | @httpRouterMethod('GET', /^\/detect\/(\w+)\/response\/?$/) 68 | public pollDetection(req: HttpRouterRequest, res: HttpRouterResponse) { 69 | this.pollDetectionRequests += 1 70 | const hash = req.params![0] 71 | const status = this.visua.pollDetection(hash) 72 | if (status === undefined) { 73 | return [404, `Invalid detection hash: ${hash}`] 74 | } 75 | setTimeout(() => res.respond(200, status), config.VISUA_FAKE_API_DELAY) 76 | } 77 | } 78 | 79 | new HttpServer(new FakeVisuaServer(), { port: 3000 }).start() 80 | -------------------------------------------------------------------------------- /src/util/time.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { pad } from './string' 5 | import { assert, assertInteger } from './assert' 6 | 7 | export function sleep(milliseconds: number): Promise { 8 | return new Promise(resolve => setTimeout(resolve, milliseconds)) 9 | } 10 | 11 | export class Minutes { 12 | public static readonly REGEX_STR = '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}' 13 | public static readonly REGEX = new RegExp(`^${Minutes.REGEX_STR}$`) 14 | 15 | private readonly date: Date 16 | public readonly minutes: number 17 | 18 | constructor(timestamp?: Date | number | string) { 19 | switch (typeof timestamp) { 20 | case 'number': 21 | assertInteger(timestamp, 0, undefined, `Invalid timestamp: ${timestamp}`) 22 | this.date = new Date(timestamp * 60000) 23 | break 24 | case 'object': 25 | assert(timestamp instanceof Date, `Invalid timestamp: ${timestamp}`) 26 | this.date = timestamp 27 | break 28 | case 'string': 29 | assert(Minutes.REGEX.test(timestamp), `Invalid timestamp: ${timestamp}`) 30 | this.date = new Date(`${timestamp}:00.000Z`) 31 | break 32 | case 'undefined': 33 | this.date = new Date() 34 | break 35 | default: 36 | throw new Error(`Invalid timestamp: ${timestamp}`) 37 | } 38 | this.minutes = Math.floor(this.date.getTime() / 60000) 39 | } 40 | 41 | public add(minutes: number): Minutes { 42 | return new Minutes(this.minutes + minutes) 43 | } 44 | 45 | public eq(other: Minutes): boolean { 46 | return this.minutes === other.minutes 47 | } 48 | 49 | public ge(other: Minutes): boolean { 50 | return this.minutes >= other.minutes 51 | } 52 | 53 | public gt(other: Minutes): boolean { 54 | return this.minutes > other.minutes 55 | } 56 | 57 | public le(other: Minutes): boolean { 58 | return this.minutes <= other.minutes 59 | } 60 | 61 | public lt(other: Minutes): boolean { 62 | return this.minutes < other.minutes 63 | } 64 | 65 | public ne(other: Minutes): boolean { 66 | return this.minutes !== other.minutes 67 | } 68 | 69 | public next(): Minutes { 70 | 
return this.add(1) 71 | } 72 | 73 | public toShortISOString(): string { 74 | return this.date.toISOString().substr(0, 16) 75 | } 76 | } 77 | 78 | export class Timeout { 79 | private timeout?: NodeJS.Timeout 80 | 81 | constructor(private milliseconds: number, private handler: () => void) { 82 | assertInteger(milliseconds, 1, 3600, 'Timer duration milliseconds') 83 | } 84 | 85 | public clear(): void { 86 | if (this.timeout) { 87 | clearTimeout(this.timeout) 88 | this.timeout = undefined 89 | } 90 | } 91 | 92 | public reset(): void { 93 | this.clear() 94 | this.set() 95 | } 96 | 97 | public set(): void { 98 | if (!this.timeout) { 99 | this.timeout = setTimeout(this.handler, this.milliseconds) 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/database/FilesystemObjectStore.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import ospath from 'path' 5 | import { assert } from '../util' 6 | import { promises as fs } from 'fs' 7 | import { ObjectListing, ObjectStore, isValidBucketName, isValidObjectName } from './ObjectStore' 8 | 9 | export class FilesystemObjectStore implements ObjectStore { 10 | 11 | constructor(private readonly basepath: string) { 12 | } 13 | 14 | private makeFilename(bucketName: string, objectName: string): { dirname: string, filename: string } { 15 | assert(isValidBucketName(bucketName), `Invalid bucket name: ${bucketName}`) 16 | assert(isValidObjectName(objectName), `Invalid object name: ${objectName}`) 17 | const dirname = ospath.join(this.basepath, bucketName) 18 | const filename = ospath.join(dirname, objectName) 19 | return { dirname, filename } 20 | } 21 | 22 | public async doesObjectExist(bucketName: string, objectName: string): Promise { 23 | const info = await this.getObjectInfo(bucketName, objectName) 24 | return info === undefined ? 
false : true 25 | } 26 | 27 | public async getObjectInfo(bucketName: string, objectName: string): Promise { 28 | try { 29 | const st = await fs.stat(this.makeFilename(bucketName, objectName).filename) 30 | return { 31 | bucketName, 32 | objectName, 33 | size: st.size, 34 | timeCreated: st.birthtimeMs, 35 | timeModified: st.mtimeMs, 36 | } 37 | } catch (error: any) { 38 | if (error.code === 'ENOENT') { 39 | return 40 | } 41 | throw error 42 | } 43 | } 44 | 45 | public async getObject(bucketName: string, objectName: string): Promise { 46 | try { 47 | return await fs.readFile(this.makeFilename(bucketName, objectName).filename) 48 | } catch (error: any) { 49 | if (error.code === 'ENOENT') { 50 | return 51 | } 52 | throw error 53 | } 54 | } 55 | 56 | public async listObjects(bucketName: string, options?: { namesOnly: boolean }): Promise { 57 | assert(isValidBucketName(bucketName), `Invalid bucket name: ${bucketName}`) 58 | const dirname = ospath.join(this.basepath, bucketName) 59 | let files 60 | try { 61 | files = await fs.readdir(dirname) 62 | } catch (error: any) { 63 | if (error.code === 'ENOENT') { 64 | return [] 65 | } 66 | throw error 67 | } 68 | if (options && options.namesOnly) { 69 | return files 70 | } 71 | return Promise.all(files.map(fn => this.getObjectInfo(bucketName, fn) as Promise)) 72 | } 73 | 74 | public async putObject(bucketName: string, objectName: string, data: Buffer): Promise { 75 | const { dirname, filename } = this.makeFilename(bucketName, objectName) 76 | try { 77 | await fs.mkdir(dirname, { mode: 0o755 }) 78 | } catch (error: any) { 79 | if (error.code !== 'EEXIST') { 80 | throw error 81 | } 82 | } 83 | return fs.writeFile(filename, data) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/app/crypto/apiDB.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import * as config from './config' 5 | import { assert, counters } from '../../util' 6 | import { getDynamoDBClient } from '../../database' 7 | import { HttpRouter, httpRouterMethod, HttpRouterRequest } from '../../http' 8 | import { TwitterDynamoDBTweetSentimentTable } from "../../twitter/TwitterDynamoDBTweetSentimentTable" 9 | import { COIN_REGEX, getCombinedResults, getDatapointFrequency, ONE_WEEK_MS, Result, URL_LATEST_REGEX, URL_REGEX } from "./utils" 10 | 11 | interface Entry { 12 | timeMs: number 13 | coin: string 14 | tweetIds: Array 15 | usdRate: number 16 | 17 | } 18 | 19 | interface ApiResults { 20 | results: Entry[] 21 | nextStartTime?: string 22 | } 23 | 24 | const dynamoDBClient = getDynamoDBClient(config.AWS_REGION); 25 | const tweetSentimentTable = new TwitterDynamoDBTweetSentimentTable(dynamoDBClient, config.CRYPTO_SENTIMENT_TABLE_NAME); 26 | 27 | export async function getHandler(coin: string, startTime: number, endTime?: number): Promise { 28 | assert(COIN_REGEX.test(coin), `Invalid coin: ${coin}`) 29 | 30 | const startTimestamp = startTime 31 | const endTimestamp = endTime ? 
endTime : startTimestamp + 60 * 1000 32 | assert(startTimestamp <= endTimestamp, `End time: ${endTime} precedes start time: ${startTime}`) 33 | assert(endTimestamp - startTime < ONE_WEEK_MS, 'More than a week worth of data requested') 34 | if (startTimestamp === endTimestamp) { 35 | return { results: [] } 36 | } 37 | 38 | const dataFrequency = getDatapointFrequency(startTimestamp, endTimestamp) 39 | const results = await tweetSentimentTable.queryTimeRange(coin, startTimestamp, endTimestamp) || [] 40 | 41 | const combinedResults = getCombinedResults(results as Result[], dataFrequency) 42 | 43 | return { results: combinedResults } 44 | } 45 | 46 | export async function getLatestHandler(coin: string, frequency = 1): Promise { 47 | assert(COIN_REGEX.test(coin), `Invalid coin: ${coin}`) 48 | 49 | const endTimestamp = new Date().getTime() 50 | const startTimestamp = endTimestamp - (frequency + 2) * 60 * 1000 51 | 52 | const results = (await tweetSentimentTable.queryTimeRange(coin, startTimestamp, endTimestamp) || []) as Result[] 53 | 54 | const combinedResults = getCombinedResults(results.slice(-frequency), frequency) 55 | 56 | return { results: combinedResults } 57 | } 58 | 59 | export class ApiRouter extends HttpRouter { 60 | constructor() { 61 | super({ cors: true }) 62 | } 63 | 64 | @httpRouterMethod('GET', URL_REGEX) 65 | public async trends(req: HttpRouterRequest) { 66 | counters.info.requests.trends.inc() 67 | const [coin, startTime, _, endTime] = req.params! 68 | const ret = await getHandler(coin, Number(startTime), Number(endTime)) 69 | return [200, ret] 70 | } 71 | 72 | @httpRouterMethod('GET', URL_LATEST_REGEX) 73 | public async trendLatest(req: HttpRouterRequest) { 74 | counters.info.requests.trends.inc() 75 | const [coin, _, frequency = 1] = req.params! 
76 | 77 | const ret = await getLatestHandler(coin, Number(frequency)) 78 | return [200, ret] 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/http/server/HttpServer.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import net from 'net' 5 | import http from 'http' 6 | import https from 'https' 7 | import { assert } from '../../util' 8 | import { HttpServerCertificates } from './HttpServerCertificates' 9 | 10 | export interface HttpServerHandler { 11 | 12 | // New connection created 13 | onConnection?: (socket: net.Socket) => void 14 | 15 | // New request arrived 16 | onRequest: http.RequestListener 17 | 18 | // Server started 19 | onStart?: () => void 20 | 21 | // Server stopped 22 | onStop?: () => void 23 | } 24 | 25 | export interface HttpServerOptions { 26 | 27 | // SSL certificates for secure HTTPS servers 28 | certificates?: HttpServerCertificates 29 | 30 | // Port to listen on 31 | port?: number 32 | } 33 | 34 | export type HttpServerState = 'init' | 'listening' | 'stopping' 35 | 36 | // 37 | // An HTTP/S server you can start and stop. An HTTPS secure server is created if 38 | // the constructor options include SSL certificates. 39 | // 40 | export class HttpServer { 41 | private port: number 42 | private server: http.Server 43 | private _state: HttpServerState = 'init' 44 | 45 | constructor(public readonly handler: HttpServerHandler, options: HttpServerOptions = {}) { 46 | this.port = options.port !== undefined ? options.port : (options.certificates ? 
443 : 80) 47 | const onRequest = (req: http.IncomingMessage, res: http.ServerResponse) => { 48 | if (this.state !== 'listening') { 49 | res.writeHead(503, { 'Content-Type': 'text/plain' }) 50 | res.write('Service Unavailable') 51 | res.end() 52 | return 53 | } 54 | handler.onRequest(req, res) 55 | } 56 | this.server = options.certificates 57 | ? https.createServer(options.certificates, onRequest) 58 | : http.createServer(onRequest) 59 | } 60 | 61 | public get state(): HttpServerState { 62 | return this._state 63 | } 64 | 65 | public start(port?: number): void { 66 | if (this._state === 'listening') { 67 | return 68 | } 69 | assert(this._state === 'init', `Cannot start listening while in state: ${this._state}`) 70 | const listener = this.server.listen(port === undefined ? this.port : port, () => { 71 | if (this.handler.onStart) { 72 | this.handler.onStart() 73 | } 74 | }) 75 | if (this.handler.onConnection) { 76 | listener.on('connection', (socket: net.Socket) => this.handler.onConnection!(socket)) 77 | } 78 | this._state = 'listening' 79 | } 80 | 81 | public async stop(): Promise { 82 | if (this._state !== 'listening') { 83 | return 84 | } 85 | this._state = 'stopping' 86 | if (this.handler.onStop) { 87 | this.handler.onStop() 88 | } 89 | await new Promise((resolve, reject) => { 90 | this.server.close((error: any) => { 91 | this._state = 'init' 92 | if (error) { 93 | reject(error) 94 | } else { 95 | resolve() 96 | } 97 | }) 98 | }) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/twitter/FakeTwitterStream.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import '../util/array' 5 | import { assert } from '../util' 6 | import { EventListener, SimpleEventDispatcher, SimpleEventListener } from '../util/event' 7 | import { StreamedTweet, TwitterStreamEventType, TwitterStreamInterface } from './TwitterStreamInterface' 8 | 9 | // 10 | // Fake stream of Tweets for testing: while connected, an interval timer fires synthetic Tweets at the configured rate. 11 | // 12 | export class FakeTwitterStream implements TwitterStreamInterface { 13 | private interval?: NodeJS.Timeout 14 | private dispatcher = new SimpleEventDispatcher() 15 | private rules: string[] = [] 16 | 17 | constructor( 18 | private readonly tweetsPerSecond: 1 | 2 | 5 | 10 | 50 | 100 | 200 | 400 | 600 | 800 | 1000, 19 | ruleProbabilities: Record, 20 | ) { 21 | for (const [rule, probability] of Object.entries(ruleProbabilities)) { 22 | for (let i = 0; i < probability; i++) { 23 | this.rules.push(rule) 24 | } 25 | } 26 | } 27 | 28 | public addListener( 29 | typeOrListener: TwitterStreamEventType | SimpleEventListener, 30 | listener?: EventListener | SimpleEventListener, 31 | ): void { 32 | if (typeof typeOrListener === 'string') { 33 | assert(listener !== undefined, 'Listener type provided but no listener specified') 34 | if (typeOrListener !== 'tweet') { 35 | console.warn(`Event not supported by FakeTwitterStream: ${typeOrListener}. Ignored`) 36 | return 37 | } 38 | this.dispatcher.addListener(listener as SimpleEventListener) 39 | } else { 40 | assert(listener === undefined, 'Two listener provided') 41 | this.dispatcher.addListener(typeOrListener as SimpleEventListener) 42 | } 43 | } 44 | 45 | public connect(): void { 46 | assert(!this.interval, 'Already connected') 47 | const ms = this.tweetsPerSecond < 200 ? 1000 / this.tweetsPerSecond : 5 48 | const repeat = this.tweetsPerSecond < 200 ? 
1 : this.tweetsPerSecond / 200 49 | console.log(`Connected: ms=${ms} repeat=${repeat}`) 50 | this.interval = setInterval( 51 | () => { 52 | const now = new Date() 53 | const timebase = (now.getTime() % 1000000) * 10 54 | for (let i = 0; i < repeat; i++) { 55 | const timestamp = String(timebase + i) 56 | this.dispatcher.fire({ 57 | id: timestamp, 58 | date: now, 59 | media: [ 'http://example.com/image.jpg' ], 60 | text: `Example ${timestamp}`, 61 | type: 'original', 62 | user: 'johnappleseed', 63 | rules: [this.rules.random() || 'rule'], 64 | full: {}, 65 | }) 66 | } 67 | }, 68 | ms, 69 | ) 70 | } 71 | 72 | public disconnect(): void { 73 | assert(this.interval !== undefined, 'Not connected') // the timer handle only exists between connect() and disconnect() 74 | clearInterval(this.interval!) 75 | this.interval = undefined 76 | } 77 | 78 | public async setStreamRules(newRulesRecord: Record, force?: boolean): Promise { 79 | console.log('Skipping setting of stream rules:', newRulesRecord) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/twitter/TwitterStreamGroups.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { Tweet } from './Tweet' 5 | import { TwitterAccount } from './TwitterAccount' 6 | import { TwitterStream, TwitterStreamOptions } from './TwitterStream' 7 | import { SimpleEventDispatcher, SimpleEventListener } from '../util/event' 8 | 9 | const DELAY_SEC = 10 /* stream delay */ + 3 /* wait for out of order tweets */ 10 | 11 | export interface TweetGroup { 12 | groupId: string 13 | tweets: Tweet[] 14 | } 15 | 16 | export interface TwitterStreamGroupsOptions extends TwitterStreamOptions { 17 | 18 | // Time window length in seconds 19 | groupDurationSec?: number 20 | } 21 | 22 | // 23 | // Stream Tweets and collect them into groups based on time windows. 
24 | // 25 | export class TwitterStreamGroups { 26 | private twitterStream: TwitterStream 27 | private groupDurationMs: number 28 | private interval?: NodeJS.Timeout 29 | private groups: Record = {} 30 | private dispatcher = new SimpleEventDispatcher() 31 | private delay: number 32 | 33 | constructor(account: TwitterAccount, options: TwitterStreamGroupsOptions = {}) { 34 | this.twitterStream = new TwitterStream(account, options) 35 | this.twitterStream.addListener((tweet: Tweet) => this.onTweet(tweet)) 36 | const groupDurationSec = options.groupDurationSec || 1 37 | if (groupDurationSec < 1 || 60 < groupDurationSec) { 38 | throw new Error(`Group duration out of bounds: ${groupDurationSec} seconds`) 39 | } 40 | this.groupDurationMs = groupDurationSec * 1000 41 | this.delay = Math.ceil(DELAY_SEC / groupDurationSec) 42 | } 43 | 44 | private getGroupNumber(date?: Date): number { 45 | return Math.floor((date ? date.getTime() : Date.now()) / this.groupDurationMs) 46 | } 47 | 48 | private onTweet(tweet: Tweet) { 49 | const groupId = this.getGroupNumber(tweet.date).toString(36) 50 | if (!this.groups[groupId]) { 51 | this.groups[groupId] = [] 52 | } 53 | this.groups[groupId].push(tweet) 54 | } 55 | 56 | private onInterval(): void { 57 | const now = this.getGroupNumber() 58 | for (const groupId of Object.keys(this.groups)) { 59 | if (this.delay < now - parseInt(groupId, 36)) { 60 | this.dispatcher.fire({ groupId, tweets: this.groups[groupId] }) 61 | delete this.groups[groupId] 62 | } 63 | } 64 | } 65 | 66 | public addListener(listener: SimpleEventListener): void { 67 | this.dispatcher.addListener(listener) 68 | } 69 | 70 | public connect(): void { 71 | if (this.interval) { 72 | throw new Error('Streaming already in progress') 73 | } 74 | this.twitterStream.connect() 75 | this.interval = setInterval(() => this.onInterval(), 1000) 76 | } 77 | 78 | public disconnect(): void { 79 | if (!this.interval) { 80 | throw new Error('Not streaming') 81 | } 82 | 
clearInterval(this.interval) 83 | this.interval = undefined 84 | this.twitterStream.disconnect() 85 | } 86 | 87 | public setStreamRules(rules: Record = {}, force = true): Promise { 88 | return this.twitterStream.setStreamRules(rules, force) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/app/vsa/README.md: -------------------------------------------------------------------------------- 1 | # Visual Search API 2 | 3 | This application implements a brand search API for Tweets. It contains a streaming engine for loading Tweets with 4 | images or Tweets that match Twitter context annotations for supported brands. It then runs all images through 5 | [Visua](https://visua.com/)'s computer vision API to match logos for supported brands. All matched Tweets 6 | are stored in a DynamoDB table. The application also implements an API for searching the database by brand and 7 | date range. 8 | 9 | ## Prepare 10 | 11 | Make sure you have [Node.js](https://nodejs.org/) installed. 12 | 13 | This application uses Twitter's 14 | [PowerTrack API](https://developer.twitter.com/en/docs/twitter-api/enterprise/powertrack-api/overview) to 15 | stream Tweets. Configure the following environment variables with your Twitter Enterprise account credentials: 16 | 17 | * `TWITTER_ACCOUNT` 18 | * `TWITTER_EMAIL` 19 | * `TWITTER_PASSWORD` 20 | 21 | You will also need to configure your Visua developer key in the environment variable `VISUA_DEVELOPER_KEY`. 22 | 23 | ### Using a cloud database 24 | 25 | This application uses the [AWS DynamoDB database](https://aws.amazon.com/dynamodb/) because it's easy to 26 | provision and easy to use. Make sure to set up AWS credentials in your environment variables or home 27 | directory. You may also set the environment variable `AWS_REGION` to use a region other than the 28 | default `us-east-1`. 
29 | 30 | ### Using a local database 31 | 32 | If you don't have easy access to an AWS account, you can run a local version of the database. You can 33 | [download and run your own copy](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.DownloadingAndRunning.html) 34 | but the easiest way is probably to use Docker: `docker run -p 8000:8000 amazon/dynamodb-local`. 35 | 36 | You will also need to add an environment variable `export AWS_DYNAMODB_ENDPOINT=http://localhost:8000/` or 37 | edit config.ts directly, to point the code at the local database. 38 | 39 | ## Build 40 | 41 | Build the application by running the following commands in the root of the project: 42 | 43 | ```bash 44 | npm install 45 | npm run build 46 | ``` 47 | 48 | ## Setup 49 | 50 | Run the following command in the root of the project: 51 | 52 | ```bash 53 | node build/app/vsa --setup 54 | ``` 55 | 56 | This will create the required tables in DynamoDB and set up the required streaming rules through the 57 | Twitter API. 58 | 59 | ## Stream 60 | 61 | The following command will stream Tweets into DynamoDB: 62 | 63 | ```bash 64 | node build/app/vsa --stream 65 | ``` 66 | 67 | Let it run at least for a few minutes to get meaningful data into the database. 68 | 69 | ### Explore 70 | 71 | First, start the API server with the following command: 72 | 73 | ```bash 74 | node build/app/vsa --api 75 | ``` 76 | 77 | The API should now be available on `http://localhost:4000/`. 78 | 79 | You can now get Tweets with 80 | 81 | ``` 82 | curl http://localhost:4000/search/<brand>/<start time>(/<end time>)? 83 | ``` 84 | 85 | where `<brand>` is one of the brand names listed in [config.ts](./config.ts) and `<start time>` 86 | and `<end time>` are UTC timestamps in one-minute resolution in the form `yyyy-mm-ddThh:mm`. 
--------------------------------------------------------------------------------
/src/app/vsa/streamer.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import * as config from './config'
import { Visua } from './visua/visua'
import { counters } from '../../util'
import { createStreamProbabilities } from './rules'
import { DynamoDBKVStore, getDynamoDBClient } from '../../database'
import {
  FakeTwitterStream,
  StreamedTweet,
  Tweet,
  TwitterAccount,
  TwitterDynamoDBTweetTable,
  TwitterStreamer,
} from '../../twitter'

const visua = new Visua(config.VISUA_DEVELOPER_KEY, {
  activityTimeoutMs: config.VISUA_ACTIVITY_TIMEOUT_MS,
  apiEndpoint: config.VISUA_ENDPOINT,
  connectionCount: config.VISUA_PIPELINED_CONNECTION_COUNT,
  maxAttempts: config.VISUA_MAX_ATTEMPTS,
  maxPipelineDepth: config.VISUA_PIPELINE_DEPTH,
  waitTimeForDetectionMs: config.VISUA_WAIT_TIME,
})

const dynamodDBClient = getDynamoDBClient(config.AWS_REGION, config.AWS_DYNAMODB_ENDPOINT)
const tweetTable = new TwitterDynamoDBTweetTable(dynamodDBClient, config.TWEET_TABLE_NAME)

//
// Store the Tweet once for every brand in the list. Writes are sequential.
// The activeWrites gauge is decremented in a finally clause so that a failed
// write does not leave it permanently incremented.
//
async function saveTweet(tweet: Tweet, brands: string[]): Promise<void> {
  for (const brand of brands) {
    counters.info.streamer.totalWrites.inc()
    counters.debug.streamer.activeWrites.inc()
    try {
      await tweetTable.store(brand, tweet)
    } finally {
      counters.debug.streamer.activeWrites.dec()
    }
  }
}

//
// Run logo detection for one image URL while tracking the activeDetections
// gauge, which is restored even when the detection call throws.
//
async function detectLogos(url: string) {
  counters.debug.streamer.activeDetections.inc()
  try {
    return await visua.detectLogosInImage(url)
  } finally {
    counters.debug.streamer.activeDetections.dec()
  }
}

//
// Detect logos in a single image and, when detection completed, store the
// Tweet under every detected logo name.
//
async function onImage(tweet: Tweet, url: string): Promise<void> {
  counters.debug.streamer.activeImages.inc()
  try {
    const res = await detectLogos(url)
    if (res.status === 'complete' && res.logos !== undefined) {
      counters.info.streamer.totalDetections.inc(res.logos.length)
      await saveTweet(tweet, res.logos)
    }
  } finally {
    counters.debug.streamer.activeImages.dec()
  }
}

//
// Stream listener. Tweets matching the 'images' rule have each media URL sent
// to logo detection. Independently, the Tweet is stored under every matching
// rule whose name starts with '@' (with the '@' removed).
//
// Both operations are intentionally not awaited so the stream is never
// blocked; rejections are logged here because nothing else observes them.
//
function onStreamedTweet(streamedTweet: StreamedTweet): void {
  const { rules, ...tweet } = streamedTweet
  if (rules.includes('images')) {
    counters.debug.streamer.totalTweetsWithImages.inc()
    for (const url of tweet.media) {
      counters.info.streamer.totalImages.inc()
      onImage(tweet, url).catch((e: unknown) => {
        console.error(`Error processing image ${url}:`, e)
      })
    }
  }
  const brands = rules.filter(rule => rule.startsWith('@')).map(rule => rule.slice(1))
  saveTweet(tweet, brands).catch((e: unknown) => {
    console.error('Error saving tweet:', e)
  })
}

//
// Create the streamer (fake or real, depending on configuration), attach the
// Tweet listener and connect.
//
// shouldBackfill is passed straight through to TwitterStreamer.connect.
//
export function stream(shouldBackfill = false): void {
  const streamer = new TwitterStreamer(
    config.TWITTER_USE_FAKE_STREAM
      ? { twitterStream: new FakeTwitterStream(config.EXPECTED_IMAGE_RATE, createStreamProbabilities()) }
      : {
        // Detection wait time plus two API round trips plus one second, rounded up to whole minutes
        backfillMarginMinutes: Math.ceil((config.VISUA_WAIT_TIME + 2 * config.VISUA_API_LATENCY_MS + 1000) / 60000),
        heartbeatIntervalMs: config.HEARTBEAT_INTERVAL_MS,
        heartbeatMonitoringIntervalMs: config.PRINT_COUNTERS_INTERVAL_MS,
        heartbeatMonitoringLevel: config.PRINT_COUNTERS_LEVEL,
        heartbeatStore: new DynamoDBKVStore(dynamodDBClient, config.CONTROL_TABLE_NAME),
        twitterAccount: new TwitterAccount(config.TWITTER_ACCOUNT, config.TWITTER_EMAIL, config.TWITTER_PASSWORD),
      }
  )

  streamer.addListener(onStreamedTweet)
  streamer.connect(shouldBackfill)
}

--------------------------------------------------------------------------------
/src/twitter/examples/backfill.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import { pad } from '../../util'
import { TwitterAccount, TwitterStream } from '..'

const BACKFILL_MINUTES = 2
const RUNNING_AVERAGE_LENGTH = 5

//
// A resettable counter that remembers its last few values (at most
// runningAverageLength of them) and exposes their rounded average.
//
class Counter {
  private _value = 0
  private readonly history: number[] = []

  constructor(private readonly runningAverageLength = RUNNING_AVERAGE_LENGTH) {
  }

  // Rounded mean of the recorded history, or 0 when nothing was recorded yet
  public get average(): number {
    return this.history.length === 0
      ? 0
      : Math.round(this.history.reduce((a, v) => a + v, 0) / this.history.length)
  }

  // Add delta to the current value and return the new value
  public inc(delta = 1): number {
    return this._value += delta
  }

  // Push the current value into the history (dropping the oldest entry when
  // the history is full) and start counting from zero again
  public reset(): void {
    if (this.history.length === this.runningAverageLength) {
      this.history.shift()
    }
    this.history.push(this._value)
    this._value = 0
  }

  public get value(): number {
    return this._value
  }

  public set value(newValue: number) {
    this._value = newValue
  }
}

// Keyed by stream name (see createStream); 'diff' is derived in onInterval
const counters: Record<string, Counter> = {
  current: new Counter(),
  backfill: new Counter(),
  diff: new Counter(),
}
let index = 0
let totalBackfill = 0

// Stays undefined until the first Tweet arrives (see onTweet)
let interval: NodeJS.Timeout | undefined

//
// Runs once a second: prints one table row with the per-second counts (and
// running averages) of both streams and their difference, then exits the
// process once more than 10 rows were printed and the average difference
// dropped below 10 Tweets.
//
function onInterval(): void {
  counters.diff.value = counters.backfill.value - counters.current.value
  totalBackfill += counters.diff.value
  console.log(
    [
      pad(index++, 5, ' '),
      `${pad(counters.backfill.value, 4, ' ')} \x1b[38;5;240m${pad(`(${counters.backfill.average})`, 6, ' ')}\x1b[0m`,
      `${pad(counters.current.value, 4, ' ')} \x1b[38;5;240m${pad(`(${counters.current.average})`, 6, ' ')}\x1b[0m`,
      `${pad(counters.diff.value, 4, ' ')} \x1b[38;5;240m${pad(`(${counters.diff.average})`, 6, ' ')}\x1b[0m`,
      pad(totalBackfill, 13, ' '),
    ].join(' ')
  )

  if (10 < index && Math.abs(counters.diff.average) < 10) {
    console.log()
    console.log(`Backfilled ${totalBackfill} tweets from ${BACKFILL_MINUTES} minutes in ${
      index - RUNNING_AVERAGE_LENGTH} seconds`)
    process.exit(0)
  }

  counters.current.reset()
  counters.backfill.reset()
  counters.diff.reset()
}

//
// Count one Tweet for the named stream. The first Tweet seen on any stream
// prints the table header and starts the one second reporting interval.
//
function onTweet(name: string): void {
  if (interval === undefined) {
    console.log()
    console.log('\x1b[33mIndex Backfill Current Difference TotalBackfill\x1b[0m')
    interval = setInterval(onInterval, 1000)
  }
  counters[name].inc()
}

//
// Create and connect a stream whose Tweets are counted under the given name.
// The name must be one of the keys of the counters table above.
//
function createStream(
  twitterAccount: TwitterAccount,
  name: string,
  params?: Record<string, any>,
): TwitterStream {
  const st = new TwitterStream(twitterAccount, params)
  st.addListener(() => onTweet(name))
  st.connect()
  return st
}

//
// Open two streams on the same account, one of them with backfill enabled,
// so that their Tweet rates can be compared.
//
async function main() {
  const twitterAccount = new TwitterAccount(
    process.env.TWITTER_ACCOUNT,
    process.env.TWITTER_EMAIL,
    process.env.TWITTER_PASSWORD,
  )
  createStream(twitterAccount, 'current')
  createStream(twitterAccount, 'backfill', { backfillMinutes: BACKFILL_MINUTES })
  // await twitterStream.setStreamRules({ images: 'has:images -is:retweet' }, true)
}

main().catch(e => {
  console.error(e)
  process.exit(1)
})

--------------------------------------------------------------------------------
/src/app/crypto/api.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import * as config from './config'
import { assert, counters } from '../../util'
import { FilesystemObjectStore, ObjectListing } from '../../database'
import { HttpRouter, httpRouterMethod, HttpRouterRequest } from '../../http'
import {
  COIN_REGEX,
  getCombinedResults,
  getDatapointFrequency,
  ONE_WEEK_MS,
  Result,
  URL_LATEST_REGEX,
  URL_REGEX
} from './utils'

// NOTE(review): the element type of `results` was lost in this listing and is
// assumed to be Result - confirm against getCombinedResults in ./utils.
interface ApiResults {
  results: Array<Result>
  nextStartTime?: string
}

const fos = new FilesystemObjectStore(config.OBJECT_STORE_BASE_PATH)

//
// Load and JSON-parse the stored object behind every listing, in parallel.
// Throws a descriptive error when a listed object cannot be read.
//
async function loadResults(listings: ObjectListing[]): Promise<Result[]> {
  return Promise.all(
    listings.map(async listing => {
      const buffer = await fos.getObject(config.OBJECT_STORE_BUCKET_NAME, listing.objectName)
      if (buffer === undefined || buffer === null) {
        throw new Error(`Object not found: ${listing.objectName}`)
      }
      return JSON.parse(buffer.toString()) as Result
    })
  )
}

//
// Return the combined results of all stored objects whose (numeric) name
// falls between startTime and endTime, inclusive. When endTime is missing
// (or 0 / NaN) it defaults to one minute after startTime.
//
// Asserts that the coin matches COIN_REGEX, that the end does not precede
// the start, and that less than ONE_WEEK_MS of data is requested.
//
export async function getHandler(coin: string, startTime: number, endTime?: number): Promise<ApiResults> {
  assert(COIN_REGEX.test(coin), `Invalid coin: ${coin}`)

  const startTimestamp = startTime
  const endTimestamp = endTime ? endTime : startTimestamp + 60 * 1000
  assert(startTimestamp <= endTimestamp, `End time: ${endTime} precedes start time: ${startTime}`)
  assert(endTimestamp - startTime < ONE_WEEK_MS, 'More than a week worth of data requested')
  const dataFrequency = getDatapointFrequency(startTimestamp, endTimestamp)

  if (startTimestamp === endTimestamp) {
    return { results: [] }
  }

  const res = await fos.listObjects(config.OBJECT_STORE_BUCKET_NAME)
  if (res === undefined) {
    return { results: [] }
  }

  const listings = (res as ObjectListing[])
    .filter(listing => Number(listing.objectName) >= startTimestamp && Number(listing.objectName) <= endTimestamp)

  const results = await loadResults(listings)
  return { results: getCombinedResults(results, dataFrequency) }
}

//
// Return the combined results of the last `frequency` stored objects.
//
// The frequency must be a positive integer: slice(-0) and slice(NaN) would
// otherwise silently select every stored object. The coin is validated the
// same way getHandler does.
//
export async function getLatestHandler(coin: string, frequency = 1): Promise<ApiResults> {
  assert(COIN_REGEX.test(coin), `Invalid coin: ${coin}`)
  assert(Number.isInteger(frequency) && 0 < frequency, `Invalid frequency: ${frequency}`)

  const res = await fos.listObjects(config.OBJECT_STORE_BUCKET_NAME)
  if (res === undefined) {
    return { results: [] }
  }
  const listings = (res as ObjectListing[]).slice(-1 * frequency)

  const results = await loadResults(listings)
  return { results: getCombinedResults(results, frequency) }
}

//
// HTTP routes for the API. Route parameters are the capture groups of
// URL_REGEX and URL_LATEST_REGEX (defined in ./utils).
//
export class ApiRouter extends HttpRouter {
  constructor() {
    super({ cors: true })
  }

  @httpRouterMethod('GET', URL_REGEX)
  public async trends(req: HttpRouterRequest) {
    counters.info.requests.trends.inc()
    const [coin, startTime, _, endTime] = req.params!

    // A missing end time is passed as undefined so that getHandler applies its default
    const ret = await getHandler(coin, Number(startTime), endTime === undefined ? undefined : Number(endTime))
    return [200, ret]
  }

  @httpRouterMethod('GET', URL_LATEST_REGEX)
  public async trendLatest(req: HttpRouterRequest) {
    counters.info.requests.trends.inc()
    const [coin, _, frequency = 1] = req.params!

    const ret = await getLatestHandler(coin, Number(frequency))
    return [200, ret]
  }
}

--------------------------------------------------------------------------------
/src/twitter/TwitterSearch.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import { Obj } from '../util'
import { Tweet } from './Tweet'
import { request } from '../http'
import { TwitterBase } from './TwitterBase'

export interface TwitterSearchResults {
  next: string | undefined
  tweets: Tweet[]
}

//
// Use the search API endpoints to count or search for Tweets matching specified
// queries. It also supports iteration over the search endpoint to download larger
// volumes of Tweets.
//
export class TwitterSearch extends TwitterBase {

  //
  // Send one request to the full archive search API of this account and
  // return the response. Throws when the response is not an object.
  //
  // NOTE(review): the resolved type was lost in this listing; `any` is assumed
  // because callers read ad-hoc fields (totalCount, results, next) - confirm.
  //
  private async request(path: string, query?: Obj, body?: Obj): Promise<any> {
    const opts = {
      query,
      body,
      headers: this.account.auth,
      timeout: 30000,
      retryInitialTimeout: 15000,
      retryMaxAttempts: 7,
    }
    const url = `https://gnip-api.twitter.com/search/fullarchive/accounts/${this.account.name}/${path}`
    const res = await request(url, opts)
    if (typeof res !== 'object') {
      throw new Error(`Invalid API response: ${res}`)
    }
    return res
  }

  // Tweets ///////////////////////////////////////////////

  public static readonly minMaxResults = 10
  public static readonly maxMaxResults = 500
  public static readonly defaultMaxResults = TwitterSearch.maxMaxResults

  //
  // Return the total number of Tweets matching the query, summing the
  // totalCount of every page of the counts endpoint. opts.startTime and
  // opts.endTime are optional and validated with validateTime.
  //
  public async count(query: string, opts: Obj = {}): Promise<number> {
    const startTime = opts.startTime && TwitterSearch.validateTime(opts.startTime)
    const endTime = opts.endTime && TwitterSearch.validateTime(opts.endTime)

    let next: string | undefined
    let total = 0
    do {
      const res = await this.request(
        `${this.label}/counts.json`,
        {
          bucket: 'day',
          query,
          ...(next ? { next } : {}),
          ...(startTime ? { fromDate: startTime } : {}),
          ...(endTime ? { toDate: endTime } : {}),
        },
      )
      total += res.totalCount
      next = res.next
    } while (next)
    return total
  }

  //
  // Page through all search results between startTime and endTime and hand
  // every non-empty page to tweetLoader. Loading of one page overlaps with
  // fetching the next one; the last page is loaded after the loop.
  //
  public async download(
    query: string,
    startTime: Date | string,
    endTime: Date | string,
    tweetLoader: (tweets: Tweet[]) => Promise<void>,
  ): Promise<void> {
    let next: string | undefined
    let loader: (() => Promise<void>) | undefined
    do {
      const [res, _]: [TwitterSearchResults, unknown] = await Promise.all([
        this.search(query, { maxResults: TwitterSearch.maxMaxResults, startTime, endTime, next }),
        loader && loader(),
      ])
      loader = 0 < res.tweets.length ? () => tweetLoader(res.tweets) : undefined
      next = res.next
    } while (next)
    if (loader) {
      await loader()
    }
  }

  //
  // Fetch a single page of search results. The Tweets of the page are
  // returned in reverse API order, together with the token of the next page
  // (if any). Supported options: maxResults, startTime, endTime, next.
  //
  public async search(query: string, opts: Obj = {}): Promise<TwitterSearchResults> {
    const startTime = opts.startTime && TwitterSearch.validateTime(opts.startTime)
    const endTime = opts.endTime && TwitterSearch.validateTime(opts.endTime)
    const res = await this.request(
      `${this.label}.json`,
      {},
      {
        query,
        maxResults: opts.maxResults || TwitterSearch.defaultMaxResults,
        ...(startTime ? { fromDate: startTime } : {}),
        ...(endTime ? { toDate: endTime } : {}),
        ...(opts.next ? { next: opts.next } : {}),
      },
    )
    return { tweets: (res.results || []).map((raw: any) => new Tweet(raw)).reverse(), next: res.next }
  }

  // Static ///////////////////////////////////////////////

  //
  // Validate a Date or an ISO 8601 UTC string ('yyyy-mm-ddThh:mm:ss[.sss]Z')
  // and convert it to the 12 digit 'yyyymmddhhmm' form. Throws on any other
  // input.
  //
  static validateTime(time: Date | string): string {
    const tm = time instanceof Date ? time.toISOString() : time
    // The separator before the milliseconds must be a literal dot
    if (typeof tm !== 'string' || !/^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d\d\d)?Z$/.test(tm)) {
      throw new Error(`Invalid time format: ${time}`)
    }
    return tm.substring(0, 16).replace(/[^\d]/g, '')
  }
}

--------------------------------------------------------------------------------
/src/app/vsa/visua/visua.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

// NOTE(review): this relative path points at src/app/vsa/util/array, while the
// other imports below use '../../../util' - confirm the intended module.
import '../util/array'
import querystring from 'querystring'
import { assert, assertInteger, counters, sleep } from '../../../util'
import { HttpPromisifiedRequestPool, HttpRequestPoolOptions } from '../../../http'

export interface VisuaOptions extends HttpRequestPoolOptions {
  apiEndpoint?: string
  waitTimeForDetectionMs?: number
}

export interface VisuaDetectionResult {
  hash: string
  status: 'complete' | 'error' | 'pending'
  size?: { width: number, height: number }
  logos?: string[]
}

//
// Client for the Visua logo detection API: starts a detection for an image
// URL, waits a fixed time, then polls once for the result.
//
export class Visua {
  private pool: HttpPromisifiedRequestPool
  private waitTimeForDetectionMs: number

  constructor(developerKey?: string, options: VisuaOptions = {}) {
    assert(typeof developerKey !== 'string' || /^[\da-z]+$/.test(developerKey), `Invalid key: ${developerKey}`)
    const { apiEndpoint, waitTimeForDetectionMs, ...originalRequestPoolOptions } = options

    // Use ?? rather than || so that an explicit wait time of 0, which the
    // range check below accepts, is not replaced by the default
    this.waitTimeForDetectionMs = waitTimeForDetectionMs ?? 30000
    assertInteger(this.waitTimeForDetectionMs, 0, 120000, 'Wait time for detection')

    // This code is designed to handle 1000 outgoing HTTP requests per second. Attempts
    // to use the builtin HTTP clients failed miserably due to performance issue. For
    // this reason we are using a custom massively parallel HTTP client here.
    //
    const requestPoolOptions = {
      ...originalRequestPoolOptions,
      defaultRequestHeaders: {
        ...(originalRequestPoolOptions.defaultRequestHeaders || {}),
        'X-DEVELOPER-KEY': developerKey!,
        'Content-Type': 'application/x-www-form-urlencoded',
      },
    }
    this.pool = new HttpPromisifiedRequestPool(apiEndpoint || 'https://api.visua.com', requestPoolOptions)
  }

  public close(): void {
    this.pool.close()
  }

  //
  // Start a detection for the image, wait waitTimeForDetectionMs and poll
  // once for the result. When no request hash could be obtained, an 'error'
  // result with an empty hash is returned right away instead of polling.
  //
  public async detectLogosInImage(url: string): Promise<VisuaDetectionResult> {
    counters.debug.Visua.activeDetections.inc()
    let token: string
    try {
      token = await this.startDetection(url)
    } finally {
      counters.debug.Visua.activeDetections.dec()
    }
    if (!token) {
      counters.error.Visua.detectionError.inc()
      return { hash: '', status: 'error' }
    }

    counters.debug.Visua.activeWaits.inc()
    try {
      await sleep(this.waitTimeForDetectionMs)
    } finally {
      counters.debug.Visua.activeWaits.dec()
    }

    counters.debug.Visua.activePolls.inc()
    let res: VisuaDetectionResult
    try {
      res = await this.pollDetection(token)
    } finally {
      counters.debug.Visua.activePolls.dec()
    }

    if (res.status === 'pending') {
      counters.warn.Visua.detectionPending.inc()
    } else if (res.status === 'complete') {
      counters.debug.Visua.detectionComplete.inc()
    }
    return res
  }

  //
  // Fetch the result of a previously started detection:
  //   - 'error' when there is no response body, or the body carries an error
  //     message or no data
  //   - 'pending' when the API reports that processing is still in progress
  //   - 'complete' otherwise, with the sorted, de-duplicated, lower-cased
  //     names of all detections of type 'logo'
  //
  public async pollDetection(hash: string): Promise<VisuaDetectionResult> {
    const res = await this.pool.GET(`/detect/${hash}/response`)
    counters.debug.Visua.pollLatencyAvg.avg(res.elapsed)
    counters.debug.Visua.pollLatencyMax.max(res.elapsed)
    const body = res.response?.responseBody
    if (!body) {
      counters.error.Visua.connectionError.inc()
      return { hash, status: 'error' }
    }
    if (typeof body.errorMessage === 'string' && body.errorMessage.startsWith('Processing still in progress:')) {
      return { hash, status: 'pending' }
    }
    if (body.errorMessage || !body.data) {
      counters.error.Visua.detectionError.inc()
      return { hash, status: 'error' }
    }
    // A response without a detections list is treated as "no logos found"
    const logos = (body.data.detections ?? [])
      .filter((det: any) => det.type === 'logo')
      .map((det: any) => det.name.toLowerCase())
      .sort()
      .uniq()
    return { hash, status: 'complete', logos, size: body.data.mediaInfo }
  }

  //
  // Submit an image URL for detection and return the request hash used for
  // polling. NOTE: at runtime this resolves to undefined when the response
  // has no body or no data; detectLogosInImage checks for that.
  //
  public async startDetection(url: string): Promise<string> {
    const res = await this.pool.POST('/detect', querystring.stringify({ mediaUrl: url }))
    counters.debug.Visua.detectLatencyAvg.avg(res.elapsed)
    counters.debug.Visua.detectLatencyMax.max(res.elapsed)
    return res.response?.responseBody?.data?.requestHash
  }
}

--------------------------------------------------------------------------------
/src/twitter/test/disconnect.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import { sleep } from '../../util'
import { HttpEndpoint, HttpServerCertificates } from '../../http'
import { Tweet, TwitterAccount, TwitterStream, TwitterStreamProxyServer } from '..'


const twitterAccount = new TwitterAccount(
  process.env.TWITTER_ACCOUNT,
  process.env.TWITTER_EMAIL,
  process.env.TWITTER_PASSWORD,
)

// Key and certificate served by the local proxy below. The stream client
// connects with rejectUnauthorized disabled, so the certificate is not
// verified. Test material only.
const certificates = new HttpServerCertificates({
  key:`
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA9ffyOA+z+t4tROOiehd748+Z2vA3+X0U6FkdWy+trmbU4dk2
/5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13HTy4hOiS7CX/cBY4RrewWu0ezkFk
3h6Eslc7GSirDWEQE9ar645HiSYt47KszkdnR46YURPYwZEp2/lPnL8XNmalWJCV
m+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOfhsX89mBYP/SDX7Yxwc/uWx0w+Fki
saIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN8ZhpNOMseTZyRqXnN5Ii7Qr74Srh
YPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNXtwIDAQABAoIBAAsJtm/3nG8Mm2F1
CayK8z9U6KWflBN5HoASx7N430VtTe5YrhQoWekwxlVGCF+3Z358CbfOWEOkH+cj
CFCwAYFpYSptuXIpfl4MUTgzNBHQhEpjOuCQ5AmQypEldw+hpHQPdSWb++/Wq8aF
FSYopCTc7E7vIhIFrqg1dvIkzSjeudTZAiYnQ2vvgPsvnEO3YAqUo0AkwHl/bQeN
VOC46aqMZAPf1Y5UmW3/0ua8HTuVbHk5QMZlWGbEPe2RbR3ILeGAvruIxSyhk9yO
PXRsYUj5uLHAAoAPXXF5hsngHxkpY9VSOv1C3LhBL6HyNf2MmDhsOZglyU6rxgB4
2tf/FpkCgYEA/cQw0TyhVFlmdSkG7aN54ibnkkjnOr2onl95mUBZxea2hvozP8zZ
BjCW2A9oo8z6HK6BJy1B0M8d9pUlUcHAMJ2eB/36tih2temytMbigyf2g0uvmUEP
YLCaHBSSBKh2Y0CxzTJkqZEAh3PewiOOhhU9q66Rv+8Aifipx/FjOD0CgYEA+CIv
SH4jPW9CtvGs45oZ9MaaZkhcjuDQ2R4iCSKW1cFOhQVUOnhRCu26vBiuJhwM9vwi
12yD/qJOm73wbN0iIoZTDRqoWCUur3mgIXw5HGe0uoV5MIEFbOkW/9nj6vk+a/9v
CqaypGJT2Usd95BWHxlxPwfF9FtxW+czOdJMWwMCgYB0sJqiHHczCkkK5urAq8OI
MsuZgNyTLlMzQEPyLJ0bW5PjTXnzhIbnScCTacJ8T+1S8wuAsFbrZdIpaTvX9Hgj
4tagZjG7QbAUxnnelvXhyaaZiVwd5MTleU/kSbE7YxvNWBpqeRnAv2S25JkyPJd1
IJ9TKtrqn0RoLWglAOLXIQKBgCbWovQD2lw5WAXumhMeAcYQeAZeeS5b/hSd5NHt
OhLHKRUlGmP0hSrivwHGEywf49+c4484iwiGOyuhdUp06mzg/Yrli0gQudf5f5j+
KqpJiT5QugFfkIvViCYP4t7amGyrFKRkJz4XrewrF8uyKejAQLuO6esvjPTHoXsB
cbYlAoGBAJ7kP4V9koRS63AudjMqZa3BL9edM8yi/T/B51eKMltbxnd7Aa6e+nKj
djA+bw2+54DG+ygZtQ2DNdM+VeKzBd0j2Q66egKBOm39psQaMMktxsFoW+MiYc1T
Q9XAQiN0PQ/aPPvVfmY1z9LxvUvJLtsiXVpYhlgKEyQql/b0UJeY
-----END RSA PRIVATE KEY-----`,
  cert: `
-----BEGIN CERTIFICATE-----
MIIC9jCCAd4CCQDt8nzQjKCJRzANBgkqhkiG9w0BAQUFADA9MQswCQYDVQQGEwJ1
czELMAkGA1UECAwCY2ExEjAQBgNVBAcMCXN1bm55dmFsZTENMAsGA1UECgwEYWNt
ZTAeFw0yMTEwMDEyMTM4MzNaFw00OTAyMTUyMTM4MzNaMD0xCzAJBgNVBAYTAnVz
MQswCQYDVQQIDAJjYTESMBAGA1UEBwwJc3Vubnl2YWxlMQ0wCwYDVQQKDARhY21l
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9ffyOA+z+t4tROOiehd7
48+Z2vA3+X0U6FkdWy+trmbU4dk2/5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13
HTy4hOiS7CX/cBY4RrewWu0ezkFk3h6Eslc7GSirDWEQE9ar645HiSYt47Kszkdn
R46YURPYwZEp2/lPnL8XNmalWJCVm+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOf
hsX89mBYP/SDX7Yxwc/uWx0w+FkisaIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN
8ZhpNOMseTZyRqXnN5Ii7Qr74SrhYPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNX
twIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQCnwVNmuaPKMXtIGdSzOtpONR2nHRj+
dux5z9T0IuEB4wqeTXFPClTVt5uI0yfq0L6bXwwiBGwDJsJaQOw03fDRx/BEsTRT
YTX7QI7ipWcLS6yNWHCj0nS1KjqWRcXPL9DikPXxZwDmh2OAiuBGDIcI0YmAB0oa
ywYe/5ABM60poF74izNe+mLDy0+Zqs3YijltoMzWcVnEgmZO352O1olgKUxrCbdZ
yj94ML8zyAmLMEJz9nx8Sk1wJQA7/z4ZGKHbdMtZmqvnnetY1zKS6ilb6UCIuaJ/
EkhiKgh94g1X6McmizmQDZEOeF1572FLIMd/WFKzcAOLdjw2NDL/5Cd3
-----END CERTIFICATE-----`,
})

const PORT = 3000

//
// Disconnect scenario: stream through a local proxy, stop the proxy after
// 5 seconds, restart it 20 seconds later, and shut everything down after
// another 10 seconds. Received Tweet ids are printed to the console.
//
export async function main() {
  const twitterStreamProxyServer = new TwitterStreamProxyServer(
    twitterAccount,
    {},
    { port: PORT, certificates },
  )
  twitterStreamProxyServer.start()

  const twitterStream = new TwitterStream(
    twitterAccount,
    { endpoint: new HttpEndpoint(`https://localhost:${PORT}`, { rejectUnauthorized: false }) },
  )
  await twitterStream.setStreamRules({ rule: 'has:images lang:en -is:retweet' })
  twitterStream.addListener((tweet: Tweet) => console.log(tweet.id))
  twitterStream.connect()

  await sleep(5000)
  console.log('Stopping proxy')
  await twitterStreamProxyServer.stop()
  console.log('Proxy stopped')

  await sleep(20000)
  console.log('Restarting proxy')
  twitterStreamProxyServer.start()

  await sleep(10000)
  console.log('Disconnecting stream')
  twitterStream.disconnect()
  console.log('Stopping proxy')
  await twitterStreamProxyServer.stop()
}

--------------------------------------------------------------------------------
/src/http/stream/HttpStream.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import http from 'http'
import https from 'https'
import { Obj } from '../../util'
import { testOnce } from '../../util/test'
import { HttpEndpoint } from '../HttpEndpoint'
import { EventDispatcher, EventListener } from '../../util/event'

export type HttpStreamEventType = 'connected' | 'message'

//
// Error raised by HttpStream. The code is either an HTTP status code or one
// of the negative / zero constants defined on this class.
//
export class HttpStreamError extends Error {
  constructor(message: string, public readonly code: number) {
    super(message)
  }

  // Format as '<symbolic code>: <message>', e.g. 'EEND: Disconnected' or
  // 'HTTP(503): ...' for codes that are not one of the constants below
  public toStr(): string {
    const code = this.code === HttpStreamError.EANY ? 'EANY'
      : this.code === HttpStreamError.EEND ? 'EEND'
        : this.code === HttpStreamError.EREQUEST ? 'EREQUEST'
          : this.code === HttpStreamError.ERESPONSE ? 'ERESPONSE'
            : `HTTP(${this.code})`
    return `${code}: ${this.message}`
  }

  public static readonly EANY = 0 // any error
  public static readonly EEND = -1
  public static readonly EREQUEST = -2
  public static readonly ERESPONSE = -3
}

//
// Handle streaming HTTP/S messages, sent over a single connection as a response
// to a single request. This class uses the built in HTTP client and emits one
// message event for each sequence of response bytes separated by a newline (\r\n)
// character combination.
//
// NOTE(review): the type arguments of EventDispatcher / EventListener were lost
// in this listing and could not be reconstructed - restore them from ../../util/event.
//
export class HttpStream {
  private req?: http.ClientRequest
  private buffers: Buffer[] = []
  private eventDispatcher = new EventDispatcher()
  private errorDispatcher = new EventDispatcher()

  //
  // Open the request right away. A non-2xx status, a request error, a
  // response error and the end of the response are all reported through the
  // error listeners; a 2xx response fires 'connected' followed by 'message'
  // events.
  //
  constructor(endpoint: HttpEndpoint, headers: Obj) {

    // Test hook: when triggered, no request is ever made
    if (testOnce.STREAM_SERVER_TIMEOUT(() => {
      console.debug('Simulating stream server timeout')
      return true
    })) return

    const options = { headers, ...(endpoint.agent ? { agent: endpoint.agent } : {}) }
    this.req = https.request(endpoint.url, options, (res: http.IncomingMessage) => {

      testOnce.STREAM_SERVER_ERROR(() => {
        console.debug('Injecting HTTP error: 999')
        res.statusCode = 999
      })

      if (typeof res.statusCode !== 'number' || res.statusCode < 200 || 299 < res.statusCode) {
        this.handleError(
          `Error connecting to streaming server: ${res.statusCode}`,
          typeof res.statusCode === 'number' ? res.statusCode : HttpStreamError.EREQUEST,
        )
        return
      }

      this.eventDispatcher.fire('connected', endpoint.url)

      res.on('data', (buf: Buffer) => {
        this.handleBuffer(buf)
      })
      // The 'end' event carries no data, so there is nothing to buffer here
      res.on('end', () => {
        this.handleError('Disconnected', HttpStreamError.EEND)
      })
      res.on('error', (error: any) => {
        this.handleError(`Response error: ${error.message}: ${error.code}`, HttpStreamError.ERESPONSE)
      })
    })

    this.req.on('error', (error: any) => {
      this.handleError(`Request error: ${error.message}: ${error.code}`, HttpStreamError.EREQUEST)
    })

    this.req.end()
  }

  //
  // Accumulate response chunks. Once a chunk ends with \r\n, everything
  // accumulated so far is split on \r\n and each non-empty piece is fired as
  // a 'message' event. Ignored after close() and for missing chunks.
  //
  private handleBuffer(buf?: Buffer): void {
    if (!this.req || !buf) {
      return
    }
    this.buffers.push(buf)
    if (2 <= buf.length && buf[buf.length - 2] === 13 && buf[buf.length - 1] === 10) {
      Buffer.concat(this.buffers)
        .toString()
        .split('\r\n')
        .forEach((message: string) => message.length && this.eventDispatcher.fire('message', message))
      this.buffers.splice(0, this.buffers.length)
    }
  }

  // Fire the error to the listeners of its specific code and to the EANY
  // listeners. Ignored after close().
  private handleError(message: string, code: number) {
    if (!this.req) {
      return
    }
    const error = new HttpStreamError(message, code)
    this.errorDispatcher.fire(code, error)
    this.errorDispatcher.fire(HttpStreamError.EANY, error)
  }

  public addErrorListener(type: number, listener: EventListener): void {
    this.errorDispatcher.addListener(type, listener)
  }

  public addEventListener(type: HttpStreamEventType, listener: EventListener): void {
    this.eventDispatcher.addListener(type, listener)
  }

  public copyEventListeners(dispatcher: EventDispatcher): void {
    this.eventDispatcher.copyListeners(dispatcher)
  }

  // Destroy the request. No events or errors are fired afterwards.
  public close(): void {
    if (this.req) {
      this.req.destroy()
      this.req = undefined
    }
  }
}

--------------------------------------------------------------------------------
/src/http/client/HttpConnectionPool.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import '../../util/array'
import { assert, assertInteger, counters } from '../../util'
import { SimpleEventDispatcher, SimpleEventListener } from '../../util/event'
import {
  HttpConnection,
  HttpConnectionError,
  HttpConnectionOptions,
  HttpConnectionRequest,
  HttpConnectionTransaction,
} from './HttpConnection'

export interface HttpConnectionPoolOptions extends HttpConnectionOptions {

  // Number of open HTTP/S connections
  connectionCount?: number
}

//
// Multiplex HTTP requests over a number of connections. Upon construction, the
// class attempts to establish the requested number of connections. It keeps track
// of which connections are connected or not, and only sends requests on active
// connections.
//
// NOTE(review): generic type arguments were lost in this listing. A single
// type parameter T, passed through to the HttpConnection types, and a void
// ready event are reconstructed here - confirm against ./HttpConnection.
//
export class HttpConnectionPool<T = unknown> {
  public readonly connectionCount: number

  private readonly all: HttpConnection<T>[] = []
  private readonly disabled: HttpConnection<T>[] = []

  // Kept ordered so that request() can always use the first connection
  private readonly enabled: HttpConnection<T>[] = []

  private inflight = 0

  private readonly errorDispatcher = new SimpleEventDispatcher<HttpConnectionError<T>>()
  private readonly readyDispatcher = new SimpleEventDispatcher<void>()
  private readonly responseDispatcher = new SimpleEventDispatcher<HttpConnectionTransaction<T>>()

  // Create connectionCount connections (default 1, at most 5000). All of
  // them start out disabled until they report ready.
  constructor(host: string, options: HttpConnectionPoolOptions = {}) {
    const { connectionCount, ...connectionOptions } = options

    this.connectionCount = connectionCount || 1
    assertInteger(this.connectionCount, 1, 5000, 'Connection count')

    for (let i = 0; i < this.connectionCount; i++) {
      const con = this.createConnection(host, connectionOptions)
      this.all.push(con)
      this.disabled.push(con)
      counters.debug.HttpConnectionPool.disabled.inc()
    }
  }

  private createConnection(host: string, connectionOptions: HttpConnectionOptions): HttpConnection<T> {
    const con = new HttpConnection<T>(host, connectionOptions)

    // Ready: move the connection from the disabled to the enabled list
    con.addReadyListener(() => {
      this.disabled.remove(con)
      counters.debug.HttpConnectionPool.disabled.dec()
      this.enabled.push(con)
      counters.debug.HttpConnectionPool.enabled.inc()
      this.readyDispatcher.fire()
    })

    // Error: an error on a connection that is not enabled is rethrown.
    // Otherwise the connection is disabled and its outstanding requests are
    // no longer counted as inflight.
    con.addErrorListener((error: HttpConnectionError<T>) => {
      // indexOf returns -1 (truthy) when missing and 0 (falsy) for the first
      // element, so it must be compared explicitly
      if (this.enabled.indexOf(con) < 0) {
        throw error
      }
      this.inflight -= error.requests.length
      this.enabled.remove(con)
      counters.debug.HttpConnectionPool.enabled.dec()
      this.disabled.push(con)
      counters.debug.HttpConnectionPool.disabled.inc()
      this.errorDispatcher.fire(error)
    })

    // Response: the connection now has one request less in flight, so move
    // it towards the front, past every connection with more requests in flight
    con.addResponseListener((tx: HttpConnectionTransaction<T>) => {
      this.inflight--
      const cons = this.enabled
      const index = cons.indexOf(con)
      assert(0 <= index, 'Connection not found')
      const inflight = con.getInflightCount()!
      let i = index - 1
      while (0 <= i && inflight < cons[i].getInflightCount()!) {
        i--
      }
      if (i < index - 1) {
        cons.splice(index, 1)
        cons.splice(i + 1, 0, con)
      }
      this.responseDispatcher.fire(tx)
    })

    return con
  }

  public addErrorListener(listener: SimpleEventListener<HttpConnectionError<T>>): void {
    this.errorDispatcher.addListener(listener)
  }

  public addReadyListener(listener: SimpleEventListener<void>): void {
    this.readyDispatcher.addListener(listener)
  }

  public addResponseListener(listener: SimpleEventListener<HttpConnectionTransaction<T>>): void {
    this.responseDispatcher.addListener(listener)
  }

  // Close every connection, enabled or not
  public close(): void {
    for (const con of this.all) {
      con.close()
    }
  }

  // Number of currently enabled connections
  public getConnectionsCount(): number {
    return this.enabled.length
  }

  // Number of requests sent through the pool that are not yet answered
  public getInflightCount(): number {
    return this.inflight
  }

  //
  // Send the request on the first enabled connection, then move that
  // connection behind every connection that has fewer requests in flight.
  // Asserts that at least one connection is enabled.
  //
  public request(req: HttpConnectionRequest<T>): void {
    const cons = this.enabled
    assert(0 < cons.length, 'No active connections')
    const con = cons[0]
    con.request(req)
    this.inflight++
    const inflight = con.getInflightCount()!
    let index = 1
    while (index < cons.length && cons[index].getInflightCount()! < inflight) {
      index++
    }
    if (1 < index) {
      // Insert at the new position first, then drop the old head entry
      cons.splice(index, 0, con)
      cons.shift()
    }
  }
}

--------------------------------------------------------------------------------
/src/http/server/HttpRouter.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import http from 'http'
import querystring from 'querystring'
import { HttpMethod } from '../HttpProtocol'
import { HttpServerHandler } from './HttpServer'

//
// Experimental TypeScript decorator for handler methods. See examples/server.ts
//
// Tags the decorated method with the HTTP method it serves and with a path
// filter. When no filter is given, the filter is '/' followed by the name of
// the decorated method.
//
export function httpRouterMethod(method: HttpMethod, filter?: RegExp | string) {
  return function (target: any, propertyKey: string, descriptor: PropertyDescriptor): void {
    descriptor.value.method = method
    descriptor.value.filter = filter === undefined ? '/' + propertyKey : filter
  }
}

// NOTE(review): the type arguments of Record were lost in this listing;
// Record<string, any> is assumed here and for `query` below - confirm.
export type HttpRouterRequestBody = Record<string, any> | string | undefined

export interface HttpRouterRequest extends http.IncomingMessage {
  body: HttpRouterRequestBody
  params?: string[]
  query?: Record<string, any>
}

export interface HttpRouterResponse extends http.ServerResponse {
  respond(statusCode: number, responseBody: any): void
}

export interface HttpRouterOptions {
  cors?: boolean
}

//
// This class serves as a handler for HttpServer requests and routes different
// requests to different handler methods. In order to do this, it uses attributes
// attached to each method.
//
// The easiest way to get those in place, is to extend this class and use the
// httpRouterMethod function above as an experimental TypeScript decorator to
// each handler method.
// See examples/server.ts
//
export abstract class HttpRouter implements HttpServerHandler {
  private routes: { filter: RegExp | string, funcname: string, method: string }[] = []

  //
  // Builds the routing table from every property of the concrete class
  // prototype that carries both a `method` and a `filter` attribute.
  //
  constructor(private readonly options: HttpRouterOptions = {}) {
    for (const funcname of Object.getOwnPropertyNames(this.constructor.prototype)) {
      const func = (this as any)[funcname]
      // Accessors may evaluate to null/undefined; skip them instead of throwing
      if (func && func.method && func.filter) {
        this.routes.push({ filter: func.filter, funcname, method: func.method })
      }
    }
  }

  //
  // Returns the first route whose method equals `method` and whose filter
  // matches `path`. String filters must match exactly; RegExp filters
  // contribute their capture groups as `params`.
  //
  private resolve(method: string, path: string): { funcname: string, params?: string[] } | undefined {
    for (const route of this.routes) {
      if (route.method !== method) {
        continue
      }
      if (typeof route.filter === 'string' && route.filter === path) {
        return { funcname: route.funcname }
      }
      if (route.filter instanceof RegExp) {
        const match = path.match(route.filter)
        if (match) {
          return { funcname: route.funcname, params: match.slice(1) }
        }
      }
    }
    return undefined
  }

  //
  // Buffers the request body, decodes it according to its content type,
  // resolves the route and invokes the handler. A handler may either call
  // res.respond() itself or return (possibly through a Promise) a
  // [statusCode, body] pair.
  //
  // Responses: 400 for a malformed JSON body, 404 for an unknown route or
  // a malformed URL, 500 when the handler throws.
  //
  public onRequest(req: http.IncomingMessage, res: http.ServerResponse): void {
    const responseHeaders: Record<string, string> = this.options.cors
      ? { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': '*' }
      : {}

    const respond = (statusCode: number, responseBody: any): void => {
      res.statusCode = statusCode
      const isJSON = typeof responseBody === 'object'
      res.setHeader('Content-Type', isJSON ? 'application/json' : 'text/plain')
      for (const [key, value] of Object.entries(responseHeaders)) {
        res.setHeader(key, value)
      }
      res.end(isJSON ? JSON.stringify(responseBody) : String(responseBody))
    }

    const buffers: Buffer[] = []
    req.on('data', (buffer: Buffer) => buffers.push(buffer))

    req.on('end', async () => {
      let body: HttpRouterRequestBody
      if (buffers.length) {
        const str = Buffer.concat(buffers).toString('utf-8')
        // Ignore parameters such as "; charset=utf-8" when selecting a decoder
        const contentType = String(req.headers['content-type'] || 'application/octet-stream')
          .split(';')[0]
          .trim()
          .toLowerCase()
        if (contentType === 'application/json') {
          try {
            body = JSON.parse(str)
          } catch (e) {
            // A throw here would reject the listener's promise and leave the client waiting
            return respond(400, 'Bad Request')
          }
        } else if (contentType === 'application/x-www-form-urlencoded') {
          body = querystring.parse(str)
        } else {
          body = str
        }
      }

      const parts = req.url?.split('?')
      if (!Array.isArray(parts) || parts.length < 1 || 2 < parts.length) {
        return respond(404, 'Not Found')
      }

      const match = this.resolve(req.method!, parts[0])
      if (!match) {
        return respond(404, 'Not Found')
      }

      const query = parts.length === 2 ? querystring.parse(parts[1]) : undefined

      try {
        // Extend the real objects: an object spread copies own enumerable
        // properties only and would hide prototype members from the handler
        const routerReq = Object.assign(req, { body, params: match.params, query })
        const routerRes = Object.assign(res, { respond })
        const ret = (this as any)[match.funcname](routerReq, routerRes)
        const out = ret instanceof Promise ? await ret : ret
        if (Array.isArray(out) && out.length === 2 && typeof out[0] === 'number') {
          respond(out[0], out[1])
        }
      } catch (e) {
        console.error(e)
        // Headers cannot be changed once the handler has started a response
        if (!res.headersSent) {
          respond(500, 'Internal Server Error')
        }
      }
    })
  }
}
//
// Rolling average over the samples appended during the last `span`
// milliseconds (1000 by default). Time is taken from Date.now().
//
class AveragingWindow {
  private readonly samples: { value: number, time: number }[] = []
  private sum = 0

  constructor(private readonly span = 1000) {
  }

  //
  // Records a sample, drops the samples that are older than the window and
  // returns the rounded mean of the samples that remain.
  //
  public append(value: number): number {
    const now = Date.now()
    this.samples.push({ value, time: now })
    this.sum += value

    // Samples are in chronological order, so the stale ones form a prefix
    const cutoff = now - this.span
    let stale = 0
    while (this.samples[stale].time < cutoff) {
      this.sum -= this.samples[stale].value
      stale++
    }
    this.samples.splice(0, stale)

    return Math.round(this.sum / this.samples.length)
  }
}
//
// Creates a proxy for a counter group. Reading any string property returns
// the Counter registered under `<group>.<property>`, creating it with the
// given level on first access. Creating a counter also re-sorts the key list
// and updates the printer spacing from the longest and shortest key.
//
// Symbol properties yield undefined: they cannot name a counter, and
// interpolating a symbol into the key would throw a TypeError.
//
function createGroup(group: string, level: number) {
  return new Proxy({}, {
    get: (_target: any, name: string | symbol) => {
      if (typeof name !== 'string') {
        return undefined
      }
      const fn = `${group}.${name}`
      if (!values[fn]) {
        values[fn] = new Counter(group, name, level)
        keys.push(fn)
        keys.sort()
        const lengths = keys.map(key => key.length)
        printer.setSpacing(4 + Math.max(...lengths) - Math.min(...lengths))
      }
      return values[fn]
    },
  })
}
//
// Entry point for the counters facility. Counters are reached through one of
// the level proxies, for example `counters.info.Group.name.inc()`.
//
class Counters {
  public readonly debug = createProxy(DEBUG)
  public readonly info = createProxy(INFO)
  public readonly warn = createProxy(WARN)
  public readonly error = createProxy(ERROR)

  //
  // Registers a listener that is called on every counter update.
  //
  public addUpdateListener(listener: SimpleEventListener<CountersEvent>) {
    dispatcher.addListener(listener)
  }

  //
  // Prints the counters at or above `level` to the terminal, either on every
  // update (interval 0) or every `interval` milliseconds. Does nothing when
  // interval is undefined or stdout is not a TTY.
  //
  public monitor(interval: undefined /* never */ | 0 /* immediate */ | number, level: CountersLevel = 'info'): void {
    if (interval === undefined || process.stdout.isTTY !== true) {
      return
    }

    // Validate before hiding the cursor, so a bad interval leaves the terminal untouched
    if (interval !== 0) {
      assertInteger(interval, 1, undefined, `Invalid interval: ${interval}`)
    }
    hideCursor()

    if (interval === 0) {
      this.addUpdateListener(() => this.print(level))
    } else {
      setInterval(() => this.print(level), interval)
    }

    // Restore the terminal on the way out. The 'exit' listener must not call
    // process.exit(), which would replace the pending exit code with 0.
    process.on('exit', () => {
      showCursor()
      console.log()
    })

    // Signals end the process with code 0; the 'exit' listener above does the cleanup
    const onSignal = () => process.exit(0)
    process.on('SIGINT', onSignal)
    process.on('SIGTERM', onSignal)
  }

  //
  // Prints one line for each counter whose level is at least `levelName`.
  //
  public print(levelName: CountersLevel = 'info'): void {
    const threshold = LEVELS[levelName]
    const lines = keys
      .filter(key => threshold <= values[key].level)
      .map(key => {
        const val = values[key]
        const level = val.level
        return `${LINE[level]}${PREFIX[level]} ${val.group}.${val.name}: ${VALUE[level]}${val.value}\x1b[0m`
      })
    if (0 < lines.length) {
      printer.printLines(...lines)
    }
  }
}
//
// Example router. Each decorated method handles one route; handlers other
// than echo also bump the module level request counter.
//
class Server extends HttpRouter {
  // POST /echo[/<ms>]: sends the request body back, after <ms> milliseconds when given
  @httpRouterMethod('POST', /^\/echo(\/(\d{1,4}))?\/?$/)
  public echo(req: HttpRouterRequest, res: HttpRouterResponse) {
    // params[0] is the outer group ("/<ms>"), params[1] holds the digits alone
    const delay = req.params![1]
    if (delay !== undefined) {
      setTimeout(() => res.respond(200, req.body), parseInt(delay))
      return
    }
    return [200, req.body]
  }

  // GET /: fixed greeting
  @httpRouterMethod('GET', '/')
  public index() {
    count += 1
    return [200, 'Hello, HTTPS!\n']
  }

  // GET /ping: the path defaults to the method name
  @httpRouterMethod('GET')
  public ping() {
    count += 1
    return [200, { oops: 'pong' }]
  }

  // GET /wait/<ms>: responds after <ms> milliseconds
  @httpRouterMethod('GET', /^\/wait\/(\d{1,4})\/?$/)
  public wait(req: HttpRouterRequest, res: HttpRouterResponse) {
    count += 1
    const delay = parseInt(req.params![0])
    setTimeout(() => res.respond(200, 'Wait is over'), delay)
  }
}
MIIEowIBAAKCAQEA9ffyOA+z+t4tROOiehd748+Z2vA3+X0U6FkdWy+trmbU4dk2 75 | /5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13HTy4hOiS7CX/cBY4RrewWu0ezkFk 76 | 3h6Eslc7GSirDWEQE9ar645HiSYt47KszkdnR46YURPYwZEp2/lPnL8XNmalWJCV 77 | m+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOfhsX89mBYP/SDX7Yxwc/uWx0w+Fki 78 | saIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN8ZhpNOMseTZyRqXnN5Ii7Qr74Srh 79 | YPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNXtwIDAQABAoIBAAsJtm/3nG8Mm2F1 80 | CayK8z9U6KWflBN5HoASx7N430VtTe5YrhQoWekwxlVGCF+3Z358CbfOWEOkH+cj 81 | CFCwAYFpYSptuXIpfl4MUTgzNBHQhEpjOuCQ5AmQypEldw+hpHQPdSWb++/Wq8aF 82 | FSYopCTc7E7vIhIFrqg1dvIkzSjeudTZAiYnQ2vvgPsvnEO3YAqUo0AkwHl/bQeN 83 | VOC46aqMZAPf1Y5UmW3/0ua8HTuVbHk5QMZlWGbEPe2RbR3ILeGAvruIxSyhk9yO 84 | PXRsYUj5uLHAAoAPXXF5hsngHxkpY9VSOv1C3LhBL6HyNf2MmDhsOZglyU6rxgB4 85 | 2tf/FpkCgYEA/cQw0TyhVFlmdSkG7aN54ibnkkjnOr2onl95mUBZxea2hvozP8zZ 86 | BjCW2A9oo8z6HK6BJy1B0M8d9pUlUcHAMJ2eB/36tih2temytMbigyf2g0uvmUEP 87 | YLCaHBSSBKh2Y0CxzTJkqZEAh3PewiOOhhU9q66Rv+8Aifipx/FjOD0CgYEA+CIv 88 | SH4jPW9CtvGs45oZ9MaaZkhcjuDQ2R4iCSKW1cFOhQVUOnhRCu26vBiuJhwM9vwi 89 | 12yD/qJOm73wbN0iIoZTDRqoWCUur3mgIXw5HGe0uoV5MIEFbOkW/9nj6vk+a/9v 90 | CqaypGJT2Usd95BWHxlxPwfF9FtxW+czOdJMWwMCgYB0sJqiHHczCkkK5urAq8OI 91 | MsuZgNyTLlMzQEPyLJ0bW5PjTXnzhIbnScCTacJ8T+1S8wuAsFbrZdIpaTvX9Hgj 92 | 4tagZjG7QbAUxnnelvXhyaaZiVwd5MTleU/kSbE7YxvNWBpqeRnAv2S25JkyPJd1 93 | IJ9TKtrqn0RoLWglAOLXIQKBgCbWovQD2lw5WAXumhMeAcYQeAZeeS5b/hSd5NHt 94 | OhLHKRUlGmP0hSrivwHGEywf49+c4484iwiGOyuhdUp06mzg/Yrli0gQudf5f5j+ 95 | KqpJiT5QugFfkIvViCYP4t7amGyrFKRkJz4XrewrF8uyKejAQLuO6esvjPTHoXsB 96 | cbYlAoGBAJ7kP4V9koRS63AudjMqZa3BL9edM8yi/T/B51eKMltbxnd7Aa6e+nKj 97 | djA+bw2+54DG+ygZtQ2DNdM+VeKzBd0j2Q66egKBOm39psQaMMktxsFoW+MiYc1T 98 | Q9XAQiN0PQ/aPPvVfmY1z9LxvUvJLtsiXVpYhlgKEyQql/b0UJeY 99 | -----END RSA PRIVATE KEY-----`, 100 | cert: ` 101 | -----BEGIN CERTIFICATE----- 102 | MIIC9jCCAd4CCQDt8nzQjKCJRzANBgkqhkiG9w0BAQUFADA9MQswCQYDVQQGEwJ1 103 | czELMAkGA1UECAwCY2ExEjAQBgNVBAcMCXN1bm55dmFsZTENMAsGA1UECgwEYWNt 104 | 
ZTAeFw0yMTEwMDEyMTM4MzNaFw00OTAyMTUyMTM4MzNaMD0xCzAJBgNVBAYTAnVz 105 | MQswCQYDVQQIDAJjYTESMBAGA1UEBwwJc3Vubnl2YWxlMQ0wCwYDVQQKDARhY21l 106 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9ffyOA+z+t4tROOiehd7 107 | 48+Z2vA3+X0U6FkdWy+trmbU4dk2/5opvefCjN76qxyRtDrcNfk3clMAXvXxLY13 108 | HTy4hOiS7CX/cBY4RrewWu0ezkFk3h6Eslc7GSirDWEQE9ar645HiSYt47Kszkdn 109 | R46YURPYwZEp2/lPnL8XNmalWJCVm+cW5K7Sro6Dn+hAmMjl67R6V7KzjMkFtbOf 110 | hsX89mBYP/SDX7Yxwc/uWx0w+FkisaIw8YYMm0a5PfEBjSWXVxeVhPw5F/OF+iZN 111 | 8ZhpNOMseTZyRqXnN5Ii7Qr74SrhYPSoqWuFqit2kejNqO77zjnjKTNe+cpR1UNX 112 | twIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQCnwVNmuaPKMXtIGdSzOtpONR2nHRj+ 113 | dux5z9T0IuEB4wqeTXFPClTVt5uI0yfq0L6bXwwiBGwDJsJaQOw03fDRx/BEsTRT 114 | YTX7QI7ipWcLS6yNWHCj0nS1KjqWRcXPL9DikPXxZwDmh2OAiuBGDIcI0YmAB0oa 115 | ywYe/5ABM60poF74izNe+mLDy0+Zqs3YijltoMzWcVnEgmZO352O1olgKUxrCbdZ 116 | yj94ML8zyAmLMEJz9nx8Sk1wJQA7/z4ZGKHbdMtZmqvnnetY1zKS6ilb6UCIuaJ/ 117 | EkhiKgh94g1X6McmizmQDZEOeF1572FLIMd/WFKzcAOLdjw2NDL/5Cd3 118 | -----END CERTIFICATE-----`, 119 | }) 120 | 121 | new HttpServer( new Server(), { port: 3000, certificates }).start() 122 | -------------------------------------------------------------------------------- /src/twitter/TwitterStream.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
//
// Builds the PowerTrack streaming endpoint for the given account and stream
// label. `params`, when provided and not empty, is encoded as the query
// string; an empty object no longer produces a dangling '?'.
//
// NOTE(review): the type arguments of `params` were lost in this copy of the
// file; Record<string, any> is assumed.
//
export function createTwitterStreamEndpoint(
  account: TwitterAccount,
  label: string,
  params?: Record<string, any>,
): HttpEndpoint {
  const encoded = params === undefined ? '' : querystring.stringify(params)
  const qs = encoded === '' ? '' : '?' + encoded
  const url = `https://gnip-stream.twitter.com/stream/powertrack/accounts/${
    account.name}/publishers/twitter/${label}.json${qs}`
  return new HttpEndpoint(url)
}
//
// Streams Tweets from the PowerTrack API over an HttpResilientStream and
// manages the stream's filtering rules through the rules API.
//
// NOTE(review): generic type arguments were lost in this copy of the file.
// They are restored below from usage (StreamedTweet for the tweet dispatcher
// and its listeners, tag -> value strings for rules). The arguments of
// EventListener could not be determined and must be confirmed against
// util/event.ts.
//
export class TwitterStream extends TwitterBase implements TwitterStreamInterface {
  private stream: HttpResilientStream
  private tweetDispatcher = new SimpleEventDispatcher<StreamedTweet>()

  //
  // options.backfillMinutes, when set, must be an integer between 1 and 5.
  // It is only used for the default endpoint, not for options.endpoint.
  //
  constructor(account: TwitterAccount, options: TwitterStreamOptions = {}) {
    super(account, options)
    const params = options.backfillMinutes === undefined
      ? undefined
      : { backfillMinutes: assertInteger(options.backfillMinutes, 1, 5, 'Backfill minutes') }
    this.stream = new HttpResilientStream(
      options.endpoint || createTwitterStreamEndpoint(this.account, this.label, params),
      this.account.auth,
      {
        connectTimeoutMs: 8000,
        messageTimeoutMs: 30000,
        connectionMinWaitMs: 1000,
        connectionMaxWaitMs: 8000,
      },
    )
    this.stream.addEventListener('message', (message: string) => {
      try {
        const raw = JSON.parse(message)
        const tweet = new Tweet(raw) as StreamedTweet
        tweet.rules = raw.matching_rules.map((r: any) => r.tag)
        this.tweetDispatcher.fire(tweet)
      } catch (e: any) {
        // Messages that cannot be parsed or converted are logged and dropped
        console.error('TwitterStream:', e)
        console.error('TwitterStream:', message)
      }
    })
  }

  //
  // addListener(listener) and addListener('tweet', listener) subscribe to
  // Tweets; any other event type is forwarded to the underlying stream.
  //
  public addListener(
    typeOrListener: TwitterStreamEventType | SimpleEventListener<StreamedTweet>,
    listener?: EventListener<HttpStreamEventType> | SimpleEventListener<StreamedTweet>,
  ): void {
    if (typeof typeOrListener !== 'string') {
      assert(listener === undefined, 'Two listener provided')
      this.tweetDispatcher.addListener(typeOrListener as SimpleEventListener<StreamedTweet>)
      return
    }
    assert(listener !== undefined, 'Listener type provided but no listener specified')
    if (typeOrListener === 'tweet') {
      this.tweetDispatcher.addListener(listener as SimpleEventListener<StreamedTweet>)
    } else {
      this.stream.addEventListener(typeOrListener, listener as EventListener<HttpStreamEventType>)
    }
  }

  public connect(): void {
    this.stream.connect()
  }

  public disconnect(): void {
    this.stream.disconnect()
  }

  //
  // Makes the stream's rules equal to newRulesRecord (tag -> rule value).
  // Nothing is sent when the existing rules already match. Otherwise the
  // existing rules are deleted first when `force` is true, and an Error is
  // thrown when it is false.
  //
  public async setStreamRules(newRulesRecord: Record<string, string> = {}, force = true): Promise<void> {
    const url = `https://data-api.twitter.com/rules/powertrack/accounts/${
      this.account.name}/publishers/twitter/${this.label}.json`

    const res = await request(url, { headers: this.account.auth })
    if (typeof res !== 'object' || res === null) {
      throw new Error(`Invalid API response: ${res}`)
    }

    // Treat a response without a rules array as "no rules"
    const existing: Obj[] = res.rules || []
    const oldRules = existing.map((rule: Obj): [string, string] => [rule.tag, rule.value]).sort()
    const newRules = Object.entries(newRulesRecord).sort()

    const match = oldRules.length === newRules.length &&
      newRules.every(([tag, value], i) => tag === oldRules[i][0] && value === oldRules[i][1])
    if (match) {
      return
    }

    if (oldRules.length !== 0) {
      if (force) {
        console.log(`Deleting existing rules:\n${oldRules.map(r => ` ${r[0]}: ${r[1]}`).join('\n')}`)
        await request(url + '?_method=delete', { headers: this.account.auth, body: { rules: existing } })
      } else {
        console.error(`Existing rules:\n${oldRules.map(r => ' ' + r).join('\n')}`)
        throw new Error('New rules will override existing rules')
      }
    }

    console.log(`Setting stream rules:\n${newRules.map(r => ` ${r[0]}: ${r[1]}`).join('\n')}`)
    await request(
      url,
      { headers: this.account.auth, body: { rules: newRules.map(r => ({ tag: r[0], value: r[1] })) } },
    )
  }
}
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { assertInteger, counters } from '../../util' 5 | import { HttpHeaders, HttpRequest, HttpResponse } from '../HttpProtocol' 6 | import { SimpleEventDispatcher, SimpleEventListener } from '../../util/event' 7 | import { HttpConnectionPool, HttpConnectionPoolOptions } from './HttpConnectionPool' 8 | import { HttpConnectionError, HttpConnectionRequest, HttpConnectionTransaction } from './HttpConnection' 9 | 10 | export interface HttpRequestPoolOptions extends HttpConnectionPoolOptions { 11 | 12 | // Number of atempts to send a request before emitting an error 13 | maxAttempts?: number 14 | 15 | // Number of requests to send in parallel over a single connection 16 | maxPipelineDepth?: number 17 | } 18 | 19 | export interface HttpRequestPoolResponse { 20 | 21 | // Number of attempts made to send the request 22 | attempts: number 23 | 24 | // Elapsed time (in milliseconds) for the last attempt 25 | elapsed: number 26 | 27 | // Http request 28 | request: HttpRequest 29 | 30 | // Possible response (might be undefined if all attempts failed to send the request) 31 | response?: HttpResponse 32 | 33 | // User object provided with the original request 34 | userp: T 35 | } 36 | 37 | interface U { 38 | attempts: number 39 | sendTime?: number 40 | userp: T 41 | } 42 | 43 | // 44 | // An HTTP/S request intefrace optimized for sending massively parallel requests to 45 | // a server or API. This class maintains an HttpConnectionPool of persistent HTTP/S 1.1 46 | // connections to the server and pipelines requests allong these different connections. 47 | // 48 | // The class relies on the underlying recconect mechanism provided by HttpConnection and 49 | // HttpConnectionPool to maintain the connection and adds its own mechanism for 50 | // retransmitting requests in cases of connection failures. 
//
// T is the type of the user object (`userp`) attached to each request and
// handed back with its response.
//
// NOTE(review): generic type arguments were lost in this copy of the file
// and are restored below from usage; confirm against HttpConnectionPool.ts.
//
export class HttpRequestPool<T> {
  private readonly maxAttempts: number
  private readonly maxPipelineDepth: number
  private readonly connectionPool: HttpConnectionPool<U<T>>
  private readonly requestQueue: HttpConnectionRequest<U<T>>[] = []
  private readonly maxInflight: number
  private readonly dispatcher = new SimpleEventDispatcher<HttpRequestPoolResponse<T>>()

  //
  // maxAttempts (1-10, default 1) and maxPipelineDepth (1-100, default 1)
  // are consumed here; all other options go to the connection pool.
  //
  constructor(host: string, options: HttpRequestPoolOptions = {}) {
    const { maxAttempts, maxPipelineDepth, ...connectionPoolOptions } = options

    this.maxAttempts = maxAttempts || 1
    assertInteger(this.maxAttempts, 1, 10, 'Max attempts')

    this.maxPipelineDepth = maxPipelineDepth || 1
    assertInteger(this.maxPipelineDepth, 1, 100, 'Pipeline depth')

    // Reports the outcome of a request; `response` is undefined when every attempt failed
    const respond = (req: HttpConnectionRequest<U<T>>, response?: HttpResponse): void => {
      const { userp, ...request } = req
      this.dispatcher.fire({
        attempts: userp.attempts,
        elapsed: Date.now() - userp.sendTime!,
        userp: userp.userp,
        request,
        response,
      })
    }

    this.connectionPool = new HttpConnectionPool(host, connectionPoolOptions)

    this.connectionPool.addReadyListener(() => this.sendNextRequests())

    this.connectionPool.addErrorListener((error: HttpConnectionError<U<T>>) => {
      counters.warn.HttpRequestPool.errors.inc()

      // These requests were counted as inflight when sent and never reach the
      // response listener, so the debug counter is released here
      if (0 < error.requests.length) {
        counters.debug.HttpRequestPool.inflight.dec(error.requests.length)
      }

      for (const req of error.requests) {
        if (req.userp.attempts < this.maxAttempts) {
          // Queued again; sent by the next ready or response event
          this.requestQueue.push(req)
        } else {
          counters.warn.HttpRequestPool.abortedRequests.inc()
          respond(req)
        }
      }
    })

    this.connectionPool.addResponseListener(({ request, response }: HttpConnectionTransaction<U<T>>) => {
      counters.debug.HttpRequestPool.responses.inc()
      counters.debug.HttpRequestPool.inflight.dec()
      respond(request, response)
      this.sendNextRequests()
    })

    this.maxInflight = this.connectionPool.connectionCount * this.maxPipelineDepth
  }

  //
  // Sends queued requests while there is an enabled connection and the
  // pool is below its inflight limit.
  //
  private sendNextRequests(): void {
    while (
      0 < this.requestQueue.length &&
      0 < this.connectionPool.getConnectionsCount() &&
      this.connectionPool.getInflightCount() < this.maxInflight
    ) {
      const req = this.requestQueue.shift()!
      req.userp.attempts++
      req.userp.sendTime = Date.now()
      this.connectionPool.request(req)
      counters.debug.HttpRequestPool.inflight.inc()
      counters.debug.HttpRequestPool.requests.inc()
    }
  }

  public addResponseListener(listener: SimpleEventListener<HttpRequestPoolResponse<T>>): void {
    this.dispatcher.addListener(listener)
  }

  public close(): void {
    this.connectionPool.close()
  }

  public getConnectionsCount(): number {
    return this.connectionPool.getConnectionsCount()
  }

  public getInflightCount(): number {
    return this.connectionPool.getInflightCount()
  }

  //
  // Queues a request and sends it as soon as the pool has capacity.
  //
  public request(request: HttpConnectionRequest<T>): void {
    const { userp, ...other } = request
    this.requestQueue.push({ ...other, userp: { attempts: 0, userp } })
    this.sendNextRequests()
  }

  public GET(path: string, userp: T, headers?: HttpHeaders): void {
    this.request({ method: 'GET', path, userp, headers })
  }

  public POST(path: string, userp: T, body?: string, headers?: HttpHeaders): void {
    this.request({ method: 'POST', path, userp, body, headers })
  }
}
Anyone who violates this code of conduct may be banned from the community. 4 | 5 | Our open source community strives to: 6 | 7 | * **Be friendly and patient.** 8 | * **Be welcoming**: We strive to be a community that welcomes and supports people of all backgrounds and identities. This includes, but is not limited to members of any race, ethnicity, culture, national origin, colour, immigration status, social and economic class, educational level, sex, sexual orientation, gender identity and expression, age, size, family status, political belief, religion, and mental and physical ability. 9 | * **Be considerate**: Your work will be used by other people, and you in turn will depend on the work of others. Any decision you take will affect users and colleagues, and you should take those consequences into account when making decisions. Remember that we're a world-wide community, so you might not be communicating in someone else's primary language. 10 | * **Be respectful**: Not all of us will agree all the time, but disagreement is no excuse for poor behavior and poor manners. We might all experience some frustration now and then, but we cannot allow that frustration to turn into a personal attack. It’s important to remember that a community where people feel uncomfortable or threatened is not a productive one. 11 | * **Be careful in the words that you choose**: we are a community of professionals, and we conduct ourselves professionally. Be kind to others. Do not insult or put down other participants. Harassment and other exclusionary behavior aren't acceptable. This includes, but is not limited to: 12 | * Violent threats or language directed against another person. 13 | * Discriminatory jokes and language. 14 | * Posting sexually explicit or violent material. 15 | * Posting (or threatening to post) other people's personally identifying information ("doxing"). 16 | * Personal insults, especially those using racist or sexist terms. 17 | * Unwelcome sexual attention. 
18 | * Advocating for, or encouraging, any of the above behavior. 19 | * Repeated harassment of others. In general, if someone asks you to stop, then stop. 20 | * **When we disagree, try to understand why**: Disagreements, both social and technical, happen all the time. It is important that we resolve disagreements and differing views constructively. Remember that we’re different. The strength of our community comes from its diversity, people from a wide range of backgrounds. Different people have different perspectives on issues. Being unable to understand why someone holds a viewpoint doesn’t mean that they’re wrong. Don’t forget that it is human to err and blaming each other doesn’t get us anywhere. Instead, focus on helping to resolve issues and learning from mistakes. 21 | 22 | This code is not exhaustive or complete. It serves to distill our common understanding of a collaborative, shared environment, and goals. We expect it to be followed in spirit as much as in the letter. 23 | 24 | ### Diversity Statement 25 | 26 | We encourage everyone to participate and are committed to building a community for all. Although we may not be able to satisfy everyone, we all agree that everyone is equal. Whenever a participant has made a mistake, we expect them to take responsibility for it. If someone has been harmed or offended, it is our responsibility to listen carefully and respectfully, and do our best to right the wrong. 27 | 28 | Although this list cannot be exhaustive, we explicitly honor diversity in age, gender, gender identity or expression, culture, ethnicity, language, national origin, political beliefs, profession, race, religion, sexual orientation, socioeconomic status, and technical ability. We will not tolerate discrimination based on any of the protected 29 | characteristics above, including participants with disabilities. 
30 | 31 | ### Reporting Issues 32 | 33 | If you experience or witness unacceptable behavior—or have any other concerns—please report it by contacting us via [opensource+codeofconduct@twitter.com](mailto:opensource+codeofconduct@twitter.com). All reports will be handled with discretion. In your report please include: 34 | 35 | - Your contact information. 36 | - Names (real, nicknames, or pseudonyms) of any individuals involved. If there are additional witnesses, please 37 | include them as well. Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public IRC logger), please include a link. 38 | - Any additional information that may be helpful. 39 | 40 | After filing a report, a representative will contact you personally. If the person who is harassing you is part of the response team, they will recuse themselves from handling your incident. A representative will then review the incident, follow up with any additional questions, and make a decision as to how to respond. We will respect confidentiality requests for the purpose of protecting victims of abuse. 41 | 42 | Anyone asked to stop unacceptable behavior is expected to comply immediately. If an individual engages in unacceptable behavior, the representative may take any action they deem appropriate, up to and including a permanent ban from our community without warning. 43 | 44 | ## Thanks 45 | 46 | This code of conduct is based on the [Open Code of Conduct](https://github.com/todogroup/opencodeofconduct) from the [TODOGroup](https://todogroup.org). 47 | 48 | We are thankful for their work and all the communities who have paved the way with codes of conduct. 49 | -------------------------------------------------------------------------------- /src/http/client/HttpRequest.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 
//
// Error raised for failed requests. `code` holds either the HTTP status
// code of the response or one of the synthetic codes below; `details`
// carries additional text such as the raw response body.
//
export class HttpRequestError extends Error {
  public static ETIMEOUT = 900
  public static EPARSERESPONSE = 901
  public static ESENDREQUEST = 902
  // public static ECONNRESET = 903
  // public static EPIPE = 904

  public code: number
  public details: string

  constructor(message: string, code: number = 0, details: string = '') {
    super(message)
    this.code = code
    this.details = details
  }
}
{ headers: { ...opts.headers } } : ({} as http.OutgoingHttpHeaders)) 53 | } 54 | 55 | function sendRequest(): Promise { 56 | let encoded: Uint8Array 57 | if (opts.body) { 58 | let body: string 59 | const contentType = options.headers!['Content-Type'] || 'application/json' 60 | switch (contentType) { 61 | case 'application/json': 62 | if (typeof opts.body !== 'object') { 63 | throw new Error(`Expected object body for JSON request: ${opts.body}`) 64 | } 65 | body = JSON.stringify(opts.body) 66 | break 67 | case 'application/x-www-form-urlencoded': 68 | if (typeof opts.body !== 'object') { 69 | throw new Error(`Expected object body for urlencoded request: ${opts.body}`) 70 | } 71 | body = querystring.stringify(opts.body) 72 | break 73 | case 'application/octet-stream': 74 | if (typeof opts.body !== 'string') { 75 | throw new Error(`Expected string body for octet-stream request: ${opts.body}`) 76 | } 77 | body = opts.body 78 | break 79 | default: 80 | throw new Error(`Unsupported content type: ${contentType}`) 81 | } 82 | encoded = (new TextEncoder()).encode(body) 83 | options.method = options.method || 'POST' 84 | options.headers!['Content-Type'] = contentType 85 | options.headers!['Content-Length'] = encoded.length 86 | } 87 | 88 | return new Promise((resolve, reject) => { 89 | let code: number 90 | let timeout: NodeJS.Timeout | undefined 91 | 92 | if ((typeof opts.timeout === 'number' && 0 < opts.timeout) || opts.timeout === undefined) { 93 | const timeoutMs = opts.timeout || TIMEOUT_MS 94 | timeout = setTimeout(() => { 95 | timeout = undefined 96 | if (!code) { 97 | reject(new HttpRequestError(`Request timed out after ${timeoutMs} ms (qurl: ${qurl})`, HttpRequestError.ETIMEOUT)) 98 | } 99 | }, timeoutMs) 100 | } 101 | 102 | const req = (isSecure ? 
https : http).request(qurl, options, (res: any) => { 103 | const buffers: Buffer[] = [] 104 | 105 | res.on('data', (data: Buffer) => buffers.push(data)) 106 | 107 | res.on('end', (data: Buffer) => { 108 | buffers.push(data) 109 | if (timeout) { 110 | clearTimeout(timeout) 111 | timeout = undefined 112 | } 113 | if (!code) { 114 | const responseText: string = buffers.join('').trim() 115 | let responseBody: Obj | undefined 116 | if ((res.headers['content-type'] || '').startsWith('application/json')) { 117 | try { 118 | responseBody = JSON.parse(responseText) 119 | } catch (e: any) { 120 | reject( 121 | new HttpRequestError('Error parsing JSON response', HttpRequestError.EPARSERESPONSE, responseText) 122 | ) 123 | return 124 | } 125 | } 126 | if (res.statusCode < 200 || 299 < res.statusCode) { 127 | reject(new HttpRequestError(`API error: ${res.statusCode}`, code = res.statusCode, responseText)) 128 | } else { 129 | resolve(responseBody || responseText) 130 | } 131 | } 132 | }) 133 | }) 134 | 135 | req.on('error', (error: any) => { 136 | if (timeout) { 137 | clearTimeout(timeout) 138 | timeout = undefined 139 | } 140 | if (!code) { 141 | reject(new HttpRequestError(`Error sending request: ${error}`, HttpRequestError.ESENDREQUEST)) 142 | } 143 | }) 144 | 145 | if (encoded) { 146 | req.write(encoded) 147 | } 148 | req.end() 149 | }) 150 | } 151 | 152 | const retry = opts.retry || opts.retryInitialTimeout || opts.retryMaxAttempts 153 | let timeout = opts.retryInitialTimeout || 1000 154 | let attempts = opts.retryMaxAttempts || 4 155 | let lastError: any 156 | const start = Date.now() 157 | for (let attempt = 0; ++attempt <= attempts;) { 158 | try { 159 | const res = await sendRequest() // must await to catch 160 | return res 161 | } catch (e: any) { 162 | lastError = e 163 | if (retry && (e.code === 429 || e.code === HttpRequestError.ETIMEOUT)) { 164 | if (attempt < attempts) { 165 | await sleep(timeout) 166 | timeout *= 2 167 | } 168 | } else if (retry && e.code == 
HttpRequestError.ESENDREQUEST) { 169 | // retry immediately 170 | } else { 171 | throw e 172 | } 173 | } 174 | } 175 | 176 | const elapsed = Date.now() - start 177 | throw new HttpRequestError( 178 | `Request timed out after ${attempts} attempts and ${elapsed} ms (last error code was ${lastError.code})`, 179 | HttpRequestError.ETIMEOUT 180 | ) 181 | } 182 | -------------------------------------------------------------------------------- /src/app/crypto/utils.ts: -------------------------------------------------------------------------------- 1 | import {Converseon, ConverseonSentiment} from "./converseon"; 2 | import {getLatestCoinToUSDRate} from "./coins"; 3 | import * as config from "./config"; 4 | import {StreamedTweet} from "../../twitter"; 5 | 6 | export type TweetStored = { 7 | id: string; 8 | followers_count: number; 9 | } 10 | 11 | export type TwitterRank = { 12 | score: string 13 | scoreByFollowers: string 14 | sentiment: { 15 | positive: number 16 | neutral: number 17 | negative: number 18 | } 19 | sentimentByFollowers: { 20 | positive: number 21 | neutral: number 22 | negative: number 23 | totalFollowers: number 24 | } 25 | } 26 | 27 | export type Entry = { 28 | timeMs: number 29 | coin: string 30 | tweetIds: Array 31 | usdRate: number 32 | } 33 | 34 | export type Result = Entry & TwitterRank 35 | 36 | export type ResultMapped = Entry & TwitterRank & { tweetCount: number } 37 | 38 | export const COIN_REGEX_STR = '[a-z]+' 39 | export const COIN_REGEX = new RegExp(`^${COIN_REGEX_STR}$`) 40 | export const URL_REGEX = new RegExp(`^\/(${COIN_REGEX_STR})\/(\\d+)(\/(\\d+))?\/?$`) 41 | export const URL_LATEST_REGEX = new RegExp(`^\/(${COIN_REGEX_STR})\/latest(\/(\\d+))?\/?$`) 42 | 43 | export const FIVE_MIN_MS = 1000 * 60 * 5 44 | export const ONE_WEEK_MS = 1000 * 60 * 60 * 24 * 7 + FIVE_MIN_MS 45 | export const ONE_HOUR_MS = 1000 * 60 * 60 46 | 47 | export const scoreOptions = ['positive', 'neutral', 'negative'] 48 | 49 | const converseon = new 
Converseon(config.CONVERSEON_API_KEY) 50 | 51 | export function computeTwitterRank(tweets: Array, sentiments: Array): TwitterRank { 52 | const defaultValue = {sentiment: {positive: 0, neutral: 0, negative: 0}, sentimentByFollowers: {positive: 0, neutral: 0, negative: 0, totalFollowers: 0}} 53 | if (!tweets || tweets.length === 0) { 54 | return {score: 'neutral', scoreByFollowers: 'neutral', ...defaultValue} 55 | } 56 | const ranks = tweets.reduce(({sentiment, sentimentByFollowers}, {followers_count}, idx) => { 57 | 58 | const tweetSentiment = sentiments[idx] 59 | const value = tweetSentiment?.value || 'neutral' 60 | 61 | return { 62 | sentiment:{ 63 | ...sentiment, 64 | [value]: sentiment[value] + 1, 65 | }, 66 | sentimentByFollowers: { 67 | ...sentimentByFollowers, 68 | [value]: sentimentByFollowers[value] + followers_count, 69 | totalFollowers: sentimentByFollowers.totalFollowers + followers_count, 70 | } 71 | } 72 | }, defaultValue) 73 | 74 | // @ts-ignore 75 | const maxRank = (rankType: 'sentiment' | 'sentimentByFollowers') => (max: string, v: string) => ranks[rankType][max] > ranks[rankType][v] ? 
max : v 76 | const score = scoreOptions.reduce(maxRank('sentiment')) 77 | const scoreByFollowers = scoreOptions.reduce(maxRank('sentimentByFollowers')) 78 | 79 | return { 80 | ...ranks, 81 | score, 82 | scoreByFollowers 83 | } 84 | } 85 | 86 | export async function getDataToStore(streamedTweets: StreamedTweet[], coin = 'bitcoin') { 87 | const timeMs = new Date().getTime(); 88 | const [usdRate, sentiments] = await Promise.all([ 89 | getLatestCoinToUSDRate(coin), 90 | converseon.sentiment(streamedTweets.map(tweet => tweet.text)), 91 | ]) 92 | const tweets = streamedTweets.map(({id, full: {user: {followers_count}}}, idx) => ({id, followers_count})) 93 | const tweetIds = tweets.sort( 94 | (a, b) => b.followers_count - a.followers_count) 95 | .map(({id}) => id) 96 | const twitterRank = computeTwitterRank(tweets, sentiments) 97 | return { timeMs, coin, ...twitterRank, tweetIds, usdRate } 98 | } 99 | 100 | export function getDatapointFrequency(startTimestamp: number, endTimestamp: number) { 101 | const diff = endTimestamp - startTimestamp; 102 | if (diff <= ONE_HOUR_MS) { 103 | return 1 104 | } else if (diff <= ONE_HOUR_MS * 2) { 105 | return 2 106 | } else if (diff <= ONE_HOUR_MS * 4) { 107 | return 3 108 | } else if (diff <= ONE_HOUR_MS * 24) { 109 | return 10 110 | } else if (diff <= ONE_HOUR_MS * 24 * 2) { 111 | return 15 112 | } else { 113 | return 30 114 | } 115 | } 116 | 117 | const TWEET_ID_SAMPLE_SIZE = 5 118 | 119 | function getTweetResultMapped(result?: Result) { 120 | if (!result) { 121 | return result 122 | } 123 | const tweetIds = result.tweetIds || [] 124 | return { 125 | ...result, 126 | tweetCount: tweetIds.length, 127 | tweetIds: tweetIds.slice(0, TWEET_ID_SAMPLE_SIZE) 128 | } 129 | } 130 | 131 | export function getCombinedResultAveraged(resultA?: ResultMapped, resultB?: ResultMapped, frequency = 1) { 132 | if (!resultA && resultB) { 133 | return resultB 134 | } 135 | if (resultA && !resultB) { 136 | return resultA 137 | } 138 | if (resultA && resultB) { 
139 | return { 140 | ...resultA, 141 | tweetCount: Math.round((resultA.tweetCount + resultB.tweetCount) / frequency), 142 | tweetIds: [...resultA.tweetIds, ...resultB.tweetIds], 143 | sentiment: { 144 | neutral: Math.round((resultA.sentiment.neutral + resultB.sentiment.neutral) / frequency), 145 | positive: Math.round((resultA.sentiment.positive + resultB.sentiment.positive) / frequency), 146 | negative: Math.round((resultA.sentiment.neutral + resultB.sentiment.negative) / frequency), 147 | }, 148 | sentimentByFollowers: { 149 | neutral: Math.round((resultA.sentimentByFollowers.neutral + resultB.sentimentByFollowers.neutral) / frequency), 150 | positive: Math.round((resultA.sentimentByFollowers.positive + resultB.sentimentByFollowers.positive) / frequency), 151 | negative: Math.round((resultA.sentimentByFollowers.negative + resultB.sentimentByFollowers.negative) / frequency), 152 | totalFollowers: Math.round((resultA.sentimentByFollowers.totalFollowers + resultB.sentimentByFollowers.totalFollowers) / frequency), 153 | }, 154 | } 155 | } 156 | } 157 | 158 | export function getCombinedResults(results: Result[], dataFrequency: number) { 159 | let resultsCondensed: Result[] = [] 160 | let tempCombinedResult: ResultMapped|undefined = undefined 161 | let counter = 0 162 | for (let i = results.length - 1; i >= 0; i--) { 163 | const currentResult = getTweetResultMapped(results[i]) 164 | counter++ 165 | if (counter === dataFrequency) { 166 | tempCombinedResult = getCombinedResultAveraged(tempCombinedResult, currentResult, dataFrequency) 167 | resultsCondensed = tempCombinedResult ? 
[tempCombinedResult,...resultsCondensed] : resultsCondensed 168 | tempCombinedResult = undefined 169 | counter = 0 170 | } else { 171 | tempCombinedResult = getCombinedResultAveraged(tempCombinedResult, currentResult) 172 | } 173 | } 174 | 175 | return resultsCondensed 176 | } -------------------------------------------------------------------------------- /src/http/stream/HttpResilientStream.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Twitter, Inc. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | import { assert, Obj } from '../../util' 5 | import { testAfter } from '../../util/test' 6 | import { HttpEndpoint } from '../HttpEndpoint' 7 | import { EventContext, EventDispatcher, EventListener } from '../../util/event' 8 | import { HttpStream, HttpStreamError, HttpStreamEventType } from './HttpStream' 9 | 10 | export interface HttpResilientStreamOptions { 11 | 12 | // Time interval for trackign connection status 13 | intervalMs?: number 14 | 15 | // Timeout for original HTTP request connection attempt 16 | connectTimeoutMs?: number 17 | 18 | // Timeout between stream messages 19 | messageTimeoutMs?: number 20 | 21 | // Minimal boundry for exponential reconnect backoff 22 | connectionMinWaitMs?: number 23 | 24 | // Maximal boundry for exponential reconnect backoff 25 | connectionMaxWaitMs?: number 26 | } 27 | 28 | // 29 | // This class uses HttpStream to stream messages over an HTTP/S connection, adding 30 | // support for reconnecting (with exponential backoff) upon error or timeout. 
//
export class HttpResilientStream {
  private readonly intervalMs: number
  private readonly connectTimeoutMs: number
  private readonly messageTimeoutMs: number
  private readonly connectionMinWaitMs: number
  private readonly connectionMaxWaitMs: number

  private stream?: HttpStream
  private interval?: NodeJS.Timeout
  private connectionId = 0

  // Set while a connection attempt is pending, used for the connect timeout
  private connectTimestamp?: number

  // Time of the last message on the current stream, used for the message timeout
  private messageTimestamp?: number

  // Current reconnect backoff, and the timer waiting it out (only while 'waiting')
  private waitMs?: number
  private waitTimeout?: NodeJS.Timeout

  private eventDispatcher = new EventDispatcher()
  private state: 'connected' | 'connecting' | 'init' | 'waiting' = 'init'

  // Unspecified (or zero) options fall back to the defaults below, in milliseconds
  constructor(private endpoint: HttpEndpoint, private headers: Obj, options: HttpResilientStreamOptions = {}) {
    this.intervalMs = options.intervalMs || 1000
    this.connectTimeoutMs = options.connectTimeoutMs || 8000
    this.messageTimeoutMs = options.messageTimeoutMs || 5000
    this.connectionMinWaitMs = options.connectionMinWaitMs || 1000
    this.connectionMaxWaitMs = options.connectionMaxWaitMs || 8000
  }

  // Create a new underlying stream under a fresh connection id and start polling its status
  private createStream(): void {
    const connectionId = ++this.connectionId
    this.stream = new HttpStream(this.endpoint, this.headers)
    this.stream.addEventListener('connected', () => this.onConnect(connectionId))
    this.stream.addEventListener(
      'message',
      (_: string, context: EventContext) => this.onMessage(context, connectionId),
    )
    this.stream.addErrorListener(
      HttpStreamError.EANY,
      (event: HttpStreamError) => this.onStreamError(event, connectionId),
    )
    this.stream.copyEventListeners(this.eventDispatcher)
    this.interval = setInterval(() => this.onInterval(), this.intervalMs)
    this.connectTimestamp = Date.now()
  }

  // Close the underlying stream (if any), stop polling and forget its timestamps
  private destroyStream(): void {
    if (this.interval !== undefined) {
      clearInterval(this.interval)
    }
    this.stream?.close()
    this.interval = undefined
    this.stream = undefined
    this.connectTimestamp = undefined
    this.messageTimestamp = undefined
  }

  private onConnect(connectionId: number): void {
    if (connectionId !== this.connectionId) {
      console.log('HttpResilientStream: Ignoring old connection id', connectionId, 'already at', this.connectionId)
      return
    }
    console.log(`HttpResilientStream: Connected to ${this.endpoint.url} (id=${connectionId})`)
    assert(this.state === 'connecting', `Invalid state: ${this.state}`)
    this.connectTimestamp = undefined
    this.waitMs = undefined // a successful connection resets the backoff
    this.state = 'connected'
  }

  private onInterval(): void {
    // Using an interval and polling timestamps is a performance optimization,
    // so we don't have to call setTimeout after each message received

    const now = Date.now()

    if (this.state === 'connecting') {
      assert(this.connectTimestamp !== undefined, 'No connect timestamp')
      const elapsed = now - this.connectTimestamp!
      if (this.connectTimeoutMs <= elapsed) {
        console.log(`HttpResilientStream: Connection attempt timed out after ${elapsed} ms`)
        this.waitMs = undefined
        this.destroyStream()
        this.createStream()
      }
      return
    }

    if (this.state === 'connected') {
      // NOTE: until the first message arrives there is no message timestamp,
      // so elapsed is 0 and the message timeout does not apply
      const elapsed = now - (this.messageTimestamp || now)
      if (this.messageTimeoutMs <= elapsed) {
        console.log(`HttpResilientStream: Stream timed out after ${elapsed} ms`)
        this.destroyStream()
        this.createStream()
        this.state = 'connecting'
      }
      return
    }
  }

  private onMessage(context: EventContext, connectionId: number): void {
    // Test hook: when triggered, the message is dropped without refreshing the timestamp
    if (testAfter.MESSAGE_TIMEOUT(`cid${connectionId}`, 5, () => {
      console.debug('Simulating message drop')
      context.stopPropagation()
      return true
    })) return
    this.messageTimestamp = Date.now()
  }

  private onStreamError(error: HttpStreamError, connectionId: number): void {
    // A late error from a stream that was already replaced must not tear down its successor
    if (connectionId !== this.connectionId) {
      console.log('HttpResilientStream: Ignoring error from old connection id', connectionId, 'already at', this.connectionId)
      return
    }

    if (this.state === 'connecting') {
      console.log('HttpResilientStream: Error connecting to stream:', error.toStr())
      this.destroyStream()
      // Exponential backoff, bounded by the minimal and maximal wait
      this.waitMs = Math.min(this.waitMs ? this.waitMs * 2 : this.connectionMinWaitMs, this.connectionMaxWaitMs)
      console.log(`HttpResilientStream: Waiting ${this.waitMs} ms before reconnecting`)
      this.state = 'waiting'
      // Keep the handle so disconnect() can cancel the pending reconnect
      this.waitTimeout = setTimeout(
        () => {
          this.waitTimeout = undefined
          console.log('HttpResilientStream: Done waiting, attempting to reconnect')
          this.createStream()
          this.state = 'connecting'
        },
        this.waitMs,
      )
      return
    }

    if (this.state === 'connected') {
      console.log('HttpResilientStream: Error detected while streaming:', error.toStr())
      console.log('HttpResilientStream: Attempting to reconnect')
      this.destroyStream()
      this.createStream()
      this.state = 'connecting'
      return
    }
  }

  // Register a listener on this stream's dispatcher and on the current underlying stream, if any
  public addEventListener(type: HttpStreamEventType, listener: EventListener): void {
    this.eventDispatcher.addListener(type, listener)
    if (this.stream) {
      this.stream.addEventListener(type, listener)
    }
  }

  public copyEventListeners(dispatcher: EventDispatcher): void {
    this.eventDispatcher.copyListeners(dispatcher)
  }

  // Start streaming. Only valid in the initial (or disconnected) state
  public connect(): void {
    assert(this.state === 'init', 'Stream already connected')
    assert(this.stream === undefined, 'Has stream')
    assert(this.interval === undefined, 'Has interval')
    this.createStream()
    this.state = 'connecting'
  }

  // Stop streaming and cancel any pending reconnect, returning to the initial state
  public disconnect(): void {
    assert(this.state !== 'init', 'Stream not connected')
    if (this.state === 'connected' || this.state === 'connecting') {
      assert(this.stream !== undefined, 'No stream')
      assert(this.interval !== undefined, 'No interval')
      this.destroyStream()
    }
    if (this.state === 'waiting') {
      // Without this, the backoff timer would reconnect after the disconnect
      if (this.waitTimeout !== undefined) {
        clearTimeout(this.waitTimeout)
        this.waitTimeout = undefined
      }
      this.waitMs = undefined
    }
    this.state = 'init'
  }
}

--------------------------------------------------------------------------------
/src/twitter/TwitterStreamer.ts:
--------------------------------------------------------------------------------
// Copyright 2021 Twitter, Inc.
// SPDX-License-Identifier: Apache-2.0

import { KVStore } from '../database'
import { TwitterAccount } from './TwitterAccount'
import { TwitterStream, TwitterStreamOptions } from './TwitterStream'
import { CountersLevel, assert, assertInteger, counters } from '../util'
import { SimpleEventDispatcher, SimpleEventListener } from '../util/event'
import { TwitterStreamInterface, StreamedTweet } from './TwitterStreamInterface'

export interface TwitterStreamerOptions {

  // Start the stream with a few (1-5) minutes backfill
  backfillMarginMinutes?: number

  // Time between heartbeats
  heartbeatIntervalMs?: number

  // Time interval for monitoring counters
  heartbeatMonitoringIntervalMs?: number

  // Counter monitoring level: debug, info, warn or error
  heartbeatMonitoringLevel?: CountersLevel

  // If no messages are generated during this number of heartbeats, crash the process
  heartbeatSilentIntervalsLimit?: number

  // Key-value datastore for tracking heartbeats
  heartbeatStore?: KVStore

  // Twitter account for connecting to PowerTrack (if specified, twitterStream must be undefined)
  twitterAccount?: TwitterAccount

  // Existing Twitter stream (if specified, twitterAccount must be undefined)
  twitterStream?: TwitterStreamInterface

  // Options for creating a new stream (if twitterAccount is provided)
  twitterStreamOptions?: TwitterStreamOptions
}

//
// Stream Tweets from the PowerTrack API on a specific account or through an existing
// TwitterStreamInterface (e.g. a FakeTwitterStream).
//
// This class supports backfill on Twitter streams. Backfill, specified in (1-5) minutes,
// starts the stream a few minutes in the past. This is useful in cases of failure, where
// the streamer needs to be restarted and needs to catch up on Tweets lost during its
// downtime.
//
// To facilitate backfill, this class provides a heartbeat mechanism. Heartbeat tracks
// incoming messages every time interval. It serves two functions:
//   1. Crash the process if the stream is silent for specified number of heartbeats
//   2. Optionally record the last time a message was received. The recorded time can
//      be used to determine required backfill, in case the streamer process crashes
//      or is terminated, and a replacement process is spun up
//
export class TwitterStreamer {
  private readonly backfillMarginMinutes: number
  private readonly heartbeatIntervalMs?: number
  private readonly heartbeatMonitoringIntervalMs?: number
  private readonly heartbeatMonitoringLevel: CountersLevel
  private readonly heartbeatSilentIntervalsLimit: number
  private readonly heartbeatStore?: KVStore
  private readonly twitterAccount?: TwitterAccount
  private readonly twitterStream?: TwitterStreamInterface
  private readonly twitterStreamOptions?: TwitterStreamOptions

  private stream?: TwitterStreamInterface
  private readonly dispatcher = new SimpleEventDispatcher()

  // Listeners on a caller-provided stream are registered once, even across reconnects
  private streamListenerAdded = false

  private heartbeat?: NodeJS.Timeout
  private heartbeatSlideCounter = 0 // consecutive heartbeats without new Tweets
  private lastTweetCount = 0 // total Tweet counter value seen at the last non-silent heartbeat

  constructor(options: TwitterStreamerOptions) {
    this.backfillMarginMinutes = options.backfillMarginMinutes || 0
    assertInteger(this.backfillMarginMinutes, 0, 5, `Invalid backgill margin: ${this.backfillMarginMinutes} minutes`)

    assert(
      options.twitterAccount !== undefined || options.twitterStream !== undefined,
      'Twitter streamer requires either account or stream',
    )
    assert(
      options.twitterAccount === undefined || options.twitterStream === undefined,
      'Twitter streamer cannot accept both account and stream',
    )
    assert(
      options.twitterStream === undefined || options.twitterStreamOptions === undefined,
      'Twitter stremer cannot accept stream options with stream',
    )
    this.twitterAccount = options.twitterAccount
    this.twitterStream = options.twitterStream
    // Stored so connect() can pass the options on to the stream it creates
    this.twitterStreamOptions = options.twitterStreamOptions

    if (options.heartbeatIntervalMs !== undefined) {
      this.heartbeatIntervalMs = assertInteger(
        options.heartbeatIntervalMs,
        100,
        undefined,
        `Invalid heartbeat interval: ${options.heartbeatIntervalMs}`,
      )
    }
    this.heartbeatMonitoringIntervalMs = options.heartbeatMonitoringIntervalMs || options.heartbeatIntervalMs
    this.heartbeatMonitoringLevel = options.heartbeatMonitoringLevel || 'info'
    this.heartbeatSilentIntervalsLimit = assertInteger(
      options.heartbeatSilentIntervalsLimit || 3,
      1,
      undefined,
      `Invalid heartbeat silent intervals limit: ${options.heartbeatSilentIntervalsLimit}`
    )
    this.heartbeatStore = options.heartbeatStore
  }

  //
  // Called every heartbeat interval. A heartbeat with no new Tweets counts as silent;
  // reaching the silent intervals limit terminates the process. A heartbeat with new
  // Tweets resets the silent count and records the current time in the heartbeat store.
  //
  private onHeartbeat(): void {
    const currentTweetCount = counters.debug.streamer.totalTweets.value
    if (this.lastTweetCount === currentTweetCount) {
      // The silent count must survive until Tweets arrive, otherwise a limit above 1 is never reached
      if (++this.heartbeatSlideCounter >= this.heartbeatSilentIntervalsLimit) {
        console.error('\nConnection too slient. Terminating')
        // NOTE(review): exit code 0 reports success although this is a failure path.
        // Confirm what the process supervisor expects before changing it
        process.exit(0)
      }
      // Nothing was received, so the recorded heartbeat is left untouched
      return
    }
    this.heartbeatSlideCounter = 0
    this.lastTweetCount = currentTweetCount
    this.heartbeatStore?.set('heartbeat', { date: (new Date()).toISOString() })
  }

  // Count and dispatch a streamed Tweet. The heartbeat is started lazily on the first Tweet
  private onStreamedTweet(streamedTweet: StreamedTweet): void {
    if (!this.heartbeat && this.heartbeatIntervalMs !== undefined) {
      counters.monitor(this.heartbeatMonitoringIntervalMs, this.heartbeatMonitoringLevel)
      this.heartbeat = setInterval(() => this.onHeartbeat(), this.heartbeatIntervalMs)
    }
    counters.debug.streamer.totalTweets.inc()
    this.dispatcher.fire(streamedTweet)
  }

  public addListener(listener: SimpleEventListener): void {
    this.dispatcher.addListener(listener)
  }

  //
  // Connect to the provided stream, or create a new stream on the provided account.
  // For a new stream with shouldBackfill set, the backfill is the time elapsed since
  // the recorded heartbeat (rounded up to minutes) plus the backfill margin, capped
  // at 5 minutes. Without a recorded heartbeat there is no backfill.
  //
  public async connect(shouldBackfill = false): Promise<void> {
    assert(this.stream === undefined, 'Already connected')
    if (this.twitterStream) {
      this.stream = this.twitterStream
      if (!this.streamListenerAdded) {
        this.stream.addListener((streamedTweet: StreamedTweet) => this.onStreamedTweet(streamedTweet))
        this.streamListenerAdded = true
      }
    } else {
      let backfillMinutes = 0
      if (shouldBackfill) {
        const heartbeat = await this.heartbeatStore?.get('heartbeat')
        if (heartbeat) {
          const elapsed = Date.now() - (new Date(heartbeat.date)).getTime()
          backfillMinutes = this.backfillMarginMinutes + Math.ceil(elapsed / 60000)
        }
        if (5 < backfillMinutes) {
          console.warn(`Data was lost. ${backfillMinutes} minutes time gap is over 5 minute backfill limit`)
          backfillMinutes = 5
        }
        console.log(`Backfilling ${backfillMinutes} minutes`)
      }
      const opts = { ...(this.twitterStreamOptions || {}), ...(0 < backfillMinutes ? { backfillMinutes } : {}) }
      this.stream = new TwitterStream(this.twitterAccount!, opts)
      this.stream.addListener((streamedTweet: StreamedTweet) => this.onStreamedTweet(streamedTweet))
    }
    this.stream.connect()
  }

  // Stop the heartbeat and disconnect the stream
  public disconnect(): void {
    assert(this.stream !== undefined, 'Not connected')
    if (this.heartbeat) {
      clearInterval(this.heartbeat)
      this.heartbeat = undefined
    }
    this.stream!.disconnect()
    this.stream = undefined
  }
}
--------------------------------------------------------------------------------