├── .github ├── workflows │ ├── .java-version │ └── build.yaml └── renovate.json5 ├── settings.gradle ├── .gitignore ├── example-toot.png ├── example-tweet.png ├── gradle ├── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties └── libs.versions.toml ├── .editorconfig ├── src ├── main │ └── kotlin │ │ └── com │ │ └── jakewharton │ │ └── dumbo │ │ ├── twitterImageApi.kt │ │ ├── util.kt │ │ ├── identityMapping.kt │ │ ├── dumboDb.kt │ │ ├── mastodon.kt │ │ ├── mediaDb.kt │ │ ├── mastodonAuth.kt │ │ ├── cli.kt │ │ ├── mastodonApi.kt │ │ ├── app.kt │ │ └── twitterArchive.kt └── test │ └── kotlin │ └── com │ └── jakewharton │ └── dumbo │ ├── InMemoryDumboDb.kt │ ├── MastodonApiTest.kt │ └── MastodonTest.kt ├── gradlew.bat ├── README.md ├── gradlew └── LICENSE.txt /.github/workflows/.java-version: -------------------------------------------------------------------------------- 1 | 25 2 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'dumbo' 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Gradle 2 | build 3 | .gradle 4 | /reports 5 | 6 | # IntelliJ 7 | .idea 8 | -------------------------------------------------------------------------------- /example-toot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JakeWharton/dumbo/HEAD/example-toot.png -------------------------------------------------------------------------------- /example-tweet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JakeWharton/dumbo/HEAD/example-tweet.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JakeWharton/dumbo/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yaml] 12 | indent_style = space 13 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/twitterImageApi.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import okhttp3.ResponseBody 4 | import retrofit2.http.GET 5 | import retrofit2.http.Path 6 | import retrofit2.http.Streaming 7 | 8 | interface TwimgApi { 9 | @Streaming 10 | @GET("/media/{filename}:{quality}") 11 | suspend fun downloadImage( 12 | @Path("filename") filename: String, 13 | @Path("quality") quality: String, 14 | ): ResponseBody 15 | } 16 | -------------------------------------------------------------------------------- /src/test/kotlin/com/jakewharton/dumbo/InMemoryDumboDb.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | class InMemoryDumboDb(vararg pairs: Pair) : DumboDb { 4 | private val map = mutableMapOf(*pairs) 5 | 6 | override fun contains(tweetId: String) = tweetId in map 7 | 8 | override fun get(tweetId: String): String? { 9 | return map[tweetId] 10 | } 11 | 12 | override fun minusAssign(tweetId: String) { 13 | map -= tweetId 14 | } 15 | 16 | override fun set(tweetId: String, statusId: String?) { 17 | map[tweetId] = statusId 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | pull_request: {} 5 | workflow_dispatch: {} 6 | push: 7 | branches: 8 | - 'trunk' 9 | tags-ignore: 10 | - '**' 11 | 12 | env: 13 | GRADLE_OPTS: "-Dorg.gradle.jvmargs=-Xmx4g -Dorg.gradle.daemon=false -Dkotlin.incremental=false" 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v6 20 | - uses: gradle/actions/wrapper-validation@v5 21 | - uses: actions/setup-java@v5 22 | with: 23 | distribution: 'zulu' 24 | java-version-file: .github/workflows/.java-version 25 | - run: ./gradlew build 26 | -------------------------------------------------------------------------------- /.github/renovate.json5: -------------------------------------------------------------------------------- 1 | { 2 | $schema: 'https://docs.renovatebot.com/renovate-schema.json', 3 | extends: [ 4 | 'config:recommended', 5 | ], 6 | ignorePresets: [ 7 | // Ensure we get the latest version and are not pinned to old versions. 8 | 'workarounds:javaLTSVersions', 9 | ], 10 | customManagers: [ 11 | // Update .java-version file with the latest JDK version. 12 | { 13 | customType: 'regex', 14 | fileMatch: [ 15 | '\\.java-version$', 16 | ], 17 | matchStrings: [ 18 | '(?.*)\\n', 19 | ], 20 | datasourceTemplate: 'java-version', 21 | depNameTemplate: 'java', 22 | // Only write the major version. 23 | extractVersionTemplate: '^(?\\d+)', 24 | }, 25 | ], 26 | } 27 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/util.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import kotlin.reflect.KProperty1 4 | import kotlin.reflect.full.declaredMemberProperties 5 | 6 | fun Any?.toQuickPrettyString(): String { 7 | fun Any?.recurse() = toQuickPrettyString().prependIndent(" ").substring(2) 8 | return when { 9 | this is Collection<*> -> joinToString( 10 | separator = ",", 11 | prefix = "[", 12 | postfix = "\n]", 13 | transform = { "\n ${it.recurse()}"}, 14 | ) 15 | this is CharSequence -> "\"$this\"" 16 | this == null || !this::class.isData -> toString() 17 | else -> this::class.declaredMemberProperties.joinToString( 18 | separator = ",", 19 | prefix = "${this::class.simpleName}(", 20 | postfix = "\n)", 21 | transform = { 22 | "\n ${it.name} = ${(it as KProperty1).get(this).recurse()}" 23 | } 24 | ) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/identityMapping.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import java.nio.file.Path 4 | import kotlin.io.path.readText 5 | import kotlinx.serialization.SerialName 6 | import kotlinx.serialization.Serializable 7 | import net.peanuuutz.tomlkt.Toml 8 | 9 | interface IdentityMapping { 10 | fun map(userId: String, userHandle: String): String 11 | 12 | companion object { 13 | val Empty: IdentityMapping = TomlIdentityMapping() 14 | 15 | fun of( 16 | byId: Map = emptyMap(), 17 | byName: Map = emptyMap(), 18 | ): IdentityMapping { 19 | return TomlIdentityMapping(byId, byName) 20 | } 21 | 22 | fun loadToml(toml: Path): IdentityMapping { 23 | val parsed = Toml.decodeFromString(TomlIdentityMapping.serializer(), toml.readText()) 24 | return TomlIdentityMapping( 25 | byId = parsed.byId, 26 | byName = parsed.byName, 27 | ) 28 | } 29 | } 30 | } 31 | 32 | @Serializable 33 | private class TomlIdentityMapping( 34 | @SerialName("by-id") 35 | val byId: Map = emptyMap(), 36 | @SerialName("by-name") 37 | val byName: Map = emptyMap(), 38 | ): IdentityMapping { 39 | 40 | override fun map(userId: String, userHandle: String): String { 41 | return byId[userId] ?: byName[userHandle] ?: "@${userHandle}@twitter.com" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /gradle/libs.versions.toml: -------------------------------------------------------------------------------- 1 | [versions] 2 | kotlin = "2.3.0" 3 | retrofit = "3.0.0" 4 | okhttp = "5.3.2" 5 | kotlinx-serialization = "1.9.0" 6 | 7 | [libraries] 8 | kotlin-gradlePlugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" } 9 | kotlin-serializationPlugin = { module = "org.jetbrains.kotlin:kotlin-serialization", version.ref = "kotlin" } 10 | 11 | kotlinx-coroutines-core = "org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2" 12 | 13 | kotlinx-serialization-core = { module = "org.jetbrains.kotlinx:kotlinx-serialization-core", version.ref = "kotlinx-serialization" } 14 | kotlinx-serialization-json = { module = "org.jetbrains.kotlinx:kotlinx-serialization-json", version.ref = "kotlinx-serialization" } 15 | kotlinx-serialization-json-okio = { module = "org.jetbrains.kotlinx:kotlinx-serialization-json-okio", version.ref = "kotlinx-serialization" } 16 | 17 | retrofit-core = { module = "com.squareup.retrofit2:retrofit", version.ref = "retrofit" } 18 | retrofit-converter-kotlinxSerialization = { module = "com.squareup.retrofit2:converter-kotlinx-serialization", version.ref = "retrofit" } 19 | 20 | okhttp-core = { module = "com.squareup.okhttp3:okhttp", version.ref = "okhttp" } 21 | okhttp-loggingInterceptor = { module = "com.squareup.okhttp3:logging-interceptor", version.ref = "okhttp" } 22 | 23 | tika-core = "org.apache.tika:tika-core:3.2.3" 24 | 25 | clikt = "com.github.ajalt.clikt:clikt:5.0.3" 26 | okio = "com.squareup.okio:okio:3.16.4" 27 | jsoup = "org.jsoup:jsoup:1.21.2" 28 | tomlkt = "net.peanuuutz:tomlkt:0.2.0" 29 | 30 | junit = "junit:junit:4.13.2" 31 | assertk = "com.willowtreeapps.assertk:assertk:0.28.1" 32 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/dumboDb.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import java.nio.file.Path 4 | import kotlin.io.path.appendText 5 | import kotlin.io.path.exists 6 | import kotlin.io.path.notExists 7 | import kotlin.io.path.readLines 8 | import kotlin.io.path.writeLines 9 | import kotlin.io.path.writeText 10 | 11 | interface DumboDb { 12 | operator fun contains(tweetId: String): Boolean 13 | operator fun get(tweetId: String): String? 14 | operator fun minusAssign(tweetId: String) 15 | operator fun set(tweetId: String, statusId: String?) 16 | } 17 | 18 | class NioPathDumboDb( 19 | directory: Path, 20 | ) : DumboDb { 21 | private val opLogPath = directory.resolve("dumbo_log.txt") 22 | 23 | override operator fun contains(tweetId: String): Boolean { 24 | if (opLogPath.notExists()) return false 25 | val startString = "$tweetId " 26 | return opLogPath.readLines() 27 | .any { it == tweetId || it.startsWith(startString) } 28 | } 29 | 30 | override operator fun get(tweetId: String): String? { 31 | val startString = "$tweetId " 32 | val line = opLogPath.readLines() 33 | .first { it == tweetId || it.startsWith(startString) } 34 | return line.split(' ', limit = 2).getOrNull(1) 35 | } 36 | 37 | override operator fun minusAssign(tweetId: String) { 38 | opLogPath.writeLines( 39 | opLogPath.readLines() 40 | .filter { !it.startsWith("$tweetId ") }, 41 | ) 42 | } 43 | 44 | override operator fun set(tweetId: String, statusId: String?) { 45 | val output = buildString { 46 | append(tweetId) 47 | if (statusId != null) { 48 | append(' ') 49 | append(statusId) 50 | } 51 | append('\n') 52 | } 53 | // TODO https://youtrack.jetbrains.com/issue/KT-55659 54 | if (opLogPath.exists()) { 55 | opLogPath.appendText(output) 56 | } else { 57 | opLogPath.writeText(output) 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/mastodon.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import com.jakewharton.dumbo.Tweet.MediaEntity 4 | import com.jakewharton.dumbo.Tweet.MentionEntity 5 | import com.jakewharton.dumbo.Tweet.UrlEntity 6 | import java.time.Instant 7 | 8 | data class Toot( 9 | val text: String, 10 | val posted: Instant, 11 | val language: String, 12 | val inReplyToId: String? = null, 13 | val media: List = emptyList(), 14 | ) { 15 | data class Media( 16 | val id: String, 17 | val filename: String, 18 | ) 19 | 20 | companion object { 21 | fun fromTweet( 22 | tweet: Tweet, 23 | dumboDb: DumboDb, 24 | identityMapping: IdentityMapping, 25 | ): Toot { 26 | val text = buildString { 27 | var index = 0 28 | for (entity in tweet.entities.sortedBy { it.indices.first }) { 29 | if (entity.indices.first > index) { 30 | append(tweet.text.substring(index, entity.indices.first)) 31 | } 32 | when (entity) { 33 | is UrlEntity -> { 34 | append(entity.url) 35 | } 36 | is MentionEntity -> { 37 | append(identityMapping.map(entity.id, entity.username)) 38 | } 39 | is MediaEntity -> { 40 | // If the text is already non-empty then it will have contained a space 41 | // between the existing text and the URL to the media. Remove that space. 42 | if (isNotEmpty()) { 43 | check(this[lastIndex] == ' ') 44 | deleteCharAt(lastIndex) 45 | } 46 | // Nothing to append as text! 47 | } 48 | } 49 | index = entity.indices.last 50 | } 51 | if (index < tweet.text.length) { 52 | append(tweet.text.substring(index)) 53 | } 54 | } 55 | val media = buildList { 56 | for (entity in tweet.entities.filterIsInstance()) { 57 | this += Media( 58 | id = entity.id, 59 | filename = entity.filename, 60 | ) 61 | } 62 | } 63 | val inReplyToId = if (tweet.inReplyToId == null) { 64 | null 65 | } else { 66 | checkNotNull(dumboDb[tweet.inReplyToId]) { 67 | "Unable to map tweet ${tweet.id} replying to ${tweet.inReplyToId} without tootMap entry" 68 | } 69 | } 70 | return Toot( 71 | text = text, 72 | posted = tweet.createdAt, 73 | language = tweet.language, 74 | inReplyToId = inReplyToId, 75 | media = media, 76 | ) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/kotlin/com/jakewharton/dumbo/MastodonApiTest.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import assertk.assertThat 4 | import assertk.assertions.isEqualTo 5 | import org.junit.Test 6 | 7 | class MastodonApiTest { 8 | @Test fun namedEntityUnescaping() { 9 | val input = StatusEntity( 10 | id = "1", 11 | rawContent = """Hacked together an ActionBar helper which uses native on 3.0+ and GreenDroid on pre-3.0 through single API. Will polish & release this week.""" 12 | ) 13 | val expected = "Hacked together an ActionBar helper which uses native on 3.0+ and GreenDroid on pre-3.0 through single API. Will polish & release this week." 14 | assertThat(input.content).isEqualTo(expected) 15 | } 16 | 17 | @Test fun multipleParagraphsAndLinks() { 18 | val input = StatusEntity( 19 | id = "1", 20 | rawContent = """

ThreeTenABP 1.4.3 released which bumps the ThreeTenBP dependency to 1.6.4 and includes the 2022f tzdb.

ThreeTenABP changes: github.com/JakeWharton/ThreeTe

ThreeTenBP changes: threeten.org/threetenbp/change

2022f tzdb changes: mm.icann.org/pipermail/tz-anno

""" 21 | ) 22 | val expected = """ 23 | |ThreeTenABP 1.4.3 released which bumps the ThreeTenBP dependency to 1.6.4 and includes the 2022f tzdb. 24 | | 25 | |ThreeTenABP changes: https://github.com/JakeWharton/ThreeTenABP/blob/trunk/CHANGELOG.md#version-143-2022-11-03 26 | | 27 | |ThreeTenBP changes: https://www.threeten.org/threetenbp/changes-report.html#a1.6.4 28 | | 29 | |2022f tzdb changes: https://mm.icann.org/pipermail/tz-announce/2022-October/000075.html 30 | """.trimMargin() 31 | assertThat(input.content).isEqualTo(expected) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/mediaDb.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import java.nio.file.Files 4 | import java.nio.file.Path 5 | import kotlin.io.path.createDirectories 6 | import kotlin.io.path.div 7 | import kotlin.io.path.exists 8 | import kotlin.io.path.notExists 9 | import kotlin.time.Duration.Companion.seconds 10 | import kotlinx.coroutines.delay 11 | import okhttp3.MediaType 12 | import okhttp3.MediaType.Companion.toMediaType 13 | import okhttp3.MultipartBody 14 | import okhttp3.RequestBody 15 | import okio.BufferedSink 16 | import okio.sink 17 | import okio.source 18 | import org.apache.tika.Tika 19 | import retrofit2.HttpException 20 | 21 | class MediaDb( 22 | directory: Path, 23 | private val mastodonApi: MastodonApi, 24 | private val authentication: String, 25 | private val twimgApi: TwimgApi, 26 | ) { 27 | private val tika = Tika() 28 | private val originalDir = directory / "dumbo-media" 29 | private val archiveDir = directory / "data/tweets_media" 30 | 31 | suspend fun uploadMedia(id: String, path: String): String { 32 | val filename = "$id-$path" 33 | val original = originalDir / filename 34 | val archived = archiveDir / filename 35 | 36 | if (original.notExists()) { 37 | try { 38 | twimgApi.downloadImage(path, "orig").source().use { source -> 39 | originalDir.createDirectories() 40 | original.sink().use(source::readAll) 41 | } 42 | } catch (e: HttpException) { 43 | if (e.code() != 404) { 44 | throw e 45 | } 46 | } 47 | } 48 | 49 | val upload = original.takeIf { it.exists() } 50 | ?: archived.takeIf { it.exists() } 51 | ?: throw IllegalStateException("No media available for $id $path") 52 | 53 | val contentType = tika.detect(upload).toMediaType() 54 | 55 | val uploadResponse = mastodonApi.uploadMedia( 56 | authorization = authentication, 57 | file = MultipartBody.Part.createFormData("file", id, PathRequestBody(upload, contentType)), 58 | description = "", 59 | ) 60 | return when (uploadResponse.code()) { 61 | 200 -> { 62 | // Media was small enough to be processed synchronously. 63 | uploadResponse.body()!!.id 64 | } 65 | 202 -> { 66 | val attachmentId = uploadResponse.body()!!.id 67 | // Media was enqueued to be processed. Wait for it to be processed... 68 | while (true) { 69 | delay(10.seconds) 70 | val getResponse = mastodonApi.getMedia(authentication, attachmentId) 71 | when (getResponse.code()) { 72 | 200 -> break 73 | 206 -> continue 74 | else -> throw HttpException(getResponse) 75 | } 76 | } 77 | attachmentId 78 | } 79 | else -> throw HttpException(uploadResponse) 80 | } 81 | } 82 | } 83 | 84 | private class PathRequestBody( 85 | private val path: Path, 86 | private val contentType: MediaType, 87 | ) : RequestBody() { 88 | override fun contentType() = contentType 89 | override fun contentLength() = Files.size(path) 90 | 91 | override fun writeTo(sink: BufferedSink) { 92 | path.source().use(sink::writeAll) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | 74 | 75 | @rem Execute Gradle 76 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* 77 | 78 | :end 79 | @rem End local scope for the variables with windows NT shell 80 | if %ERRORLEVEL% equ 0 goto mainEnd 81 | 82 | :fail 83 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 84 | rem the _cmd.exe /c_ return code! 85 | set EXIT_CODE=%ERRORLEVEL% 86 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 87 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 88 | exit /b %EXIT_CODE% 89 | 90 | :mainEnd 91 | if "%OS%"=="Windows_NT" endlocal 92 | 93 | :omega 94 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/mastodonAuth.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import java.nio.file.Path 4 | import java.util.Scanner 5 | import kotlin.io.path.exists 6 | import kotlin.io.path.readText 7 | import kotlin.io.path.writeText 8 | import kotlinx.serialization.Serializable 9 | import kotlinx.serialization.encodeToString 10 | import kotlinx.serialization.json.Json 11 | import kotlinx.serialization.json.JsonObject 12 | import okhttp3.HttpUrl 13 | 14 | class MastodonAuthenticator( 15 | directory: Path, 16 | private val host: HttpUrl, 17 | private val api: MastodonApi, 18 | private val scanner: Scanner, 19 | ) { 20 | private val dumboAuthPath = directory.resolve("dumbo_auth.json") 21 | 22 | suspend fun obtain(): String { 23 | var auth = if (dumboAuthPath.exists()) { 24 | val jsonObject = json.decodeFromString(JsonObject.serializer(), dumboAuthPath.readText()) 25 | val serializer = if ("access_token" in jsonObject) { 26 | MastodonAuthStage2.serializer() 27 | } else { 28 | MastodonAuthStage1.serializer() 29 | } 30 | json.decodeFromJsonElement(serializer, jsonObject) 31 | } else { 32 | val createApplicationEntity = api.createApplication( 33 | clientName = "Dumbo Tweet Importer", 34 | redirectUris = "urn:ietf:wg:oauth:2.0:oob", 35 | scopes = "read write", 36 | website = "https://github.com/JakeWharton/dumbo", 37 | ) 38 | val auth = MastodonAuthStage1( 39 | client_id = createApplicationEntity.client_id, 40 | client_secret = createApplicationEntity.client_secret, 41 | ) 42 | dumboAuthPath.writeText(json.encodeToString(auth)) 43 | auth 44 | } 45 | 46 | if (auth is MastodonAuthStage1) { 47 | val authUrl = host.newBuilder("oauth/authorize")!! 48 | .addQueryParameter("client_id", auth.client_id) 49 | .addQueryParameter("scope", "read write") 50 | .addQueryParameter("redirect_uri", "urn:ietf:wg:oauth:2.0:oob") 51 | .addQueryParameter("response_type", "code") 52 | .build() 53 | println() 54 | println("Visit $authUrl in your browser") 55 | print("Paste resulting code and press enter: ") 56 | val code = scanner.next()!! 57 | println() 58 | 59 | val createTokenEntity = api.createOauthToken( 60 | clientId = auth.client_id, 61 | clientSecret = auth.client_secret, 62 | redirectUri = "urn:ietf:wg:oauth:2.0:oob", 63 | grantType = "authorization_code", 64 | code = code, 65 | scope = "read write" 66 | ) 67 | check(createTokenEntity.token_type == "Bearer") 68 | check("write" in createTokenEntity.scope.split(" ")) 69 | auth = MastodonAuthStage2( 70 | client_id = auth.client_id, 71 | client_secret = auth.client_secret, 72 | access_token = createTokenEntity.access_token, 73 | ) 74 | dumboAuthPath.writeText(json.encodeToString(auth)) 75 | } 76 | 77 | check(auth is MastodonAuthStage2) 78 | val header = "Bearer ${auth.access_token}" 79 | api.verifyCredentials(header) 80 | 81 | return header 82 | } 83 | 84 | private companion object { 85 | val json = Json { prettyPrint = true } 86 | } 87 | } 88 | 89 | private sealed interface MastodonAuthStage 90 | 91 | @Serializable 92 | private data class MastodonAuthStage1( 93 | val client_id: String, 94 | val client_secret: String, 95 | ) : MastodonAuthStage 96 | 97 | @Serializable 98 | private data class MastodonAuthStage2( 99 | val client_id: String, 100 | val client_secret: String, 101 | val access_token: String, 102 | ) : MastodonAuthStage 103 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/cli.kt: -------------------------------------------------------------------------------- 1 | @file:JvmName("Main") 2 | 3 | package com.jakewharton.dumbo 4 | 5 | import com.github.ajalt.clikt.core.CliktCommand 6 | import com.github.ajalt.clikt.core.main 7 | import com.github.ajalt.clikt.parameters.arguments.argument 8 | import com.github.ajalt.clikt.parameters.arguments.help 9 | import com.github.ajalt.clikt.parameters.options.convert 10 | import com.github.ajalt.clikt.parameters.options.flag 11 | import com.github.ajalt.clikt.parameters.options.help 12 | import com.github.ajalt.clikt.parameters.options.option 13 | import com.github.ajalt.clikt.parameters.options.required 14 | import com.github.ajalt.clikt.parameters.types.path 15 | import java.nio.file.FileSystem 16 | import java.nio.file.FileSystems 17 | import kotlinx.coroutines.runBlocking 18 | import kotlinx.serialization.json.Json 19 | import okhttp3.HttpUrl.Companion.toHttpUrl 20 | import okhttp3.MediaType.Companion.toMediaType 21 | import okhttp3.OkHttpClient 22 | import okhttp3.logging.HttpLoggingInterceptor 23 | import okhttp3.logging.HttpLoggingInterceptor.Level.BASIC 24 | import retrofit2.Retrofit 25 | import retrofit2.converter.kotlinx.serialization.asConverterFactory 26 | import retrofit2.create 27 | 28 | fun main(vararg args: String) { 29 | DumboCommand(FileSystems.getDefault()).main(args) 30 | } 31 | 32 | private class DumboCommand( 33 | fs: FileSystem, 34 | ) : CliktCommand(name = "dumbo") { 35 | private val debug by option(hidden = true).flag() 36 | private val host by option("--host", metavar = "URL") 37 | .help("Mastodon server host") 38 | .convert { it.toHttpUrl() } 39 | .required() 40 | private val edits by option() 41 | .help("Edit Mastodon posts if Tweet or mapping changed") 42 | .flag() 43 | private val identityMapping by option("--identities", metavar = "TOML") 44 | .help(""" 45 | |A TOML file mapping Twitter IDs or usernames to Mastodon handles 46 | | 47 | |Format: 48 | |``` 49 | |[by-id] 50 | |123="@foo@example.com" 51 | | 52 | |[by-name] 53 | |bar="@bar@example.com" 54 | |``` 55 | |""".trimMargin()) 56 | .path(fileSystem = fs, canBeDir = false) 57 | private val archiveDir by argument(name = "ARCHIVE") 58 | .help("Directory of extracted Twitter archive") 59 | .path(fileSystem = fs, mustExist = true, canBeFile = false) 60 | 61 | override fun run() { 62 | val okhttp = OkHttpClient.Builder() 63 | .apply { 64 | if (debug) { 65 | addInterceptor(HttpLoggingInterceptor(::println).setLevel(BASIC)) 66 | } 67 | } 68 | .build() 69 | 70 | val json = Json { 71 | ignoreUnknownKeys = true 72 | } 73 | val converterFactory = json.asConverterFactory("application/json".toMediaType()) 74 | 75 | val mastodonApi = Retrofit.Builder() 76 | .client(okhttp) 77 | .baseUrl(host) 78 | .addConverterFactory(converterFactory) 79 | .build() 80 | .create() 81 | 82 | val twimgApi = Retrofit.Builder() 83 | .client(okhttp) 84 | .baseUrl("https://pbs.twimg.com") 85 | .build() 86 | .create() 87 | 88 | // Parse outside of Clikt converter so exceptions propagate. 89 | val identityMapping = identityMapping?.let(IdentityMapping::loadToml) ?: IdentityMapping.Empty 90 | 91 | val dumboApp = DumboApp(mastodonApi, twimgApi) 92 | try { 93 | runBlocking { 94 | dumboApp.run(host, archiveDir, identityMapping, edits, debug) 95 | } 96 | } finally { 97 | okhttp.connectionPool.evictAll() 98 | okhttp.dispatcher.executorService.shutdown() 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/mastodonApi.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import kotlinx.serialization.SerialName 4 | import kotlinx.serialization.Serializable 5 | import kotlinx.serialization.Transient 6 | import kotlinx.serialization.json.JsonObject 7 | import okhttp3.MultipartBody 8 | import org.jsoup.Jsoup 9 | import retrofit2.Response 10 | import retrofit2.http.Field 11 | import retrofit2.http.FormUrlEncoded 12 | import retrofit2.http.GET 13 | import retrofit2.http.Header 14 | import retrofit2.http.Multipart 15 | import retrofit2.http.POST 16 | import retrofit2.http.PUT 17 | import retrofit2.http.Part 18 | import retrofit2.http.Path 19 | 20 | interface MastodonApi { 21 | @FormUrlEncoded 22 | @POST("api/v1/apps") 23 | suspend fun createApplication( 24 | @Field("client_name") clientName: String, 25 | @Field("redirect_uris") redirectUris: String, 26 | @Field("scopes") scopes: String, 27 | @Field("website") website: String, 28 | ): CreateApplicationEntity 29 | 30 | @FormUrlEncoded 31 | @POST("oauth/token") 32 | suspend fun createOauthToken( 33 | @Field("client_id") clientId: String, 34 | @Field("client_secret") clientSecret: String, 35 | @Field("redirect_uri") redirectUri: String, 36 | @Field("grant_type") grantType: String, 37 | @Field("code") code: String, 38 | @Field("scope") scope: String, 39 | ): CreateTokenEntity 40 | 41 | @GET("api/v1/accounts/verify_credentials") 42 | suspend fun verifyCredentials( 43 | @Header("Authorization") authorization: String, 44 | ): AccountEntity 45 | 46 | @FormUrlEncoded 47 | @POST("api/v1/statuses") 48 | suspend fun createStatus( 49 | @Header("Authorization") authorization: String, 50 | @Header("Idempotency-Key") idempotency: String, 51 | @Field("status") content: String, 52 | @Field("language") language: String?, 53 | @Field("created_at") createdAt: String, 54 | @Field("in_reply_to_id") inReplyToId: String?, 55 | @Field("media_ids[]") mediaIds: List, 56 | ): StatusEntity 57 | 58 | @GET("api/v1/statuses/{id}") 59 | suspend fun getStatus( 60 | @Path("id") id: String, 61 | ): StatusEntity 62 | 63 | @FormUrlEncoded 64 | @PUT("api/v1/statuses/{id}") 65 | suspend fun editStatus( 66 | @Header("Authorization") authorization: String, 67 | @Header("Idempotency-Key") idempotency: String, 68 | @Path("id") id: String, 69 | @Field("status") content: String, 70 | @Field("media_ids[]") mediaIds: List, 71 | ): StatusEntity 72 | 73 | @Multipart 74 | @POST("api/v2/media") 75 | suspend fun uploadMedia( 76 | @Header("Authorization") authorization: String, 77 | @Part file: MultipartBody.Part, 78 | @Part("description") description: String, 79 | ): Response 80 | 81 | @GET("api/v1/media/{id}") 82 | suspend fun getMedia( 83 | @Header("Authorization") authorization: String, 84 | @Path("id") id: String, 85 | ): Response 86 | } 87 | 88 | @Serializable 89 | data class CreateApplicationEntity( 90 | val client_id: String, 91 | val client_secret: String, 92 | ) 93 | 94 | @Serializable 95 | data class CreateTokenEntity( 96 | val access_token: String, 97 | val token_type: String, 98 | val scope: String, 99 | ) 100 | 101 | @Serializable 102 | data class AccountEntity( 103 | val id: String, 104 | ) 105 | 106 | @Serializable 107 | data class StatusEntity( 108 | val id: String, 109 | @SerialName("content") val rawContent: String, 110 | val media_attachments: List = emptyList(), 111 | ) { 112 | @Transient 113 | val content: String = run { 114 | val parsed = Jsoup.parseBodyFragment(rawContent).body() 115 | if (parsed.childrenSize() == 0) { 116 | parsed.text() 117 | } else { 118 | parsed.children().joinToString("\n\n") { it.text() } 119 | } 120 | } 121 | } 122 | 123 | @Serializable 124 | data class UploadMediaEntity( 125 | val id: String, 126 | ) 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dumbo Tweet Importer 2 | 3 | Import a Twitter archive into a Mastodon account. 4 | 5 | | Twitter | Mastodon | 6 | |------------------------|-----------------------| 7 | | ![](example-tweet.png) | ![](example-toot.png) | 8 | 9 | ## Usage 10 | 11 | Dumbo requires modifications be made to your Masotodon server. 12 | The Mastodon API does not allow inserting posts in the past. 13 | In order to write statuses which occur in the past, a `created_at` parameter is needed. 14 | 15 | ### Modifying Official Mastodon 16 | 17 | The following patches should be applied to the Mastodon web app. 18 | 19 |
20 | Mastodon 4.x 21 | Note: These diffs were produced against Mastodon v4.1.7. 22 | Slight changes may be needed for other 4.x versions. 23 | 24 | `app/controllers/api/v1/statuses_controller.rb`: 25 | ```diff 26 | @@ -61,4 +61,5 @@ 27 | visibility: status_params[:visibility], 28 | language: status_params[:language], 29 | + created_at: status_params[:created_at], 30 | scheduled_at: status_params[:scheduled_at], 31 | application: doorkeeper_token.application, 32 | @@ -130,4 +130,5 @@ 33 | :language, 34 | :scheduled_at, 35 | + :created_at, 36 | media_ids: [], 37 | media_attributes: [ 38 | ``` 39 | 40 | `app/services/post_status_service.rb`: 41 | ```diff 42 | @@ -58,4 +58,5 @@ 43 | @visibility = @options[:visibility] || @account.user&.setting_default_privacy 44 | @visibility = :unlisted if @visibility&.to_sym == :public && @account.silenced? 45 | + @created_at = @options[:created_at]&.to_datetime 46 | @scheduled_at = @options[:scheduled_at]&.to_datetime 47 | @scheduled_at = nil if scheduled_in_the_past? 48 | @@ -99,6 +99,8 @@ 49 | Trends.tags.register(@status) 50 | LinkCrawlWorker.perform_async(@status.id) 51 | - DistributionWorker.perform_async(@status.id) 52 | - ActivityPub::DistributionWorker.perform_async(@status.id) 53 | + if not @options[:created_at] 54 | + DistributionWorker.perform_async(@status.id) 55 | + ActivityPub::DistributionWorker.perform_async(@status.id) 56 | + end 57 | PollExpirationNotifyWorker.perform_at(@status.poll.expires_at, @status.poll.id) if @status.poll 58 | end 59 | @@ -174,4 +174,5 @@ 60 | visibility: @visibility, 61 | language: valid_locale_cascade(@options[:language], @account.user&.preferred_posting_language, I18n.default_locale), 62 | + created_at: @created_at, 63 | application: @options[:application], 64 | rate_limit: @options[:with_rate_limit], 65 | ``` 66 | 67 |
68 | 69 |
70 | Mastodon 3.x 71 | Note: These diffs were produced against Mastodon v3.5.3. 72 | Slight changes may be needed for other 3.x versions. 73 | 74 | `app/controllers/api/v1/statuses_controller.rb`: 75 | ```diff 76 | @@ -46,4 +46,5 @@ 77 | visibility: status_params[:visibility], 78 | language: status_params[:language], 79 | + created_at: status_params[:created_at], 80 | scheduled_at: status_params[:scheduled_at], 81 | application: doorkeeper_token.application, 82 | @@ -110,4 +111,5 @@ 83 | :visibility, 84 | :language, 85 | + :created_at, 86 | :scheduled_at, 87 | media_ids: [], 88 | ``` 89 | 90 | `app/services/post_status_service.rb`: 91 | ```diff 92 | @@ -95,6 +95,8 @@ 93 | Trends.tags.register(@status) 94 | LinkCrawlWorker.perform_async(@status.id) 95 | - DistributionWorker.perform_async(@status.id) 96 | - ActivityPub::DistributionWorker.perform_async(@status.id) 97 | + if not @options[:created_at] 98 | + DistributionWorker.perform_async(@status.id) 99 | + ActivityPub::DistributionWorker.perform_async(@status.id) 100 | + end 101 | PollExpirationNotifyWorker.perform_at(@status.poll.expires_at, @status.poll.id) if @status.poll 102 | end 103 | @@ -168,4 +170,5 @@ 104 | visibility: @visibility, 105 | language: valid_locale_cascade(@options[:language], @account.user&.preferred_posting_language, I18n.default_locale), 106 | + created_at: @options[:created_at], 107 | application: @options[:application], 108 | rate_limit: @options[:with_rate_limit], 109 | ``` 110 |
111 | 112 | Apply these patches and then restart the app or container. 113 | 114 | When you are completely done with your import you can undo these changes. 115 | Either revert manually, reinstall the app, or recreate the container. 116 | 117 | ### Running Dumbo 118 | 119 | TODO 120 | 121 | # License 122 | 123 | Copyright 2022 Jake Wharton 124 | 125 | Licensed under the Apache License, Version 2.0 (the "License"); 126 | you may not use this file except in compliance with the License. 127 | You may obtain a copy of the License at 128 | 129 | http://www.apache.org/licenses/LICENSE-2.0 130 | 131 | Unless required by applicable law or agreed to in writing, software 132 | distributed under the License is distributed on an "AS IS" BASIS, 133 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 134 | See the License for the specific language governing permissions and 135 | limitations under the License. 136 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/app.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import java.nio.file.Path 4 | import java.time.ZoneOffset.UTC 5 | import java.util.Scanner 6 | import java.util.UUID 7 | import kotlin.system.exitProcess 8 | import okhttp3.HttpUrl 9 | import retrofit2.HttpException 10 | 11 | class DumboApp( 12 | private val mastodonApi: MastodonApi, 13 | private val twimgApi: TwimgApi, 14 | ) { 15 | suspend fun run( 16 | host: HttpUrl, 17 | archiveDir: Path, 18 | identityMapping: IdentityMapping, 19 | performEdits: Boolean, 20 | debug: Boolean, 21 | ) { 22 | fun debug(body: () -> Any) { 23 | if (debug) { 24 | println("DEBUG ${body()}") 25 | } 26 | } 27 | 28 | val scanner = Scanner(System.`in`) 29 | 30 | val authenticator = MastodonAuthenticator(archiveDir, host, mastodonApi, scanner) 31 | val authorization = authenticator.obtain() 32 | 33 | val mediaDb = MediaDb(archiveDir, mastodonApi, authorization, twimgApi) 34 | 35 | val twitterArchive = TwitterArchive(archiveDir) 36 | val tweets = twitterArchive.loadTweets() 37 | debug { "Loaded ${tweets.size} tweets" } 38 | 39 | val dumboDb = NioPathDumboDb(archiveDir) 40 | 41 | for (tweet in tweets) { 42 | if (tweet.isRetweet) { 43 | debug { "[${tweet.id}] Do not keep retweets of tweets from other authors" } 44 | continue 45 | } 46 | if (tweet.isMention) { 47 | debug { "[${tweet.id}] Do not keep @mentions to individual accounts" } 48 | continue 49 | } 50 | 51 | if (tweet.inReplyToId != null && tweet.inReplyToId !in dumboDb) { 52 | debug { "[${tweet.id}] Do not keep replies to tweets which are not my own or which we explicitly skipped" } 53 | continue 54 | } 55 | 56 | val existingStatus = if (tweet.id in dumboDb) { 57 | val existingTootId = dumboDb[tweet.id] 58 | if (existingTootId == null) { 59 | debug { "[${tweet.id}] This Tweet was explicitly ignored" } 60 | continue 61 | } 62 | if (!performEdits) { 63 | debug { "[${tweet.id}] This Tweet was already posted and we are not performing edits" } 64 | continue 65 | } 66 | try { 67 | mastodonApi.getStatus(existingTootId) 68 | } catch (e: HttpException) { 69 | if (e.code() == 404) { 70 | println("Cross-posted tweet (${tweet.url}) was deleted from Mastodon.") 71 | print("Remove from log ($inputYes, $inputNo, $inputSkip): ") 72 | when (val input = scanner.next()) { 73 | inputYes -> { 74 | dumboDb -= tweet.id 75 | println("-------") 76 | null 77 | } 78 | inputNo -> { 79 | return 80 | } 81 | inputSkip -> { 82 | println("-------") 83 | continue 84 | } 85 | else -> { 86 | System.err.println("Unknown input: $input") 87 | exitProcess(129) 88 | } 89 | } 90 | } else { 91 | throw e 92 | } 93 | } 94 | } else { 95 | null 96 | } 97 | 98 | val toot = Toot.fromTweet(tweet, dumboDb, identityMapping) 99 | 100 | if (existingStatus != null && isUpToDate(toot, existingStatus)) { 101 | debug { "[${tweet.id}] Existing post unchanged" } 102 | continue 103 | } 104 | 105 | println(tweet.toQuickPrettyString()) 106 | println() 107 | if (existingStatus != null) { 108 | println("OLD ${existingStatus.toQuickPrettyString()}") 109 | println() 110 | print("NEW ") 111 | } 112 | println(toot.toQuickPrettyString()) 113 | println() 114 | print("Post? ($inputYes, $inputNo, $inputSkip): ") 115 | when (val input = scanner.next()) { 116 | inputYes -> { 117 | // TODO Only upload media if media is what changed. 118 | val attachmentIds = buildList { 119 | for ((index, media) in toot.media.withIndex()) { 120 | debug { "[${tweet.id}] Uploading attachment ${index + 1} of ${toot.media.size}" } 121 | this += mediaDb.uploadMedia(media.id, media.filename) 122 | } 123 | } 124 | 125 | if (existingStatus != null) { 126 | mastodonApi.editStatus( 127 | id = existingStatus.id, 128 | authorization = authorization, 129 | idempotency = UUID.randomUUID().toString(), 130 | content = toot.text, 131 | mediaIds = attachmentIds, 132 | ) 133 | } else { 134 | val statusEntity = mastodonApi.createStatus( 135 | authorization = authorization, 136 | idempotency = UUID.randomUUID().toString(), 137 | content = toot.text, 138 | language = toot.language, 139 | createdAt = toot.posted.atOffset(UTC).toString(), 140 | inReplyToId = toot.inReplyToId, 141 | mediaIds = attachmentIds, 142 | ) 143 | 144 | dumboDb[tweet.id] = statusEntity.id 145 | } 146 | } 147 | 148 | inputNo -> { 149 | dumboDb[tweet.id] = null 150 | } 151 | 152 | inputSkip -> Unit // Nothing to do! 153 | else -> { 154 | System.err.println("Unknown input: $input") 155 | exitProcess(129) 156 | } 157 | } 158 | 159 | println("-------") 160 | } 161 | } 162 | 163 | private fun isUpToDate(toot: Toot, status: StatusEntity): Boolean { 164 | if (toot.text != status.content) { 165 | return false 166 | } 167 | if (toot.media.size != status.media_attachments.size) { 168 | return false 169 | } 170 | // TODO Compare image binaries? 171 | return true 172 | } 173 | 174 | private companion object { 175 | private const val inputYes = "yes" 176 | private const val inputNo = "no" 177 | private const val inputSkip = "skip" 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | 118 | 119 | # Determine the Java command to use to start the JVM. 120 | if [ -n "$JAVA_HOME" ] ; then 121 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 122 | # IBM's JDK on AIX uses strange locations for the executables 123 | JAVACMD=$JAVA_HOME/jre/sh/java 124 | else 125 | JAVACMD=$JAVA_HOME/bin/java 126 | fi 127 | if [ ! -x "$JAVACMD" ] ; then 128 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 129 | 130 | Please set the JAVA_HOME variable in your environment to match the 131 | location of your Java installation." 132 | fi 133 | else 134 | JAVACMD=java 135 | if ! command -v java >/dev/null 2>&1 136 | then 137 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 138 | 139 | Please set the JAVA_HOME variable in your environment to match the 140 | location of your Java installation." 141 | fi 142 | fi 143 | 144 | # Increase the maximum file descriptors if we can. 145 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 146 | case $MAX_FD in #( 147 | max*) 148 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 149 | # shellcheck disable=SC2039,SC3045 150 | MAX_FD=$( ulimit -H -n ) || 151 | warn "Could not query maximum file descriptor limit" 152 | esac 153 | case $MAX_FD in #( 154 | '' | soft) :;; #( 155 | *) 156 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 157 | # shellcheck disable=SC2039,SC3045 158 | ulimit -n "$MAX_FD" || 159 | warn "Could not set maximum file descriptor limit to $MAX_FD" 160 | esac 161 | fi 162 | 163 | # Collect all arguments for the java command, stacking in reverse order: 164 | # * args from the command line 165 | # * the main class name 166 | # * -classpath 167 | # * -D...appname settings 168 | # * --module-path (only if needed) 169 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 170 | 171 | # For Cygwin or MSYS, switch paths to Windows format before running java 172 | if "$cygwin" || "$msys" ; then 173 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 174 | 175 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 176 | 177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 178 | for arg do 179 | if 180 | case $arg in #( 181 | -*) false ;; # don't mess with options #( 182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 183 | [ -e "$t" ] ;; #( 184 | *) false ;; 185 | esac 186 | then 187 | arg=$( cygpath --path --ignore --mixed "$arg" ) 188 | fi 189 | # Roll the args list around exactly as many times as the number of 190 | # args, so each arg winds up back in the position where it started, but 191 | # possibly modified. 192 | # 193 | # NB: a `for` loop captures its iteration list before it begins, so 194 | # changing the positional parameters here affects neither the number of 195 | # iterations, nor the values presented in `arg`. 196 | shift # remove old arg 197 | set -- "$@" "$arg" # push replacement arg 198 | done 199 | fi 200 | 201 | 202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 204 | 205 | # Collect all arguments for the java command: 206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 207 | # and any embedded shellness will be escaped. 208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 209 | # treated as '${Hostname}' itself on the command line. 210 | 211 | set -- \ 212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 213 | -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ 214 | "$@" 215 | 216 | # Stop when "xargs" is not available. 217 | if ! command -v xargs >/dev/null 2>&1 218 | then 219 | die "xargs is not available" 220 | fi 221 | 222 | # Use "xargs" to parse quoted args. 223 | # 224 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 225 | # 226 | # In Bash we could simply go: 227 | # 228 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 229 | # set -- "${ARGS[@]}" "$@" 230 | # 231 | # but POSIX shell has neither arrays nor command substitution, so instead we 232 | # post-process each arg (as a line of input to sed) to backslash-escape any 233 | # character that might be a shell metacharacter, then use eval to reverse 234 | # that process (while maintaining the separation between arguments), and wrap 235 | # the whole thing up as a single "set" statement. 236 | # 237 | # This will of course break if any of these variables contains a newline or 238 | # an unmatched quote. 239 | # 240 | 241 | eval "set -- $( 242 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 243 | xargs -n1 | 244 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 245 | tr '\n' ' ' 246 | )" '"$@"' 247 | 248 | exec "$JAVACMD" "$@" 249 | -------------------------------------------------------------------------------- /src/main/kotlin/com/jakewharton/dumbo/twitterArchive.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import com.jakewharton.dumbo.Tweet.MediaEntity 4 | import com.jakewharton.dumbo.Tweet.MentionEntity 5 | import com.jakewharton.dumbo.Tweet.UrlEntity 6 | import java.nio.file.Path 7 | import java.time.Instant 8 | import java.time.OffsetDateTime 9 | import java.time.format.DateTimeFormatter 10 | import java.util.Locale 11 | import kotlinx.serialization.ExperimentalSerializationApi 12 | import kotlinx.serialization.KSerializer 13 | import kotlinx.serialization.SerialName 14 | import kotlinx.serialization.Serializable 15 | import kotlinx.serialization.builtins.ListSerializer 16 | import kotlinx.serialization.builtins.serializer 17 | import kotlinx.serialization.descriptors.PrimitiveKind.STRING 18 | import kotlinx.serialization.descriptors.PrimitiveSerialDescriptor 19 | import kotlinx.serialization.encoding.Decoder 20 | import kotlinx.serialization.encoding.Encoder 21 | import kotlinx.serialization.json.Json 22 | import kotlinx.serialization.json.JsonElement 23 | import kotlinx.serialization.json.okio.decodeFromBufferedSource 24 | import okio.ByteString.Companion.encodeUtf8 25 | import okio.buffer 26 | import okio.source 27 | 28 | class TwitterArchive( 29 | directory: Path, 30 | ) { 31 | private val tweets = directory.resolve("data/tweets.js") 32 | 33 | @OptIn(ExperimentalSerializationApi::class) 34 | fun loadTweets(): List { 35 | val entries = tweets.source().buffer().use { source -> 36 | check(source.rangeEquals(0, tweetsPrefix)) { 37 | "$tweets did not start with $tweetsPrefix" 38 | } 39 | source.skip(tweetsPrefix.size.toLong()) 40 | 41 | json.decodeFromBufferedSource(ListSerializer(ArchiveTweetEntry.serializer()), source) 42 | } 43 | return entries.map { 44 | Tweet( 45 | id = it.tweet.id, 46 | inReplyToId = it.tweet.in_reply_to_status_id, 47 | createdAt = it.tweet.created_at, 48 | language = it.tweet.lang, 49 | text = it.tweet.full_text, 50 | entities = buildList { 51 | this += it.tweet.entities.urls.map { entity -> 52 | UrlEntity( 53 | url = entity.expanded_url, 54 | indices = entity.indices, 55 | ) 56 | } 57 | this += it.tweet.entities.user_mentions.map { entity -> 58 | MentionEntity( 59 | id = entity.id, 60 | username = entity.screen_name, 61 | indices = entity.indices, 62 | ) 63 | } 64 | this += it.tweet.entities.media.map { entity -> 65 | MediaEntity( 66 | id = entity.id, 67 | filename = entity.media_url.substringAfterLast("/"), 68 | indices = entity.indices, 69 | ) 70 | } 71 | } 72 | ) 73 | }.sorted() 74 | } 75 | 76 | private companion object { 77 | val tweetsPrefix = "window.YTD.tweets.part0 = ".encodeUtf8() 78 | val json = Json { 79 | ignoreUnknownKeys = false 80 | } 81 | } 82 | } 83 | 84 | /** A user-friendly model of a Tweet massaged from the raw JSON of the archive. */ 85 | data class Tweet( 86 | val id: String, 87 | val inReplyToId: String? = null, 88 | val createdAt: Instant, 89 | val language: String, 90 | val text: String, 91 | val entities: List = emptyList(), 92 | ) : Comparable { 93 | /** A clickable URL. */ 94 | val url get() = "https://twitter.com/twitter/status/$id" 95 | val isRetweet get() = text.startsWith("RT @") 96 | val isMention get() = text.startsWith("@") 97 | 98 | override fun compareTo(other: Tweet) = comparator.compare(this, other) 99 | 100 | private companion object { 101 | private val comparator = compareBy(Tweet::createdAt) 102 | .thenByDescending(Tweet::id) 103 | } 104 | 105 | sealed interface Entity { 106 | val indices: IntRange 107 | } 108 | data class UrlEntity( 109 | val url: String, 110 | override val indices: IntRange, 111 | ) : Entity 112 | data class MentionEntity( 113 | val id: String, 114 | val username: String, 115 | override val indices: IntRange, 116 | ) : Entity 117 | data class MediaEntity( 118 | val id: String, 119 | val filename: String, 120 | override val indices: IntRange, 121 | ) : Entity 122 | } 123 | 124 | /** 125 | * A full modeling of the Twitter archive's JSON with as few defaults as possible 126 | * and deserialized without ignoring keys to ensure nothing is missed. 127 | */ 128 | @Serializable 129 | private data class ArchiveTweetEntry( 130 | val tweet: Tweet, 131 | ) { 132 | @Serializable 133 | data class Tweet( 134 | val edit_info: JsonElement, 135 | /** Always false, even if true. */ 136 | val retweeted: Boolean, 137 | val source: String, 138 | val entities: Entities, 139 | val extended_entities: Entities? = null, 140 | @Serializable(TwoStringArrayIntRangeSerializer::class) 141 | val display_text_range: IntRange, 142 | val favorite_count: UInt, 143 | val id_str: String, 144 | val truncated: Boolean, 145 | val retweet_count: UInt, 146 | val id: String, 147 | val possibly_sensitive: Boolean = false, 148 | @Serializable(TwitterTimestampSerializer::class) 149 | val created_at: Instant, 150 | val favorited: Boolean, 151 | val full_text: String, 152 | val lang: String, 153 | val contributors: List = emptyList(), 154 | val in_reply_to_status_id: String? = null, 155 | val in_reply_to_status_id_str: String? = null, 156 | val in_reply_to_user_id: String? = null, 157 | val in_reply_to_user_id_str: String? = null, 158 | val in_reply_to_screen_name: String? = null, 159 | val coordinates: Coordinates? = null, 160 | val geo: Coordinates? = null, 161 | ) 162 | 163 | @Serializable 164 | data class Coordinates( 165 | val type: CoordinateType, 166 | val coordinates: List, // TODO parse 167 | ) 168 | 169 | @Serializable 170 | enum class CoordinateType { 171 | Point, 172 | } 173 | 174 | @Serializable 175 | data class Entities( 176 | val hashtags: List = emptyList(), 177 | val media: List = emptyList(), 178 | val symbols: List = emptyList(), 179 | val user_mentions: List = emptyList(), 180 | val urls: List = emptyList(), 181 | ) 182 | 183 | @Serializable 184 | data class SymbolEntity( 185 | val text: String, 186 | @Serializable(TwoStringArrayIntRangeSerializer::class) 187 | val indices: IntRange, 188 | ) 189 | 190 | @Serializable 191 | data class HashtagEntity( 192 | val text: String, 193 | @Serializable(TwoStringArrayIntRangeSerializer::class) 194 | val indices: IntRange, 195 | ) 196 | 197 | @Serializable 198 | data class MediaEntity( 199 | val expanded_url: String, 200 | @Serializable(TwoStringArrayIntRangeSerializer::class) 201 | val indices: IntRange, 202 | val url: String, 203 | val media_url: String, 204 | val id_str: String, 205 | val id: String, 206 | val media_url_https: String, 207 | val sizes: MediaSizes, 208 | val type: MediaType, 209 | val display_url: String, 210 | val video_info: MediaVideoInfo? = null, 211 | val source_status_id: String? = null, 212 | val source_status_id_str: String? = null, 213 | val source_user_id: String? = null, 214 | val source_user_id_str: String? = null, 215 | val additional_media_info: AdditionalMediaInfo? = null, 216 | ) 217 | 218 | @Serializable 219 | data class AdditionalMediaInfo( 220 | val monetizable: Boolean, 221 | val title: String? = null, 222 | val description: String? = null, 223 | val embeddable: Boolean = false, // TODO correct default? 224 | ) 225 | 226 | @Serializable 227 | data class MediaVideoInfo( 228 | val aspect_ratio: List, // TODO Pair? 229 | val variants: List, 230 | val duration_millis: UInt? = null, 231 | ) 232 | 233 | @Serializable 234 | data class MediaVideoVariant( 235 | val bitrate: UInt? = null, 236 | val content_type: String, 237 | val url: String, 238 | ) 239 | 240 | @Serializable 241 | data class MediaSizes( 242 | val small: MediaSize, 243 | val medium: MediaSize, 244 | val large: MediaSize, 245 | val thumb: MediaSize, 246 | ) 247 | 248 | @Serializable 249 | data class MediaSize( 250 | val w: UInt, 251 | val h: UInt, 252 | val resize: MediaResize, 253 | ) 254 | 255 | @Serializable 256 | enum class MediaResize { 257 | @SerialName("fit") Fit, 258 | @SerialName("crop") Crop, 259 | } 260 | 261 | @Serializable 262 | enum class MediaType { 263 | @SerialName("photo") Photo, 264 | @SerialName("video") Video, 265 | @SerialName("animated_gif") AnimatedGif, 266 | } 267 | 268 | @Serializable 269 | data class UserMentionEntity( 270 | val name: String, 271 | val screen_name: String, 272 | @Serializable(TwoStringArrayIntRangeSerializer::class) 273 | val indices: IntRange, 274 | val id_str: String, 275 | val id: String, 276 | ) 277 | 278 | @Serializable 279 | data class UrlEntity( 280 | val url: String, 281 | val expanded_url: String, 282 | val display_url: String, 283 | @Serializable(TwoStringArrayIntRangeSerializer::class) 284 | val indices: IntRange, 285 | ) 286 | } 287 | 288 | private object TwitterTimestampSerializer : KSerializer { 289 | private val formatter = DateTimeFormatter.ofPattern("EEE MMM dd HH:mm:ss ZZ yyyy", Locale.US) 290 | override val descriptor = PrimitiveSerialDescriptor("twitter_timestamp", STRING) 291 | 292 | override fun deserialize(decoder: Decoder): Instant { 293 | val string = decoder.decodeString() 294 | return OffsetDateTime.parse(string, formatter).toInstant() 295 | } 296 | 297 | override fun serialize(encoder: Encoder, value: Instant) { 298 | throw AssertionError() 299 | } 300 | } 301 | 302 | private object TwoStringArrayIntRangeSerializer : KSerializer { 303 | private val delegate = ListSerializer(String.serializer()) 304 | override val descriptor get() = delegate.descriptor 305 | 306 | override fun deserialize(decoder: Decoder): IntRange { 307 | val items = delegate.deserialize(decoder) 308 | check(items.size == 2) { items.toString() } 309 | return items[0].toInt()..items[1].toInt() 310 | } 311 | 312 | override fun serialize(encoder: Encoder, value: IntRange) { 313 | throw AssertionError() 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /src/test/kotlin/com/jakewharton/dumbo/MastodonTest.kt: -------------------------------------------------------------------------------- 1 | package com.jakewharton.dumbo 2 | 3 | import assertk.assertFailure 4 | import assertk.assertions.hasMessage 5 | import assertk.assertions.isInstanceOf 6 | import com.jakewharton.dumbo.Tweet.MentionEntity 7 | import com.jakewharton.dumbo.Tweet.UrlEntity 8 | import java.time.Instant 9 | import org.junit.Assert.assertEquals 10 | import org.junit.Test 11 | 12 | class MastodonTest { 13 | @Test fun urlsReplaced() { 14 | val tweet = Tweet( 15 | id = "87764348256272384", 16 | createdAt = Instant.parse("2011-07-04T06:07:05Z"), 17 | language = "en", 18 | text = "SeriesGuide beta (http://t.co/Ysy68q4) is now using ActionBarSherlock. Please support and fork!! http://t.co/CxvKWoE", 19 | entities = listOf( 20 | UrlEntity( 21 | url = "https://market.android.com/search?q=seriesguide", 22 | indices = 18..37, 23 | ), 24 | UrlEntity( 25 | url = "https://github.com/UweTrottmann/SeriesGuide", 26 | indices = 97..116, 27 | ), 28 | ), 29 | ) 30 | val expected = Toot( 31 | text = "SeriesGuide beta (https://market.android.com/search?q=seriesguide) is now using ActionBarSherlock. Please support and fork!! https://github.com/UweTrottmann/SeriesGuide", 32 | posted = Instant.parse("2011-07-04T06:07:05Z"), 33 | language = "en", 34 | ) 35 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 36 | assertEquals(expected, actual) 37 | } 38 | 39 | @Test fun urlsOutOfOrderReplaced() { 40 | val tweet = Tweet( 41 | id = "87764348256272384", 42 | createdAt = Instant.parse("2011-07-04T06:07:05Z"), 43 | language = "en", 44 | text = "SeriesGuide beta (http://t.co/Ysy68q4) is now using ActionBarSherlock. Please support and fork!! http://t.co/CxvKWoE", 45 | entities = listOf( 46 | UrlEntity( 47 | url = "https://github.com/UweTrottmann/SeriesGuide", 48 | indices = 97..116, 49 | ), 50 | UrlEntity( 51 | url = "https://market.android.com/search?q=seriesguide", 52 | indices = 18..37, 53 | ), 54 | ), 55 | ) 56 | val expected = Toot( 57 | text = "SeriesGuide beta (https://market.android.com/search?q=seriesguide) is now using ActionBarSherlock. Please support and fork!! https://github.com/UweTrottmann/SeriesGuide", 58 | posted = Instant.parse("2011-07-04T06:07:05Z"), 59 | language = "en", 60 | ) 61 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 62 | assertEquals(expected, actual) 63 | } 64 | 65 | @Test fun replyMapHit() { 66 | val replyMap = InMemoryDumboDb( 67 | "1" to null, 68 | "2" to "1234", 69 | ) 70 | val tweet = Tweet( 71 | id = "3", 72 | inReplyToId = "2", 73 | createdAt = Instant.parse("2011-07-04T06:07:05Z"), 74 | language = "en", 75 | text = "Just setting up my Dumbo", 76 | ) 77 | val expected = Toot( 78 | text = "Just setting up my Dumbo", 79 | posted = Instant.parse("2011-07-04T06:07:05Z"), 80 | language = "en", 81 | inReplyToId = "1234", 82 | ) 83 | val actual = Toot.fromTweet(tweet, replyMap, IdentityMapping.Empty) 84 | assertEquals(expected, actual) 85 | } 86 | 87 | @Test fun replyMapExplicitNullThrows() { 88 | val replyMap = InMemoryDumboDb( 89 | "1" to null, 90 | "2" to "1234", 91 | ) 92 | val tweet = Tweet( 93 | id = "3", 94 | inReplyToId = "1", 95 | createdAt = Instant.parse("2011-07-04T06:07:05Z"), 96 | language = "en", 97 | text = "Just setting up my Dumbo", 98 | ) 99 | assertFailure { 100 | Toot.fromTweet(tweet, replyMap, IdentityMapping.Empty) 101 | }.isInstanceOf() 102 | .hasMessage("Unable to map tweet 3 replying to 1 without tootMap entry") 103 | } 104 | 105 | @Test fun replyMapMissThrows() { 106 | val replyMap = InMemoryDumboDb( 107 | "1" to null, 108 | "2" to "1234", 109 | ) 110 | val tweet = Tweet( 111 | id = "4", 112 | inReplyToId = "3", 113 | createdAt = Instant.parse("2011-07-04T06:07:05Z"), 114 | language = "en", 115 | text = "Just setting up my Dumbo", 116 | ) 117 | assertFailure { 118 | Toot.fromTweet(tweet, replyMap, IdentityMapping.Empty) 119 | }.isInstanceOf() 120 | .hasMessage("Unable to map tweet 4 replying to 3 without tootMap entry") 121 | } 122 | 123 | @Test fun mentionsReplacedWithMastodonConvention() { 124 | val tweet = Tweet( 125 | id = "91268136095068160", 126 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 127 | language = "en", 128 | text = "Got psuedo-confirmation from @retomeier that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 129 | entities = listOf( 130 | MentionEntity( 131 | id = "124", 132 | username = "retomeier", 133 | indices = 29..39, 134 | ) 135 | ), 136 | ) 137 | val expected = Toot( 138 | text = "Got psuedo-confirmation from @retomeier@twitter.com that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 139 | posted = Instant.parse("2011-07-13T22:09:53Z"), 140 | language = "en", 141 | ) 142 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 143 | assertEquals(expected, actual) 144 | } 145 | 146 | @Test fun mentionsMappedById() { 147 | val mapping = IdentityMapping.of( 148 | byId = mapOf( 149 | "124" to "@retomeier@example.com", 150 | ), 151 | byName = mapOf( 152 | "retomeier" to "@nope@nope.nope", 153 | ), 154 | ) 155 | val tweet = Tweet( 156 | id = "91268136095068160", 157 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 158 | language = "en", 159 | text = "Got psuedo-confirmation from @retomeier that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 160 | entities = listOf( 161 | MentionEntity( 162 | id = "124", 163 | username = "retomeier", 164 | indices = 29..39, 165 | ) 166 | ), 167 | ) 168 | val expected = Toot( 169 | text = "Got psuedo-confirmation from @retomeier@example.com that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 170 | posted = Instant.parse("2011-07-13T22:09:53Z"), 171 | language = "en", 172 | ) 173 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), mapping) 174 | assertEquals(expected, actual) 175 | } 176 | 177 | @Test fun mentionsMappedByName() { 178 | val mapping = IdentityMapping.of( 179 | byName = mapOf( 180 | "retomeier" to "@retomeier@example.com", 181 | ), 182 | ) 183 | val tweet = Tweet( 184 | id = "91268136095068160", 185 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 186 | language = "en", 187 | text = "Got psuedo-confirmation from @retomeier that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 188 | entities = listOf( 189 | MentionEntity( 190 | id = "124", 191 | username = "retomeier", 192 | indices = 29..39, 193 | ) 194 | ), 195 | ) 196 | val expected = Toot( 197 | text = "Got psuedo-confirmation from @retomeier@example.com that the action bar will not be part of future compat library revs! Good news for ActionBarSherlock.", 198 | posted = Instant.parse("2011-07-13T22:09:53Z"), 199 | language = "en", 200 | ) 201 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), mapping) 202 | assertEquals(expected, actual) 203 | } 204 | 205 | @Test fun mediaOnlySingle() { 206 | val tweet = Tweet( 207 | id = "91268136095068160", 208 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 209 | language = "en", 210 | text = "http://example.com", 211 | entities = listOf( 212 | Tweet.MediaEntity( 213 | id = "124", 214 | filename = "example.png", 215 | indices = 0..18, 216 | ), 217 | ), 218 | ) 219 | val expected = Toot( 220 | text = "", 221 | posted = Instant.parse("2011-07-13T22:09:53Z"), 222 | language = "en", 223 | media = listOf( 224 | Toot.Media( 225 | id = "124", 226 | filename = "example.png", 227 | ), 228 | ), 229 | ) 230 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 231 | assertEquals(expected, actual) 232 | } 233 | 234 | @Test fun mediaOnlyMany() { 235 | val tweet = Tweet( 236 | id = "91268136095068160", 237 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 238 | language = "en", 239 | text = "http://example.com http://example.net http://example.org", 240 | entities = listOf( 241 | Tweet.MediaEntity( 242 | id = "124", 243 | filename = "example1.png", 244 | indices = 0..18, 245 | ), 246 | Tweet.MediaEntity( 247 | id = "125", 248 | filename = "example2.png", 249 | indices = 19..37, 250 | ), 251 | Tweet.MediaEntity( 252 | id = "126", 253 | filename = "example3.png", 254 | indices = 38..56, 255 | ), 256 | ), 257 | ) 258 | val expected = Toot( 259 | text = "", 260 | posted = Instant.parse("2011-07-13T22:09:53Z"), 261 | language = "en", 262 | media = listOf( 263 | Toot.Media( 264 | id = "124", 265 | filename = "example1.png", 266 | ), 267 | Toot.Media( 268 | id = "125", 269 | filename = "example2.png", 270 | ), 271 | Toot.Media( 272 | id = "126", 273 | filename = "example3.png", 274 | ), 275 | ), 276 | ) 277 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 278 | assertEquals(expected, actual) 279 | } 280 | 281 | @Test fun textWithMedia() { 282 | val tweet = Tweet( 283 | id = "91268136095068160", 284 | createdAt = Instant.parse("2011-07-13T22:09:53Z"), 285 | language = "en", 286 | text = "Some text goes here! http://example.com", 287 | entities = listOf( 288 | Tweet.MediaEntity( 289 | id = "124", 290 | filename = "example.png", 291 | indices = 21..39, 292 | ), 293 | ), 294 | ) 295 | val expected = Toot( 296 | text = "Some text goes here!", 297 | posted = Instant.parse("2011-07-13T22:09:53Z"), 298 | language = "en", 299 | media = listOf( 300 | Toot.Media( 301 | id = "124", 302 | filename = "example.png", 303 | ), 304 | ), 305 | ) 306 | val actual = Toot.fromTweet(tweet, InMemoryDumboDb(), IdentityMapping.Empty) 307 | assertEquals(expected, actual) 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | --------------------------------------------------------------------------------