├── .gitignore ├── .travis.yml.template ├── README.md ├── plot.sc ├── plot.sh ├── sonatype-stats.sc ├── sonatype-stats.sh ├── to-gh-pages.sh ├── update.sh └── upload.sh /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /.travis.yml.template: -------------------------------------------------------------------------------- 1 | language: scala 2 | jdk: openjdk8 3 | script: scripts/update.sh 4 | cache: 5 | directories: 6 | - $HOME/.cache 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sonatype-stats 2 | 3 | The scripts in this repository allow to fetch and regularly update project 4 | statistics from Sonatype. You can run them either [locally](#test-it-locally), 5 | or from a [cron job on Travis CI](#cron-job-on-travis-ci). They output raw data 6 | (CSV and JSON), and an HTML file displaying total number of downloads and unique 7 | IPs per month. 8 | 9 | For example, see these [graphs for the coursier organization](https://coursier.github.io/sonatype-stats). 10 | 11 | ## Test it locally 12 | 13 | Fetch / update stats with 14 | ```bash 15 | $ SONATYPE_PROJECT=io.get-coursier \ 16 | SONATYPE_USERNAME=… SONATYPE_PASSWORD=… \ 17 | ./sonatype-stats.sh 18 | ``` 19 | (replace `io.get-coursier` with your own Sonatype organization) 20 | This fetches CSV and JSON files from `oss.sonatype.org`, and places them 21 | under `data/`. 22 | 23 | Create or update the plots with 24 | ```bash 25 | $ ./plot.sh 26 | ``` 27 | This creates a `stats.html` web page, that displays the number of downloads 28 | and of unique IPs that hit your project, for each month. 29 | 30 | Open `stats.html` in your browser to visualize it. 31 | 32 | ## Cron job on Travis CI 33 | 34 | ### Clone this repo 35 | 36 | Clone this repository, and set up your clone in Travis CI. 37 | 38 | ### Travis CI settings 39 | 40 | In your clone settings on Travis CI, add the following environment variables (these are secret by default): 41 | - `SONATYPE_USERNAME`: your Sonatype username, or the name part of your Sonatype token, 42 | - `SONATYPE_PASSWORD`: your Sonatype password, or the password part of your Sonatype token, 43 | - `SONATYPE_PROJECT`: the Sonatype project you want statistics for (should be the organization you publish under, like `com.github.user`), 44 | - `GH_TOKEN`: a GitHub personal access token. Create one on GitHub, by going into Settings > Developer settings > Personal access tokens (just the public repo rights should be enough). 45 | 46 | ### Cron job 47 | 48 | From GitHub, navigate to your clone, then to its `stats` branch. Add an empty 49 | file, to trigger a Travis CI job for the `stats` branch. 50 | 51 | Then in the Travis CI settings of your clone, in the cron section, 52 | add a daily cron for the `stats` branch. 53 | 54 | ### GitHub page 55 | 56 | Once the Travis CI job ran once, a page with the total downloads and unique IPs, 57 | per month, should have been pushed to the `gh-pages` branch of your clone. 58 | 59 | Navigate to it, at 60 | ``` 61 | https://your-user-name.github.io/sonatype-stats 62 | ``` 63 | 64 | -------------------------------------------------------------------------------- /plot.sc: -------------------------------------------------------------------------------- 1 | 2 | import $ivy.`com.github.tototoshi::scala-csv:1.3.5` 3 | import $ivy.`com.twitter::algebird-core:0.13.0` 4 | import $ivy.`org.plotly-scala::plotly-render:0.5.2` 5 | 6 | import java.io.File 7 | import java.nio.file.{Files, Paths} 8 | import java.time._ 9 | 10 | import com.twitter.algebird.Operators._ 11 | import plotly._ 12 | import plotly.element._ 13 | import plotly.layout._ 14 | import plotly.Plotly._ 15 | import upickle.default._ 16 | import ujson.{read => _, _} 17 | 18 | import com.github.tototoshi.csv._ 19 | 20 | 21 | case class UniqueIpData(total: Int) 22 | case class UniqueIpResp(data: UniqueIpData) 23 | implicit val uniqueIpDataRW: ReadWriter[UniqueIpData] = macroRW 24 | implicit val uniqueIpRespRW: ReadWriter[UniqueIpResp] = macroRW 25 | 26 | def blobPattern(s: String) = 27 | ("\\Q" + s.replace("*", "\\E.*\\Q") + "\\E").r 28 | 29 | def csvScatter(dir: File, name: String, filterNames: Option[String] = None, filterOutMonths: Set[YearMonth] = Set()) = { 30 | 31 | val patternOpt = filterNames.map(blobPattern(_).pattern) 32 | 33 | val data = for { 34 | 35 | year <- 2015 to Year.now(ZoneOffset.UTC).getValue 36 | month <- 1 to 12 37 | 38 | f = new File(dir, f"$year/$month%02d.csv") 39 | if f.exists() 40 | 41 | ym = YearMonth.of(year, month) 42 | 43 | elem <- CSVReader.open(f) 44 | .iterator 45 | .map(l => (ym, l(0), l(1).toInt)) 46 | .toVector 47 | 48 | } yield elem 49 | 50 | val byMonth = data 51 | .filter { 52 | patternOpt match { 53 | case None => _ => true 54 | case Some(p) => t => p.matcher(t._2).matches() 55 | } 56 | } 57 | .map { case (ym, _, n) => ym -> n } 58 | .sumByKey 59 | .toVector 60 | .sortBy(_._1) 61 | 62 | def byMonth0 = byMonth.filter { case (ym, _) => !filterOutMonths(ym) } 63 | 64 | def x = byMonth0.map(_._1).map { m => 65 | plotly.element.LocalDateTime(m.getYear, m.getMonthValue, 1, 0, 0, 0) 66 | } 67 | def y = byMonth0.map(_._2) 68 | 69 | Bar(x, y, name = name) 70 | } 71 | 72 | def uniqueIpScatter(dir: File, name: String) = { 73 | 74 | val data = for { 75 | 76 | year <- 2015 to Year.now(ZoneOffset.UTC).getValue 77 | month <- 1 to 12 78 | 79 | f = new File(dir, f"$year/$month%02d.json") 80 | if f.exists() 81 | 82 | ym = YearMonth.of(year, month) 83 | 84 | } yield { 85 | val s = new String(Files.readAllBytes(f.toPath), "UTF-8") 86 | val resp = read[UniqueIpResp](ujson.read(s)) 87 | ym -> resp.data.total 88 | } 89 | 90 | def x = data.map(_._1).map { m => 91 | plotly.element.LocalDateTime(m.getYear, m.getMonthValue, 1, 0, 0, 0) 92 | } 93 | def y = data.map(_._2) 94 | 95 | Bar(x, y, name = name) 96 | } 97 | 98 | val dataBase = Paths.get("data") 99 | 100 | val filterNames = sys.env.get("NAME_FILTER") 101 | 102 | val downloadsTraces = Seq( 103 | csvScatter(dataBase.resolve("stats").toFile, "# downloads", filterNames) 104 | ) 105 | 106 | val uniqueIpsTraces = 107 | if (filterNames.isEmpty) 108 | Seq( 109 | uniqueIpScatter(dataBase.resolve("unique-ips").toFile, "Unique IPs") 110 | ) 111 | else 112 | Nil 113 | 114 | 115 | val dlDivId = "downloads" 116 | val ipDivId = "uniqueips" 117 | 118 | val layout = Layout() 119 | 120 | val html = 121 | s""" 122 | | 123 | | 124 | |${layout.title.getOrElse("plotly chart")} 125 | | 126 | | 127 | | 128 | |
129 | |
130 | | 134 | | 135 | | 136 | |""".stripMargin 137 | 138 | Files.write(Paths.get("stats.html"), html.getBytes("UTF-8")) 139 | -------------------------------------------------------------------------------- /plot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | TARGET="$(dirname "${BASH_SOURCE[0]}")/target" 5 | 6 | mkdir -p "$TARGET" 7 | 8 | if [ ! -x "$TARGET/coursier" ]; then 9 | curl -Lo "$TARGET/coursier" https://git.io/coursier-cli 10 | chmod +x "$TARGET/coursier" 11 | fi 12 | 13 | "$TARGET/coursier" launch com.lihaoyi:ammonite_2.12.8:1.6.4 \ 14 | -M ammonite.Main \ 15 | -- \ 16 | "$(dirname "${BASH_SOURCE[0]}")/plot.sc" 17 | -------------------------------------------------------------------------------- /sonatype-stats.sc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env amm 2 | 3 | import $ivy.`com.softwaremill.sttp::core:1.5.10` 4 | 5 | import java.nio.file._ 6 | import java.time.{YearMonth, ZoneOffset} 7 | 8 | import com.softwaremill.sttp.quick._ 9 | import upickle.default._ 10 | import ujson.{read => _, _} 11 | 12 | object Responses { 13 | 14 | case class UniqueIpData(total: Int) 15 | implicit val uniqueIpDataRW: ReadWriter[UniqueIpData] = macroRW 16 | case class UniqueIpResp(data: UniqueIpData) 17 | implicit val uniqueIpRespRW: ReadWriter[UniqueIpResp] = macroRW 18 | 19 | case class Elem(id: String, name: String) 20 | implicit val elemRW: ReadWriter[Elem] = macroRW 21 | 22 | } 23 | 24 | import Responses._ 25 | 26 | object Params { 27 | 28 | // organization one was granted write access to 29 | val proj = sys.env.getOrElse( 30 | "SONATYPE_PROJECT", 31 | sys.error("SONATYPE_PROJECT not set") 32 | ) 33 | // actual organization used for publishing (must have proj as prefix) 34 | val organization = sys.env.getOrElse("SONATYPE_PROJECT", proj) 35 | 36 | val sonatypeUser = sys.env.getOrElse( 37 | "SONATYPE_USERNAME", 38 | sys.error("SONATYPE_USERNAME not set") 39 | ) 40 | val sonatypePassword: String = sys.env.getOrElse( 41 | "SONATYPE_PASSWORD", 42 | sys.error("SONATYPE_PASSWORD not set") 43 | ) 44 | 45 | val start = YearMonth.now(ZoneOffset.UTC) 46 | 47 | val projId = { 48 | 49 | val projectIds = { 50 | val projResp = sttp 51 | .auth.basic(Params.sonatypeUser, Params.sonatypePassword) 52 | .header("Accept", "application/json") 53 | .get(uri"https://oss.sonatype.org/service/local/stats/projects") 54 | .send() 55 | 56 | if (!projResp.isSuccess) 57 | sys.error("Error getting project list: " + projResp.statusText) 58 | 59 | val respJson = ujson.read(projResp.body.right.get) 60 | 61 | read[Seq[Elem]](respJson("data")) 62 | .map(e => e.name -> e.id) 63 | .toMap 64 | } 65 | 66 | projectIds(Params.proj) 67 | } 68 | 69 | val cutOff = start.minusMonths(4L) 70 | 71 | val base = Paths.get("data") 72 | } 73 | 74 | case class Data( 75 | base: Path, 76 | ext: String, 77 | empty: String => Boolean, 78 | name: String, 79 | tpe: String, 80 | projId: String, 81 | organization: String 82 | ) { 83 | 84 | def fileFor(monthYear: YearMonth): Path = { 85 | val year = monthYear.getYear 86 | val month = monthYear.getMonth.getValue 87 | base.resolve(f"$year%04d/$month%02d.$ext") 88 | } 89 | 90 | def exists(monthYear: YearMonth): Boolean = 91 | Files.isRegularFile(fileFor(monthYear)) 92 | 93 | def write(monthYear: YearMonth, content: String): Unit = { 94 | System.err.println(s"Writing $monthYear (${content.length} B)") 95 | val f = fileFor(monthYear) 96 | Files.createDirectories(f.getParent) 97 | Files.write(f, content.getBytes("UTF-8")) 98 | } 99 | 100 | def urlFor(monthYear: YearMonth) = { 101 | val year = monthYear.getYear 102 | val month = monthYear.getMonth.getValue 103 | 104 | uri"https://oss.sonatype.org/service/local/stats/$name?p=$projId&g=$organization&a=&t=$tpe&from=${f"$year%04d$month%02d"}&nom=1" 105 | } 106 | 107 | def process(monthYears: Iterator[YearMonth]): Iterator[(YearMonth, Boolean)] = 108 | monthYears 109 | .filter { monthYear => 110 | !exists(monthYear) 111 | } 112 | .map { monthYear => 113 | 114 | val u = urlFor(monthYear) 115 | 116 | System.err.println(s"Getting $monthYear: $u") 117 | 118 | val statResp = sttp 119 | .auth.basic(Params.sonatypeUser, Params.sonatypePassword) 120 | .header("Accept", "application/json") 121 | .get(u) 122 | .send() 123 | 124 | if (!statResp.isSuccess) 125 | sys.error("Error getting project stats: " + statResp.statusText) 126 | 127 | val stats = statResp.body.right.get.trim 128 | 129 | val empty0 = empty(stats) 130 | if (empty0) 131 | System.err.println(s"Empty response at $monthYear") 132 | else 133 | write(monthYear, stats) 134 | 135 | monthYear -> !empty0 136 | } 137 | } 138 | 139 | val statsData = Data( 140 | Params.base.resolve("stats"), 141 | "csv", 142 | _.isEmpty, 143 | "slices_csv", 144 | "raw", 145 | Params.projId, 146 | Params.organization 147 | ) 148 | 149 | val perArtifactUniqueIpsData = Data( 150 | Params.base.resolve("per-artifact-unique-ips"), 151 | "csv", 152 | _.isEmpty, 153 | "slices_csv", 154 | "ip", 155 | Params.projId, 156 | Params.organization 157 | ) 158 | 159 | val uniqueIpsData = Data( 160 | Params.base.resolve("unique-ips"), 161 | "json", 162 | s => read[UniqueIpResp](ujson.read(s)).data.total <= 0, 163 | "timeline", 164 | "ip", 165 | Params.projId, 166 | Params.organization 167 | ) 168 | 169 | for (data <- Seq(statsData, perArtifactUniqueIpsData, uniqueIpsData)) { 170 | val it = Iterator.iterate(Params.start)(_.minusMonths(1L)) 171 | val processed = data.process(it) 172 | .takeWhile { 173 | case (monthYear, nonEmpty) => 174 | nonEmpty || monthYear.compareTo(Params.cutOff) >= 0 175 | } 176 | .length 177 | 178 | System.err.println(s"Processed $processed months in ${data.base} for type ${data.tpe}") 179 | } 180 | 181 | -------------------------------------------------------------------------------- /sonatype-stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | TARGET="$(dirname "${BASH_SOURCE[0]}")/target" 5 | 6 | mkdir -p "$TARGET" 7 | 8 | if [ ! -x "$TARGET/coursier" ]; then 9 | curl -Lo "$TARGET/coursier" https://git.io/coursier-cli 10 | chmod +x "$TARGET/coursier" 11 | fi 12 | 13 | "$TARGET/coursier" launch com.lihaoyi:ammonite_2.12.8:1.6.4 \ 14 | -M ammonite.Main \ 15 | -- \ 16 | "$(dirname "${BASH_SOURCE[0]}")/sonatype-stats.sc" 17 | -------------------------------------------------------------------------------- /to-gh-pages.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euv 3 | 4 | DIR="$(pwd)" 5 | TARGET="$(dirname "${BASH_SOURCE[0]}")/target" 6 | 7 | mkdir -p "$TARGET" 8 | cd "$TARGET" 9 | 10 | 11 | if [ -d gh-pages ]; then 12 | echo "Removing former gh-pages clone" 13 | rm -rf gh-pages 14 | fi 15 | 16 | echo "Cloning" 17 | git clone "https://${GH_TOKEN}@github.com/$TRAVIS_REPO_SLUG.git" -b gh-pages gh-pages 18 | cd gh-pages 19 | 20 | git config user.name "Travis-CI" 21 | git config user.email "invalid@travis-ci.com" 22 | 23 | GH_PAGES_DEST="${GH_PAGES_DEST:-"index.html"}" 24 | cp "$DIR/stats.html" "$GH_PAGES_DEST" 25 | git add -- "$GH_PAGES_DEST" 26 | 27 | MSG="Update Sonatype statistics" 28 | 29 | if git status | grep "nothing to commit" >/dev/null 2>&1; then 30 | echo "Nothing changed" 31 | else 32 | git commit -m "$MSG" 33 | 34 | echo "Pushing changes" 35 | git push origin gh-pages 36 | fi 37 | -------------------------------------------------------------------------------- /update.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euv 3 | 4 | 5 | DIR="$(dirname "${BASH_SOURCE[0]}")" 6 | 7 | "$DIR/sonatype-stats.sh" 8 | 9 | "$DIR/plot.sh" 10 | 11 | "$DIR/upload.sh" 12 | 13 | "$DIR/to-gh-pages.sh" 14 | -------------------------------------------------------------------------------- /upload.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euv 3 | 4 | git config user.name "Travis-CI" 5 | git config user.email "invalid@travis-ci.com" 6 | 7 | git remote add writable "https://${GH_TOKEN}@github.com/$TRAVIS_REPO_SLUG.git" 8 | 9 | git add -- stats.html data 10 | 11 | MSG="Update Sonatype statistics" 12 | 13 | if git status | grep "nothing to commit" >/dev/null 2>&1; then 14 | echo "Nothing changed" 15 | else 16 | git commit -m "$MSG" 17 | 18 | echo "Pushing changes" 19 | git push writable HEAD:"$TRAVIS_BRANCH" 20 | fi 21 | --------------------------------------------------------------------------------