├── .gitignore
├── .travis.yml.template
├── README.md
├── plot.sc
├── plot.sh
├── sonatype-stats.sc
├── sonatype-stats.sh
├── to-gh-pages.sh
├── update.sh
└── upload.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 |
--------------------------------------------------------------------------------
/.travis.yml.template:
--------------------------------------------------------------------------------
1 | language: scala
2 | jdk: openjdk8
3 | script: scripts/update.sh
4 | cache:
5 | directories:
6 | - $HOME/.cache
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sonatype-stats
2 |
3 | The scripts in this repository let you fetch and regularly update project
4 | statistics from Sonatype. You can run them either [locally](#test-it-locally),
5 | or from a [cron job on Travis CI](#cron-job-on-travis-ci). They output raw data
6 | (CSV and JSON), and an HTML file displaying total number of downloads and unique
7 | IPs per month.
8 |
9 | For example, see these [graphs for the coursier organization](https://coursier.github.io/sonatype-stats).
10 |
11 | ## Test it locally
12 |
13 | Fetch / update stats with
14 | ```bash
15 | $ SONATYPE_PROJECT=io.get-coursier \
16 | SONATYPE_USERNAME=… SONATYPE_PASSWORD=… \
17 | ./sonatype-stats.sh
18 | ```
19 | (replace `io.get-coursier` with your own Sonatype organization)
20 | This fetches CSV and JSON files from `oss.sonatype.org`, and places them
21 | under `data/`.
22 |
23 | Create or update the plots with
24 | ```bash
25 | $ ./plot.sh
26 | ```
27 | This creates a `stats.html` web page that displays the number of downloads
28 | and the number of unique IPs that hit your project, for each month.
29 |
30 | Open `stats.html` in your browser to visualize it.
31 |
32 | ## Cron job on Travis CI
33 |
34 | ### Clone this repo
35 |
36 | Clone this repository, and set up your clone in Travis CI.
37 |
38 | ### Travis CI settings
39 |
40 | In your clone settings on Travis CI, add the following environment variables (these are secret by default):
41 | - `SONATYPE_USERNAME`: your Sonatype username, or the name part of your Sonatype token,
42 | - `SONATYPE_PASSWORD`: your Sonatype password, or the password part of your Sonatype token,
43 | - `SONATYPE_PROJECT`: the Sonatype project you want statistics for (should be the organization you publish under, like `com.github.user`),
44 | - `GH_TOKEN`: a GitHub personal access token. Create one on GitHub, by going into Settings > Developer settings > Personal access tokens (just the public repo rights should be enough).
45 |
46 | ### Cron job
47 |
48 | From GitHub, navigate to your clone, then to its `stats` branch. Add an empty
49 | file, to trigger a Travis CI job for the `stats` branch.
50 |
51 | Then in the Travis CI settings of your clone, in the cron section,
52 | add a daily cron for the `stats` branch.
53 |
54 | ### GitHub page
55 |
56 | Once the Travis CI job has run at least once, a page with the total downloads and unique IPs,
57 | per month, should have been pushed to the `gh-pages` branch of your clone.
58 |
59 | Navigate to it, at
60 | ```
61 | https://your-user-name.github.io/sonatype-stats
62 | ```
63 |
64 |
--------------------------------------------------------------------------------
/plot.sc:
--------------------------------------------------------------------------------
1 |
2 | import $ivy.`com.github.tototoshi::scala-csv:1.3.5`
3 | import $ivy.`com.twitter::algebird-core:0.13.0`
4 | import $ivy.`org.plotly-scala::plotly-render:0.5.2`
5 |
6 | import java.io.File
7 | import java.nio.file.{Files, Paths}
8 | import java.time._
9 |
10 | import com.twitter.algebird.Operators._
11 | import plotly._
12 | import plotly.element._
13 | import plotly.layout._
14 | import plotly.Plotly._
15 | import upickle.default._
16 | import ujson.{read => _, _}
17 |
18 | import com.github.tototoshi.csv._
19 |
20 |
// Payload of a unique-IPs JSON file: only the `total` count is decoded.
case class UniqueIpData(total: Int)
implicit val uniqueIpDataRW: ReadWriter[UniqueIpData] = macroRW[UniqueIpData]

// Top-level JSON object of a unique-IPs file, wrapping the payload under `data`.
case class UniqueIpResp(data: UniqueIpData)
implicit val uniqueIpRespRW: ReadWriter[UniqueIpResp] = macroRW[UniqueIpResp]
25 |
/** Compiles a glob-like filter into a regex.
  *
  * Every `*` in `s` matches any (possibly empty) run of characters; all other
  * characters are matched literally. Each literal segment is quoted with
  * `Pattern.quote`, which stays correct even when the segment itself contains
  * the `\E` sequence (hand-written `\Q...\E` quoting does not).
  *
  * @param s the glob, e.g. `coursier*`
  * @return a regex equivalent to the glob
  */
def blobPattern(s: String) =
  s.split("\\*", -1) // limit -1 keeps trailing empty segments, so a trailing `*` is honoured
    .map(segment => java.util.regex.Pattern.quote(segment))
    .mkString(".*")
    .r
28 |
/** Builds a bar trace of per-month totals from the CSV files under `dir`.
  *
  * Files are looked up at `dir/<year>/<MM>.csv` for every month from 2015 up to
  * the current year (UTC); months without a file are skipped. For each row, the
  * first column is the name matched against `filterNames` and the second column
  * is parsed as an integer and summed per month.
  *
  * @param dir             directory containing the `<year>/<MM>.csv` files
  * @param name            name of the resulting trace
  * @param filterNames     optional glob (see `blobPattern`); when set, only rows whose
  *                        first column matches it are counted
  * @param filterOutMonths months to leave out of the trace
  * @return a `Bar` trace with one bar per remaining month, sorted by month
  */
def csvScatter(dir: File, name: String, filterNames: Option[String] = None, filterOutMonths: Set[YearMonth] = Set()) = {

  val patternOpt = filterNames.map(blobPattern(_).pattern)

  val data = for {
    year <- 2015 to Year.now(ZoneOffset.UTC).getValue
    month <- 1 to 12

    f = new File(dir, f"$year/$month%02d.csv")
    if f.exists()

    ym = YearMonth.of(year, month)

    elem <- {
      // Read the rows eagerly and always close the reader, so that file
      // handles are not leaked (one file is opened per month).
      val reader = CSVReader.open(f)
      try reader.iterator.map(l => (ym, l(0), l(1).toInt)).toVector
      finally reader.close()
    }
  } yield elem

  val byMonth = data
    .filter { case (_, rowName, _) =>
      // No filter means every row is kept
      patternOpt.forall(_.matcher(rowName).matches())
    }
    .map { case (ym, _, n) => ym -> n }
    .sumByKey
    .toVector
    .sortBy(_._1)

  val kept = byMonth.filter { case (ym, _) => !filterOutMonths(ym) }

  // Each month is placed on the x axis at the first day of the month, midnight
  val x = kept.map { case (m, _) =>
    plotly.element.LocalDateTime(m.getYear, m.getMonthValue, 1, 0, 0, 0)
  }
  val y = kept.map(_._2)

  Bar(x, y, name = name)
}
71 |
/** Builds a bar trace of the unique-IP totals found in the JSON files under `dir`.
  *
  * Files are looked up at `dir/<year>/<MM>.json` for every month from 2015 up to
  * the current year (UTC); months without a file are skipped. Each file is read
  * as UTF-8 and decoded as a `UniqueIpResp`, whose `data.total` is the bar height.
  *
  * @param dir  directory containing the `<year>/<MM>.json` files
  * @param name name of the resulting trace
  * @return a `Bar` trace with one bar per month that has a file
  */
def uniqueIpScatter(dir: File, name: String) = {

  val years = 2015 to Year.now(ZoneOffset.UTC).getValue

  val totals = years.flatMap { year =>
    (1 to 12).flatMap { month =>
      val jsonFile = new File(dir, f"$year/$month%02d.json")
      if (jsonFile.exists()) {
        val content = new String(Files.readAllBytes(jsonFile.toPath), "UTF-8")
        val parsed = read[UniqueIpResp](ujson.read(content))
        Seq(YearMonth.of(year, month) -> parsed.data.total)
      } else
        Nil
    }
  }

  // Each month is placed on the x axis at the first day of the month, midnight
  val x = totals.map { case (m, _) =>
    plotly.element.LocalDateTime(m.getYear, m.getMonthValue, 1, 0, 0, 0)
  }
  val y = totals.map { case (_, total) => total }

  Bar(x, y, name = name)
}
97 |
// Directory the statistics are read from, relative to the working directory
val dataBase = Paths.get("data")

// Optional glob restricting which CSV rows are counted, taken from NAME_FILTER
val filterNames = sys.env.get("NAME_FILTER")

val downloadsTraces = Seq(
  csvScatter(dataBase.resolve("stats").toFile, "# downloads", filterNames)
)

// Unique IPs are only plotted when no name filter is set
// (presumably because the unique-ips files are not broken down by name - confirm)
val uniqueIpsTraces = filterNames match {
  case None =>
    Seq(uniqueIpScatter(dataBase.resolve("unique-ips").toFile, "Unique IPs"))
  case Some(_) =>
    Nil
}


// Identifiers of the two plots
val dlDivId = "downloads"
val ipDivId = "uniqueips"

val layout = Layout()
119 |
120 | val html =
121 | s"""
122 | |
123 | |
124 | |${layout.title.getOrElse("plotly chart")}
125 | |
126 | |
127 | |
128 | |
129 | |
130 | |
134 | |
135 | |
136 | |""".stripMargin
137 |
138 | Files.write(Paths.get("stats.html"), html.getBytes("UTF-8"))
139 |
--------------------------------------------------------------------------------
/plot.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs plot.sc with Ammonite, fetching the coursier launcher into target/
# (next to this script) the first time it is needed.
set -e

DIR="$(dirname "${BASH_SOURCE[0]}")"
TARGET="$DIR/target"
COURSIER="$TARGET/coursier"

mkdir -p "$TARGET"

if [ ! -x "$COURSIER" ]; then
  # -f makes curl fail on HTTP errors instead of saving the error page, and the
  # launcher is only moved into place once fully downloaded, so a failed
  # download can never be mistaken for a usable launcher on the next run.
  curl -fLo "$COURSIER.tmp" https://git.io/coursier-cli
  chmod +x "$COURSIER.tmp"
  mv "$COURSIER.tmp" "$COURSIER"
fi

"$COURSIER" launch com.lihaoyi:ammonite_2.12.8:1.6.4 \
  -M ammonite.Main \
  -- \
  "$DIR/plot.sc"
17 |
--------------------------------------------------------------------------------
/sonatype-stats.sc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env amm
2 |
3 | import $ivy.`com.softwaremill.sttp::core:1.5.10`
4 |
5 | import java.nio.file._
6 | import java.time.{YearMonth, ZoneOffset}
7 |
8 | import com.softwaremill.sttp.quick._
9 | import upickle.default._
10 | import ujson.{read => _, _}
11 |
/** JSON models for the Sonatype responses decoded by this script, with their upickle picklers. */
object Responses {

  // Payload of the unique-IPs response: only the `total` count is decoded
  case class UniqueIpData(total: Int)
  // Top-level object of the unique-IPs response, wrapping the payload under `data`
  case class UniqueIpResp(data: UniqueIpData)
  // One entry of the project list: a project id and its name
  case class Elem(id: String, name: String)

  implicit val uniqueIpDataRW: ReadWriter[UniqueIpData] = macroRW[UniqueIpData]
  implicit val uniqueIpRespRW: ReadWriter[UniqueIpResp] = macroRW[UniqueIpResp]
  implicit val elemRW: ReadWriter[Elem] = macroRW[Elem]

}
23 |
24 | import Responses._
25 |
/** Script parameters, read from the environment and from the Sonatype project list.
  *
  * Initializing this object performs an HTTP request to resolve `projId`, and
  * aborts with an error when a required environment variable is missing, the
  * request fails, or the project is not listed.
  */
object Params {

  // Reads a mandatory environment variable, aborting with "<name> not set" when absent
  private def requiredEnv(envName: String): String =
    sys.env.getOrElse(envName, sys.error(s"$envName not set"))

  // organization one was granted write access to
  val proj = requiredEnv("SONATYPE_PROJECT")
  // actual organization used for publishing (must have proj as prefix)
  // NOTE(review): this reads the same variable as `proj`, so both values are
  // always equal - confirm whether a dedicated variable was intended.
  val organization = sys.env.getOrElse("SONATYPE_PROJECT", proj)

  val sonatypeUser = requiredEnv("SONATYPE_USERNAME")
  val sonatypePassword: String = requiredEnv("SONATYPE_PASSWORD")

  // Most recent month to fetch (current month, UTC)
  val start = YearMonth.now(ZoneOffset.UTC)

  // Sonatype id of `proj`, looked up by name in the project list
  val projId = {

    val projResp = sttp
      .auth.basic(sonatypeUser, sonatypePassword)
      .header("Accept", "application/json")
      .get(uri"https://oss.sonatype.org/service/local/stats/projects")
      .send()

    if (!projResp.isSuccess)
      sys.error("Error getting project list: " + projResp.statusText)

    val body = projResp.body match {
      case Right(content) => content
      case Left(error) => sys.error("Error getting project list: " + error)
    }

    val respJson = ujson.read(body)

    val projectIds = read[Seq[Elem]](respJson("data"))
      .map(e => e.name -> e.id)
      .toMap

    // Fail with an actionable message rather than a bare NoSuchElementException
    projectIds.getOrElse(
      proj,
      sys.error(
        s"Project $proj not found in the Sonatype project list " +
          s"(available: ${projectIds.keys.toSeq.sorted.mkString(", ")})"
      )
    )
  }

  // Months at or after this one keep being requested even when responses are empty
  val cutOff = start.minusMonths(4L)

  // Directory the fetched files are written under
  val base = Paths.get("data")
}
73 |
/** One kind of statistics fetched from Sonatype and stored as one file per month.
  *
  * @param base         directory the monthly files are written under
  * @param ext          extension of the monthly files
  * @param empty        tells whether a (trimmed) response body carries no data
  * @param name         last path segment of the stats endpoint
  * @param tpe          value of the `t` query parameter
  * @param projId       value of the `p` query parameter
  * @param organization value of the `g` query parameter
  */
case class Data(
  base: Path,
  ext: String,
  empty: String => Boolean,
  name: String,
  tpe: String,
  projId: String,
  organization: String
) {

  /** Path of the file for `monthYear`: `base/<yyyy>/<MM>.<ext>`. */
  def fileFor(monthYear: YearMonth): Path = {
    val relative = f"${monthYear.getYear}%04d/${monthYear.getMonthValue}%02d.$ext"
    base.resolve(relative)
  }

  /** Whether the file for `monthYear` is already there (as a regular file). */
  def exists(monthYear: YearMonth): Boolean = {
    val target = fileFor(monthYear)
    Files.isRegularFile(target)
  }

  /** Writes `content` as UTF-8 to the file for `monthYear`, creating parent directories. */
  def write(monthYear: YearMonth, content: String): Unit = {
    System.err.println(s"Writing $monthYear (${content.length} B)")
    val target = fileFor(monthYear)
    Files.createDirectories(target.getParent)
    Files.write(target, content.getBytes("UTF-8"))
  }

  /** URL to request the statistics of `monthYear` (a single month, starting at `<yyyyMM>`). */
  def urlFor(monthYear: YearMonth) = {
    val from = f"${monthYear.getYear}%04d${monthYear.getMonthValue}%02d"

    uri"https://oss.sonatype.org/service/local/stats/$name?p=$projId&g=$organization&a=&t=$tpe&from=$from&nom=1"
  }

  /** Lazily fetches the months of `monthYears` that have no file yet.
    *
    * For each such month, the response is written to its file unless `empty`
    * flags it. The returned iterator yields the month along with whether its
    * response was non-empty. Aborts with an error on a non-successful response.
    */
  def process(monthYears: Iterator[YearMonth]): Iterator[(YearMonth, Boolean)] =
    for (monthYear <- monthYears if !exists(monthYear)) yield {

      val url = urlFor(monthYear)

      System.err.println(s"Getting $monthYear: $url")

      val response = sttp
        .auth.basic(Params.sonatypeUser, Params.sonatypePassword)
        .header("Accept", "application/json")
        .get(url)
        .send()

      if (!response.isSuccess)
        sys.error("Error getting project stats: " + response.statusText)

      val stats = response.body.right.get.trim

      val nonEmpty = !empty(stats)
      if (nonEmpty)
        write(monthYear, stats)
      else
        System.err.println(s"Empty response at $monthYear")

      (monthYear, nonEmpty)
    }
}
138 |
// Raw per-month statistics, stored as CSV under <base>/stats
val statsData = Data(
  base = Params.base.resolve("stats"),
  ext = "csv",
  empty = _.isEmpty,
  name = "slices_csv",
  tpe = "raw",
  projId = Params.projId,
  organization = Params.organization
)

// Same endpoint as above with type "ip", stored as CSV under <base>/per-artifact-unique-ips
val perArtifactUniqueIpsData = Data(
  base = Params.base.resolve("per-artifact-unique-ips"),
  ext = "csv",
  empty = _.isEmpty,
  name = "slices_csv",
  tpe = "ip",
  projId = Params.projId,
  organization = Params.organization
)

// Timeline of unique IPs, stored as JSON under <base>/unique-ips;
// a response counts as empty when its total is not positive
val uniqueIpsData = Data(
  base = Params.base.resolve("unique-ips"),
  ext = "json",
  empty = json => read[UniqueIpResp](ujson.read(json)).data.total <= 0,
  name = "timeline",
  tpe = "ip",
  projId = Params.projId,
  organization = Params.organization
)
168 |
// For each kind of data, walk back in time one month at a time from the current
// month, fetching the months that have no file yet. Stop at the first fetched
// month that is both empty and older than the cut-off.
Seq(statsData, perArtifactUniqueIpsData, uniqueIpsData).foreach { data =>
  val months = Iterator.iterate(Params.start)(_.minusMonths(1L))

  val processed = data
    .process(months)
    .takeWhile { case (monthYear, nonEmpty) =>
      nonEmpty || !monthYear.isBefore(Params.cutOff)
    }
    .size

  System.err.println(s"Processed $processed months in ${data.base} for type ${data.tpe}")
}
180 |
181 |
--------------------------------------------------------------------------------
/sonatype-stats.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs sonatype-stats.sc with Ammonite, fetching the coursier launcher into
# target/ (next to this script) the first time it is needed.
set -e

DIR="$(dirname "${BASH_SOURCE[0]}")"
TARGET="$DIR/target"
COURSIER="$TARGET/coursier"

mkdir -p "$TARGET"

if [ ! -x "$COURSIER" ]; then
  # -f makes curl fail on HTTP errors instead of saving the error page, and the
  # launcher is only moved into place once fully downloaded, so a failed
  # download can never be mistaken for a usable launcher on the next run.
  curl -fLo "$COURSIER.tmp" https://git.io/coursier-cli
  chmod +x "$COURSIER.tmp"
  mv "$COURSIER.tmp" "$COURSIER"
fi

"$COURSIER" launch com.lihaoyi:ammonite_2.12.8:1.6.4 \
  -M ammonite.Main \
  -- \
  "$DIR/sonatype-stats.sc"
17 |
--------------------------------------------------------------------------------
/to-gh-pages.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Copies stats.html from the current directory into a fresh clone of the
# gh-pages branch (as $GH_PAGES_DEST, index.html by default), then commits and
# pushes it if it changed. Requires GH_TOKEN and TRAVIS_REPO_SLUG.
set -euv

DIR="$(pwd)"
TARGET="$(dirname "${BASH_SOURCE[0]}")/target"

mkdir -p "$TARGET"
cd "$TARGET"


if [ -d gh-pages ]; then
  echo "Removing former gh-pages clone"
  rm -rf gh-pages
fi

echo "Cloning"
git clone "https://${GH_TOKEN}@github.com/$TRAVIS_REPO_SLUG.git" -b gh-pages gh-pages
cd gh-pages

git config user.name "Travis-CI"
git config user.email "invalid@travis-ci.com"

GH_PAGES_DEST="${GH_PAGES_DEST:-"index.html"}"
# The destination may live in a sub-directory that does not exist yet
mkdir -p "$(dirname "$GH_PAGES_DEST")"
cp "$DIR/stats.html" "$GH_PAGES_DEST"
git add -- "$GH_PAGES_DEST"

MSG="Update Sonatype statistics"

# Exit status 0 means nothing is staged; this does not depend on the wording
# or locale of the "git status" output.
if git diff --cached --quiet; then
  echo "Nothing changed"
else
  git commit -m "$MSG"

  echo "Pushing changes"
  git push origin gh-pages
fi
37 |
--------------------------------------------------------------------------------
/update.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the whole update pipeline, in order; stops at the first failing step.
set -euv


DIR="$(dirname "${BASH_SOURCE[0]}")"

for step in sonatype-stats.sh plot.sh upload.sh to-gh-pages.sh; do
  "$DIR/$step"
done
14 |
--------------------------------------------------------------------------------
/upload.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Commits stats.html and data/ in the current repository and pushes them to
# $TRAVIS_BRANCH, if anything changed. Requires GH_TOKEN, TRAVIS_REPO_SLUG and
# TRAVIS_BRANCH.
set -euv

git config user.name "Travis-CI"
git config user.email "invalid@travis-ci.com"

# Drop any remote left over from a previous run, so that "git remote add" does
# not abort the script when the remote already exists.
git remote remove writable >/dev/null 2>&1 || true
git remote add writable "https://${GH_TOKEN}@github.com/$TRAVIS_REPO_SLUG.git"

git add -- stats.html data

MSG="Update Sonatype statistics"

# Exit status 0 means nothing is staged; unlike grepping the "git status"
# output, this is unaffected by locale, wording, or unrelated untracked files.
if git diff --cached --quiet; then
  echo "Nothing changed"
else
  git commit -m "$MSG"

  echo "Pushing changes"
  git push writable HEAD:"$TRAVIS_BRANCH"
fi
21 |
--------------------------------------------------------------------------------