// This file was recovered from a flattened repository dump. The non-Go parts
// of that dump are preserved here verbatim so no content is lost.
//
// Repository layout:
//
//	├── .gitignore
//	├── cransim.conf
//	└── cransim.go
//
// /.gitignore:
//
//	/cransim
//	/data
//
// /cransim.conf:
//
//	# cransim upstart service
//
//	description "cransim"
//	author "Joe Cheng "
//
//	start on runlevel [2345]
//	stop on runlevel [!2345]
//
//	console log
//
//	respawn
//	respawn limit 10 5
//	umask 022
//
//	setuid www-data
//	chdir /srv/cransim
//	exec /srv/cransim/cransim

// Command cransim replays CRAN download logs over TCP.
//
// It keeps the gzip-compressed CSV logs for "today" and "tomorrow" (relative
// to a virtual clock that lags the wall clock by timeOffset) in the data
// directory, reads them line by line, waits until the virtual clock reaches
// each line's timestamp, and then broadcasts the line to every client
// connected on addr.
package main

import (
	"bufio"
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"time"
)

const (
	// timeOffset is how far virtualNow lags behind the wall clock.
	timeOffset = 28 * 24 * time.Hour

	// addr is the TCP address the server listens on.
	addr = ":6789"

	// dataDir is the directory (relative to the working directory) that
	// holds the downloaded log files.
	dataDir = "data"

	// minFileSize is the smallest size in bytes a log file may have and
	// still be considered a real download. Failed downloads are 20 bytes
	// for some reason.
	minFileSize = 30

	// timestampLayout is the time.Parse layout of the two leading CSV
	// fields (date and time) of every log line, including their quotes.
	timestampLayout = `"2006-01-02","15:04:05"`
)

var (
	// chanNextLine carries each line from data to service for broadcast.
	chanNextLine = make(chan string)

	// conns holds the per-client outgoing channels. It is read and written
	// only by the service goroutine (and its helper removeConn).
	conns []chan string

	// chanRegister and chanUnregister add and remove client channels.
	chanRegister   = make(chan chan string)
	chanUnregister = make(chan chan string)
)

// sync brings the data directory up to date with the virtual clock: it
// deletes log files dated before the virtual "today" and downloads the files
// for today and tomorrow when they are missing or smaller than minFileSize.
//
// It returns an error if the data directory cannot be listed, if a file
// cannot be stat'ed for a reason other than not existing, or if the download
// command itself fails to run. Failures to delete an old file are only
// logged.
func sync() error {
	today := virtualNow().Truncate(24 * time.Hour)

	// Delete obsolete files.
	files, err := ioutil.ReadDir(dataDir)
	if err != nil {
		return err
	}
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		// Files whose names do not match the log naming scheme are left alone.
		t, err := time.Parse("2006-01-02.csv.gz", file.Name())
		if err != nil {
			continue
		}
		if !t.Before(today) {
			continue
		}
		path := filepath.Join(dataDir, file.Name())
		log.Println("Deleting", path)
		if err := os.Remove(path); err != nil {
			log.Println("Error:", err)
		}
	}

	// Fetch today's and tomorrow's files if needed.
	for i := 0; i < 2; i++ {
		day := today.AddDate(0, 0, i)
		year := day.Format("2006")
		fname := day.Format("2006-01-02") + ".csv.gz"

		fi, err := os.Stat(filepath.Join(dataDir, fname))
		if err != nil && !os.IsNotExist(err) {
			return err
		}
		if err == nil && fi.Size() >= minFileSize {
			continue // already have a plausible download
		}

		log.Println("Downloading " + fname)
		// The file is re-sorted so lines come out in timestamp order. The
		// pipeline's exit status is that of the final gzip, so a failed curl
		// leaves a tiny file behind instead of returning an error here; the
		// minFileSize check above retries it on the next sync.
		cmd := exec.Command("bash", "-c", "curl http://cran-logs.rstudio.com/"+year+"/"+fname+" | zcat | sort | gzip > "+dataDir+"/"+fname)
		if err := cmd.Run(); err != nil {
			return err
		}
	}
	return nil
}

// handleConn serves one client connection: it registers a buffered outgoing
// channel with the service goroutine, starts a writer goroutine for it, and
// then blocks until the client sends a byte or the connection ends, at which
// point it unregisters and tears everything down.
func handleConn(conn net.Conn) {
	log.Println("Connection opened from", conn.RemoteAddr())
	ch := make(chan string, 1000)
	chanRegister <- ch
	go doWrites(conn, ch)

	// The result is deliberately ignored: the read exists only to block
	// until the peer sends something or the connection is closed/broken.
	buf := make([]byte, 1)
	_, _ = conn.Read(buf)

	// Unregister before closing ch so service can no longer send on it.
	chanUnregister <- ch
	conn.Close()
	close(ch)
	log.Println("Connection closed from ", conn.RemoteAddr())
}

// doWrites copies every string received on ch to conn, giving each write a
// 30 second deadline. It returns when ch is closed, or closes conn and
// returns on the first write error.
func doWrites(conn net.Conn, ch chan string) {
	for str := range ch {
		conn.SetWriteDeadline(time.Now().Add(30 * time.Second))
		if _, err := conn.Write([]byte(str)); err != nil {
			conn.Close()
			return
		}
	}
}

// virtualNow returns the current UTC time shifted back by timeOffset.
func virtualNow() time.Time {
	return time.Now().UTC().Add(-timeOffset)
}

// Scanner reads lines from consecutive daily "YYYY-MM-DD.csv.gz" files in a
// directory, moving on to the next day's file when the current one is
// exhausted.
type Scanner struct {
	dir      string         // directory containing the log files
	reader   io.ReadCloser  // currently open file, nil when none is open
	gzreader *gzip.Reader   // gzip stream over reader
	scanner  *bufio.Scanner // line scanner over gzreader, nil when no file is open
	nextDate time.Time      // date of the next file to open
}

// NewScanner returns a Scanner over dir that starts with the file for the
// current virtual date.
func NewScanner(dir string) *Scanner {
	return &Scanner{dir: dir, nextDate: virtualNow()}
}

// NextLine returns the next line, without its line terminator, opening the
// next daily file as needed. It returns an error if the next file is
// missing, smaller than minFileSize, cannot be opened or decoded, or if
// scanning the current file fails; the caller may simply call NextLine again
// later.
func (s *Scanner) NextLine() (string, error) {
	for {
		if s.scanner == nil {
			if err := s.open(); err != nil {
				return "", err
			}
		}
		if s.scanner.Scan() {
			return s.scanner.Text(), nil
		}

		// Current file is finished (or broken): release it, then either
		// report the error or loop around to the next day's file.
		scanErr := s.scanner.Err()
		s.close()
		if scanErr != nil {
			return "", scanErr
		}
	}
}

// open opens the file for s.nextDate and prepares s.scanner to read it.
// s.nextDate is advanced as soon as the file has been opened, so a file that
// turns out not to be valid gzip is skipped rather than retried.
func (s *Scanner) open() error {
	filename := filepath.Join(s.dir, s.nextDate.Format("2006-01-02")+".csv.gz")
	fi, err := os.Stat(filename)
	if err != nil {
		return err
	}
	if fi.Size() < minFileSize {
		return errors.New("Ignoring empty file")
	}
	f, err := os.Open(filename)
	if err != nil {
		return err
	}
	log.Println("Scanning file", filename)
	s.nextDate = s.nextDate.Add(24 * time.Hour)

	gz, err := gzip.NewReader(f)
	if err != nil {
		// Do not leak the descriptor or leave half-initialised state behind.
		f.Close()
		return err
	}
	s.reader, s.gzreader, s.scanner = f, gz, bufio.NewScanner(gz)
	return nil
}

// close releases the currently open file, if any, and resets the Scanner so
// the next call to NextLine opens the file for s.nextDate.
func (s *Scanner) close() {
	if s.gzreader != nil {
		s.gzreader.Close()
	}
	if s.reader != nil {
		s.reader.Close()
	}
	s.reader = nil
	s.gzreader = nil
	s.scanner = nil
}

// parseLineTime extracts the timestamp from the two leading CSV fields of a
// log line. It returns an error, rather than panicking, when the line is too
// short to contain a timestamp or the fields do not match timestampLayout.
func parseLineTime(line string) (time.Time, error) {
	if len(line) < len(timestampLayout) {
		return time.Time{}, fmt.Errorf("line too short for timestamp: %q", line)
	}
	return time.Parse(timestampLayout, line[:len(timestampLayout)])
}

// data reads log lines forever and feeds them to chanNextLine, sleeping
// until the virtual clock reaches each line's timestamp. Lines whose
// timestamp is already in the past are forwarded immediately. Lines without
// a parseable timestamp are logged and dropped; scanner errors are logged
// and retried after one second.
func data() {
	scanner := NewScanner(dataDir)
	for {
		nextLine, err := scanner.NextLine()
		if err != nil {
			log.Println("Error:", err)
			time.Sleep(1 * time.Second)
			continue
		}
		val, err := parseLineTime(nextLine)
		if err != nil {
			log.Println("Error parsing time:", err)
			continue
		}

		if wait := val.Sub(virtualNow()); wait > 0 {
			time.Sleep(wait)
		}
		chanNextLine <- nextLine + "\n"
	}
}

// removeConn removes ch from conns and reports whether it was present. It
// must only be called from the service goroutine.
func removeConn(ch chan string) bool {
	for i, el := range conns {
		if el != ch {
			continue
		}
		last := len(conns) - 1
		copy(conns[i:], conns[i+1:])
		conns[last] = nil // drop the stale reference from the backing array
		conns = conns[:last]
		return true
	}
	return false
}

// service owns the list of client channels. It loops forever handling
// registrations, unregistrations, and broadcasts of each line received on
// chanNextLine. Broadcast sends never block: a client whose buffer is full
// misses that line.
func service() {
	for {
		select {
		case ch := <-chanRegister:
			conns = append(conns, ch)
			log.Println(len(conns), "active connection(s)")
		case ch := <-chanUnregister:
			if removeConn(ch) {
				log.Println(len(conns), "active connection(s)")
			} else {
				log.Println("Couldn't find channel to unregister!")
			}
		case str := <-chanNextLine:
			for _, ch := range conns {
				select {
				case ch <- str:
				default:
				}
			}
		}
	}
}

// main prepares the data directory, performs an initial sync (fatal on
// failure), starts the background sync/service/data goroutines, and then
// accepts TCP connections on addr forever.
func main() {
	if err := os.MkdirAll(dataDir, 0755); err != nil {
		log.Fatal(err)
	}
	if err := sync(); err != nil {
		log.Fatal(err)
	}

	// Re-sync every 30 seconds; failures are logged and retried next round.
	go func() {
		for {
			time.Sleep(30 * time.Second)
			if err := sync(); err != nil {
				log.Println("Error:", err)
			}
		}
	}()

	go service()
	go data()

	ln, err := net.Listen("tcp", addr)
	if err != nil {
		log.Fatal(err)
	}
	log.Println("Listening on", addr)
	for {
		conn, err := ln.Accept()
		if err != nil {
			log.Println("Error:", err)
			// Brief pause so a persistent accept failure cannot spin the CPU.
			time.Sleep(100 * time.Millisecond)
			continue
		}
		go handleConn(conn)
	}
}