├── .DS_Store ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── netintel-package.R └── netintel.R ├── README.Rmd ├── README.md ├── examples └── nitestigraph.R ├── man ├── Alien.Vault.Reputation.Rd ├── BulkOrigin.Rd ├── BulkOriginASN.Rd ├── BulkPeer.Rd ├── CIRCL.BGP.Rank.Rd ├── Nothink.Blocklist.Rd ├── SANS.ASN.Detail.Rd ├── Zeus.Blocklist.Rd └── netintel-package.Rd ├── netintel.Rproj └── tests ├── README.knit.md ├── test-all.R └── testthat └── test-netintel.R /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/netintel/16e3ee1e3b83d231d3c3ab2a4f3b0a750ff3e07d/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.*md$ 4 | ^examples$ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Rproj 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: netintel 2 | Type: Package 3 | Title: Various network intelligence routines 4 | Version: 1.2.0 5 | Date: 2014-08-30 6 | Author: Bob Rudis and David Severski 7 | Maintainer: Bob Rudis 8 | Description: Various network intelligence routines 9 | License: GPL-2 10 | Suggests: 11 | testthat 12 | Imports: 13 | plyr, 14 | data.table, 15 | httr 16 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | 3 | 
export(Alien.Vault.Reputation) 4 | export(BulkOrigin) 5 | export(BulkOriginASN) 6 | export(BulkPeer) 7 | export(CIRCL.BGP.Rank) 8 | export(Nothink.Blocklist) 9 | export(SANS.ASN.Detail) 10 | export(Zeus.Blocklist) 11 | import(data.table) 12 | import(httr) 13 | import(plyr) 14 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # CHANGES IN netintel VERSION 1.2.0 # 2 | 3 | ## NEW FEATURES 4 | 5 | - added support for Zeus & Nothink blocklist retrieval 6 | 7 | 8 | # CHANGES IN netintel VERSION 1.1.0 # 9 | 10 | ## NEW FEATURES 11 | 12 | - all the core functions have been completely rewritten 13 | 14 | - the functions that take AS numbers as parameters automatically figure out whether the "AS" prefix is required or not 15 | 16 | - `Alien.Vault.Reputation` now returns a `data.table` vs a `data.frame` -------------------------------------------------------------------------------- /R/netintel-package.R: -------------------------------------------------------------------------------- 1 | #' netintel - a package containing various functions to support IPv4 address, ASN and general node intelligence lookups 2 | #' 3 | #' When attempting to uncover "badness", a security data scientist, 4 | #' intelligence/ops analyst or incident responder needs metadata on the IP 5 | #' addresses, hosts, domains, URLs, etc. they come across. This package provides 6 | #' functions that aid in collecting this metadata from within R. 7 | #' @docType package 8 | #' @name netintel-package 9 | #' @aliases netintel 10 | #' @author Bob Rudis <\url{http://github.com/hrbrmstr}> and David Severski <\url{https://github.com/davidski}> 11 | #' @seealso The latest/development version of the package is on github - \url{http://github.com/hrbrmstr/netintel} where there are examples of usage. 
12 | #' @import plyr httr data.table
13 | #'
14 | NULL
15 |
--------------------------------------------------------------------------------
/R/netintel.R:
--------------------------------------------------------------------------------
1 | # trim leading/trailing whitespace; gsub() keeps NA as NA and always returns character
2 | trim <- function(x) gsub("^\\s+|\\s+$", "", x)
3 |
4 | # returns a new data.frame: character columns are trimmed, all others copied as-is
5 | trimdf <- function(df, stringsAsFactors=FALSE) {
6 |   trim_column <- function(column) {
7 |     if (!is.character(column)) {
8 |       return(column)
9 |     }
10 |     trim(column)
11 |   }
12 |   data.frame(lapply(df, trim_column), stringsAsFactors=stringsAsFactors)
13 | }
14 |
15 | #' @title Retrieves BGP Origin ASN info for a list of IPv4 addresses
16 | #' @description Queries the Team Cymru whois service for the BGP origin ASN of
17 | #'   each IPv4 address and returns the results as a data frame
18 | #' @param ip.list vector of IPv4 address (character - dotted-decimal)
19 | #' @param host which server to perform the lookup (chr) -
20 | #'   defaults to \code{v4.whois.cymru.com}
21 | #' @param port TCP port to use to connect to \code{host} (int) -
22 | #'   defaults to port \code{43}
23 | #' @return data frame of BGP Origin ASN lookup results
24 | #' \itemize{
25 | #'   \item \code{AS} - AS #
26 | #'   \item \code{IP} - IPv4 (passed in)
27 | #'   \item \code{BGP.Prefix} - BGP CIDR
28 | #'   \item \code{CC} - Country code
29 | #'   \item \code{Registry} - Registry it falls under
30 | #'   \item \code{Allocated} - date it was allocated
31 | #'   \item \code{AS.Name} - AS name
32 | #' }
33 | #' @note The Team Cymru's service is NOT a GeoIP service! Do not use this
34 | #' function for that as your results will not be accurate.
35 | #' @seealso \url{http://www.team-cymru.org/Services/}
36 | #' @export
37 | #'
38 | BulkOrigin <- function(ip.list, host="v4.whois.cymru.com", port=43) {
39 |
40 |   # setup query: one IPv4 address per line inside a begin/verbose ... end envelope
41 |   ips <- paste(unlist(ip.list), collapse="\n")
42 |   cmd <- sprintf("begin\nverbose\n%s\nend\n", ips)
43 |
44 |   # setup connection (closed on exit, even if the exchange fails),
45 |   # post the query and read the whole reply
46 |   con <- socketConnection(host=host, port=port, blocking=TRUE, open="r+")
47 |   on.exit(close(con), add=TRUE)
48 |   cat(cmd, file=con)
49 |   response <- readLines(con)
50 |
51 |   # drop the header line, split fields and trim padding; text= leaves no open connection
52 |   response <- trimdf(read.csv(text=response[-1], stringsAsFactors=FALSE,
53 |                               sep="|", header=FALSE))
54 |   names(response) <- c("AS", "IP", "BGP.Prefix", "CC",
55 |                        "Registry", "Allocated", "AS.Name")
56 |   response[response=="NA"] <- NA
57 |
58 |   return(response)
59 |
60 | }
61 |
62 | #' @title Retrieves BGP Peer ASN info for a list of IPv4 addresses
63 | #' @description Retrieves BGP Peer ASN info for a list of IPv4 addresses
64 | #' @param ip.list vector of IPv4 address (character - dotted-decimal)
65 | #' @param host which server to perform the lookup (chr) -
66 | #'   defaults to \code{v4-peer.whois.cymru.com}
67 | #' @param port TCP port to use to connect to \code{host} (int) -
68 | #'   defaults to \code{43}
69 | #' @return data frame of BGP Peer ASN lookup results
70 | #' \itemize{
71 | #'   \item \code{Peer.AS} - peer AS #
72 | #'   \item \code{IP} - IPv4 (passed in)
73 | #'   \item \code{BGP.Prefix} - BGP CIDR block
74 | #'   \item \code{CC} - Country code
75 | #'   \item \code{Registry} - Registry it falls under
76 | #'   \item \code{Allocated} - date allocated
77 | #'   \item \code{Peer.AS.Name} - peer name
78 | #' }
79 | #' @note The Team Cymru's service is NOT a GeoIP service! Do not use this
80 | #' function for that as your results will not be accurate.
81 | #' @seealso \url{http://www.team-cymru.org/Services/}
82 | #' @export
83 | #'
84 | BulkPeer <- function(ip.list, host="v4-peer.whois.cymru.com", port=43) {
85 |
86 |   # setup query: one IPv4 address per line inside a begin/verbose ... end envelope
87 |   ips <- paste(unlist(ip.list), collapse="\n")
88 |   cmd <- sprintf("begin\nverbose\n%s\nend\n", ips)
89 |
90 |   # setup connection (closed on exit, even if the exchange fails),
91 |   # post the query and read the whole reply
92 |   con <- socketConnection(host=host, port=port, blocking=TRUE, open="r+")
93 |   on.exit(close(con), add=TRUE)
94 |   cat(cmd, file=con)
95 |   response <- readLines(con)
96 |
97 |   # drop the header line, split fields and trim padding; text= leaves no open connection
98 |   response <- trimdf(read.csv(text=response[-1], stringsAsFactors=FALSE,
99 |                               sep="|", header=FALSE))
100 |   names(response) <- c("Peer.AS", "IP", "BGP.Prefix", "CC",
101 |                        "Registry", "Allocated", "Peer.AS.Name")
102 |   response[response=="NA"] <- NA
103 |
104 |   return(response)
105 |
106 | }
107 |
108 | #' @title Retrieves BGP Origin ASN info for a list of ASN ids
109 | #' @description Retrieves BGP Origin ASN info for a list of ASN ids
110 | #' @param asn.list character vector of ASN ids (character)
111 | #' @param host which server to perform the lookup (chr) -
112 | #'   defaults to \code{v4.whois.cymru.com}
113 | #' @param port TCP port to use to connect to \code{host} (int) -
114 | #'   defaults to \code{43}
115 | #' @return data frame of BGP Origin ASN lookup results
116 | #' \itemize{
117 | #'   \item \code{AS} - AS #
118 | #'   \item \code{CC} - Country code
119 | #'   \item \code{Registry} - registry it falls under
120 | #'   \item \code{Allocated} - when it was allocated
121 | #'   \item \code{AS.Name} - name associated with the allocation
122 | #' }
123 | #' @note The Team Cymru's service is NOT a GeoIP service! Do not use this
124 | #' function for that as your results will not be accurate.
125 | #' @seealso \url{http://www.team-cymru.org/Services/}
126 | #' @export
127 | #'
128 | BulkOriginASN <- function(asn.list, host="v4.whois.cymru.com", port=43) {
129 |
130 |   # setup query; numeric ids are formatted without scientific notation (200000, not 2e+05)
131 |   asns <- unlist(asn.list)
132 |   if (is.numeric(asns)) asns <- format(asns, scientific=FALSE, trim=TRUE)
133 |   asns <- ifelse(grepl("^AS", asns), asns, sprintf("AS%s", asns))
134 |   cmd <- sprintf("begin\nverbose\n%s\nend\n", paste(asns, collapse="\n"))
135 |
136 |   # setup connection (closed on exit, even on error), post query, read reply
137 |   con <- socketConnection(host=host, port=port, blocking=TRUE, open="r+")
138 |   on.exit(close(con), add=TRUE)
139 |   cat(cmd, file=con)
140 |   response <- readLines(con)
141 |
142 |   # drop the header line, split fields and trim padding; text= leaves no open connection
143 |   response <- trimdf(read.csv(text=response[-1], stringsAsFactors=FALSE,
144 |                               sep="|", header=FALSE))
145 |   names(response) <- c("AS", "CC", "Registry", "Allocated", "AS.Name")
146 |   response[response=="NA"] <- NA
147 |
148 |   return(response)
149 |
150 | }
151 |
152 | #' @title Retrieves CIRCL aggregated, historical/current BGP rank data
153 | #' @description Retrieves CIRCL aggregated, historical/current BGP rank data
154 | #' @param asn.list character vector of ASN ids (character)
155 | #' @param circl.base.url CIRCL server base URL (chr) -
156 | #'   defaults to \code{http://bgpranking.circl.lu/csv/}
157 | #' @return data frame of CIRCL rank data
158 | #' \itemize{
159 | #'   \item \code{asn} asn #
160 | #'   \item \code{day} date
161 | #'   \item \code{rank} current rank that day
162 | #' }
163 | #' @seealso
164 | #' \itemize{
165 | #'   \item Background on CIRCL Project (+source) \url{https://github.com/CIRCL/bgp-ranking}
166 | #'   \item CIRCL BGP site \url{http://bgpranking.circl.lu/}
167 | #' }
168 | #' @export
169 | #' @examples
170 | #' CIRCL.BGP.Rank(57954)
171 | #'
172 | CIRCL.BGP.Rank <- function(asn.list,
173 |                            circl.base.url="http://bgpranking.circl.lu/csv/") {
174 |
175 |   # the URL is built from the bare number, so strip any leading "AS"
176 |   asns <- ifelse(grepl("^AS", asn.list), gsub("^AS", "", asn.list), asn.list)
177 |   ranks <- ldply(lapply(asns, function(asn) {
178 |     # format() keeps numeric ids such as 200000 out of scientific notation in the URL
179 |     id <- format(asn, scientific=FALSE, trim=TRUE)
180 |     cbind(asn, read.csv(paste0(circl.base.url, id), stringsAsFactors=FALSE))
181 |   }), rbind)
182 |
183 |   return(ranks)
184 |
185 | }
186 |
187 | #' @title Retrieves SANS ASN intel currently tracked IP detail
188 | #' @description Retrieves SANS ASN intel currently tracked IP detail
189 | #' @param asn ASN to lookup (character) - a leading \code{AS} prefix is stripped
190 | #' @param sans.base.url SANS server base URL (chr) - defaults to
191 | #'   \code{http://isc.sans.edu/asdetailsascii.html?as=}
192 | #' @return data frame of SANS ASN IP data
193 | #' \itemize{
194 | #'   \item \code{Source.IP} is 0 padded so each byte is three digits long
195 | #'   \item \code{Reports.Count} number of packets received
196 | #'   \item \code{Targets.Count} number of target IPs that reported packets from this source
197 | #'   \item \code{First.Seen} First time we saw a packet from this source
198 | #'   \item \code{Last.Seen} Last time we saw a packet from this source
199 | #'   \item \code{Updated.Date.Time} Last date+time the record was updated
200 | #' }
201 | #' @note IPs are removed if they have not been seen in 30 days.
202 | #' @seealso \url{https://isc.sans.edu/as.html}
203 | #' @export
204 | #'
205 | SANS.ASN.Detail <- function(asn, sans.base.url="http://isc.sans.edu/asdetailsascii.html?as=") {
206 |
207 |   # numeric ids are formatted without scientific notation before the "AS" strip
208 |   if (is.numeric(asn)) asn <- format(asn, scientific=FALSE, trim=TRUE)
209 |   src <- GET(paste0(sans.base.url, gsub("^AS", "", asn)))
210 |   asn.df <- read.table(text=content(src, as="text"), header=FALSE, sep="\t")
211 |   names(asn.df) <- c("Source.IP", "Reports.Count", "Targets.Count",
212 |                      "First.Seen", "Last.Seen", "Updated.Date.Time")
213 |   return(asn.df)
214 |
215 | }
216 |
217 |
218 | #' @title Retrieves Alien Vault's IP reputation database
219 | #' @description Retrieves Alien Vault's IP reputation database.
220 | #' @details
221 | #' AlienVault refreshes every hour, but the onus is on the caller to force a
222 | #' refresh.
First-time call will setup a cache directory & file in the user's
223 | #' home directory, download & generate the data frame then write the data frame
224 | #' out as an R object. Future calls will just re-read this data frame unless
225 | #' \code{refresh == TRUE}, in which case the database is downloaded again.
226 | #'
227 | #' Please be kind to the AlienVault servers & only refresh if you really need to.
228 | #' @param refresh refresh the database? (bool)
229 | #' @param alien.vault.reputation.url URL of the AlienVault data (chr) -
230 | #'   defaults to \code{http://reputation.alienvault.com/reputation.data}
231 | #' @return data.table with IP & Reputation information.
232 | #' \itemize{
233 | #'   \item \code{IP} - IPv4 address
234 | #'   \item \code{Risk} - how risky is the target (1-10)
235 | #'   \item \code{Reliability} - how reliable is the rating (1-10)
236 | #'   \item \code{Activity} - what type of host is it
237 | #'   \item \code{Country} - what is the IPv4 country of origin
238 | #'   \item \code{City} - what is the IPv4 city of origin
239 | #'   \item \code{Latitude} - geolocated latitude of the IPv4
240 | #'   \item \code{Longitude} - geolocated longitude of the IPv4
241 | #' }
242 | #' @seealso
243 | #' \itemize{
244 | #'   \item Background on AlienVault's IP rep db: \url{http://labs.alienvault.com/labs/index.php/projects/open-source-ip-reputation-portal/download-ip-reputation-database/}
245 | #'   \item More info on AlienVault's database: \url{http://www.slideshare.net/alienvault/building-an-ip-reputation-engine-tracking-the-miscreants}
246 | #' }
247 | #' @export
248 | #'
249 | Alien.Vault.Reputation <- function(refresh=FALSE, alien.vault.reputation.url="http://reputation.alienvault.com/reputation.data") {
250 |
251 |   # TODO: What is field 8?
252 |   # TODO: Need to split out the ";" separated factors?
253 |
254 |   # the processed table is cached as an .rda file under ~/.ipcache
255 |   av.dir <- file.path(path.expand("~"), ".ipcache")
256 |   av.file <- file.path(av.dir, "alienvaultrep.rda")
257 |
258 |   dir.create(av.dir, showWarnings=FALSE)
259 |
260 |   if (refresh || file.access(av.file, 4)!=0) {
261 |
262 |     suppressWarnings(av.dt <- fread(alien.vault.reputation.url, sep="#",
263 |                                     stringsAsFactors=FALSE))
264 |     setnames(av.dt, colnames(av.dt), c("IP", "Risk", "Reliability", "Activity",
265 |                                        "Country", "City", "LatLon", "x"))
266 |     # LatLon is split on "," once per distinct value, then the helper columns are dropped
267 |     av.dt[, Latitude:=unlist(strsplit(LatLon, split=","))[[1]], by=LatLon]
268 |     av.dt[, Longitude:=unlist(strsplit(LatLon, split=","))[[2]], by=LatLon]
269 |     av.dt$LatLon <- NULL
270 |     av.dt$x <- NULL
271 |
272 |     setkey(av.dt, IP)
273 |
274 |     av.dt$Risk <- factor(av.dt$Risk)
275 |     av.dt$Reliability <- factor(av.dt$Reliability)
276 |     av.dt$Country <- factor(av.dt$Country)
277 |     av.dt$City <- factor(av.dt$City)
278 |
279 |     save(av.dt, file=av.file, compress=TRUE)
280 |
281 |   } else {
282 |     load(av.file)  # restores av.dt, the object written by save() in the refresh branch
283 |   }
284 |
285 |   return(av.dt)
286 |
287 | }
288 |
289 | #' @title Retrieves Zeus Blocklist (IP/FQDN/URL)
290 | #' @description Retrieves Zeus Blocklist (IP/FQDN/URL)
291 | #' @details
292 | #' The Zeus blocklist refreshes regularly, but the onus is on the caller to force a
293 | #' refresh. First-time call will setup a cache directory & file in the user's
294 | #' home directory, download & generate the data frame then write the data frame
295 | #' out as an R object. Future calls will just re-read this data frame unless
296 | #' \code{refresh == TRUE}, in which case the database is downloaded again.
297 | #'
298 | #' @param refresh refresh the database?
(bool)
299 | #' @param domains_url Zeus domains blocklist URL (chr) -
300 | #'   defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist}
301 | #' @param ips_url Zeus IP blocklist URL (chr) -
302 | #'   defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist}
303 | #' @param urls_url Zeus compromised URLs blocklist URL (chr) -
304 | #'   defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=compromised}
305 | #' @return List of three single-column data frames, one for each blocklist
306 | #' \itemize{
307 | #'   \item \code{domains} - Zeus domains (column name: \code{domain})
308 | #'   \item \code{ips} - Zeus ips (column name: \code{IP})
309 | #'   \item \code{urls} - Zeus compromised URLs (column name: \code{URL})
310 | #' }
311 | #' @seealso Zeus blocklist info - \url{https://zeustracker.abuse.ch/blocklist.php}
312 | #' @export
313 | Zeus.Blocklist <- function(refresh=FALSE,
314 |                            domains_url="https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist",
315 |                            ips_url="https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist",
316 |                            urls_url="https://zeustracker.abuse.ch/blocklist.php?download=compromised") {
317 |
318 |   zeus.dir <- file.path(path.expand("~"), ".ipcache")
319 |
320 |   zeus.data.file <- file.path(zeus.dir, "zeus.rda")
321 |
322 |   zeus.ips.file <- file.path(zeus.dir, "zeus_ipblocklist.txt")
323 |   zeus.domains.file <- file.path(zeus.dir, "zeus_domainblocklist.txt")
324 |   zeus.urls.file <- file.path(zeus.dir, "zeus_compromised.txt")
325 |
326 |   dir.create(zeus.dir, showWarnings=FALSE)
327 |
328 |   if (refresh || file.access(zeus.data.file, 4)!=0) {
329 |     # download each list and keep the raw text next to the cache file
330 |     dom <- GET(domains_url)
331 |     write(content(dom, "text"), file=zeus.domains.file)
332 |
333 |     ips <- GET(ips_url)
334 |     write(content(ips, "text"), file=zeus.ips.file)
335 |
336 |     urls <- GET(urls_url)
337 |     write(content(urls, "text"), file=zeus.urls.file)
338 |     # parse each list; text= avoids leaving an unclosed textConnection behind
339 |     dom_df <- read.table(text=content(dom, "text"), stringsAsFactors=FALSE)
340 |     setnames(dom_df, colnames(dom_df), "domain")
341 |     ips_df <- read.table(text=content(ips, "text"), stringsAsFactors=FALSE)
342 |     setnames(ips_df, colnames(ips_df), "IP")
343 |     url_df <- read.table(text=content(urls, "text"), stringsAsFactors=FALSE)
344 |     setnames(url_df, colnames(url_df), "URL")
345 |
346 |     save(dom_df, ips_df, url_df, file=zeus.data.file)
347 |
348 |   } else {
349 |     load(zeus.data.file)  # restores dom_df, ips_df and url_df
350 |   }
351 |
352 |   return(list(domains=dom_df, ips=ips_df, urls=url_df))
353 |
354 | }
355 |
356 | #' @title Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN)
357 | #' @description Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN)
358 | #' @details
359 | #' The Nothink blocklist refreshes regularly, but the onus is on the caller to force a
360 | #' refresh. First-time call will setup a cache directory & file in the user's
361 | #' home directory, download & generate the data frame then write the data frame
362 | #' out as an R object. Future calls will just re-read this data frame unless
363 | #' \code{refresh == TRUE}, in which case the database is downloaded again.
364 | #'
365 | #' @param refresh refresh the database?
(bool)
366 | #' @param nothink_url Nothink blacklist URL (chr) -
367 | #'   defaults to \code{http://www.nothink.org/blacklist/blacklist_malware_dns.txt}
368 | #' @return List of two single-column data frames, one for each blocklist
369 | #' \itemize{
370 | #'   \item \code{domains} - Nothink domains (column name: \code{domain})
371 | #'   \item \code{ips} - Nothink ips (column name: \code{IP})
372 | #' }
373 | #' @seealso Nothink - \url{http://www.nothink.org/}
374 | #' @export
375 | Nothink.Blocklist <- function(refresh=FALSE,
376 |                               nothink_url="http://www.nothink.org/blacklist/blacklist_malware_dns.txt") {
377 |
378 |   nothink.dir <- file.path(path.expand("~"), ".ipcache")
379 |
380 |   nothink.data.file <- file.path(nothink.dir, "nothink.rda")
381 |
382 |   nothink.file <- file.path(nothink.dir, "nothink.txt")
383 |
384 |   dir.create(nothink.dir, showWarnings=FALSE)
385 |
386 |   if (refresh || file.access(nothink.data.file, 4)!=0) {
387 |
388 |     dat <- GET(nothink_url)
389 |     write(content(dat, "text"), file=nothink.file)
390 |     # keep only entries: drop comment lines and blank lines; close the connection we open
391 |     tc <- textConnection(content(dat, "text"))
392 |     dat_v <- grep("^#|^\ *$", readLines(tc), invert=TRUE, value=TRUE)
393 |     close(tc)
394 |     is_ip <- validateIP(dat_v)
395 |     ips_df <- data.frame(IP=dat_v[is_ip], stringsAsFactors=FALSE)
396 |     dom_df <- data.frame(domain=dat_v[!is_ip], stringsAsFactors=FALSE)
397 |
398 |     save(dom_df, ips_df, file=nothink.data.file)
399 |
400 |   } else {
401 |     load(nothink.data.file)  # restores dom_df and ips_df
402 |   }
403 |
404 |   return(list(domains=dom_df, ips=ips_df))
405 |
406 | }
407 |
408 | .validateIP <- function(ip) {
409 |   # TRUE only when every element of ip matches the dotted-decimal IPv4 pattern
410 |   res <- regexpr('^(((2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?)\\.){3}(2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?))$', ip)
411 |   return(min(res) > 0)
412 |
413 | }
414 | # element-wise IPv4 check; vapply() yields a logical vector even for empty input
415 | validateIP <- function(ip) vapply(ip, .validateIP, logical(1), USE.NAMES=TRUE)
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "README"
3 | author: "Bob Rudis"
4 |
date: "August 30, 2014" 5 | output: 6 | md_document: 7 | variant: markdown_github 8 | --- 9 | 10 | Version 1.2 adds the [Zeus](https://zeustracker.abuse.ch/blocklist.php) and [Nothink](http://www.nothink.org/) blocklists 11 | 12 | Version 1.1 brings a significant update to the core components of the `netintel` package. Every function has been re-written to be as fast as possible without resorting to `Rcpp` functions. The intent of the package is to provide as many IP & ASN intelligence routines as possible to those using R for Security Data Science and security intel/ops/IR work. 13 | 14 | It relies on `httr`, `plyr` & `data.table`. 15 | 16 | Current function list: 17 | 18 | - `Alien.Vault.Reputation` - Retrieves Alien Vault's IP reputation database 19 | - `BulkOrigin` - Retrieves BGP Origin ASN info for a list of IPv4 addresses 20 | - `BulkOriginASN` - Retrieves BGP Origin ASN info for a list of ASN ids 21 | - `BulkPeer` - Retrieves BGP Peer ASN info for a list of IPv4 addresses 22 | - `CIRCL.BGP.Rank` - Retrieves CIRCL aggregated, historical/current BGP rank data 23 | - `SANS.ASN.Detail` - Retrieves SANS ASN intel currently tracked IP detail 24 | - `Zeus.Blocklist` - Retrieves Zeus Blocklist (IP/FQDN/URL) 25 | - `Nothink.Blocklist` - Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN) 26 | 27 | ### Installation 28 | 29 | ```{r eval=FALSE} 30 | devtools::install_github("hrbrmstr/netintel") 31 | library(netintel) 32 | ``` 33 | 34 | ### Usage 35 | 36 | ```{r} 37 | library(netintel) 38 | 39 | # current version 40 | packageVersion("netintel") 41 | 42 | # Bulk stuff 43 | BulkOrigin("162.243.111.4") 44 | BulkOriginASN(62567) 45 | BulkPeer("162.243.111.4") 46 | 47 | # CIRCL 48 | 49 | head(CIRCL.BGP.Rank(62567)) 50 | 51 | # SANS was flaky so no example 52 | 53 | # SANS.ASN.Detail(62567) 54 | 55 | # AlienVault 56 | 57 | head(Alien.Vault.Reputation()) 58 | 59 | # Zeus 60 | 61 | str(Zeus.Blocklist()) 62 | 63 | # Nothink 64 | 65 | str(Nothink.Blocklist()) 66 | ``` 67 | 68 | 
### Test Results 69 | 70 | ```{r} 71 | library(netintel) 72 | library(testthat) 73 | 74 | date() 75 | 76 | test_dir("tests/") 77 | 78 | ``` 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Version 1.2 adds the [Zeus](https://zeustracker.abuse.ch/blocklist.php) and [Nothink](http://www.nothink.org/) blocklists 2 | 3 | Version 1.1 brings a significant update to the core components of the `netintel` package. Every function has been re-written to be as fast as possible without resorting to `Rcpp` functions. The intent of the package is to provide as many IP & ASN intelligence routines as possible to those using R for Security Data Science and security intel/ops/IR work. 4 | 5 | It relies on `httr`, `plyr` & `data.table`. 6 | 7 | Current function list: 8 | 9 | - `Alien.Vault.Reputation` - Retrieves Alien Vault's IP reputation database 10 | - `BulkOrigin` - Retrieves BGP Origin ASN info for a list of IPv4 addresses 11 | - `BulkOriginASN` - Retrieves BGP Origin ASN info for a list of ASN ids 12 | - `BulkPeer` - Retrieves BGP Peer ASN info for a list of IPv4 addresses 13 | - `CIRCL.BGP.Rank` - Retrieves CIRCL aggregated, historical/current BGP rank data 14 | - `SANS.ASN.Detail` - Retrieves SANS ASN intel currently tracked IP detail 15 | - `Zeus.Blocklist` - Retrieves Zeus Blocklist (IP/FQDN/URL) 16 | - `Nothink.Blocklist` - Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN) 17 | 18 | ### Installation 19 | 20 | ``` {.r} 21 | devtools::install_github("hrbrmstr/netintel") 22 | library(netintel) 23 | ``` 24 | 25 | ### Usage 26 | 27 | ``` {.r} 28 | library(netintel) 29 | 30 | # current version 31 | packageVersion("netintel") 32 | ``` 33 | 34 | ## [1] '1.2.0' 35 | 36 | ``` {.r} 37 | # Bulk stuff 38 | BulkOrigin("162.243.111.4") 39 | ``` 40 | 41 | ## AS IP BGP.Prefix CC Registry Allocated 42 | ## 1 62567 162.243.111.4 162.243.0.0/17 US arin 
2013-09-06 43 | ## AS.Name 44 | ## 1 DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US 45 | 46 | ``` {.r} 47 | BulkOriginASN(62567) 48 | ``` 49 | 50 | ## AS CC Registry Allocated 51 | ## 1 62567 US arin 2013-07-11 52 | ## AS.Name 53 | ## 1 DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US 54 | 55 | ``` {.r} 56 | BulkPeer("162.243.111.4") 57 | ``` 58 | 59 | ## Peer.AS IP BGP.Prefix CC Registry Allocated 60 | ## 1 174 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 61 | ## 2 286 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 62 | ## 3 3257 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 63 | ## 4 3356 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 64 | ## 5 4565 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 65 | ## 6 22822 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 66 | ## Peer.AS.Name 67 | ## 1 COGENT-174 - Cogent Communications,US 68 | ## 2 KPN KPN International / KPN Eurorings,NL 69 | ## 3 TINET-BACKBONE Tinet SpA,DE 70 | ## 4 LEVEL3 - Level 3 Communications, Inc.,US 71 | ## 5 MEGAPATH2-US - MegaPath Networks Inc.,US 72 | ## 6 LLNW - Limelight Networks, Inc.,US 73 | 74 | ``` {.r} 75 | # CIRCL 76 | 77 | head(CIRCL.BGP.Rank(62567)) 78 | ``` 79 | 80 | ## asn day rank 81 | ## 1 62567 2014-06-30 1.001 82 | ## 2 62567 2013-07-27 1.000 83 | ## 3 62567 2014-03-28 1.003 84 | ## 4 62567 2011-04-30 1.000 85 | ## 5 62567 2013-09-09 1.004 86 | ## 6 62567 2013-09-08 1.004 87 | 88 | ``` {.r} 89 | # SANS was flaky so no example 90 | 91 | # SANS.ASN.Detail(62567) 92 | 93 | # AlienVault 94 | 95 | head(Alien.Vault.Reputation()) 96 | ``` 97 | 98 | ## IP Risk Reliability Activity Country City 99 | ## 1: 1.0.131.184 1 2 Malicious Host TH 100 | ## 2: 1.121.142.154 4 2 Malicious Host AU 101 | ## 3: 1.121.164.195 4 2 Malicious Host AU Coorparoo 102 | ## 4: 1.123.40.5 4 2 Malicious Host AU Adelaide 103 | ## 5: 1.133.228.176 4 2 Malicious Host AU 104 | ## 6: 1.159.48.252 4 2 Malicious Host AU 105 | ## Latitude Longitude 106 | ## 1: 15.0 100.0 107 | ## 2: -27.0 133.0 108 | ## 3: -27.5 
153.050003052 109 | ## 4: -34.9286994934 138.598602295 110 | ## 5: -27.0 133.0 111 | ## 6: -27.0 133.0 112 | 113 | ``` {.r} 114 | # Zeus 115 | 116 | str(Zeus.Blocklist()) 117 | ``` 118 | 119 | ## List of 3 120 | ## $ domains:'data.frame': 856 obs. of 1 variable: 121 | ## ..$ domain: chr [1:856] "039b1ee.netsolhost.com" "03a6b7a.netsolhost.com" "03a6f57.netsolhost.com" "1day.su" ... 122 | ## $ ips :'data.frame': 213 obs. of 1 variable: 123 | ## ..$ IP: chr [1:213] "103.241.0.100" "103.4.52.150" "103.7.59.135" "107.181.174.84" ... 124 | ## $ urls :'data.frame': 673 obs. of 1 variable: 125 | ## ..$ URL: chr [1:673] "190.104.217.181/~ssiprueb/wp-includes/css/b.exe" "190.104.217.181/~ssiprueb/wp-includes/css/cfg.bin" "190.104.217.181/~ssiprueb/wp-includes/css/login.php" "210.37.11.238/jm32/includes/site/bot.exe" ... 126 | 127 | ``` {.r} 128 | # Nothink 129 | 130 | str(Nothink.Blocklist()) 131 | ``` 132 | 133 | ## List of 2 134 | ## $ domains:'data.frame': 202 obs. of 1 variable: 135 | ## ..$ domain: chr [1:202] "1.h8cbf.in" "199.222.35.192.in-addr.arpa." "2.0.168.192.in-addr.arpa." "202.222.35.192.in-addr.arpa" ... 136 | ## $ ips :'data.frame': 235 obs. of 1 variable: 137 | ## ..$ IP: chr [1:235] "130.14.108.54" "130.14.11.194" "130.14.118.141" "130.14.129.171" ... 138 | 139 | ### Test Results 140 | 141 | ``` {.r} 142 | library(netintel) 143 | library(testthat) 144 | 145 | date() 146 | ``` 147 | 148 | ## [1] "Sat Aug 30 07:06:39 2014" 149 | 150 | ``` {.r} 151 | test_dir("tests/") 152 | ``` 153 | 154 | ## Team CYMRU : ... 155 | ## CIRCL : . 156 | ## AlienVault : . 157 | ## Zeus : . 158 | ## Nothink : . 
159 |
--------------------------------------------------------------------------------
/examples/nitestigraph.R:
--------------------------------------------------------------------------------
1 | library(igraph)
2 | library(netintel)
3 | library(plyr)
4 |
5 | #
6 | # test script for a few of the netintel libraries
7 | #
8 |
9 | # load AlienVault data
10 | z <- Alien.Vault.Reputation()
11 |
12 | # populate some IPs (sample list; replaced by the C&C selection below)
13 | ips <- c("100.43.81.11","100.43.81.7","107.20.39.216","108.166.87.63","109.152.4.217","109.73.79.58","119.235.237.17","128.12.248.13","128.221.197.57","128.221.197.60","128.221.224.57","129.241.249.6","134.226.56.7","137.157.8.253","137.69.117.58","142.56.86.35","146.255.96.169","150.203.4.24","152.62.109.57","152.62.109.62","160.83.30.185","160.83.30.202","160.83.72.205","161.69.220.1","168.159.192.57","168.244.164.254","173.165.182.190","173.57.120.151","175.41.236.5","176.34.78.244","178.85.44.139","184.172.0.214","184.72.187.192","193.164.138.35","194.203.96.184","198.22.122.158","199.181.136.59","204.191.88.251","204.4.182.15","205.185.121.149","206.112.95.181","206.47.249.246","207.189.121.46","207.54.134.4","209.221.90.250","212.36.53.166","216.119.144.209","216.43.0.10","23.20.117.241","23.20.204.157","23.20.9.81","23.22.63.190","24.207.64.10","24.64.233.203","37.59.16.223","49.212.154.200","50.16.130.169","50.16.179.34","50.16.29.33","50.17.13.221","50.17.43.219","50.18.234.67","63.71.9.108","64.102.249.7","64.31.190.1","65.210.5.50","65.52.1.12","65.60.80.199","66.152.247.114","66.193.16.162","66.249.71.143","66.249.71.47","66.249.72.76","66.41.34.181","69.164.221.186","69.171.229.245","69.28.149.29","70.164.152.31","71.127.49.50","71.41.139.254","71.87.20.2","74.112.131.127","74.114.47.11","74.121.22.10","74.125.178.81","74.125.178.82","74.125.178.88","74.125.178.94","74.176.163.56","76.118.2.138","76.126.174.105","76.14.60.62","76.168.198.238","76.22.130.45","77.79.6.37","81.137.59.193","82.132.239.186","82.132.239.97","8.28.16.254","83.111.54.154","83.251.15.145","84.61.15.10","85.90.76.149","88.211.53.36","89.204.182.67","93.186.30.114","96.27.136.169","97.107.138.192","98.158.20.231","98.158.20.237")
14 |
15 | # let's only look at C&C servers; z$IP[...] yields a plain character vector of addresses
16 | ips <- z$IP[grep("C&C", z$Activity, fixed=TRUE)]
17 |
18 | # get BGP origin & peers
19 | origin <- BulkOrigin(ips)
20 | peers <- BulkPeer(ips)
21 |
22 | # start graphing
23 | g <- graph.empty()
24 |
25 | # Make IP vertices; IP endpoints are red
26 | g <- g + vertices(ips, size=1, color="red", group=1)
27 |
28 | # Make BGP vertices; BGP nodes are orange
29 | g <- g + vertices(unique(c(peers$Peer.AS, origin$AS)), size=1.5, color="orange", group=2)
30 |
31 | # no labels
32 | V(g)$label <- ""
33 |
34 | # Make IP/BGP edges
35 | ip.edges <- lapply(ips, function(x) {
36 |   iAS <- origin[origin$IP==x,]$AS
37 |   lapply(iAS, function(y) {
38 |     c(x, y)
39 |   })
40 | })
41 |
42 | # Make BGP/peer edges
43 | bgp.edges <- lapply(unique(origin$BGP.Prefix), function(x) {
44 |   startAS <- unique(origin[origin$BGP.Prefix==x,]$AS)
45 |   lapply(startAS, function(z) {
46 |     pAS <- peers[peers$BGP.Prefix==x,]$Peer.AS
47 |     lapply(pAS, function(y) {
48 |       c(z, y)
49 |     })
50 |   })
51 | })
52 |
53 | # get total graph node count
54 | node.count <- table(c(unlist(ip.edges), unlist(bgp.edges)))
55 |
56 | # add edges
57 | g <- g + edges(unlist(ip.edges))
58 | g <- g + edges(unlist(bgp.edges))
59 |
60 | # base edge weight == 1
61 | E(g)$weight <- 1
62 |
63 | # size nodes according to connectivity
64 | #V(g)$size = 2
65 |
66 | # simplify the graph
67 | g <- simplify(g, edge.attr.comb=list(weight="sum"))
68 |
69 | # no arrows
70 | E(g)$arrow.size <- 0
71 |
72 | # best layout for this
73 | L <- layout.fruchterman.reingold(g)
74 |
75 | # plot the graph using the layout computed above
76 | plot(g, layout=L, margin=0)
77 |
--------------------------------------------------------------------------------
/man/Alien.Vault.Reputation.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand
2 | \name{Alien.Vault.Reputation} 3 | \alias{Alien.Vault.Reputation} 4 | \title{Retrieves Alien Vault's IP reputation database} 5 | \usage{ 6 | Alien.Vault.Reputation(refresh = FALSE, 7 | alien.vault.reputation.url = "http://reputation.alienvault.com/reputation.data") 8 | } 9 | \arguments{ 10 | \item{refresh}{refresh the database? (bool)} 11 | 12 | \item{alien.vault.reputation.url}{URL of the AlienVault data (chr) - 13 | defaults to \code{http://reputation.alienvault.com/reputation.data}} 14 | } 15 | \value{ 16 | data.table with IP & Reputation information. 17 | \itemize{ 18 | \item \code{IP} - IPv4 address 19 | \item \code{Risk} - how risky is the target (1-10) 20 | \item \code{Reliability} - how reliable is the rating (1-10) 21 | \item \code{Activity} - what type of host is it 22 | \item \code{Country} - what is the IPv4 country of origin 23 | \item \code{City} - what is the IPv4 city of origin 24 | \item \code{Latitude} - geolocated latitude of the IPv4 25 | \item \code{Longitude} - geolocated longitude of the IPv4 26 | } 27 | } 28 | \description{ 29 | Retrieves Alien Vault's IP reputation database. 30 | } 31 | \details{ 32 | AlienVault refreshes every hour, but the onus is on the caller to force a 33 | refresh. First-time call will set up a cache directory & file in the user's 34 | home directory, download & generate the data frame then write the data frame 35 | out as an R object. Future calls will just re-read this data frame unless 36 | \code{refresh == TRUE} should the function refresh the database. 37 | 38 | Please be kind to the AlienVault servers & only refresh if you really need to. 
39 | } 40 | \seealso{ 41 | \itemize{ 42 | \item Background on AlienVault's IP rep db: \url{http://labs.alienvault.com/labs/index.php/projects/open-source-ip-reputation-portal/download-ip-reputation-database/} 43 | \item More info on AlienVault's database: \url{http://www.slideshare.net/alienvault/building-an-ip-reputation-engine-tracking-the-miscreants} 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /man/BulkOrigin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{BulkOrigin} 3 | \alias{BulkOrigin} 4 | \title{Retrieves BGP Origin ASN info for a list of IPv4 addresses} 5 | \usage{ 6 | BulkOrigin(ip.list, host = "v4.whois.cymru.com", port = 43) 7 | } 8 | \arguments{ 9 | \item{ip.list}{vector of IPv4 address (character - dotted-decimal)} 10 | 11 | \item{host}{which server to perform the lookup (chr) - 12 | defaults to \code{v4.whois.cymru.com}} 13 | 14 | \item{port}{TCP port to use to connect to \code{host} (int) - 15 | defaults to port \code{43}} 16 | } 17 | \value{ 18 | data frame of BGP Origin ASN lookup results 19 | \itemize{ 20 | \item \code{AS} - AS # 21 | \item \code{IP} - IPv4 (passed in) 22 | \item \code{BGP.Prefix} - BGP CIDR 23 | \item \code{CC} - Country code 24 | \item \code{Registry} - Registry it falls under 25 | \item \code{Allocated} - date it was allocated 26 | \item \code{AS.Name} - AS name 27 | } 28 | } 29 | \description{ 30 | Returns a list (slots are named by the input IPv4 addresses) 31 | with lookup results per slot 32 | } 33 | \note{ 34 | The Team Cymru's service is NOT a GeoIP service! Do not use this 35 | function for that as your results will not be accurate. 
36 | } 37 | \seealso{ 38 | \url{http://www.team-cymru.org/Services/} 39 | } 40 | 41 | -------------------------------------------------------------------------------- /man/BulkOriginASN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{BulkOriginASN} 3 | \alias{BulkOriginASN} 4 | \title{Retrieves BGP Origin ASN info for a list of ASN ids} 5 | \usage{ 6 | BulkOriginASN(asn.list, host = "v4.whois.cymru.com", port = 43) 7 | } 8 | \arguments{ 9 | \item{asn.list}{character vector of ASN ids (character)} 10 | 11 | \item{host}{which server to perform the lookup (chr) - 12 | defaults to \code{v4.whois.cymru.com}} 13 | 14 | \item{port}{TCP port to use to connect to \code{host} (int) - 15 | defaults to \code{43}} 16 | } 17 | \value{ 18 | data frame of BGP Origin ASN lookup results 19 | \itemize{ 20 | \item \code{AS} - AS # 21 | \item \code{CC} - Country code 22 | \item \code{Registry} - registry it falls under 23 | \item \code{Allocated} - when it was allocated 24 | \item \code{AS.Name} - name associated with the allocation 25 | } 26 | } 27 | \description{ 28 | Retrieves BGP Origin ASN info for a list of ASN ids 29 | } 30 | \note{ 31 | The Team Cymru's service is NOT a GeoIP service! Do not use this 32 | function for that as your results will not be accurate. 
33 | } 34 | \seealso{ 35 | \url{http://www.team-cymru.org/Services/} 36 | } 37 | 38 | -------------------------------------------------------------------------------- /man/BulkPeer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{BulkPeer} 3 | \alias{BulkPeer} 4 | \title{Retrieves BGP Peer ASN info for a list of IPv4 addresses} 5 | \usage{ 6 | BulkPeer(ip.list, host = "v4-peer.whois.cymru.com", port = 43) 7 | } 8 | \arguments{ 9 | \item{ip.list}{vector of IPv4 address (character - dotted-decimal)} 10 | 11 | \item{host}{which server to perform the lookup (chr) - 12 | defaults to \code{v4-peer.whois.cymru.com}} 13 | 14 | \item{port}{TCP port to use to connect to \code{host} (int) - 15 | defaults to \code{43}} 16 | } 17 | \value{ 18 | data frame of BGP Peer ASN lookup results 19 | \itemize{ 20 | \item \code{Peer.AS} - peer AS # 21 | \item \code{IP} - IPv4 (passed in) 22 | \item \code{BGP.Prefix} - BGP CIDR block 23 | \item \code{CC} - Country code 24 | \item \code{Registry} - Registry it falls under 25 | \item \code{Allocated} - date allocated 26 | \item \code{Peer.AS.Name} - peer name 27 | } 28 | } 29 | \description{ 30 | Retrieves BGP Peer ASN info for a list of IPv4 addresses 31 | } 32 | \note{ 33 | The Team Cymru's service is NOT a GeoIP service! Do not use this 34 | function for that as your results will not be accurate. 
35 | } 36 | \seealso{ 37 | \url{http://www.team-cymru.org/Services/} 38 | } 39 | 40 | -------------------------------------------------------------------------------- /man/CIRCL.BGP.Rank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{CIRCL.BGP.Rank} 3 | \alias{CIRCL.BGP.Rank} 4 | \title{Retrieves CIRCL aggregated, historical/current BGP rank data} 5 | \usage{ 6 | CIRCL.BGP.Rank(asn.list, circl.base.url = "http://bgpranking.circl.lu/csv/") 7 | } 8 | \arguments{ 9 | \item{asn.list}{character vector of ASN ids (character)} 10 | 11 | \item{circl.base.url}{CIRCL server base URL (chr) - 12 | defaults to \code{http://bgpranking.circl.lu/csv/}} 13 | } 14 | \value{ 15 | data frame of CIRCL rank data 16 | \itemize{ 17 | \item \code{asn} asn # 18 | \item \code{day} date 19 | \item \code{rank} current rank that day 20 | } 21 | } 22 | \description{ 23 | Retrieves CIRCL aggregated, historical/current BGP rank data 24 | } 25 | \examples{ 26 | CIRCL.BGP.Rank(57954) 27 | } 28 | \seealso{ 29 | \itemize{ 30 | \item Background on CIRCL Project (+source) \url{https://github.com/CIRCL/bgp-ranking} 31 | \item CIRCL BGP site \url{http://bgpranking.circl.lu/} 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/Nothink.Blocklist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{Nothink.Blocklist} 3 | \alias{Nothink.Blocklist} 4 | \title{Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN)} 5 | \usage{ 6 | Nothink.Blocklist(refresh = FALSE, 7 | nothink_url = "http://www.nothink.org/blacklist/blacklist_malware_dns.txt") 8 | } 9 | \arguments{ 10 | \item{refresh}{refresh the database? 
(bool)} 11 | 12 | \item{nothink_url}{Nothink blacklist URL (chr) - 13 | defaults to \code{http://www.nothink.org/blacklist/blacklist_malware_dns.txt}} 14 | } 15 | \value{ 16 | List of two single-column data frames, one for each blocklist 17 | \itemize{ 18 | \item \code{domains} - Nothink domains (column name: \code{domain}) 19 | \item \code{ips} - Nothink ips (column name: \code{IP}) 20 | } 21 | } 22 | \description{ 23 | Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN) 24 | } 25 | \details{ 26 | The Nothink blocklist refreshes regularly, but the onus is on the caller to force a 27 | refresh. First-time call will set up a cache directory & file in the user's 28 | home directory, download & generate the data frame then write the data frame 29 | out as an R object. Future calls will just re-read this data frame unless 30 | \code{refresh == TRUE} should the function refresh the database. 31 | } 32 | \seealso{ 33 | Nothink - \url{http://www.nothink.org/} 34 | } 35 | 36 | -------------------------------------------------------------------------------- /man/SANS.ASN.Detail.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{SANS.ASN.Detail} 3 | \alias{SANS.ASN.Detail} 4 | \title{Retrieves SANS ASN intel currently tracked IP detail} 5 | \usage{ 6 | SANS.ASN.Detail(asn, 7 | sans.base.url = "http://isc.sans.edu/asdetailsascii.html?as=") 8 | } 9 | \arguments{ 10 | \item{asn}{ASN to lookup (character) - no \code{AS} prefix} 11 | 12 | \item{sans.base.url}{SANS server base URL (chr) - defaults to 13 | \code{http://isc.sans.edu/asdetailsascii.html?as=}} 14 | } 15 | \value{ 16 | data frame of SANS ASN IP data 17 | \itemize{ 18 | \item \code{Source.IP} is 0 padded so each byte is three digits long 19 | \item \code{Reports.Count} number of packets received 20 | \item \code{Targets.Count} number of target IPs that reported packets from this source 21 | \item 
\code{First.Seen} First time we saw a packet from this source 22 | \item \code{Last.Seen} Last time we saw a packet from this source 23 | \item \code{Updated.Date.Time} Last date+time the record was updated 24 | } 25 | } 26 | \description{ 27 | Retrieves SANS ASN intel currently tracked IP detail 28 | } 29 | \note{ 30 | IPs are removed if they have not been seen in 30 days. 31 | } 32 | \seealso{ 33 | \url{https://isc.sans.edu/as.html} 34 | } 35 | 36 | -------------------------------------------------------------------------------- /man/Zeus.Blocklist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \name{Zeus.Blocklist} 3 | \alias{Zeus.Blocklist} 4 | \title{Retrieves Zeus Blocklist (IP/FQDN/URL)} 5 | \usage{ 6 | Zeus.Blocklist(refresh = FALSE, 7 | domains_url = "https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist", 8 | ips_url = "https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist", 9 | urls_url = "https://zeustracker.abuse.ch/blocklist.php?download=compromised") 10 | } 11 | \arguments{ 12 | \item{refresh}{refresh the database? 
(bool)} 13 | 14 | \item{domains_url}{Zeus domains blocklist URL (chr) - 15 | defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist}} 16 | 17 | \item{ips_url}{Zeus IP blocklist URL (chr) - 18 | defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist}} 19 | 20 | \item{urls_url}{Zeus compromised URLs blocklist URL (chr) - 21 | defaults to \code{https://zeustracker.abuse.ch/blocklist.php?download=compromised}} 22 | } 23 | \value{ 24 | List of three single-column data frames, one for each blocklist 25 | \itemize{ 26 | \item \code{domains} - Zeus domains (column name: \code{domain}) 27 | \item \code{ips} - Zeus ips (column name: \code{IP}) 28 | \item \code{urls} - Zeus URLs (column name: \code{URL}) 29 | } 30 | } 31 | \description{ 32 | Retrieves Zeus Blocklist (IP/FQDN/URL) 33 | } 34 | \details{ 35 | The Zeus blocklist refreshes regularly, but the onus is on the caller to force a 36 | refresh. First-time call will set up a cache directory & file in the user's 37 | home directory, download & generate the data frame then write the data frame 38 | out as an R object. Future calls will just re-read this data frame unless 39 | \code{refresh == TRUE} should the function refresh the database. 
40 | } 41 | \seealso{ 42 | Zeus blocklist info - \url{https://zeustracker.abuse.ch/blocklist.php} 43 | } 44 | 45 | -------------------------------------------------------------------------------- /man/netintel-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1.99): do not edit by hand 2 | \docType{package} 3 | \name{netintel-package} 4 | \alias{netintel} 5 | \alias{netintel-package} 6 | \title{netintel - a package containing various functions to support IPv4 address, ASN and general node intelligence lookups} 7 | \description{ 8 | When attempting to uncover "badness", a security data scientist, 9 | intelligence/ops analyst or incident responder needs metadata on the IP 10 | addresses, hosts, domains, URLs, etc. they come across. This package provides 11 | functions that aid in collecting this metadata from within R. 12 | } 13 | \author{ 14 | Bob Rudis <\url{http://github.com/hrbrmstr}> and David Severski <\url{https://github.com/davidski}> 15 | } 16 | \seealso{ 17 | The latest/development version of the package is on github - \url{http://github.com/hrbrmstr/netintel} where there are examples of usage. 
18 | } 19 | 20 | -------------------------------------------------------------------------------- /netintel.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /tests/README.knit.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "README" 3 | author: "Bob Rudis" 4 | date: "August 30, 2014" 5 | output: 6 | md_document: 7 | variant: markdown_github 8 | --- 9 | 10 | Version 1.2 adds the [Zeus](https://zeustracker.abuse.ch/blocklist.php) and [Nothink](http://www.nothink.org/) blocklists 11 | 12 | Version 1.1 brings a significant update to the core components of the `netintel` package. Every function has been re-written to be as fast as possible without resorting to `Rcpp` functions. The intent of the package is to provide as many IP & ASN intelligence routines to those using R for Security Data Science and security intel/ops/IR work. 13 | 14 | It relies on `httr`, `plyr` & `data.table`. 
15 | 16 | Current function list: 17 | 18 | - `Alien.Vault.Reputation` - Retrieves Alien Vault's IP reputation database 19 | - `BulkOrigin` - Retrieves BGP Origin ASN info for a list of IPv4 addresses 20 | - `BulkOriginASN` - Retrieves BGP Origin ASN info for a list of ASN ids 21 | - `BulkPeer` - Retrieves BGP Peer ASN info for a list of IPv4 addresses 22 | - `CIRCL.BGP.Rank` - Retrieves CIRCL aggregated, historical/current BGP rank data 23 | - `SANS.ASN.Detail` - Retrieves SANS ASN intel currently tracked IP detail 24 | - `Zeus.Blocklist` - Retrieves Zeus Blocklist (IP/FQDN/URL) 25 | - `Nothink.Blocklist` - Retrieves Nothink Malware DNS network traffic blacklist (IP/FQDN) 26 | 27 | ### Installation 28 | 29 | 30 | ```r 31 | devtools::install_github("hrbrmstr/netintel") 32 | library(netintel) 33 | ``` 34 | 35 | ### Usage 36 | 37 | 38 | ```r 39 | library(netintel) 40 | 41 | # current version 42 | packageVersion("netintel") 43 | ``` 44 | 45 | ``` 46 | ## [1] '1.1.0' 47 | ``` 48 | 49 | ```r 50 | # Bulk stuff 51 | BulkOrigin("162.243.111.4") 52 | ``` 53 | 54 | ``` 55 | ## AS IP BGP.Prefix CC Registry Allocated 56 | ## 1 62567 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 57 | ## AS.Name 58 | ## 1 DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US 59 | ``` 60 | 61 | ```r 62 | BulkOriginASN(62567) 63 | ``` 64 | 65 | ``` 66 | ## AS CC Registry Allocated 67 | ## 1 62567 US arin 2013-07-11 68 | ## AS.Name 69 | ## 1 DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US 70 | ``` 71 | 72 | ```r 73 | BulkPeer("162.243.111.4") 74 | ``` 75 | 76 | ``` 77 | ## Peer.AS IP BGP.Prefix CC Registry Allocated 78 | ## 1 174 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 79 | ## 2 286 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 80 | ## 3 3257 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 81 | ## 4 3356 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 82 | ## 5 4565 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 83 | ## 6 22822 162.243.111.4 162.243.0.0/17 US arin 2013-09-06 84 | ## Peer.AS.Name 
85 | ## 1 COGENT-174 - Cogent Communications,US 86 | ## 2 KPN KPN International / KPN Eurorings,NL 87 | ## 3 TINET-BACKBONE Tinet SpA,DE 88 | ## 4 LEVEL3 - Level 3 Communications, Inc.,US 89 | ## 5 MEGAPATH2-US - MegaPath Networks Inc.,US 90 | ## 6 LLNW - Limelight Networks, Inc.,US 91 | ``` 92 | 93 | ```r 94 | # CIRCL 95 | 96 | head(CIRCL.BGP.Rank(62567)) 97 | ``` 98 | 99 | ``` 100 | ## asn day rank 101 | ## 1 62567 2014-06-30 1.001 102 | ## 2 62567 2013-07-27 1.000 103 | ## 3 62567 2014-03-28 1.003 104 | ## 4 62567 2011-04-30 1.000 105 | ## 5 62567 2013-09-09 1.004 106 | ## 6 62567 2013-09-08 1.004 107 | ``` 108 | 109 | ```r 110 | # SANS was flaky so no example 111 | 112 | # SANS.ASN.Detail(62567) 113 | 114 | # AlienVault 115 | 116 | head(Alien.Vault.Reputation()) 117 | ``` 118 | 119 | ``` 120 | ## IP Risk Reliability Activity Country City 121 | ## 1: 1.0.131.184 1 2 Malicious Host TH 122 | ## 2: 1.121.142.154 4 2 Malicious Host AU 123 | ## 3: 1.121.164.195 4 2 Malicious Host AU Coorparoo 124 | ## 4: 1.123.40.5 4 2 Malicious Host AU Adelaide 125 | ## 5: 1.133.228.176 4 2 Malicious Host AU 126 | ## 6: 1.159.48.252 4 2 Malicious Host AU 127 | ## Latitude Longitude 128 | ## 1: 15.0 100.0 129 | ## 2: -27.0 133.0 130 | ## 3: -27.5 153.050003052 131 | ## 4: -34.9286994934 138.598602295 132 | ## 5: -27.0 133.0 133 | ## 6: -27.0 133.0 134 | ``` 135 | 136 | ```r 137 | # Zeus 138 | 139 | str(Zeus.Blocklist()) 140 | ``` 141 | 142 | ``` 143 | ## List of 3 144 | ## $ domains:'data.frame': 856 obs. of 1 variable: 145 | ## ..$ domain: chr [1:856] "039b1ee.netsolhost.com" "03a6b7a.netsolhost.com" "03a6f57.netsolhost.com" "1day.su" ... 146 | ## $ ips :'data.frame': 213 obs. of 1 variable: 147 | ## ..$ IP: chr [1:213] "103.241.0.100" "103.4.52.150" "103.7.59.135" "107.181.174.84" ... 148 | ## $ urls :'data.frame': 673 obs. 
of 1 variable: 149 | ## ..$ URL: chr [1:673] "190.104.217.181/~ssiprueb/wp-includes/css/b.exe" "190.104.217.181/~ssiprueb/wp-includes/css/cfg.bin" "190.104.217.181/~ssiprueb/wp-includes/css/login.php" "210.37.11.238/jm32/includes/site/bot.exe" ... 150 | ``` 151 | 152 | ```r 153 | # Nothink 154 | 155 | str(Nothink.Blocklist) 156 | ``` 157 | 158 | ``` 159 | ## function (refresh = FALSE, nothink_url = "http://www.nothink.org/blacklist/blacklist_malware_dns.txt") 160 | ``` 161 | 162 | ### Test Results 163 | 164 | 165 | -------------------------------------------------------------------------------- /tests/test-all.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(data.table) 3 | library(httr) 4 | 5 | test_check("netintel") -------------------------------------------------------------------------------- /tests/testthat/test-netintel.R: -------------------------------------------------------------------------------- 1 | context("Team CYMRU") 2 | 3 | test_that("we can perform Team CYMRU functions", { 4 | 5 | # success 6 | expect_that(BulkOrigin("162.243.111.4"), equals(structure(list(AS = 62567, IP = "162.243.111.4", BGP.Prefix = "162.243.0.0/17", 7 | CC = "US", Registry = "arin", Allocated = "2013-09-06", AS.Name = "DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US"), .Names = c("AS", 8 | "IP", "BGP.Prefix", "CC", "Registry", "Allocated", "AS.Name"), row.names = c(NA, 9 | -1L), class = "data.frame"))) 10 | expect_that(BulkOriginASN(62567), equals(structure(list(AS = 62567, CC = "US", Registry = "arin", Allocated = "2013-07-11", 11 | AS.Name = "DIGITALOCEAN-ASN-NY2 - Digital Ocean, Inc.,US"), .Names = c("AS", 12 | "CC", "Registry", "Allocated", "AS.Name"), row.names = c(NA, 13 | -1L), class = "data.frame"))) 14 | 15 | 16 | expect_that(BulkPeer("162.243.111.4"), equals(structure(list(Peer.AS = c(174, 286, 3257, 3356, 4565, 22822), 17 | IP = c("162.243.111.4", "162.243.111.4", "162.243.111.4", 18 | 
"162.243.111.4", "162.243.111.4", "162.243.111.4"), BGP.Prefix = c("162.243.0.0/17", 19 | "162.243.0.0/17", "162.243.0.0/17", "162.243.0.0/17", "162.243.0.0/17", 20 | "162.243.0.0/17"), CC = c("US", "US", "US", "US", "US", "US" 21 | ), Registry = c("arin", "arin", "arin", "arin", "arin", "arin" 22 | ), Allocated = c("2013-09-06", "2013-09-06", "2013-09-06", 23 | "2013-09-06", "2013-09-06", "2013-09-06"), Peer.AS.Name = c("COGENT-174 - Cogent Communications,US", 24 | "KPN KPN International / KPN Eurorings,NL", "TINET-BACKBONE Tinet SpA,DE", 25 | "LEVEL3 - Level 3 Communications, Inc.,US", "MEGAPATH2-US - MegaPath Networks Inc.,US", 26 | "LLNW - Limelight Networks, Inc.,US")), .Names = c("Peer.AS", 27 | "IP", "BGP.Prefix", "CC", "Registry", "Allocated", "Peer.AS.Name" 28 | ), row.names = c(NA, -6L), class = "data.frame"))) 29 | }) 30 | 31 | context("CIRCL") 32 | 33 | test_that("we can perform CIRCL functions", { 34 | 35 | expect_that(tail(CIRCL.BGP.Rank(62567))[1,1], equals(62567)) 36 | #expect_that(gethostbyname("f0011"), equals(character(0))) 37 | #expect_that(gethostbyname("f0011"), equals(character(0))) 38 | 39 | }) 40 | 41 | # context("SANS") 42 | # 43 | # test_that("we can perform SANS functions", { 44 | # 45 | # expect_that(SANS.ASN.Detail(62567), is_a("data.frame")) 46 | # #expect_that(gethostbyname("f0011"), equals(character(0))) 47 | # #expect_that(gethostbyname("f0011"), equals(character(0))) 48 | # 49 | # }) 50 | 51 | context("AlienVault") 52 | 53 | test_that("we can perform AlienVault function", { 54 | 55 | expect_that(Alien.Vault.Reputation(), is_a("data.table")) 56 | #expect_that(gethostbyname("f0011"), equals(character(0))) 57 | #expect_that(gethostbyname("f0011"), equals(character(0))) 58 | 59 | }) 60 | 61 | context("Zeus") 62 | 63 | test_that("we can perform Zeus function", { 64 | 65 | expect_that(Zeus.Blocklist(), is_a("list")) 66 | #expect_that(gethostbyname("f0011"), equals(character(0))) 67 | #expect_that(gethostbyname("f0011"), 
equals(character(0))) 68 | 69 | }) 70 | 71 | 72 | context("Nothink") 73 | 74 | test_that("we can perform Nothink function", { 75 | 76 | expect_that(Nothink.Blocklist(), is_a("list")) 77 | #expect_that(gethostbyname("f0011"), equals(character(0))) 78 | #expect_that(gethostbyname("f0011"), equals(character(0))) 79 | 80 | }) --------------------------------------------------------------------------------